# 1. Import and Read CSV

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from IPython.display import display

# Notebook-wide display defaults: two-decimal floats and up to 160 columns
# shown when a DataFrame is rendered.
pd.set_option('display.precision', 2)
pd.options.display.max_columns = 160

# Seaborn theme: dark grid background, fonts scaled by 1.5.
sns.set_style(style="darkgrid")
sns.set(font_scale=1.5)

import warnings
# Start from a clean warning-filter state.
warnings.resetwarnings()

# TODO: fix later (translated from the original note). Until then, silence
# pandas' SettingWithCopyWarning for the whole notebook.
# The class is exposed from pandas.errors in newer pandas and from
# pandas.core.common in older releases; it may be absent altogether, so it is
# looked up defensively instead of hard-coding a single location (the
# hard-coded legacy path raises AttributeError on pandas 2.x).
copy_warning_cls = getattr(pd.errors, 'SettingWithCopyWarning', None)
if copy_warning_cls is None:
    copy_warning_cls = getattr(pd.core.common, 'SettingWithCopyWarning', None)
if copy_warning_cls is not None:
    warnings.simplefilter('ignore', copy_warning_cls)

# Team abbreviation -> hex colour, passed as the seaborn palette in the plots.
colorlist = {
    'ARI': '#97233F',
    'ATL': '#a71930',
    'BAL': '#4e33ff',
    'BUF': '#004bcc',
    'CAR': '#0085CA',
    'CHI': '#C83803',
    'CIN': '#FB4F14',
    'CLE': '#FF3C00',
    'DAL': '#004d99',
    'DEN': '#FB4F14',
    'DET': '#0076B6',
    'GB': '#3b665b',
    'HOU': '#1f5a99',
    'IND': '#005fcc',
    'JAX': '#006778',
    'KC': '#E31837',
    'LA': '#0066cc',
    'LAC': '#0080C6',
    'LV': '#A5ACAF',
    'MIA': '#00becc',
    'MIN': '#7a3bcc',
    'NE': '#1f5c99',
    'NO': '#D3BD8C',
    'NYG': '#1644cc',
    'NYJ': '#006649',
    'PHI': '#008a99',
    'PIT': '#FFB612',
    'SEA': '#69BE28',
    'SF': '#D50A0A',
    'TB': '#D50A0A',
    'TEN': '#4B92DB',
    'WAS': '#992222',
}

# Load the three input tables (scouting data, plays, games), in that order.
PFFScoutingData, plays, games = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/nfl-big-data-bowl-2022/PFFScoutingData.csv',
        '../input/nfl-big-data-bowl-2022/plays.csv',
        '../input/nfl-big-data-bowl-2022/games.csv',
    )
)

# 2. Arrange Data

In [None]:
# Join the scouting rows to their plays on (gameId, playId), then attach the
# game-level columns on gameId. Both merges use pandas' default join type.
df_all = (
    PFFScoutingData
    .merge(plays, on=['gameId', 'playId'])
    .merge(games, on='gameId')
)

# Treat the Oakland Raiders as the Las Vegas Raiders: every cell equal to
# 'OAK' becomes 'LV'.
df_all = df_all.replace('OAK', 'LV')

# Keep kickoff plays only and remove onside kicks (kickType 'O').
is_kickoff = df_all['specialTeamsPlayType'] == 'Kickoff'
not_onside = df_all['kickType'] != 'O'
# Take an explicit copy so the column assignments below write to an
# independent frame rather than to a filtered slice of df_all (assigning to
# such a slice is what raises pandas' SettingWithCopyWarning).
Kickoff = df_all[is_kickoff & not_onside].copy()

# Add returnTeam: whichever of the home/visitor teams is not possessionTeam.
# Rows where homeTeamAbbr equals possessionTeam get visitorTeamAbbr; all other
# rows get homeTeamAbbr.
home_has_possession = Kickoff['homeTeamAbbr'] == Kickoff['possessionTeam']
Kickoff['returnTeam'] = Kickoff['homeTeamAbbr'].where(
    ~home_has_possession, Kickoff['visitorTeamAbbr']
)

# Add nextOffensePosition:
# |kickLength + yardlineNumber - kickReturnYardage - 100|.
Kickoff['nextOffensePosition'] = (
    Kickoff['kickLength']
    + Kickoff['yardlineNumber']
    - Kickoff['kickReturnYardage']
    - 100
).abs()

# Restrict to the 2018 season and group once by return team.
Kickoff2018 = Kickoff[Kickoff['season'] == 2018]
by_return_team = Kickoff2018.groupby('returnTeam')
quartile_levels = [0, 0.25, 0.5, 0.75, 1.0]

# Per-team min / Q1 / median / Q3 / max. The column is selected BEFORE
# aggregating: reducing the whole frame first would also try to aggregate the
# string columns (team abbreviations, play type, ...), which raises TypeError
# on pandas >= 2.0 and is wasted work on older versions.
QuartileKickoff2018 = pd.DataFrame()
QuartileKickoff2018['kickReturnYardageQuartile'] = (
    by_return_team['kickReturnYardage'].quantile(quartile_levels)
)
QuartileKickoff2018['nextOffensePositionQuartile'] = (
    by_return_team['nextOffensePosition'].quantile(quartile_levels)
)

# Per-team summary: number of non-null return yardages and the two averages.
Kickoff2018Info = pd.DataFrame()
Kickoff2018Info['kickReturnAttemptCount'] = (
    by_return_team['kickReturnYardage'].count().astype(int)
)
Kickoff2018Info['kickReturnAverage'] = by_return_team['kickReturnYardage'].mean()
Kickoff2018Info['nextOffensePositionAverage'] = (
    by_return_team['nextOffensePosition'].mean()
)

# 3. What Is a Quartile? (Reference: Wikipedia)
### Detail (https://en.wikipedia.org/wiki/Quartile)

# 4. Analyze Kick-off Return 2018
### Use `seaborn.boxplot` to analyze special-teams data. Black dots are outliers.

> ## 4-1 Kick-off Return Yardage
> ### Yards gained by return team if there was a return on a kickoff or punt. 

In [None]:
# Box plot of return yardage per return team (rows sorted by returnTeam before
# plotting), followed by the per-team count/average and quartile tables.
f = plt.figure(figsize=(20, 14))
f.suptitle('Yards Gained by Kick-Off Return Teams 2018')

kickoffs_by_team = Kickoff2018.sort_values('returnTeam')
sns.boxplot(
    x='returnTeam',
    y='kickReturnYardage',
    data=kickoffs_by_team,
    palette=colorlist,
)

yardage_summary = Kickoff2018Info[['kickReturnAttemptCount', 'kickReturnAverage']]
yardage_quartiles = QuartileKickoff2018[['kickReturnYardageQuartile']]
display(yardage_summary.T)
display(yardage_quartiles.T)
f.tight_layout()

> ### For example, the figure above shows:
> #### ・TEN gained 21.5-32.5 yards on the middle 50% of its kick returns. This is the best of all teams.
> #### ・GB gained 19.25-23 yards on the middle 50% of its 30 kick returns.  
> #### ・JAX gained 9.75-26.75 yards on the middle 50% of its 26 kick returns.  
> #### ・GB and JAX have similar averages, but the spread of their middle 50% differs.

> ## 4-2 Offense Position After Kick-off Return

In [None]:
# Box plot of the offense's starting position after the return, per return
# team (rows sorted by returnTeam before plotting), followed by the per-team
# count/average and quartile tables.
f = plt.figure(figsize=(20, 14))
f.suptitle('Offense Position After Kick-off Return 2018')

kickoffs_by_team = Kickoff2018.sort_values('returnTeam')
sns.boxplot(
    x='returnTeam',
    y='nextOffensePosition',
    data=kickoffs_by_team,
    palette=colorlist,
)

position_summary = Kickoff2018Info[['kickReturnAttemptCount', 'nextOffensePositionAverage']]
position_quartiles = QuartileKickoff2018[['nextOffensePositionQuartile']]
display(position_summary.T)
display(position_quartiles.T)
f.tight_layout()

> ### For example, the figure above shows:
> #### ・ARI, CAR, CHI, NE, PHI and SEA start their offense short of the 20-yard line on the lowest 25% of their kick returns.
> #### ・CIN starts its offense between the 23- and 34-yard lines on the middle 50% of its kick returns. This is the best of all teams.
> #### ・CHI starts its offense between the 16.75- and 23.5-yard lines on the middle 50% of its kick returns. This is the worst of all teams.