# A few things before we start..

1. I don't watch American Football. I care more about round ball football (soccer) and Australian Rules Football.  
2. I will focus more on punts, because I like Pat McAfee, and I like Australian punters. Also, as Jon Bois said, [kickoffs are stupid and bad.](https://www.youtube.com/watch?v=t_SsIKgwvz4)

With that out of the way..

In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import plotly.express as px

In [None]:
os.getcwd()

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)


In [None]:
# Load the games table; later cells read gameId, season, week, homeTeamAbbr
# and visitorTeamAbbr from it.
games = pd.read_csv('../input/nfl-big-data-bowl-2022/games.csv')

In [None]:
# Load the remaining tables used by this notebook: PFF scouting data, plays,
# players, and the tracking file for 2020.
scouting_data = pd.read_csv('../input/nfl-big-data-bowl-2022/PFFScoutingData.csv')
plays = pd.read_csv('../input/nfl-big-data-bowl-2022/plays.csv')
players = pd.read_csv('../input/nfl-big-data-bowl-2022/players.csv')
tracking_2020 = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2020.csv')

In [None]:
# Attach the PFF scouting columns to every play. Both tables carry gameId and
# playId, so join on those keys rather than on row position, which would
# silently mismatch plays if the two CSVs were ever ordered differently.
plays_joined = plays.merge(scouting_data, how="left", on=["gameId", "playId"], suffixes=('', '_y'))
# Add the game-level columns (season, week, team abbreviations, ...).
plays_joined = plays_joined.merge(games, how="left", on="gameId")


# Cut plays to punts from the 2020 season only.
# Both halves of the mask come from plays_joined so they share its index, and
# the explicit copy lets later cells add columns to `punts` safely.
is_2020_punt = (plays_joined["season"] == 2020) & (plays_joined["specialTeamsPlayType"] == "Punt")
punts = plays_joined[is_2020_punt].copy()

# Convert ID and jersey number to nullable integers instead of floats.
to_convert = ['nflId', 'jerseyNumber']
for col in to_convert:
    tracking_2020[col] = tracking_2020[col].astype('Int64')

Let's see what events there are in the tracking data.

In [None]:
# Distinct event labels present in the tracking data, in order of first appearance.
pd.unique(tracking_2020['event'])

Whoops, looks like there's a typo. Let's fix that, shall we?

In [None]:
# Normalise the misspelled event label to its intended spelling.
tracking_2020['event'] = tracking_2020['event'].replace("field_goal_miseed", "field_goal_missed")

# Visualization of plays

This code builds on the visualizations in notebooks that have already been published for this dataset. It lets you see not only the players but also their roles, based on the play and PFF data: who the gunners are, who the vises are, who made the tackle, who missed the tackle, etc.

Expand the cell below to see the code.

In [None]:
def draw_play(game_id, play_id):
    """Build an animated Plotly scatter of one play from the 2020 tracking data.

    Every tracking row of the play is tagged with a role derived from the
    play and PFF scouting tables. The first matching role wins, in this
    order: Kicker, Returner, Gunner, Vise, Tackler, Missed Tackler,
    Assist Tackler, Punt Rushers, Safety, Football; anything else is
    "Normal". The role decides the marker border colour and is shown in the
    hover box.

    Side effects: prints the play description and the frames whose event
    label is not "None".

    Args:
        game_id: gameId of the play.
        play_id: playId of the play within that game.

    Returns:
        The plotly figure; call ``.show()`` on it or leave it as the last
        expression of a notebook cell.

    Raises:
        ValueError: from ``.item()`` when ``games``, ``plays`` or
            ``scouting_data`` do not hold exactly one row for the request.
    """
    tracking_data = tracking_2020.loc[(tracking_2020['gameId'] == game_id) & (tracking_2020['playId'] == play_id)]
    # Rows without a jersey number / nflId get 0 so they still format and
    # compare cleanly below (presumably these are the ball's rows).
    tracking_data = tracking_data.fillna({'jerseyNumber': 0, 'nflId': 0})

    # One row per (frame, event) pair, used for the printed summary.
    event = tracking_data[['frameId', 'event']].drop_duplicates().reset_index()

    game_data = games[(games['gameId'] == game_id)]
    scout_data = scouting_data[(scouting_data['gameId'] == game_id) & (scouting_data['playId'] == play_id)]
    play_data = plays[(plays['gameId'] == game_id) & (plays['playId'] == play_id)]

    home_team = game_data['homeTeamAbbr'].item()
    visitor_team = game_data['visitorTeamAbbr'].item()

    # Grab gunners, vises, kicker, returner, tackler, assist tacklers, missed tacklers.

    # "<team> <jerseyNumber>" key in the same shape as the translated PFF
    # entries. Vectorised string concat instead of a per-row format call.
    tracking_data["teamJerseyCombined"] = (
        tracking_data["team"].astype(str) + " " + tracking_data["jerseyNumber"].astype(str)
    )

    team_alias = {home_team: "home", visitor_team: "away"}

    def split_and_return_teams(df, col):
        """Split a ';'-separated scouting cell and map each team code to home/away."""
        raw = df[col].item()
        if not isinstance(raw, str):
            # Not a string (e.g. NaN): nobody is listed for this role.
            return []
        labels = []
        for entry in raw.split(";"):
            # Match the team token exactly; a substring replace would turn
            # "LAC 12" into "homeC 12" whenever the home team is "LA".
            team, sep, rest = entry.strip().partition(" ")
            labels.append(team_alias.get(team, team) + sep + rest)
        return labels

    gunners = split_and_return_teams(scout_data, "gunners")
    vises = split_and_return_teams(scout_data, "vises")
    tackler = split_and_return_teams(scout_data, "tackler")
    missed_tackler = split_and_return_teams(scout_data, "missedTackler")
    assist_tackler = split_and_return_teams(scout_data, "assistTackler")
    rushers = split_and_return_teams(scout_data, "puntRushers")
    safeties = split_and_return_teams(scout_data, "specialTeamsSafeties")

    # Sentinel that can never equal an nflId (missing ids were filled with 0).
    no_player = -1

    def single_player_id(column):
        """Return the play's id in `column` as an int, or the sentinel if it is not a single id."""
        try:
            return int(play_data[column].item())
        except (ValueError, TypeError):
            # Missing value, or a cell that is not one plain integer id.
            return no_player

    kicker = single_player_id('kickerId')
    returner = single_player_id('returnerId')

    conditions = np.array([
        tracking_data['nflId'] == kicker,
        tracking_data['nflId'] == returner,
        tracking_data["teamJerseyCombined"].isin(gunners),
        tracking_data["teamJerseyCombined"].isin(vises),
        tracking_data["teamJerseyCombined"].isin(tackler),
        tracking_data["teamJerseyCombined"].isin(missed_tackler),
        tracking_data["teamJerseyCombined"].isin(assist_tackler),
        tracking_data["teamJerseyCombined"].isin(rushers),
        tracking_data["teamJerseyCombined"].isin(safeties),
        # Case-insensitive so the ball is found whether the team column
        # spells it "Football" or "football" (TODO confirm against the data).
        tracking_data["team"].str.lower() == "football"
    ], dtype=bool)

    choices = ["Kicker", "Returner", "Gunner", "Vise", "Tackler", "Missed Tackler", "Assist Tackler",
               "Punt Rushers", "Safety", "Football"]

    tracking_data = tracking_data.reset_index(drop=True)
    # np.select takes the first condition that is true for each row.
    tracking_data['role'] = np.select(conditions, choices, default="Normal")

    # Set border colors of points based on grabbed data.
    # Roles not listed here (Assist Tackler, Punt Rushers) fall back to purple.
    border_colors = {
        "Normal": "black",
        "Gunner": "red",
        "Vise": "green",
        "Kicker": "blue",
        "Returner": "yellow",
        "Tackler": "gold",
        "Missed Tackler": "DarkSlateGrey",
        "Safety": "cyan",
        "Football": "white",
    }

    def set_color(role):
        # `role` is one customdata row; the role is its last entry because it
        # is the last hover_data column passed to px.scatter below.
        return border_colors.get(role[-1], "purple")

    game_string = "{} @ {} - {} Week {}, Play ID {}".format(visitor_team, home_team, game_data["season"].item(),
                                               game_data["week"].item(), play_data["playId"].item())
    fig = px.scatter(tracking_data, x="x", y="y", animation_frame="frameId", animation_group="nflId",
                     color="team",
                     hover_name="displayName", range_x=[0, 120], range_y=[-2, 55.3],
                     width=1047, height=600,
                     title=game_string,
                     hover_data=["position", "jerseyNumber", "role"])
    # White rectangle from (10, 0) to (110, 53.3) drawn beneath the markers
    # (presumably the field between the goal lines).
    fig.add_shape(type="rect",
                  xref="x", yref="y",
                  x0=10, y0=0,
                  layer="below",
                  x1=110, y1=53.3,
                  line=dict(
                      color="black",
                      width=2,
                  ),
                  fillcolor="white",
                  )
    fig.update_xaxes(tick0=0, dtick=10)
    # Adjust the first animation button: 100 ms per frame and per transition.
    fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 100
    fig.layout.updatemenus[0].buttons[0].args[1]['transition'] = {"duration": 100, "easing": "quad"}

    for trace in fig.data:
        trace['marker'] = dict(size=12,
                               line=dict(width=2,
                                         color=list(map(set_color, trace['customdata']))))

    # This was for dynamic titles to show the event at frame but it was laggy due to redrawing
    #for button in fig.layout.updatemenus[0].buttons:
    #    button['args'][1]['frame']['redraw'] = True
    #for step in fig.layout.sliders[0].steps:
    #    step["args"][1]["frame"]["redraw"] = True

    notable_events = event[(event["event"] != "None")]
    print(play_data["playDescription"].item())
    print("Notable events")
    print(notable_events)
    return fig

Now, let's see some plays!

What's the longest punt return of the 2020 season?

In [None]:
# Punts that were actually returned. The mask is taken from `punts` itself so
# its index is exactly the index of the frame being filtered.
punt_returns = punts.loc[punts.specialTeamsResult == "Return"]
# idxmax picks exactly one row even when several returns tie for the longest,
# so the .item() calls below cannot fail on a multi-row selection.
max_return_yardage_play = punt_returns.loc[[punt_returns["kickReturnYardage"].idxmax()]]
draw_play(max_return_yardage_play["gameId"].item(), max_return_yardage_play["playId"].item())

On the other hand, what's the saddest punt return out there?

In [None]:
# idxmin picks exactly one row even when several returns tie for the shortest,
# so the .item() calls below cannot fail on a multi-row selection.
min_return_yardage_play = punt_returns.loc[[punt_returns["kickReturnYardage"].idxmin()]]
draw_play(min_return_yardage_play["gameId"].item(), min_return_yardage_play["playId"].item())

# Punting EDA

In [None]:
# Summary statistics for every column of the punt table, numeric or not.
punts.describe(include="all")

In [None]:
# Flag punts whose actual direction matched the intended one. The comparison
# already yields booleans, and assign() returns a new frame instead of writing
# into what may be a filtered slice of plays_joined.
punts = punts.assign(
    kickDirectionSameAsIntended=punts.kickDirectionIntended == punts.kickDirectionActual
)

In [None]:
# Number of punts that did / did not go in the intended direction.
kick_direction_grouped = punts.groupby("kickDirectionSameAsIntended").size().reset_index(name='count')
px.bar(kick_direction_grouped, x="kickDirectionSameAsIntended", y="count", title="How often do kickers kick in different directions than the setup intended?").show()

# Share of all punts in each (direction matched, result) combination.
# Not plotted below; kept so the table stays available for inspection.
kick_direction_result = punts.groupby(["kickDirectionSameAsIntended", "specialTeamsResult"]).size().reset_index(name='count')
kick_direction_result["count"] = kick_direction_result["count"] / kick_direction_result["count"].sum()

# Share of each result within its direction group. transform("sum") keeps the
# original index, so reset_index works on every pandas version; the former
# groupby(...).apply(...) form duplicates the group key in the index on
# pandas >= 2.0 and reset_index then fails.
result_counts = punts.groupby(["kickDirectionSameAsIntended", "specialTeamsResult"]).size()
kick_dir_result_pct = (result_counts / result_counts.groupby(level=0).transform("sum")).reset_index(name='pct')

px.bar(kick_dir_result_pct, x="kickDirectionSameAsIntended", y="pct", color="specialTeamsResult",
       barmode="group", title="Kick Direction vs Special Teams Result").show()

# Return yardage only makes sense for punts that were returned, so plot the
# filtered frame (it used to be computed and then ignored in favour of `punts`).
returned_punts = punts[(punts.specialTeamsResult == "Return")]
px.box(returned_punts, x="kickDirectionSameAsIntended", y="kickReturnYardage", points="all",
       title="Did changing kick direction impact return yardage?",).show()

### Just to check what's in them, let's look at the plays with a non-special teams result, and watch one of them.

In [None]:
# Punts whose special teams result is recorded as "Non-Special Teams Result".
is_non_st_result = punts["specialTeamsResult"] == "Non-Special Teams Result"
non_special_teams_result = punts[is_non_st_result]


from wordcloud import WordCloud

# Word cloud over all play descriptions of those punts, joined into one text.
description_text = " ".join(non_special_teams_result.playDescription)
wordcloud = WordCloud(background_color='white', width=800, height=600).generate(description_text)

plt.imshow(wordcloud)

In [None]:
# Let's watch one: the row at position 8 of the non-special-teams punts.
test_play = non_special_teams_result.iloc[8]
draw_play(
    test_play["gameId"].item(),
    test_play["playId"].item(),
)

In [None]:
# Count punts per kick contact type and chart the counts.
contact_counts = punts.groupby("kickContactType").size()
kick_contact_grouped = contact_counts.reset_index(name='count')
px.bar(kick_contact_grouped, x="kickContactType", y="count", title="Kick contact type")

## Who has the top 10 punting averages and net punting avg in the 2020 season?

In [None]:
# Columns needed for the per-punter averages.
kicks_and_yards = punts[["kickerId", "kickLength", "playDescription", "playResult"]]

In [None]:
# Per kickerId: number of punts (non-null descriptions), mean playResult
# (reported as netPuntingAvg) and mean kickLength (reported as puntingAvg).
kicker_avg = (
    kicks_and_yards.groupby("kickerId")
    .agg(
        count=("playDescription", "count"),
        netPuntingAvg=("playResult", "mean"),
        puntingAvg=("kickLength", "mean"),
    )
    .sort_values(["puntingAvg", "netPuntingAvg"])
    .reset_index()
)
# Bring in player details (displayName etc.) for each kickerId.
kicker_avg = kicker_avg.merge(players, left_on='kickerId', right_on='nflId', how="left")

# Exclude kickers that didn't kick 10 or more punts.
kicker_avg = kicker_avg[kicker_avg["count"] >= 10]

top_by_gross = kicker_avg.sort_values("puntingAvg", ascending=False).head(10)
px.bar(top_by_gross, x="displayName", y="puntingAvg",
       hover_data=["count"], title="Top 10 punting average").show()

top_by_net = kicker_avg.sort_values("netPuntingAvg", ascending=False).head(10)
px.bar(top_by_net, x="displayName", y="netPuntingAvg",
       hover_data=["count"], title="Top 10 net punting average").show()

## Let's see the other side of the play: returners!

Which returners have the most return yardage?
Note: Returners are more often ranked based on total yardage instead of their average. You know, with fair catch and stuff.

In [None]:
returners_and_yards = punts.loc[:, ["returnerId", "kickReturnYardage", "playDescription"]]

#We don't have to exclude returners based on the number of times they've returned
#since we're ranking it by total yardage.

# Work on an explicit copy of the players table: `players.loc[:, :]` is not
# guaranteed to be independent of `players`, and casting nflId to str in place
# would then also change the table other cells merge on.
player_dup = players.copy()
# nflId becomes a string so it can be matched against returnerId in the merge
# below (assumes returnerId is stored as text — TODO confirm).
player_dup["nflId"] = player_dup["nflId"].astype('str')

# Per returnerId: number of punts (non-null descriptions) and total return yardage.
returners_yards = returners_and_yards.groupby("returnerId").agg({"playDescription":"count",
        "kickReturnYardage":"sum"})\
        .rename(columns={'playDescription':'count'}).sort_values("kickReturnYardage").reset_index()
returners_yards = pd.merge(returners_yards, player_dup, left_on='returnerId', right_on='nflId', how="left")
px.bar(returners_yards.sort_values("kickReturnYardage",ascending=False).iloc[:10,:],x="displayName",y="kickReturnYardage",
       hover_data=["count"], title="Top 10 total return yards").show()