<font size = "6"> Fordham Sports Analytics Society Big Data Bowl 2023 - Exploratory Analysis </font>

<font size = "4"> Run through ideas for eventual model, create new model features, and understand provided materials. </font>

- Authors:  Peter Majors, Chris Orlando, Jack Townsend, and Etienne Busnel
- Kaggle:  https://www.kaggle.com/competitions/nfl-big-data-bowl-2023/overview (Resources)
- Our Github:  https://github.com/peterlmajors/FSAS_BigDataBowl_2023 (Up-To-Date Code)

In [1]:
#Import Required Packages

#Data Manipulation
import pandas as pd
import numpy as np
import math

#Data Vizualization
import seaborn as sns 
import matplotlib.pyplot as plt 
import plotly.graph_objects as go
from haversine import haversine

#Notebook Settings
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 1000)

In [2]:
#Importing Kaggle Data (Needed For Animated Plays Function (Takes Original Data))

# #Games - Basic Information On All Games
games = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/games.csv")

# #pffScout - PFF Judgements For Each Player On Each Play
pffScout = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/pffScoutingData.csv")

# #Players - Basics On Players
players = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/players.csv")

# #Plays - Everthing About Specific Plays
plays = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/plays.csv")

# #Week - Frame-By-Frame Player Tracking
week1 = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/week1.csv")
week2 = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/week2.csv")
week3 = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/week3.csv")
week4 = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/week4.csv")
week5 = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/week5.csv")
week6 = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/week6.csv")
week7 = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/week7.csv")
week8 = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/case_data/week8.csv")

In [3]:
#Define Function To Animate Plays
colors = {'ARI':"#97233F", 'ATL':"#A71930", 'BAL':'#241773', 'BUF':"#00338D", 'CAR':"#0085CA", 'CHI':"#C83803", 'CIN':"#FB4F14", 'CLE':"#311D00", 
          'DAL':'#003594', 'DEN':"#FB4F14", 'DET':"#0076B6", 'GB':"#203731", 'HOU':"#03202F", 'IND':"#002C5F", 'JAX':"#9F792C", 'KC':"#E31837", 
          'LA':"#003594", 'LAC':"#0080C6", 'LV':"#000000", 'MIA':"#008E97", 'MIN':"#4F2683", 'NE':"#002244", 'NO':"#D3BC8D", 'NYG':"#0B2265", 
          'NYJ':"#125740", 'PHI':"#004C54", 'PIT':"#FFB612", 'SEA':"#69BE28", 'SF':"#AA0000", 'TB':'#D50A0A', 'TEN':"#4B92DB", 'WAS':"#5A1414", 
          'football':'#CBB67C'}

def animate_play(tracking_df, play_df,players,pffScoutingData, gameId,playId):
    selected_play_df = play_df[(play_df.playId==playId)&(play_df.gameId==gameId)].copy()
    
    tracking_players_df = pd.merge(tracking_df,players,how="left",on = "nflId")
    tracking_players_df = pd.merge(tracking_players_df,pffScoutingData,how="left",on = ["nflId","playId","gameId"])
    selected_tracking_df = tracking_players_df[(tracking_players_df.playId==playId)&(tracking_players_df.gameId==gameId)].copy()

    sorted_frame_list = selected_tracking_df.frameId.unique()
    sorted_frame_list.sort()

    # get play General information 
    line_of_scrimmage = selected_play_df.absoluteYardlineNumber.values[0]
    first_down_marker = line_of_scrimmage + selected_play_df.yardsToGo.values[0]
    down = selected_play_df.down.values[0]
    quarter = selected_play_df.quarter.values[0]
    gameClock = selected_play_df.gameClock.values[0]
    playDescription = selected_play_df.playDescription.values[0]
    # Handle case where we have a really long Play Description and want to split it into two lines
    if len(playDescription.split(" "))>15 and len(playDescription)>115:
        playDescription = " ".join(playDescription.split(" ")[0:16]) + "<br>" + " ".join(playDescription.split(" ")[16:])

    # initialize plotly start and stop buttons for animation
    updatemenus_dict = [
        {
             "buttons": [
                {
                    "args": [None, {"frame": {"duration": 100, "redraw": False},
                                "fromcurrent": True, "transition": {"duration": 0}}],
                    "label": "Play",
                    "method": "animate"
                },
                {
                    "args": [[None], {"frame": {"duration": 0, "redraw": False},
                                      "mode": "immediate",
                                      "transition": {"duration": 0}}],
                    "label": "Pause",
                    "method": "animate"
                }
            ],
            "direction": "left",
            "pad": {"r": 10, "t": 87},
            "showactive": False,
            "type": "buttons",
            "x": 0.1,
            "xanchor": "right",
            "y": 0,
            "yanchor": "top"
        }
    ]
    # initialize plotly slider to show frame position in animation
    sliders_dict = {
        "active": 0,
        "yanchor": "top",
        "xanchor": "left",
        "currentvalue": {
            "font": {"size": 20},
            "prefix": "Frame:",
            "visible": True,
            "xanchor": "right"
        },
        "transition": {"duration": 300, "easing": "cubic-in-out"},
        "pad": {"b": 10, "t": 50},
        "len": 0.9,
        "x": 0.1,
        "y": 0,
        "steps": []
    }


    frames = []
    for frameId in sorted_frame_list:
        data = []
        # Add Numbers to Field 
        data.append(
            go.Scatter(
                x=np.arange(20,110,10), 
                y=[5]*len(np.arange(20,110,10)),
                mode='text',
                text=list(map(str,list(np.arange(20, 61, 10)-10)+list(np.arange(40, 9, -10)))),
                textfont_size = 30,
                textfont_family = "Courier New, monospace",
                textfont_color = "#ffffff",
                showlegend=False,
                hoverinfo='none'
            )
        )
        data.append(
            go.Scatter(
                x=np.arange(20,110,10), 
                y=[53.5-5]*len(np.arange(20,110,10)),
                mode='text',
                text=list(map(str,list(np.arange(20, 61, 10)-10)+list(np.arange(40, 9, -10)))),
                textfont_size = 30,
                textfont_family = "Courier New, monospace",
                textfont_color = "#ffffff",
                showlegend=False,
                hoverinfo='none'
            )
        )
        # Add line of scrimage 
        data.append(
            go.Scatter(
                x=[line_of_scrimmage,line_of_scrimmage], 
                y=[0,53.5],
                line_dash='dash',
                line_color='blue',
                showlegend=False,
                hoverinfo='none'
            )
        )
        # Add First down line 
        data.append(
            go.Scatter(
                x=[first_down_marker,first_down_marker], 
                y=[0,53.5],
                line_dash='dash',
                line_color='yellow',
                showlegend=False,
                hoverinfo='none'
            )
        )
        # Plot Players
        for team in selected_tracking_df.team.unique():
            plot_df = selected_tracking_df[(selected_tracking_df.team==team)&(selected_tracking_df.frameId==frameId)].copy()
            if team != "football":
                hover_text_array=[]
                for nflId in plot_df.nflId:
                    selected_player_df = plot_df[plot_df.nflId==nflId]
                    hover_text_array.append("nflId:{}<br>displayName:{}<br>Position:{}<br>Role:{}".format(selected_player_df["nflId"].values[0],
                                                                                      selected_player_df["displayName"].values[0],
                                                                                      selected_player_df["pff_positionLinedUp"].values[0],
                                                                                      selected_player_df["pff_role"].values[0]))
                data.append(go.Scatter(x=plot_df["x"], y=plot_df["y"],mode = 'markers',marker_color=colors[team],name=team,hovertext=hover_text_array,hoverinfo="text"))
            else:
                data.append(go.Scatter(x=plot_df["x"], y=plot_df["y"],mode = 'markers',marker_color=colors[team],name=team,hoverinfo='none'))
        # add frame to slider
        slider_step = {"args": [
            [frameId],
            {"frame": {"duration": 100, "redraw": False},
             "mode": "immediate",
             "transition": {"duration": 0}}
        ],
            "label": str(frameId),
            "method": "animate"}
        sliders_dict["steps"].append(slider_step)
        frames.append(go.Frame(data=data, name=str(frameId)))

    scale=10
    layout = go.Layout(
        autosize=False,
        width=120*scale,
        height=60*scale,
        xaxis=dict(range=[0, 120], autorange=False, tickmode='array',tickvals=np.arange(10, 111, 5).tolist(),showticklabels=False),
        yaxis=dict(range=[0, 53.3], autorange=False,showgrid=False,showticklabels=False),

        plot_bgcolor='#00B140',
        # Create title and add play description at the bottom of the chart for better visual appeal
        title=f"GameId: {gameId}, PlayId: {playId}<br>{gameClock} {quarter}Q"+"<br>"*19+f"{playDescription}",
        updatemenus=updatemenus_dict,
        sliders = [sliders_dict]
    )

    fig = go.Figure(
        data=frames[0]["data"],
        layout= layout,
        frames=frames[1:]
    )
    # Create First Down Markers 
    for y_val in [0,53]:
        fig.add_annotation(
                x=first_down_marker,
                y=y_val,
                text=str(down),
                showarrow=False,
                font=dict(
                    family="Courier New, monospace",
                    size=16,
                    color="black"
                    ),
                align="center",
                bordercolor="black",
                borderwidth=2,
                borderpad=4,
                bgcolor="#ff7f0e",
                opacity=1
                )

    return fig

In [4]:
#Import Our Merged DataFrames
ptrack = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/merged_data/ptrack.csv") #Player Tracking
ptrack_qb_poss = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/merged_data/ptrack_qb_poss.csv") #Player Tracking Only On Frames Where QB The Target For Pass Rushers
pbp = pd.read_csv("C:/Users/Peter/Python Scripts/Case Competitions/NFL Big Data Bowl 2023/merged_data/pbp.csv") #Play-By-Play Data

<font size="5">Check Out Individual Plays</font>

In [103]:
#Choose A Game and Play
gameId = 2021100305	
playId = 317

#Run Animate Plays Function (Must Input Correct Week)
animate_play(week4, plays, players, pffScout, gameId, playId)

In [99]:
#Print Out All Frames From A Player and Play
player_in_play = ptrack.loc[(ptrack['gameId'] == gameId) & (ptrack['playId'] == playId)]['nflId'].reset_index().loc[0, 'nflId']
ptrack.loc[(ptrack['gameId'] == gameId) & (ptrack['nflId'] == player_in_play) & (ptrack['playId'] == playId)].sort_values(by = 'frameId', ascending = True)[['frameId', 'event']]

#Print Out All Frames From Selected Game
#ptrack.loc[(ptrack['gameId'] == gameId) & (ptrack['playId'] == playId) & (ptrack['nflId'] == 53440)]

#Print Out All Plays From Selected Game
#ptrack.loc[(ptrack.gameId == gameId) & (ptrack.playDirection == 'right')]['playId'].sort_values().unique()

#Print Out All pbp Data From Selected Play
#pbp[(pbp['gameId'] == gameId) & (pbp['playId'] == playId)]


Unnamed: 0,frameId,event
1926694,1,
1926716,2,
1926738,3,
1926760,4,
1926782,5,
1926804,6,ball_snap
1926826,7,
1926848,8,
1926870,9,
1926892,10,


<font size="5">Explore Effect Of Block Types On Hurry Rate</font>

In [None]:
#For Each Block Type, Find The Success Rate of A Defesnvie Player Creating A Pressure

# #How We Found Positions
# #pffScout.pff_positionLinedUp.sort_values().unique()

# #Create A New Data Frame Only With 9 D-Linemen Positons
# ptrackl = ptrack.loc[(ptrack['pff_positionLinedUp'] == "LOLB") | (ptrack['pff_positionLinedUp'] == "ROLB") | (ptrack['pff_positionLinedUp'] == "LEO") |
#     (ptrack['pff_positionLinedUp'] == "REO") | (ptrack['pff_positionLinedUp'] == "DLT") | (ptrack['pff_positionLinedUp'] == "DRT") |
#     (ptrack['pff_positionLinedUp'] == "NLT") | (ptrack['pff_positionLinedUp'] == "NRT") | (ptrack['pff_positionLinedUp'] == "NT")]

# #Must First Change The pff_blockType in ptrack To A String (Data Did Not Group By Properly When An Int)
# ptrackl.pff_blockType = ptrack.pff_blockType.astype("str")

# #For Each Block Type, Find The Percentage of Time A Hury Occurs
# ptrackl.groupby('pff_blockType')['pff_hurry'].count() #Ideally Would Be .average() Instead

# #Frequency of Different Type of Blocks Performed (EDA)
# #ptrack.pff_blockType.value_counts()

# #Issue Is Only The Players Issuing The Block (Offensive Players) Are Assigned A Block Type
# #However, The Defensive Players They Blocked Are Noted In The pff_nflIdBlockedPlayer Column In ptrack

In [None]:
# #Create A Data Frame Containing Only Plays Where A Pass Block Occured (One Row Per Block)
# ptrack_blocks = ptrack.loc[ptrack['pff_blockType'] != '0'][['gameId', 'playId', 'nflId', 'pff_nflIdBlockedPlayer', 'pff_blockType', 'pff_hurryAllowed']]

# #Reduce To Each Play (One Row Per Interaction Between Blocked Player And Blocker) (First Interation of A Defensive Player Being Blocked In Play)
# ptrack_blocks = ptrack_blocks.drop_duplicates() 

# #Sampler of The ptrack_blocks Data Frame
# ptrack_blocks.sort_values('gameId', ascending = False).head(5)

# #Show How Often Each Block Type Resulted In A Hurry
# ptrack_block_hurry = pd.DataFrame(ptrack_blocks.groupby('pff_blockType')['pff_hurryAllowed'].mean().round(4).sort_values(ascending = False))
# ptrack_block_hurry

In [None]:
# #Can Each Offensive Player Only Get One Block Per Play (I'd Assume Not, But Lets Check) (Yes They Do!)
# ptrack_blocks_per_play = pd.DataFrame(ptrack_blocks.groupby(['gameId', 'playId', 'nflId'])['pff_blockType'].count())
# ptrack_blocks_per_play = ptrack_blocks_per_play.reset_index()

# ptrack_blocks_per_play[ptrack_blocks_per_play['pff_blockType'] != 1].head() #No Rows Appear
# #Yes! Each Offensive Player Only Gets One Block Per Play!

<font size="5"> Explore  Distance From Quarterback At Each Second </font>

Each D-Line Position and Each Second Post Snap

In [None]:
#Create Role-Specific Data Frames
ptrack_prush = ptrack[ptrack.pff_role == "Pass Rush"]
ptrack_pblock = ptrack[ptrack.pff_role == "Pass Block"]

#Descriptive Stats On Frames In Plays
pd.DataFrame(ptrack.groupby(['gameId', 'playId'])['frameId'].max()).mean()

#Available Roles On The Field
ptrack.pff_role.value_counts()

#Top 8 Most Popular Pass Rush Positions
ptrack_prush.pff_positionLinedUp.value_counts().reset_index().head(8)

In [None]:
#Display Scatter Plot
ptrack_prush_drt_dist_from_qb = ptrack_prush[ptrack_prush['pff_positionLinedUp'] == 'RE'].groupby('frameId')['dist_from_qb'].mean().reset_index()
plt.scatter(x = ptrack_prush_drt_dist_from_qb.frameId, y = ptrack_prush_drt_dist_from_qb.dist_from_qb, marker = "8")
plt.title("Average Distance From The Quarterback As Time Progresses (RE)")
plt.xlabel("Frames (10 Frames = 1 Second Post-Snap)")
plt.ylabel("Euclidead Distance From Quarterback")

<font size="5"> Explore Angle of Offensive Linemen Relative To Direction of Pass Rusher During Contact </font>

In [None]:
#While Engaged In The Immediate Zone, How Do Shoulder Angles Change?

<font size="5"> Define The "Immediate Zone" Depth For Pass Blockers </font>

In [None]:
#Gather Data And Fields Relevant To Answering The Question

#Crate Data Frame With Only Pass Blocking Plays
ptrack_block = ptrack.loc[ptrack['pff_blockType'] != '0']

#Find Rows With Players Who Were Blocked Against (And Who Have The Role of Pass Rusher)
ptrack_block_rushers = ptrack.loc[(ptrack.nflId.isin(ptrack_block.pff_nflIdBlockedPlayer)) & (ptrack.pff_role == "Pass Rush")]
ptrack_block_rushers = ptrack_block_rushers[['gameId', 'playId', 'nflId', 'frameId', 'pff_role', 'pff_positionLinedUp', 'x', 'y', 's', 'a', 'dis', 'o', 'dir', 'displayName']]

#Merge Pass Blocking Plays Data From O-Linemen Perspecitve With Pass Rusher Tracking Data
ptrack_imm_box = ptrack_block.merge(ptrack_block_rushers, left_on = ['gameId', 'playId', 'pff_nflIdBlockedPlayer', 'frameId'], 
                                                        right_on = ['gameId', 'playId', 'nflId', 'frameId'], how = 'inner')

#Reduce To Columns Of Interest
ptrack_imm_box = ptrack_imm_box[['gameId', 'playId', 'nflId_x', 'frameId', 'pff_role_x', 'pff_positionLinedUp_x', 'pff_blockType','x_x', 'y_x', 's_x', 'a_x', 'dis_x', 'o_x', 'dir_x', 
            'displayName_x','nflId_y', 'pff_role_y', 'pff_positionLinedUp_y', 'x_y', 'y_y', 's_y', 'a_y', 'dis_y', 'o_y', 'dir_y', 'displayName_y']]

#Rename Columns
ptrack_imm_box = ptrack_imm_box.rename(columns = {"nflId_x":"nflId_blocker", "displayName_x": "displayName_blocker", "pff_role_x": "pff_role_blocker", 
            "pff_positionLinedUp_x": "pff_positionLinedUp_blocker", "x_x": "x_blocker", "y_x": "y_blocker", "s_x": "s_blocker", "a_x": "a_blocker", "dis_x": "dis_blocker", 
            "o_x": "o_blocker", "dir_x": "dir_blocker", "nflId_y":"nflId_rusher", "displayName_y": "displayName_rusher", "pff_role_y": "pff_role_rusher", 
            "pff_positionLinedUp_y": "pff_positionLinedUp_rusher", "x_y": "x_rusher", "y_y": "y_rusher", "s_y": "s_rusher", "a_y": "a_rusher", "dis_y": "dis_rusher", 
            "o_y": "o_rusher", "dir_y": "dir_rusher"})

#Calculate Distance Between Pass Blocker And Pass Rusher at Each Frame
ptrack_imm_box['blocker_rusher_distance'] = np.hypot((ptrack_imm_box.y_blocker - ptrack_imm_box.y_rusher), (ptrack_imm_box.x_blocker - ptrack_imm_box.x_rusher))

#Calculate Difference Between Rusher Direction and Blocker Orientation
ptrack_imm_box['diff_btw_rusher_dir_blocker_o'] = abs((ptrack_imm_box.dir_rusher - 180) - ptrack_imm_box.o_blocker)

In [None]:
#How Far Away From A Blocker Does A Pass Rusher Slow Down or Deccelerate? (1 Yard In Front and 1.5 Yards Across (.75 Either Side))
#Filter Down To Where Rusher Direction and Blocker Orientation Are Generally Matched (25 Degrees In Either Direction)
ptrack_imm_box_10orless = ptrack_imm_box[ptrack_imm_box.diff_btw_rusher_dir_blocker_o < 10]

#Speed
#distance_and_speed = pd.DataFrame(ptrack_imm_box_10orless.groupby(round(ptrack_imm_box_10orless.blocker_rusher_distance,0))['s_rusher'].mean()).reset_index()
#sns.scatterplot(distance_and_speed.blocker_rusher_distance, distance_and_speed.s_rusher)

#Acceleration
distance_and_acceleration = pd.DataFrame(ptrack_imm_box_10orless.groupby(round(ptrack_imm_box_10orless.blocker_rusher_distance,0))['a_rusher'].mean()).reset_index()
sns.scatterplot(distance_and_acceleration.blocker_rusher_distance, distance_and_acceleration.a_rusher)

#Acceleration Hit Their Lowest Rates Within 1-2 Yards of The Pass Blocker!!

<font size="5"> Explore The Immediate Zone At Each Frame For Pass Blockers </font>

In [None]:
#Create The Immediate Zone In The Main ptrack Data Frame 

#Define The Width And Depth Of This Zone
imm_box_width = .75
imm_box_depth = 1

#bl = Bottom Left
ptrack['bl_rz_x'] = ptrack.x - (imm_box_width * np.cos(np.radians(ptrack.o)))
ptrack['bl_rz_y'] = ptrack.y + (imm_box_width * np.sin(np.radians(ptrack.o)))

#br = Bottom Right
ptrack['br_rz_x'] = ptrack.x + (imm_box_width * np.cos(np.radians(ptrack.o)))
ptrack['br_rz_y'] = ptrack.y - (imm_box_width * np.sin(np.radians(ptrack.o)))

#fl = Front Left
ptrack['fl_rz_x'] = ptrack.bl_rz_x + (imm_box_depth * np.cos(np.radians(ptrack.o - 90)))
ptrack['fl_rz_y'] = ptrack.bl_rz_y - (imm_box_depth * np.sin(np.radians(ptrack.o - 90)))

#fr = Front Right
ptrack['fr_rz_x'] = ptrack.br_rz_x + (imm_box_depth * np.cos(np.radians(ptrack.o - 90)))
ptrack['fr_rz_y'] = ptrack.br_rz_y - (imm_box_depth * np.sin(np.radians(ptrack.o - 90)))


In [None]:
#Plot Instances of Rushers In A Blocker's Immediate Zone

#Provide Orientation Angle (Whole Number)
o_angle = 50

#In Order To Test If The Above Calculations Are Correct, First Find Plays With A Known Orientation Angle (Choose One)
ptrack_o_filter = ptrack.loc[(ptrack.gameId == 2021090900) & (round(ptrack.o,0) == o_angle)].head(1).reset_index()

#Use The Player, Play, and Frame To Create A Scatter Plot With The Immediate Zone
ptrack_filter = ptrack.loc[(ptrack.gameId == ptrack_o_filter.at[0, 'gameId']) & (ptrack.playId == ptrack_o_filter.at[0, 'playId']) & (ptrack.nflId == ptrack_o_filter.at[0, 'nflId']) & (ptrack.frameId == ptrack_o_filter.at[0, 'frameId'])].reset_index()

#Plot The Immediate Zone On A Play
sns.scatterplot(data = ptrack_filter, x = 'bl_rz_x', y = 'bl_rz_y', marker = '^', s = 200)
sns.scatterplot(data = ptrack_filter, x = 'br_rz_x', y = 'br_rz_y', marker = '^', s = 200)
sns.scatterplot(data = ptrack_filter, x = 'fl_rz_x', y = 'fl_rz_y', marker = 'v', s = 200)
sns.scatterplot(data = ptrack_filter, x = 'fr_rz_x', y = 'fr_rz_y', marker = 'v', s = 200)
sns.scatterplot(x = ptrack_filter.x, y = ptrack_filter.y, marker = 'o', s = 100)
plt.scatter(x = 41.5, y = 21.75, s = 100, marker = 'X')
print("Provided Player Orientation In Degrees (Second Number): ", ptrack_filter['o'])

<font size="5"> Understand The Immediate Zone On Plays Where Blocks Are Known To Occur </font>

In [42]:
#Look At Block Types On Known Blocking Plays
ptrack.loc[(ptrack.pff_blockType != 0) & (ptrack.pff_role == "Pass Block")]['pff_blockType'].value_counts()

PP    1015853
PA     291269
PT     251556
SW     130627
CL     112396
NB      54063
PU      36795
BH      18921
UP      13621
PR      11794
0        7899
SR       4768
CH       2554
Name: pff_blockType, dtype: int64

<font size="5"> Explore Events Classifications (Understand Beginning and End Of QB Possession) </font>

In [43]:
#Determine Which Events Are The End Of QB Possession
ptrack.event.value_counts()

None                         7339728
ball_snap                     187704
pass_forward                  166056
autoevent_ballsnap             82874
autoevent_passforward          82148
play_action                    43494
run                            10428
qb_sack                         9922
pass_arrived                    8074
autoevent_passinterrupted       4422
man_in_motion                   3894
line_set                        3080
shift                           2926
pass_tipped                     2442
first_contact                   1760
qb_strip_sack                   1276
pass_outcome_incomplete          880
pass_outcome_caught              506
fumble                           374
fumble_offense_recovered         242
handoff                          220
tackle                            66
huddle_break_offense              66
penalty_flag                      44
out_of_bounds                     22
dropped_pass                      22
lateral                           22
N

In [16]:
#Establish The Frame In Each Play At Which The Starting/Ending Of QB Possession Occurs With New Columns (2 Minutes Run-Time)

#Temporary Data Frame
ptrack_qb_poss = ptrack

#Arrays of Starting and Ending Values
poss_events = ['ball_snap', 'autoevent_ballsnap','pass_forward', 'autoevent_passforward', 'run', 'qb_sack', 'qb_strip_sack']

#Add Frames Of Events To Each Frame of Each Plays
for i in range(len(poss_events)):
    temp = ptrack.loc[ptrack.event == poss_events[i]][['gameId', 'playId', 'frameId']].drop_duplicates()
    ptrack_qb_poss = ptrack_qb_poss.merge(temp, on = ['gameId', 'playId'], how = "left")

#Rename Columns
indices_to_be_changed = [46, 47, 48, 49, 50, 51, 52, 15]
names_to_insert = ["ball_snap_event_frame", "autoevent_ballsnap_event_frame", "pass_forward_event_frame", "autoevent_passforward_event_frame", 
                   "run_event_frame", "qb_sack_event_frame", "qb_strip_sack_event_frame", "frameId"]
for i in range(len(indices_to_be_changed)):
    ptrack_qb_poss.columns.values[indices_to_be_changed[i]] = names_to_insert[i]


Passing 'suffixes' which cause duplicate columns {'frameId_x'} in the result is deprecated and will raise a MergeError in a future version.


Passing 'suffixes' which cause duplicate columns {'frameId_x'} in the result is deprecated and will raise a MergeError in a future version.


Passing 'suffixes' which cause duplicate columns {'frameId_x'} in the result is deprecated and will raise a MergeError in a future version.



In [17]:
#Reduce 'ball_snap', 'autoevent_ballsnap','pass_forward', and 'autoevent_passforward' To Just Two Columns

#If Any Column of The Four of Interest Is Null, Fill Them So They Can Be Compared
poss_events_to_be_condensed = ['ball_snap_event_frame', "autoevent_ballsnap_event_frame", "pass_forward_event_frame", "autoevent_passforward_event_frame"]
for i in range(len(poss_events_to_be_condensed)):
    if i <= 1: #For Ball Snap, Fill With A Frame Greater Than The Max In Any Play (Since We Want The Lesser)
        ptrack_qb_poss[poss_events_to_be_condensed[i]] = ptrack_qb_poss[poss_events_to_be_condensed[i]].fillna(250)
    else: #For Pass Forward, Fill With A Frame Lesser Than The Min In Any Play (Since We Want Greater Than)
        ptrack_qb_poss[poss_events_to_be_condensed[i]] = ptrack_qb_poss[poss_events_to_be_condensed[i]].fillna(-1)
    
#Determine Earlier of "ball_snap_event_frame" and "autoevent_ballsnap_event_frame", Smaller Of The Two Into "ball_snap_event_frame_" (Doesn't Affect Calculations Much & More Time The Better)
ptrack_qb_poss["ball_snap_event_frame_"] = np.where(ptrack_qb_poss["ball_snap_event_frame"] <= ptrack_qb_poss["autoevent_ballsnap_event_frame"], ptrack_qb_poss["ball_snap_event_frame"], ptrack_qb_poss["autoevent_ballsnap_event_frame"])

#Determine Earlier of "pass_forward_event_frame" and "autoevent_passforward_event_frame", Larger Of The Two Into "pass_forward_event_frame_" (Expect Rushers To Not Give Up Until Ball Fully Out of Pocket)
ptrack_qb_poss["pass_forward_event_frame_"] = np.where(ptrack_qb_poss["pass_forward_event_frame"] >= ptrack_qb_poss["autoevent_passforward_event_frame"], ptrack_qb_poss["pass_forward_event_frame"], ptrack_qb_poss["autoevent_passforward_event_frame"])

#When Both Auto and Non-Auto Columns Are Null, Set Their Values Appropriately
ptrack_qb_poss.loc[ptrack_qb_poss["ball_snap_event_frame_"] == 250, "ball_snap_event_frame_"] = 1 #If No Ball Snap Is Recorded, Set QB Possesion Starting at Frame 1
ptrack_qb_poss.loc[ptrack_qb_poss["pass_forward_event_frame_"] == -1, "pass_forward_event_frame_"] = np.nan #If No Pass Forward Is Recorded, Don't Make It End Of Possession

#Remove The Four Uneccessary Columns and Rename The Two Now In Their Place
ptrack_qb_poss = ptrack_qb_poss.drop(['ball_snap_event_frame', 'autoevent_ballsnap_event_frame', 'pass_forward_event_frame',	'autoevent_passforward_event_frame'], axis = 1)
ptrack_qb_poss = ptrack_qb_poss.rename(columns = {"ball_snap_event_frame_": "ball_snap_event_frame", "pass_forward_event_frame_": "pass_forward_event_frame"})

In [None]:
#Do Strip Sacks and Sacks Occur In The Same Play? (Nope, They Are Mutually Exclusive)
ptrack_qb_poss.loc[(ptrack_qb_poss.qb_sack_event_frame.notnull() == True) & (ptrack_qb_poss.qb_strip_sack_event_frame.notnull == True)]

In [46]:
#Who Is Being Fumbled On The 17 Fumble Plays? We Need To Identify If These Fumbles Are The QB (End Possession) And If The QB Recovered To Keep Retain Possssion
ptrack_fumbles = ptrack_qb_poss.loc[ptrack_qb_poss['event'] == 'fumble'] [['gameId', 'playId', 'event']].drop_duplicates()
ptrack_fumbles = pbp.merge(ptrack_fumbles, on = ['gameId', 'playId'], how = 'inner')[['gameId', 'playId', 'playDescription', 'week']]
#ptrack_fumbles

#Should Make Fumbles End of Possession?

# Index GameId      PlayId  Week    QB Fumbled?  QB Recovered? End Of Possession?
# 0     2021090900	1776	1       Yes          Yes           No
# 1	    2021091207	3148	1       Yes          No            No (Bc Sack Came Right After)
# 2	    2021091913	865	    2       Yes          Yes           No
# 3	    2021092606	3870	3       Yes          No            No (Poss Already Ended W Strip Sack)
# 4	    2021092610	839	    3       Yes          Yes           No
# 5	    2021092610	2033	3       Yes          Yes           No
# 6	    2021100302	3524	4       Yes          No            No (Poss Already Ended W Strip Sack)
# 7	    2021100304	293	    4       Yes          Yes(Not Rec.) No (Ends Up Throwing)
# 8	    2021100304	2703    4	    Yes          No            No (Poss Already Ended W Strip Sack)
# 9	    2021100307	2997    4	    Yes          Yes           No
# 10	2021101706	73	    6       Yes          No(Not Rec.)  No (Ends Up Throwing Somehow)
# 11	2021101707	2181    6	    Yes          Yes           No
# 12	2021102405	507     7       Yes          No            No (Poss Already Ended W Strip Sack)
# 13	2021102500	1953	7       Yes          Yes           No
# 14	2021103105	2596	8       Yes          Yes           No
# 15	2021103110	1742	8       Yes          Yes           No (Sack Came Later In Play)
# 16	2021103111	3246    8       Yes          Yes           No

#Fumbles Will Not Be Marked As The End of Any Possession, Because In All Case The QB Either Recovers The Football Himself Or Has Already Been Sacked


Unnamed: 0,gameId,playId,playDescription,week
0,2021090900,1776,(4:25) (Shotgun) D.Prescott to TB 10 for -5 ya...,1
1,2021091207,3148,(5:28) (Shotgun) R.Tannehill sacked at TEN 34 ...,1
2,2021091913,865,(:18) (Shotgun) C.Humphrey to KC 12 for -9 yar...,2
3,2021092606,3870,(2:12) (Shotgun) D.Jones sacked at NYG 40 for ...,3
4,2021092610,839,(1:36) (Shotgun) D.Carr Aborted. A.James FUMBL...,3
5,2021092610,2033,(:27) (Shotgun) J.Brissett FUMBLES (Aborted) a...,3
6,2021100302,3524,(10:06) J.Fields sacked at CHI 33 for -10 yard...,4
7,2021100304,293,(10:49) (Shotgun) J.Brissett to IND 27 for -5 ...,4
8,2021100304,2703,(15:00) (Shotgun) J.Brissett sacked at MIA 23 ...,4
9,2021100307,2997,(12:55) Fumbled snap. Z.Wilson to TEN 36 for -...,4


In [None]:
#Filter Out Rows That Don't Satisfy QB Possesion Conditions

#Drop All That Come BEFORE Ball Snap
ptrack_qb_poss_test = ptrack_qb_poss.drop(ptrack_qb_poss[ptrack_qb_poss.frameId < ptrack_qb_poss.ball_snap_event_frame].index) 

#Drop All That Come AFTER QB Sack, QB Strip Sack, Run, Pass Forward In That Order (If The Event Isn't In The Play Nothing Will Be Dropped)

#If A QB Is Sacked, There's Nothing More A Line Can Do (Although Sometimes The QB Can Get The Pass Off). That's Why Strip & Normal Sacks Are The First Two.
#If A QB Runs And Then Throws, We Say That He's Too Far Away For Linemen For It To Be Fair. If Pass Then Run, Count The Pass Frame First.

ending_event_frames = ['qb_sack_event_frame', 'qb_strip_sack_event_frame', 'run_event_frame', 'pass_forward_event_frame']
for i in range(len(ending_event_frames)):
    ptrack_qb_poss_test = ptrack_qb_poss_test.drop(ptrack_qb_poss_test[ptrack_qb_poss_test['frameId'] > ptrack_qb_poss_test[ending_event_frames[i]]].index) 

In [None]:
#Ensure That The Possession Beginning and End Are Properly Filtered Out

#Test Different Events Happening
test = ptrack_qb_poss.loc[ptrack_qb_poss['qb_sack_event_frame'].isna() == False][['gameId', 'nflId', 'playId']].reset_index()
gameId = test.loc[0, "gameId"]
nflId = test.loc[0, "nflId"]
playId = test.loc[0, "playId"]

#Show All Plays And Look If Frames Are Properly Filtered
ptrack_qb_poss_test.loc[(ptrack_qb_poss_test['gameId'] == gameId) & (ptrack_qb_poss_test['nflId'] == nflId) & (ptrack_qb_poss_test['playId'] == playId)][['frameId', 'event', 'ball_snap_event_frame','run_event_frame', 'pass_forward_event_frame', 'qb_sack_event_frame', 'qb_strip_sack_event_frame']]

<font size="5"> Explore Drop Back Types </font>

In [None]:
#Frequency of Drop Backs Types
pbp.dropBackType.value_counts()

In [None]:
#How Is Drop Back Type Influenced By Other Factors
pbp.groupby('dropBackType')['defendersInBox'].mean().sort_values()