In [1]:
import pandas as pd
import numpy as np
import os

# Input Data

In [None]:
file_path = os.environ.get("NFL_DATA_PATH")

csvs = os.listdir(file_path)

df_list = []

for file in csvs:
    if file.startswith('input'):
        dfs = pd.read_csv(f'{file_path}/{file}')

        df_list.append(dfs)

df_input = pd.concat(df_list, ignore_index=True)

df_input.head()


Unnamed: 0,game_id,play_id,player_to_predict,nfl_id,frame_id,play_direction,absolute_yardline_number,player_name,player_height,player_weight,...,player_role,x,y,s,a,dir,o,num_frames_output,ball_land_x,ball_land_y
0,2023090700,101,False,54527,1,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.33,36.94,0.09,0.39,322.4,238.24,21,63.259998,-0.22
1,2023090700,101,False,54527,2,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.33,36.94,0.04,0.61,200.89,236.05,21,63.259998,-0.22
2,2023090700,101,False,54527,3,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.33,36.93,0.12,0.73,147.55,240.6,21,63.259998,-0.22
3,2023090700,101,False,54527,4,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.35,36.92,0.23,0.81,131.4,244.25,21,63.259998,-0.22
4,2023090700,101,False,54527,5,right,42,Bryan Cook,6-1,210,...,Defensive Coverage,52.37,36.9,0.35,0.82,123.26,244.25,21,63.259998,-0.22


# Supplementary Data

In [None]:
supp_path = os.environ.get("NFL_DATA_PATH")

df_sup = pd.read_csv(supp_path)
df_sup.head()

  df_sup = pd.read_csv(supp_path)


Unnamed: 0,game_id,season,week,game_date,game_time_eastern,home_team_abbr,visitor_team_abbr,play_id,play_description,quarter,...,team_coverage_type,penalty_yards,pre_penalty_yards_gained,yards_gained,expected_points,expected_points_added,pre_snap_home_team_win_probability,pre_snap_visitor_team_win_probability,home_team_win_probability_added,visitor_team_win_probility_added
0,2023090700,2023,1,09/07/2023,20:20:00,KC,DET,3461,(10:46) (Shotgun) J.Goff pass deep left to J.R...,4,...,COVER_2_ZONE,,18,18,-0.664416,2.945847,0.834296,0.165704,-0.081149,0.081149
1,2023090700,2023,1,09/07/2023,20:20:00,KC,DET,461,(7:30) J.Goff pass short right to J.Reynolds t...,1,...,COVER_6_ZONE,,21,21,1.926131,1.345633,0.544618,0.455382,-0.029415,0.029415
2,2023090700,2023,1,09/07/2023,20:20:00,KC,DET,1940,(:09) (Shotgun) J.Goff pass incomplete deep ri...,2,...,COVER_2_ZONE,,0,0,0.281891,-0.081964,0.771994,0.228006,0.000791,-0.000791
3,2023090700,2023,1,09/07/2023,20:20:00,KC,DET,1711,"(:45) (No Huddle, Shotgun) P.Mahomes pass deep...",2,...,COVER_2_ZONE,,26,26,3.452352,2.342947,0.663187,0.336813,0.041843,-0.041843
4,2023090700,2023,1,09/07/2023,20:20:00,KC,DET,1588,(1:54) (Shotgun) P.Mahomes pass incomplete dee...,2,...,COVER_4_ZONE,,0,0,1.921525,-0.324035,0.615035,0.384965,6.1e-05,-6.1e-05


# Offensive Player DataFrame

In [None]:
#select needed columns for df
offense_df = df_input[df_input['player_side'] == 'Offense'][['nfl_id', 'player_name', 'player_height', 'player_weight', 'player_birth_date', 'player_position', 'player_side', 'player_role', 's', 'a', 'dir']].copy()

#convert height to total inches
offense_df['height_in_inches'] = offense_df['player_height'].astype(str).apply(lambda x: int(x.split('-')[0]) * 12 + int(x.split('-')[1]))

#aggregate player averages
offense_df = offense_df.groupby(['nfl_id', 'player_name', 'player_position', 'player_side'], as_index=False).agg(
    player_height=('height_in_inches', 'mean'),
    player_weight=('player_weight', 'mean'),
    targets=('player_role', lambda x: (x == 'Targeted Receiver').sum()),
    routes=('player_role', lambda x: (x.isin(['Targeted Receiver', 'Other Route Runner'])).sum()),
    avg_speed=('s', 'mean'),
    avg_acceleration=('a', 'mean'),
    max_speed=('s', 'max'),
    max_acceleration=('a', 'max'),
    total_rows=('nfl_id', 'count')
)

offense_df.head()

Unnamed: 0,nfl_id,player_name,player_position,player_side,player_height,player_weight,targets,routes,avg_speed,avg_acceleration,max_speed,max_acceleration,total_rows
0,30842,Marcedes Lewis,TE,Offense,78.0,267.0,115,912,2.638827,2.023586,7.08,5.11,912
1,31446,Matt Prater,K,Offense,70.0,201.0,0,23,0.564348,0.414348,1.23,0.77,23
2,33099,Joe Flacco,QB,Offense,78.0,245.0,0,0,2.020843,1.973177,7.48,8.43,4945
3,34452,Matthew Stafford,QB,Offense,75.0,220.0,0,0,1.903507,2.14315,7.53,8.98,10835
4,34843,Brian Hoyer,QB,Offense,74.0,215.0,0,0,1.636848,1.988934,5.58,8.83,882


# Center of Play

In [None]:
# extract the quarterback's y position to find the center of the formation
center = df_input[(df_input['player_position'] == 'QB') & (df_input['frame_id'] == 1)][['game_id', 'play_id', 'y']].rename(columns={'y': 'qb_y'})

center.head()

Unnamed: 0,game_id,play_id,qb_y
182,2023090700,101,30.07
586,2023090700,194,30.15
837,2023090700,219,23.79
1330,2023090700,361,30.09
1612,2023090700,436,29.68


# Angle DataFrame

In [None]:
# Angle df creation and transformation
angle_df = df_input[(df_input['player_side'] == 'Offense') & (df_input['player_role'] == 'Targeted Receiver') & (df_input['frame_id'] <= 46)][['game_id', 'play_id', 'nfl_id', 'player_name', 'frame_id', 'play_direction', 'x', 'y', 's', 'a', 'o', 'dir']].sort_values(['game_id', 'play_id', 'nfl_id', 'player_name', 'frame_id']).copy()

angle_df = angle_df.merge(center, on=['game_id', 'play_id'], how='left')

angle_df.head()

Unnamed: 0,game_id,play_id,nfl_id,player_name,frame_id,play_direction,x,y,s,a,o,dir,qb_y
0,2023090700,101,44930,Josh Reynolds,1,right,41.03,12.17,0.0,0.0,80.97,156.35,30.07
1,2023090700,101,44930,Josh Reynolds,2,right,41.03,12.17,0.0,0.0,82.26,119.09,30.07
2,2023090700,101,44930,Josh Reynolds,3,right,41.05,12.18,0.02,0.47,83.33,65.03,30.07
3,2023090700,101,44930,Josh Reynolds,4,right,41.07,12.2,0.18,1.54,84.29,56.06,30.07
4,2023090700,101,44930,Josh Reynolds,5,right,41.11,12.22,0.57,3.09,88.21,59.41,30.07


In [None]:
starting_position = angle_df.groupby(['game_id', 'play_id', 'nfl_id'], as_index=False).agg(
    start_x=('x', 'first'),
    start_y=('y', 'first')
)

position_df = angle_df.merge(starting_position, on=['game_id', 'play_id', 'nfl_id'], how='left')

position_df['side_of_formation'] = np.where(
    ((position_df['play_direction'] == 'right') & (position_df['start_y'] > position_df['qb_y'])), 'left',
    np.where(
        ((position_df['play_direction'] == 'left') & (position_df['start_y'] < position_df['qb_y'])),
        'left',
        'right'
    )
)

position_df['delta_x'] = np.where(position_df['play_direction'] == 'left', position_df['start_x'] - position_df['x'], position_df['x'] - position_df['start_x'])

position_df['delta_y'] = position_df['y'] - position_df['start_y']

position_df = position_df.merge(df_sup[['game_id', 'play_id', 'route_of_targeted_receiver']], on=['game_id', 'play_id'], how='left')

position_df.head(35)

Unnamed: 0,game_id,play_id,nfl_id,player_name,frame_id,play_direction,x,y,s,a,o,dir,qb_y,start_x,start_y,side_of_formation,delta_x,delta_y,route_of_targeted_receiver
0,2023090700,101,44930,Josh Reynolds,1,right,41.03,12.17,0.0,0.0,80.97,156.35,30.07,41.03,12.17,right,0.0,0.0,CORNER
1,2023090700,101,44930,Josh Reynolds,2,right,41.03,12.17,0.0,0.0,82.26,119.09,30.07,41.03,12.17,right,0.0,0.0,CORNER
2,2023090700,101,44930,Josh Reynolds,3,right,41.05,12.18,0.02,0.47,83.33,65.03,30.07,41.03,12.17,right,0.02,0.01,CORNER
3,2023090700,101,44930,Josh Reynolds,4,right,41.07,12.2,0.18,1.54,84.29,56.06,30.07,41.03,12.17,right,0.04,0.03,CORNER
4,2023090700,101,44930,Josh Reynolds,5,right,41.11,12.22,0.57,3.09,88.21,59.41,30.07,41.03,12.17,right,0.08,0.05,CORNER
5,2023090700,101,44930,Josh Reynolds,6,right,41.2,12.26,1.15,4.78,83.4,61.83,30.07,41.03,12.17,right,0.17,0.09,CORNER
6,2023090700,101,44930,Josh Reynolds,7,right,41.33,12.33,1.71,5.15,78.78,63.45,30.07,41.03,12.17,right,0.3,0.16,CORNER
7,2023090700,101,44930,Josh Reynolds,8,right,41.54,12.42,2.45,5.56,83.86,66.2,30.07,41.03,12.17,right,0.51,0.25,CORNER
8,2023090700,101,44930,Josh Reynolds,9,right,41.79,12.53,3.06,5.15,86.39,66.61,30.07,41.03,12.17,right,0.76,0.36,CORNER
9,2023090700,101,44930,Josh Reynolds,10,right,42.12,12.68,3.74,5.09,82.0,66.31,30.07,41.03,12.17,right,1.09,0.51,CORNER


In [70]:
check = df_input['a'].unique()

print(check)

[ 0.39  0.61  0.73 ... 12.28 11.41  9.98]


# Angle Groupings

In [None]:
angle_df = df_input[(df_input['player_role'] == 'Targeted Receiver') & (df_input['frame_id'] > 8)][['game_id', 'play_id', 'nfl_id', 'frame_id', 'dir']].copy()

frames = 6

angle_df['frame_int1'] = (angle_df['frame_id'] - 1) // frames

#define a tolerance level
tolerance = 10

#find mins and maxs over angle partitions
angles_1 = angle_df.groupby(['game_id', 'play_id', 'nfl_id', 'frame_int1'], as_index=False).agg(
    dir_min = ('dir', 'min'),
    dir_max = ('dir', 'max'),
    avg_dir = ('dir', 'mean')
    ).assign(within_tol=lambda x: (x['dir_max'] - x['dir_min']) <= tolerance)

#count how many avg falls within specified angle partitions
angles_1 = angles_1[angles_1['within_tol']].groupby(['game_id', 'play_id', 'nfl_id'], as_index=False).apply(
    [lambda x: x['avg_dir'].between(low, high).sum() for low, high in [(0, 30), (31, 65), (66, 115), (116, 155), (156, 205), (206, 255), (256, 295), (296, 345), (346, 360))]]
)
angles_1.head(20)

Unnamed: 0,game_id,play_id,nfl_id,frame_int1,dir_min,dir_max,avg_dir,within_tol
0,2023090700,101,44930,1,66.31,67.82,66.875,True
1,2023090700,101,44930,2,68.65,77.86,73.21,True
2,2023090700,101,44930,3,80.42,94.02,87.575,False
3,2023090700,101,44930,4,96.68,99.25,97.965,True
4,2023090700,194,41325,1,78.82,116.31,99.4375,False
5,2023090700,194,41325,2,132.51,190.49,162.1,False
6,2023090700,194,41325,3,197.43,221.27,210.333333,False
7,2023090700,194,41325,4,224.88,240.96,233.185,False
8,2023090700,194,41325,5,243.66,245.74,244.7,True
9,2023090700,219,53591,1,260.74,265.35,263.115,True


# Route Classification

In [71]:
#filter the input dataframe to rows needed for route classification
route_df = df_input[
    (df_input['player_side'] == 'Offense') & (df_input['frame_id'] <= 46)
    ][['game_id', 'play_id', 'nfl_id', 'play_direction', 'frame_id', 'x', 'y', 'a', 'dir']].sort_values(
        ['game_id', 'play_id', 'nfl_id', 'frame_id']
    ).copy()

#merge qb center y position
route_df = route_df.merge(center, on=['game_id', 'play_id'], how='left')

#aggregate measures to classify routes
route_summary = (route_df.groupby(['game_id', 'play_id', 'nfl_id', 'play_direction', 'qb_y'], as_index=False)
    .agg(
        start_x=('x', 'first'),
        end_x=('x', 'last'),
        max_x=('x', 'max'),
        min_x=('x', 'min'),
        start_y=('y', 'first'),
        end_y=('y', 'last'),
        max_y=('y', 'max'),
        min_y=('y', 'min'),
        last_a=('a', 'last'),
        last_dir=('dir', 'last')
    )
)

#calculate deltas and furthest_x for route classification
route_summary['delta_x'] = np.where(route_summary['play_direction'] == 'left', route_summary['start_x'] - route_summary['end_x'], route_summary['end_x'] - route_summary['start_x'])

route_summary['furthest_x'] = np.where(route_summary['play_direction'] == 'right', route_summary['max_x'] - route_summary['start_x'], abs(route_summary['min_x'] - route_summary['start_x']))
route_summary['delta_y'] = route_summary['end_y'] - route_summary['start_y']

#determine side of formation
route_summary['side_of_formation'] = np.where(
    ((route_summary['play_direction'] == 'right') & (route_summary['start_y'] > route_summary['qb_y'])), 'left',
    np.where(
        ((route_summary['play_direction'] == 'left') & (route_summary['start_y'] < route_summary['qb_y'])),
        'left',
        'right'
    )
)


#function to classify routes based on deltas
def classify_route(row):
    dx = row['delta_x']
    dy = row['delta_y']
    sof = row['side_of_formation']
    direction = row['play_direction']
    accel = row['last_a']
    dis = row.get('furthest_x', None)
    dir = row['last_dir']  

    # Short routes (Slant, Flat, Screen)
    if dx <= 0:
        return 'Screen'
    elif 0 < dx <= 3:
        if direction == 'right':
            if sof == 'right':
                if dy >= 0 and 30 < dir < 70:
                    return 'Slant'
                elif dy < 0 and 110 < dir < 160:
                    return 'Flat' 
            elif sof == 'left':
                if dy >= 0 and 110 < dir < 160:
                    return 'Flat'
                elif dy < 0 and 30 < dir < 70:
                    return 'Slant'
        if direction == 'left':
            if sof == 'right':
                if dy >= 0 and 290 < dir < 340:
                    return 'Flat' 
                elif dy < 0 and 200 < dir < 250:
                    return 'Slant'
            elif sof == 'left':
                if dy > 2 and 290 < dir < 340:
                    return 'Slant' 
                elif dy < -2 and 200 < dir < 250:
                    return 'Flat' 

    # Hitch 
    elif dx <= 7 and accel < 1:
        return 'Hitch'

    # Quick routes (short cross/out)
    elif 3 < dx <= 7:
        if direction == 'right':
            if sof == 'right' and 110 < dir < 160:
                return 'Quick Out'
            elif sof == 'right' and 20 < dir < 70:
                return 'Quick In'
            elif sof == 'left' and  100 < dir < 160:
                return 'Quick In'
            else:
                return 'Quick Out'
        elif direction == 'left':
            if sof == 'right' and 290 < dir < 340:
                return 'Quick In'
            elif sof == 'right' and 200 < dir < 250:
                return 'Quick Out'
            elif sof == 'left' and 200 < dir < 250:
                return 'Quick Out'
            else:
                return 'Quick In'

    # Intermediate routes (curl/comeback/in/out)
    elif 7 < dx <= 12:
        if accel > 3:
            # Deep In/Out classification
            if direction == 'right':
                if sof == 'right':
                    return 'Deep In' if 20 < dir < 70 else 'Deep Out'
                elif sof == 'left':
                    return 'Deep Out' if 110 < dir < 160 else 'Deep In'
            elif direction == 'left':
                if sof == 'right':
                    return 'Deep Out' if dy > 0 else 'Deep In'
                elif sof == 'left':
                    return 'Deep In' if dy > 0 else 'Deep Out'
        elif accel <= 3:
            # Curl/Comeback classification
            if direction == 'right':
                if sof == 'right':
                    return 'Curl' if dy > 0 else 'Comeback'
                elif sof == 'left':
                    return 'Comeback' if dy > 0 else 'Curl'
            elif direction == 'left':
                if sof == 'right':
                    return 'Comeback' if dy > 0 else 'Curl'
                elif sof == 'left':
                    return 'Curl' if dy > 0 else 'Comeback'

    # Deep routes (Go, Post, Corner)
    elif dx > 12:
        if 75 <= dir <= 105 or 255 <= dir <= 285:
            return 'Go'
        else:
            if direction == 'right':
                if sof == 'right':
                    return 'Post' if dy > 0 else 'Corner'
                elif sof == 'left':
                    return 'Corner' if dy > 0 else 'Post'
            elif direction == 'left':
                if sof == 'right':
                    return 'Corner' if dy > 0 else 'Post'
                elif sof == 'left':
                    return 'Post' if dy > 0 else 'Corner'

    # Default case if none of the above match
    return 'Other'

route_summary['route'] = route_summary.apply(classify_route, axis=1)                    

route_summary.head()

Unnamed: 0,game_id,play_id,nfl_id,play_direction,qb_y,start_x,end_x,max_x,min_x,start_y,end_y,max_y,min_y,last_a,last_dir,delta_x,furthest_x,delta_y,side_of_formation,route
0,2023090700,101,43290,right,30.07,37.36,35.41,37.36,35.23,30.07,29.99,30.15,29.99,0.47,108.83,-1.95,0.0,-0.08,right,Screen
1,2023090700,101,44930,right,30.07,41.03,52.43,52.43,41.03,12.17,14.14,14.37,12.17,2.68,99.25,11.4,11.4,1.97,right,Curl
2,2023090700,101,53541,right,30.07,40.61,46.85,46.85,40.61,19.12,13.31,19.12,13.31,3.4,174.03,6.24,6.24,-5.81,right,Quick Out
3,2023090700,101,53959,right,30.07,40.91,50.6,50.6,40.91,35.14,36.43,36.73,35.14,4.11,107.03,9.69,9.69,1.29,left,Deep In
4,2023090700,194,41325,left,30.15,93.33,88.98,93.98,88.98,27.85,22.23,27.93,22.23,2.14,245.74,4.35,4.35,-5.62,left,Quick Out


In [None]:
#filter the input dataframe to rows needed for route classification
route_df = df_input[
    (df_input['player_side'] == 'Offense') & (df_input['frame_id'] <= 46)
    ][['game_id', 'play_id', 'nfl_id', 'play_direction', 'frame_id', 'x', 'y', 'a', 'dir']].sort_values(
        ['game_id', 'play_id', 'nfl_id', 'frame_id']
    ).copy()

#merge qb center y position
route_df = route_df.merge(center, on=['game_id', 'play_id'], how='left')

#aggregate measures to classify routes
route_summary = (route_df.groupby(['game_id', 'play_id', 'nfl_id', 'play_direction', 'qb_y'], as_index=False)
    .agg(
        start_x=('x', 'first'),
        end_x=('x', 'last'),
        max_x=('x', 'max'),
        min_x=('x', 'min'),
        start_y=('y', 'first'),
        end_y=('y', 'last'),
        max_y=('y', 'max'),
        min_y=('y', 'min'),
        last_a=('a', 'last'),
        last_dir=('dir', 'last')
    )
)

#calculate deltas and furthest_x for route classification
route_summary['delta_x'] = np.where(route_summary['play_direction'] == 'left', route_summary['start_x'] - route_summary['end_x'], route_summary['end_x'] - route_summary['start_x'])

route_summary['furthest_x'] = np.where(route_summary['play_direction'] == 'right', route_summary['max_x'] - route_summary['start_x'], abs(route_summary['min_x'] - route_summary['start_x']))
route_summary['delta_y'] = route_summary['end_y'] - route_summary['start_y']

#determine side of formation
route_summary['side_of_formation'] = np.where(
    ((route_summary['play_direction'] == 'right') & (route_summary['start_y'] > route_summary['qb_y'])), 'left',
    np.where(
        ((route_summary['play_direction'] == 'left') & (route_summary['start_y'] < route_summary['qb_y'])),
        'left',
        'right'
    )
)


#function to classify routes based on deltas
def classify_route(row):
    dx = row['delta_x']
    dy = row['delta_y']
    sof = row['side_of_formation']
    direction = row['play_direction']
    accel = row['last_a']
    dis = row.get('furthest_x', None)
    dir = row['last_dir']  

    # Short routes (Slant, Flat, Screen)
    if dx <= 0:
        return 'Screen'
    elif 0 < dx <= 3:
        if direction == 'right':
            if sof == 'right':
                if dy >= 0 and 30 < dir < 70:
                    return 'Slant'
                elif dy < 0 and 110 < dir < 160:
                    return 'Flat' 
            elif sof == 'left':
                if dy >= 0 and 110 < dir < 160:
                    return 'Flat'
                elif dy < 0 and 30 < dir < 70:
                    return 'Slant'
        if direction == 'left':
            if sof == 'right':
                if dy >= 0 and 290 < dir < 340:
                    return 'Flat' 
                elif dy < 0 and 200 < dir < 250:
                    return 'Slant'
            elif sof == 'left':
                if dy > 2 and 290 < dir < 340:
                    return 'Slant' 
                elif dy < -2 and 200 < dir < 250:
                    return 'Flat' 

    # Hitch 
    elif dx <= 7 and accel < 1:
        return 'Hitch'

    # Quick routes (short cross/out)
    elif 3 < dx <= 7:
        if direction == 'right':
            if sof == 'right' and 110 < dir < 160:
                return 'Quick Out'
            elif sof == 'right' and 20 < dir < 70:
                return 'Quick In'
            elif sof == 'left' and  100 < dir < 160:
                return 'Quick In'
            else:
                return 'Quick Out'
        elif direction == 'left':
            if sof == 'right' and 290 < dir < 340:
                return 'Quick In'
            elif sof == 'right' and 200 < dir < 250:
                return 'Quick Out'
            elif sof == 'left' and 200 < dir < 250:
                return 'Quick Out'
            else:
                return 'Quick In'

    # Intermediate routes (curl/comeback/in/out)
    elif 7 < dx <= 12:
        if accel > 3:
            # Deep In/Out classification
            if direction == 'right':
                if sof == 'right':
                    return 'Deep In' if 20 < dir < 70 else 'Deep Out'
                elif sof == 'left':
                    return 'Deep Out' if 110 < dir < 160 else 'Deep In'
            elif direction == 'left':
                if sof == 'right':
                    return 'Deep Out' if dy > 0 else 'Deep In'
                elif sof == 'left':
                    return 'Deep In' if dy > 0 else 'Deep Out'
        elif accel <= 3:
            # Curl/Comeback classification
            if direction == 'right':
                if sof == 'right':
                    return 'Curl' if dy > 0 else 'Comeback'
                elif sof == 'left':
                    return 'Comeback' if dy > 0 else 'Curl'
            elif direction == 'left':
                if sof == 'right':
                    return 'Comeback' if dy > 0 else 'Curl'
                elif sof == 'left':
                    return 'Curl' if dy > 0 else 'Comeback'

    # Deep routes (Go, Post, Corner)
    elif dx > 12:
        if 75 <= dir <= 105 or 255 <= dir <= 285:
            return 'Go'
        else:
            if direction == 'right':
                if sof == 'right':
                    return 'Post' if dy > 0 else 'Corner'
                elif sof == 'left':
                    return 'Corner' if dy > 0 else 'Post'
            elif direction == 'left':
                if sof == 'right':
                    return 'Corner' if dy > 0 else 'Post'
                elif sof == 'left':
                    return 'Post' if dy > 0 else 'Corner'

    # Default case if none of the above match
    return 'Other'

route_summary['route'] = route_summary.apply(classify_route, axis=1)                    

route_summary.head()

Unnamed: 0,game_id,play_id,nfl_id,play_direction,qb_y,start_x,end_x,max_x,min_x,start_y,end_y,max_y,min_y,last_a,last_dir,delta_x,furthest_x,delta_y,side_of_formation,route
0,2023090700,101,43290,right,30.07,37.36,35.41,37.36,35.23,30.07,29.99,30.15,29.99,0.47,108.83,-1.95,0.0,-0.08,right,Screen
1,2023090700,101,44930,right,30.07,41.03,52.43,52.43,41.03,12.17,14.14,14.37,12.17,2.68,99.25,11.4,11.4,1.97,right,Curl
2,2023090700,101,53541,right,30.07,40.61,46.85,46.85,40.61,19.12,13.31,19.12,13.31,3.4,174.03,6.24,6.24,-5.81,right,Quick Out
3,2023090700,101,53959,right,30.07,40.91,50.6,50.6,40.91,35.14,36.43,36.73,35.14,4.11,107.03,9.69,9.69,1.29,left,Deep In
4,2023090700,194,41325,left,30.15,93.33,88.98,93.98,88.98,27.85,22.23,27.93,22.23,2.14,245.74,4.35,4.35,-5.62,left,Quick Out


# Route Combos

In [9]:
route_combos = route_summary.groupby(['game_id', 'play_id', 'side_of_formation'], as_index=False).agg(
    route_combo = ('route', lambda x: '-'.join(sorted(x.dropna().astype(str).unique())))
)

route_combos.head(100)

Unnamed: 0,game_id,play_id,side_of_formation,route_combo
0,2023090700,101,left,Comeback
1,2023090700,101,right,Curl-Other-Screen
2,2023090700,194,left,Go-Other
3,2023090700,194,right,Corner-Hitch
4,2023090700,219,left,Hitch
...,...,...,...,...
95,2023090700,3233,right,Comeback-Hitch
96,2023090700,3324,left,Corner
97,2023090700,3324,right,Hitch-Other-Post
98,2023090700,3461,left,Go-Other


# Route Combo Analysis

In [None]:
# merge route combos with supplementary data for analysis
analysis = route_combos.merge(df_sup, on=['game_id', 'play_id'], how='left')

combo_analysis = analysis[['possession_team', 'route_combo', 'pass_result', 'pre_penalty_yards_gained']]

combo_analysis['binary'] = np.where(combo_analysis['pass_result'] == 'C', 1, 0)

#find completion percentages and average yards per route combination
combo_analysis = combo_analysis.groupby(['possession_team', 'route_combo'], as_index=False).agg(
    completions=('binary', 'sum'),
    attempts=('route_combo', 'count'),
    avg_yards=('pre_penalty_yards_gained', 'mean'),
    avg_yards_per_completion = ('pre_penalty_yards_gained', lambda x: x[combo_analysis['pass_result'] == 'C'].mean())
)

# caclulate the completion percentage
combo_analysis['completion_percentage'] = combo_analysis['completions'] / combo_analysis['attempts']

combo_analysis.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  combo_analysis['binary'] = np.where(combo_analysis['pass_result'] == 'C', 1, 0)


Unnamed: 0,possession_team,route_combo,completions,attempts,avg_yards,avg_yards_per_completion,completion_percentage
0,ARI,Comeback,5,8,7.375,11.8,0.625
1,ARI,Comeback-Corner-Curl-Hitch,1,1,-2.0,-2.0,1.0
2,ARI,Comeback-Corner-Flat-Go,1,1,38.0,38.0,1.0
3,ARI,Comeback-Corner-Hitch,1,1,28.0,28.0,1.0
4,ARI,Comeback-Curl,2,2,8.0,8.0,1.0
5,ARI,Comeback-Curl-Flat,0,2,0.0,,0.0
6,ARI,Comeback-Curl-Flat-Go,1,1,2.0,2.0,1.0
7,ARI,Comeback-Curl-Flat-Hitch-Other,1,1,9.0,9.0,1.0
8,ARI,Comeback-Curl-Flat-Screen,2,2,4.5,4.5,1.0
9,ARI,Comeback-Curl-Go-Screen,0,1,0.0,,0.0


# Defensive Analysis

In [37]:
def_analysis = analysis[['defensive_team', 'team_coverage_type', 'route_combo', 'pass_result', 'pre_penalty_yards_gained']]

def_analysis = def_analysis.groupby(['defensive_team', 'team_coverage_type', 'route_combo'], as_index=False).agg(
    completions=('pass_result', lambda x: (x == 'C').sum()),
    attempts = ('pass_result', 'count'),
    avg_yards = ('pre_penalty_yards_gained', 'mean'),
    avg_yards_per_completion = ('pre_penalty_yards_gained', lambda x: x[def_analysis['pass_result'] == 'C'].mean())
)

def_analysis['completion_percentage'] = def_analysis['completions'] / def_analysis['attempts']

def_analysis.head(10)

Unnamed: 0,defensive_team,team_coverage_type,route_combo,completions,attempts,avg_yards,avg_yards_per_completion,completion_percentage
0,ARI,COVER_0_MAN,Comeback,2,2,7.5,7.5,1.0
1,ARI,COVER_0_MAN,Comeback-Curl-Hitch,0,1,0.0,,0.0
2,ARI,COVER_0_MAN,Comeback-Curl-Other-Screen,1,1,5.0,5.0,1.0
3,ARI,COVER_0_MAN,Comeback-Flat-Hitch,1,1,7.0,7.0,1.0
4,ARI,COVER_0_MAN,Comeback-Flat-Other-Screen,1,1,17.0,17.0,1.0
5,ARI,COVER_0_MAN,Comeback-Hitch,0,1,0.0,,0.0
6,ARI,COVER_0_MAN,Comeback-Hitch-Other,0,1,0.0,,0.0
7,ARI,COVER_0_MAN,Comeback-Other,1,4,0.25,1.0,0.25
8,ARI,COVER_0_MAN,Comeback-Other-Quick Out,0,1,0.0,,0.0
9,ARI,COVER_0_MAN,Curl,1,1,1.0,1.0,1.0
