In [8]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [34]:

#import dataset

df_raw = pd.read_csv('data/processed/df_clean.csv')

cols_to_use = [
      'gameId', 'playId', 'frameId', 'nflId', 'displayName',
      'position', 'club', 'possessionTeam', 'defensiveTeam',
      'preSnapHomeScore', 'preSnapVisitorScore', 'quarter',
      'gameClock', 'down', 'yardsToGo', 'yardlineNumber',
      'yardlineSide', 'offenseFormation', 'receiverAlignment',
      'preSnapHomeTeamWinProbability', 'preSnapVisitorTeamWinProbability',
      'o_clean', 'a_clean', 's_clean', 'x_clean', 'y_clean', 'dir_clean',
      'playDescription', 'passLocationType', 'rushLocationType', 
      'pff_runConceptPrimary', 'yardsGained', 'wasInitialPassRusher', 'event'
      ]

df = df_raw[cols_to_use]



In [35]:


#we need to group by game_id, play_id, and player_id, then find the x coordinate for the row where event = "line_set" and the row where event = "ball_snapped"

#then we can subtract the x coordinate for the "line_set" row from the x coordinate for the "ball_snapped" row

#we can then add this value to the dataframe as a new column
#actually since we're removing all frames between line_set and ball snapped we can just take the difference in the x coordinates between the two rows

#group df by unique game_id, play_id, player_id, and find the difference in x coord between two rows (line_set and ball_snapped), will be positive for crept forward
grouped_df = df.groupby(['gameId', 'playId', 'nflId'])
crept_up_dict = grouped_df['x_clean'].apply(lambda g: g.diff().iloc[-1]).to_dict()

#now we can get rid of line_set rows
df = df[df['event'] == 'line_set']

# add crept up distance to df by mapping the dictionary to the index of the df
df['creptDist'] = df.set_index(['gameId', 'playId', 'nflId']).index.map(crept_up_dict)

print(df.head())

columns = ['displayName', 'position', 'o_clean', 'a_clean', 's_clean', 'x_clean', 'y_clean', 'dir_clean',  'wasInitialPassRusher', 'event']
print(df[columns].head())

       gameId  playId  frameId  nflId      displayName position club  \
0  2022090800      56       76  35472   Rodger Saffold        G  BUF   
2  2022090800      56       76  42392      Mitch Morse        C  BUF   
4  2022090800      56       76  42489     Stefon Diggs       WR  BUF   
6  2022090800      56       76  44875     Dion Dawkins        T  BUF   
8  2022090800      56       76  44985  Isaiah McKenzie       WR  BUF   

  possessionTeam defensiveTeam  preSnapHomeScore  ...  y_clean  dir_clean  \
0            BUF            LA                 0  ...     2816       2563   
2            BUF            LA                 0  ...     2957       2844   
4            BUF            LA                 0  ...     2791       1775   
6            BUF            LA                 0  ...     2634       3497   
8            BUF            LA                 0  ...     1817       3047   

                                     playDescription  passLocationType  \
0  (15:00) (Shotgun) J.Allen p

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['creptDist'] = df.set_index(['gameId', 'playId', 'nflId']).index.map(crept_up_dict)


In [38]:

columns = ['gameId', 'playId','frameId', 'displayName', 'o_clean', 'a_clean', 's_clean', 'x_clean', 'y_clean', 'dir_clean',  'wasInitialPassRusher', 'event', 'creptDist']
print(df[(df['displayName']=="Aaron Donald") & (df['playId']==56)][columns].head(10))

            gameId  playId  frameId   displayName  o_clean  a_clean  s_clean  \
24      2022090800      56       76  Aaron Donald      721        9        9   
107934  2022091807      56       66  Aaron Donald      946      136       45   

        x_clean  y_clean  dir_clean  wasInitialPassRusher     event  creptDist  
24         3606     3219        856                   1.0  line_set      -22.0  
107934     3618     2078        199                   1.0  line_set      -82.0  


In [13]:
print(df['creptDist'].describe())

count    293875.000000
mean         -2.330746
std         109.062958
min       -1403.000000
25%         -25.000000
50%          -2.000000
75%           9.000000
max        1399.000000
Name: creptDist, dtype: float64


In [None]:

#now calculate horizontal distance to the ball for each player
# we get the y coord. of the ball by using the y coord. of the center.

#so we want to group by game_id, play_id, get the y coord of the player with "position" = "C" 
#then loop through each player on the defense and calculate the horizontal distance to the ball by subtracting their y coord. with the y coord. of the center
#then we'll add this value to the dataframe as a new column

#group df by unique game_id, play_id, player_id, and find the y coord of the player with position = "C"
grouped_df = df.groupby(['game_id', 'play_id'])
center_y_dict = grouped_df['y'].apply(lambda g: g[g['position'] == 'C'].iloc[0]).to_dict()

#add center_y to df by mapping the dictionary to the index of the df
df['center_y'] = df.set_index(['game_id', 'play_id']).index.map(center_y_dict)

#calculate horizontal distance to the ball for each player
df['hDist'] = df['y'] - df['center_y']

print(df.head())



In [None]:

# now we need to calculate distance from teh line of scrimmage, which is the difference between the x coordinate of the player and the line of scrimmage
# we can simply take the difference in x coordinates between each player and "yardlineNumber" for each play

df['losDist'] = df['x'] - df['yardlineNumber']

#and the final feature is just the players orientation i think?

features = ['wasInitialPassRusher', 'creptDist', 'hDist', 'losDist', 'o']

df_model = df[features]
