## Import Data

In [4]:
# import packages and set options

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from datetime import datetime
pd.set_option('display.max_rows', 20)

In [4]:
# read data in

games_df = pd.read_csv('../data/games.csv')
players_df = pd.read_csv('../data/players.csv')
plays_df = pd.read_csv('../data/plays.csv')
tackles_df = pd.read_csv('../data/tackles.csv')
tracking_week_1_df = pd.read_csv('../data/tracking_week_1.csv')
tracking_week_2_df = pd.read_csv('../data/tracking_week_2.csv')
tracking_week_3_df = pd.read_csv('../data/tracking_week_3.csv')
tracking_week_4_df = pd.read_csv('../data/tracking_week_4.csv')
tracking_week_5_df = pd.read_csv('../data/tracking_week_5.csv')
tracking_week_6_df = pd.read_csv('../data/tracking_week_6.csv')
tracking_week_7_df = pd.read_csv('../data/tracking_week_7.csv')
#tracking_week_8_df = pd.read_csv('tracking_week_8.csv')
#tracking_week_9_df = pd.read_csv('tracking_week_9.csv')

In [5]:
# append all tracking data into one dataframe
tracking_df = tracking_week_1_df.append(tracking_week_2_df, ignore_index=True)
tracking_df = tracking_df.append(tracking_week_3_df, ignore_index=True)
tracking_df = tracking_df.append(tracking_week_4_df, ignore_index=True)
tracking_df = tracking_df.append(tracking_week_5_df, ignore_index=True)
tracking_df = tracking_df.append(tracking_week_6_df, ignore_index=True)
tracking_df = tracking_df.append(tracking_week_7_df, ignore_index=True)

  tracking_df = tracking_week_1_df.append(tracking_week_2_df, ignore_index=True)
  tracking_df = tracking_df.append(tracking_week_3_df, ignore_index=True)
  tracking_df = tracking_df.append(tracking_week_4_df, ignore_index=True)
  tracking_df = tracking_df.append(tracking_week_5_df, ignore_index=True)
  tracking_df = tracking_df.append(tracking_week_6_df, ignore_index=True)
  tracking_df = tracking_df.append(tracking_week_7_df, ignore_index=True)


## Data Preprocessing and Basic Feature Engineering

In [6]:
## players_df edits

# converting heights

# height dictionary
height_mapping = {'5-10': 70, '5-11': 71, '5-6': 66, '5-7': 67, '5-8': 68,
                  '5-9': 69, '6-0': 72, '6-1': 73, '6-2': 74, '6-3': 75,
                  '6-4': 76, '6-5': 77, '6-6': 78, '6-7': 79, '6-8': 80, '6-9': 81}

def height_to_inches(height_str):
    return height_mapping[height_str]

# Apply the function to create a new 'height_in_inch' column
players_df['height_in_inch'] = players_df['height'].apply(height_to_inches)

# converting ages

# Convert birthdates to datetime objects
birthdates_datetime = pd.to_datetime(players_df['birthDate'], errors='coerce')

# Calculate age based on current date
current_date = datetime.now()
ages = (current_date - birthdates_datetime).dt.days / 365.25

# Create a DataFrame with birthdates and ages
players_df['age'] = ages

# creating position groups

# size based / traditional mapping
position_group_mapping = {'C' : 'OL', 'CB' : 'DB', 'DB': 'DB', 'DE' : 'DL', 'DT': 'DL', 'FB' : 'HB',
                  'FS': 'DB', 'G': 'OL', 'ILB' : 'LB', 'LS': 'OL', 'MLB': 'LB', 'NT': 'DL',
                  'OLB': 'LB', 'QB': 'QB', 'RB': 'HB', 'SS': 'DB', 'T': 'OL', 'TE' : 'R', 'WR' : 'R'}

# role based mapping
position_role_mapping = {'C' : 'OL', 'CB' : 'CB', 'DB': 'DB', 'DE' : 'EDGE', 'DT': 'DL', 'FB' : 'TE',
                  'FS': 'DB', 'G': 'OL', 'ILB' : 'LB', 'LS': 'OL', 'MLB': 'LB', 'NT': 'DL',
                  'OLB': 'EDGE', 'QB': 'QB', 'RB': 'HB', 'SS': 'DB', 'T': 'OL', 'TE' : 'TE', 'WR' : 'WR'}


def position_to_pos_group(position):
    return position_group_mapping[position]

def position_to_pos_role(position):
    return position_role_mapping[position]

players_df['position_group'] = players_df['position'].apply(position_to_pos_group)
players_df['position_role'] = players_df['position'].apply(position_to_pos_role)

# select relevant columns
players_df_set = players_df[['nflId','height_in_inch', 'weight',
                         'position','position_group','position_role','displayName']]

In [7]:
# plays_df edits

def calculate_time_remaining(quarter, gameclock):
    def convert_to_seconds(gameclock_str):
        minutes, seconds = map(int, gameclock_str.split(':'))
        return minutes * 60 + seconds

    # Convert 'gameclock' to total seconds
    total_seconds = gameclock.apply(convert_to_seconds)

    # Adjust time based on 'quarter'
    adjusted_time = total_seconds + (4 - quarter) * 900

    return adjusted_time

plays_df['time_remaining'] = calculate_time_remaining(plays_df['quarter'], plays_df['gameClock'])

# drop plays nullified by penalty
plays_df = plays_df[plays_df["playNullifiedByPenalty"] == 'N']

plays_df_set = plays_df[['gameId', 'playId','ballCarrierId', 'ballCarrierDisplayName', 
                         'quarter', 'down', 'yardsToGo', 'time_remaining',
                    'absoluteYardlineNumber', 'gameClock', 'preSnapHomeScore', 'preSnapVisitorScore',
                    'passLength', 'offenseFormation', 'defendersInTheBox', 'passProbability',
                    'preSnapHomeTeamWinProbability', 'preSnapVisitorTeamWinProbability','defensiveTeam']]

In [6]:
plays_df = pd.read_csv('../../data/plays.csv')
pd.set_option('display.max_columns', None)
plays_df

Unnamed: 0,gameId,playId,ballCarrierId,ballCarrierDisplayName,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,gameClock,preSnapHomeScore,preSnapVisitorScore,passResult,passLength,penaltyYards,prePenaltyPlayResult,playResult,playNullifiedByPenalty,absoluteYardlineNumber,offenseFormation,defendersInTheBox,passProbability,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,homeTeamWinProbabilityAdded,visitorTeamWinProbilityAdded,expectedPoints,expectedPointsAdded,foulName1,foulName2,foulNFLId1,foulNFLId2
0,2022100908,3537,48723,Parker Hesse,(7:52) (Shotgun) M.Mariota pass short middle t...,4,1,10,ATL,TB,ATL,41,7:52,21,7,C,6.0,,9,9,N,69,SHOTGUN,7.0,0.747284,0.976785,0.023215,-0.006110,0.006110,2.360609,0.981955,,,,
1,2022091103,3126,52457,Chase Claypool,(7:38) (Shotgun) C.Claypool right end to PIT 3...,4,1,10,PIT,CIN,PIT,34,7:38,14,20,,,,3,3,N,76,SHOTGUN,7.0,0.416454,0.160485,0.839515,-0.010865,0.010865,1.733344,-0.263424,,,,
2,2022091111,1148,42547,Darren Waller,(8:57) D.Carr pass short middle to D.Waller to...,2,2,5,LV,LAC,LV,30,8:57,10,3,C,11.0,,15,15,N,40,I_FORM,6.0,0.267933,0.756661,0.243339,-0.037409,0.037409,1.312855,1.133666,,,,
3,2022100212,2007,46461,Mike Boone,(13:12) M.Boone left tackle to DEN 44 for 7 ya...,3,2,10,DEN,LV,DEN,37,13:12,19,16,,,,7,7,N,47,SINGLEBACK,6.0,0.592704,0.620552,0.379448,-0.002451,0.002451,1.641006,-0.043580,,,,
4,2022091900,1372,47857,Devin Singletary,(8:33) D.Singletary right guard to TEN 32 for ...,2,1,10,BUF,TEN,TEN,35,8:33,7,7,,,,3,3,N,75,I_FORM,7.0,0.470508,0.836290,0.163710,0.001053,-0.001053,3.686428,-0.167903,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12481,2022100204,123,43293,Ezekiel Elliott,(13:31) E.Elliott right tackle to WAS 38 for 1...,1,1,10,DAL,WAS,WAS,39,13:31,0,0,,,,1,1,N,49,SINGLEBACK,6.0,0.577800,0.638600,0.361400,-0.011542,0.011542,3.642571,-0.504018,,,,
12482,2022091200,3467,46189,Will Dissly,(6:08) G.Smith pass short right to W.Dissly to...,4,1,10,SEA,DEN,SEA,30,6:08,17,16,C,0.0,,2,2,N,40,SINGLEBACK,7.0,0.298983,0.615241,0.384759,-0.025458,0.025458,1.434580,-0.444642,,,,
12483,2022101605,3371,44860,Joe Mixon,(9:35) (Shotgun) J.Mixon left end to CIN 47 fo...,4,1,10,CIN,NO,CIN,41,9:35,26,21,,,,6,6,N,69,SHOTGUN,6.0,0.639439,0.667054,0.332946,-0.005164,0.005164,2.115356,0.203819,,,,
12484,2022100207,2777,52449,Jonathan Taylor,(2:02) (Shotgun) J.Taylor up the middle to TEN...,3,1,10,IND,TEN,TEN,34,2:02,17,24,,,,-2,-2,N,44,SHOTGUN,6.0,0.518695,0.410611,0.589389,-0.046648,0.046648,3.946232,-0.976039,,,,


In [8]:
# tackles_df edits

tackles_df = tackles_df.sort_values(by=['gameId', 'playId'])
tackles_df['season_rolling_tackle_ct'] = tackles_df.groupby('nflId')['tackle'].cumsum()
tackles_df['game_rolling_tackle_ct'] = tackles_df.groupby(['gameId', 'nflId'])['tackle'].cumsum()
tackles_df['season_rolling_missed_tackle_ct'] = tackles_df.groupby('nflId')['pff_missedTackle'].cumsum()
tackles_df['game_rolling_missed_tackle_ct'] = tackles_df.groupby(['gameId', 'nflId'])['pff_missedTackle'].cumsum()

tackles_df_set = tackles_df[['gameId', 'playId', 'nflId','tackle', 'assist', 'forcedFumble', 'pff_missedTackle',
                            'season_rolling_tackle_ct','game_rolling_tackle_ct','season_rolling_missed_tackle_ct','game_rolling_missed_tackle_ct']]

In [9]:
# take subset of tracking_df

tracking_df_set = tracking_df[['gameId', 'playId', 'nflId', 'displayName', 'frameId', 'time'
                               , 'club', 'playDirection', 'x', 'y', 's', 'a', 'dis', 'o','dir', 'event']]

In [22]:
# merge all available data to tracking data

one_merge = pd.merge(tracking_df_set, games_df, on='gameId')
two_merge = pd.merge(one_merge, players_df_set, on='nflId')
three_merge = pd.merge(two_merge, plays_df_set, on=['gameId', 'playId'])
final_merge = pd.merge(three_merge, tackles_df_set, on=['gameId', 'playId', 'nflId'], how="left")

final_merge.head()

# dropping about 61193 frames because of missing/unexplained data
# tracking_week_1_df[tracking_week_1_df["displayName"] == "football"]

Unnamed: 0,gameId,playId,nflId,displayName_x,frameId,time,club,playDirection,x,y,s,a,dis,o,dir,event,season,week,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,homeFinalScore,visitorFinalScore,height_in_inch,weight,position,position_group,position_role,displayName_y,ballCarrierId,ballCarrierDisplayName,quarter,down,yardsToGo,time_remaining,absoluteYardlineNumber,gameClock,preSnapHomeScore,preSnapVisitorScore,passLength,offenseFormation,defendersInTheBox,passProbability,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,defensiveTeam,tackle,assist,forcedFumble,pff_missedTackle,season_rolling_tackle_ct,game_rolling_tackle_ct,season_rolling_missed_tackle_ct,game_rolling_missed_tackle_ct
0,2022090800,56,35472.0,Rodger Saffold,6,2022-09-08 20:24:05.700000,BUF,left,88.8,26.7,1.15,1.42,0.12,234.48,139.41,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,77,325,G,OL,OL,Rodger Saffold,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,
1,2022090800,56,38577.0,Bobby Wagner,6,2022-09-08 20:24:05.700000,LA,left,78.11,28.74,3.35,2.62,0.32,349.47,357.71,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,72,242,ILB,LB,LB,Bobby Wagner,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,
2,2022090800,56,41239.0,Aaron Donald,6,2022-09-08 20:24:05.700000,LA,left,92.15,29.96,3.62,2.86,0.37,186.16,157.65,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,73,280,DT,DL,DL,Aaron Donald,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,
3,2022090800,56,42392.0,Mitch Morse,6,2022-09-08 20:24:05.700000,BUF,left,88.21,29.31,1.42,0.64,0.14,282.32,347.15,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,78,305,C,OL,OL,Mitch Morse,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,
4,2022090800,56,42489.0,Stefon Diggs,6,2022-09-08 20:24:05.700000,BUF,left,79.85,35.59,4.61,4.82,0.45,114.27,202.2,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,72,191,WR,R,WR,Stefon Diggs,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,


In [27]:
# filter by frames in which the pass is caught
pass_caught = final_merge[final_merge['event'] == 'pass_outcome_caught']

In [29]:
# join all players to ball carrier information

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)
joined_with_bc = pd.merge(pass_caught,
                          final_merge[['displayName_x','gameId', 'playId', 'frameId','x', 'y', 's', 'a',
            'dis', 'o', 'dir', 'weight', 'position', 'nflId', 'height_in_inch', 'position_group','position_role']],
                     left_on=['gameId', 'playId', 'frameId', 'ballCarrierId'],
                     right_on=['gameId', 'playId', 'frameId', 'nflId'],
                     how='left', suffixes=('_defense', '_offense'))
joined_with_bc.head(5)

Unnamed: 0,gameId,playId,nflId_defense,displayName_x_defense,frameId,time,club,playDirection,x_defense,y_defense,s_defense,a_defense,dis_defense,o_defense,dir_defense,event,season,week,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,homeFinalScore,visitorFinalScore,height_in_inch_defense,weight_defense,position_defense,position_group_defense,position_role_defense,displayName_y,ballCarrierId,ballCarrierDisplayName,quarter,down,yardsToGo,time_remaining,absoluteYardlineNumber,gameClock,preSnapHomeScore,preSnapVisitorScore,passLength,offenseFormation,defendersInTheBox,passProbability,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,defensiveTeam,tackle,assist,forcedFumble,pff_missedTackle,season_rolling_tackle_ct,game_rolling_tackle_ct,season_rolling_missed_tackle_ct,game_rolling_missed_tackle_ct,displayName_x_offense,x_offense,y_offense,s_offense,a_offense,dis_offense,o_offense,dir_offense,weight_offense,position_offense,nflId_offense,height_in_inch_offense,position_group_offense,position_role_offense
0,2022090800,56,35472.0,Rodger Saffold,6,2022-09-08 20:24:05.700000,BUF,left,88.8,26.7,1.15,1.42,0.12,234.48,139.41,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,77,325,G,OL,OL,Rodger Saffold,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.2,191,WR,42489.0,72,R,WR
1,2022090800,56,38577.0,Bobby Wagner,6,2022-09-08 20:24:05.700000,LA,left,78.11,28.74,3.35,2.62,0.32,349.47,357.71,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,72,242,ILB,LB,LB,Bobby Wagner,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.2,191,WR,42489.0,72,R,WR
2,2022090800,56,41239.0,Aaron Donald,6,2022-09-08 20:24:05.700000,LA,left,92.15,29.96,3.62,2.86,0.37,186.16,157.65,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,73,280,DT,DL,DL,Aaron Donald,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.2,191,WR,42489.0,72,R,WR
3,2022090800,56,42392.0,Mitch Morse,6,2022-09-08 20:24:05.700000,BUF,left,88.21,29.31,1.42,0.64,0.14,282.32,347.15,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,78,305,C,OL,OL,Mitch Morse,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.2,191,WR,42489.0,72,R,WR
4,2022090800,56,42489.0,Stefon Diggs,6,2022-09-08 20:24:05.700000,BUF,left,79.85,35.59,4.61,4.82,0.45,114.27,202.2,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,72,191,WR,R,WR,Stefon Diggs,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.68996,0.413347,0.586653,LA,,,,,,,,,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.2,191,WR,42489.0,72,R,WR


In [30]:
#defense_only = joined_with_bc[joined_with_bc['club'] == joined_with_bc['defensiveTeam']]

joined_with_bc["offense_score"] = np.where(joined_with_bc['homeTeamAbbr'] == joined_with_bc['defensiveTeam'],
                                           joined_with_bc["preSnapVisitorScore"], joined_with_bc["preSnapHomeScore"])
joined_with_bc["defense_score"] = np.where(joined_with_bc['homeTeamAbbr'] == joined_with_bc['defensiveTeam'],
                                           joined_with_bc["preSnapHomeScore"], joined_with_bc["preSnapVisitorScore"])
joined_with_bc["differential"] = joined_with_bc["defense_score"] - joined_with_bc["offense_score"]

joined_with_bc["offense_win_prob"] = np.where(joined_with_bc['homeTeamAbbr'] == joined_with_bc['defensiveTeam'],
                                           joined_with_bc["preSnapVisitorTeamWinProbability"], joined_with_bc["preSnapHomeTeamWinProbability"])
joined_with_bc["defense_win_prob"] = np.where(joined_with_bc['homeTeamAbbr'] == joined_with_bc['defensiveTeam'],
                                           joined_with_bc["preSnapHomeTeamWinProbability"], joined_with_bc["preSnapVisitorTeamWinProbability"])
joined_with_bc["prob_differential"] = joined_with_bc["defense_win_prob"] - joined_with_bc["offense_win_prob"]


In [32]:
# distance to ball carrier
def calculate_distance(row):
    return ((row['x_defense'] - row['x_offense'])**2 + (row['y_defense'] - row['y_offense'])**2)**0.5

# Apply the function to create a new column 'distance'
joined_with_bc['dist_to_bc'] = joined_with_bc.apply(calculate_distance, axis=1)

# adds column for defense
joined_with_bc['is_defense'] = (joined_with_bc['club'] == joined_with_bc['defensiveTeam']).astype(int)

# number of offensive obstacles
#joined_with_bc['num_blockers'] = joined_with_bc[joined_with_bc['is_defense'] == 0].groupby('playId')['dist_to_bc'].transform(lambda x: x.lt(joined_with_bc.loc[joined_with_bc['is_defense'] == 1, 'dist_to_bc']).sum())

# create offense defense subsets
#offensive_players = joined_with_bc[joined_with_bc['is_defense'] == 0]
defensive_players = joined_with_bc[joined_with_bc['is_defense'] == 1]

# weight of offensive obstacles
#joined_with_bc['total_weight_blockers'] = offensive_players.groupby('playId')['weight_offense'].transform(
#    lambda x: x.where(joined_with_bc['dist_to_bc'].lt(joined_with_bc.loc[joined_with_bc['is_defense'] == 1, 'dist_to_bc'])).sum()
#)

# If you want to fill NaN with 0 for those who are not closer, you can do:
#joined_with_bc['total_weight_blockers'].fillna(0, inplace=True)

defensive_players = joined_with_bc[joined_with_bc['is_defense'] == 1]
# rank amongst defenders
defensive_players['rank_to_bc'] = defensive_players.groupby(['playId','gameId'])['dist_to_bc'].rank()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  defensive_players['rank_to_bc'] = defensive_players.groupby(['playId','gameId'])['dist_to_bc'].rank()


## TARGET VARIABLE ENGINEERING

In [35]:
defensive_players

Unnamed: 0,gameId,playId,nflId_defense,displayName_x_defense,frameId,time,club,playDirection,x_defense,y_defense,s_defense,a_defense,dis_defense,o_defense,dir_defense,event,season,week,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,homeFinalScore,visitorFinalScore,height_in_inch_defense,weight_defense,position_defense,position_group_defense,position_role_defense,displayName_y,ballCarrierId,ballCarrierDisplayName,quarter,down,yardsToGo,time_remaining,absoluteYardlineNumber,gameClock,preSnapHomeScore,preSnapVisitorScore,passLength,offenseFormation,defendersInTheBox,passProbability,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,defensiveTeam,tackle,assist,forcedFumble,pff_missedTackle,season_rolling_tackle_ct,game_rolling_tackle_ct,season_rolling_missed_tackle_ct,game_rolling_missed_tackle_ct,displayName_x_offense,x_offense,y_offense,s_offense,a_offense,dis_offense,o_offense,dir_offense,weight_offense,position_offense,nflId_offense,height_in_inch_offense,position_group_offense,position_role_offense,offense_score,defense_score,differential,offense_win_prob,defense_win_prob,prob_differential,dist_to_bc,is_defense,rank_to_bc
1,2022090800,56,38577.0,Bobby Wagner,6,2022-09-08 20:24:05.700000,LA,left,78.11,28.74,3.35,2.62,0.32,349.47,357.71,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,72,242,ILB,LB,LB,Bobby Wagner,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.689960,0.413347,0.586653,LA,,,,,,,,,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.20,191,WR,42489.0,72,R,WR,0,0,0,0.586653,0.413347,-0.173306,7.067538,1,3.0
2,2022090800,56,41239.0,Aaron Donald,6,2022-09-08 20:24:05.700000,LA,left,92.15,29.96,3.62,2.86,0.37,186.16,157.65,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,73,280,DT,DL,DL,Aaron Donald,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.689960,0.413347,0.586653,LA,,,,,,,,,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.20,191,WR,42489.0,72,R,WR,0,0,0,0.586653,0.413347,-0.173306,13.527265,1,9.0
5,2022090800,56,42816.0,Troy Hill,6,2022-09-08 20:24:05.700000,LA,left,70.62,7.66,2.60,4.14,0.27,331.57,278.33,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,71,184,CB,DB,CB,Troy Hill,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.689960,0.413347,0.586653,LA,,,,,,,,,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.20,191,WR,42489.0,72,R,WR,0,0,0,0.586653,0.413347,-0.173306,29.415605,1,11.0
6,2022090800,56,43294.0,Jalen Ramsey,6,2022-09-08 20:24:05.700000,LA,left,78.15,37.85,5.88,1.23,0.59,140.96,178.50,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,73,208,CB,DB,CB,Jalen Ramsey,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.689960,0.413347,0.586653,LA,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.20,191,WR,42489.0,72,R,WR,0,0,0,0.586653,0.413347,-0.173306,2.828003,1,1.0
7,2022090800,56,43298.0,Leonard Floyd,6,2022-09-08 20:24:05.700000,LA,left,92.11,33.14,1.34,2.21,0.13,159.12,203.53,pass_outcome_caught,2022,1,09/08/2022,20:20:00,LA,BUF,10,31,77,240,DE,DL,EDGE,Leonard Floyd,42489,Stefon Diggs,1,1,10,3600,85,15:00,0,0,5.0,SHOTGUN,6.0,0.689960,0.413347,0.586653,LA,,,,,,,,,Stefon Diggs,79.85,35.59,4.61,4.82,0.45,114.27,202.20,191,WR,42489.0,72,R,WR,0,0,0,0.586653,0.413347,-0.173306,12.502404,1,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97078,2022091200,3826,43436.0,Quinton Jefferson,6,2022-09-12 23:05:53.500000,SEA,left,69.03,28.24,3.50,0.92,0.36,112.81,184.46,pass_outcome_caught,2022,1,09/12/2022,20:15:00,SEA,DEN,17,16,76,291,DE,DL,EDGE,Quinton Jefferson,53464,Javonte Williams,4,3,14,71,65,1:11,17,16,-6.0,SHOTGUN,3.0,0.916875,0.508994,0.491006,SEA,,,,,,,,,Javonte Williams,70.57,11.96,3.91,2.22,0.39,313.03,192.49,220,RB,53464.0,70,HB,HB,16,17,1,0.491006,0.508994,0.017987,16.352676,1,4.0
97079,2022091200,3826,42827.0,Justin Coleman,6,2022-09-12 23:05:53.500000,SEA,left,60.59,12.24,6.30,2.68,0.63,103.59,136.76,pass_outcome_caught,2022,1,09/12/2022,20:15:00,SEA,DEN,17,16,71,190,CB,DB,CB,Justin Coleman,53464,Javonte Williams,4,3,14,71,65,1:11,17,16,-6.0,SHOTGUN,3.0,0.916875,0.508994,0.491006,SEA,0.0,0.0,0.0,1.0,3.0,3.0,2.0,2.0,Javonte Williams,70.57,11.96,3.91,2.22,0.39,313.03,192.49,220,RB,53464.0,70,HB,HB,16,17,1,0.491006,0.508994,0.017987,9.983927,1,1.0
97080,2022091200,3826,44873.0,Josh Jones,6,2022-09-12 23:05:53.500000,SEA,left,49.62,32.42,4.63,4.05,0.44,143.62,159.31,pass_outcome_caught,2022,1,09/12/2022,20:15:00,SEA,DEN,17,16,74,220,FS,DB,DB,Josh Jones,53464,Javonte Williams,4,3,14,71,65,1:11,17,16,-6.0,SHOTGUN,3.0,0.916875,0.508994,0.491006,SEA,,,,,,,,,Javonte Williams,70.57,11.96,3.91,2.22,0.39,313.03,192.49,220,RB,53464.0,70,HB,HB,16,17,1,0.491006,0.508994,0.017987,29.283342,1,8.0
97083,2022091200,3826,54505.0,Boye Mafe,6,2022-09-12 23:05:53.500000,SEA,left,71.56,25.09,4.80,3.11,0.47,183.52,187.27,pass_outcome_caught,2022,1,09/12/2022,20:15:00,SEA,DEN,17,16,76,265,OLB,LB,EDGE,Boye Mafe,53464,Javonte Williams,4,3,14,71,65,1:11,17,16,-6.0,SHOTGUN,3.0,0.916875,0.508994,0.491006,SEA,,,,,,,,,Javonte Williams,70.57,11.96,3.91,2.22,0.39,313.03,192.49,220,RB,53464.0,70,HB,HB,16,17,1,0.491006,0.508994,0.017987,13.167270,1,2.0


In [78]:
# how to handle assists, forced fumbles
#tackle_prob_df['tackle_prob'] = np.where(tackle_prob_df['tackle'] == 1, 1, np.where(tackle_prob_df['assist'] == 1, 0.5, 0))


defensive_players['tackle_prob'] = np.where(defensive_players['tackle'] == 1, 1, 0)
defensive_players.fillna(0, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  defensive_players['tackle_prob'] = np.where(defensive_players['tackle'] == 1, 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  defensive_players.fillna(0, inplace=True)


## EXPORT DATA

In [79]:
defensive_players.to_csv('../model/defensive_players.csv', index=False)