In [1]:
import pandas as pd
import glob
import requests
import sqlite3

In [2]:
# Load Teams API data
teams_json = requests.get('https://site.api.espn.com/apis/site/v2/sports/football/nfl/teams').json()
teams = teams_json['sports'][0]['leagues'][0]['teams']

team_dict = []

for t in teams:
    team_dict.append({
        'id': t['team']['id'],
        'color': t['team']['color'],
        'alternateColor': t['team']['color'],
        'logo': t['team']['logos'][0]['href'],
        'abbreviation': t['team']['abbreviation'],
        'displayName': t['team']['displayName'],
        'location': t['team']['location'],
        'name': t['team']['name'],
        'nickname': t['team']['nickname'],
        'shortDisplayName': t['team']['shortDisplayName'],
    })

# Create Teams DataFrame using team abbreviations as index
team_df = pd.DataFrame(team_dict).set_index('abbreviation')
team_df

Unnamed: 0_level_0,id,color,alternateColor,logo,displayName,location,name,nickname,shortDisplayName
abbreviation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ARI,22,a4113e,a4113e,https://a.espncdn.com/i/teamlogos/nfl/500/ari.png,Arizona Cardinals,Arizona,Cardinals,Cardinals,Cardinals
ATL,1,a71930,a71930,https://a.espncdn.com/i/teamlogos/nfl/500/atl.png,Atlanta Falcons,Atlanta,Falcons,Falcons,Falcons
BAL,33,29126f,29126f,https://a.espncdn.com/i/teamlogos/nfl/500/bal.png,Baltimore Ravens,Baltimore,Ravens,Ravens,Ravens
BUF,2,00338d,00338d,https://a.espncdn.com/i/teamlogos/nfl/500/buf.png,Buffalo Bills,Buffalo,Bills,Bills,Bills
CAR,29,0085ca,0085ca,https://a.espncdn.com/i/teamlogos/nfl/500/car.png,Carolina Panthers,Carolina,Panthers,Panthers,Panthers
CHI,3,0b1c3a,0b1c3a,https://a.espncdn.com/i/teamlogos/nfl/500/chi.png,Chicago Bears,Chicago,Bears,Bears,Bears
CIN,4,fb4f14,fb4f14,https://a.espncdn.com/i/teamlogos/nfl/500/cin.png,Cincinnati Bengals,Cincinnati,Bengals,Bengals,Bengals
CLE,5,472a08,472a08,https://a.espncdn.com/i/teamlogos/nfl/500/cle.png,Cleveland Browns,Cleveland,Browns,Browns,Browns
DAL,6,002a5c,002a5c,https://a.espncdn.com/i/teamlogos/nfl/500/dal.png,Dallas Cowboys,Dallas,Cowboys,Cowboys,Cowboys
DEN,7,0a2343,0a2343,https://a.espncdn.com/i/teamlogos/nfl/500/den.png,Denver Broncos,Denver,Broncos,Broncos,Broncos


In [3]:
# load 2024 games DF
games_df = pd.concat(map(pd.read_csv, glob.glob(f'data/nfl-big-data-bowl-2024/games.csv')))

games_df['homeTeamAbbr'] = games_df['homeTeamAbbr'].replace('LA', 'LAR').replace('WAS', 'WSH')
games_df['visitorTeamAbbr'] = games_df['visitorTeamAbbr'].replace('LA', 'LAR').replace('WAS', 'WSH')

games_df = games_df.set_index('gameId')
games_df.head()

Unnamed: 0_level_0,season,week,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,homeFinalScore,visitorFinalScore
gameId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022090800,2022,1,09/08/2022,20:20:00,LAR,BUF,10,31
2022091100,2022,1,09/11/2022,13:00:00,ATL,NO,26,27
2022091101,2022,1,09/11/2022,13:00:00,CAR,CLE,24,26
2022091102,2022,1,09/11/2022,13:00:00,CHI,SF,19,10
2022091103,2022,1,09/11/2022,13:00:00,CIN,PIT,20,23


In [4]:
# plays_df = pd.concat(map(pd.read_csv, glob.glob(f'data/nfl-big-data-bowl-*/plays.csv')))
# Import 2024 Plays DF
plays_df = pd.read_csv('data/nfl-big-data-bowl-2024/plays.csv')

plays_df['possessionTeam'] = plays_df['possessionTeam'].replace('LA', 'LAR').replace('WAS', 'WSH')
plays_df['defensiveTeam'] = plays_df['defensiveTeam'].replace('LA', 'LAR').replace('WAS', 'WSH')
# plays_df['converted'] = (plays_df['playResult'] > plays_df['yardsToGo'])

plays_df.head()

Unnamed: 0,gameId,playId,ballCarrierId,ballCarrierDisplayName,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,...,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,homeTeamWinProbabilityAdded,visitorTeamWinProbilityAdded,expectedPoints,expectedPointsAdded,foulName1,foulName2,foulNFLId1,foulNFLId2
0,2022100908,3537,48723,Parker Hesse,(7:52) (Shotgun) M.Mariota pass short middle t...,4,1,10,ATL,TB,...,0.976785,0.023215,-0.00611,0.00611,2.360609,0.981955,,,,
1,2022091103,3126,52457,Chase Claypool,(7:38) (Shotgun) C.Claypool right end to PIT 3...,4,1,10,PIT,CIN,...,0.160485,0.839515,-0.010865,0.010865,1.733344,-0.263424,,,,
2,2022091111,1148,42547,Darren Waller,(8:57) D.Carr pass short middle to D.Waller to...,2,2,5,LV,LAC,...,0.756661,0.243339,-0.037409,0.037409,1.312855,1.133666,,,,
3,2022100212,2007,46461,Mike Boone,(13:12) M.Boone left tackle to DEN 44 for 7 ya...,3,2,10,DEN,LV,...,0.620552,0.379448,-0.002451,0.002451,1.641006,-0.04358,,,,
4,2022091900,1372,47857,Devin Singletary,(8:33) D.Singletary right guard to TEN 32 for ...,2,1,10,BUF,TEN,...,0.83629,0.16371,0.001053,-0.001053,3.686428,-0.167903,,,,


In [5]:
plays_df.columns

Index(['gameId', 'playId', 'ballCarrierId', 'ballCarrierDisplayName',
       'playDescription', 'quarter', 'down', 'yardsToGo', 'possessionTeam',
       'defensiveTeam', 'yardlineSide', 'yardlineNumber', 'gameClock',
       'preSnapHomeScore', 'preSnapVisitorScore', 'passResult', 'passLength',
       'penaltyYards', 'prePenaltyPlayResult', 'playResult',
       'playNullifiedByPenalty', 'absoluteYardlineNumber', 'offenseFormation',
       'defendersInTheBox', 'passProbability', 'preSnapHomeTeamWinProbability',
       'preSnapVisitorTeamWinProbability', 'homeTeamWinProbabilityAdded',
       'visitorTeamWinProbilityAdded', 'expectedPoints', 'expectedPointsAdded',
       'foulName1', 'foulName2', 'foulNFLId1', 'foulNFLId2'],
      dtype='object')

In [6]:
# Concatenate GameId and PlayID to create a unique ID for plays across games
# The dataset re-uses playid values across games
plays_df['play_uuid'] = plays_df['gameId'].astype(str) + '.' + plays_df['playId'].astype(str)
plays_df = plays_df.set_index('play_uuid')
plays_df.head()

Unnamed: 0_level_0,gameId,playId,ballCarrierId,ballCarrierDisplayName,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,...,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,homeTeamWinProbabilityAdded,visitorTeamWinProbilityAdded,expectedPoints,expectedPointsAdded,foulName1,foulName2,foulNFLId1,foulNFLId2
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022100908.3537,2022100908,3537,48723,Parker Hesse,(7:52) (Shotgun) M.Mariota pass short middle t...,4,1,10,ATL,TB,...,0.976785,0.023215,-0.00611,0.00611,2.360609,0.981955,,,,
2022091103.3126,2022091103,3126,52457,Chase Claypool,(7:38) (Shotgun) C.Claypool right end to PIT 3...,4,1,10,PIT,CIN,...,0.160485,0.839515,-0.010865,0.010865,1.733344,-0.263424,,,,
2022091111.1148,2022091111,1148,42547,Darren Waller,(8:57) D.Carr pass short middle to D.Waller to...,2,2,5,LV,LAC,...,0.756661,0.243339,-0.037409,0.037409,1.312855,1.133666,,,,
2022100212.2007,2022100212,2007,46461,Mike Boone,(13:12) M.Boone left tackle to DEN 44 for 7 ya...,3,2,10,DEN,LV,...,0.620552,0.379448,-0.002451,0.002451,1.641006,-0.04358,,,,
2022091900.1372,2022091900,1372,47857,Devin Singletary,(8:33) D.Singletary right guard to TEN 32 for ...,2,1,10,BUF,TEN,...,0.83629,0.16371,0.001053,-0.001053,3.686428,-0.167903,,,,


In [7]:
players_df = pd.read_csv('data/nfl-big-data-bowl-2024/players.csv').set_index('nflId')
players_df.head()

Unnamed: 0_level_0,height,weight,birthDate,collegeName,position,displayName
nflId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
25511,6-4,225,1977-08-03,Michigan,QB,Tom Brady
29550,6-4,328,1982-01-22,Arkansas,T,Jason Peters
29851,6-2,225,1983-12-02,California,QB,Aaron Rodgers
30842,6-6,267,1984-05-19,UCLA,TE,Marcedes Lewis
33084,6-4,217,1985-05-17,Boston College,QB,Matt Ryan


In [8]:
# (GITHUB COLAB AI) 
# prompt: load all CSV files with a name like "tracking_week_" into a tracking_df dataframe
import glob
tracking_df = pd.concat(map(pd.read_csv, glob.glob(f'data/nfl-big-data-bowl-2024/*week*.csv')))

In [9]:
# generate same Play-UUID for frame lookup
tracking_df['club'] = tracking_df['club'].replace('LA', 'LAR').replace('WAS', 'WSH')
tracking_df['play_uuid'] = tracking_df['gameId'].astype(str) + '.' + tracking_df['playId'].astype(str)
tracking_df['play_uuid'].head()

0    2022100600.90
1    2022100600.90
2    2022100600.90
3    2022100600.90
4    2022100600.90
Name: play_uuid, dtype: object

process plays_df to add features:
create the target feature, 'converted'

In [10]:
# Create the target 'converted' column based on the play result and yards to go for the first down
converted_list = [1 if row['playResult'] - row['yardsToGo'] >= 0 else 0 for index, row in plays_df.iterrows()]

plays_df['converted'] = converted_list             # converted=1 is a success for the offense
plays_df['blocked'] = (plays_df['converted'] == 0).astype(int)         # used in cumsum and cumcount below
plays_df.head()

Unnamed: 0_level_0,gameId,playId,ballCarrierId,ballCarrierDisplayName,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,...,homeTeamWinProbabilityAdded,visitorTeamWinProbilityAdded,expectedPoints,expectedPointsAdded,foulName1,foulName2,foulNFLId1,foulNFLId2,converted,blocked
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022100908.3537,2022100908,3537,48723,Parker Hesse,(7:52) (Shotgun) M.Mariota pass short middle t...,4,1,10,ATL,TB,...,-0.00611,0.00611,2.360609,0.981955,,,,,0,1
2022091103.3126,2022091103,3126,52457,Chase Claypool,(7:38) (Shotgun) C.Claypool right end to PIT 3...,4,1,10,PIT,CIN,...,-0.010865,0.010865,1.733344,-0.263424,,,,,0,1
2022091111.1148,2022091111,1148,42547,Darren Waller,(8:57) D.Carr pass short middle to D.Waller to...,2,2,5,LV,LAC,...,-0.037409,0.037409,1.312855,1.133666,,,,,1,0
2022100212.2007,2022100212,2007,46461,Mike Boone,(13:12) M.Boone left tackle to DEN 44 for 7 ya...,3,2,10,DEN,LV,...,-0.002451,0.002451,1.641006,-0.04358,,,,,0,1
2022091900.1372,2022091900,1372,47857,Devin Singletary,(8:33) D.Singletary right guard to TEN 32 for ...,2,1,10,BUF,TEN,...,0.001053,-0.001053,3.686428,-0.167903,,,,,0,1


In [11]:
# Categorize the type of play, pass or not_pass
plays_df['playType'] = 'not_pass'     # not_pass will be the default
plays_df.loc[plays_df['playDescription'].str.contains('pass'), 'playType'] = 'pass'

plays_df['playType'].value_counts()

playType
not_pass    6840
pass        5646
Name: count, dtype: int64

In [12]:
# Categorize the direction of play, or the ball
plays_df['ballDirection'] = 'none'    # 'none' will be the default value
plays_df.loc[plays_df['playDescription'].str.contains('right'), 'ballDirection'] = 'right'
plays_df.loc[plays_df['playDescription'].str.contains('left'), 'ballDirection'] = 'left'
plays_df.loc[plays_df['playDescription'].str.contains('middle'), 'ballDirection'] = 'middle'

plays_df['ballDirection'].value_counts()

ballDirection
right     4858
left      4703
middle    2919
none         6
Name: count, dtype: int64

In [13]:
plays_uuid_list = []
plays_uuid_list = plays_df.index.tolist()

narrowed_tracking_df = tracking_df[['play_uuid', 'time']]

unique_uuid_df = narrowed_tracking_df.drop_duplicates(subset='play_uuid', keep='first')

timestamp_list = []
for uuid in plays_uuid_list:
    time = unique_uuid_df[unique_uuid_df['play_uuid'] == uuid]['time'].values[0]
    timestamp_list.append(time)
    
plays_df['timestamp'] = timestamp_list   # now the plays_df has the timestamp for sorting
plays_df

Unnamed: 0_level_0,gameId,playId,ballCarrierId,ballCarrierDisplayName,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,...,expectedPointsAdded,foulName1,foulName2,foulNFLId1,foulNFLId2,converted,blocked,playType,ballDirection,timestamp
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022100908.3537,2022100908,3537,48723,Parker Hesse,(7:52) (Shotgun) M.Mariota pass short middle t...,4,1,10,ATL,TB,...,0.981955,,,,,0,1,pass,middle,2022-10-09 15:43:13.299999
2022091103.3126,2022091103,3126,52457,Chase Claypool,(7:38) (Shotgun) C.Claypool right end to PIT 3...,4,1,10,PIT,CIN,...,-0.263424,,,,,0,1,not_pass,right,2022-09-11 15:34:11.099999
2022091111.1148,2022091111,1148,42547,Darren Waller,(8:57) D.Carr pass short middle to D.Waller to...,2,2,5,LV,LAC,...,1.133666,,,,,1,0,pass,middle,2022-09-11 17:12:12.400000
2022100212.2007,2022100212,2007,46461,Mike Boone,(13:12) M.Boone left tackle to DEN 44 for 7 ya...,3,2,10,DEN,LV,...,-0.043580,,,,,0,1,not_pass,left,2022-10-02 18:11:41.299999
2022091900.1372,2022091900,1372,47857,Devin Singletary,(8:33) D.Singletary right guard to TEN 32 for ...,2,1,10,BUF,TEN,...,-0.167903,,,,,0,1,not_pass,right,2022-09-19 20:12:02.900000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022100204.123,2022100204,123,43293,Ezekiel Elliott,(13:31) E.Elliott right tackle to WAS 38 for 1...,1,1,10,DAL,WSH,...,-0.504018,,,,,0,1,not_pass,right,2022-10-02 13:05:05.200000
2022091200.3467,2022091200,3467,46189,Will Dissly,(6:08) G.Smith pass short right to W.Dissly to...,4,1,10,SEA,DEN,...,-0.444642,,,,,0,1,pass,right,2022-09-12 22:47:31.500000
2022101605.3371,2022101605,3371,44860,Joe Mixon,(9:35) (Shotgun) J.Mixon left end to CIN 47 fo...,4,1,10,CIN,NO,...,0.203819,,,,,0,1,not_pass,left,2022-10-16 15:29:41.000000
2022100207.2777,2022100207,2777,52449,Jonathan Taylor,(2:02) (Shotgun) J.Taylor up the middle to TEN...,3,1,10,IND,TEN,...,-0.976039,,,,,0,1,not_pass,middle,2022-10-02 15:09:58.700000


In [14]:

# Create a 'week' column for processing through the dataset as well as a possible feature
plays_df['timestamp'] = pd.to_datetime(plays_df['timestamp'])  # Convert 'timestamp' to datetime format
start_date = pd.to_datetime(plays_df['timestamp'].min()) - pd.Timedelta(days=1)

# Calculate the week number based on the start date
plays_df['week'] = (plays_df['timestamp'] - start_date).dt.days // 7 + 1
plays_df.head()

Unnamed: 0_level_0,gameId,playId,ballCarrierId,ballCarrierDisplayName,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,...,foulName1,foulName2,foulNFLId1,foulNFLId2,converted,blocked,playType,ballDirection,timestamp,week
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022100908.3537,2022100908,3537,48723,Parker Hesse,(7:52) (Shotgun) M.Mariota pass short middle t...,4,1,10,ATL,TB,...,,,,,0,1,pass,middle,2022-10-09 15:43:13.299999,5
2022091103.3126,2022091103,3126,52457,Chase Claypool,(7:38) (Shotgun) C.Claypool right end to PIT 3...,4,1,10,PIT,CIN,...,,,,,0,1,not_pass,right,2022-09-11 15:34:11.099999,1
2022091111.1148,2022091111,1148,42547,Darren Waller,(8:57) D.Carr pass short middle to D.Waller to...,2,2,5,LV,LAC,...,,,,,1,0,pass,middle,2022-09-11 17:12:12.400000,1
2022100212.2007,2022100212,2007,46461,Mike Boone,(13:12) M.Boone left tackle to DEN 44 for 7 ya...,3,2,10,DEN,LV,...,,,,,0,1,not_pass,left,2022-10-02 18:11:41.299999,4
2022091900.1372,2022091900,1372,47857,Devin Singletary,(8:33) D.Singletary right guard to TEN 32 for ...,2,1,10,BUF,TEN,...,,,,,0,1,not_pass,right,2022-09-19 20:12:02.900000,2


In [15]:
####### variables passed to cumulate
# team = 'possessionTeam', 'defensiveTeam', or 'ballCarrierId'
# success = 'converted' or 'blocked'
# new_column = name of the column to be created
# formation = 'offenseFormation', or 'defendersInTheBox'

def cumulate_averages(team, success, new_column, formation):
    
    global plays_df
    
    if formation != 'None':     # sort and grouo by formation 
        plays_df = plays_df.sort_values(by=[team, formation, 'timestamp'])

        # Calculate temporary cumulative sum and count columns
        plays_df['cumulativeSum'] = plays_df.groupby([team, formation])[success].cumsum()
        plays_df['cumulativeCount'] = plays_df.groupby([team, formation]).cumcount() + 1
        
        # Calculate cumulative average based on the cumulative sum and count,
        # shifting to account for results to represent the average prior to the current play
        plays_df[new_column] = plays_df.groupby([team, formation])['cumulativeSum'].shift(fill_value=0) / plays_df.groupby([team, formation])['cumulativeCount'].shift(fill_value=0)

    else:     # sort and group without formation - for overall averages
        plays_df = plays_df.sort_values(by=[team, 'timestamp'])
        
        # Calculate temporary cumulative sum and count columns
        plays_df['cumulativeSum'] = plays_df.groupby([team])[success].cumsum()
        plays_df['cumulativeCount'] = plays_df.groupby([team]).cumcount() + 1
        
        # Calculate cumulative average based on the cumulative sum and count,
        # shifting to account for results to represent the average prior to the current play
        plays_df[new_column] = plays_df.groupby([team])['cumulativeSum'].shift(fill_value=0) / plays_df.groupby([team])['cumulativeCount'].shift(fill_value=0)
            
    # The first row of each sorted group is nan so we fill it with 0, representing no prior historical values
    plays_df[new_column].fillna(value=0, inplace=True)

    # Drop temporary columns
    plays_df.drop(['cumulativeSum', 'cumulativeCount'], axis=1, inplace=True)

    # Sort the df as a cleanup step
    plays_df = plays_df.sort_index() 

In [16]:
len(plays_df)

12486

In [17]:
nan_indices = plays_df[plays_df['offenseFormation'].isnull()].index

# Print the indices
print(nan_indices)

Index(['2022102311.3663', '2022092503.700', '2022092503.459',
       '2022091113.1283'],
      dtype='object', name='play_uuid')


In [18]:
nan_indices = plays_df[plays_df['defendersInTheBox'].isnull()].index

# Print the indices
print(nan_indices)

# 2022091807.3597

Index(['2022102311.3663', '2022091807.3597', '2022092503.700',
       '2022092503.459', '2022091113.1283'],
      dtype='object', name='play_uuid')


In [19]:
plays_df = plays_df.dropna(subset=['offenseFormation'])
plays_df['offenseFormation'].value_counts()

offenseFormation
SHOTGUN       6378
SINGLEBACK    3524
I_FORM         929
EMPTY          850
PISTOL         599
JUMBO          115
WILDCAT         87
Name: count, dtype: int64

In [20]:
len(plays_df)

12482

In [21]:
plays_df = plays_df.dropna(subset=['defendersInTheBox'])
plays_df['defendersInTheBox'].value_counts()

defendersInTheBox
6.0     5500
7.0     3692
8.0     1457
5.0     1238
4.0      319
9.0      166
3.0       51
10.0      35
11.0      19
2.0        3
1.0        1
Name: count, dtype: int64

In [22]:
len(plays_df)

12481

In [23]:
# Converted is a success for the offense and for the player (ball carrier)
# Blocked is a success for the defense
# Blocked and Converted are mutually exclusive
cumulate_averages('possessionTeam', 'converted', 'cumulativeOverall_O', 'None')
cumulate_averages('defensiveTeam', 'blocked', 'cumulativeOverall_D', 'None')
cumulate_averages('ballCarrierId', 'converted', 'cumulativeOverall_P', 'None')

cumulate_averages('possessionTeam', 'converted', 'cumulativePerFormation_O', 'offenseFormation')
cumulate_averages('defensiveTeam', 'blocked', 'cumulativePerFormation_D', 'offenseFormation')
cumulate_averages('ballCarrierId', 'converted', 'cumulativePerFormation_P', 'offenseFormation')

cumulate_averages('possessionTeam', 'converted', 'cumulativePerBoxCt_O', 'defendersInTheBox')
cumulate_averages('defensiveTeam', 'blocked', 'cumulativePerBoxCt_D', 'defendersInTheBox')
cumulate_averages('ballCarrierId', 'converted', 'cumulativePerBoxCt_P', 'defendersInTheBox')

plays_df.head()

Unnamed: 0_level_0,gameId,playId,ballCarrierId,ballCarrierDisplayName,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,...,week,cumulativeOverall_O,cumulativeOverall_D,cumulativeOverall_P,cumulativePerFormation_O,cumulativePerFormation_D,cumulativePerFormation_P,cumulativePerBoxCt_O,cumulativePerBoxCt_D,cumulativePerBoxCt_P
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022090800.101,2022090800,101,47857,Devin Singletary,(13:54) D.Singletary right end to BUF 45 for 7...,1,1,10,BUF,LAR,...,1,0.5,0.5,0.0,0.0,0.0,0.0,0.5,0.5,0.0
2022090800.103,2022090800,1030,52536,Gabe Davis,(11:44) (Shotgun) J.Allen pass short left to G...,2,3,2,BUF,LAR,...,1,0.444444,0.555556,0.5,0.363636,0.636364,0.0,0.0,0.0,0.0
2022090800.1102,2022090800,1102,52494,Zack Moss,"(9:37) (No Huddle, Shotgun) J.Allen pass short...",2,2,6,BUF,LAR,...,1,0.473684,0.526316,0.0,0.416667,0.583333,0.0,0.4375,0.5625,0.0
2022090800.1187,2022090800,1187,47853,Darrell Henderson,(8:48) D.Henderson right tackle to LA 44 for 4...,2,1,10,LAR,BUF,...,1,0.2,0.8,0.0,0.0,1.0,0.0,0.142857,0.857143,0.0
2022090800.122,2022090800,122,47857,Devin Singletary,(13:15) (Shotgun) J.Allen pass short right to ...,1,2,3,BUF,LAR,...,1,0.333333,0.666667,0.0,0.0,1.0,0.0,0.333333,0.666667,0.0


In [24]:
# testing offense cumulativePerFormation
filtered_df = plays_df[(plays_df['possessionTeam'] == 'CLE')]
show_columns = ['possessionTeam', 'converted', 'cumulativeOverall_O', 'timestamp']
narrowed_df = filtered_df[show_columns]
sorted_df = narrowed_df.sort_values(by='timestamp')
sorted_df.head()

Unnamed: 0_level_0,possessionTeam,converted,cumulativeOverall_O,timestamp
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022091101.158,CLE,1,0.0,2022-09-11 13:08:08.599999
2022091101.184,CLE,0,1.0,2022-09-11 13:08:54.500000
2022091101.213,CLE,0,0.5,2022-09-11 13:09:35.500000
2022091101.251,CLE,1,0.333333,2022-09-11 13:11:22.900000
2022091101.272,CLE,0,0.5,2022-09-11 13:12:02.400000


In [25]:
# testing defense cumulativeOverall
filtered_df = plays_df[(plays_df['defensiveTeam'] == 'DAL')]
show_columns = ['defensiveTeam', 'blocked', 'cumulativeOverall_D', 'timestamp']
narrowed_df = filtered_df[show_columns]
sorted_df = narrowed_df.sort_values(by='timestamp')
sorted_df.head()

Unnamed: 0_level_0,defensiveTeam,blocked,cumulativeOverall_D,timestamp
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022091113.413,DAL,0,0.0,2022-09-11 20:36:42.000000
2022091113.444,DAL,1,0.0,2022-09-11 20:37:17.599999
2022091113.465,DAL,0,0.5,2022-09-11 20:37:58.700000
2022091113.489,DAL,1,0.333333,2022-09-11 20:38:34.400000
2022091113.51,DAL,1,0.5,2022-09-11 20:39:14.099999


In [26]:
# testing player cumulativeOverall
filtered_df = plays_df[(plays_df['ballCarrierId'] == 52536)]
show_columns = ['ballCarrierDisplayName', 'ballCarrierId', 'possessionTeam', 'converted', 'cumulativeOverall_P', 'timestamp']
narrowed_df = filtered_df[show_columns]
sorted_df = narrowed_df.sort_values(by='timestamp')
sorted_df.head()

Unnamed: 0_level_0,ballCarrierDisplayName,ballCarrierId,possessionTeam,converted,cumulativeOverall_P,timestamp
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022090800.236,Gabe Davis,52536,BUF,1,0.0,2022-09-08 20:29:04.000000
2022090800.98,Gabe Davis,52536,BUF,0,1.0,2022-09-08 21:00:29.900000
2022090800.103,Gabe Davis,52536,BUF,1,0.5,2022-09-08 21:01:44.799999
2022090800.2688,Gabe Davis,52536,BUF,1,0.666667,2022-09-08 22:27:06.299999
2022092503.1482,Gabe Davis,52536,BUF,0,0.75,2022-09-25 14:05:57.400000


In [27]:
plays_df['down'].value_counts()

down
1    5911
2    4257
3    2126
4     187
Name: count, dtype: int64

In [28]:
# Filter tracking_df for the Ball Snap Frames
ball_snap_df = tracking_df.loc[(tracking_df['event'] == 'ball_snap')] # Duplicate plays - drop autoevent | (tracking_df['event'] == 'autoevent_ballsnap')]
ball_snap_df.head()

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event,play_uuid
5,2022100600,90,33084.0,Matt Ryan,6,2022-10-06 20:17:05.299999,2.0,IND,left,90.26,23.69,0.2,0.14,0.04,274.45,250.64,ball_snap,2022100600.9
65,2022100600,90,35459.0,Kareem Jackson,6,2022-10-06 20:17:05.299999,22.0,DEN,left,72.17,16.8,0.48,0.94,0.05,119.6,125.33,ball_snap,2022100600.9
125,2022100600,90,42030.0,K'Waun Williams,6,2022-10-06 20:17:05.299999,21.0,DEN,left,80.22,31.04,3.41,2.96,0.35,128.25,199.16,ball_snap,2022100600.9
185,2022100600,90,42393.0,Ronald Darby,6,2022-10-06 20:17:05.299999,23.0,DEN,left,76.83,36.25,0.04,0.14,0.01,73.29,206.98,ball_snap,2022100600.9
245,2022100600,90,43307.0,Ryan Kelly,6,2022-10-06 20:17:05.299999,78.0,IND,left,85.75,23.8,0.08,0.38,0.01,252.44,311.45,ball_snap,2022100600.9


In [29]:
play_ids = plays_df.index.unique()
play_ids

Index(['2022090800.101', '2022090800.1030', '2022090800.1102',
       '2022090800.1187', '2022090800.122', '2022090800.1230',
       '2022090800.1254', '2022090800.1286', '2022090800.1334',
       '2022090800.1358',
       ...
       '2022110700.612', '2022110700.646', '2022110700.670', '2022110700.700',
       '2022110700.781', '2022110700.80', '2022110700.839', '2022110700.929',
       '2022110700.950', '2022110700.971'],
      dtype='object', name='play_uuid', length=12481)

In [30]:
position_counts = ball_snap_df[['play_uuid', 'nflId']].groupby('play_uuid').count()
snap_ids = position_counts.loc[(position_counts['nflId'] == 22)].index
snap_ids

Index(['2022090800.101', '2022090800.1187', '2022090800.1254',
       '2022090800.1385', '2022090800.1406', '2022090800.146',
       '2022090800.1736', '2022090800.191', '2022090800.1946',
       '2022090800.2043',
       ...
       '2022110700.3787', '2022110700.403', '2022110700.443', '2022110700.493',
       '2022110700.546', '2022110700.612', '2022110700.670', '2022110700.80',
       '2022110700.929', '2022110700.950'],
      dtype='object', name='play_uuid', length=6323)

In [34]:
snap_ids = [snap_id for snap_id in snap_ids if snap_id in plays_df.index]

In [35]:
flat_frames_dict = []

for play in snap_ids:
    play_frame = ball_snap_df.loc[(ball_snap_df['play_uuid'] == play)]
    frame_dict = {'play_uuid': play}

    for i in range(len(play_frame)):
        frame_dict['x' + str(i)] = play_frame.iloc[i]['x']
        frame_dict['y' + str(i)] = play_frame.iloc[i]['y']
        frame_dict['s' + str(i)] = play_frame.iloc[i]['s']
        frame_dict['a' + str(i)] = play_frame.iloc[i]['a']
        frame_dict['dis' + str(i)] = play_frame.iloc[i]['dis']
        if i<22:
            frame_dict['o' + str(i)] = play_frame.iloc[i]['o']
            frame_dict['dir' + str(i)] = play_frame.iloc[i]['dir']
    
    flat_frames_dict.append(frame_dict)

transposed_frames_df = pd.DataFrame(flat_frames_dict).set_index('play_uuid').dropna()

In [36]:
transposed_frames_df

Unnamed: 0_level_0,x0,y0,s0,a0,dis0,o0,dir0,x1,y1,s1,...,s21,a21,dis21,o21,dir21,x22,y22,s22,a22,dis22
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022090800.101,73.13,27.67,0.10,1.14,0.02,273.97,348.75,67.35,29.96,0.07,...,0.00,0.00,0.00,267.12,339.31,72.040001,29.520000,0.04,0.29,0.01
2022090800.1187,71.72,29.54,0.05,0.85,0.01,272.46,55.53,69.25,24.99,0.14,...,0.94,0.81,0.09,87.20,10.33,70.070000,29.610001,0.00,0.00,0.01
2022090800.1254,59.81,23.63,0.00,0.00,0.00,272.98,143.12,42.18,36.75,0.65,...,0.04,0.04,0.02,131.60,216.90,54.689999,23.879999,0.00,0.00,0.00
2022090800.1385,36.64,29.50,0.08,0.88,0.01,273.91,130.33,34.44,33.93,0.49,...,0.00,0.00,0.03,49.70,91.93,34.990002,29.730000,0.00,0.00,0.00
2022090800.1406,18.65,23.35,0.25,1.67,0.02,278.66,52.44,16.13,28.06,0.40,...,0.32,0.18,0.03,117.94,100.45,17.389999,23.340000,0.02,0.02,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022110700.612,73.37,25.06,0.11,0.58,0.01,281.51,254.24,74.11,36.43,0.06,...,0.22,0.56,0.02,240.21,321.26,72.830002,29.920000,0.00,0.00,0.00
2022110700.670,82.37,19.86,0.29,1.71,0.02,256.40,302.59,80.80,22.52,0.13,...,0.00,0.00,0.04,172.38,162.23,81.779999,23.889999,0.00,0.00,0.00
2022110700.80,69.07,27.04,0.04,0.23,0.01,62.17,44.09,68.68,35.34,0.09,...,0.00,0.00,0.00,281.95,290.97,70.260002,29.990000,0.62,3.42,0.07
2022110700.929,76.34,35.18,0.00,0.08,0.00,95.97,142.30,77.70,31.41,0.08,...,0.60,0.76,0.05,174.30,226.59,77.029999,29.830000,0.00,0.00,0.01


In [37]:
team_frames_dict = []

for play_id in snap_ids:
    snap_frame = ball_snap_df.loc[(ball_snap_df['play_uuid'] == play_id)]
    play = plays_df.loc[play_id]
    frame_dict = {'play_uuid': play_id}

    #print(f"{play_id}: {play['possessionTeam']} {play['defensiveTeam']}")
    #print(play_id)
    
    off_players = snap_frame.loc[(snap_frame['club'] == play['possessionTeam'])]
    def_players = snap_frame.loc[(snap_frame['club'] == play['defensiveTeam'])]
    ball_position = snap_frame.loc[(snap_frame['club'] == 'football')].reset_index()

    for i in range(len(off_players)):
        frame_dict['off_x' + str(i)] = off_players.iloc[i]['x']
        frame_dict['off_y' + str(i)] = off_players.iloc[i]['y']
        frame_dict['off_s' + str(i)] = off_players.iloc[i]['s']
        frame_dict['off_a' + str(i)] = off_players.iloc[i]['a']
        frame_dict['off_dis' + str(i)] = off_players.iloc[i]['dis']
        frame_dict['off_o' + str(i)] = off_players.iloc[i]['o']
        frame_dict['off_dir' + str(i)] = off_players.iloc[i]['dir']

    for j in range(len(def_players)):
        frame_dict['def_x' + str(j)] = def_players.iloc[j]['x']
        frame_dict['def_y' + str(j)] = def_players.iloc[j]['y']
        frame_dict['def_s' + str(j)] = def_players.iloc[j]['s']
        frame_dict['def_a' + str(j)] = def_players.iloc[j]['a']
        frame_dict['def_dis' + str(j)] = def_players.iloc[j]['dis']
        frame_dict['def_o' + str(j)] = def_players.iloc[j]['o']
        frame_dict['def_dir' + str(j)] = def_players.iloc[j]['dir']
    
    for k in range(len(ball_position)):
        frame_dict['ball_x'] = ball_position.iloc[0]['x']
        frame_dict['ball_y'] = ball_position.iloc[0]['y']
        frame_dict['ball_s'] = ball_position.iloc[0]['s']
        frame_dict['ball_a'] = ball_position.iloc[0]['a']
        frame_dict['ball_dis'] = ball_position.iloc[0]['dis']
        
    team_frames_dict.append(frame_dict)

transposed_team_frames_df = pd.DataFrame(team_frames_dict).set_index('play_uuid').dropna()
transposed_team_frames_df

Unnamed: 0_level_0,off_x0,off_y0,off_s0,off_a0,off_dis0,off_o0,off_dir0,off_x1,off_y1,off_s1,...,def_s10,def_a10,def_dis10,def_o10,def_dir10,ball_x,ball_y,ball_s,ball_a,ball_dis
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022090800.101,73.13,27.67,0.10,1.14,0.02,273.97,348.75,72.50,29.47,0.00,...,0.06,0.03,0.01,79.04,59.57,72.040001,29.520000,0.04,0.29,0.01
2022090800.1187,71.72,29.54,0.05,0.85,0.01,272.46,55.53,70.73,43.53,0.00,...,0.94,0.81,0.09,87.20,10.33,70.070000,29.610001,0.00,0.00,0.01
2022090800.1254,59.81,23.63,0.00,0.00,0.00,272.98,143.12,55.60,9.34,0.00,...,0.04,0.04,0.02,131.60,216.90,54.689999,23.879999,0.00,0.00,0.00
2022090800.1385,36.64,29.50,0.08,0.88,0.01,273.91,130.33,35.57,36.98,0.00,...,0.00,0.00,0.03,49.70,91.93,34.990002,29.730000,0.00,0.00,0.00
2022090800.1406,18.65,23.35,0.25,1.67,0.02,278.66,52.44,17.67,9.53,0.00,...,0.32,0.18,0.03,117.94,100.45,17.389999,23.340000,0.02,0.02,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022110700.612,71.90,28.67,0.05,0.44,0.01,74.74,48.42,71.57,27.13,0.00,...,0.22,0.56,0.02,240.21,321.26,72.830002,29.920000,0.00,0.00,0.00
2022110700.670,80.80,22.52,0.13,0.90,0.01,92.18,120.21,80.61,21.10,0.06,...,0.00,0.00,0.00,253.94,152.31,81.779999,23.889999,0.00,0.00,0.00
2022110700.80,74.74,30.02,0.01,0.02,0.03,277.43,326.30,71.04,26.71,0.01,...,0.45,0.25,0.04,86.05,318.34,70.260002,29.990000,0.62,3.42,0.07
2022110700.929,77.70,31.41,0.08,0.05,0.04,273.44,217.09,77.94,32.81,0.00,...,0.11,0.12,0.01,72.36,14.39,77.029999,29.830000,0.00,0.00,0.01


In [38]:
def get_relative_coord(playDirection, coord, player, ball_position):
    if playDirection == 'right':
        return player[coord] - ball_position[coord]
    else:
        if coord == 'x':
            return ball_position[coord] - player[coord]
        else:
            return (53.3 - player[coord]) - ball_position[coord]

In [39]:
relative_frames_dict = []

for play_id in snap_ids:
    snap_frame = ball_snap_df.loc[(ball_snap_df['play_uuid'] == play_id)]
    play = plays_df.loc[play_id]
    frame_dict = {'play_uuid': play_id}
    
    ball_position = snap_frame.loc[(snap_frame['club'] == 'football')].reset_index()

    if len(ball_position) == 1:
        play_direction = ball_position.iloc[0]['playDirection']

        off_players = snap_frame.loc[(snap_frame['club'] == play['possessionTeam'])]
        def_players = snap_frame.loc[(snap_frame['club'] == play['defensiveTeam'])]

        for i in range(len(off_players)):
            frame_dict['off_x' + str(i)] = get_relative_coord(play_direction, 'x', off_players.iloc[i], ball_position.iloc[0])
            frame_dict['off_y' + str(i)] = get_relative_coord(play_direction, 'y', off_players.iloc[i], ball_position.iloc[0])
            # frame_dict['off_s' + str(i)] = off_players.iloc[i]['s']
            # frame_dict['off_a' + str(i)] = off_players.iloc[i]['a']
            # frame_dict['off_dis' + str(i)] = off_players.iloc[i]['dis']
            # frame_dict['off_o' + str(i)] = off_players.iloc[i]['o']
            # frame_dict['off_dir' + str(i)] = off_players.iloc[i]['dir']

        for j in range(len(def_players)):
            frame_dict['def_x' + str(j)] = get_relative_coord(play_direction, 'x', def_players.iloc[i], ball_position.iloc[0])
            frame_dict['def_y' + str(j)] = get_relative_coord(play_direction, 'y', def_players.iloc[i], ball_position.iloc[0])
            # frame_dict['def_s' + str(j)] = def_players.iloc[j]['s']
            # frame_dict['def_a' + str(j)] = def_players.iloc[j]['a']
            # frame_dict['def_dis' + str(j)] = def_players.iloc[j]['dis']
            # frame_dict['def_o' + str(j)] = def_players.iloc[j]['o']
            # frame_dict['def_dir' + str(j)] = def_players.iloc[j]['dir']
            
        relative_frames_dict.append(frame_dict)

relative_frames_df = pd.DataFrame(relative_frames_dict).set_index('play_uuid').dropna()
relative_frames_df

Unnamed: 0_level_0,off_x0,off_y0,off_x1,off_y1,off_x2,off_y2,off_x3,off_y3,off_x4,off_y4,...,def_x6,def_y6,def_x7,def_y7,def_x8,def_y8,def_x9,def_y9,def_x10,def_y10
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022090800.101,-1.089999,-3.890000,-0.459999,-5.690000,-0.989999,-15.720000,-1.109999,-2.270000,-1.509999,-5.780000,...,11.180001,-10.940000,11.180001,-10.940000,11.180001,-10.940000,11.180001,-10.940000,11.180001,-10.940000
2022090800.1187,-1.650000,-5.850001,-0.660000,-19.840001,-1.150000,-8.990001,-1.540000,-1.530001,-0.750000,-0.140001,...,0.780000,1.539999,0.780000,1.539999,0.780000,1.539999,0.780000,1.539999,0.780000,1.539999
2022090800.1254,-5.120001,5.790001,-0.910001,20.080001,-1.390001,2.780001,-0.730001,-15.669999,-1.570001,-6.229999,...,1.229999,-15.599999,1.229999,-15.599999,1.229999,-15.599999,1.229999,-15.599999,1.229999,-15.599999
2022090800.1385,-1.649998,-5.930000,-0.579998,-13.410000,-1.219998,-8.780000,-1.029998,-1.670000,-1.649998,-10.800000,...,0.150002,3.940000,0.150002,3.940000,0.150002,3.940000,0.150002,3.940000,0.150002,3.940000
2022090800.1406,-1.260001,6.610000,-0.280001,20.430000,-0.530001,3.470000,-1.020001,1.580000,-0.480001,0.450000,...,1.269999,-1.120000,1.269999,-1.120000,1.269999,-1.120000,1.269999,-1.120000,1.269999,-1.120000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022110700.612,-0.930002,-1.250000,-1.260002,-2.790000,-1.310002,3.480000,-0.890002,-18.910000,-2.860002,3.890000,...,7.429998,9.300000,7.429998,9.300000,7.429998,9.300000,7.429998,9.300000,7.429998,9.300000
2022110700.670,-0.979999,-1.369999,-1.169999,-2.789999,-1.079999,3.270001,-7.299999,0.180001,-4.349999,-2.019999,...,1.120001,18.770001,1.120001,18.770001,1.120001,18.770001,1.120001,18.770001,1.120001,18.770001
2022110700.80,-4.479998,-6.710000,-0.779998,-3.400000,-0.569998,-5.160000,-0.669998,-9.790000,-5.289998,-4.270000,...,12.010002,-0.940000,12.010002,-0.940000,12.010002,-0.940000,12.010002,-0.940000,12.010002,-0.940000
2022110700.929,-0.670001,-7.940000,-0.910001,-9.340000,-0.920001,-3.280000,-6.850001,-6.530000,-4.390001,-6.400000,...,4.529999,-0.730000,4.529999,-0.730000,4.529999,-0.730000,4.529999,-0.730000,4.529999,-0.730000


In [53]:
# transposed_frame_conversions_df = transposed_frames_df.merge(plays_df[['converted', 'quarter', 'down', 'yardsToGo', 'gameClock']], on='play_uuid', how='left')
transposed_frame_conversions_df = transposed_frames_df.merge(plays_df[['converted', 'playType', 'ballCarrierId', 'ballDirection', 'cumulativeOverall_O', 'cumulativeOverall_D', 'cumulativeOverall_P', 'cumulativePerFormation_O', 'cumulativePerFormation_D', 'cumulativePerFormation_P', 
                                                            'cumulativePerBoxCt_O', 'cumulativePerBoxCt_D', 'cumulativePerBoxCt_P', 'week', 'quarter', 'yardsToGo','preSnapHomeScore', 'preSnapVisitorScore', 'offenseFormation', 'defendersInTheBox', 
                                                            'preSnapHomeTeamWinProbability', 'preSnapVisitorTeamWinProbability', 'homeTeamWinProbabilityAdded', 'visitorTeamWinProbilityAdded', 'down']], on='play_uuid', how='left')

transposed_frame_conversions_df.head()

Unnamed: 0_level_0,x0,y0,s0,a0,dis0,o0,dir0,x1,y1,s1,...,yardsToGo,preSnapHomeScore,preSnapVisitorScore,offenseFormation,defendersInTheBox,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,homeTeamWinProbabilityAdded,visitorTeamWinProbilityAdded,down
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022090800.101,73.13,27.67,0.1,1.14,0.02,273.97,348.75,67.35,29.96,0.07,...,10,0,0,I_FORM,6.0,0.399819,0.600181,-0.01485,0.01485,1
2022090800.1187,71.72,29.54,0.05,0.85,0.01,272.46,55.53,69.25,24.99,0.14,...,10,0,10,SINGLEBACK,6.0,0.207237,0.792763,0.006345,-0.006345,1
2022090800.1254,59.81,23.63,0.0,0.0,0.0,272.98,143.12,42.18,36.75,0.65,...,10,0,10,PISTOL,6.0,0.227212,0.772788,0.008701,-0.008701,1
2022090800.1385,36.64,29.5,0.08,0.88,0.01,273.91,130.33,34.44,33.93,0.49,...,10,0,10,SINGLEBACK,6.0,0.235201,0.764799,0.027923,-0.027923,1
2022090800.1406,18.65,23.35,0.25,1.67,0.02,278.66,52.44,16.13,28.06,0.4,...,7,0,10,SINGLEBACK,6.0,0.263124,0.736876,0.0001,-0.0001,1


In [54]:
# transposed_team_frame_conversions_df = transposed_team_frames_df.merge(plays_df[['converted', 'quarter', 'down', 'yardsToGo', 'gameClock']], on='play_uuid', how='left')
transposed_team_frame_conversions_df = transposed_frames_df.merge(plays_df[['converted', 'playType', 'ballCarrierId', 'ballDirection', 'cumulativeOverall_O', 'cumulativeOverall_D', 'cumulativeOverall_P', 'cumulativePerFormation_O', 'cumulativePerFormation_D', 'cumulativePerFormation_P', 
                                                            'cumulativePerBoxCt_O', 'cumulativePerBoxCt_D', 'cumulativePerBoxCt_P', 'week', 'quarter', 'yardsToGo','preSnapHomeScore', 'preSnapVisitorScore', 'offenseFormation', 'defendersInTheBox', 
                                                            'preSnapHomeTeamWinProbability', 'preSnapVisitorTeamWinProbability', 'homeTeamWinProbabilityAdded', 'visitorTeamWinProbilityAdded', 'down']], on='play_uuid', how='left')
transposed_team_frame_conversions_df.head()

Unnamed: 0_level_0,x0,y0,s0,a0,dis0,o0,dir0,x1,y1,s1,...,yardsToGo,preSnapHomeScore,preSnapVisitorScore,offenseFormation,defendersInTheBox,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,homeTeamWinProbabilityAdded,visitorTeamWinProbilityAdded,down
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022090800.101,73.13,27.67,0.1,1.14,0.02,273.97,348.75,67.35,29.96,0.07,...,10,0,0,I_FORM,6.0,0.399819,0.600181,-0.01485,0.01485,1
2022090800.1187,71.72,29.54,0.05,0.85,0.01,272.46,55.53,69.25,24.99,0.14,...,10,0,10,SINGLEBACK,6.0,0.207237,0.792763,0.006345,-0.006345,1
2022090800.1254,59.81,23.63,0.0,0.0,0.0,272.98,143.12,42.18,36.75,0.65,...,10,0,10,PISTOL,6.0,0.227212,0.772788,0.008701,-0.008701,1
2022090800.1385,36.64,29.5,0.08,0.88,0.01,273.91,130.33,34.44,33.93,0.49,...,10,0,10,SINGLEBACK,6.0,0.235201,0.764799,0.027923,-0.027923,1
2022090800.1406,18.65,23.35,0.25,1.67,0.02,278.66,52.44,16.13,28.06,0.4,...,7,0,10,SINGLEBACK,6.0,0.263124,0.736876,0.0001,-0.0001,1


In [52]:
# relative_conversion_df = relative_frames_df.merge(plays_df[['converted', 'quarter', 'down', 'yardsToGo', 'gameClock']], on='play_uuid', how='left')
relative_conversion_df = relative_frames_df.merge(plays_df[['converted', 'playType', 'ballCarrierId', 'ballDirection', 'cumulativeOverall_O', 'cumulativeOverall_D', 'cumulativeOverall_P', 'cumulativePerFormation_O', 'cumulativePerFormation_D', 'cumulativePerFormation_P', 
                                                            'cumulativePerBoxCt_O', 'cumulativePerBoxCt_D', 'cumulativePerBoxCt_P', 'week', 'quarter', 'yardsToGo','preSnapHomeScore', 'preSnapVisitorScore', 'offenseFormation', 'defendersInTheBox', 
                                                            'preSnapHomeTeamWinProbability', 'preSnapVisitorTeamWinProbability', 'homeTeamWinProbabilityAdded', 'visitorTeamWinProbilityAdded', 'down']], on='play_uuid', how='left')
relative_conversion_df.head()

Unnamed: 0_level_0,off_x0,off_y0,off_x1,off_y1,off_x2,off_y2,off_x3,off_y3,off_x4,off_y4,...,yardsToGo,preSnapHomeScore,preSnapVisitorScore,offenseFormation,defendersInTheBox,preSnapHomeTeamWinProbability,preSnapVisitorTeamWinProbability,homeTeamWinProbabilityAdded,visitorTeamWinProbilityAdded,down
play_uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022090800.101,-1.089999,-3.89,-0.459999,-5.69,-0.989999,-15.72,-1.109999,-2.27,-1.509999,-5.78,...,10,0,0,I_FORM,6.0,0.399819,0.600181,-0.01485,0.01485,1
2022090800.1187,-1.65,-5.850001,-0.66,-19.840001,-1.15,-8.990001,-1.54,-1.530001,-0.75,-0.140001,...,10,0,10,SINGLEBACK,6.0,0.207237,0.792763,0.006345,-0.006345,1
2022090800.1254,-5.120001,5.790001,-0.910001,20.080001,-1.390001,2.780001,-0.730001,-15.669999,-1.570001,-6.229999,...,10,0,10,PISTOL,6.0,0.227212,0.772788,0.008701,-0.008701,1
2022090800.1385,-1.649998,-5.93,-0.579998,-13.41,-1.219998,-8.78,-1.029998,-1.67,-1.649998,-10.8,...,10,0,10,SINGLEBACK,6.0,0.235201,0.764799,0.027923,-0.027923,1
2022090800.1406,-1.260001,6.61,-0.280001,20.43,-0.530001,3.47,-1.020001,1.58,-0.480001,0.45,...,7,0,10,SINGLEBACK,6.0,0.263124,0.736876,0.0001,-0.0001,1


In [55]:
# Write data to Sqlite
conn = sqlite3.connect('data/sqlite/nfl_data.sqlite')

In [56]:
team_df.to_sql('teams',conn,if_exists='replace', index='abbreviation')

32

In [57]:
games_df.to_sql('games',conn,if_exists='replace', index='gameId')

136

In [58]:
plays_df.to_sql('plays',conn,if_exists='replace', index='play_uuid')

12481

In [59]:
players_df.to_sql('players', conn, if_exists='replace', index='nflId')

1683

In [60]:
#tracking_df.to_sql('tracking', conn, if_exists='replace')

In [61]:
transposed_frame_conversions_df.to_sql('snap_formations', conn, if_exists='replace', index='play_uuid')

6297

In [62]:
transposed_team_frame_conversions_df.to_sql('team_formations', conn, if_exists='replace', index='play_uuid')

6297

In [63]:
relative_conversion_df.to_sql('relative_team_formations', conn, if_exists='replace', index='play_uuid')

6297

In [64]:
len(relative_conversion_df.loc[(relative_conversion_df['down'] == 3)])

635