In [None]:
import glob
import sqlite3
from pathlib import Path

import pandas as pd
import requests

In [None]:
# Load team metadata (ids, colours, logo, naming variants) from ESPN's NFL teams endpoint.
# The timeout stops the cell from hanging indefinitely, and raise_for_status() reports an
# HTTP failure directly instead of as a JSON/KeyError a few lines further down.
teams_response = requests.get(
    'https://site.api.espn.com/apis/site/v2/sports/football/nfl/teams',
    timeout=30,
)
teams_response.raise_for_status()
teams_json = teams_response.json()
teams = teams_json['sports'][0]['leagues'][0]['teams']

# One flat record per team (despite its name, this is a list of dicts).
team_dict = []

for t in teams:
    team = t['team']
    team_dict.append({
        'id': team['id'],
        'color': team['color'],
        # Read the team's own alternate colour; this field used to be a copy of 'color'.
        # .get() is used because the key's presence for every team is not verified here.
        'alternateColor': team.get('alternateColor'),
        # Only the first logo entry is kept.
        'logo': team['logos'][0]['href'],
        'abbreviation': team['abbreviation'],
        'displayName': team['displayName'],
        'location': team['location'],
        'name': team['name'],
        'nickname': team['nickname'],
        'shortDisplayName': team['shortDisplayName'],
    })

# Create Teams DataFrame using team abbreviations as index
team_df = pd.DataFrame(team_dict).set_index('abbreviation')
team_df

In [None]:
# Read the 2024 games file(s) matched by the glob pattern into a single DataFrame.
game_csv_paths = glob.glob('data/nfl-big-data-bowl-2024/games.csv')
games_df = pd.concat(pd.read_csv(csv_path) for csv_path in game_csv_paths)

# Rewrite the two team abbreviations LA -> LAR and WAS -> WSH in both team columns.
game_abbr_fixes = {'LA': 'LAR', 'WAS': 'WSH'}
for abbr_col in ('homeTeamAbbr', 'visitorTeamAbbr'):
    games_df[abbr_col] = games_df[abbr_col].replace(game_abbr_fixes)

# Index the games by their gameId.
games_df = games_df.set_index('gameId')
games_df.head()

In [None]:
# plays_df = pd.concat(map(pd.read_csv, glob.glob(f'data/nfl-big-data-bowl-*/plays.csv')))
# Import 2024 Plays DF
plays_df = pd.read_csv('data/nfl-big-data-bowl-2024/plays.csv')

# Rewrite the two team abbreviations LA -> LAR and WAS -> WSH
# (presumably to match the abbreviations used as the teams index - confirm).
play_abbr_fixes = {'LA': 'LAR', 'WAS': 'WSH'}
plays_df['possessionTeam'] = plays_df['possessionTeam'].replace(play_abbr_fixes)
plays_df['defensiveTeam'] = plays_df['defensiveTeam'].replace(play_abbr_fixes)

# A play counts as converted when it gains at least the yards still needed.
# The comparison is inclusive: gaining exactly yardsToGo is a conversion, which the
# previous strict '>' comparison missed.
plays_df['converted'] = plays_df['playResult'] >= plays_df['yardsToGo']

plays_df.head()

In [None]:
# playId values repeat from game to game, so a play is only unique as "<gameId>.<playId>".
# Build that composite key and make it the index of plays_df.
plays_df = (
    plays_df
    .assign(play_uuid=lambda df: df['gameId'].astype(str) + '.' + df['playId'].astype(str))
    .set_index('play_uuid')
)
plays_df.head()

In [None]:
# Load the 2024 players file and index it by nflId.
players_raw = pd.read_csv('data/nfl-big-data-bowl-2024/players.csv')
players_df = players_raw.set_index('nflId')
players_df.head()

In [None]:
# (GITHUB COLAB AI)
# prompt: load all CSV files with a name like "tracking_week_" into a tracking_df dataframe
# glob.glob() returns paths in arbitrary order, so the matches are sorted to make the row
# order of tracking_df reproducible. glob itself is imported in the notebook's first cell.
tracking_files = sorted(glob.glob('data/nfl-big-data-bowl-2024/*week*.csv'))
tracking_df = pd.concat(map(pd.read_csv, tracking_files))

In [None]:
# Add the same "<gameId>.<playId>" key to the tracking rows so frames can be looked up by play.
tracking_game_part = tracking_df['gameId'].astype(str)
tracking_play_part = tracking_df['playId'].astype(str)
tracking_df['play_uuid'] = tracking_game_part + '.' + tracking_play_part
tracking_df['play_uuid'].head()

In [None]:
# Keep only the tracking rows whose event is 'ball_snap'.
# Rows tagged 'autoevent_ballsnap' are deliberately not included: per the original note,
# adding them produced duplicate plays.
is_ball_snap = tracking_df['event'].eq('ball_snap')
ball_snap_df = tracking_df[is_ball_snap]
ball_snap_df.head()

In [None]:
# play_uuid is the index of plays_df at this point (it was moved there by set_index), so
# plays_df['play_uuid'] would raise KeyError. Read the unique keys from the index instead;
# to_numpy() keeps the result an ndarray, as Series.unique() would have returned.
play_ids = plays_df.index.unique().to_numpy()
play_ids

In [None]:
# Count the non-null nflId values per play among the ball_snap rows, then keep the
# plays where that count is exactly 22.
position_counts = ball_snap_df.groupby('play_uuid')[['nflId']].count()
has_22_ids = position_counts['nflId'].eq(22)
snap_ids = position_counts[has_22_ids].index
snap_ids

In [None]:
# Flatten every selected snap into one wide row per play:
# x0, y0, s0, a0, dis0, o0, dir0, x1, ... where the number is the row's position
# within that play's ball_snap rows.
kinematic_cols = ['x', 'y', 's', 'a', 'dis']
heading_cols = ['o', 'dir']

# Restrict to the plays in snap_ids once, then split with a single groupby. The previous
# version re-filtered all of ball_snap_df for every play, which scaled as plays x rows.
selected_snaps = ball_snap_df[ball_snap_df['play_uuid'].isin(snap_ids)]

flat_frames_dict = []

for play, play_frame in selected_snaps.groupby('play_uuid', sort=True):
    frame_dict = {'play_uuid': play}
    # Pull the needed columns out once per play instead of one iloc lookup per value.
    play_rows = play_frame[kinematic_cols + heading_cols].to_dict('records')

    for i, row in enumerate(play_rows):
        for col in kinematic_cols:
            frame_dict[col + str(i)] = row[col]
        # Orientation and direction are only written for the first 22 rows; any later row
        # is presumably the football, which has no heading - TODO confirm row ordering.
        if i < 22:
            for col in heading_cols:
                frame_dict[col + str(i)] = row[col]

    flat_frames_dict.append(frame_dict)

transposed_frames_df = pd.DataFrame(flat_frames_dict).set_index('play_uuid')


In [None]:
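# NOTE: disabled experiment, kept for reference only. It names columns by player position
# instead of row number, so players sharing a position would overwrite each other's values,
# and it refers to `all_22_play_ids`, which is not defined in the cells above.
# flat_frames_dict = []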

# for play in all_22_play_ids:
#     play_frame = ball_snap_df.loc[(ball_snap_df['play_uuid'] == play)]
#     frame_dict = {'play_uuid': play}

#     for i in range(23):
#         nflId = play_frame.iloc[i]['nflId']
#         if nflId == nflId:
#             player = players_df.loc[int(nflId)]
#             player_position = player['position']
#         else:
#             player_position = 'football'

#         frame_dict['x_' + player_position] = play_frame.iloc[i]['x']
#         frame_dict['y_' + player_position] = play_frame.iloc[i]['y']
#         frame_dict['s_' + player_position] = play_frame.iloc[i]['s']
#         frame_dict['a_' + player_position] = play_frame.iloc[i]['a']
#         frame_dict['dis_' + player_position] = play_frame.iloc[i]['dis']
#         if i<22:
#             frame_dict['o_' + player_position] = play_frame.iloc[i]['o']
#             frame_dict['dir_' + player_position] = play_frame.iloc[i]['dir']
    
#     flat_frames_dict.append(frame_dict)

# transposed_frames_df = pd.DataFrame(flat_frames_dict)


In [None]:
# Preview the first rows only rather than rendering the whole wide frame.
transposed_frames_df.head()

In [None]:
# Number of rows in plays_df.
plays_df.shape[0]

In [None]:
# Attach each play's 'converted' flag to its flattened snap row, matching on play_uuid.
# A left merge keeps every snap row even when no matching play is found.
conversion_flags = plays_df['converted']
transposed_frame_conversions_df = pd.merge(
    transposed_frames_df,
    conversion_flags,
    how='left',
    on='play_uuid',
)
transposed_frame_conversions_df.head()

In [None]:
# Write data to Sqlite
# sqlite3.connect() creates the database file but not its parent directory, so create the
# directory first; otherwise a fresh checkout fails with "unable to open database file".
sqlite_path = Path('data/sqlite/nfl_data.sqlite')
sqlite_path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(sqlite_path)


In [None]:
# Write the teams table, replacing any existing copy. `index` is a boolean flag in to_sql;
# the name of the index column is passed through `index_label`.
team_df.to_sql('teams', conn, if_exists='replace', index=True, index_label='abbreviation')

In [None]:
# Write the games table, replacing any existing copy. `index` is a boolean flag in to_sql;
# the name of the index column is passed through `index_label`.
games_df.to_sql('games', conn, if_exists='replace', index=True, index_label='gameId')

In [None]:
# Write the plays table, replacing any existing copy. `index` is a boolean flag in to_sql;
# the name of the index column is passed through `index_label`.
plays_df.to_sql('plays', conn, if_exists='replace', index=True, index_label='play_uuid')

In [None]:
# Write the players table, replacing any existing copy. `index` is a boolean flag in to_sql;
# the name of the index column is passed through `index_label`.
players_df.to_sql('players', conn, if_exists='replace', index=True, index_label='nflId')

In [None]:
#tracking_df.to_sql('tracking', conn, if_exists='replace')

In [None]:
# Write the flattened snap rows, replacing any existing copy. `index` is a boolean flag in
# to_sql; the name of the index column is passed through `index_label`.
# NOTE(review): this stores the frame without the 'converted' column; the merged
# transposed_frame_conversions_df is never written - confirm that is intended.
transposed_frames_df.to_sql('snap_formations', conn, if_exists='replace', index=True, index_label='play_uuid')