# Find Game IDs

The objective of this notebook is to take the DataFrame containing every team's stats for each game and attach the unique "game_id" to each game using nba_api.

In [None]:
import numpy as np
import pandas as pd
from nba_api.stats.static import teams
from nba_api.stats.endpoints import teamgamelog
import datetime

In [None]:
# Relative path to the raw CSV of per-game team stats loaded by this notebook
PATH_TO_TEAM_DATA = "../../data/raw/nba_games_runtime.csv"

In [None]:
# Load the team stats CSV; its first column is used as the DataFrame index
df = pd.read_csv(filepath_or_buffer=PATH_TO_TEAM_DATA, index_col=0)
df

In [None]:
# Add an empty "game_id" placeholder column as the first column of df
df.insert(loc=0, column='game_id', value=None)
df

In [None]:
# Collect the distinct team codes found in the "team" column
all_team_codes = df["team"].unique()
team_code_count = len(all_team_codes)
print(team_code_count)  # We should have 30 teams
all_team_codes

In [None]:
# Rename the team codes that do not match the NBA team codes:
#   BRK -> BKN, CHO -> CHA, PHO -> PHX
# NOTE: DataFrame.replace with a dict rewrites matching values in every
# column of df, not only the team columns.
code_fixes = {"BRK": "BKN", "CHO": "CHA", "PHO": "PHX"}
df = df.replace(code_fixes)

# Sorted distinct codes from the team column and from the opponent column
all_team_codes = np.sort(df["team"].unique())
all_opp_team_codes = np.sort(df["team_opp"].unique())
for sorted_codes in (all_team_codes, all_opp_team_codes):
    print(sorted_codes)

# Sanity check: both columns must list exactly the same codes
assert np.array_equal(all_team_codes, all_opp_team_codes)

In [None]:
# Create a python dict mapping each team abbreviation to its NBA team id
team_ids = dict()

# Abbreviations the nba_api static team lookup could not resolve
unknown_codes = []

# Get all the team ids using nba api
for code in all_team_codes:
    team_info = teams.find_team_by_abbreviation(code)

    # The lookup returns None (rather than raising) for an unknown
    # abbreviation; remember it so every bad code is reported at once.
    if team_info is None:
        unknown_codes.append(code)
        continue

    t_id = team_info['id']
    print(f"{code}: {t_id}")
    team_ids[code] = t_id

# Fail with an explicit message instead of an opaque
# "'NoneType' object is not subscriptable" TypeError.
if unknown_codes:
    raise ValueError(
        f"No NBA team found for abbreviation(s): {unknown_codes}"
    )

In [None]:
import time

# Store all game logs in a python dict
game_logs = dict()

# Get the earliest and latest game dates in the dataset.
# The dates are parsed below as "YYYY-MM-DD" strings; zero-padded dates in
# that layout sort lexicographically in chronological order, so min()/max()
# give the true bounds even if the rows are not sorted by date.
# NOTE(review): assumes every date is zero-padded ISO text - confirm.
starting_date = df["date"].min()
last_date = df["date"].max()

# Re-format both bounds from YYYY-MM-DD to MM/DD/YYYY
fmt_starting_date = datetime.datetime.strptime(starting_date, "%Y-%m-%d").strftime("%m/%d/%Y")
fmt_last_date = datetime.datetime.strptime(last_date, "%Y-%m-%d").strftime("%m/%d/%Y")
print(fmt_starting_date, fmt_last_date)

# Calendar year (the last four characters of MM/DD/YYYY) of each bound
start_season = fmt_starting_date[-4:]
last_season = fmt_last_date[-4:]

In [None]:
# Calculate season range: the years passed as `season` to the game log requests.
# Month 10 (October) is used as the cutoff for the start of a new NBA season,
# and the same cutoff is applied to both ends of the dataset.

# If the first date falls before October, those games belong to the season
# that started in the previous calendar year, so begin one year earlier.
first_season_year = int(start_season)
if int(fmt_starting_date[:2]) < 10:
    first_season_year -= 1

# If our last date in the dataset is near the start of a new NBA season
# (October or later), the season starting that year is included; otherwise
# the final season is the one that started the year before.
final_season_year = int(last_season)
if int(fmt_last_date[:2]) < 10:
    final_season_year -= 1

season_range = range(first_season_year, final_season_year + 1)

print(list(season_range))

In [None]:
# Download the regular-season game log of every team, one request per
# (team, season) pair, sleeping 30 seconds after each request.
for code in all_team_codes:

    curr_team_id = team_ids[code]

    # Accumulator for this team; each newly fetched season is placed ahead
    # of the rows collected so far, so later seasons come first.
    combined_team_game_log_df = pd.DataFrame()

    for season in season_range:

        # Request the log and convert the result straight to a DataFrame
        gamelog_df = teamgamelog.TeamGameLog(
            season=season,
            season_type_all_star='Regular Season',
            team_id=curr_team_id,
            timeout=100,
        ).team_game_log.get_data_frame()

        print(f"Gamelog fetched for {code} for the {season} season")

        combined_team_game_log_df = pd.concat(
            [gamelog_df, combined_team_game_log_df],
            ignore_index=True,
        )

        # Add a 30 second delay in-between requests
        time.sleep(30)

    game_logs[code] = combined_team_game_log_df

    # Show the first and last two rows of the combined log as a quick check
    for preview in (combined_team_game_log_df.head(2), combined_team_game_log_df.tail(2)):
        print(preview)
    print("-" * 30)

In [None]:
# # TODO: instead of sending an api request per game (over 20,000 requests), just send 30 requests of all games from 2015-2024

# # Sending too many requests can block your IP from using the API

# # for index, row in df.iterrows(): 
#     team_abbrev = row["team"]


#     # Format the game date into MM/DD/YYYY
#     game_date = row["date"]
#     formatted_date = datetime.datetime.strptime(game_date, "%Y-%m-%d").strftime("%m/%d/%Y")
    
#     # Now we can fetch the gamelog for that team for that specific date
#     gamelog = teamgamelog.TeamGameLog(
#         season = 2015,
#         season_type_all_star = 'Regular Season',
#         team_id = curr_team_id,
#         date_from_nullable = formatted_date,
#         date_to_nullable = formatted_date
#     )

#     gamelog_df = gamelog.team_game_log.get_data_frame()
#     assert gamelog_df.shape[0] == 1

#     game_id = gamelog_df.iloc[0]["Game_ID"]
#     row["game_id"] = game_id