<div class="alert alert-danger">
    <h4 style="font-weight: bold; font-size: 28px;">NBA API</h4>
    <p style="font-size: 20px;">Data Gathering</p>
</div>

<a name="NBA"></a>

# Setup

In [None]:
import pandas as pd
from datetime import datetime, timedelta
import time

In [None]:
from nba_api.live.nba.endpoints import scoreboard as live_scoreboard
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import (
  scoreboard, leaguegamefinder, playercareerstats,
  boxscorematchupsv3, boxscoreadvancedv2, boxscoreadvancedv3,
  teamestimatedmetrics, teamgamelogs, TeamGameLogs, TeamEstimatedMetrics,
  leaguedashteamstats, commonplayerinfo
)

# Team Data

In [None]:
# teams.get_teams() hands back a list with one dictionary per NBA team
nba_teams = teams.get_teams()
# tabulate the team dictionaries, then report how many were fetched
nba_teams_df = pd.DataFrame(nba_teams)
print("Number of teams fetched: {}".format(len(nba_teams)))
nba_teams_df.head()

# Players

In [None]:
# players.get_players() hands back a list with one dictionary per player
nba_players = players.get_players()
# tabulate the player dictionaries, then report how many were fetched
nba_players_df = pd.DataFrame(nba_players)
print("Number of players fetched: {}".format(len(nba_players)))
nba_players_df.head()

# Scoreboard

In [None]:
# Today's Score Board
# ScoreBoard (capital "B") comes from nba_api's live-data endpoints, imported in
# the setup cell as `live_scoreboard`.
# NOTE(review): the stats-side `scoreboard` module imported alongside the other
# endpoints does not appear to provide a `ScoreBoard` class — confirm.
games = live_scoreboard.ScoreBoard()

# json
games.get_json()

# dictionary
games.get_dict()

# League Game Finder

In [None]:
# get game data: one LeagueGameFinder request per team id
team_ids = nba_teams_df['id'].tolist()

games_list = []

# The loop variable is `team_id`, not `id`: at notebook scope a variable named
# `id` would keep shadowing the builtin id() in every later cell.
for team_id in team_ids:
    print(team_id)
    # query for games
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id)
    # we want the first DataFrame of those returned
    games_list.append(gamefinder.get_data_frames()[0])
    # add time delay between requests
    time.sleep(3)

In [None]:
# stack the per-team frames into one table; each frame's own row index is kept
games_df = pd.concat(games_list)
print(games_df.shape)
games_df.head()

In [None]:
# convert GAME_DATE to datetimes before taking the maximum
games_df['GAME_DATE'] = pd.to_datetime(games_df['GAME_DATE'])

# get the latest date (the value is a max(), so the name says "latest")
latest_date = games_df['GAME_DATE'].max()
print(latest_date)

In [None]:
# write the combined game log to disk (index omitted); a later cell reloads this file
games_df.to_csv('../data/original/nba_games_box_scores_1984_2024.csv', index=False)

# League Dash Team Stats

In [None]:
# trial request: league-wide team stats for a single month of the 2023-24 season
# NOTE(review): `month` may be numbered by position within the season rather than
# by calendar month — confirm against the endpoint documentation.
ldts = leaguedashteamstats.LeagueDashTeamStats(month=5, season='2023-24')

In [None]:
# preview the first DataFrame returned by the trial request
ldts.get_data_frames()[0].head()

In [None]:
# season labels in "YYYY-YY" form, from 1996-97 through 2023-24
seasons = [f"{year}-{str(year + 1)[-2:]}" for year in range(1996, 2024)]
months = range(1, 13)

ldts_list = []
for season in seasons:
    for month in months:
        print(f"Querying season {season}, month {month}")
        # one request per (season, month) pair
        ldts = leaguedashteamstats.LeagueDashTeamStats(month=month, season=season)
        # keep the first DataFrame of those returned
        df = ldts.get_data_frames()[0]
        # tag every row with the parameters it was queried with, then collect it
        ldts_list.append(df.assign(SEASON=season, MONTH=month))
        # pause between requests
        time.sleep(3)

# one large DataFrame with a fresh running index
ldts_df = pd.concat(ldts_list, ignore_index=True)

In [None]:
# preview the combined per-month team stats
ldts_df.head()

In [None]:
# save the combined per-month team stats (index omitted)
ldts_df.to_csv('../data/original/nba_dash_team_stats_1997_2024.csv', index=False)

In [None]:
# number of rows per TEAM_NAME in the combined table
ldts_df.value_counts('TEAM_NAME')

In [None]:
# rows for the Boston Celtics in the 2022-23 season
is_celtics = ldts_df['TEAM_NAME'] == 'Boston Celtics'
is_2022_23 = ldts_df['SEASON'] == '2022-23'
BC_2023 = ldts_df[is_celtics & is_2022_23]
BC_2023.head()

# Player Career Statistics

In [None]:
# get player data: one PlayerCareerStats request per player id
player_ids = nba_players_df['id'].tolist()

players_stats_list = []

# The loop variable is `player_id`, not `id`: at notebook scope a variable named
# `id` would keep shadowing the builtin id() in every later cell.
for player_id in player_ids:
    print(player_id)
    # query for the player's career stats
    career = playercareerstats.PlayerCareerStats(player_id=player_id)
    # we want the first DataFrame of those returned
    players_stats_list.append(career.get_data_frames()[0])
    # add time delay between requests
    time.sleep(1)

In [None]:
# stack the per-player frames into one table; each frame's own row index is kept
players_stats_df = pd.concat(players_stats_list)
print(players_stats_df.shape)
players_stats_df.head()

In [None]:
# get the earliest season: this is the minimum SEASON_ID, not the latest one
earliest_date = players_stats_df['SEASON_ID'].min()
print(earliest_date)

In [None]:
# save the combined player career stats (index omitted)
players_stats_df.to_csv('../data/original/nba_players_statistics_1946_2024.csv', index=False)

# Box Score Matchups V3

In [None]:
# Reload the saved game log. GAME_ID and SEASON_ID are read as text: pandas' default
# type inference turns all-digit columns into integers, which drops any leading
# zeros from GAME_ID and makes string comparisons such as SEASON_ID == '22023'
# match nothing.
games_df = pd.read_csv(
    '../data/original/nba_games_box_scores_1984_2024.csv',
    dtype={'GAME_ID': str, 'SEASON_ID': str},
)
# game id of every row in the log
game_ids = games_df['GAME_ID'].tolist()

In [None]:
# spot-check a single entry (position 5000) of the id list
game_ids[5000]

In [None]:
# NOTE(review): stats.nba.com game ids are 10-digit strings; the integer 21000400
# looks like '0021000400' with its leading zeros lost to CSV type inference — confirm.
bs_matchups = boxscorematchupsv3.BoxScoreMatchupsV3(game_id='0021000400')
#bs_matchups.get_data_frames()

# Box Score Advanced V3

In [None]:
# NOTE(review): stats.nba.com game ids are 10-digit strings; the integer 21000400
# looks like '0021000400' with its leading zeros lost to CSV type inference — confirm.
bs_adv = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id='0021000400')
# second DataFrame of those returned
check = bs_adv.get_data_frames()[1]
check

In [None]:
# function to get game_ids
def get_game_ids(season_id, games=None):
    """Return the GAME_ID of every row whose SEASON_ID matches ``season_id``.

    Parameters
    ----------
    season_id : str or int
        Season identifier to look up (e.g. ``'22023'``). It is compared as text,
        so the lookup works whether SEASON_ID is stored as strings or as integers
        (the latter is what ``pd.read_csv`` infers by default).
    games : pandas.DataFrame, optional
        Frame with ``SEASON_ID`` and ``GAME_ID`` columns. Defaults to the
        notebook-level ``games_df``.

    Returns
    -------
    list
        GAME_ID values in row order, one per matching row (ids are not
        de-duplicated); an empty list when nothing matches.
    """
    if games is None:
        games = games_df
    # compare both sides as text so '22023' and 22023 select the same rows
    in_season = games['SEASON_ID'].astype(str) == str(season_id)
    return games.loc[in_season, 'GAME_ID'].tolist()

# NOTE(review): the leading "2" in these season ids presumably marks the regular
# season, followed by the season's starting year — confirm.
#get game ids for 2023 - 2024 season
game_ids_2023_2024 = get_game_ids('22023')

#get game ids for 2022 - 2023 season
game_ids_2022_2023 = get_game_ids('22022')

#get game ids for 2021 - 2022 season
game_ids_2021_2022 = get_game_ids('22021')
# display the full 2021 - 2022 id list
game_ids_2021_2022

In [None]:
# function to get team advanced stats per game for a given season
def get_adv_stats_df(game_id_list, delay=3):
    """Fetch the advanced box score of each game and stack the results.

    Parameters
    ----------
    game_id_list : iterable
        Game ids to query. Repeated ids are requested only once: the game log
        this list is built from is assembled one team at a time, so a game can
        appear once per participating team.
    delay : float, optional
        Seconds to sleep after each request (default 3).

    Returns
    -------
    pandas.DataFrame
        The second DataFrame returned for each game, concatenated with a fresh
        index and with exact duplicate rows removed.

    Raises
    ------
    ValueError
        If ``game_id_list`` is empty.
    """
    # drop repeated ids while keeping first-seen order, so no game is fetched twice
    unique_game_ids = list(dict.fromkeys(game_id_list))
    if not unique_game_ids:
        raise ValueError("game_id_list is empty; there are no box scores to fetch")

    adv_games_stats_list = []
    for game_id in unique_game_ids:
        print(game_id)
        # query for games
        box_score = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id=game_id)
        # index 1 is the second DataFrame of those returned
        adv_games_stats_list.append(box_score.get_data_frames()[1])
        # pause between requests
        time.sleep(delay)
    adv_stats_df = pd.concat(adv_games_stats_list, ignore_index=True)
    return adv_stats_df.drop_duplicates()

In [None]:
# get advanced stats df for 2021 - 2022 season
# (slow: get_adv_stats_df pauses after every request)
adv_stats_df_2021_2022 = get_adv_stats_df(game_ids_2021_2022)
adv_stats_df_2021_2022.head()

In [None]:
# get advanced stats df for 2022 - 2023 season
# (slow: get_adv_stats_df pauses after every request)
adv_stats_df_2022_2023 = get_adv_stats_df(game_ids_2022_2023)
adv_stats_df_2022_2023.head()

In [None]:
# get advanced stats df for 2023 - 2024 season
# (slow: get_adv_stats_df pauses after every request)
adv_stats_df_2023_2024 = get_adv_stats_df(game_ids_2023_2024)
adv_stats_df_2023_2024.head()

In [None]:
#combine advanced stat dataframes into one combined dataframe
adv_stats_frames = [adv_stats_df_2021_2022, adv_stats_df_2022_2023, adv_stats_df_2023_2024]
# pass the list defined above (the previous name, adv_stat_frames, was undefined);
# ignore_index gives the combined frame one running index, as the other concat steps do
adv_stats_df = pd.concat(adv_stats_frames, ignore_index=True)
adv_stats_df.head()

In [None]:
adv_stats_df = (
    adv_stats_df
    # team name field (city + name) to match games_df
    .assign(TEAM_NAME=lambda frame: frame['teamCity'] + " " + frame['teamName'])
    # rename the fields that have a counterpart in games_df
    .rename(columns={'gameId':'GAME_ID','teamId':'TEAM_ID', 'teamTricode': 'TEAM_ABBREVIATION'})
    # the city/name/slug columns are redundant once TEAM_NAME exists
    .drop(columns=['teamCity', 'teamName', 'teamSlug'])
)

adv_stats_df.head()

In [None]:
# bring SEASON_ID, GAME_DATE and MATCHUP over from games_df, matching rows on
# GAME_ID, TEAM_ID and TEAM_ABBREVIATION
merge_keys = ['GAME_ID','TEAM_ID', 'TEAM_ABBREVIATION']
game_columns = ['SEASON_ID','GAME_DATE','MATCHUP', 'GAME_ID','TEAM_ID', 'TEAM_ABBREVIATION']
adv_stats_df = adv_stats_df.merge(games_df[game_columns], on=merge_keys)

adv_stats_df.head()

In [None]:
#export the merged advanced stats to csv (index omitted)
adv_stats_df.to_csv('../data/original/nba_advanced_statistics_2021_2024.csv', index=False)

# Team Game Logs

In [None]:
mav_id = '1610612742'
# NOTE(review): the previous line, `TeamGameLogs.DataSet(data=)`, was a syntax error.
# Requesting this team's game logs is the presumed intent — confirm.
logs = teamgamelogs.TeamGameLogs(team_id_nullable=mav_id)
# preview the first DataFrame of those returned
logs.get_data_frames()[0].head()

# Team Estimated Metrics

In [None]:
# NOTE(review): the previous line, `TeamEstimatedMetrics.DataSet(data=)`, was a syntax
# error. A request with the endpoint's default parameters is the presumed intent — confirm.
team_metrics = teamestimatedmetrics.TeamEstimatedMetrics()

In [None]:
# Basic Request
# (commonplayerinfo is imported with the other endpoints in the setup cell)
player_info = commonplayerinfo.CommonPlayerInfo(player_id=2544)

In [None]:
# show the available_seasons data set of the response as a DataFrame
player_info.available_seasons.get_data_frame()