<div class="alert alert-danger">
    <h4 style="font-weight: bold; font-size: 28px;">NBA API</h4>
    <p style="font-size: 20px;">Data Gathering</p>
</div>

<a name="NBA"></a>

# Setup

In [35]:
import pandas as pd
from datetime import datetime, timedelta
import time

In [69]:
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import scoreboard, leaguegamefinder, playercareerstats
from nba_api.stats.endpoints import boxscorematchupsv3, boxscoreadvancedv2, teamestimatedmetrics, teamgamelogs
from nba_api.stats.endpoints import TeamGameLogs, TeamEstimatedMetrics
from nba_api.live.nba.endpoints import scoreboard as live_scoreboard

# Team Data

In [13]:
# teams.get_teams() hands back a list of dictionaries, one per NBA team
# (30 at the time of this run); wrap it in a DataFrame for later lookups.
nba_teams = teams.get_teams()
nba_teams_df = pd.DataFrame(data=nba_teams)
print("Number of teams fetched: {}".format(len(nba_teams)))
nba_teams_df.head()

Number of teams fetched: 30


Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Georgia,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966


# Players

In [33]:
# players.get_players() hands back a list of dictionaries, one per player;
# wrap it in a DataFrame so the ids can be pulled out as a column later.
nba_players = players.get_players()
nba_players_df = pd.DataFrame(data=nba_players)
print("Number of players fetched: {}".format(len(nba_players)))
nba_players_df.head()

Number of players fetched: 4900


Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False


# Scoreboard

In [None]:
# Today's Score Board
# NOTE: `ScoreBoard()` (capital B, no arguments) is the class exposed by the
# live-data module nba_api.live.nba.endpoints.scoreboard. The `scoreboard`
# name imported from nba_api.stats.endpoints is a different module whose class
# is spelled `Scoreboard`, so the live module is used here via its own alias.
games = live_scoreboard.ScoreBoard()

# json (raw string; not displayed because it is not the cell's last expression)
games.get_json()

# dictionary (this is what the cell displays)
games.get_dict()

# League Game Finder

In [None]:
# Get game data: one LeagueGameFinder request per team id, collecting the
# resulting DataFrames in `games_list` for concatenation in the next cell.
team_ids = nba_teams_df['id'].tolist()

games_list = []

# The loop variable is `team_id` rather than `id` so the builtin id() is not
# shadowed for the rest of the kernel session.
for team_id in team_ids:
    # progress marker: one printed line per request
    print(team_id)
    # query for games
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id)
    # we want the first DataFrame of those returned
    games_list.append(gamefinder.get_data_frames()[0])
    # add time delay between requests
    time.sleep(3)

In [18]:
# Stack the per-team frames into a single table, then report its
# (rows, columns) shape and preview the first rows.
games_df = pd.concat(objs=games_list)
print(games_df.shape)
games_df.head()

(104437, 28)


Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612737,ATL,Atlanta Hawks,22300595,2024-01-20,ATL vs. CLE,L,239,95,...,0.667,11.0,34.0,45.0,25,5.0,4,11,15,-21.0
1,22023,1610612737,ATL,Atlanta Hawks,22300587,2024-01-19,ATL @ MIA,W,241,109,...,0.824,9.0,29.0,38.0,23,6.0,2,13,20,1.0
2,22023,1610612737,ATL,Atlanta Hawks,22300570,2024-01-17,ATL vs. ORL,W,239,106,...,0.909,10.0,31.0,41.0,21,8.0,2,23,17,2.0
3,22023,1610612737,ATL,Atlanta Hawks,22300559,2024-01-15,ATL vs. SAS,W,241,109,...,0.762,13.0,44.0,57.0,24,8.0,6,16,19,10.0
4,22023,1610612737,ATL,Atlanta Hawks,22300543,2024-01-13,ATL vs. WAS,L,240,99,...,0.727,16.0,34.0,50.0,21,9.0,7,14,20,-28.0


In [25]:
# Parse GAME_DATE into datetimes so max() compares dates rather than strings.
games_df['GAME_DATE'] = pd.to_datetime(games_df['GAME_DATE'])

# get the latest date
# (named `latest_date` because it holds the column maximum, i.e. the most
# recent game in the download)
latest_date = games_df['GAME_DATE'].max()
print(latest_date)

2024-01-21 00:00:00


In [26]:
# Persist the combined games table; the row index is left out of the file.
games_df.to_csv(
    path_or_buf='../data/original/nba_games_1983_2024.csv',
    index=False,
)

# Player Career Statistics

In [None]:
# Get player data: one PlayerCareerStats request per player id, collecting the
# resulting DataFrames in `players_stats_list` for concatenation in the next cell.
player_ids = nba_players_df['id'].tolist()

players_stats_list = []

# The loop variable is `player_id` rather than `id` so the builtin id() is not
# shadowed for the rest of the kernel session.
for player_id in player_ids:
    # progress marker: one printed line per request
    print(player_id)
    # query for this player's career stats
    career = playercareerstats.PlayerCareerStats(player_id=player_id)
    # we want the first DataFrame of those returned
    players_stats_list.append(career.get_data_frames()[0])
    # add time delay between requests
    time.sleep(1)

In [38]:
# Stack the per-player career frames into a single table, then report its
# (rows, columns) shape and preview the first rows.
players_stats_df = pd.concat(objs=players_stats_list)
print(players_stats_df.shape)
players_stats_df.head()

(29746, 27)


Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,76001,1990-91,0,1610612757,POR,23.0,43,0,290.0,55,...,0.568,27,62,89,12,4,12,22,39,135
1,76001,1991-92,0,1610612757,POR,24.0,71,1,934.0,178,...,0.752,81,179,260,30,25,16,66,132,432
2,76001,1992-93,0,1610612749,MIL,25.0,12,0,159.0,26,...,0.75,12,25,37,10,6,4,13,24,64
3,76001,1992-93,0,1610612738,BOS,25.0,63,52,1152.0,219,...,0.76,114,186,300,17,19,22,84,165,514
4,76001,1992-93,0,0,TOT,25.0,75,52,1311.0,245,...,0.759,126,211,337,27,25,26,97,189,578


In [40]:
# get the earliest season
# SEASON_ID values here are strings such as '1990-91', so min() is a string
# comparison; with this 'YYYY-YY' layout the smallest string is the oldest season.
earliest_season = players_stats_df['SEASON_ID'].min()
print(earliest_season)

1946-47


In [41]:
# Persist the combined career-stats table; the row index is left out of the file.
players_stats_df.to_csv(
    path_or_buf='../data/original/nba_players_statistics_1946_2024.csv',
    index=False,
)

# Box Score Matchups V3

In [43]:
# Reload the saved games table from disk and collect its GAME_ID column
# as a plain Python list.
games_df = pd.read_csv(filepath_or_buffer='../data/original/nba_games_1983_2024.csv')
game_ids = games_df['GAME_ID'].to_list()

In [63]:
# Spot-check one arbitrary entry of the id list; the value shown is reused as
# the hard-coded game id in the box-score cells below.
game_ids[5000]

21000400

In [None]:
# Box score matchups for a single game.
# The game id is passed as a 10-character, zero-padded string: the nba_api
# endpoint docs give GameID as a 10-digit pattern, and the integer 21000400
# taken from the re-read CSV has presumably lost its two leading zeros.
bs_matchups = boxscorematchupsv3.BoxScoreMatchupsV3(game_id='0021000400')
#bs_matchups.get_data_frames()

# Box Score Advanced V2

In [None]:
# Advanced box score for a single game.
# The game id is passed as a 10-character, zero-padded string: the nba_api
# endpoint docs give GameID as a 10-digit pattern, and the integer 21000400
# taken from the re-read CSV has presumably lost its two leading zeros.
bs_adv = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id='0021000400')
# `data_sets` is a plain list, so it has no get_data_frame() method; use
# get_data_frames() and take the first frame, as the other cells in this
# notebook do.
bs_adv.get_data_frames()[0]

# Team Game Logs

In [None]:
# Team id to filter on (presumably the Dallas Mavericks, going by the
# variable name - confirm against nba_teams_df).
mav_id = '1610612742'
# `DataSet(data=)` had an empty keyword argument, which is a SyntaxError.
# Instantiate the endpoint itself, filtered by team, and read its first frame
# the same way the other cells in this notebook do.
logs = teamgamelogs.TeamGameLogs(team_id_nullable=mav_id)
logs.get_data_frames()[0].head()

# Team Estimated Metrics

In [None]:
# `DataSet(data=)` had an empty keyword argument, which is a SyntaxError.
# Instantiate the endpoint itself; per the nba_api docs its parameters all
# have defaults (TODO confirm which season the default selects).
team_metrics = teamestimatedmetrics.TeamEstimatedMetrics()

In [65]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so a fresh-kernel run shows all dependencies up front.
from nba_api.stats.endpoints import commonplayerinfo

# Basic Request
# Fetch the CommonPlayerInfo endpoint for the hard-coded player id 2544.
player_info = commonplayerinfo.CommonPlayerInfo(player_id=2544)

In [66]:
# Show the `available_seasons` data set of the response as a DataFrame
# (a single SEASON_ID column, per the output below).
player_info.available_seasons.get_data_frame()

Unnamed: 0,SEASON_ID
0,12003
1,22003
2,12004
3,22004
4,32004
...,...
75,42022
76,52022
77,12023
78,22023
