In [None]:
import pandas as pd
import time

from nba_api.stats.endpoints import leaguestandings
from nba_api.stats.endpoints import teamyearbyyearstats
from nba_api.stats.endpoints import commonteamroster
from nba_api.stats.endpoints import playercareerstats

Pull data from the LeagueStandings endpoint

Features: ['LeagueID', 'SeasonID', 'TeamID', 'TeamCity', 'TeamName', 'Conference', 'ConferenceRecord', 'PlayoffRank', 'ClinchIndicator', 'Division', 'DivisionRecord', 'DivisionRank', 'WINS', 'LOSSES', 'WinPCT', 'LeagueRank', 'Record', 'HOME', 'ROAD', 'L10', 'Last10Home', 'Last10Road', 'OT', 'ThreePTSOrLess', 'TenPTSOrMore', 'LongHomeStreak', 'strLongHomeStreak', 'LongRoadStreak', 'strLongRoadStreak', 'LongWinStreak', 'LongLossStreak', 'CurrentHomeStreak', 'strCurrentHomeStreak', 'CurrentRoadStreak', 'strCurrentRoadStreak', 'CurrentStreak', 'strCurrentStreak', 'ConferenceGamesBack', 'DivisionGamesBack', 'ClinchedConferenceTitle', 'ClinchedDivisionTitle', 'ClinchedPlayoffBirth', 'EliminatedConference', 'EliminatedDivision', 'AheadAtHalf', 'BehindAtHalf', 'TiedAtHalf', 'AheadAtThird', 'BehindAtThird', 'TiedAtThird', 'Score100PTS', 'OppScore100PTS', 'OppOver500', 'LeadInFGPCT', 'LeadInReb', 'FewerTurnovers', 'PointsPG', 'OppPointsPG', 'DiffPointsPG', 'vsEast', 'vsAtlantic', 'vsCentral', 'vsSoutheast', 'vsWest', 'vsNorthwest', 'vsPacific', 'vsSouthwest', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'PreAS', 'PostAS']

In [None]:
# Start from an empty frame; the download loop in the next cell populates this variable.
standings_all_years = pd.DataFrame()

In [None]:
# Pull regular-season standings, one request per season value.
# range(1982, 2023) yields 1982..2022 inclusive (2023 itself is not requested).
# !!!May take long!!!
season_frames = []
for year in range(1982, 2023):
    standings = leaguestandings.LeagueStandings(league_id="00", season=year, season_type="Regular Season")
    season_frames.append(standings.get_data_frames()[0])
    print(year)
    # Pause between requests (presumably to avoid being rate limited by the API).
    time.sleep(2)

# DataFrame.append was removed in pandas 2.0. Concatenating once at the end
# also avoids re-copying the growing frame on every iteration and makes this
# cell safe to re-run without duplicating rows.
standings_all_years = pd.concat(season_frames, ignore_index=True)

In [None]:
# Write the full standings table (every column) to CSV, without the index column.
standings_all_years.to_csv('standings_complete.csv', index=False)

Some data cleaning and feature selection before exporting to CSV

In [None]:

# Keep only the columns used downstream. The explicit .copy() makes this an
# independent frame, so the column assignment below does not raise
# SettingWithCopyWarning.
standings_selected = standings_all_years[['SeasonID', 'TeamID', 'TeamName', 'OppPointsPG', 'HOME', 'PlayoffRank']].copy()

# Drop the first character of SeasonID and store the remainder as an integer.
standings_selected['SeasonID'] = standings_selected['SeasonID'].astype(str).str[1:].astype(int)

# Keep only rows whose SeasonID is greater than 1983.
standings_selected = standings_selected[standings_selected['SeasonID'] > 1983]

In [None]:
# Write the reduced standings table to CSV, without the index column.
standings_selected.to_csv('standings_selected_feat.csv', index=False)

Pull data from the TeamYearByYear endpoint

Features: ['TEAM_ID', 'TEAM_CITY', 'TEAM_NAME', 'YEAR', 'GP', 'WINS', 'LOSSES', 'WIN_PCT', 'CONF_RANK', 'DIV_RANK', 'PO_WINS', 'PO_LOSSES', 'CONF_COUNT', 'DIV_COUNT', 'NBA_FINALS_APPEARANCE', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'PF', 'STL', 'TOV', 'BLK', 'PTS', 'PTS_RANK']

In [None]:
# Start from an empty frame; the download loop in the next cell populates this variable.
result_all_teams = pd.DataFrame()

In [None]:
# Pull per-game, year-by-year regular-season stats for each team id found in
# standings_selected (one request per team).
team_frames = []
for team in pd.unique(standings_selected['TeamID']):
    by_team = teamyearbyyearstats.TeamYearByYearStats(team_id=team, per_mode_simple='PerGame', season_type_all_star="Regular Season")
    by_team = by_team.get_data_frames()[0]

    # Keep the first four characters of YEAR.
    by_team['YEAR'] = by_team['YEAR'].str[:4]
    # Assist-to-turnover ratio; AST and TOV themselves are not kept.
    by_team['AST_TOV_RAT'] = by_team['AST'] / by_team['TOV']
    by_team = by_team[['TEAM_ID', 'TEAM_NAME', 'YEAR', 'WIN_PCT', 'FG_PCT', 'REB', 'AST_TOV_RAT', 'PTS']]

    team_frames.append(by_team)
    print(team)
    # Pause between requests (presumably to avoid being rate limited by the API).
    time.sleep(2)

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# pd.concat rejects an empty list, so fall back to an empty frame.
result_all_teams = pd.concat(team_frames, ignore_index=True) if team_frames else pd.DataFrame()

Some data cleaning before exporting to CSV

In [None]:
# Cast YEAR to int, keep rows with YEAR > 1981, and renumber the rows.
# reset_index(drop=True) discards the old index instead of turning it into a
# column. The previous reset_index() + drop(['level_0', 'index']) raised
# KeyError on a fresh kernel, because 'level_0' only exists after the cell has
# already been executed once.
result_all_teams = (
    result_all_teams
    .assign(YEAR=result_all_teams['YEAR'].astype(int))
    .loc[lambda frame: frame['YEAR'] > 1981]
    .reset_index(drop=True)
)


In [None]:
# Write the cleaned team stats to CSV, without the index column.
result_all_teams.to_csv('result_all_teams.csv', index=False)

Pull data from the CommonTeamRoster endpoint

Features: ['TeamID', 'SEASON', 'LeagueID', 'PLAYER', 'PLAYER_SLUG', 'NUM', 'POSITION', 'HEIGHT', 'WEIGHT', 'BIRTH_DATE', 'AGE', 'EXP', 'SCHOOL', 'PLAYER_ID']

In [None]:
# Start from an empty frame; the download loop in the next cell populates this variable.
all_rosters = pd.DataFrame()

In [None]:
# Pull the roster of every team id in standings_selected for each season value.
# range(1984, 2023) yields 1984..2022 inclusive (2023 itself is not requested).
# !!!May take long!!! -- one request per (team, season) pair.
roster_frames = []
for team in pd.unique(standings_selected['TeamID']):
    for year in range(1984, 2023):
        roster = commonteamroster.CommonTeamRoster(team_id=team, season=year)
        roster_frames.append(roster.get_data_frames()[0])
        print(team, year)
        # Pause between requests (presumably to avoid being rate limited by the API).
        time.sleep(1)

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# pd.concat rejects an empty list, so fall back to an empty frame.
all_rosters = pd.concat(roster_frames, ignore_index=True) if roster_frames else pd.DataFrame()

Some data cleaning before exporting to CSV

In [None]:
# Cast SEASON to int, then rename the team and season columns to
# TEAM_ID / SEASON_ID (the key names the later merge joins on).
all_rosters = (
    all_rosters
    .astype({'SEASON': int})
    .rename(columns={'TeamID': 'TEAM_ID', 'SEASON': 'SEASON_ID'})
)

In [None]:
# Write the cleaned rosters to CSV, without the index column.
all_rosters.to_csv('all_rosters_1984.csv', index=False)

Pull data from the PlayerCareerStats endpoint

Features: ['PLAYER_ID', 'SEASON_ID', 'LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']

In [None]:
# Start from an empty frame; the download loop in the next cell populates it.
all_players_1984 = pd.DataFrame()

# Distinct PLAYER_ID values from the rosters dataset, as a plain Python list.
player_list = all_rosters['PLAYER_ID'].unique().tolist()

In [None]:
# Pull career stats for each player featured in the rosters dataset.
# !!!May take long!!! -- one request per player id.
player_frames = []
for count, player_id in enumerate(player_list):
    player = playercareerstats.PlayerCareerStats(player_id=player_id)
    player_frames.append(player.get_data_frames()[0])
    print(count, player_id)
    # Pause between requests (presumably to avoid being rate limited by the API).
    time.sleep(1)

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# pd.concat rejects an empty list, so fall back to an empty frame.
all_players_1984 = pd.concat(player_frames, ignore_index=True) if player_frames else pd.DataFrame()

Some data cleaning before exporting to CSV

In [None]:
# Keep the first four characters of SEASON_ID and store them as an integer.
season_prefix = all_players_1984['SEASON_ID'].str[:4]
all_players_1984['SEASON_ID'] = season_prefix.astype(int)

In [None]:
# Write the cleaned player stats to CSV, without the index column.
all_players_1984.to_csv('all_players_1984.csv', index=False)

Combine data from the rosters with data from each player

In [None]:
# Inner-join roster rows with player stat rows that share the same player,
# season and team, so each player's stats line up with the matching roster entry.
merge_keys = ['PLAYER_ID', 'SEASON_ID', 'TEAM_ID']
rosters_players_combined = pd.merge(all_rosters, all_players_1984, how='inner', on=merge_keys)

In [None]:
# Write the merged roster/player table to CSV, without the index column.
rosters_players_combined.to_csv('rosters_players_combined.csv', index=False)