In [1]:
import pandas as pd 
import nba_api as nba 
import requests


In [2]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import scoreboard
from nba_api.stats.endpoints import boxscoreadvancedv2

In [3]:
#helper function to create list of dates in a given range, which will be used for each NBA season studied
def get_regular_season_dates(begin, end):
    datelist = pd.date_range(start=begin, end=end).tolist()
    return datelist

In [6]:
date = '2018-10-31'

In [7]:
day_game_data = scoreboard.Scoreboard(day_offset=0 , game_date=date, league_id='00')

ReadTimeout: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)

In [4]:
#input begin and end date, return dataframe with all game results and features 
def get_nba_data(begin, end):
    dates = get_regular_season_dates(begin, end)
    yearly_sample = []
    headers = {
    'Host': 'stats.nba.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    }
    for date in dates:
        day_game_data = scoreboard.Scoreboard(day_offset=0 , game_date=date, league_id='00', headers=headers)
        games = day_game_data.get_data_frames()
        summary = games[0]
        game_ids = summary['GAME_ID'].tolist()
        daily_sample = []
        for game_id in game_ids:
            game = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
            game = game.get_data_frames()
            outcome = game[1]
            outcome = outcome.stack().to_frame().T
            outcome.columns = ['{}_{}'.format(*c) for c in outcome.columns]
            daily_sample.append(outcome)
        yearly_sample.extend(daily_sample)
    df = pd.concat(yearly_sample)
    df = df.drop('1_GAME_ID', 1)
    df = df.rename({'0_GAME_ID': 'GAME_ID'}, axis=1)
    df = df.set_index('GAME_ID')
    return df 
        

    

In [5]:
test = get_nba_data('2018-10-31', '2018-11-10')

ReadTimeout: HTTPSConnectionPool(host='stats.nba.com', port=443): Read timed out. (read timeout=30)

In [None]:
test.head()

In [30]:
test = test.reset_index()

In [32]:
test.head()

Unnamed: 0,GAME_ID,0_TEAM_ID,0_TEAM_NAME,0_TEAM_ABBREVIATION,0_TEAM_CITY,0_MIN,0_E_OFF_RATING,0_OFF_RATING,0_E_DEF_RATING,0_DEF_RATING,...,1_TM_TOV_PCT,1_EFG_PCT,1_TS_PCT,1_USG_PCT,1_E_USG_PCT,1_E_PACE,1_PACE,1_PACE_PER40,1_POSS,1_PIE
0,21800104,1610612765,Pistons,DET,Detroit,265:00,105.2,107.2,107.0,108.1,...,14.4,0.485,0.525,1,0.198,102.05,100.53,83.77,111,0.499
1,21800105,1610612752,Knicks,NYK,New York,240:00,103.5,103.1,112.1,111.5,...,15.6,0.578,0.591,1,0.199,96.52,97.0,80.83,96,0.583
2,21800106,1610612743,Nuggets,DEN,Denver,265:00,102.0,106.9,103.0,104.9,...,13.7,0.511,0.535,1,0.197,95.02,91.92,76.6,102,0.47
3,21800107,1610612762,Jazz,UTA,Utah,240:00,120.0,120.2,121.3,123.1,...,15.4,0.651,0.663,1,0.197,104.88,104.0,86.67,104,0.521
4,21800108,1610612740,Pelicans,NOP,New Orleans,240:00,112.2,114.2,122.2,122.4,...,15.9,0.618,0.641,1,0.2,107.54,106.5,88.75,107,0.58


In [None]:
#function to create the time series features for each team 
def create_features(df):
    away_team_ids = df['0_TEAM_ID'].to_list()
    away_team_ids = set(away_team_ids)
    home_team_ids = df['1_TEAM_ID'].to_list()
    home_team_ids = set(home_team_ids)
    for i, j in iterrows():
        

In [48]:
test[1].columns

Index(['0_GAME_ID', '0_TEAM_ID', '0_TEAM_NAME', '0_TEAM_ABBREVIATION',
       '0_TEAM_CITY', '0_MIN', '0_E_OFF_RATING', '0_OFF_RATING',
       '0_E_DEF_RATING', '0_DEF_RATING', '0_E_NET_RATING', '0_NET_RATING',
       '0_AST_PCT', '0_AST_TOV', '0_AST_RATIO', '0_OREB_PCT', '0_DREB_PCT',
       '0_REB_PCT', '0_E_TM_TOV_PCT', '0_TM_TOV_PCT', '0_EFG_PCT', '0_TS_PCT',
       '0_USG_PCT', '0_E_USG_PCT', '0_E_PACE', '0_PACE', '0_PACE_PER40',
       '0_POSS', '0_PIE', '1_GAME_ID', '1_TEAM_ID', '1_TEAM_NAME',
       '1_TEAM_ABBREVIATION', '1_TEAM_CITY', '1_MIN', '1_E_OFF_RATING',
       '1_OFF_RATING', '1_E_DEF_RATING', '1_DEF_RATING', '1_E_NET_RATING',
       '1_NET_RATING', '1_AST_PCT', '1_AST_TOV', '1_AST_RATIO', '1_OREB_PCT',
       '1_DREB_PCT', '1_REB_PCT', '1_E_TM_TOV_PCT', '1_TM_TOV_PCT',
       '1_EFG_PCT', '1_TS_PCT', '1_USG_PCT', '1_E_USG_PCT', '1_E_PACE',
       '1_PACE', '1_PACE_PER40', '1_POSS', '1_PIE'],
      dtype='object')

In [6]:
dates = get_regular_season_dates('2018-10-31', '2019-3-28')

In [17]:
day_game_data = scoreboard.Scoreboard(day_offset=0 , game_date='2019-03-27' , league_id='00') 

In [18]:
day_game_data

<nba_api.stats.endpoints.scoreboard.Scoreboard at 0x114734ad0>

In [19]:
games = day_game_data.get_data_frames()

In [20]:
len(games)

7

In [21]:
summary = games[0]

In [22]:
outcome = games[1]

In [23]:
summary['outcome'] = outcome.iloc[0,21] - outcome.iloc[1,21]

In [24]:
summary

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,GAME_STATUS_ID,GAME_STATUS_TEXT,GAMECODE,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,LIVE_PERIOD,LIVE_PC_TIME,NATL_TV_BROADCASTER_ABBREVIATION,LIVE_PERIOD_TIME_BCAST,WH_STATUS,outcome
0,2019-03-27T00:00:00,1,21801117,3,Final,20190327/PORCHI,1610612741,1610612757,2018,4,,,Q4 -,1,20
1,2019-03-27T00:00:00,2,21801118,3,Final,20190327/GSWMEM,1610612763,1610612744,2018,4,,,Q4 -,1,20
2,2019-03-27T00:00:00,3,21801119,3,Final,20190327/INDOKC,1610612760,1610612754,2018,4,,ESPN,Q4 - ESPN,1,20
3,2019-03-27T00:00:00,4,21801120,3,Final,20190327/WASPHX,1610612756,1610612764,2018,4,,,Q4 -,1,20
4,2019-03-27T00:00:00,5,21801121,3,Final,20190327/LALUTA,1610612762,1610612747,2018,4,,ESPN,Q4 - ESPN,1,20


In [39]:
outcome.columns

Index(['GAME_DATE_EST', 'GAME_SEQUENCE', 'GAME_ID', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'TEAM_CITY_NAME', 'TEAM_WINS_LOSSES', 'PTS_QTR1',
       'PTS_QTR2', 'PTS_QTR3', 'PTS_QTR4', 'PTS_OT1', 'PTS_OT2', 'PTS_OT3',
       'PTS_OT4', 'PTS_OT5', 'PTS_OT6', 'PTS_OT7', 'PTS_OT8', 'PTS_OT9',
       'PTS_OT10', 'PTS', 'FG_PCT', 'FT_PCT', 'FG3_PCT', 'AST', 'REB', 'TOV'],
      dtype='object')

In [25]:
outcome

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY_NAME,TEAM_WINS_LOSSES,PTS_QTR1,PTS_QTR2,PTS_QTR3,...,PTS_OT8,PTS_OT9,PTS_OT10,PTS,FG_PCT,FT_PCT,FG3_PCT,AST,REB,TOV
0,2019-03-27T00:00:00,1,21801117,1610612757,POR,Portland,47-27,27,32,28,...,0,0,0,118,0.518,0.84,0.344,30,45,8
1,2019-03-27T00:00:00,1,21801117,1610612741,CHI,Chicago,21-55,14,27,24,...,0,0,0,98,0.429,0.789,0.217,22,44,8
2,2019-03-27T00:00:00,2,21801118,1610612744,GSW,Golden State,51-23,31,35,23,...,0,0,0,118,0.536,0.867,0.484,30,46,14
3,2019-03-27T00:00:00,2,21801118,1610612763,MEM,Memphis,30-45,37,23,24,...,0,0,0,103,0.39,0.714,0.323,22,51,9
4,2019-03-27T00:00:00,3,21801119,1610612754,IND,Indiana,45-30,27,27,15,...,0,0,0,99,0.435,0.545,0.419,26,45,11
5,2019-03-27T00:00:00,3,21801119,1610612760,OKC,Oklahoma City,44-31,29,16,31,...,0,0,0,107,0.446,0.632,0.361,29,44,6
6,2019-03-27T00:00:00,4,21801120,1610612764,WAS,Washington,31-45,27,32,30,...,0,0,0,124,0.478,0.674,0.36,18,52,10
7,2019-03-27T00:00:00,4,21801120,1610612756,PHX,Phoenix,17-59,30,29,31,...,0,0,0,121,0.494,0.808,0.378,22,48,15
8,2019-03-27T00:00:00,5,21801121,1610612747,LAL,Los Angeles,33-42,25,28,18,...,0,0,0,100,0.381,0.708,0.333,21,40,4
9,2019-03-27T00:00:00,5,21801121,1610612762,UTA,Utah,45-30,32,28,27,...,0,0,0,115,0.494,0.639,0.313,33,54,11


In [None]:
def isolate

In [None]:
game_ids = 

In [None]:
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=)

In [6]:
game = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id='0021801117')

In [7]:
game = game.get_data_frames()

In [8]:
len(game)

2

In [9]:
game[0].columns

Index(['GAME_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'PLAYER_ID',
       'PLAYER_NAME', 'START_POSITION', 'COMMENT', 'MIN', 'E_OFF_RATING',
       'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING', 'E_NET_RATING',
       'NET_RATING', 'AST_PCT', 'AST_TOV', 'AST_RATIO', 'OREB_PCT', 'DREB_PCT',
       'REB_PCT', 'TM_TOV_PCT', 'EFG_PCT', 'TS_PCT', 'USG_PCT', 'E_USG_PCT',
       'E_PACE', 'PACE', 'PACE_PER40', 'POSS', 'PIE'],
      dtype='object')

In [10]:
game[1].columns

Index(['GAME_ID', 'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY',
       'MIN', 'E_OFF_RATING', 'OFF_RATING', 'E_DEF_RATING', 'DEF_RATING',
       'E_NET_RATING', 'NET_RATING', 'AST_PCT', 'AST_TOV', 'AST_RATIO',
       'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'E_TM_TOV_PCT', 'TM_TOV_PCT',
       'EFG_PCT', 'TS_PCT', 'USG_PCT', 'E_USG_PCT', 'E_PACE', 'PACE',
       'PACE_PER40', 'POSS', 'PIE'],
      dtype='object')

In [11]:
o = game[1]

In [12]:
o['NET_RATING']

0    20.6
1   -20.6
Name: NET_RATING, dtype: float64

In [13]:
df = game[1]

In [14]:
df = df.stack().to_frame().T
df.columns = ['{}_{}'.format(*c) for c in df.columns]

In [15]:
df

Unnamed: 0,0_GAME_ID,0_TEAM_ID,0_TEAM_NAME,0_TEAM_ABBREVIATION,0_TEAM_CITY,0_MIN,0_E_OFF_RATING,0_OFF_RATING,0_E_DEF_RATING,0_DEF_RATING,...,1_TM_TOV_PCT,1_EFG_PCT,1_TS_PCT,1_USG_PCT,1_E_USG_PCT,1_E_PACE,1_PACE,1_PACE_PER40,1_POSS,1_PIE
0,21801117,1610612757,Trail Blazers,POR,Portland,240:00,126.9,128.3,105,107.7,...,11,0.456,0.493,1,0.194,93.18,91.5,76.25,91,0.373


In [None]:
df['outcome']

In [32]:
df.columns

Index(['0_GAME_ID', '0_TEAM_ID', '0_TEAM_NAME', '0_TEAM_ABBREVIATION',
       '0_TEAM_CITY', '0_MIN', '0_E_OFF_RATING', '0_OFF_RATING',
       '0_E_DEF_RATING', '0_DEF_RATING', '0_E_NET_RATING', '0_NET_RATING',
       '0_AST_PCT', '0_AST_TOV', '0_AST_RATIO', '0_OREB_PCT', '0_DREB_PCT',
       '0_REB_PCT', '0_E_TM_TOV_PCT', '0_TM_TOV_PCT', '0_EFG_PCT', '0_TS_PCT',
       '0_USG_PCT', '0_E_USG_PCT', '0_E_PACE', '0_PACE', '0_PACE_PER40',
       '0_POSS', '0_PIE', '1_GAME_ID', '1_TEAM_ID', '1_TEAM_NAME',
       '1_TEAM_ABBREVIATION', '1_TEAM_CITY', '1_MIN', '1_E_OFF_RATING',
       '1_OFF_RATING', '1_E_DEF_RATING', '1_DEF_RATING', '1_E_NET_RATING',
       '1_NET_RATING', '1_AST_PCT', '1_AST_TOV', '1_AST_RATIO', '1_OREB_PCT',
       '1_DREB_PCT', '1_REB_PCT', '1_E_TM_TOV_PCT', '1_TM_TOV_PCT',
       '1_EFG_PCT', '1_TS_PCT', '1_USG_PCT', '1_E_USG_PCT', '1_E_PACE',
       '1_PACE', '1_PACE_PER40', '1_POSS', '1_PIE'],
      dtype='object')

In [None]:
def create_winner(summary):
    summary['winner'] = 0
    if 

In [10]:
outcome

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY_NAME,TEAM_WINS_LOSSES,PTS_QTR1,PTS_QTR2,PTS_QTR3,...,PTS_OT8,PTS_OT9,PTS_OT10,PTS,FG_PCT,FT_PCT,FG3_PCT,AST,REB,TOV
0,2019-03-27T00:00:00,1,21801117,1610612757,POR,Portland,47-27,27,32,28,...,0,0,0,118,0.518,0.84,0.344,30,45,8
1,2019-03-27T00:00:00,1,21801117,1610612741,CHI,Chicago,21-55,14,27,24,...,0,0,0,98,0.429,0.789,0.217,22,44,8
2,2019-03-27T00:00:00,2,21801118,1610612744,GSW,Golden State,51-23,31,35,23,...,0,0,0,118,0.536,0.867,0.484,30,46,14
3,2019-03-27T00:00:00,2,21801118,1610612763,MEM,Memphis,30-45,37,23,24,...,0,0,0,103,0.39,0.714,0.323,22,51,9
4,2019-03-27T00:00:00,3,21801119,1610612754,IND,Indiana,45-30,27,27,15,...,0,0,0,99,0.435,0.545,0.419,26,45,11
5,2019-03-27T00:00:00,3,21801119,1610612760,OKC,Oklahoma City,44-31,29,16,31,...,0,0,0,107,0.446,0.632,0.361,29,44,6
6,2019-03-27T00:00:00,4,21801120,1610612764,WAS,Washington,31-45,27,32,30,...,0,0,0,124,0.478,0.674,0.36,18,52,10
7,2019-03-27T00:00:00,4,21801120,1610612756,PHX,Phoenix,17-59,30,29,31,...,0,0,0,121,0.494,0.808,0.378,22,48,15
8,2019-03-27T00:00:00,5,21801121,1610612747,LAL,Los Angeles,33-42,25,28,18,...,0,0,0,100,0.381,0.708,0.333,21,40,4
9,2019-03-27T00:00:00,5,21801121,1610612762,UTA,Utah,45-30,32,28,27,...,0,0,0,115,0.494,0.639,0.313,33,54,11


In [12]:
summary

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,GAME_STATUS_ID,GAME_STATUS_TEXT,GAMECODE,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,LIVE_PERIOD,LIVE_PC_TIME,NATL_TV_BROADCASTER_ABBREVIATION,LIVE_PERIOD_TIME_BCAST,WH_STATUS
0,2019-03-27T00:00:00,1,21801117,3,Final,20190327/PORCHI,1610612741,1610612757,2018,4,,,Q4 -,1
1,2019-03-27T00:00:00,2,21801118,3,Final,20190327/GSWMEM,1610612763,1610612744,2018,4,,,Q4 -,1
2,2019-03-27T00:00:00,3,21801119,3,Final,20190327/INDOKC,1610612760,1610612754,2018,4,,ESPN,Q4 - ESPN,1
3,2019-03-27T00:00:00,4,21801120,3,Final,20190327/WASPHX,1610612756,1610612764,2018,4,,,Q4 -,1
4,2019-03-27T00:00:00,5,21801121,3,Final,20190327/LALUTA,1610612762,1610612747,2018,4,,ESPN,Q4 - ESPN,1


In [11]:
outcome.columns

Index(['GAME_DATE_EST', 'GAME_SEQUENCE', 'GAME_ID', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'TEAM_CITY_NAME', 'TEAM_WINS_LOSSES', 'PTS_QTR1',
       'PTS_QTR2', 'PTS_QTR3', 'PTS_QTR4', 'PTS_OT1', 'PTS_OT2', 'PTS_OT3',
       'PTS_OT4', 'PTS_OT5', 'PTS_OT6', 'PTS_OT7', 'PTS_OT8', 'PTS_OT9',
       'PTS_OT10', 'PTS', 'FG_PCT', 'FT_PCT', 'FG3_PCT', 'AST', 'REB', 'TOV'],
      dtype='object')