In [1]:
import requests
import requests_cache
import networkx as nx
import pandas as pd

In [2]:
from fake_useragent import UserAgent
from functools import partial, wraps

In [3]:
# Cache HTTP responses made through `requests` in a SQLite backend named
# 'nba-rank'; cached entries expire after 24*60*60 seconds (one day).
requests_cache.install_cache(
    'nba-rank',
    expire_after=24*60*60,
    backend='sqlite'
)

In [4]:
def apply_to_output(callback, *cb_args, **cb_kwargs):
    """Build a decorator that post-processes a function's return value.

    The decorated function is called with its own arguments; its result is
    then handed to ``callback`` as the first positional argument, followed by
    ``cb_args`` and ``cb_kwargs``. Whatever ``callback`` returns becomes the
    result of the decorated call.
    """

    def post_process(result):
        return callback(result, *cb_args, **cb_kwargs)

    def decorator(func):

        @wraps(func)
        def wrapper(*args, **kwargs):
            return post_process(func(*args, **kwargs))

        return wrapper

    return decorator

In [5]:
def get_json(url, *args, **kwargs):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Extra positional and keyword arguments are forwarded to ``requests.get``.
    A 30 second ``timeout`` is applied unless the caller supplies one, so a
    stalled server cannot block the notebook indefinitely.

    Raises whatever ``raise_for_status`` raises for an unsuccessful response.
    """
    kwargs.setdefault('timeout', 30)
    # ``url`` goes first positionally: ``requests.get(url=url, *args)`` would
    # bind the first extra positional argument to ``url`` too and raise
    # TypeError ("multiple values for argument 'url'").
    response = requests.get(url, *args, **kwargs)
    response.raise_for_status()
    return response.json()

In [6]:
# Shared fake_useragent instance; supplies the User-Agent string used for requests.
ua = UserAgent()

In [7]:
# Rebind get_json so every call sends a User-Agent header taken from ua.google.
# NOTE(review): this shadows the plain function; re-running the cell that
# defines get_json without re-running this one drops the header again.
get_json = partial(get_json, headers={'User-Agent': ua.google})

In [8]:
def nested_dict_from_lst(lst, key):
    """Index a list of dicts by the value each dict holds under ``key``.

    Returns ``{d[key]: <copy of d without key>}`` for every ``d`` in ``lst``.
    The input dicts are left untouched, so the same data can be passed in
    again (popping the key in place made a second call raise ``KeyError``).

    Raises ``KeyError`` if any dict in ``lst`` lacks ``key``.
    """
    return {
        d[key]: {k: v for k, v in d.items() if k != key}
        for d in lst
    }

In [9]:
def normalize_dict(dct, frame_name_key='name', frames_key='resultSets',
                   frames_default=[]):
    """Turn a response dict into a mapping of frame name -> frame dict.

    Looks up the list stored under ``frames_key`` (falling back to
    ``frames_default`` when the key is absent) and re-keys its entries by
    their ``frame_name_key`` value via ``nested_dict_from_lst``.
    """
    frames = dct.get(frames_key, frames_default)
    return nested_dict_from_lst(frames, key=frame_name_key)

In [10]:
def to_data_frame(frames_dict, frame_name, frame_index=None,
                  frame_data_key='rowSet', frame_columns_key='headers'):
    """Build a DataFrame from one named frame of ``frames_dict``.

    The frame's ``frame_data_key`` entry provides the records and its
    ``frame_columns_key`` entry the column names; ``frame_index`` is passed
    to ``DataFrame.from_records`` as the index. Rows containing any missing
    value are dropped from the result.
    """
    frame = frames_dict[frame_name]
    records = frame[frame_data_key]
    header = frame[frame_columns_key]
    table = pd.DataFrame.from_records(data=records,
                                      columns=header,
                                      index=frame_index)
    return table.dropna()

In [47]:
@apply_to_output(to_data_frame, frame_name='LeagueGameLog', frame_index=['GAME_ID'])
@apply_to_output(normalize_dict)
def games(league_id='00', season='2015-16', season_type='Regular Season',
          team=True, sort_by='date', ascending=True):
    """Fetch the league game log as a DataFrame indexed by GAME_ID.

    ``team`` selects team ('T') or player ('P') rows, ``sort_by`` is sent
    upper-cased as the sorter and ``ascending`` picks 'ASC' or 'DESC'. The raw
    JSON is passed through ``normalize_dict`` and then ``to_data_frame`` for
    the 'LeagueGameLog' frame.
    """
    player_or_team = 'T' if team else 'P'
    query = {
        'LeagueID': league_id,
        'PlayerOrTeam': player_or_team,
        'Season': season,
        'SeasonType': season_type,
        'Sorter': sort_by.upper(),
        'Direction': 'ASC' if ascending else 'DESC',
    }
    return get_json(url='http://stats.nba.com/stats/LeagueGameLog',
                    params=query)

In [12]:
@apply_to_output(to_data_frame, frame_name='TeamYears', frame_index=['TEAM_ID'])
@apply_to_output(normalize_dict)
def teams(league_id='00'):
    """Fetch the 'TeamYears' frame for ``league_id`` as a DataFrame indexed by TEAM_ID."""
    query = {'LeagueID': league_id}
    return get_json(url='http://stats.nba.com/stats/commonTeamYears',
                    params=query)

In [13]:
@apply_to_output(to_data_frame, frame_name='TeamInfoCommon', frame_index=['TEAM_ID'])
@apply_to_output(normalize_dict)
def team(team_id, league_id='00', season='2015-16',
         season_type='Regular Season'):
    """Fetch the 'TeamInfoCommon' frame for one team as a DataFrame indexed by TEAM_ID.

    ``team_id``, ``league_id``, ``season`` and ``season_type`` are sent as the
    request's query parameters.
    """
    query = {
        'LeagueID': league_id,
        'TeamID': team_id,
        'Season': season,
        'SeasonType': season_type,
    }
    return get_json(url='http://stats.nba.com/stats/TeamInfoCommon',
                    params=query)

In [58]:
# Parse the game log's GAME_DATE strings ('YYYY-MM-DD') into datetimes.
pd.to_datetime(games()['GAME_DATE'], format='%Y-%m-%d')

GAME_ID
0021500001   2015-10-27
0021500001   2015-10-27
0021500002   2015-10-27
0021500002   2015-10-27
0021500003   2015-10-27
0021500003   2015-10-27
0021500010   2015-10-28
0021500010   2015-10-28
0021500011   2015-10-28
0021500011   2015-10-28
0021500012   2015-10-28
0021500012   2015-10-28
0021500013   2015-10-28
0021500013   2015-10-28
0021500014   2015-10-28
0021500014   2015-10-28
0021500015   2015-10-28
0021500015   2015-10-28
0021500004   2015-10-28
0021500004   2015-10-28
0021500005   2015-10-28
0021500005   2015-10-28
0021500006   2015-10-28
0021500006   2015-10-28
0021500007   2015-10-28
0021500007   2015-10-28
0021500008   2015-10-28
0021500008   2015-10-28
0021500009   2015-10-28
0021500009   2015-10-28
                ...    
0021501216   2016-04-12
0021501216   2016-04-12
0021501219   2016-04-13
0021501219   2016-04-13
0021501218   2016-04-13
0021501218   2016-04-13
0021501224   2016-04-13
0021501224   2016-04-13
0021501225   2016-04-13
0021501225   2016-04-13
00215012

In [77]:
# Keep one copy of the game log for the MATCHUP filters that follow.
df = games()

In [80]:
# TEAM_ID of the rows whose MATCHUP contains the literal text 'vs.'
# (presumably the home team's row of each game). regex=False keeps the '.'
# from acting as a regex wildcard; .loc avoids chained indexing.
df.loc[df['MATCHUP'].str.contains('vs.', regex=False), 'TEAM_ID']

GAME_ID
0021500001    1610612737
0021500002    1610612741
0021500003    1610612744
0021500010    1610612745
0021500011    1610612763
0021500012    1610612749
0021500013    1610612760
0021500014    1610612756
0021500015    1610612757
0021500004    1610612753
0021500005    1610612738
0021500006    1610612751
0021500007    1610612765
0021500008    1610612748
0021500009    1610612761
0021500016    1610612758
0021500017    1610612747
0021500018    1610612754
0021500019    1610612752
0021500020    1610612746
0021500026    1610612737
0021500028    1610612759
0021500030    1610612745
0021500027    1610612749
0021500029    1610612743
0021500031    1610612758
0021500021    1610612739
0021500024    1610612738
0021500032    1610612756
0021500023    1610612755
                 ...    
0021501201    1610612759
0021501202    1610612752
0021501205    1610612738
0021501206    1610612751
0021501207    1610612750
0021501208    1610612740
0021501209    1610612760
0021501210    1610612762
0021501211    161

In [90]:
# One row per GAME_ID with the losing (L) and winning (W) TEAM_ID as columns;
# each game/result pair has a single row, so the first value is taken.
pivot_df = pd.pivot_table(
    games().reset_index(),
    index='GAME_ID',
    columns='WL',
    values='TEAM_ID',
    aggfunc=lambda s: s.iloc[0],
)

In [91]:
# Add the TEAM_ID of each game's '@' row as the VISITOR column (aligned on
# GAME_ID). The '@' is matched literally and selected with .loc instead of
# chained indexing.
pivot_df['VISITOR'] = df.loc[df['MATCHUP'].str.contains('@', regex=False), 'TEAM_ID']

In [92]:
# Show the loser/winner table together with the VISITOR column.
pivot_df

WL,L,W,VISITOR
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0021500001,1610612737,1610612765,1610612765
0021500002,1610612739,1610612741,1610612739
0021500003,1610612740,1610612744,1610612740
0021500004,1610612753,1610612764,1610612764
0021500005,1610612755,1610612738,1610612755
0021500006,1610612751,1610612741,1610612741
0021500007,1610612762,1610612765,1610612762
0021500008,1610612766,1610612748,1610612766
0021500009,1610612754,1610612761,1610612754
0021500010,1610612745,1610612743,1610612743


In [59]:
def normalize_data_frame(df):
    """Return a copy of ``df`` with GAME_DATE parsed into datetimes.

    GAME_DATE is expected to hold 'YYYY-MM-DD' strings. The input frame is
    not modified, so frames displayed or reused elsewhere keep their
    original values and the call can be repeated safely.

    Raises ``KeyError`` if ``df`` has no GAME_DATE column.
    """
    parsed_dates = pd.to_datetime(df['GAME_DATE'], format='%Y-%m-%d')
    return df.assign(GAME_DATE=parsed_dates)

In [14]:
# Load the team list (indexed by TEAM_ID) and display it.
teams_df = teams()
teams_df

Unnamed: 0_level_0,LEAGUE_ID,MIN_YEAR,MAX_YEAR,ABBREVIATION
TEAM_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1610612737,0,1949,2016,ATL
1610612738,0,1946,2016,BOS
1610612739,0,1970,2016,CLE
1610612740,0,2002,2016,NOP
1610612741,0,1966,2016,CHI
1610612742,0,1980,2016,DAL
1610612743,0,1976,2016,DEN
1610612744,0,1946,2016,GSW
1610612745,0,1967,2016,HOU
1610612747,0,1948,2016,LAL


In [15]:
# Call team() once per TEAM_ID in teams_df and stack the resulting
# single-team frames into one table, then display it.
teams_details = pd.concat(map(team, teams_df.index))
teams_details

Unnamed: 0_level_0,SEASON_YEAR,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,TEAM_CONFERENCE,TEAM_DIVISION,TEAM_CODE,W,L,PCT,CONF_RANK,DIV_RANK,MIN_YEAR,MAX_YEAR
TEAM_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1610612737,2015-16,Atlanta,Hawks,ATL,East,Southeast,hawks,48,34,0.585,4,2,1949,2016
1610612738,2015-16,Boston,Celtics,BOS,East,Atlantic,celtics,48,34,0.585,5,2,1946,2016
1610612739,2015-16,Cleveland,Cavaliers,CLE,East,Central,cavaliers,57,25,0.695,1,1,1970,2016
1610612740,2015-16,New Orleans,Pelicans,NOP,West,Southwest,pelicans,30,52,0.366,12,5,2002,2016
1610612741,2015-16,Chicago,Bulls,CHI,East,Central,bulls,42,40,0.512,9,4,1966,2016
1610612742,2015-16,Dallas,Mavericks,DAL,West,Southwest,mavericks,42,40,0.512,6,2,1980,2016
1610612743,2015-16,Denver,Nuggets,DEN,West,Northwest,nuggets,33,49,0.402,11,4,1976,2016
1610612744,2015-16,Golden State,Warriors,GSW,West,Pacific,warriors,73,9,0.89,1,1,1946,2016
1610612745,2015-16,Houston,Rockets,HOU,West,Southwest,rockets,41,41,0.5,8,4,1967,2016
1610612747,2015-16,Los Angeles,Lakers,LAL,West,Pacific,lakers,17,65,0.207,15,5,1948,2016


In [16]:
# Empty directed multigraph, used here to try out the node-attribute API.
G = nx.MultiDiGraph()

In [17]:
# Add two nodes given as (node, attribute-dict) pairs.
G.add_nodes_from([(1, dict(foo=42)), (5, dict(bar=21))])

In [18]:
# List every node together with its attribute dict.
list(G.nodes(data=True))

[(1, {'foo': 42}), (5, {'bar': 21})]

In [19]:
# Attribute dict of node 1. `G.nodes[...]` is used because the `G.node`
# alias is no longer available in newer networkx releases.
G.nodes[1]

{'foo': 42}

In [20]:
# Convert the team details to {TEAM_ID: {column: value}}.
teams_details.to_dict(orient='index')

{1610612737: {'CONF_RANK': 4,
  'DIV_RANK': 2,
  'L': 34,
  'MAX_YEAR': '2016',
  'MIN_YEAR': '1949',
  'PCT': 0.585,
  'SEASON_YEAR': '2015-16',
  'TEAM_ABBREVIATION': 'ATL',
  'TEAM_CITY': 'Atlanta',
  'TEAM_CODE': 'hawks',
  'TEAM_CONFERENCE': 'East',
  'TEAM_DIVISION': 'Southeast',
  'TEAM_NAME': 'Hawks',
  'W': 48},
 1610612738: {'CONF_RANK': 5,
  'DIV_RANK': 2,
  'L': 34,
  'MAX_YEAR': '2016',
  'MIN_YEAR': '1946',
  'PCT': 0.585,
  'SEASON_YEAR': '2015-16',
  'TEAM_ABBREVIATION': 'BOS',
  'TEAM_CITY': 'Boston',
  'TEAM_CODE': 'celtics',
  'TEAM_CONFERENCE': 'East',
  'TEAM_DIVISION': 'Atlantic',
  'TEAM_NAME': 'Celtics',
  'W': 48},
 1610612739: {'CONF_RANK': 1,
  'DIV_RANK': 1,
  'L': 25,
  'MAX_YEAR': '2016',
  'MIN_YEAR': '1970',
  'PCT': 0.695,
  'SEASON_YEAR': '2015-16',
  'TEAM_ABBREVIATION': 'CLE',
  'TEAM_CITY': 'Cleveland',
  'TEAM_CODE': 'cavaliers',
  'TEAM_CONFERENCE': 'East',
  'TEAM_DIVISION': 'Central',
  'TEAM_NAME': 'Cavaliers',
  'W': 57},
 1610612740: {'CONF_R

In [21]:
# Start again from an empty directed multigraph for the team data.
G = nx.MultiDiGraph()

In [22]:
# Add one node per TEAM_ID, using that team's detail columns as node attributes.
G.add_nodes_from(teams_details.to_dict(orient='index').items())

In [23]:
# List the team nodes together with their attribute dicts.
list(G.nodes(data=True))

[(1610612737,
  {'CONF_RANK': 4,
   'DIV_RANK': 2,
   'L': 34,
   'MAX_YEAR': '2016',
   'MIN_YEAR': '1949',
   'PCT': 0.585,
   'SEASON_YEAR': '2015-16',
   'TEAM_ABBREVIATION': 'ATL',
   'TEAM_CITY': 'Atlanta',
   'TEAM_CODE': 'hawks',
   'TEAM_CONFERENCE': 'East',
   'TEAM_DIVISION': 'Southeast',
   'TEAM_NAME': 'Hawks',
   'W': 48}),
 (1610612738,
  {'CONF_RANK': 5,
   'DIV_RANK': 2,
   'L': 34,
   'MAX_YEAR': '2016',
   'MIN_YEAR': '1946',
   'PCT': 0.585,
   'SEASON_YEAR': '2015-16',
   'TEAM_ABBREVIATION': 'BOS',
   'TEAM_CITY': 'Boston',
   'TEAM_CODE': 'celtics',
   'TEAM_CONFERENCE': 'East',
   'TEAM_DIVISION': 'Atlantic',
   'TEAM_NAME': 'Celtics',
   'W': 48}),
 (1610612739,
  {'CONF_RANK': 1,
   'DIV_RANK': 1,
   'L': 25,
   'MAX_YEAR': '2016',
   'MIN_YEAR': '1970',
   'PCT': 0.695,
   'SEASON_YEAR': '2015-16',
   'TEAM_ABBREVIATION': 'CLE',
   'TEAM_CITY': 'Cleveland',
   'TEAM_CODE': 'cavaliers',
   'TEAM_CONFERENCE': 'East',
   'TEAM_DIVISION': 'Central',
   'TEAM_NAME

In [24]:
# Look up one attribute of a team node by TEAM_ID. `G.nodes[...]` is used
# because the `G.node` alias is no longer available in newer networkx releases.
G.nodes[1610612738]['TEAM_NAME']

'Celtics'

In [50]:
# Inspect MATCHUP: each GAME_ID appears twice, once as 'X @ Y' and once as 'Y vs. X'.
games()['MATCHUP']

GAME_ID
0021500001      DET @ ATL
0021500001    ATL vs. DET
0021500002    CHI vs. CLE
0021500002      CLE @ CHI
0021500003      NOP @ GSW
0021500003    GSW vs. NOP
0021500010    HOU vs. DEN
0021500010      DEN @ HOU
0021500011    MEM vs. CLE
0021500011      CLE @ MEM
0021500012      NYK @ MIL
0021500012    MIL vs. NYK
0021500013      SAS @ OKC
0021500013    OKC vs. SAS
0021500014      DAL @ PHX
0021500014    PHX vs. DAL
0021500015      NOP @ POR
0021500015    POR vs. NOP
0021500004    ORL vs. WAS
0021500004      WAS @ ORL
0021500005    BOS vs. PHI
0021500005      PHI @ BOS
0021500006      CHI @ BKN
0021500006    BKN vs. CHI
0021500007    DET vs. UTA
0021500007      UTA @ DET
0021500008    MIA vs. CHA
0021500008      CHA @ MIA
0021500009    TOR vs. IND
0021500009      IND @ TOR
                 ...     
0021501216      MEM @ LAC
0021501216    LAC vs. MEM
0021501219    CHA vs. ORL
0021501219      ORL @ CHA
0021501218    BKN vs. TOR
0021501218      TOR @ BKN
0021501224    HOU vs. SAC
0021

In [25]:
# Pivot the game log to one row per GAME_ID with the losing (L) and winning (W)
# TEAM_ID as columns, using the DataFrame.pivot_table method; the aggregation
# keeps the first TEAM_ID of each game/result group.
games().reset_index().pivot_table(index='GAME_ID',
                                  columns='WL',
                                  values='TEAM_ID', 
                                  aggfunc=lambda s: s.iloc[0])

WL,L,W
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
0021500001,1610612737,1610612765
0021500002,1610612739,1610612741
0021500003,1610612740,1610612744
0021500004,1610612753,1610612764
0021500005,1610612755,1610612738
0021500006,1610612751,1610612741
0021500007,1610612762,1610612765
0021500008,1610612766,1610612748
0021500009,1610612754,1610612761
0021500010,1610612745,1610612743


In [27]:
# Same loser/winner pivot per GAME_ID, written with the pd.pivot_table function
# (the form that can be handed to apply_to_output as a callback).
pd.pivot_table(games().reset_index(), 
               index='GAME_ID', 
               columns='WL', 
               values='TEAM_ID', 
               aggfunc=lambda s: s.iloc[0])

WL,L,W
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
0021500001,1610612737,1610612765
0021500002,1610612739,1610612741
0021500003,1610612740,1610612744
0021500004,1610612753,1610612764
0021500005,1610612755,1610612738
0021500006,1610612751,1610612741
0021500007,1610612762,1610612765
0021500008,1610612766,1610612748
0021500009,1610612754,1610612761
0021500010,1610612745,1610612743


In [29]:
# Display the team game log: two rows per GAME_ID, one for each team.
# NOTE(review): consider games().head() to keep the rendered output small.
games()

Unnamed: 0_level_0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0021500001,22015,1610612765,DET,Detroit Pistons,2015-10-27,DET @ ATL,W,240,37,96,...,36,59,23,5,3,15,15,106,12,1
0021500001,22015,1610612737,ATL,Atlanta Hawks,2015-10-27,ATL vs. DET,L,240,37,82,...,33,40,22,9,4,15,25,94,-12,1
0021500002,22015,1610612741,CHI,Chicago Bulls,2015-10-27,CHI vs. CLE,W,240,37,87,...,40,47,13,6,10,13,22,97,2,1
0021500002,22015,1610612739,CLE,Cleveland Cavaliers,2015-10-27,CLE @ CHI,L,240,38,94,...,39,50,26,5,7,11,21,95,-2,1
0021500003,22015,1610612740,NOP,New Orleans Pelicans,2015-10-27,NOP @ GSW,L,240,35,83,...,25,33,21,9,3,19,26,95,-16,1
0021500003,22015,1610612744,GSW,Golden State Warriors,2015-10-27,GSW vs. NOP,W,240,41,96,...,35,56,29,8,7,20,29,111,16,1
0021500010,22015,1610612745,HOU,Houston Rockets,2015-10-28,HOU vs. DEN,L,240,30,87,...,29,44,17,13,8,17,19,85,-20,1
0021500010,22015,1610612743,DEN,Denver Nuggets,2015-10-28,DEN @ HOU,W,240,40,79,...,40,49,26,9,10,21,26,105,20,1
0021500011,22015,1610612763,MEM,Memphis Grizzlies,2015-10-28,MEM vs. CLE,L,240,29,82,...,29,40,15,8,3,16,18,76,-30,1
0021500011,22015,1610612739,CLE,Cleveland Cavaliers,2015-10-28,CLE @ MEM,W,240,41,84,...,42,54,29,7,2,19,25,106,30,1


In [30]:
# Game-log columns carried into the win/loss pivot as `values`:
# the team id followed by the per-game counting stats.
primary_columns = [
    'TEAM_ID',
    'FGM', 'FGA',
    'FG3M', 'FG3A',
    'FTM', 'FTA',
    'OREB', 'DREB',
    'AST', 'STL', 'BLK', 'TOV', 'PF',
    'PTS',
]

In [45]:
@apply_to_output(
    pd.pivot_table,
    index=['GAME_ID', 'SEASON_ID', 'GAME_DATE', 'MIN'],
    columns='WL',
    values=primary_columns,
    aggfunc=lambda s: s.iloc[0],
)
@apply_to_output(to_data_frame, frame_name='LeagueGameLog')
@apply_to_output(normalize_dict)
def games(league_id='00', season='2015-16', season_type='Regular Season',
          team=True, sort_by='date', ascending=True):
    """Fetch the league game log pivoted to one row per game.

    The 'LeagueGameLog' frame is converted by ``to_data_frame`` and then
    pivoted on (GAME_ID, SEASON_ID, GAME_DATE, MIN) so that every column in
    ``primary_columns`` is split into an L and a W value. ``team`` selects
    team ('T') or player ('P') rows, ``sort_by`` is sent upper-cased as the
    sorter and ``ascending`` picks 'ASC' or 'DESC'.

    NOTE(review): this redefines ``games`` and shadows the GAME_ID-indexed
    version defined earlier in the notebook.
    """
    player_or_team = 'T' if team else 'P'
    query = {
        'LeagueID': league_id,
        'PlayerOrTeam': player_or_team,
        'Season': season,
        'SeasonType': season_type,
        'Sorter': sort_by.upper(),
        'Direction': 'ASC' if ascending else 'DESC',
    }
    return get_json(url='http://stats.nba.com/stats/LeagueGameLog',
                    params=query)

In [46]:
# Move SEASON_ID, GAME_DATE and MIN out of the row index into columns,
# leaving GAME_ID as the only index level of the pivoted game log.
games().reset_index(['SEASON_ID', 'GAME_DATE', 'MIN'])

Unnamed: 0_level_0,SEASON_ID,GAME_DATE,MIN,TEAM_ID,TEAM_ID,FGM,FGM,FGA,FGA,FG3M,...,STL,STL,BLK,BLK,TOV,TOV,PF,PF,PTS,PTS
WL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,L,W,L,W,L,W,L,...,L,W,L,W,L,W,L,W,L,W
GAME_ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0021500001,22015,2015-10-27,240,1610612737,1610612765,37,37,82,96,8,...,9,5,4,3,15,15,25,15,94,106
0021500002,22015,2015-10-27,240,1610612739,1610612741,38,37,94,87,9,...,5,6,7,10,11,13,21,22,95,97
0021500003,22015,2015-10-27,240,1610612740,1610612744,35,41,83,96,6,...,9,8,3,7,19,20,26,29,95,111
0021500004,22015,2015-10-28,240,1610612753,1610612764,37,33,100,84,5,...,9,8,6,9,16,18,22,14,87,88
0021500005,22015,2015-10-28,240,1610612755,1610612738,34,39,83,85,7,...,11,10,6,7,24,17,22,23,95,112
0021500006,22015,2015-10-28,240,1610612751,1610612741,39,42,93,78,0,...,11,9,5,3,13,20,18,24,100,115
0021500007,22015,2015-10-28,240,1610612762,1610612765,35,30,75,74,2,...,4,2,5,4,12,12,25,20,87,92
0021500008,22015,2015-10-28,240,1610612766,1610612748,33,36,84,73,6,...,4,5,1,7,9,13,16,25,94,104
0021500009,22015,2015-10-28,240,1610612754,1610612761,32,36,86,80,9,...,14,8,3,2,13,21,30,24,99,106
0021500010,22015,2015-10-28,240,1610612745,1610612743,30,40,87,79,8,...,13,9,8,10,17,21,19,26,85,105
