In [1]:
import pandas as pd
import requests
import json
from datetime import date, timedelta
import time

create headers needed to make direct calls to NBA API

In [2]:
headers = {
    'Host': 'stats.nba.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
    'x-nba-stats-origin': 'stats',
    'x-nba-stats-token': 'true'
}

create lists of header columns to use for shot contest stats

In [3]:
fiveft_range_headers = ['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'AGE',
                        'OPP_FGM_LESS5', 'OPP_FGA_LESS5', 'OPP_FG_PCT_LESS5',
                        'OPP_FGM_5_9', 'OPP_FGA_5_9', 'OPP_FG_PCT_5_9',
                        'OPP_FGM_10_14', 'OPP_FGA_10_14', 'OPP_FG_PCT_10_14',
                        'OPP_FGM_15_19', 'OPP_FGA_15_19', 'OPP_FG_PCT_15_19',
                        'OPP_FGM_20_24', 'OPP_FGA_20_24', 'OPP_FG_PCT_20_24',
                        'OPP_FGM_25_29', 'OPP_FGA_25_29', 'OPP_FG_PCT_25_29',
                        'OPP_FGM_30_34', 'OPP_FGA_30_34', 'OPP_FG_PCT_30_34',
                        'OPP_FGM_35_39', 'OPP_FGA_35_39', 'OPP_FG_PCT_35_39', 
                        'OPP_FGM_40PLUS', 'OPP_FGA_40PLUS', 'OPP_FG_PCT_40PLUS']
pace_adj_5ft_headers = [x + "_PACE_ADJ" if "OPP_" in x else x for x in fiveft_range_headers ]
zone_headers = ['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'AGE',
   'OPP_FGM_RES_AREA', 'OPP_FGA_RES_AREA', 'OPP_FG_PCT_RES_AREA',
   'OPP_FGM_PAINT', 'OPP_FGA_PAINT','OPP_FG_PCT_PAINT',
   'OPP_FGM_MID', 'OPP_FGA_MID','OPP_FG_PCT_MID',
   'OPP_FGM_LEFT_CORNER','OPP_FGA_LEFT_CORNER','OPP_FG_PCT_LEFT_CORNER',
   'OPP_FGM_RIGHT_CORNER', 'OPP_FGA_RIGHT_CORNER', 'OPP_FG_PCT_RIGHT_CORNER',
   'OPP_FGM_ABOVE_BREAK', 'OPP_FGA_ABOVE_BREAK','OPP_FG_PCT_ABOVE_BREAK',
   'OPP_FGM_BACKCOURT', 'OPP_FGA_BACKCOURT','OPP_FG_PCT_BACKCOURT']
pace_adj_zone_headers = [x + "_PACE_ADJ" if "OPP_" in x else x for x in zone_headers]

create function to retreive data for each day in the season

In [51]:
def retrieve_defensive_day_stats(game_date, season):
    shot_contest_url_template = "https://stats.nba.com/stats/leaguedashplayershotlocations?DateFrom={}%2F{}%2F{}&DateTo={}%2F{}%2F{}&DistanceRange=5ft+Range&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision="
    shot_contest_url = shot_contest_url_template.format(game_date.strftime('%m'),game_date.strftime('%d'), game_date.strftime('%Y'), game_date.strftime('%m'), game_date.strftime('%d'), game_date.strftime('%Y'), season)
    shot_contest_paceadj_url_template = "https://stats.nba.com/stats/leaguedashplayershotlocations?DateFrom={}%2F{}%2F{}&DateTo={}%2F{}%2F{}&DistanceRange=5ft+Range&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=Y&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision="
    shot_contest_paceadj_url = shot_contest_paceadj_url_template.format(game_date.strftime('%m'), game_date.strftime('%d'), game_date.strftime('%Y'), game_date.strftime('%m'), game_date.strftime('%d'), game_date.strftime('%Y'), season)
    reb_url_template = "https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={}%2F{}%2F{}&DateTo={}%2F{}%2F{}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=PerGame&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType=Rebounding&Season={}&SeasonSegment=&SeasonType=Regular+Season&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="
    reb_url = reb_url_template.format(game_date.strftime('%m'),game_date.strftime('%d'), game_date.strftime('%Y'), game_date.strftime('%m'), game_date.strftime('%d'), game_date.strftime('%Y'), season)
    core_def_template = "https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={}%2F{}%2F{}&DateTo={}%2F{}%2F{}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=PerGame&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType=Defense&Season={}&SeasonSegment=&SeasonType=Regular+Season&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="
    core_def_url = core_def_template.format(game_date.strftime('%m'),game_date.strftime('%d'), game_date.strftime('%Y'), game_date.strftime('%m'), game_date.strftime('%d'), game_date.strftime('%Y'), season)
    boxout_url_template = "https://stats.nba.com/stats/leaguehustlestatsplayer?College=&Conference=&Country=&DateFrom={}%2F{}%2F{}&DateTo={}%2F{}%2F{}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment=&SeasonType=Regular+Season&TeamID=0&VsConference=&VsDivision=&Weight="
    boxout_url = boxout_url_template.format(game_date.strftime('%m'),game_date.strftime('%d'), game_date.strftime('%Y'), game_date.strftime('%m'), game_date.strftime('%d'), game_date.strftime('%Y'), season)
    shotzone_url_template = "https://stats.nba.com/stats/leaguedashplayershotlocations?College=&Conference=&Country=&DateFrom={}%2F{}%2F{}&DateTo={}%2F{}%2F{}&DistanceRange=By+Zone&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="
    shotzone_url = shotzone_url_template.format(game_date.strftime('%m'),game_date.strftime('%d'), game_date.strftime('%Y'), game_date.strftime('%m'), game_date.strftime('%d'), game_date.strftime('%Y'), season)
    shotzone_paced_url_template = "https://stats.nba.com/stats/leaguedashplayershotlocations?College=&Conference=&Country=&DateFrom={}%2F{}%2F{}&DateTo={}%2F{}%2F{}&DistanceRange=By+Zone&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Opponent&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=Y&PerMode=PerGame&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="
    shotzone_paced_url = shotzone_url_template.format(game_date.strftime('%m'),game_date.strftime('%d'), game_date.strftime('%Y'), game_date.strftime('%m'), game_date.strftime('%d'), game_date.strftime('%Y'), season)
    
    shot_contest_json = requests.get(url = shot_contest_url, headers = headers).json()
    time.sleep(1)
    shot_contest_paceadj_json = requests.get(url = shot_contest_paceadj_url, headers= headers).json()
    time.sleep(1)
    reb_json = requests.get(url = reb_url, headers=headers).json()
    time.sleep(1)
    core_def_json = requests.get(url = core_def_url, headers=headers).json()
    time.sleep(1)
    boxout_json = requests.get(url = boxout_url, headers = headers).json()
    time.sleep(1)
    shotzone_json = requests.get(url = shotzone_url, headers=headers).json()
    time.sleep(1)
    shotzone_paced_json = requests.get(url = shotzone_paced_url, headers=headers).json()
    
    shot_contests = (pd.DataFrame(shot_contest_json['resultSets']['rowSet'], 
                                  columns = fiveft_range_headers)
                     .set_index(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION']))
    
    shot_contest_paced = (pd.DataFrame(shot_contest_paceadj_json['resultSets']['rowSet'], 
                                       columns = pace_adj_5ft_headers)
                          .set_index(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION']))
    #print(shot_contests.shape)
    #print(shot_contest_paced.shape)
    rebs = (pd.DataFrame(reb_json['resultSets'][0]['rowSet'],
                        columns = reb_json['resultSets'][0]['headers'])
           .set_index(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION']))
    #print(rebs.shape)
    core_def = (pd.DataFrame(core_def_json['resultSets'][0]['rowSet'],
                             columns = core_def_json['resultSets'][0]['headers'])
           .set_index(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION']))
    #print(core_def.shape)
    boxout = (pd.DataFrame(boxout_json['resultSets'][0]['rowSet'],
                           columns = boxout_json['resultSets'][0]['headers'])
              .set_index(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION'])
              .drop(['AGE', 'G', 'MIN'], axis='columns'))
    #print(boxout.shape)
    shotzone = (pd.DataFrame(shotzone_json['resultSets']['rowSet'],
                             columns = zone_headers)
                .set_index(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION'])
                .drop(['AGE'], axis='columns'))
    #print(shotzone.shape)
    shotzone_paced = (pd.DataFrame(shotzone_paced_json['resultSets']['rowSet'],
                                   columns = pace_adj_zone_headers)
                      .set_index(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION'])
                      .drop(['AGE'], axis='columns'))
    #print(shotzone_paced.shape)
    daily_def_df = (pd.merge(shot_contests,
                             shot_contest_paced,
                             left_index = True,
                             right_index = True)
                    .merge(rebs,
                           left_index=True,
                           right_index=True)
                    .merge(core_def,
                           left_index=True,
                           right_index=True)
                    .merge(boxout,
                           left_index=True,
                           right_index=True)
                    .merge(shotzone,
                           left_index=True,
                           right_index=True)
                    .merge(shotzone_paced,
                           left_index=True,
                           right_index=True)
                    .assign(date = game_date)
                    .reset_index()
                   )
    time.sleep(1)
    print(daily_def_df.shape)
    return daily_def_df

In [44]:
retrieve_defensive_day_stats(date(2013, 10, 29), '2013-14')

(62, 28)
(62, 28)
(62, 31)
(62, 10)
(62, 21)
(62, 21)
(62, 21)
(62, 165)


Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE_x,OPP_FGM_LESS5,OPP_FGA_LESS5,OPP_FG_PCT_LESS5,OPP_FGM_5_9,OPP_FGA_5_9,...,OPP_FGM_RIGHT_CORNER_PACE_ADJ,OPP_FGA_RIGHT_CORNER_PACE_ADJ,OPP_FG_PCT_RIGHT_CORNER_PACE_ADJ,OPP_FGM_ABOVE_BREAK_PACE_ADJ,OPP_FGA_ABOVE_BREAK_PACE_ADJ,OPP_FG_PCT_ABOVE_BREAK_PACE_ADJ,OPP_FGM_BACKCOURT_PACE_ADJ,OPP_FGA_BACKCOURT_PACE_ADJ,OPP_FG_PCT_BACKCOURT_PACE_ADJ,date
0,203094,Andrew Nicholson,1610612753,ORL,24.0,4.0,7.0,0.571,2.0,5.0,...,0.0,1.0,0.000,1.0,4.0,0.250,0.0,0.0,0.0,2013-10-29
1,201167,Arron Afflalo,1610612753,ORL,28.0,11.0,16.0,0.688,2.0,9.0,...,0.0,1.0,0.000,3.0,7.0,0.429,0.0,0.0,0.0,2013-10-29
2,201933,Blake Griffin,1610612746,LAC,25.0,13.0,23.0,0.565,3.0,4.0,...,1.0,2.0,0.500,11.0,18.0,0.611,0.0,0.0,0.0,2013-10-29
3,201228,C.J. Watson,1610612754,IND,30.0,5.0,10.0,0.500,2.0,6.0,...,0.0,0.0,0.000,0.0,2.0,0.000,0.0,1.0,0.0,2013-10-29
4,2430,Carlos Boozer,1610612741,CHI,32.0,6.0,10.0,0.600,1.0,4.0,...,2.0,3.0,0.667,3.0,8.0,0.375,0.0,0.0,0.0,2013-10-29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,203503,Tony Snell,1610612741,CHI,22.0,4.0,5.0,0.800,0.0,1.0,...,1.0,1.0,1.000,1.0,1.0,1.000,,,,2013-10-29
58,2617,Udonis Haslem,1610612748,MIA,34.0,15.0,21.0,0.714,0.0,2.0,...,1.0,1.0,1.000,0.0,9.0,0.000,,,,2013-10-29
59,203506,Victor Oladipo,1610612753,ORL,22.0,6.0,9.0,0.667,2.0,4.0,...,,,,3.0,4.0,0.750,0.0,0.0,0.0,2013-10-29
60,202325,Wesley Johnson,1610612747,LAL,26.0,9.0,16.0,0.563,0.0,3.0,...,0.0,1.0,0.000,3.0,8.0,0.375,0.0,0.0,0.0,2013-10-29


In [52]:
initial_date = date(2013, 10, 29)
last_date = date(2014, 4, 16)
(last_date - initial_date).days

169

In [53]:
season_dates = [initial_date + timedelta(days = x) 
                for x 
                in range((last_date - initial_date).days + 1)]

In [57]:
gameslist_1314 = [retrieve_defensive_day_stats(day, '2013-14') for day in season_dates]

In [55]:
defense_1314 = pd.concat(gameslist_1314)

In [56]:
defense_1314

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE_x,OPP_FGM_LESS5,OPP_FGA_LESS5,OPP_FG_PCT_LESS5,OPP_FGM_5_9,OPP_FGA_5_9,...,OPP_FGM_RIGHT_CORNER_PACE_ADJ,OPP_FGA_RIGHT_CORNER_PACE_ADJ,OPP_FG_PCT_RIGHT_CORNER_PACE_ADJ,OPP_FGM_ABOVE_BREAK_PACE_ADJ,OPP_FGA_ABOVE_BREAK_PACE_ADJ,OPP_FG_PCT_ABOVE_BREAK_PACE_ADJ,OPP_FGM_BACKCOURT_PACE_ADJ,OPP_FGA_BACKCOURT_PACE_ADJ,OPP_FG_PCT_BACKCOURT_PACE_ADJ,date
0,203094.0,Andrew Nicholson,1.610613e+09,ORL,24.0,4.0,7.0,0.571,2.0,5.0,...,0.0,1.0,0.000,1.0,4.0,0.250,0.0,0.0,0.0,2013-10-29
1,201167.0,Arron Afflalo,1.610613e+09,ORL,28.0,11.0,16.0,0.688,2.0,9.0,...,0.0,1.0,0.000,3.0,7.0,0.429,0.0,0.0,0.0,2013-10-29
2,201933.0,Blake Griffin,1.610613e+09,LAC,25.0,13.0,23.0,0.565,3.0,4.0,...,1.0,2.0,0.500,11.0,18.0,0.611,0.0,0.0,0.0,2013-10-29
3,201228.0,C.J. Watson,1.610613e+09,IND,30.0,5.0,10.0,0.500,2.0,6.0,...,0.0,0.0,0.000,0.0,2.0,0.000,0.0,1.0,0.0,2013-10-29
4,2430.0,Carlos Boozer,1.610613e+09,CHI,32.0,6.0,10.0,0.600,1.0,4.0,...,2.0,3.0,0.667,3.0,8.0,0.375,0.0,0.0,0.0,2013-10-29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294,203115.0,Will Barton,1.610613e+09,POR,23.0,6.0,7.0,0.857,2.0,7.0,...,1.0,1.0,1.000,5.0,24.0,0.208,0.0,0.0,0.0,2014-04-16
295,2584.0,Willie Green,1.610613e+09,LAC,32.0,7.0,13.0,0.538,2.0,6.0,...,2.0,2.0,1.000,3.0,8.0,0.375,0.0,0.0,0.0,2014-04-16
296,201163.0,Wilson Chandler,1.610613e+09,DEN,27.0,13.0,20.0,0.650,2.0,6.0,...,0.0,1.0,0.000,4.0,12.0,0.333,0.0,0.0,0.0,2014-04-16
297,2216.0,Zach Randolph,1.610613e+09,MEM,32.0,15.0,24.0,0.625,2.0,3.0,...,1.0,4.0,0.250,4.0,20.0,0.200,0.0,0.0,0.0,2014-04-16


In [58]:
defense_1314.to_csv('defense_1314.csv', index=False)

In [8]:
defense_1718 = pd.concat(gameslist_1718)

In [9]:
defense_1718.to_csv("defense_1718.csv", index=False)

In [None]:
defense_1819 = pd.concat(gameslist_1819)

In [None]:
defense_1819.to_csv("defense_1819.csv", index=False)

In [23]:
defense_1516

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE_x,OPP_FGM_LESS5,OPP_FGA_LESS5,OPP_FG_PCT_LESS5,OPP_FGM_5_9,OPP_FGA_5_9,...,OPP_FGM_RIGHT_CORNER_PACE_ADJ,OPP_FGA_RIGHT_CORNER_PACE_ADJ,OPP_FG_PCT_RIGHT_CORNER_PACE_ADJ,OPP_FGM_ABOVE_BREAK_PACE_ADJ,OPP_FGA_ABOVE_BREAK_PACE_ADJ,OPP_FG_PCT_ABOVE_BREAK_PACE_ADJ,OPP_FGM_BACKCOURT_PACE_ADJ,OPP_FGA_BACKCOURT_PACE_ADJ,OPP_FG_PCT_BACKCOURT_PACE_ADJ,date
0,203932.0,Aaron Gordon,1.610613e+09,ORL,19.0,16.0,32.0,0.500,2.0,6.0,...,0.0,3.0,0.000,3.0,8.0,0.375,0.0,0.0,0.0,2014-10-28
1,202329.0,Al-Farouq Aminu,1.610613e+09,DAL,24.0,2.0,3.0,0.667,1.0,2.0,...,0.0,0.0,0.000,1.0,1.0,1.000,,,,2014-10-28
2,201582.0,Alexis Ajinca,1.610613e+09,NOP,27.0,5.0,8.0,0.625,0.0,0.0,...,,,,,,,,,,2014-10-28
3,203076.0,Anthony Davis,1.610613e+09,NOP,22.0,15.0,28.0,0.536,1.0,6.0,...,0.0,1.0,0.000,3.0,7.0,0.429,0.0,1.0,0.0,2014-10-28
4,203382.0,Aron Baynes,1.610613e+09,SAS,28.0,1.0,3.0,0.333,3.0,4.0,...,1.0,3.0,0.333,2.0,4.0,0.500,,,,2014-10-28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,201163.0,Wilson Chandler,1.610613e+09,DEN,28.0,11.0,13.0,0.846,1.0,5.0,...,3.0,4.0,0.750,6.0,13.0,0.462,,,,2015-04-15
284,203897.0,Zach LaVine,1.610613e+09,MIN,20.0,25.0,37.0,0.676,3.0,5.0,...,3.0,8.0,0.375,4.0,10.0,0.400,0.0,0.0,0.0,2015-04-15
285,2216.0,Zach Randolph,1.610613e+09,MEM,33.0,8.0,14.0,0.571,5.0,10.0,...,1.0,2.0,0.500,6.0,13.0,0.462,0.0,0.0,0.0,2015-04-15
286,2585.0,Zaza Pachulia,1.610613e+09,MIL,31.0,6.0,12.0,0.500,1.0,1.0,...,0.0,0.0,0.000,2.0,6.0,0.333,,,,2015-04-15
