In [1]:
import requests
from bs4 import BeautifulSoup as bs
import urllib.request
import json
import pandas as pd
import numpy as np

In [2]:
url = "https://stats.nba.com/js/data/boxscorebreakdowns/2020/boxscore_breakdown_20200201.json"
response = requests.get(url)
response.status_code
data = response.json()
games = data['results']

game_ids = []

for i in range(0, len(games)):
   game_id = games[i]['GameID']
   game_ids.append(game_id)
   print(game_id)

0021900735
0021900736
0021900734
0021900732
0021900733
0021900731
0021900730
0021900728
0021900729
0021900727


In [3]:
parameters = {
    "DayOffset": "0",
    "LeagueID": "00",
    "gameDate": "02/27/2020"
}
parameters

{'DayOffset': '0', 'LeagueID': '00', 'gameDate': '02/27/2020'}

In [4]:
headers = {
    "Host": "stats.nba.com",
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "X-NewRelic-ID": "VQECWF5UChAHUlNTBwgBVw==",
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true"
}
headers


{'Host': 'stats.nba.com',
 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0',
 'Accept': 'application/json, text/plain, */*',
 'Accept-Language': 'en-US,en;q=0.5',
 'Accept-Encoding': 'gzip, deflate, br',
 'X-NewRelic-ID': 'VQECWF5UChAHUlNTBwgBVw==',
 'x-nba-stats-origin': 'stats',
 'x-nba-stats-token': 'true'}

In [5]:
response = requests.get("https://stats.nba.com/stats/scoreboardV2", params=parameters, headers=headers, verify=False, timeout=10)
print(response.url)
print(response.status_code)
print(response.json())

https://stats.nba.com/stats/scoreboardV2?DayOffset=0&LeagueID=00&gameDate=02%2F27%2F2020
200
{'resource': 'scoreboardV2', 'parameters': {'GameDate': '02/27/2020', 'LeagueID': '00', 'DayOffset': '0'}, 'resultSets': [{'name': 'GameHeader', 'headers': ['GAME_DATE_EST', 'GAME_SEQUENCE', 'GAME_ID', 'GAME_STATUS_ID', 'GAME_STATUS_TEXT', 'GAMECODE', 'HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'SEASON', 'LIVE_PERIOD', 'LIVE_PC_TIME', 'NATL_TV_BROADCASTER_ABBREVIATION', 'HOME_TV_BROADCASTER_ABBREVIATION', 'AWAY_TV_BROADCASTER_ABBREVIATION', 'LIVE_PERIOD_TIME_BCAST', 'ARENA_NAME', 'WH_STATUS'], 'rowSet': [['2020-02-27T00:00:00', 1, '0021900873', 3, 'Final', '20200227/NYKPHI', 1610612755, 1610612752, '2019', 4, '     ', None, 'NBCSP', 'MSG', 'Q4       - ', 'Wells Fargo Center', 1], ['2020-02-27T00:00:00', 2, '0021900874', 3, 'Final', '20200227/PORIND', 1610612754, 1610612757, '2019', 4, '     ', 'TNT', None, 'NBCSNW', 'Q4       - TNT', 'Bankers Life Fieldhouse', 1], ['2020-02-27T00:00:00', 3, '0021900875'

In [6]:

class BoxScoreTraditionalV2():
    endpoint = 'boxscoretraditionalv2'
    expected_data = {'PlayerStats': ['GAME_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'PLAYER_ID', 'PLAYER_NAME', 'START_POSITION', 'COMMENT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TO', 'PF', 'PTS', 'PLUS_MINUS'], 'TeamStarterBenchStats': ['GAME_ID', 'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'STARTERS_BENCH', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TO', 'PF', 'PTS'], 'TeamStats': ['GAME_ID', 'TEAM_ID', 'TEAM_NAME', 'TEAM_ABBREVIATION', 'TEAM_CITY', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TO', 'PF', 'PTS', 'PLUS_MINUS']}

    nba_response = None
    data_sets = None
    player_stats = None
    team_stats = None
    headers = None
    base_url = ""

    def __init__(self,
                 game_id,
                 base_url,
                 end_period=None,
                 end_range=None,
                 range_type=None,
                 start_period=None,
                 start_range=None,
                 proxy=None,
                 headers=None,
                 timeout=30,
                 get_request=True):
        self.proxy = proxy
        self.base_url = base_url
        if headers is not None:
            self.headers = headers
        else:
            self.headers = {
            "Host": "stats.nba.com",
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0) Gecko/20100101 Firefox/74.0",
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br",
            "X-NewRelic-ID": "VQECWF5UChAHUlNTBwgBVw==",
            "x-nba-stats-origin": "stats",
            "x-nba-stats-token": "true"
            }
        self.timeout = timeout
        # if parameters is not None:
        #     self.parameters = parameters
        # else:
        #     self.parameters = {
        #             'GameID': game_id,
        #             'EndPeriod': end_period,
        #             'EndRange': end_range,
        #             'RangeType': range_type,
        #             'StartPeriod': start_period,
        #             'StartRange': start_range
        #     }
        
        # if get_request:
        #     self.get_request()
    
    def get_request(self, params):
        response = requests.get(url=self.base_url, params=params, headers=self.headers, verify=False, timeout=self.timeout)
        contents = response.text
        # custom return dictionary, TODO: modify
        cust_game = {
            'response_url': response.url,
            'status': response.status_code,
            'content': response.json()
        }
        return cust_game
        
    def load_response(self, scraper_response):
        # get all games for given day
        games = scraper_response['content']['resultSets'][1]

        # get headers
        headers = games['headers']

        # get rows --> the games (2 rows per game with same gameid)
        rows = games['rowSet']
        print(rows)

        # create df from response
        df = pd.DataFrame(np.array(rows), columns=headers)
        
        return df

    def write_games(self, filename, df):
        df.to_csv(filename, mode='a', header=False)

In [7]:
scraper = BoxScoreTraditionalV2(base_url = 'https://stats.nba.com/stats/scoreboardV2', game_id="0021900808", headers=headers)

In [8]:
scraper_response = scraper.get_request(params=parameters)
response_df1 = scraper.load_response(scraper_response)

[['2020-02-27T00:00:00', 1, '0021900873', 1610612752, 'NYK', 'New York', 'Knicks', '17-42', 21, 25, 33, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0.506, 0.583, 0.348, 28, 39, 8], ['2020-02-27T00:00:00', 1, '0021900873', 1610612755, 'PHI', 'Philadelphia', '76ers', '37-23', 26, 35, 26, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 0.495, 0.769, 0.484, 32, 44, 6], ['2020-02-27T00:00:00', 2, '0021900874', 1610612757, 'POR', 'Portland', 'Trail Blazers', '26-34', 24, 25, 26, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 0.461, 0.643, 0.3, 14, 45, 14], ['2020-02-27T00:00:00', 2, '0021900874', 1610612754, 'IND', 'Indiana', 'Pacers', '35-24', 30, 13, 37, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0.444, 0.783, 0.32, 21, 48, 11], ['2020-02-27T00:00:00', 3, '0021900875', 1610612758, 'SAC', 'Sacramento', 'Kings', '24-34', 27, 34, 21, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 108, 0.451, 1.0, 0.343, 30, 45, 15], ['2020-02-27T00:00:00', 3, '0021900875', 1610612760, 'OKC', 'Oklahoma City', 'Thunder', '37-22', 25, 25, 31, 31, 0, 

In [9]:
response_df1

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY_NAME,TEAM_NAME,TEAM_WINS_LOSSES,PTS_QTR1,PTS_QTR2,...,PTS_OT8,PTS_OT9,PTS_OT10,PTS,FG_PCT,FT_PCT,FG3_PCT,AST,REB,TOV
0,2020-02-27T00:00:00,1,21900873,1610612752,NYK,New York,Knicks,17-42,21,25,...,0,0,0,106,0.506,0.583,0.348,28,39,8
1,2020-02-27T00:00:00,1,21900873,1610612755,PHI,Philadelphia,76ers,37-23,26,35,...,0,0,0,115,0.495,0.769,0.484,32,44,6
2,2020-02-27T00:00:00,2,21900874,1610612757,POR,Portland,Trail Blazers,26-34,24,25,...,0,0,0,100,0.461,0.643,0.3,14,45,14
3,2020-02-27T00:00:00,2,21900874,1610612754,IND,Indiana,Pacers,35-24,30,13,...,0,0,0,106,0.444,0.783,0.32,21,48,11
4,2020-02-27T00:00:00,3,21900875,1610612758,SAC,Sacramento,Kings,24-34,27,34,...,0,0,0,108,0.451,1.0,0.343,30,45,15
5,2020-02-27T00:00:00,3,21900875,1610612760,OKC,Oklahoma City,Thunder,37-22,25,25,...,0,0,0,112,0.488,0.724,0.346,20,37,9
6,2020-02-27T00:00:00,4,21900876,1610612747,LAL,Los Angeles,Lakers,45-12,24,30,...,0,0,0,116,0.511,0.667,0.333,24,44,16
7,2020-02-27T00:00:00,4,21900876,1610612744,GSW,Golden State,Warriors,12-47,24,28,...,0,0,0,86,0.425,0.9,0.281,27,37,26


In [10]:
parameters = {
    "DayOffset": "0",
    "LeagueID": "00",
    "gameDate": "02/01/2020"
}

In [11]:
scraper_response_2 = scraper.get_request(params=parameters)
#scraper_response_2

In [12]:
response_df2 = scraper.load_response(scraper_response_2)
response_df2

[['2020-02-01T00:00:00', 1, '0021900727', 1610612750, 'MIN', 'Minnesota', 'Timberwolves', '15-33', 32, 23, 31, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 106, 0.405, 0.838, 0.179, 24, 44, 17], ['2020-02-01T00:00:00', 1, '0021900727', 1610612746, 'LAC', 'LA', 'Clippers', '34-15', 40, 22, 36, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118, 0.459, 0.889, 0.353, 20, 50, 13], ['2020-02-01T00:00:00', 2, '0021900728', 1610612752, 'NYK', 'New York', 'Knicks', '14-36', 24, 26, 22, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 0.415, 0.739, 0.269, 19, 57, 12], ['2020-02-01T00:00:00', 2, '0021900728', 1610612754, 'IND', 'Indiana', 'Pacers', '31-18', 11, 28, 32, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 0.421, 0.619, 0.333, 21, 34, 11], ['2020-02-01T00:00:00', 3, '0021900729', 1610612748, 'MIA', 'Miami', 'Heat', '33-15', 28, 29, 19, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 102, 0.429, 0.808, 0.455, 24, 48, 13], ['2020-02-01T00:00:00', 3, '0021900729', 1610612753, 'ORL', 'Orlando', 'Magic', '21-28', 25, 24, 18, 22, 0, 0, 0, 0, 0, 0, 0

Unnamed: 0,GAME_DATE_EST,GAME_SEQUENCE,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY_NAME,TEAM_NAME,TEAM_WINS_LOSSES,PTS_QTR1,PTS_QTR2,...,PTS_OT8,PTS_OT9,PTS_OT10,PTS,FG_PCT,FT_PCT,FG3_PCT,AST,REB,TOV
0,2020-02-01T00:00:00,1,21900727,1610612750,MIN,Minnesota,Timberwolves,15-33,32,23,...,0,0,0,106,0.405,0.838,0.179,24,44,17
1,2020-02-01T00:00:00,1,21900727,1610612746,LAC,LA,Clippers,34-15,40,22,...,0,0,0,118,0.459,0.889,0.353,20,50,13
2,2020-02-01T00:00:00,2,21900728,1610612752,NYK,New York,Knicks,14-36,24,26,...,0,0,0,92,0.415,0.739,0.269,19,57,12
3,2020-02-01T00:00:00,2,21900728,1610612754,IND,Indiana,Pacers,31-18,11,28,...,0,0,0,85,0.421,0.619,0.333,21,34,11
4,2020-02-01T00:00:00,3,21900729,1610612748,MIA,Miami,Heat,33-15,28,29,...,0,0,0,102,0.429,0.808,0.455,24,48,13
5,2020-02-01T00:00:00,3,21900729,1610612753,ORL,Orlando,Magic,21-28,25,24,...,0,0,0,89,0.398,0.824,0.273,21,39,9
6,2020-02-01T00:00:00,4,21900730,1610612744,GSW,Golden State,Warriors,11-39,31,30,...,0,0,0,131,0.533,0.895,0.514,39,44,11
7,2020-02-01T00:00:00,4,21900730,1610612739,CLE,Cleveland,Cavaliers,13-37,32,27,...,0,0,0,112,0.432,1.0,0.364,26,46,15
8,2020-02-01T00:00:00,5,21900731,1610612751,BKN,Brooklyn,Nets,21-27,36,25,...,0,0,0,107,0.415,0.619,0.34,25,53,15
9,2020-02-01T00:00:00,5,21900731,1610612764,WAS,Washington,Wizards,17-31,27,32,...,0,0,0,113,0.44,0.615,0.281,21,51,9


In [13]:
scraper.write_games(df=response_df1, filename='games.csv')

In [14]:
scraper.write_games(df=response_df2, filename='games.csv')