In [3]:
from requests import get
from pprint import PrettyPrinter
import pyodbc
import pandas as pd
import sqlalchemy as db
import urllib
import datetime as dt

# Pretty-printer instance. It is not used by any cell visible in this notebook;
# presumably kept for ad-hoc inspection of nested JSON payloads — confirm before removing.
printer = PrettyPrinter()

In [None]:
# NBA Stats - JSON Endpoints
# http://data.nba.net/10s/prod/v1/2021/players.json > Player data for a specific year
# https://data.nba.net/10s/prod/v1/2021/teams.json > Team data for a specific year
# http://data.nba.net/data/10s/prod/v1/20180928/0011800001_boxscore.json > Box score data for a specific game (needs date and game id)
# https://data.nba.net/10s/prod/v1/20180929/scoreboard.json > Box scores for all games on a specific day (needs date)
# https://data.nba.net/10s/prod/v1/2021/schedule.json > Schedule data for a specific year

In [None]:
# GETS TEAM ROSTER DATA
# team_roster_endpoint = api.data_links['teamRoster']
# team_roster_endpoint = team_roster_endpoint.replace('{{teamUrlCode}}','raptors')

# test_data = get(base_url + team_roster_endpoint).json()
# print(len(test_data['league']['standard']['players']))

In [None]:
# CONNECTING TO MS SQL SERVER DATABASE
# import urllib
# import sqlalchemy as db

# driver = "{ODBC Driver 18 for SQL Server}"
# server = "asqlsrv-nbadashboard-dev-canadacentral-001.database.windows.net"
# database = "asqldb-nbadashboard-dev-canadacentral-001"
# username = "db_admin"
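# password = os.environ["NBA_DB_PASSWORD"]  # secret redacted: load it from the environment, never commit credentials (rotate the previously exposed password)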

# connection_string = f"Driver={driver};Server=tcp:{server},1433;Database={database};Uid={username};Pwd={password};Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30";

# params = urllib.parse.quote_plus(connection_string)

# engine = db.create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)

In [None]:
# GET BOX SCORE DATA
# api.data_links

# api.get_game_data('20211107', '0022100139').keys()
# test_data = get(base_url + '/prod/v1/20211107/0022100139_mini_boxscore.json').json()['basicGameData']
# api.schedule_data[204]

In [114]:
# # EXTRACTS NESTED PLAYER DATA
# for player_dict in player_data:

#     # EXTRACTS DATA FROM "teamSitesOnly" DICT
#     team_sites_only_dict = player_dict.get('teamSitesOnly')
    
#     if team_sites_only_dict != None:
#         for key in team_sites_only_dict:
#             player_dict[f"teamsitesonly_{key}"] = team_sites_only_dict[key]

#     # EXTRACTS DATA FROM "draft" DICT
#     draft_dict = player_dict.get('draft')

#     if draft_dict != None:
#         for key in draft_dict:
#             player_dict[f"draft_{key}"] = draft_dict[key]

#     # EXTRACT DATA from "teams" LIST OF DICTS
#     teams_dict = player_dict.get('teams')

#     if teams_dict != None:
#         counter = 1
    
#         for team in teams_dict:
#             for key in team:
#                 player_dict[f"teamhist_{counter}_{key}"] = team[key]
#             counter += 1

In [4]:
# Raise pandas' display limits (rows, columns, line width) in a single call;
# set_option accepts its arguments as consecutive (pattern, value) pairs.
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 500,
    'display.width', 1000,
)
# player_data_df.loc[player_data_df['teamId'] == '1610612748']

In [190]:
class NBAStatsAPI:
    '''
    Thin client for the JSON endpoints served under ``base_url``.

    Instantiating the class performs four HTTP requests (endpoint links, teams,
    players, schedule) and caches the results on the instance. The cached data is
    exposed through read-only properties. Nested dictionaries inside the player
    and schedule records are flattened into extra ``<parent>_<child>`` keys on the
    same record; the original nested values are kept.
    '''

    base_url = "https://data.nba.net"
    all_json = "/prod/v1/today.json"
    # Seconds to wait for each HTTP response. requests applies no timeout by
    # default, so without this a stalled connection would hang the notebook.
    request_timeout = 30

    def __init__(self):
        # Dict with JSON endpoints
        self._data_links = self._get_all_data_links()
        # List of dicts with each dict containing data for a team
        self._team_data = self._get_team_data()
        # List of dicts with each dict containing data for a player
        self._player_data = self._get_player_data()
        # List of dicts with each dict containing data for a scheduled game
        self._schedule_data = self._get_schedule_data()
        # List of dicts with each dict containing "gameId" and "startDateEastern"
        self._game_ids_and_dates = self.__extract_data_attributes(
            self._schedule_data, ['gameId', 'startDateEastern']
        )
        # Date data was last refreshed
        self._date_last_refreshed = dt.datetime.strptime(self._data_links['currentDate'], '%Y%m%d')

    @property
    def data_links(self):
        '''Dict of endpoint links read from the ``links`` entry of ``all_json``.'''
        return self._data_links

    @property
    def team_data(self):
        '''List of team dicts, restricted to entries flagged ``isNBAFranchise``.'''
        return self._team_data

    @property
    def player_data(self):
        '''List of player dicts with nested fields flattened into extra keys.'''
        return self._player_data

    @property
    def schedule_data(self):
        '''List of scheduled-game dicts with nested fields flattened into extra keys.'''
        return self._schedule_data

    @property
    def game_ids_and_dates(self):
        '''List of dicts holding only ``gameId`` and ``startDateEastern`` per game.'''
        return self._game_ids_and_dates

    @property
    def date_last_refreshed(self):
        '''``datetime`` parsed from the ``currentDate`` entry of the endpoint links.'''
        return self._date_last_refreshed

    def _fetch_json(self, path: str):
        '''
        Downloads ``base_url + path`` and returns the decoded JSON body.

        Raises the HTTP error reported by requests for 4xx/5xx responses instead of
        letting an error page fail later as a JSON or key error.
        '''
        response = get(NBAStatsAPI.base_url + path, timeout=self.request_timeout)
        response.raise_for_status()

        return response.json()

    def _get_all_data_links(self):
        '''Returns the ``links`` dict from the ``all_json`` endpoint.'''
        return self._fetch_json(NBAStatsAPI.all_json)['links']

    def _get_team_data(self):
        '''Returns the ``league.standard`` team list, keeping only NBA franchises.'''
        all_teams = self._fetch_json(self.data_links['teams'])['league']['standard']

        return [team for team in all_teams if team['isNBAFranchise']]

    def _get_player_data(self):
        '''Returns the ``league.standard`` player list with nested data flattened.'''
        player_data = self._fetch_json(self.data_links['leagueRosterPlayers'])['league']['standard']

        # EXTRACTS NESTED PLAYER DATA (records are updated in place)
        for player_dict in player_data:
            self.__extract_nested_dict(player_dict, 'teamSitesOnly')
            self.__extract_nested_dict(player_dict, 'draft')
            self._flatten_team_history(player_dict)

        return player_data

    def _flatten_team_history(self, player_dict: dict):
        '''
        Copies every entry of the ``teams`` list of dicts onto the player record as
        ``teamhist_<n>_<key>``, where ``n`` is the 1-based position in the list.

        Returns the same (mutated) record; a record without ``teams`` is unchanged.
        '''
        team_history = player_dict.get('teams')

        if team_history is not None:
            for position, team in enumerate(team_history, start=1):
                for key, value in team.items():
                    player_dict[f"teamhist_{position}_{key}"] = value

        return player_dict

    def _get_schedule_data(self):
        '''Returns the ``league.standard`` schedule list with nested data flattened.'''
        schedule_data = self._fetch_json(self.data_links['leagueSchedule'])['league']['standard']

        # EXTRACTS NESTED SCHEDULE DATA (records are updated in place)
        for scheduled_game_dict in schedule_data:
            for nested_key in ('period', 'nugget', 'hTeam', 'vTeam', 'playoffs'):
                self.__extract_nested_dict(scheduled_game_dict, nested_key)

        return schedule_data

    def get_game_data(self, game_date: str, game_id: str):
        '''
        Downloads the box score for one game and returns its ``basicGameData`` dict.

        Parameters
        ----------
        game_date : str
            Value substituted for ``{{gameDate}}`` in the box score link (e.g. '20211107')
        game_id : str
            Value substituted for ``{{gameId}}`` in the box score link (e.g. '0022100139')
        '''
        game_data_link = (
            self.data_links['boxscore']
            .replace('{{gameDate}}', str(game_date))
            .replace('{{gameId}}', str(game_id))
        )

        return self._fetch_json(game_data_link)['basicGameData']

    def __extract_nested_dict(self, parent:dict, nested_key:str):
        '''
        Copies the entries of ``parent[nested_key]`` onto ``parent`` itself as
        ``<nested_key>_<key>`` and returns ``parent`` (mutated in place).

        Nothing is copied when the key is missing or its value is not a dict, so an
        unexpected payload (None, a string, a list) is left alone rather than
        raising a TypeError.
        '''
        nested = parent.get(nested_key)

        if isinstance(nested, dict):
            for key, value in nested.items():
                parent[f"{nested_key}_{key}"] = value

        return parent

    def __extract_data_attributes(self, input_list_of_dicts: list, data_attributes: list):
        '''
        Extracts specific data attribute(s) from existing dataset(s) and stores them in a list of dictionaries.
        
        Example Format: output_list = [{'gameId':'0012100001','startDateEastern':'20211003'}, {'gameId': '0012100002', 'startDateEastern': '20211004'}]
        The outputted list can then be used to extract additional data from other NBA Statistics JSON Endpoints.

        Parameters
        ----------
        input_list_of_dicts : list
            List of dictionaries
        data_attributes : list
            List of strings 

        Returns
        ----------
        list
            List of dictionaries that contains the specified data attributes.
            One dictionary is produced per input dictionary; attributes missing
            from an input dictionary are simply absent from its output.
        '''

        return [
            {key: value for key, value in input_dict.items() if key in data_attributes}
            for input_dict in input_list_of_dicts
        ]

    def __str__(self):
        return f"Data Last Refreshed On: {self._date_last_refreshed.strftime('%B %d, %Y')}"

In [191]:
# Instantiate API Object
# The constructor immediately downloads the endpoint links plus the team, player
# and schedule data, so this cell needs network access.
api = NBAStatsAPI()


1359

1359

In [175]:
# The API object already builds this list from its schedule data (gameId and
# startDateEastern per game), so read it from the property. The previous call to a
# free function `extract_data_attributes` fails with NameError on a fresh kernel:
# no such function is defined in the cells shown here.
game_ids_and_dates = api.game_ids_and_dates

In [185]:
# Keep only the identifying fields of each player record. Written as a
# self-contained comprehension because no free function named
# `extract_data_attributes` is defined in the cells shown here.
wanted_player_keys = ['personId', 'temporaryDisplayName']
player_ids = [
    {key: value for key, value in player.items() if key in wanted_player_keys}
    for player in api.player_data
]

In [187]:
# Number of extracted player records; expected to equal len(api.player_data).
len(player_ids)

592

In [188]:
# Number of player records held by the API object.
len(api.player_data)

592

In [77]:
# OUTPUTS THE DATATYPE OF EACH VALUE IN THE FIRST SCHEDULED-GAME DICT
first_game = api.schedule_data[0]

for field_name, field_value in first_game.items():
    print(field_name, type(field_value))

gameId <class 'str'>
seasonStageId <class 'int'>
gameUrlCode <class 'str'>
statusNum <class 'int'>
extendedStatusNum <class 'int'>
isStartTimeTBD <class 'bool'>
startTimeUTC <class 'str'>
startDateEastern <class 'str'>
isNeutralVenue <class 'bool'>
startTimeEastern <class 'str'>
isBuzzerBeater <class 'bool'>
period <class 'dict'>
nugget <class 'dict'>
hTeam <class 'dict'>
vTeam <class 'dict'>
watch <class 'dict'>
period_current <class 'int'>
period_type <class 'int'>
period_maxRegular <class 'int'>
nugget_text <class 'str'>
hTeam_teamId <class 'str'>
hTeam_score <class 'str'>
hTeam_win <class 'str'>
hTeam_loss <class 'str'>
vTeam_teamId <class 'str'>
vTeam_score <class 'str'>
vTeam_win <class 'str'>
vTeam_loss <class 'str'>


In [39]:
# ### GETTING DATA FROM JSON ENDPOINTS ###
# base_url = "https://data.nba.net"
# data_links = data['links']

# # '/prod/v1/{{gameDate}}/{{gameId}}_boxscore.json'
# data_endpoint = get(base_url + '/prod/v1/20211003/0012100001_boxscore.json').json()
# data.keys()
        