In [204]:
from requests import get
from pprint import PrettyPrinter
import pyodbc
import pandas as pd
import sqlalchemy as db
import urllib
import datetime as dt

# Notebook-wide pretty-printer for nested structures (not referenced in the visible cells).
printer = PrettyPrinter()

In [None]:
# NBA Stats - JSON Endpoints
# http://data.nba.net/10s/prod/v1/2021/players.json > Player data for a specific year
# https://data.nba.net/10s/prod/v1/2021/teams.json > Team data for a specific year
# http://data.nba.net/data/10s/prod/v1/20180928/0011800001_boxscore.json > Box score data for a specific game (needs date and game id)
# https://data.nba.net/10s/prod/v1/20180929/scoreboard.json > Box scores for all games on a specific day (needs date)
# https://data.nba.net/10s/prod/v1/2021/schedule.json > Schedule data for a specific year

In [None]:
# GETS TEAM ROSTER DATA
# team_roster_endpoint = api.data_links['teamRoster']
# team_roster_endpoint = team_roster_endpoint.replace('{{teamUrlCode}}','raptors')

# test_data = get(base_url + team_roster_endpoint).json()
# print(len(test_data['league']['standard']['players']))

In [None]:
# CONNECTING TO MS SQL SERVER DATABASE
# import urllib
# import sqlalchemy as db

# driver = "{ODBC Driver 18 for SQL Server}"
# server = "asqlsrv-nbadashboard-dev-canadacentral-001.database.windows.net"
# database = "asqldb-nbadashboard-dev-canadacentral-001"
# username = "db_admin"
# password = os.environ["NBA_DB_PASSWORD"]  # credential redacted - never commit secrets; rotate the exposed password

# connection_string = f"Driver={driver};Server=tcp:{server},1433;Database={database};Uid={username};Pwd={password};Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30";

# params = urllib.parse.quote_plus(connection_string)

# engine = db.create_engine("mssql+pyodbc:///?odbc_connect=%s" % params)

In [None]:
# GET BOX SCORE DATA
# api.data_links

# api.get_game_data('20211107', '0022100139').keys()
# test_data = get(base_url + '/prod/v1/20211107/0022100139_mini_boxscore.json').json()['basicGameData']
# api.schedule_data[204]

In [114]:
# # EXTRACTS NESTED PLAYER DATA
# for player_dict in player_data:

#     # EXTRACTS DATA FROM "teamSitesOnly" DICT
#     team_sites_only_dict = player_dict.get('teamSitesOnly')
    
#     if team_sites_only_dict != None:
#         for key in team_sites_only_dict:
#             player_dict[f"teamsitesonly_{key}"] = team_sites_only_dict[key]

#     # EXTRACTS DATA FROM "draft" DICT
#     draft_dict = player_dict.get('draft')

#     if draft_dict != None:
#         for key in draft_dict:
#             player_dict[f"draft_{key}"] = draft_dict[key]

#     # EXTRACT DATA from "teams" LIST OF DICTS
#     teams_dict = player_dict.get('teams')

#     if teams_dict != None:
#         counter = 1
    
#         for team in teams_dict:
#             for key in team:
#                 player_dict[f"teamhist_{counter}_{key}"] = team[key]
#             counter += 1

In [4]:
# Widen pandas' display limits so large frames are shown without truncation.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000
# player_data_df.loc[player_data_df['teamId'] == '1610612748']

In [326]:
class NBAStatsAPI:
    '''
    Small client for the data.nba.net JSON endpoints.

    Constructing an instance downloads the endpoint index (``all_json``) and then the
    team, player-profile and schedule datasets it links to, so instantiation performs
    several HTTP requests. Nested dictionaries in the player and schedule records are
    flattened into ``<parent>_<child>`` keys (the original nested values are kept).
    '''

    base_url = "https://data.nba.net"
    all_json = "/prod/v1/today.json"
    # Seconds to wait for a response before a request is abandoned.
    request_timeout = 30

    def __init__(self):
        # Dict with JSON endpoints
        self._data_links = self._get_all_data_links()
        # List of dicts with each dict containing data for a team
        self._team_data = self._get_all_team_data()
        # List of dicts with each dict containing data for a player
        self._player_profile_data = self._get_all_player_profiles_data()
        # List of dicts with each dict containing data for a scheduled game
        self._schedule_data = self._get_league_schedule_data()
        # List of dicts with each dict containing game id and game date
        self._game_ids_and_dates = self.__extract_data_attributes(self._schedule_data, ['gameId', 'startDateEastern'])
        # List of dicts with each dict containing person id
        self._player_ids = self.__extract_data_attributes(self._player_profile_data, ['personId', 'temporaryDisplayName'])
        # Date data was last refreshed
        self._date_last_refreshed = dt.datetime.strptime(self._data_links['currentDate'], '%Y%m%d')

    @property
    def data_links(self):
        '''Dict of endpoint names to URL paths/templates taken from the endpoint index.'''
        return self._data_links

    @property
    def team_data(self):
        '''List of team dicts, restricted to entries flagged as NBA franchises.'''
        return self._team_data

    @property
    def player_profile_data(self):
        '''List of player dicts with nested "teamSitesOnly", "draft" and "teams" data flattened.'''
        return self._player_profile_data

    @property
    def schedule_data(self):
        '''List of scheduled-game dicts with selected nested dicts flattened.'''
        return self._schedule_data

    @property
    def game_ids_and_dates(self):
        '''List of dicts holding only "gameId" and "startDateEastern" for each scheduled game.'''
        return self._game_ids_and_dates

    @property
    def player_ids(self):
        '''List of dicts holding only "personId" and "temporaryDisplayName" for each player.'''
        return self._player_ids

    @property
    def date_last_refreshed(self):
        '''``datetime`` parsed from the "currentDate" entry of the endpoint index.'''
        return self._date_last_refreshed

    def get_all_players_career_stats_data(self):
        '''
        Downloads the career summary of every player in the loaded roster.

        One HTTP request is made per player, using the "playerProfile" endpoint template.

        Returns
        ----------
        list
            One dict per player: each "careerSummary" field prefixed with
            "career_summary_", plus the player's "personId".
        '''
        all_players_career_stats = []

        for player in self._player_ids:
            person_id = player['personId']
            player_data_endpoint = self.data_links['playerProfile'].replace('{{personId}}', person_id)

            profile = self._fetch_json(player_data_endpoint)
            career_summary = profile['league']['standard']['stats']['careerSummary']

            career_player_stats = {f"career_summary_{key}": value for key, value in career_summary.items()}
            career_player_stats['personId'] = person_id

            all_players_career_stats.append(career_player_stats)

        return all_players_career_stats

    def get_game_data(self, game_date: str, game_id: str):
        '''
        Returns data for a specific game given the "gameId" and "gameDate"

        Parameters
        ----------
        game_date : str
            Value substituted for "{{gameDate}}" in the "boxscore" endpoint template
        game_id : str
            Value substituted for "{{gameId}}" in the "boxscore" endpoint template

        Returns
        ----------
        dict
            The "basicGameData" section of the box score response
        '''
        game_data_link = (
            self.data_links['boxscore']
            .replace('{{gameDate}}', str(game_date))
            .replace('{{gameId}}', str(game_id))
        )

        return self._fetch_json(game_data_link)['basicGameData']

    def _fetch_json(self, endpoint: str):
        '''
        Downloads ``base_url + endpoint`` and returns the decoded JSON body.

        A timeout is applied so a stalled connection cannot hang the notebook, and
        HTTP error statuses raise immediately instead of surfacing later as a
        confusing KeyError on an error payload.
        '''
        response = get(self.base_url + endpoint, timeout=self.request_timeout)
        response.raise_for_status()

        return response.json()

    def _get_all_data_links(self):
        '''Returns the "links" dict of the endpoint index.'''
        return self._fetch_json(self.all_json)['links']

    def _get_all_team_data(self):
        '''Returns the standard-league teams whose "isNBAFranchise" flag is True.'''
        all_teams = self._fetch_json(self.data_links['teams'])['league']['standard']

        return [team for team in all_teams if team['isNBAFranchise'] is True]

    def _get_all_player_profiles_data(self):
        '''
        Returns the standard-league player list with nested data flattened in place.

        "teamSitesOnly" and "draft" dicts become "<name>_<key>" entries; each dict in
        the "teams" list becomes "teamhist_<n>_<key>" entries, with n starting at 1.
        '''
        player_data = self._fetch_json(self.data_links['leagueRosterPlayers'])['league']['standard']

        for player_dict in player_data:
            # The helper mutates the dict in place, so its return value is not needed here.
            self.__extract_nested_dict(player_dict, 'teamSitesOnly')
            self.__extract_nested_dict(player_dict, 'draft')

            # A missing or empty "teams" entry contributes no extra keys.
            for position, team in enumerate(player_dict.get('teams') or [], start=1):
                for key, value in team.items():
                    player_dict[f"teamhist_{position}_{key}"] = value

        return player_data

    def _get_league_schedule_data(self):
        '''Returns the standard-league schedule with selected nested dicts flattened in place.'''
        schedule_data = self._fetch_json(self.data_links['leagueSchedule'])['league']['standard']

        for scheduled_game_dict in schedule_data:
            for nested_key in ('period', 'nugget', 'hTeam', 'vTeam', 'playoffs'):
                self.__extract_nested_dict(scheduled_game_dict, nested_key)

        return schedule_data

    def __extract_nested_dict(self, parent: dict, nested_key: str):
        '''
        Copies the entries of ``parent[nested_key]`` into ``parent`` as "<nested_key>_<key>".

        ``parent`` is modified in place and also returned; the nested dict itself is
        left in place. Nothing happens when the key is absent or its value is None.
        '''
        nested = parent.get(f'{nested_key}')

        if nested is not None:
            for key, value in nested.items():
                parent[f"{nested_key}_{key}"] = value

        return parent

    def __extract_data_attributes(self, input_list_of_dicts: list, data_attributes: list):
        '''
        Extracts specific data attribute(s) from existing dataset(s) and stores them in a list of dictionaries.
        The outputted list can then be used as an input to extract additional data from other NBA Statistics JSON Endpoints.

        Parameters
        ----------
        input_list_of_dicts : list
            List of dictionaries
        data_attributes : list
            List of strings

        Returns
        ----------
        list
            List of dictionaries that contains the specified data attributes

        Example Format: output_list = [{'gameId':'0012100001','startDateEastern':'20211003'}, {'gameId': '0012100002', 'startDateEastern': '20211004'}, ...]
        '''
        # Built once so the membership test is not repeated against a list for every key.
        wanted = set(data_attributes)

        return [
            {key: value for key, value in input_dict.items() if key in wanted}
            for input_dict in input_list_of_dicts
        ]

    def __str__(self):
        return f"Data Last Refreshed On: {self._date_last_refreshed.strftime('%B %d, %Y')}"

In [327]:
# Instantiate API Object
# NOTE: the constructor immediately downloads the endpoint index plus the team, player
# and schedule datasets, so this cell performs several HTTP requests.
api = NBAStatsAPI()


AttributeError: 'NBAStatsAPI' object has no attribute '_player_data'

In [207]:
# Display the endpoint-name -> URL path/template mapping loaded from the endpoint index.
api.data_links

{'anchorDate': '20220421',
 'currentDate': '20220421',
 'calendar': '/prod/v1/calendar.json',
 'todayScoreboard': '/prod/v1/20220421/scoreboard.json',
 'currentScoreboard': '/prod/v1/20220421/scoreboard.json',
 'teams': '/prod/v2/2021/teams.json',
 'scoreboard': '/prod/v2/{{gameDate}}/scoreboard.json',
 'leagueRosterPlayers': '/prod/v1/2021/players.json',
 'allstarRoster': '/prod/v1/allstar/2018/AS_roster.json',
 'leagueRosterCoaches': '/prod/v1/2021/coaches.json',
 'leagueSchedule': '/prod/v1/2021/schedule.json',
 'leagueConfStandings': '/prod/v1/current/standings_conference.json',
 'leagueDivStandings': '/prod/v1/current/standings_division.json',
 'leagueUngroupedStandings': '/prod/v1/current/standings_all.json',
 'leagueMiniStandings': '/prod/v1/current/standings_all_no_sort_keys.json',
 'leagueTeamStatsLeaders': '/prod/v1/2021/team_stats_rankings.json',
 'leagueLastFiveGameTeamStats': '/prod/v1/2021/team_stats_last_five_games.json',
 'previewArticle': '/prod/v1/{{gameDate}}/{{gameI

In [None]:

# 'boxscore': '/prod/v1/{{gameDate}}/{{gameId}}_boxscore.json'
def get_box_score_data():
    """Placeholder for a box score loader; it currently does nothing and returns None.

    TODO: implement against the 'boxscore' endpoint template noted above.
    """
    pass

#  'playerProfile': '/prod/v1/2021/players/{{personId}}_profile.json',
def get_player_stats_data():
    """Placeholder for a player stats loader; it currently does nothing and returns None.

    A working function with this same name is defined in a later cell and replaces
    this stub once that cell has been run.
    """
    pass

In [175]:

def get_player_stats_data():
    """
    Download the career summary statistics of every player held by the notebook's ``api`` object.

    Relies on the notebook-level ``api`` variable and issues one HTTP request per
    player, so a full run can take a while.

    Returns
    ----------
    list
        One dict per player: each "careerSummary" field prefixed with
        "career_summary_", plus the player's "personId".
    """
    # Loop-invariant pieces of the request URL.
    base_url = "https://data.nba.net"
    profile_endpoint_template = '/prod/v1/2021/players/{{personId}}_profile.json'

    all_career_player_statistics = []

    # NBAStatsAPI exposes its player records as `player_profile_data`; the `_player_data`
    # attribute read here before does not exist on the class and raised AttributeError.
    for player in api.player_profile_data:

        person_id = player['personId']
        json_endpoint = profile_endpoint_template.replace('{{personId}}', person_id)

        career_summary = get(base_url + json_endpoint).json()['league']['standard']['stats']['careerSummary']

        career_player_statistics = {f"career_summary_{key}": value for key, value in career_summary.items()}
        career_player_statistics['personId'] = person_id

        all_career_player_statistics.append(career_player_statistics)

    return all_career_player_statistics

In [315]:
# # OUTPUTS DATATYPES FOR VALUES IN A DICT
# keys = api.schedule_data[0].keys()

# for key in keys:
#     print(key, type(api.schedule_data[0][key]))


In [324]:
### GETTING DATA FROM JSON ENDPOINTS ###
# `base_url` and `data_endpoint` are notebook globals that later cells index into.
base_url = "https://data.nba.net"
# data_links = data['links']

# Alternative request: box score for one game (leaves a 'basicGameData' payload in `data_endpoint`).
# data_endpoint = get(base_url + '/prod/v1/20211003/0012100001_boxscore.json').json()
# Active request: "uber stats" JSON for player 203507 in the 2021 season.
data_endpoint = get(base_url + '/prod/v1/2021/players/203507_uber_stats.json').json()
        

{'Message': 'Object not found.'}

In [292]:
# Per-team stat lines of the first entry in the regular-season list.
# NOTE(review): requires `data_endpoint` to hold a player stats response (with a 'league' key) - confirm which cell was run last.
data_endpoint['league']['standard']['stats']['regularSeason']['season'][0]['teams']
# [1]['teams'][0]


# season > List of Dicts
# Within each Dict, it contains a player's stats for a specific year


[{'teamId': '1610612749',
  'ppg': '29.9',
  'rpg': '11.6',
  'apg': '5.8',
  'mpg': '32.9',
  'topg': '3.3',
  'spg': '1.1',
  'bpg': '1.4',
  'tpp': '29.3',
  'ftp': '72.2',
  'fgp': '55.3',
  'assists': '388',
  'blocks': '91',
  'steals': '72',
  'turnovers': '219',
  'offReb': '134',
  'defReb': '644',
  'totReb': '778',
  'fgm': '689',
  'fga': '1245',
  'tpm': '71',
  'tpa': '242',
  'ftm': '553',
  'fta': '766',
  'pFouls': '212',
  'points': '2002',
  'gamesPlayed': '67',
  'gamesStarted': '67',
  'plusMinus': '397',
  'min': '2204',
  'dd2': '46',
  'td3': '4'}]

In [215]:
# Print the Python type of every top-level field in the box score's 'basicGameData' section.
# NOTE(review): requires `data_endpoint` to hold a box score response (one with a 'basicGameData' key),
# not the player stats response - depends on which request cell was run last.
keys = data_endpoint['basicGameData'].keys()

for key in keys:
    print(key, type(data_endpoint['basicGameData'][key]))

seasonStageId <class 'int'>
seasonYear <class 'str'>
leagueName <class 'str'>
gameId <class 'str'>
arena <class 'dict'>
isGameActivated <class 'bool'>
statusNum <class 'int'>
extendedStatusNum <class 'int'>
startTimeEastern <class 'str'>
startTimeUTC <class 'str'>
endTimeUTC <class 'str'>
startDateEastern <class 'str'>
homeStartDate <class 'str'>
homeStartTime <class 'str'>
visitorStartDate <class 'str'>
visitorStartTime <class 'str'>
gameUrlCode <class 'str'>
clock <class 'str'>
isBuzzerBeater <class 'bool'>
isPreviewArticleAvail <class 'bool'>
isRecapArticleAvail <class 'bool'>
nugget <class 'dict'>
attendance <class 'str'>
tickets <class 'dict'>
hasGameBookPdf <class 'bool'>
isStartTimeTBD <class 'bool'>
isNeutralVenue <class 'bool'>
gameDuration <class 'dict'>
period <class 'dict'>
vTeam <class 'dict'>
hTeam <class 'dict'>
watch <class 'dict'>
officials <class 'dict'>


In [235]:
# Display the 'stats' section of the response currently held in `data_endpoint`.
data_endpoint['stats']

# Don't need: nugget, tickets, period, gameDuration, watch

{'timesTied': '1',
 'leadChanges': '1',
 'vTeam': {'fastBreakPoints': '11',
  'pointsInPaint': '52',
  'biggestLead': '30',
  'secondChancePoints': '23',
  'pointsOffTurnovers': '21',
  'longestRun': '13',
  'totals': {'points': '123',
   'fgm': '42',
   'fga': '87',
   'fgp': '48.3',
   'ftm': '29',
   'fta': '40',
   'ftp': '72.5',
   'tpm': '10',
   'tpa': '24',
   'tpp': '41.7',
   'offReb': '14',
   'defReb': '40',
   'totReb': '54',
   'assists': '20',
   'pFouls': '30',
   'steals': '9',
   'turnovers': '16',
   'blocks': '7',
   'plusMinus': '26',
   'min': '240:00',
   'short_timeout_remaining': '0',
   'full_timeout_remaining': '2',
   'team_fouls': '25'},
  'leaders': {'points': {'value': '21',
    'players': [{'personId': '1630560',
      'firstName': 'Cam',
      'lastName': 'Thomas'}]},
   'rebounds': {'value': '10',
    'players': [{'personId': '200794',
      'firstName': 'Paul',
      'lastName': 'Millsap'}]},
   'assists': {'value': '3',
    'players': [{'personId': '

In [227]:
# Inspect the first scheduled-game record to see which fields are available.
api.schedule_data[0]

{'gameId': '0012100001',
 'seasonStageId': 1,
 'gameUrlCode': '20211003/BKNLAL',
 'statusNum': 3,
 'extendedStatusNum': 0,
 'isStartTimeTBD': False,
 'startTimeUTC': '2021-10-03T19:30:00.000Z',
 'startDateEastern': '20211003',
 'isNeutralVenue': False,
 'startTimeEastern': '3:30 PM ET',
 'isBuzzerBeater': False,
 'period': {'current': 4, 'type': 0, 'maxRegular': 4},
 'nugget': {'text': ''},
 'hTeam': {'teamId': '1610612747', 'score': '97', 'win': '0', 'loss': '1'},
 'vTeam': {'teamId': '1610612751', 'score': '123', 'win': '1', 'loss': '0'},
 'watch': {'broadcast': {'video': {'regionalBlackoutCodes': '',
    'isLeaguePass': True,
    'isNationalBlackout': False,
    'isTNTOT': False,
    'canPurchase': False,
    'isVR': False,
    'isNextVR': False,
    'isNBAOnTNTVR': False,
    'isMagicLeap': False,
    'isOculusVenues': False,
    'national': {'broadcasters': [{'shortName': 'NBA TV',
       'longName': 'NBA TV'}]},
    'canadian': [{'shortName': 'NBAC', 'longName': 'NBA TV Canada'}]