<a href="https://colab.research.google.com/github/justxn/nbaStatsToCSV/blob/main/getNBASTATS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install xlsxwriter

In [78]:
import requests
import pandas as pd
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
from xlsxwriter import Workbook

pd.set_option('display.max_columns', None)

In [10]:
def getAllData(traditional, advanced, h):
  #getSeasonTraditionalData
  try:
    trad = requests.get(traditional, headers = h)
    tradHeads = trad.json()['resultSets'][0]['headers']
    tradLogs = trad.json()['resultSets'][0]['rowSet']
  except (ConnectionError, Timeout, TooManyRedirects) as e:
    print(e)
  regular = pd.DataFrame(tradLogs,columns=tradHeads)
  
  #getAdvancedSeasonData
  try:
    adv = requests.get(advanced, headers = h)
    advHeads = adv.json()['resultSets'][0]['headers']
    advLogs = adv.json()['resultSets'][0]['rowSet']
  except (ConnectionError, Timeout, TooManyRedirects) as e:
    print(e)
  regularADV = pd.DataFrame(advLogs, columns=advHeads)

  #clean advanced data of redundant columns contained in regular season
  regularADV.drop(columns=['SEASON_YEAR','PLAYER_ID','NICKNAME','TEAM_ID','TEAM_ABBREVIATION','TEAM_NAME','GAME_DATE','MATCHUP','WL','MIN','FGM','FGA','FG_PCT'],inplace=True)

  #join data frames
  df = pd.merge(regular, regularADV, on=['PLAYER_NAME','GAME_ID'])
  return df
  


URL DATA NEEDED TO ACCESS REGULAR SEASON AND PLAYOFFS 2021-2022

In [11]:
regular2022URL = 'https://stats.nba.com/stats/leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam=P&Season=2021-22&SeasonType=Regular+Season&Sorter=DATE'
regularADV2022URL = 'https://stats.nba.com/stats/playergamelogs?DateFrom=&DateTo=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlusMinus=N&Rank=N&Season=2021-22&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&VsConference=&VsDivision='
playoff2022URL = 'https://stats.nba.com/stats/leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam=P&Season=2021-22&SeasonType=Playoffs&Sorter=DATE'
playoffADV2022URL = 'https://stats.nba.com/stats/playergamelogs?DateFrom=&DateTo=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlusMinus=N&Rank=N&Season=2021-22&SeasonSegment=&SeasonType=Playoffs&ShotClockRange=&VsConference=&VsDivision='

h = {
            'Host': 'stats.nba.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer': 'https://www.nba.com/',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'x-nba-stats-origin': 'stats',
            'x-nba-stats-token': 'true'
        }


COMBINE PLAYOFF AND REGULAR SEASON DATA 

In [65]:
regularSeasonData = getAllData(regular2022URL, regularADV2022URL, h)
playoffSeasonData = getAllData(playoff2022URL,playoffADV2022URL,h)
seasonData20212022 = pd.concat([regularSeasonData,playoffSeasonData],ignore_index=True)

ORDER BY PLAYER NAME AND EARLIEST DATE PLAYED

In [66]:
seasonData20212022['GAME_DATE'] = pd.to_datetime(seasonData20212022['GAME_DATE'])
seasonData20212022 = seasonData20212022.sort_values(['PLAYER_NAME','GAME_DATE'])

In [67]:
seasonData20212022 = seasonData20212022.iloc[:, :62]

GET GAMES PLAYED

In [68]:
seasonData20212022['GP'] = seasonData20212022.groupby(['PLAYER_NAME']).cumcount()

CREATE DATA FRAMES TO TRANSFER TO CSV



In [69]:
season = seasonData20212022.iloc[:, [2,6]].copy()
last10Values = seasonData20212022.iloc[:,[2,6]].copy()
booleanBoost = seasonData20212022.iloc[:,[2,6]].copy()
boostScore = seasonData20212022.iloc[:,[2,6]].copy()
baseBoosted = seasonData20212022.iloc[:,[2,6]].copy()

CALCULATE SCORES AND ROLLING STATS

In [70]:
scoringKeys = {'PTS':0.5, 'AST':5, 'STL':12, 'BLK':16, 'FG3M':7}

In [71]:
#calculate base score of the game based on stats
seasonData20212022['BASESCORE'] = seasonData20212022['PTS'] + 1.5 * seasonData20212022['AST'] + 2 * seasonData20212022['STL'] + 2 * seasonData20212022['BLK'] + seasonData20212022['REB']

#loop for all scoring categories
for i in scoringKeys:
  #calculate season stats
  season[i] = seasonData20212022.groupby(['PLAYER_NAME'])[i].cumsum()
  season[i] = ((season[i] - seasonData20212022[i]) / seasonData20212022['GP']).round(2)

  #calculate last 10 Value
  last10 = seasonData20212022.groupby(['PLAYER_NAME'],as_index=False)[i].rolling(10, min_periods=1).mean()
  last10.drop('PLAYER_NAME',axis=1, inplace=True)
  last10Values[i] = last10.round(2)

  #calculate if the player hit their boost or not between all categories
  booleanBoost[i] = seasonData20212022[i] > last10Values[i]

  #calculate the boost score the player achieved across all categories
  boostScore.loc[booleanBoost[i] == 1, i] = ((seasonData20212022[i] - last10Values[i]) * scoringKeys[i] + 5).round(2)
  boostScore.loc[booleanBoost[i] != 1, i] = 0
  
  #calculate final score based on all categories
  baseBoosted[i] = seasonData20212022['BASESCORE'] + boostScore[i] 


TO CSV FILE

In [79]:
writer = pd.ExcelWriter('file.xlsx', engine='xlsxwriter')
seasonData20212022.to_excel(writer, 'mainGameLog', index=False)
booleanBoost.to_excel(writer, 'BooleanBoost', index=False)
last10Values.to_excel(writer,'last10Values',index=False)
boostScore.to_excel(writer,'boostScore',index=False)
baseBoosted.to_excel(writer, 'baseBoosted',index=False)
season.to_excel(writer,'seasonAverages',index=False)
writer.save()
writer.close()


  warn("Calling close() on already closed file.")
