In [56]:
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.endpoints import playerfantasyprofile
from nba_api.stats.static import players
import time
import pandas as pd
from sqlalchemy import create_engine
import seaborn as sns
#from scipy.stats import zscore

# Access NBA API via the nba_api package.
-- players.get_active_players(): list of dictionaries, one per active player, each holding that player's id and full name

-- Create list of dataframes of each player's game logs using a loop. 

-- The game logs are generated by playergamelog.PlayerGameLog(). 

-- The get_data_frames() call returns the game log as a dataframe wrapped in a list, so access it with index 0.

-- pd.concat() concatenates the new list of player gamelog dfs into one big df

-- time.sleep(0.600) pauses between requests so the API calls don't time out; however, I don't want to wait 5 minutes every time I work on this, so I'm saving the df to a CSV, which I'll read from hereafter, and commenting this loop out. Of course, I can do this because the NBA season is over. Mid-season, I'd have to pull the data again each day.

In [57]:
# Fetched once and reused below. Despite the name, later cells iterate over it
# and read the 'id' and 'full_name' keys of each element, i.e. it is used as a
# sequence of per-player dicts rather than as a single dict.
player_dict = players.get_active_players()

In [58]:
# One-time download, intentionally left commented out: it requests one game log
# per entry of player_dict (sleeping 0.6 s after each request), concatenates
# them, and writes the result to 'player_logs_2021', which the next code cell
# reads back. Uncomment only when that file has to be regenerated.
# plgs = []
# for i in range(len(player_dict)):
#     pid = player_dict[i]['id']
#     plog = playergamelog.PlayerGameLog(player_id = pid,
#                                           season = '2020-21',
#                                           season_type_all_star = 'Regular Season')
#     plogdf = plog.get_data_frames()[0]
#     time.sleep(0.600)
#     plgs.append(plogdf)
# plogs21 = pd.concat(plgs)
# plogs21.to_csv('player_logs_2021', index = False)

# Prepare player logs dataframe (plogs)

-- read in CSV (saved from earlier code)

-- create new dictionary from player_dict that just has player ids as keys and full names as values

-- use id_dict to add player names column to plogs

-- drop unnecessary column

-- use MATCHUP column to determine player's team, opponent, and whether the game was home or away

-- rename columns not consistent with convention

-- convert GAME_DATE to datetime

In [59]:
# Load the cached game logs (the CSV written by the download loop).
plogs = pd.read_csv('player_logs_2021')

# Lookup table: player id -> full name.
id_dict = {d['id']: d['full_name'] for d in player_dict}

plogs['PLAYER'] = plogs['Player_ID'].map(id_dict)
plogs = plogs.drop(columns='VIDEO_AVAILABLE')

# MATCHUP holds the player's team, a "vs." or "@" separator, and the opponent.
# Split once with a raw-string regex (no invalid "\." / "\@" escapes) and strip
# the whitespace that surrounds the separator, so TEAM and OPPONENT hold clean
# team codes instead of values with stray leading/trailing spaces.
matchup_parts = plogs['MATCHUP'].str.split(pat=r'vs\.|@', expand=True)
plogs['TEAM'] = matchup_parts[0].str.strip()
plogs['OPPONENT'] = matchup_parts[1].str.strip()

# "@" marks a road game; anything else is treated as a home game. The
# vectorised check leaves a missing MATCHUP as missing instead of raising.
plogs['HOME_AWAY'] = (
    plogs['MATCHUP']
    .str.contains('@', regex=False)
    .map({True: 'AWAY', False: 'HOME'})
)

# Bring the two mixed-case id columns in line with the upper-case convention.
plogs = plogs.rename(columns={'Player_ID': 'PLAYER_ID', 'Game_ID': 'GAME_ID'})
plogs['GAME_DATE'] = pd.to_datetime(plogs['GAME_DATE'])

# Prepare season averages dataframe (season) and season z-score dataframe (seasonz)

-- preset stats column names

-- create season df using groupby and mean

-- recalculate FG_PCT and FT_PCT as season-long percentages (average makes / average attempts) rather than the average of each game's percentage

-- write functions to calculate player FT_CONTRIB and FG_CONTRIB, which account for pct *and volume*

-- create TOV_CONTRIB column in season by multiplying TOV*(-1). This will treat low turnovers as a good thing in seasonz

-- create seasonz by using classic z-score formula (which handles NANs better than scipy's zscore formula)

-- pop PLAYER_ID and PLAYER out of index, convert some data types

In [60]:
# Box-score columns that are averaged per player (and standardised later on).
stats = [
    'PTS', 'FGM', 'FGA', 'FG_PCT',
    'FG3M', 'FG3A', 'FG3_PCT',
    'FTM', 'FTA', 'FT_PCT',
    'OREB', 'DREB', 'REB',
    'AST', 'STL', 'BLK', 'TOV', 'PF',
    'MIN', 'PLUS_MINUS',
]

# One row per (PLAYER_ID, PLAYER) holding the mean of every stat column.
games_by_player = plogs.groupby(['PLAYER_ID', 'PLAYER'])
season = games_by_player[stats].mean()

# Replace the averaged per-game percentages with mean makes / mean attempts.
# NOTE(review): FG3_PCT is not recomputed here and stays a mean of per-game values.
season['FG_PCT'] = season['FGM'].div(season['FGA'])
season['FT_PCT'] = season['FTM'].div(season['FTA'])

In [61]:
# League baselines taken over the per-player season rows:
#   *_all   -> mean of the players' per-game averages
#   *_total -> sum of the players' per-game averages
ft_made_pg, ft_tried_pg = season['FTM'], season['FTA']
fg_made_pg, fg_tried_pg = season['FGM'], season['FGA']

FTMpg_all, FTApg_all = ft_made_pg.mean(), ft_tried_pg.mean()
FTMpg_total, FTApg_total = ft_made_pg.sum(), ft_tried_pg.sum()
FGMpg_all, FGApg_all = fg_made_pg.mean(), fg_tried_pg.mean()
FGMpg_total, FGApg_total = fg_made_pg.sum(), fg_tried_pg.sum()

def ft_pct_contrib(makespg, attemptspg, n_others=12, league_makespg=None, league_attemptspg=None):
    """Free-throw percentage shift caused by adding one shooter to a baseline pool.

    The pool is ``n_others`` shooters who each make ``league_makespg`` of
    ``league_attemptspg`` attempts per game. The result is the pool's percentage
    with the given makes/attempts added, minus the baseline percentage, so both
    accuracy and volume influence the value.

    Parameters
    ----------
    makespg, attemptspg : scalar or pandas Series
        Free throws made / attempted per game.
    n_others : number, default 12
        Number of baseline shooters in the pool. 12 is the value that was
        previously hard-coded (presumably the other roster slots -- TODO confirm).
    league_makespg, league_attemptspg : number, optional
        Baseline makes / attempts per game. Default to the module-level
        ``FTMpg_all`` / ``FTApg_all``, looked up at call time.

    Returns
    -------
    Same shape as the inputs: blended percentage minus baseline percentage.
    """
    if league_makespg is None:
        league_makespg = FTMpg_all
    if league_attemptspg is None:
        league_attemptspg = FTApg_all
    blended_pct = (n_others*league_makespg + makespg) / (n_others*league_attemptspg + attemptspg)
    return blended_pct - (league_makespg/league_attemptspg)

def fg_pct_contrib(makespg, attemptspg, n_others=12, league_makespg=None, league_attemptspg=None):
    """Field-goal percentage shift caused by adding one shooter to a baseline pool.

    The pool is ``n_others`` shooters who each make ``league_makespg`` of
    ``league_attemptspg`` attempts per game. The result is the pool's percentage
    with the given makes/attempts added, minus the baseline percentage, so both
    accuracy and volume influence the value.

    Parameters
    ----------
    makespg, attemptspg : scalar or pandas Series
        Field goals made / attempted per game.
    n_others : number, default 12
        Number of baseline shooters in the pool. 12 is the value that was
        previously hard-coded (presumably the other roster slots -- TODO confirm).
    league_makespg, league_attemptspg : number, optional
        Baseline makes / attempts per game. Default to the module-level
        ``FGMpg_all`` / ``FGApg_all``, looked up at call time.

    Returns
    -------
    Same shape as the inputs: blended percentage minus baseline percentage.
    """
    if league_makespg is None:
        league_makespg = FGMpg_all
    if league_attemptspg is None:
        league_attemptspg = FGApg_all
    blended_pct = (n_others*league_makespg + makespg) / (n_others*league_attemptspg + attemptspg)
    return blended_pct - (league_makespg/league_attemptspg)

# Append the volume-aware shooting columns plus a sign-flipped turnover column
# (negated so that fewer turnovers give a higher value once standardised).
season = season.assign(
    FT_CONTRIB=ft_pct_contrib(season['FTM'], season['FTA']),
    FG_CONTRIB=fg_pct_contrib(season['FGM'], season['FGA']),
    TOV_CONTRIB=-season['TOV'],
)

In [62]:
# Standardise every season column: (value - column mean) / population std (ddof=0).
# NOTE: this expects `season` to still be indexed by (PLAYER_ID, PLAYER); the
# reset_index() calls below mean this cell is not meant to be re-run on its own.
seasonz = (season - season.mean()) / season.std(ddof = 0)

# Identifier-like game-log columns are stored as categoricals.
plogs_categoricals = ['SEASON_ID', 'PLAYER_ID', 'GAME_ID', 'WL', 'PLAYER', 'TEAM', 'OPPONENT', 'HOME_AWAY']
plogs[plogs_categoricals] = plogs[plogs_categoricals].astype('category')

# Move PLAYER_ID / PLAYER out of the index into ordinary categorical columns.
to_convert = ['PLAYER_ID', 'PLAYER']
seasonz = seasonz.reset_index()
seasonz[to_convert] = seasonz[to_convert].astype('category')
season = season.reset_index()
# Cast season's own columns; the previous code copied them from seasonz, which
# only gave the right values because both frames happened to share row order.
season[to_convert] = season[to_convert].astype('category')

In [63]:
# The nine categories that feed the overall rating.
stats9 = [
    'PTS', 'FG_CONTRIB', 'FG3M', 'FT_CONTRIB',
    'REB', 'AST', 'STL', 'BLK', 'TOV_CONTRIB',
]

# Overall rating = unweighted row-wise mean of the nine standardised categories.
seasonz['CAT_OVERALL'] = seasonz.loc[:, stats9].mean(axis=1)

# Display columns: player name, the nine categories, then the overall rating.
stats9ovr = ['PLAYER', *stats9, 'CAT_OVERALL']
# Preview (disabled):
# seasonz[stats9ovr].sort_values('CAT_OVERALL', ascending = False).head(20).reset_index(drop = True)

# Prepare plogs z-score dataframe (plogsz)

-- pull plogs apart into statless and stats parts so we can calculate z-scores on stats part easily

-- copy plogs parts into plogsz parts. Use deep = True to avoid any reference conflicts

-- calculate each game log's FT_CONTRIB and FG_CONTRIB, which account for pct *and volume*

-- create TOV_CONTRIB column by multiplying TOV*(-1). This will treat low turnovers as a good thing in plogsz

-- create plogsz by using classic z-score formula (which handles NANs better than scipy's zscore formula)

-- concat the plogsz parts together horizontally using axis = 1.

In [64]:
# Split the game logs into descriptive columns and stat columns so that only
# the stat part gets standardised.
non_stat_columns = [col for col in plogs.columns if col not in stats]
plogs_statless = plogs.loc[:, non_stat_columns]
plogsz_statless = plogs_statless.copy()

# Per-game stats plus the derived columns (same formulas as the season table).
plogs_stats = plogs.loc[:, stats].copy()
plogs_stats = plogs_stats.assign(
    TOV_CONTRIB=-plogs_stats['TOV'],
    FT_CONTRIB=ft_pct_contrib(plogs_stats['FTM'], plogs_stats['FTA']),
    FG_CONTRIB=fg_pct_contrib(plogs_stats['FGM'], plogs_stats['FGA']),
)

# Classic z-score per column, using the population standard deviation.
stat_means = plogs_stats.mean()
stat_stds = plogs_stats.std(ddof=0)
plogsz_stats = (plogs_stats - stat_means) / stat_stds

# Glue the two halves back together side by side and add the overall rating.
plogsz = pd.concat([plogsz_statless, plogsz_stats], axis=1)
plogsz['CAT_OVERALL'] = plogsz.loc[:, stats9].mean(axis=1)

# Display columns for single-game leaderboards.
games9ovr = ['PLAYER', 'MATCHUP', 'GAME_DATE', *stats9, 'CAT_OVERALL']
# Preview (disabled):
# plogsz[games9ovr].sort_values('CAT_OVERALL', ascending = False).head(20).reset_index(drop = True)

In [65]:
def _headshot_url(player_id):
    """Build the cdn.nba.com headshot image address for one player id."""
    return f'https://cdn.nba.com/headshots/nba/latest/1040x760/{player_id}.png'


# Independent id/name table with one image URL per player.
headshots = season.loc[:, ["PLAYER_ID", "PLAYER"]].copy()
headshots["headshot_url"] = headshots["PLAYER_ID"].apply(_headshot_url)

In [66]:
# Write every prepared table to its own CSV, leaving out the row index.
exports = (
    ('plogs.csv', plogs),
    ('plogsz.csv', plogsz),
    ('season.csv', season),
    ('seasonz.csv', seasonz),
    ("headshots.csv", headshots),
)
for csv_name, frame in exports:
    frame.to_csv(csv_name, index=False)

# Addendum

## Verifying FT_CONTRIB and FG_CONTRIB calculations work as desired

-- Include FT_CONTRIB_OVR and FG_CONTRIB_OVR in season (not seasonz) for reference. These are not above-replacement measures, and they therefore skew toward volume more than FT_CONTRIB and FG_CONTRIB do.

In [67]:
def ft_pct_contrib_total(makespg, attemptspg):
    """Change in the pooled free-throw percentage attributable to one entry.

    Computes FTMpg_total / FTApg_total and subtracts the same ratio recomputed
    with the given makes and attempts removed from the pooled totals.
    """
    pct_with_entry = FTMpg_total / FTApg_total
    pct_without_entry = (FTMpg_total - makespg) / (FTApg_total - attemptspg)
    return pct_with_entry - pct_without_entry

def fg_pct_contrib_total(makespg, attemptspg):
    """Change in the pooled field-goal percentage attributable to one entry.

    Computes FGMpg_total / FGApg_total and subtracts the same ratio recomputed
    with the given makes and attempts removed from the pooled totals.
    """
    pct_with_entry = FGMpg_total / FGApg_total
    pct_without_entry = (FGMpg_total - makespg) / (FGApg_total - attemptspg)
    return pct_with_entry - pct_without_entry

# Reference-only columns on the un-standardised season table.
season = season.assign(
    FT_CONTRIB_OVR=ft_pct_contrib_total(season['FTM'], season['FTA']),
    FG_CONTRIB_OVR=fg_pct_contrib_total(season['FGM'], season['FGA']),
)

In [68]:
# Top 20 rows of the standardised season table, ranked by FT_CONTRIB.
ft_check_cols = ['PLAYER','FTM', 'FTA', 'FT_PCT', 'FT_CONTRIB']
seasonz.loc[:, ft_check_cols].sort_values(by='FT_CONTRIB', ascending=False).head(20)

Unnamed: 0,PLAYER,FTM,FTA,FT_PCT,FT_CONTRIB
89,Damian Lillard,3.659831,3.058413,1.239328,3.89517
338,Trae Young,4.351629,3.894997,0.938847,3.256868
41,Stephen Curry,2.986077,2.505224,1.157501,3.234688
88,Bradley Beal,3.729295,3.305494,0.95664,3.035248
161,Joel Embiid,5.446572,5.100152,0.742423,2.87342
42,DeMar DeRozan,3.407926,3.052171,0.888561,2.665621
16,Kevin Durant,3.165165,2.812631,0.909017,2.616268
81,Jimmy Butler,3.802523,3.508437,0.767865,2.425292
66,Kyrie Irving,1.558963,1.210558,1.198032,2.345166
75,Kawhi Leonard,2.473564,2.159197,0.926452,2.319708


In [69]:
# Top 20 rows of the standardised season table, ranked by FG_CONTRIB.
fg_check_cols = ['PLAYER','FGM', 'FGA', 'FG_PCT', 'FG_CONTRIB']
seasonz.loc[:, fg_check_cols].sort_values(by='FG_CONTRIB', ascending=False).head(20)

Unnamed: 0,PLAYER,FGM,FGA,FG_PCT,FG_CONTRIB
373,Zion Williamson,2.827379,1.866404,1.486262,4.402248
132,Giannis Antetokounmpo,2.772603,2.076427,1.082244,3.306497
126,Rudy Gobert,0.786493,0.067704,2.09698,3.286784
169,Nikola Jokic,2.732662,2.061287,1.053911,3.202816
283,Thomas Bryant,0.950618,0.25989,1.839496,3.172208
182,Richaun Holmes,0.992385,0.326231,1.726474,3.066309
339,Deandre Ayton,1.089235,0.434069,1.625088,3.024922
69,Jonas Valanciunas,1.450471,0.846343,1.302968,2.83096
165,Clint Capela,1.224422,0.652723,1.322561,2.67181
178,Montrezl Harrell,0.756312,0.186505,1.586688,2.62081


In [70]:
# Echo the league baselines one per line: the four free-throw values
# (mean makes, mean attempts, summed makes, summed attempts), then the same
# four for field goals.
for baseline in (
    FTMpg_all, FTApg_all, FTMpg_total, FTApg_total,
    FGMpg_all, FGApg_all, FGMpg_total, FGApg_total,
):
    print(baseline)

1.5114356435777758
1.9492872879575711
657.4745049563319
847.9399702615433
3.623978676396791
7.8219941212315485
1576.4307242326047
3402.567442735727
