### Coding for the Basketball Sharpe Ratio Project

In [None]:
#-----------------------------------------------------#
#               Load in Libraries                     #
#-----------------------------------------------------#


#----------- NBA API Libraries -----------------------#
from nba_api.stats.static import players
from nba_api.stats.static import teams 
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.library.parameters import SeasonAll

#----------- Data Libraries --------------------------#
import pandas as pd 
import numpy as np
import time
import os

#----------- Visualization Libraries -----------------#
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates


#---------- Define Vars for Plotting -----------------#
# Global seaborn theme: "ticks" style with the right and top spines switched off.
custom_params = dict.fromkeys(("axes.spines.right", "axes.spines.top"), False)
sns.set_theme(style="ticks", rc=custom_params)


In [None]:
#-----------------------------------------------------#
#               Load in Data                          #
#-----------------------------------------------------#

# One per-game CSV per season, read from the working directory in season order.
nba_1718, nba_1819, nba_1920, nba_2021, nba_2122 = [
    pd.read_csv(csv_name)
    for csv_name in (
        'nba_2017_2018_per_game.csv',
        'nba_2018_2019_per_game.csv',
        'nba_2019_2020_per_game.csv',
        'nba_2020_2021_per_game.csv',
        'nba_2021_2022_per_game.csv',
    )
]

In [None]:
#-----------------------------------------------------#
#           Define Data Retrieval Functions           #
#-----------------------------------------------------#

#-- Get Player Names Dictionary
# NOTE: despite the name, the lookup functions below iterate this object as a
# sequence of per-player dicts and read each entry's 'full_name' and 'id' keys.
player_dict = players.get_players()

#-- Define Function for Getting Season Games

def player_gl_api(player_name, season_year):
    """Fetch one player's game log for a season.

    Parameters
    ----------
    player_name : str
        Must equal the 'full_name' of an entry in ``player_dict`` exactly.
    season_year : str
        Passed unchanged as the ``season`` argument of ``PlayerGameLog``.

    Returns
    -------
    pandas.DataFrame
        The first data frame returned by the endpoint, with a 'PLAYER'
        column holding ``player_name`` added.

    Raises
    ------
    IndexError
        If no entry in ``player_dict`` has that full name.
    """
    # Stop at the first match instead of building a list of every match.
    nba_player = next(
        (entry for entry in player_dict if entry['full_name'] == player_name),
        None,
    )
    if nba_player is None:
        # Same exception type the bare ``[...][0]`` lookup used to raise, but
        # with a message that says which name could not be found.
        raise IndexError(f"No player named {player_name!r} found in player_dict")

    gamelog = playergamelog.PlayerGameLog(player_id = nba_player['id'], season = season_year)
    gamelog_df = gamelog.get_data_frames()[0]
    gamelog_df['PLAYER'] = player_name

    return gamelog_df

def get_bb_sharpe(player_name, season_year):
    """Compute the basketball Sharpe summary for a player looked up by name.

    The statistics are identical to those of ``get_bb_sharpe_id_pass``; this
    wrapper only resolves ``player_name`` to a player id first, so the two
    functions cannot drift apart.

    Parameters
    ----------
    player_name : str
        Must equal the 'full_name' of an entry in ``player_dict`` exactly.
    season_year : str
        Passed unchanged as the ``season`` argument of ``PlayerGameLog``.

    Returns
    -------
    pandas.DataFrame
        One row (index 0) with 'Player', 'Season', 'Games Played' and the
        averages, volatilities and 'nbaBB Sharpe' / 'nbaBB Sharpe2' /
        'nbaBB Sharpe3' ratios produced by ``get_bb_sharpe_id_pass``.

    Raises
    ------
    IndexError
        If no entry in ``player_dict`` has that full name.
    """
    nba_player = next(
        (entry for entry in player_dict if entry['full_name'] == player_name),
        None,
    )
    if nba_player is None:
        # Same exception type the bare ``[...][0]`` lookup used to raise, but
        # with a message that says which name could not be found.
        raise IndexError(f"No player named {player_name!r} found in player_dict")

    return get_bb_sharpe_id_pass(nba_player['id'], player_name, season_year)

#-- Year Range BB Sharpe Function --#

def get_bb_sharpe_years(player_name, start_year, end_year):
    """Stack one ``get_bb_sharpe`` row per season from start_year to end_year.

    Parameters
    ----------
    player_name : str
        Forwarded to ``get_bb_sharpe``.
    start_year, end_year : int
        Inclusive range of seasons; each year is converted with ``str`` before
        being passed on. ``start_year`` is always fetched, even when
        ``end_year`` is smaller.

    Returns
    -------
    pandas.DataFrame
        The per-season rows in ascending year order with a fresh 0..n-1 index.
    """
    season_frames = [get_bb_sharpe(player_name, str(start_year))]
    season_frames.extend(
        get_bb_sharpe(player_name, str(year))
        for year in range(start_year + 1, end_year + 1)
    )

    # Concatenate once at the end instead of re-copying the growing frame on
    # every iteration.
    return pd.concat(season_frames, ignore_index = True)

#-- BB Sharpe function modified to pass in player IDs instead of player names --#

def _summarize_gamelog(gamelog_df, nba_player_name, season_year):
    """Collapse one season's game log into a one-row frame of Sharpe-style statistics."""
    pts = gamelog_df['PTS']
    fta = gamelog_df['FTA']
    ftm = gamelog_df['FTM']

    # A game logged with 0 minutes but non-zero points or attempts would make
    # the per-minute rate infinite and drag the season mean to inf. Treating
    # those minutes as missing leaves the game out of the per-minute figures.
    minutes_played = gamelog_df['MIN'].replace(0, np.nan)
    pts_per_min = pts / minutes_played
    fta_per_min = fta / minutes_played

    mu_ppg = np.mean(pts)
    mu_fta = np.mean(fta)
    sigma_ppg = np.std(pts)
    sigma_ppgm = np.std(pts_per_min)
    excess_points = mu_ppg - mu_fta

    ppg_fta_cov = np.cov(pts, fta)[1][0]
    ppg_fta_corr = np.corrcoef(pts, fta)[1][0]
    # The square root of a negative covariance is undefined; report NaN
    # explicitly rather than through a numpy runtime warning.
    cov_root = np.sqrt(ppg_fta_cov) if ppg_fta_cov >= 0 else np.nan

    summary_row = {
        'Player' : nba_player_name,
        'Season' : season_year,
        'Games Played' : len(gamelog_df),
        'Average PPG' : mu_ppg,
        'Volatility PPG' : sigma_ppg,
        'Average FTA' : mu_fta,
        'Average FTPER' : np.mean(ftm / fta),
        'Average FTS' : np.mean(ftm / pts),
        'nbaBB Sharpe' : excess_points / sigma_ppg,
        'FTA PTS Correlation' : ppg_fta_corr,
        'FTA PTS Covariance' : ppg_fta_cov,
        'nbaBB Sharpe2' : excess_points / cov_root,
        'Average PPGM' : np.mean(pts_per_min),
        'Average FTAM' : np.mean(fta_per_min),
        'Volatility PPGM' : sigma_ppgm,
        'Volatility MIN' : np.std(gamelog_df['MIN']),
        'nbaBB Sharpe3' : np.mean(pts_per_min - fta_per_min) / sigma_ppgm,
    }

    return pd.DataFrame(summary_row, index = [0])


def get_bb_sharpe_id_pass(nba_player_id, nba_player_name, season_year):
    """Compute the basketball Sharpe summary for a player given by id.

    Parameters
    ----------
    nba_player_id
        Passed unchanged as the ``player_id`` argument of ``PlayerGameLog``.
    nba_player_name : str
        Only used to fill the 'Player' column of the result.
    season_year : str
        Passed unchanged as the ``season`` argument of ``PlayerGameLog`` and
        stored in the 'Season' column.

    Returns
    -------
    pandas.DataFrame
        One row (index 0). 'nbaBB Sharpe' is (mean PTS - mean FTA) / std PTS;
        'nbaBB Sharpe2' divides the same numerator by the square root of the
        PTS/FTA covariance; 'nbaBB Sharpe3' is the per-minute analogue. The
        remaining columns hold the component means, volatilities, the PTS/FTA
        correlation and covariance, and the number of games.
    """
    gamelog = playergamelog.PlayerGameLog(player_id = nba_player_id, season = season_year)
    return _summarize_gamelog(gamelog.get_data_frames()[0], nba_player_name, season_year)

In [None]:
#-- James Harden Data Retrieval --#

# Game log requested with season '2017'; GAME_DATE is parsed to datetimes so
# it can be used directly as a date axis.
HARDEN18 = player_gl_api('James Harden', '2017').assign(
    GAME_DATE = lambda gamelog: pd.to_datetime(gamelog['GAME_DATE'])
)

#-- Keep a copy of the data on disk
HARDEN18.to_csv('HARDEN18.csv')

In [None]:
#-- Plot James Harden Data for 2017-2018 MVP Season --#

# Two panels sharing the date axis: per-game FTA and PTS on the left, FTA as a
# share of PTS on the right.
fig, ax = plt.subplots(1, 2, sharex = True, figsize = (25, 10))
fontparams = {'font' : 'Arial', 'fontsize' : 16}

game_dates = HARDEN18['GAME_DATE']

#-------------------- James Harden PTS & FTA --------------------------------------#
ax[0].plot(game_dates, HARDEN18['FTA'], color = 'red', marker = 'o')
ax[0].plot(game_dates, HARDEN18['PTS'], color = 'blue', marker = 'o')

# One vectorised call draws the dashed connector between FTA and PTS for
# every game.
ax[0].vlines(x = game_dates
            , ymin = HARDEN18['FTA']
            , ymax = HARDEN18['PTS']
            , color = 'gray', linestyles = 'dashed', alpha = .5)

#-- X Axis: one tick for every fourth game
x_ticks = game_dates.iloc[::4].reset_index(drop = True)

# '%-d' (day without zero padding) is a platform-specific strftime extension
# that fails on Windows, so the day number is read from the timestamp instead.
x_labs = pd.Series([f"{tick.strftime('%b')} {tick.day}" for tick in x_ticks])

ax[0].set_xticks(x_ticks)

ax[0].set_xticklabels(x_labs, rotation=45)

#-- Y Axis 
ax[0].set_yticks(np.arange(0, 68, step = 4))

#-- Title and Legend
ax[0].legend(['FTA', 'PTS'])
ax[0].set_title('James Harden FTA & PPG from his MVP Season (2017 - 2018)'
            , fontdict = fontparams
            , loc = 'left')


#-------------------- James Harden FTA / PTS --------------------------------------#
y_vector = (HARDEN18['FTA'] / HARDEN18['PTS'])
ax[1].plot(game_dates, y_vector, color = 'magenta', linestyle = 'solid', marker = 'o')
# Flat dashed line at the season average of the ratio.
ax[1].plot(game_dates, np.repeat(np.mean(y_vector), len(y_vector)), color = 'black', linestyle = 'dashed', alpha = .5)

#-- X & Y Axis 
ax[1].set_xticklabels(x_labs, rotation=45)
ax[1].set_yticks(np.arange(0, 1.05, step = .05))

#-- Title and Legend
ax[1].set_title('Attempted Free Throws as a % of Points'
            , fontdict = fontparams
            , loc = 'left')

ax[1].legend(['FTA / PTS', 'Average %'])

fig.savefig('james_harden_dual_plot.png', dpi = 500)
fig.show()

In [None]:
#-- Get data to compute B ratios for the top 30 scorers from the 2017-2018 season --#

# Names come from column 1 of the 2017-18 per-game table (presumably the
# player-name column — verify against the CSV).
# NOTE(review): iloc[0:31] selects 31 rows although the analysis is described
# as "top 30" — confirm whether the extra row is intentional.
top_30_1718 = nba_1718.iloc[0:31, 1].to_list()

sharpe_frames = []

for p in top_30_1718:

    try:
        sharpe_frames.append(get_bb_sharpe(p, '2017'))
    except Exception as exc:
        # Best effort: a failed lookup or request skips that player, but the
        # failure is reported instead of being silently discarded.
        print(f"Skipping {p}: {exc!r}")

    time.sleep(2) #-- rest the loop (after failures too) to prevent web overload

# Build the table once; fall back to an empty frame if every lookup failed.
TS_201718 = pd.concat(sharpe_frames, ignore_index = True) if sharpe_frames else pd.DataFrame()

TS_201718.to_csv('TS_201718.csv')

In [None]:
#-- Plot the B Ratios versus PPG for 2017 - 2018 Season --#

# Scatter of each player's B ratio (x) against average points per game (y),
# with the player's name printed next to the marker.
plt.figure(figsize = (20, 7))

sharpe_vals = TS_201718['nbaBB Sharpe']
ppg_vals = TS_201718['Average PPG']

plt.scatter(x = sharpe_vals, y = ppg_vals, ec = 'black', color = 'cyan', alpha = .75)

#-- Add player name labels, nudged off the marker so the text stays readable
xdodge = .01
ydodge = .1

for sharpe, ppg, player in zip(sharpe_vals, ppg_vals, TS_201718['Player']):
    plt.text(x = sharpe + xdodge, y = ppg + ydodge, s = player
                , fontdict = dict(color = 'purple', size = 10))

#-- Adjust X & Y Axes (tick positions are hard-coded)
plt.xticks(np.arange(1.8, 3.1, step = .1))
plt.yticks(np.arange(18, 32, step = 2))

label_font = {'family' : 'arial', 'size' : 16}

plt.xlabel('Basketball Sharpe Ratio $(B)$'
            , fontdict = label_font)
# Raw string keeps the mathtext backslash literal; the symbol is mu_ppg
# because this axis shows average points, not free-throw attempts.
plt.ylabel(r'Average PPG $(\mu_{ppg})$'
            , fontdict = label_font)

plt.title('The Basketball Sharpe Ratios versus PPG for the Top 30 Scorers from the 2017-2018 NBA Season'
            , fontdict = label_font
            , loc = 'left')

plt.savefig('B_Versus_PPG_1718.png', dpi = 300)

plt.show()

In [None]:
#-- B Ratio Components Plot to Show Efficient Frontier from 2017-2018 --#

plt.figure(figsize = (15, 7))

#-- The two components of the B ratio, one per axis
x_vec = TS_201718['Volatility PPG']                            #-- BB Sharpe denominator
y_vec = TS_201718['Average PPG'] - TS_201718['Average FTA']    #-- BB Sharpe numerator

plt.scatter(x = x_vec, y = y_vec, ec = 'black', color = 'cyan', alpha = .75)

#-- Player names, offset slightly from each marker
xdodge = .05
ydodge = .01

for vol, excess, player in zip(x_vec, y_vec, TS_201718['Player']):
    plt.text(x = vol + xdodge, y = excess + ydodge, s = player
                , fontdict = dict(color = 'purple', size = 10))

#-- Add Quadrants 
def midpoint(vector):
    """Return the value halfway between the smallest and largest entries of ``vector``."""
    lowest, highest = np.min(vector), np.max(vector)
    return (highest + lowest) / 2

# Dashed guides through the midpoint of each axis' data range split the
# scatter into four quadrants.
plt.vlines(x = midpoint(x_vec), ymin = np.min(y_vec), ymax = np.max(y_vec)
            , linestyles = 'dashed', color = 'grey', alpha = .5)

plt.hlines(y = midpoint(y_vec), xmin = np.min(x_vec), xmax = np.max(x_vec)
            , linestyles = 'dashed', color = 'grey', alpha = .5)

#-- Adjust X & Y Axes (tick positions are hard-coded)
plt.yticks(np.arange(15, 22, step = .5))
plt.xticks(np.arange(5.5, 10, step = .25))

label_font = {'family' : 'arial', 'size' : 16}

# Raw strings: '\m' and '\s' are not valid Python escapes, so the plain
# literals only worked by accident and warn on newer interpreters. The text
# handed to matplotlib is unchanged.
plt.ylabel(r'Average Excess Points $\mu_{ppg} - \mu_{fta}$'
            , fontdict = label_font)

plt.xlabel(r'Volatility in PPG $\sigma_{ppg}$'
            , fontdict = label_font)

plt.title('$B$ Ratio Efficient Frontier for the Top 30 Scorers from the 2017-18 NBA Season'
            , fontdict = label_font
            , loc = 'left')

plt.savefig('sharpe_components_1718.png', dpi = 300)

plt.show()

In [None]:
#-- Getting data from the 2022-2023 NBA Season --#

#- Fetch data from NBA API for league leaders
from nba_api.stats import endpoints
data = endpoints.leagueleaders.LeagueLeaders().get_data_frames()[0]

#-- Create PPG variable and sort on that index
data['PPG'] = data['PTS'] / data['GP']
data = data.sort_values(by = 'PPG', ascending = False)
# reset_index() without drop=True inserts the old index as column 0; the
# positional column numbers used below depend on that shift.
data = data.reset_index()

#-- Create two lists of IDs and names for passing into Sharpe function
# Columns are picked by position: 1 for the ids and 3 for the names
# (presumably PLAYER_ID and PLAYER — verify against data.columns).
# NOTE(review): rows 1..30 skip row 0, the highest PPG after sorting —
# confirm that leaving out the top row is intended.
nba_2k23_player_ids = data.iloc[1:31,1].to_list()
nba_2k23_player_names = data.iloc[1:31,3].to_list()



In [None]:
#-- Get Sharpe data for 2022-2023 season --#

#-- Initiate empty data frame
#-- Collect one summary row per player, then build the table once
sharpe_frames = []

#-- Loop through lists from above cell to pull in Sharpe data
for player_id, player_name in zip(nba_2k23_player_ids, nba_2k23_player_names):

    try:
        #-- Sharpe function
        sharpe_frames.append(get_bb_sharpe_id_pass(player_id, player_name, '2022'))
    except Exception as exc:
        # Best effort: a failed request skips that player, but the failure is
        # reported instead of being silently discarded.
        print(f"Skipping {player_name}: {exc!r}")

    #-- Rest loop (after failures too) to prevent web API overload
    time.sleep(2)

# Fall back to an empty frame if every request failed.
TS_2K23 = pd.concat(sharpe_frames, ignore_index = True) if sharpe_frames else pd.DataFrame()

TS_2K23.to_csv('TS_2K23.csv')

In [None]:
#-- Joint Plot for B Ratio and Efficient Frontier Visualization --#

#-- Initiate plot figure
fig, ax = plt.subplots(1, 2, sharex = False, figsize = (25, 10))
fontparams = {'font' : 'Arial', 'fontsize' : 14}

#--------- The first subplot will be the BB Sharpe Ratio vs PPG Plot ------------#
x_vec_1 = TS_2K23['nbaBB Sharpe']
y_vec_1 = TS_2K23['Average PPG']
s_vec_1 = TS_2K23['Player']

ax[0].scatter(x = x_vec_1
                , y = y_vec_1
                , ec = 'black', color = 'cyan', alpha = .75)

#-- Add player name labels, nudged off the marker
xdodge = .01
ydodge = .1

for x_pos, y_pos, player in zip(x_vec_1, y_vec_1, s_vec_1):
    ax[0].text(x = x_pos + xdodge
                , y = y_pos + ydodge
                , s = player
                , fontdict = dict(color = 'purple', size = 9))

#-- Adjust X & Y Axes
# Tick positions span the data range; they are computed once and reused for
# the rounded labels so positions and labels always have the same length.
x_tick_pos = np.arange(np.min(x_vec_1), np.max(x_vec_1), step = .1)
ax[0].set_xticks(x_tick_pos)
ax[0].set_yticks(np.arange(np.min(y_vec_1), np.max(y_vec_1), step = 1))
ax[0].set_xticklabels(np.round(x_tick_pos, 1), rotation = 45)

# Axis label spelling corrected ("Basetball" -> "Basketball").
ax[0].set_xlabel('Basketball Sharpe Ratio $(B)$'
            , fontdict = fontparams)
ax[0].set_ylabel('Average PPG'
            , fontdict = fontparams)

ax[0].set_title('$B$ for the Top 30 Scorers from the 2022-23 NBA Season'
            , fontdict = fontparams
            , loc = 'left')

#------------ The second plot will be the B ratio components plot -------------#
y_vec = (TS_2K23['Average PPG'] - TS_2K23['Average FTA'])
x_vec = TS_2K23['Volatility PPG']
s_vec = TS_2K23['Player']

ax[1].scatter(x = x_vec #-- BB Sharpe Denominator
            , y = y_vec #-- BB Sharpe Numerator 
            , ec = 'black', color = 'cyan', alpha = .75)

#-- Add player name labels 
ydodge = .05
xdodge = .01

for x_pos, y_pos, player in zip(x_vec, y_vec, s_vec):
    ax[1].text(x = x_pos + xdodge, y = y_pos + ydodge, s = player
                , fontdict = dict(color = 'purple', size = 9))

#-- Add Quadrants 
def midpoint(vector):
    """Return the value halfway between the smallest and largest entries of ``vector``."""
    lowest, highest = np.min(vector), np.max(vector)
    return (highest + lowest) / 2

# Dashed guides through the midpoint of each axis' data range split the
# components plot into four quadrants.
ax[1].vlines(x = midpoint(x_vec), ymin = np.min(y_vec), ymax = np.max(y_vec)
            , linestyles = 'dashed', color = 'grey', alpha = .5)

ax[1].hlines(y = midpoint(y_vec), xmin = np.min(x_vec), xmax = np.max(x_vec)
            , linestyles = 'dashed', color = 'grey', alpha = .5)

#-- Adjust X & Y Axes 
y_tick_pos = np.arange(np.round(np.min(y_vec),1), np.round(np.max(y_vec),1), step = .5)
x_tick_pos = np.arange(np.round(np.min(x_vec),1), np.round(np.max(x_vec),1), step = .25)

# np.arange accumulates floating-point error, so unrounded values can print
# as labels like 17.900000000000002. Positions keep the raw values; only the
# label text is rounded.
ax[1].set_yticks(y_tick_pos)
ax[1].set_yticklabels(np.round(y_tick_pos, 2))
ax[1].set_xticks(x_tick_pos)
ax[1].set_xticklabels(np.round(x_tick_pos, 2), rotation = 45)

# Raw strings: '\m' and '\s' are not valid Python escapes, so the plain
# literals only worked by accident and warn on newer interpreters. The text
# handed to matplotlib is unchanged.
ax[1].set_ylabel(r'Average Excess Points $\mu_{ppg} - \mu_{fta}$'
            , fontdict = fontparams)
ax[1].set_xlabel(r'Volatility in PPG $\sigma_{ppg}$'
            , fontdict = fontparams)

ax[1].set_title('$B$ Components for Top 30 Scorers from the 2022-23 NBA Season'
            , fontdict = fontparams
            , loc = 'left')

fig.savefig('joint_sharpe_plots_for_2K23.png', dpi = 500)

fig.show()