In [138]:
import numpy as np
import pandas as pd
from scipy.stats import zscore
import unicodedata

def strip_accents(s):
    """Return ``s`` with combining accent marks removed.

    The string is decomposed with Unicode NFD normalization, and every
    character whose Unicode category is ``'Mn'`` is then discarded.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept_chars)



# Show up to 500 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 500)


def _to_numeric_if_possible(column):
    """Return ``column`` converted by ``pd.to_numeric``, or unchanged if it cannot be parsed.

    Explicit replacement for the deprecated ``pd.to_numeric(..., errors='ignore')``.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# read_html returns a list of the tables found on the page; the first one is used.
df = pd.read_html('https://www.basketball-reference.com/leagues/NBA_2021_totals.html')[0]

# Keep only the first row for each player name, then remove any row whose
# 'Player' cell is the literal header text 'Player'.
# drop=True discards the old index instead of inserting it as an extra
# 'index' / 'level_0' column in the stats table.
df = df.drop_duplicates(subset=['Player']).reset_index(drop=True)
df = df[df['Player'] != 'Player'].reset_index(drop=True)

# Convert every column that parses as numeric; leave the others as they are.
df = df.apply(_to_numeric_if_possible)
df['Player'] = df['Player'].apply(strip_accents)

num_players = len(df)

# Per-game averages: season total divided by games played.
per_game_sources = {
    'p/g': 'PTS',
    '3/g': '3P',
    'r/g': 'TRB',
    'a/g': 'AST',
    's/g': 'STL',
    'b/g': 'BLK',
    'to/g': 'TOV',
}
for per_game_col, total_col in per_game_sources.items():
    df[per_game_col] = df[total_col] / df['G']

# Shooting volume split into a made part (attempts * pct, positive) and a
# missed part (attempts * (1 - pct), stored as a negative number), per game.
df['ft+/g'] = df['FTA'] * df['FT%'] / df['G']
df['ft-/g'] = -df['FTA'] * (1 - df['FT%']) / df['G']
df['fg+/g'] = df['FGA'] * df['FG%'] / df['G']
df['fg-/g'] = -df['FGA'] * (1 - df['FG%']) / df['G']

# Replace every remaining NaN with 0.
df = df.fillna(0)





In [139]:
# League-wide rankings: each category value is the player's share of the
# league total for that per-game stat. After the final scaling by
# num_players, a value of 1.0 corresponds to the league mean.
counting_stats = {
    'pV': 'p/g',
    '3V': '3/g',
    'rV': 'r/g',
    'aV': 'a/g',
    'sV': 's/g',
    'bV': 'b/g',
}

rankings = pd.DataFrame()
rankings['Player'] = df['Player']

for value_col, stat_col in counting_stats.items():
    rankings[value_col] = df[stat_col] / df[stat_col].sum()

# Turnovers count against a player, so their share is negated.
rankings['toV'] = -df['to/g'] / df['to/g'].sum()

# Percentage categories: average of the share of makes (positive) and the
# share of misses (negative). The 'ft-/g' and 'fg-/g' columns hold negative
# numbers, so x / x.sum() is a positive share and the leading minus turns it
# into a penalty.
ft_made_share = df['ft+/g'] / df['ft+/g'].sum()
ft_miss_penalty = -df['ft-/g'] / df['ft-/g'].sum()
fg_made_share = df['fg+/g'] / df['fg+/g'].sum()
fg_miss_penalty = -df['fg-/g'] / df['fg-/g'].sum()

rankings['ft%V'] = (ft_made_share + ft_miss_penalty) / 2
rankings['fg%V'] = (fg_made_share + fg_miss_penalty) / 2

# Sum only the category columns. Summing the whole frame would include the
# string 'Player' column, which raises TypeError on pandas >= 2.0.
category_cols = [col for col in rankings.columns if col != 'Player']
rankings['Value'] = rankings[category_cols].sum(axis=1)

# Put 'Value' first, then scale every numeric column by the player count.
rankings = rankings[['Value'] + [col for col in rankings.columns if col != 'Value']]
rankings[['Value'] + category_cols] *= num_players

rankings_sorted = rankings.sort_values("Value", ascending=False).reset_index(drop=True)


In [140]:
# Re-rank using only the top players from the league-wide ranking, so each
# category share is measured against that pool instead of the whole league.
top_n = 200
top_200 = rankings_sorted.head(top_n)['Player']

df_200 = df[df['Player'].isin(top_200)]

counting_stats_200 = {
    'pV': 'p/g',
    '3V': '3/g',
    'rV': 'r/g',
    'aV': 'a/g',
    'sV': 's/g',
    'bV': 'b/g',
}

rankings_200 = pd.DataFrame()
rankings_200['Player'] = df_200['Player']

for value_col, stat_col in counting_stats_200.items():
    rankings_200[value_col] = df_200[stat_col] / df_200[stat_col].sum()

# Turnovers count against a player, so their share is negated.
rankings_200['toV'] = -df_200['to/g'] / df_200['to/g'].sum()

# Percentage categories: average of the share of makes (positive) and the
# share of misses (negative). The 'ft-/g' and 'fg-/g' columns hold negative
# numbers, so x / x.sum() is a positive share and the leading minus turns it
# into a penalty.
ft_made_share_200 = df_200['ft+/g'] / df_200['ft+/g'].sum()
ft_miss_penalty_200 = -df_200['ft-/g'] / df_200['ft-/g'].sum()
fg_made_share_200 = df_200['fg+/g'] / df_200['fg+/g'].sum()
fg_miss_penalty_200 = -df_200['fg-/g'] / df_200['fg-/g'].sum()

rankings_200['ft%V'] = (ft_made_share_200 + ft_miss_penalty_200) / 2
rankings_200['fg%V'] = (fg_made_share_200 + fg_miss_penalty_200) / 2

# Sum only the category columns. Summing the whole frame would include the
# string 'Player' column, which raises TypeError on pandas >= 2.0.
category_cols_200 = [col for col in rankings_200.columns if col != 'Player']
rankings_200['Value'] = rankings_200[category_cols_200].sum(axis=1)

# Put 'Value' first. The column list must come from rankings_200 itself; the
# previous code read it from an undefined name, `rankings_2`.
rankings_200 = rankings_200[['Value'] + [col for col in rankings_200.columns if col != 'Value']]

# Scale by the number of players actually in the pool. This equals top_n
# whenever at least top_n players are available.
rankings_200[['Value'] + category_cols_200] *= len(df_200)

rankings_200_sorted = rankings_200.sort_values("Value", ascending=False).reset_index(drop=True)

# NOTE(review): absolute, user-specific path. The file name also says 19-20
# although the table is read from the NBA_2021 page. Confirm both and
# consider a configurable output directory.
output_csv = r'/Users/timothyhuang/Desktop/FantasyBasketball/19-20_Rankings_top200.csv'
rankings_200_sorted.to_csv(output_csv, index=False, header=True)




In [141]:
# Run this cell to display the top-200 rankings with every category counted
# (no punts). Rows are ordered by "Value", highest first.

rankings_200_sorted

Unnamed: 0,Value,Player,pV,3V,rV,aV,sV,bV,toV,ft%V,fg%V
0,11.077539,Karl-Anthony Towns,1.340164,0.911864,2.212686,1.573487,1.026933,4.68284,-1.72247,1.188885,-0.136849
1,9.665012,Myles Turner,0.937107,0.781598,1.071984,0.179827,1.613751,5.574809,-0.738202,0.193162,0.050974
2,9.391832,James Harden,2.327653,2.735592,0.913935,3.382998,1.283666,0.390237,-2.583706,0.911289,0.030167
3,9.135814,CJ McCollum,1.954826,2.952702,0.797117,1.708358,1.90716,0.445985,-0.738202,0.13808,-0.030213
4,8.979398,Stephen Curry,2.257118,2.69217,1.017011,2.023055,1.026933,0.445985,-2.05056,1.577624,-0.009937
5,8.961654,Kawhi Leonard,1.645815,0.911864,1.122377,2.045534,2.053865,1.821104,-0.66985,0.134613,-0.103669
6,8.77318,Kyrie Irving,1.91452,2.257949,1.017011,1.933142,1.613751,0.445985,-1.230336,0.679363,0.141796
7,8.489211,Fred VanVleet,1.540013,2.330319,1.058241,1.730836,1.540399,1.040631,-0.765542,0.087251,-0.072937
8,8.39062,Michael Porter Jr.,1.375432,1.671751,1.29875,0.550721,1.026933,2.34142,-0.287078,0.169017,0.243675
9,8.365778,Nikola Jokic,1.702915,0.694754,2.253916,3.731413,1.467046,0.891969,-2.952806,0.184423,0.392147


In [152]:
# Category columns available in the rankings table, and the subset to punt
# (that is, to leave out of each player's total).
all_categories = ['pV', '3V', 'rV', 'aV', 'sV', 'bV', 'toV', 'ft%V', 'fg%V']
punts = ['toV', 'fg%V']

# Per-player sum of the punted categories, computed once and reused below.
punted_total = rankings_200[punts].sum(axis=1)

rankings_punt = rankings_200.copy()
rankings_punt['Punt Value'] = -punted_total
rankings_punt["Value"] = rankings_punt['Value'] - punted_total

# Show the two summary columns first, followed by everything else in its
# existing order.
leading_cols = ['Value', 'Punt Value']
remaining_cols = [name for name in rankings_punt.columns if name not in leading_cols]
rankings_punt = rankings_punt[leading_cols + remaining_cols]

rankings_punt_sorted = (
    rankings_punt
    .sort_values("Value", ascending=False)
    .reset_index(drop=True)
)



In [154]:
# Run this cell to display rankings using your chosen punt categories.
# "Value" excludes the categories listed in `punts`; "Punt Value" is the
# negated sum of those categories. Rows are ordered by "Value", highest first.

rankings_punt_sorted

Unnamed: 0,Value,Punt Value,Player,pV,3V,rV,aV,sV,bV,toV,ft%V,fg%V
0,12.936859,1.85932,Karl-Anthony Towns,1.340164,0.911864,2.212686,1.573487,1.026933,4.68284,-1.72247,1.188885,-0.136849
1,11.94537,2.553538,James Harden,2.327653,2.735592,0.913935,3.382998,1.283666,0.390237,-2.583706,0.911289,0.030167
2,11.039895,2.060497,Stephen Curry,2.257118,2.69217,1.017011,2.023055,1.026933,0.445985,-2.05056,1.577624,-0.009937
3,10.926437,2.560659,Nikola Jokic,1.702915,0.694754,2.253916,3.731413,1.467046,0.891969,-2.952806,0.184423,0.392147
4,10.352239,0.687228,Myles Turner,0.937107,0.781598,1.071984,0.179827,1.613751,5.574809,-0.738202,0.193162,0.050974
5,10.314646,2.493706,Paul George,1.77345,2.605326,1.099471,1.618444,1.760456,0.891969,-2.624717,0.56553,0.131011
6,9.904228,0.768414,CJ McCollum,1.954826,2.952702,0.797117,1.708358,1.90716,0.445985,-0.738202,0.13808,-0.030213
7,9.86172,1.08854,Kyrie Irving,1.91452,2.257949,1.017011,1.933142,1.613751,0.445985,-1.230336,0.679363,0.141796
8,9.735173,0.773518,Kawhi Leonard,1.645815,0.911864,1.122377,2.045534,2.053865,1.821104,-0.66985,0.134613,-0.103669
9,9.696961,1.854318,Kevin Durant,1.986734,1.519773,1.346852,1.521038,1.198088,1.300789,-2.009549,0.823687,0.155231


In [None]:

# Save rankings to CSV file
# rankings_200_sorted.to_csv(r'/Users/...../Downloads/20-21_Rankings.csv', index = False, header=True)

# Save punt rankings to CSV file
# rankings_punt_sorted.to_csv(r'/Users/...../Downloads/20-21_Rankings_punt.csv', index = False, header=True)
