In [101]:
import numpy as np
import pandas as pd
from scipy.stats import zscore

pd.set_option('display.max_rows', 500)



df = pd.read_html('https://www.basketball-reference.com/leagues/NBA_2020_totals.html')[0]

df = df.drop_duplicates(subset=['Player']).reset_index()
df = df[df['Player'] != 'Player'].reset_index()
df = df.apply(pd.to_numeric, errors='ignore')

num_players = len(df)



df['p/g'] = df['PTS']/df['G']
df['3/g'] = df['3P']/df['G']
df['r/g'] = df['TRB']/df['G']
df['a/g'] = df['AST']/df['G']
df['s/g'] = df['STL']/df['G']
df['b/g'] = df['BLK']/df['G']
df['to/g'] = df['TOV']/df['G']

df['ft+/g'] = df["FTA"]*df['FT%']/df["G"]
df['ft-/g'] = -df["FTA"]*(1-df['FT%'])/df["G"]
df['fg+/g'] = df["FGA"]*df['FG%']/df["G"]
df['fg-/g'] = -df["FGA"]*(1-df['FG%'])/df["G"]

dfd = df.fillna(0)





In [107]:
rankings = pd.DataFrame()

rankings['Player'] = df['Player']
rankings["pV"] = df['p/g']/sum(df['p/g'])
rankings["3V"] = df['3/g']/sum(df['3/g'])
rankings["rV"] = df['r/g']/sum(df['r/g'])
rankings["aV"] = df['a/g']/sum(df['a/g'])
rankings["sV"] = df['s/g']/sum(df['s/g'])
rankings["bV"] = df['b/g']/sum(df['b/g'])
rankings["toV"] = -df['to/g']/sum(df['to/g'])


rankings['ft+V'] = df['ft+/g']/sum(df_capped['ft+/g'])
rankings['ft-V'] = -df['ft-/g']/sum(df_capped['ft-/g'])

rankings['fg+V'] = df['fg+/g']/sum(df['fg+/g'])
rankings['fg-V'] = -df['fg-/g']/sum(df['fg-/g'])

rankings['ft%V'] = (rankings['ft+V'] + rankings['ft-V'])/2
rankings['fg%V'] = (rankings['fg+V'] + rankings['fg-V'])/2


rankings = rankings.drop(['ft+V', 'ft-V', 'fg+V', 'fg-V'], axis=1)



rankings["Value"] = rankings.sum(axis=1)
rankings = rankings[['Value'] + [col for col in rankings.columns if col != 'Value']]
rankings[rankings.select_dtypes(include=['number']).columns] *= num_players




rankings_sorted = rankings.sort_values("Value", ascending=False).reset_index(drop=True)






rankings_sorted.to_csv(r'/Users/timothyhuang/Desktop/FantasyBasketball/19-20_Rankings.csv', index = False, header=True)

rankings_sorted

Unnamed: 0,Value,Player,pV,3V,rV,aV,sV,bV,toV,ft%V,fg%V
0,17.296877,James Harden,3.935106,4.686026,1.806746,3.922964,2.971248,2.226586,-4.036684,1.784886,
1,15.487885,Anthony Davis,2.990648,1.237608,2.563629,1.680705,2.372397,5.820248,-2.213665,1.036314,
2,14.568768,Damian Lillard,3.434479,4.359756,1.185347,4.183938,1.714320,0.841155,-2.619635,1.469408,
3,13.798814,Karl-Anthony Towns,3.031946,3.471196,2.975054,2.277596,1.523995,3.028157,-2.800964,0.291834,
4,13.599988,Kristaps Porziņģis,2.340220,2.654947,2.609696,0.932349,1.162644,5.091200,-1.438454,0.247386,
5,13.254773,Kawhi Leonard,3.102198,2.299708,1.942774,2.559390,2.920789,1.460953,-2.329670,1.298632,
6,13.226361,Kyrie Irving,3.139996,2.984011,1.418660,3.334519,2.182084,1.261732,-2.317161,1.222520,
7,13.152219,Jusuf Nurkić,2.019796,0.266430,2.823546,2.084075,2.222493,5.046929,-2.116638,0.805588,
8,12.207110,Trae Young,3.395932,3.641204,1.170739,4.862841,1.751055,0.336462,-4.292690,1.341567,
9,12.138473,Hassan Whiteside,1.778841,0.063625,3.720873,0.606559,0.627244,7.382075,-1.596208,-0.444535,


In [108]:
top_200 = rankings_sorted.head(200)['Player']

df_200 = df[df['Player'].isin(top_200)]



rankings_200 = pd.DataFrame()

rankings_200['Player'] = df_200['Player']
rankings_200["pV"] = df_200['p/g']/sum(df_200['p/g'])
rankings_200["3V"] = df_200['3/g']/sum(df_200['3/g'])
rankings_200["rV"] = df_200['r/g']/sum(df_200['r/g'])
rankings_200["aV"] = df_200['a/g']/sum(df_200['a/g'])
rankings_200["sV"] = df_200['s/g']/sum(df_200['s/g'])
rankings_200["bV"] = df_200['b/g']/sum(df_200['b/g'])
rankings_200["toV"] = -df_200['to/g']/sum(df_200['to/g'])


rankings_200['ft+V'] = df_200['ft+/g']/sum(df_200['ft+/g'])
rankings_200['ft-V'] = -df_200['ft-/g']/sum(df_200['ft-/g'])

rankings_200['fg+V'] = df_200['fg+/g']/sum(df_200['fg+/g'])
rankings_200['fg-V'] = -df_200['fg-/g']/sum(df_200['fg-/g'])

rankings_200['ft%V'] = (rankings_200['ft+V'] + rankings_200['ft-V'])/2
rankings_200['fg%V'] = (rankings_200['fg+V'] + rankings_200['fg-V'])/2


rankings_200 = rankings_200.drop(['ft+V', 'ft-V', 'fg+V', 'fg-V'], axis=1)



rankings_200["Value"] = rankings_200.sum(axis=1)
rankings_200 = rankings_2[['Value'] + [col for col in rankings_2.columns if col != 'Value']]
rankings_200[rankings_2.select_dtypes(include=['number']).columns] *= 200




rankings_200_sorted = rankings_200.sort_values("Value", ascending=False).reset_index(drop=True)






# rankings_200_sorted.to_csv(r'/Users/timothyhuang/Desktop/FantasyBasketball/19-20_Rankings.csv', index = False, header=True)

rankings_200_sorted




Unnamed: 0,Value,Player,pV,3V,rV,aV,sV,bV,toV,ft%V,fg%V
0,9115.647499,James Harden,2081.671134,2478.907573,955.768419,2075.247919,1571.790017,1177.864079,-2135.405777,944.204643,-34.400508
1,8323.739728,Anthony Davis,1582.052807,654.694761,1356.159760,889.093110,1254.998275,3078.911371,-1171.028975,548.210216,130.648403
2,7734.114586,Damian Lillard,1816.839233,2306.311090,627.048502,2213.303001,906.875210,444.970874,-1385.786748,777.316917,27.236507
3,7444.373516,Karl-Anthony Towns,1603.899272,1836.262925,1573.803423,1204.848178,806.193554,1601.895147,-1481.710131,154.380216,144.800932
4,7127.031673,Kristaps Porziņģis,1237.976134,1404.467026,1380.529319,493.212704,615.038676,2693.244764,-760.942045,130.867120,-67.362026
5,7071.715676,Kyrie Irving,1661.057669,1578.541812,750.471077,1763.960731,1154.322589,667.456311,-1225.778381,646.713306,74.970561
6,7059.335779,Kawhi Leonard,1641.062865,1216.545382,1027.727382,1353.917228,1545.097162,772.844150,-1232.395269,686.976223,47.560656
7,7041.713528,Jusuf Nurkić,1068.472314,140.941233,1493.656027,1102.475457,1175.698933,2669.825245,-1119.701406,426.155959,84.189766
8,6685.940105,Hassan Whiteside,941.006632,33.657608,1968.341761,320.869723,331.811911,3905.117522,-844.393833,-235.158968,264.687750
9,6403.328507,Trae Young,1796.448014,1926.196855,619.320792,2572.442733,926.308250,177.988350,-2270.833027,709.689044,-54.232504


In [135]:
punts = ['toV', 'fg%V']


rankings_punt = rankings.copy()


rankings_punt["Value"] = rankings_punt['Value'] - rankings_punt[punts].sum(axis=1)
rankings_punt_sorted = rankings_punt.sort_values("Value", ascending=False).reset_index(drop=True)

# rankings_punt_sorted


In [136]:
punt_value_rankings = rankings_punt.copy()
punt_value_rankings['Punt_Value'] = -rankings_punt[punts].sum(axis=1)
punt_value_rankings = punt_value_rankings[['Punt_Value'] + [col for col in punt_value_rankings.columns if col != 'Punt_Value']]

punt_value_rankings_sorted = punt_value_rankings.sort_values("Punt_Value", ascending=False).reset_index(drop=True)
# punt_value_rankings_sorted

In [80]:
ratios_df = df_capped[['p/g','3/g','r/g','a/g','s/g','b/g','to/g']]


# normalized_by_sum = (df_capped[['PTS', '3P', 'TRB', 'AST', 'STL','BLK', 'TOV']].apply(sum)/sum(df_capped['PTS'])).apply(lambda x: 1/x)


ratios = pd.DataFrame(data=np.array([sum(ratios_df['p/g'])/ratios_df.sum(axis=0), np.std(ratios_df['p/g'])/np.std(ratios_df, axis=0)]),
                     index = ["Normalizing using sum", "Normalizing using std"],
                     columns = ratios_df.columns)       

ratios

Unnamed: 0,p/g,3/g,r/g,a/g,s/g,b/g,to/g
Normalizing using sum,1.0,8.703284,2.421295,4.437072,13.94828,21.302622,7.375775
Normalizing using std,1.0,6.93721,2.567516,3.480613,12.770231,13.811681,6.799274
