In [59]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [10]:
# Read the raw player sheet from the working directory and preview the first rows.
vorp_df = pd.read_csv(filepath_or_buffer="2023-VORP-Sheet1.csv")
vorp_df.head(5)

Unnamed: 0,Player,Career VORP,Seasons Played,Total Career Games,Points per Game,Rebounds per Game,Assists per Game,Field Goal %,Minutes per Game,Field Goals per Game,Championships,HOF?
0,Álex Abrines,0.1,3,174,5.3,1.4,0.5,38.7%,16.0,1.8,0,False
1,Jaylen Adams,-0.3,2,41,2.7,1.5,1.6,33.1%,10.9,1.0,0,False
2,LaMarcus Aldridge,33.4,16,1076,19.1,8.1,1.9,49.3%,33.7,7.7,0,False
3,Shandon Anderson,3.0,10,719,7.4,3.1,1.4,45.7%,22.2,2.8,1,False
4,Carmelo Anthony,36.7,19,1260,22.5,6.2,2.7,44.7%,34.5,8.0,0,False


In [14]:
# Baseline multiple linear regression of career VORP on the raw career stats.
# Reference for reading R^2 off an sklearn model:
# https://stackoverflow.com/questions/42033720/python-sklearn-multiple-linear-regression-display-r-squared
predictor_columns = [
    "Seasons Played",
    "Total Career Games",
    "Points per Game",
    "Rebounds per Game",
    "Assists per Game",
    "Minutes per Game",
    "Field Goals per Game",
    "Championships",
]
X = vorp_df[predictor_columns]
y = vorp_df[["Career VORP"]]  # double brackets keep the target as a one-column frame

# `fit` returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X, y)

# R^2 on the same rows the model was fitted on (in-sample score).
model.score(X, y)

0.6681030546640458

In [23]:
# Swap the long descriptive headers for short stat abbreviations.
# Columns not listed here (Player, Championships, HOF?) keep their names.
vorp_df = vorp_df.rename(
    {
        "Career VORP": "VORP",
        "Seasons Played": "SP",
        "Total Career Games": "G",
        "Points per Game": "PPG",
        "Rebounds per Game": "RPG",
        "Assists per Game": "APG",
        "Field Goal %": "FG%",
        "Minutes per Game": "MPG",
        "Field Goals per Game": "FGPG",
    },
    axis="columns",
)

In [43]:
# Scale each per-game stat by the number of seasons played, producing
# "<stat>*SP" columns in the same order as the per-game stats listed here.
for per_game_stat in ("PPG", "RPG", "APG", "MPG", "FGPG"):
    vorp_df[f"{per_game_stat}*SP"] = vorp_df[per_game_stat].mul(vorp_df["SP"])

# Games per season: total career games divided by seasons played.
vorp_df["GPS"] = vorp_df["G"].div(vorp_df["SP"])

In [41]:
# Encode the Hall-of-Fame flag as integer category codes so it is a numeric column.
vorp_df["HOF?"] = vorp_df["HOF?"].astype("category")
vorp_df["HOF?"] = vorp_df["HOF?"].cat.codes

# Convert percentage strings such as "38.7%" into fractions (0.387).
# The `.str` accessor lives on the Series, not on the individual Python strings
# inside it, so the stripping must be done on the whole column at once; the
# result is then cast to float and divided by 100 to obtain a fraction.
# The dtype guard makes the cell safe to re-run after the column is already numeric.
fg_pct = vorp_df["FG%"]
if not pd.api.types.is_numeric_dtype(fg_pct):
    vorp_df["FG%"] = fg_pct.str.strip("%").astype(float) / 100
vorp_df

Unnamed: 0,Player,VORP,SP,G,PPG,RPG,APG,FG%,MPG,FGPG,Championships,HOF?,PPG*SP,RPG*SP,APG*SP,MPG*SP,FGPG*SP
0,Álex Abrines,0.1,3,174,5.3,1.4,0.5,0.387,16.0,1.8,0,0,15.9,4.2,1.5,48.0,5.4
1,Jaylen Adams,-0.3,2,41,2.7,1.5,1.6,0.331,10.9,1.0,0,0,5.4,3.0,3.2,21.8,2.0
2,LaMarcus Aldridge,33.4,16,1076,19.1,8.1,1.9,0.493,33.7,7.7,0,0,305.6,129.6,30.4,539.2,123.2
3,Shandon Anderson,3.0,10,719,7.4,3.1,1.4,0.457,22.2,2.8,1,0,74.0,31.0,14.0,222.0,28.0
4,Carmelo Anthony,36.7,19,1260,22.5,6.2,2.7,0.447,34.5,8.0,0,0,427.5,117.8,51.3,655.5,152.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,Trevor Winter,0.0,1,1,0.0,3.0,0.0,0.000,5.0,0.0,0,0,0.0,3.0,0.0,5.0,0.0
195,Luther Wright,-0.2,1,15,1.3,0.7,0.0,0.348,6.1,0.5,0,0,1.3,0.7,0.0,6.1,0.5
196,Danny Young,6.6,10,574,4.6,1.2,2.9,0.437,16.6,1.7,0,0,46.0,12.0,29.0,166.0,17.0
197,Michael Young,0.2,3,49,4.6,1.8,0.5,0.465,9.6,1.9,0,0,13.8,5.4,1.5,28.8,5.7


In [68]:
# Count missing values in every column as a sanity check before exporting.
vorp_df.isna().sum()

Player           0
VORP             0
SP               0
G                0
PPG              0
RPG              0
APG              0
FG%              0
MPG              0
FGPG             0
Championships    0
HOF?             0
PPG*SP           0
RPG*SP           0
APG*SP           0
MPG*SP           0
FGPG*SP          0
GPS              0
dtype: int64

In [92]:
# Persist the full frame (raw + engineered columns) as CSV; the row index is written too.
# NOTE(review): the target path has no ".csv" extension, unlike the final export below in
# this notebook — confirm whether any reader depends on this exact name before changing it.
vorp_df.to_csv(path_or_buf="vorp_full_dataset")

In [93]:
# Remove the raw per-game / career-count columns, leaving the player name, VORP,
# the HOF flag, and the engineered columns.
raw_stat_columns = ["SP", "G", "PPG", "RPG", "APG", "FG%", "MPG", "FGPG", "Championships"]
vorp_df_final = vorp_df.drop(columns=raw_stat_columns)
vorp_df_final

Unnamed: 0,Player,VORP,HOF?,PPG*SP,RPG*SP,APG*SP,MPG*SP,FGPG*SP,GPS
0,Álex Abrines,0.1,0,15.9,4.2,1.5,48.0,5.4,58.000000
1,Jaylen Adams,-0.3,0,5.4,3.0,3.2,21.8,2.0,20.500000
2,LaMarcus Aldridge,33.4,0,305.6,129.6,30.4,539.2,123.2,67.250000
3,Shandon Anderson,3.0,0,74.0,31.0,14.0,222.0,28.0,71.900000
4,Carmelo Anthony,36.7,0,427.5,117.8,51.3,655.5,152.0,66.315789
...,...,...,...,...,...,...,...,...,...
194,Trevor Winter,0.0,0,0.0,3.0,0.0,5.0,0.0,1.000000
195,Luther Wright,-0.2,0,1.3,0.7,0.0,6.1,0.5,15.000000
196,Danny Young,6.6,0,46.0,12.0,29.0,166.0,17.0,57.400000
197,Michael Young,0.2,0,13.8,5.4,1.5,28.8,5.7,16.333333


In [94]:
# Persist the reduced modelling frame as CSV; the row index is written as the first column.
vorp_df_final.to_csv(path_or_buf="vorp_dataset_final.csv")

In [None]:
#test