<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Model-Creation" data-toc-modified-id="Model-Creation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Model Creation</a></span><ul class="toc-item"><li><span><a href="#DataFrame-setup" data-toc-modified-id="DataFrame-setup-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>DataFrame setup</a></span></li><li><span><a href="#Linear-Models-using-Standard-Scaler" data-toc-modified-id="Linear-Models-using-Standard-Scaler-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Linear Models using Standard Scaler</a></span><ul class="toc-item"><li><span><a href="#Train-Test-Split-the-data" data-toc-modified-id="Train-Test-Split-the-data-1.2.1"><span class="toc-item-num">1.2.1&nbsp;&nbsp;</span>Train Test Split the data</a></span></li><li><span><a href="#First-Try" data-toc-modified-id="First-Try-1.2.2"><span class="toc-item-num">1.2.2&nbsp;&nbsp;</span>First Try</a></span></li><li><span><a href="#Second-Try" data-toc-modified-id="Second-Try-1.2.3"><span class="toc-item-num">1.2.3&nbsp;&nbsp;</span>Second Try</a></span></li></ul></li><li><span><a href="#Linear-Models-using-Min-Max-Scaler" data-toc-modified-id="Linear-Models-using-Min-Max-Scaler-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Linear Models using Min Max Scaler</a></span><ul class="toc-item"><li><span><a href="#Train-Test-Split-the-data" data-toc-modified-id="Train-Test-Split-the-data-1.3.1"><span class="toc-item-num">1.3.1&nbsp;&nbsp;</span>Train Test Split the data</a></span></li><li><span><a href="#First-try" data-toc-modified-id="First-try-1.3.2"><span class="toc-item-num">1.3.2&nbsp;&nbsp;</span>First try</a></span></li></ul></li></ul></li><li><span><a href="#Map-predictions-to-Data" data-toc-modified-id="Map-predictions-to-Data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Map predictions to Data</a></span></li><li><span><a href="#Predicting-with-unseen-data" data-toc-modified-id="Predicting-with-unseen-data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Predicting with unseen data</a></span></li></ul></div>

In [1]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_squared_log_error

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns',100)

# Model Creation

## DataFrame setup

In [54]:
# DataFrame
data = pd.read_pickle('final_df.pickle')

dupes = data[data.duplicated(['pid','age'],keep=False)].sort_values(by=['player','g'],ascending=False)

data = data.drop(dupes.index)

data = pd.concat([data, dupes[dupes.tm == 'TOT']])

data = data.rename(columns={'3p':'fg3',
                   '3pa':'fg3a',
                   '2p':'fg2',
                   '2pa':'fg2a',
                   '3p_pct':'fg3_pct',
                   '2p_pct':'fg2_pct'})

In [62]:
upto_2019 = data[(data.to == 2019)]

In [64]:
newdf = pd.DataFrame()
for player in list(set(data.player)):
    adf = upto_2019[upto_2019.player == player].sort_values(by='season', ascending=False)
    adf.ws = adf.ws.shift(1)
    newdf = pd.concat([newdf,adf])

In [66]:
newdf = newdf.sort_values(by=['player','season']).reset_index().drop(['index','rk'],axis=1)

In [67]:
newdf = newdf.drop(newdf[newdf.duplicated(['pid','age'], keep=False)].index)

In [68]:
train = newdf[newdf.season != 2019]

In [69]:
train = pd.get_dummies(train, columns=['pos'])

In [72]:
features = ['age','g','mp','fg3','fg3a','fg2','fg2a','ft','fta', 
            'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'pos_C', 
            'pos_C-PF', 'pos_PF','pos_PF-C', 'pos_PF-SF', 'pos_PG', 
            'pos_PG-SG', 'pos_SF','pos_SF-PF', 'pos_SF-SG', 
            'pos_SG', 'pos_SG-PF', 'pos_SG-PG','pos_SG-SF']

## Linear Models using Standard Scaler

### Train Test Split the data

In [73]:
X = train
y = train.ws
# this is data to use for train and test
# take random 80% of data to train, 20% to test (train test split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scale = StandardScaler()
X_train[features] = scale.fit_transform(X_train[features])
X_tr_feats = X_train[features]

X_test[features] = scale.fit_transform(X_test[features])
X_ts_feats = X_test[features]

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


### First Try

In [119]:
# Build a Ridge, Lasso and regular linear regression model. 
# Note that in scikit learn, the regularization parameter is denoted by alpha (and not lambda)
ridge1 = Ridge(alpha=0.5)
ridge1.fit(X_tr_feats, y_train)

lasso1 = Lasso(alpha=0.5)
lasso1.fit(X_tr_feats, y_train)

lin1 = LinearRegression()
lin1.fit(X_tr_feats, y_train)

pass

In [120]:
# Create preditions for training and test sets 
y_h_ridge_train1 = ridge1.predict(X_tr_feats)
y_h_ridge_test1 = ridge1.predict(X_ts_feats)

y_h_lasso_train1 = lasso1.predict(X_tr_feats)
y_h_lasso_test1 = lasso1.predict(X_ts_feats)

y_h_lin_train1 = lin1.predict(X_tr_feats)
y_h_lin_test1 = lin1.predict(X_ts_feats)

In [121]:
# Print errors 
print('Train Error Ridge Model', np.sum((y_train - y_h_ridge_train1)**2))
print('Test Error Ridge Model', np.sum((y_test - y_h_ridge_test1)**2))
print('\n')

print('Train Error Lasso Model', np.sum((y_train - y_h_lasso_train1)**2))
print('Test Error Lasso Model', np.sum((y_test - y_h_lasso_test1)**2))
print('\n')

print('Train Error Unpenalized Linear Model', np.sum((y_train - lin1.predict(X_tr_feats))**2))
print('Test Error Unpenalized Linear Model', np.sum((y_test - lin1.predict(X_ts_feats))**2))

Train Error Ridge Model 9001.065980138514
Test Error Ridge Model 2399.173962218293


Train Error Lasso Model 11267.7510160227
Test Error Lasso Model 3042.367921035061


Train Error Unpenalized Linear Model 9000.694249405391
Test Error Unpenalized Linear Model 2396.847085083272


In [122]:
# First Ridge regression 
print('Training r^2:', ridge1.score(X_tr_feats, y_train))
print('Testing r^2:', ridge1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, ridge1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, ridge1.predict(X_ts_feats)))

Training r^2: 0.50756362739767
Testing r^2: 0.5208899615567733
Training MSE: 5.7004851045842395
Testing MSE: 6.07385813219821


In [123]:
# First Lasso regression 
print('Training r^2:', lasso1.score(X_tr_feats, y_train))
print('Testing r^2:', lasso1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, lasso1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, lasso1.predict(X_ts_feats)))

Training r^2: 0.38355629766964006
Testing r^2: 0.3924454689155539
Training MSE: 7.136004443332932
Testing MSE: 7.702197268443192


In [124]:
# First Linear regression 
print('Training r^2:', lin1.score(X_tr_feats, y_train))
print('Testing r^2:', lin1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, lin1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, lin1.predict(X_ts_feats)))

Training r^2: 0.5075839642926787
Testing r^2: 0.5213546340695502
Training MSE: 5.700249682967315
Testing MSE: 6.067967304008283


In [125]:
# Print coefficients 
print('Ridge parameter coefficients: \n', ridge1.coef_)
print('Lasso parameter coefficients: \n', lasso1.coef_)
print('Linear model parameter coefficients: \n', lin1.coef_)

Ridge parameter coefficients: 
 [-0.26832533 -0.42007364  0.10886989  1.61370009 -2.19641436  1.69628518
 -2.91060182  1.76848838 -0.60382563  0.84208932  1.07231727  0.51932755
  0.52806195 -1.14134579 -0.48948398  2.08827703  0.05438357  0.
  0.09254524  0.0289858  -0.00506737 -0.05214548 -0.08900319 -0.00511677
  0.02215804  0.0142629  -0.08017964  0.02401406  0.         -0.03491177]
Lasso parameter coefficients: 
 [ 0.          0.          0.          0.          0.          0.07999284
  0.          0.98356054  0.28361408  0.43006089  0.          0.
  0.01341094  0.          0.          0.          0.          0.
  0.          0.         -0.          0.         -0.         -0.
  0.         -0.         -0.         -0.          0.         -0.        ]
Linear model parameter coefficients: 
 [-2.70677263e-01 -4.19984978e-01  1.16470772e-01  1.64512363e+00
 -2.25444119e+00  1.75006818e+00 -3.00667241e+00  1.80470727e+00
 -6.52803758e-01  8.39203567e-01  1.07125826e+00  5.19771750e-01
  

In [126]:
# First Ridge coefficients for features
dict(zip(ridge1.coef_,X_tr_feats.columns))

{-0.2683253265771707: 'age',
 -0.4200736358300264: 'g',
 0.10886989243383319: 'mp',
 1.6137000864182653: 'fg3',
 -2.1964143564204734: 'fg3a',
 1.6962851798229959: 'fg2',
 -2.9106018200761232: 'fg2a',
 1.7684883801891167: 'ft',
 -0.6038256296961837: 'fta',
 0.8420893241117723: 'trb',
 1.0723172734581874: 'ast',
 0.5193275458660954: 'stl',
 0.5280619535877584: 'blk',
 -1.1413457942196372: 'tov',
 -0.48948397832450624: 'pf',
 2.08827703029971: 'pts',
 0.054383574003378836: 'pos_C',
 0.0: 'pos_SG-PG',
 0.09254523693390235: 'pos_PF',
 0.02898580302084076: 'pos_PF-C',
 -0.005067368441528095: 'pos_PF-SF',
 -0.05214547959574399: 'pos_PG',
 -0.08900318932524669: 'pos_PG-SG',
 -0.005116771981128474: 'pos_SF',
 0.022158038948987362: 'pos_SF-PF',
 0.014262904414134499: 'pos_SF-SG',
 -0.08017963616618076: 'pos_SG',
 0.024014058668863622: 'pos_SG-PF',
 -0.03491177482807353: 'pos_SG-SF'}

In [128]:
# First Lasso coefficients for features
dict(zip(lasso1.coef_,X_tr_feats.columns))

{0.0: 'pos_SG-SF',
 0.07999283723771598: 'fg2',
 0.9835605423054685: 'ft',
 0.28361407930344645: 'fta',
 0.43006089400700637: 'trb',
 0.013410941640544483: 'blk'}

In [127]:
# First Linear Reg coefficients for features
dict(zip(lin1.coef_,X_tr_feats.columns))

{-0.2706772628606942: 'age',
 -0.41998497778025246: 'g',
 0.11647077208019335: 'mp',
 1.6451236334931414: 'fg3',
 -2.254441188367909: 'fg3a',
 1.750068181041483: 'fg2',
 -3.006672411244105: 'fg2a',
 1.8047072722335686: 'ft',
 -0.6528037575244947: 'fta',
 0.8392035665274975: 'trb',
 1.0712582641229764: 'ast',
 0.5197717498151113: 'stl',
 0.525886306026266: 'blk',
 -1.1332469544371933: 'tov',
 -0.48875336532875163: 'pf',
 2.1427750416761855: 'pts',
 0.051572222151613153: 'pos_C',
 7.771561172376096e-16: 'pos_C-PF',
 0.0925388082633836: 'pos_PF',
 0.028930386035915707: 'pos_PF-C',
 -0.004866681621763081: 'pos_PF-SF',
 -0.05175936230503867: 'pos_PG',
 -0.08870943912268561: 'pos_PG-SG',
 -0.004389064896931874: 'pos_SF',
 0.02225641571398076: 'pos_SF-PF',
 0.014894503930919446: 'pos_SF-SG',
 -0.07863817363577011: 'pos_SG',
 0.02376829849560195: 'pos_SG-PF',
 0.0: 'pos_SG-PG',
 -0.0349693446695098: 'pos_SG-SF'}

### Second Try

In [129]:
# Build a Ridge, Lasso and regular linear regression model. 
# Note that in scikit learn, the regularization parameter is denoted by alpha (and not lambda)
ridge1 = Ridge(alpha=1)
ridge1.fit(X_tr_feats, y_train)

lasso1 = Lasso(alpha=1)
lasso1.fit(X_tr_feats, y_train)

lin1 = LinearRegression()
lin1.fit(X_tr_feats, y_train)

pass

In [130]:
# Create preditions for training and test sets 
y_h_ridge_train1 = ridge1.predict(X_tr_feats)
y_h_ridge_test1 = ridge1.predict(X_ts_feats)

y_h_lasso_train1 = lasso1.predict(X_tr_feats)
y_h_lasso_test1 = lasso1.predict(X_ts_feats)

y_h_lin_train1 = lin1.predict(X_tr_feats)
y_h_lin_test1 = lin1.predict(X_ts_feats)

In [131]:
# Print errors 
print('Train Error Ridge Model', np.sum((y_train - y_h_ridge_train1)**2))
print('Test Error Ridge Model', np.sum((y_test - y_h_ridge_test1)**2))
print('\n')

print('Train Error Lasso Model', np.sum((y_train - y_h_lasso_train1)**2))
print('Test Error Lasso Model', np.sum((y_test - y_h_lasso_test1)**2))
print('\n')

print('Train Error Unpenalized Linear Model', np.sum((y_train - lin1.predict(X_tr_feats))**2))
print('Test Error Unpenalized Linear Model', np.sum((y_test - lin1.predict(X_ts_feats))**2))

Train Error Ridge Model 9002.09509458539
Test Error Ridge Model 2401.549448834042


Train Error Lasso Model 12869.646850924983
Test Error Lasso Model 3483.646098572196


Train Error Unpenalized Linear Model 9000.694249405391
Test Error Unpenalized Linear Model 2396.847085083272


In [132]:
# Second Ridge regression 
print('Training r^2:', ridge1.score(X_tr_feats, y_train))
print('Testing r^2:', ridge1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, ridge1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, ridge1.predict(X_ts_feats)))

Training r^2: 0.5075073259122316
Testing r^2: 0.5204155818320366
Training MSE: 5.701136855342235
Testing MSE: 6.079872022364663


In [133]:
# Second Lasso regression 
print('Training r^2:', lasso1.score(X_tr_feats, y_train))
print('Testing r^2:', lasso1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, lasso1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, lasso1.predict(X_ts_feats)))

Training r^2: 0.29591870274847476
Testing r^2: 0.30432313684068735
Training MSE: 8.150504655430641
Testing MSE: 8.81935721157518


In [134]:
# Second Linear regression 
print('Training r^2:', lin1.score(X_tr_feats, y_train))
print('Testing r^2:', lin1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, lin1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, lin1.predict(X_ts_feats)))

Training r^2: 0.5075839642926787
Testing r^2: 0.5213546340695502
Training MSE: 5.700249682967315
Testing MSE: 6.067967304008283


In [135]:
# Print coefficients 
print('Ridge parameter coefficients: \n', ridge1.coef_)
print('Lasso parameter coefficients: \n', lasso1.coef_)
print('Linear model parameter coefficients: \n', lin1.coef_)

Ridge parameter coefficients: 
 [-0.26607689 -0.42020387  0.10190495  1.5830286  -2.14081923  1.64629046
 -2.82098463  1.73487269 -0.55838898  0.84468377  1.07296594  0.51890064
  0.53007622 -1.14849106 -0.49017317  2.03712454  0.0570388   0.
  0.09255629  0.02903851 -0.00526151 -0.05248522 -0.08927696 -0.00582525
  0.02206592  0.01367058 -0.08164449  0.02424029  0.         -0.03485527]
Lasso parameter coefficients: 
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  8.56383961e-02  0.00000000e+00  7.00954345e-04
  1.00769301e+00  1.87963434e-02  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00 -0.00000000e+00
 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  0.00000000e+00 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
  0.00000000e+00 -0.00000000e+00]
Linear model parameter coefficients: 
 [-2.70677263e-01 -4.19984978e-01  1.16470772e-01  

In [136]:
# Second Ridge coefficients for features 
dict(zip(ridge1.coef_,X_tr_feats.columns))

{-0.26607689195766443: 'age',
 -0.4202038698412985: 'g',
 0.10190495345588926: 'mp',
 1.5830285991186488: 'fg3',
 -2.1408192278752622: 'fg3a',
 1.646290458954069: 'fg2',
 -2.8209846287815554: 'fg2a',
 1.7348726872287548: 'ft',
 -0.558388978471235: 'fta',
 0.8446837729396091: 'trb',
 1.072965941888443: 'ast',
 0.5189006366747075: 'stl',
 0.5300762245257017: 'blk',
 -1.1484910559230235: 'tov',
 -0.49017316613633705: 'pf',
 2.0371245374230504: 'pts',
 0.057038796268298025: 'pos_C',
 0.0: 'pos_SG-PG',
 0.09255629343042343: 'pos_PF',
 0.029038513167840542: 'pos_PF-C',
 -0.0052615104969335105: 'pos_PF-SF',
 -0.052485224442124064: 'pos_PG',
 -0.08927695643323695: 'pos_PG-SG',
 -0.005825254814467825: 'pos_SF',
 0.02206591657755437: 'pos_SF-PF',
 0.013670580490726517: 'pos_SF-SG',
 -0.08164448783197585: 'pos_SG',
 0.024240290188495092: 'pos_SG-PF',
 -0.03485527324802235: 'pos_SG-SF'}

In [137]:
# Second Lasso coefficients for features 
dict(zip(lasso1.coef_,X_tr_feats.columns))

{0.0: 'pos_SG-SF',
 0.08563839611017023: 'fg2',
 0.0007009543446779006: 'ft',
 1.00769301057652: 'fta',
 0.018796343420461936: 'trb'}

## Linear Models using Min Max Scaler

### Train Test Split the data

In [164]:
X2 = train
y2 = train.ws
# this is data to use for train and test
# take random 80% of data to train, 20% to test (train test split)
X2_train, X2_test, y2_train, y2_test = train_test_split(X, y, test_size=0.2, random_state=42)

scale = MinMaxScaler()
X2_train[features] = scale.fit_transform(X2_train[features])
X2_tr_feats = X2_train[features]

X2_test[features] = scale.fit_transform(X2_test[features])
X2_ts_feats = X2_test[features]

  return self.partial_fit(X, y)
  return self.partial_fit(X, y)


### First try

In [165]:
# Build a Ridge, Lasso and regular linear regression model. 
# Note that in scikit learn, the regularization parameter is denoted by alpha (and not lambda)
ridge2 = Ridge(alpha=0.5)
ridge2.fit(X2_tr_feats, y2_train)

lasso2 = Lasso(alpha=0.5)
lasso2.fit(X2_tr_feats, y2_train)

lin2 = LinearRegression()
lin2.fit(X2_tr_feats, y2_train)

pass

In [166]:
# Create preditions for training and test sets 
y_h_ridge_train2 = ridge2.predict(X2_tr_feats)
y_h_ridge_test2 = ridge2.predict(X2_ts_feats)

y_h_lasso_train2 = lasso2.predict(X2_tr_feats)
y_h_lasso_test2 = lasso2.predict(X2_ts_feats)

y_h_lin_train2 = lin2.predict(X2_tr_feats)
y_h_lin_test2 = lin2.predict(X2_ts_feats)

In [167]:
# Print errors 
print('Train Error Ridge Model', np.sum((y2_train - y_h_ridge_train2)**2))
print('Test Error Ridge Model', np.sum((y2_test - y_h_ridge_test2)**2))
print('\n')

print('Train Error Lasso Model', np.sum((y2_train - y_h_lasso_train2)**2))
print('Test Error Lasso Model', np.sum((y2_test - y_h_lasso_test2)**2))
print('\n')

print('Train Error Unpenalized Linear Model', np.sum((y2_train - lin2.predict(X2_tr_feats))**2))
print('Test Error Unpenalized Linear Model', np.sum((y2_test - lin2.predict(X2_ts_feats))**2))

Train Error Ridge Model 9101.760624824541
Test Error Ridge Model 2398.1614257735564


Train Error Lasso Model 18278.637568081067
Test Error Lasso Model 5028.342197978454


Train Error Unpenalized Linear Model 9000.694249405391
Test Error Unpenalized Linear Model 2391.076528416159


In [168]:
# Ridge regression 
print('Training r^2:', ridge2.score(X2_tr_feats, y2_train))
print('Testing r^2:', ridge2.score(X2_ts_feats, y2_test))
print('Training MSE:', mean_squared_error(y2_train, ridge2.predict(X2_tr_feats)))
print('Testing MSE:', mean_squared_error(y2_test, ridge2.predict(X2_ts_feats)))

Training r^2: 0.5020547570395278
Testing r^2: 0.5210921629738454
Training MSE: 5.764256253847081
Testing MSE: 6.071294748793814


In [169]:
# Lasso regression 
print('Training r^2:', lasso2.score(X2_tr_feats, y2_train))
print('Testing r^2:', lasso2.score(X2_ts_feats, y2_test))
print('Training MSE:', mean_squared_error(y2_train, ridge2.predict(X2_tr_feats)))
print('Testing MSE:', mean_squared_error(y2_test, ridge2.predict(X2_ts_feats)))

Training r^2: 0.0
Testing r^2: -0.004149453819371374
Training MSE: 5.764256253847081
Testing MSE: 6.071294748793814


In [170]:
# Linear regression 
print('Training r^2:', lin2.score(X2_tr_feats, y2_train))
print('Testing r^2:', lin2.score(X2_ts_feats, y2_test))
print('Training MSE:', mean_squared_error(y2_train, ridge2.predict(X2_tr_feats)))
print('Testing MSE:', mean_squared_error(y2_test, ridge2.predict(X2_ts_feats)))

Training r^2: 0.5075839642926787
Testing r^2: 0.5225070022054827
Training MSE: 5.764256253847081
Testing MSE: 6.071294748793814


In [171]:
# Print coefficients 
print('Ridge parameter coefficients: \n', ridge2.coef_)
print('Lasso parameter coefficients: \n', lasso2.coef_)
print('Linear model parameter coefficients: \n', lin2.coef_)

Ridge parameter coefficients: 
 [-1.41948688 -1.87008258  0.28093173  5.45273183 -6.22540896  5.1689646
 -8.46403768  7.45046013  0.37493189  4.87972236  5.8032237   3.04609323
  3.95302073 -7.37183548 -2.63212135  7.76220448  0.2991302   0.
  0.29013336  0.71493624 -0.13000473 -0.09339123 -1.36889032 -0.01310046
  0.51426202  0.14871016 -0.22746248  0.73183434  0.         -0.86615709]
Lasso parameter coefficients: 
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0. -0. -0. -0. -0. -0. -0. -0. -0. -0.  0. -0.]
Linear model parameter coefficients: 
 [-1.73670118e+00 -1.78404669e+00  4.93126469e-01  1.09340875e+01
 -1.34628791e+01  7.55674193e+00 -1.40423801e+01  1.08234126e+01
 -3.91335615e+00  4.93164273e+00  6.32154390e+00  3.15873992e+00
  3.90229195e+00 -7.55541208e+00 -2.57384337e+00  1.27057224e+01
  1.48591733e-01 -1.11022302e-15  2.49891767e-01  8.32002179e-01
 -7.82158192e-02 -1.12674476e-01 -1.42322343e+00  7.42574072e-03
  6.44357802e-01  3.1490392

In [172]:
# First Ridge coefficients for features using MinMaxScaler
dict(zip(ridge2.coef_,X_tr_feats.columns))

{-1.4194868844265236: 'age',
 -1.870082581080154: 'g',
 0.28093173054441345: 'mp',
 5.452731825124206: 'fg3',
 -6.225408958781497: 'fg3a',
 5.168964604462456: 'fg2',
 -8.464037678708527: 'fg2a',
 7.450460130445268: 'ft',
 0.37493188859838: 'fta',
 4.879722363399208: 'trb',
 5.803223701515055: 'ast',
 3.046093228508463: 'stl',
 3.953020731242767: 'blk',
 -7.3718354767893715: 'tov',
 -2.6321213462284807: 'pf',
 7.762204478886355: 'pts',
 0.2991302029239762: 'pos_C',
 0.0: 'pos_SG-PG',
 0.2901333576140042: 'pos_PF',
 0.7149362356919373: 'pos_PF-C',
 -0.13000473256820552: 'pos_PF-SF',
 -0.09339123465949785: 'pos_PG',
 -1.368890320380488: 'pos_PG-SG',
 -0.013100460451681851: 'pos_SF',
 0.5142620211845025: 'pos_SF-PF',
 0.14871015978321875: 'pos_SF-SG',
 -0.2274624752038193: 'pos_SG',
 0.7318343395000199: 'pos_SG-PF',
 -0.8661570934337501: 'pos_SG-SF'}

In [173]:
# First Lasso coefficients for features using MinMaxScaler
dict(zip(lasso2.coef_,X_tr_feats.columns))

{0.0: 'pos_SG-SF'}

In [174]:
# First Linear Reg coefficients for features using MinMaxScaler
dict(zip(lin2.coef_,X_tr_feats.columns))

{-1.7367011786220228: 'age',
 -1.7840466891275826: 'g',
 0.4931264687714929: 'mp',
 10.934087520307457: 'fg3',
 -13.462879134815395: 'fg3a',
 7.556741933089347: 'fg2',
 -14.042380072432033: 'fg2a',
 10.823412552162623: 'ft',
 -3.913356153730349: 'fta',
 4.931642733393885: 'trb',
 6.321543898359589: 'ast',
 3.158739919491063: 'stl',
 3.9022919517302306: 'blk',
 -7.555412082341085: 'tov',
 -2.573843370139351: 'pf',
 12.7057223603621: 'pts',
 0.14859173265155656: 'pos_C',
 -1.1102230246251565e-15: 'pos_C-PF',
 0.24989176687846365: 'pos_PF',
 0.8320021794605029: 'pos_PF-C',
 -0.07821581916000325: 'pos_PF-SF',
 -0.11267447610978287: 'pos_PG',
 -1.4232234339905077: 'pos_PG-SG',
 0.0074257407233129236: 'pos_SF',
 0.6443578016678416: 'pos_SF-PF',
 0.3149039203659649: 'pos_SF-SG',
 -0.17502586002751275: 'pos_SG',
 0.9633710268631228: 'pos_SG-PF',
 0.0: 'pos_SG-PG',
 -1.371404579322061: 'pos_SG-SF'}

# Map predictions to Data

In [175]:
X_train['lasso_pred'] = y_h_lasso_train1
X_train['ridge_pred'] = y_h_ridge_train1
X_train['lin_pred'] = y_h_lin_train1

In [176]:
X_test['lasso_pred'] = y_h_lasso_test1
X_test['ridge_pred'] = y_h_ridge_test1
X_test['lin_pred'] = y_h_lin_test1

In [177]:
preds_df = pd.concat([X_train,X_test])

In [203]:
# Display actual next-season win share with all predictions 
train.merge(preds_df[['pid','lasso_pred','ridge_pred','lin_pred','season']],on=['pid','season'])[['player','season','ws','lasso_pred','ridge_pred','lin_pred']]

Unnamed: 0,player,season,ws,lasso_pred,ridge_pred,lin_pred
0,Aaron Gordon,2015,5.4,3.405775,2.953659,2.959901
1,Aaron Gordon,2016,3.7,4.388927,5.622312,5.605683
2,Aaron Gordon,2017,2.9,4.583317,4.501880,4.472165
3,Aaron Gordon,2018,5.1,4.610666,5.430652,5.377793
4,Abdel Nader,2018,0.9,3.223253,1.483456,1.473314
5,Al Horford,2008,6.8,4.593711,5.309519,5.315449
6,Al Horford,2009,10.9,4.521720,6.723150,6.735525
7,Al Horford,2010,10.1,5.049290,8.304471,8.355763
8,Al Horford,2011,1.3,4.525044,8.010841,8.066097
9,Al Horford,2012,8.8,3.180559,3.266717,3.264470


# Predicting with unseen data

In [179]:
# this is the data to predict on (NOT TRAIN OR TEST) because we cannot validate
df_last = newdf[newdf.season == 2019]

In [180]:
df_last = pd.get_dummies(df_last, columns=['pos'])

In [195]:
dflfeats = ['age', 'g', 'mp', 'fg3', 'fg3a', 'fg2', 'fg2a', 
            'ft', 'fta', 'trb', 'ast', 'stl', 'blk', 'tov', 
            'pf', 'pts', 'pos_C', 'pos_C-PF', 'pos_PF', 'pos_PF-C', 
            'pos_PF-SF', 'pos_PG', 'pos_SF', 'pos_SF-SG', 'pos_SG', 
            'pos_SG-PF','pos_SF-PF', 'pos_SG-PG', 'pos_SG-SF', 'pos_PG-SG']

In [196]:
for col in ['pos_SF-PF', 'pos_SG-PG', 'pos_SG-SF', 'pos_PG-SG', 'pos_PG-SF']:
    df_last[col] = 0

In [197]:
scale = StandardScaler()
df_last[dflfeats] = scale.fit_transform(df_last[dflfeats])

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [198]:
df_last['ridge_pred'] = ridge1.predict(df_last[dflfeats])

In [199]:
df_last[['pid','season','ridge_pred']]

Unnamed: 0,pid,season,ridge_pred
4,gordoaa01,2019,6.535598
5,holidaa01,2019,2.581106
7,naderab01,2019,1.855462
19,horfoal01,2019,7.153937
28,aminual01,2019,6.353019
36,burksal01,2019,4.000629
42,lenal01,2019,4.664130
43,johnsal02,2019,2.926138
49,crabbal01,2019,2.514757
63,johnsam01,2019,2.097928


In [200]:
base_df = newdf[newdf.season == newdf.to]

In [201]:
predlast_df = base_df.merge(df_last[['pid','season','ridge_pred']], on=['pid','season'])

In [202]:
predlast_df[predlast_df.season==2019][['player', 'age', 'season', 'g', 'mp', 'fg3', 'fg3a', 'fg2', 'fg2a', 
                                       'ft', 'fta', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'ws', 
                                       'ridge_pred']]

Unnamed: 0,player,age,season,g,mp,fg3,fg3a,fg2,fg2a,ft,fta,trb,ast,stl,blk,tov,pf,pts,ws,ridge_pred
0,Aaron Gordon,23,2019,78,2633,121,347,349,699,185,253,574,289,57,56,162,172,1246,,6.535598
1,Aaron Holiday,22,2019,50,646,43,127,62,135,41,50,67,87,21,13,40,71,294,,2.581106
2,Abdel Nader,25,2019,61,694,32,100,59,115,27,36,116,20,20,12,26,68,241,,1.855462
3,Al Horford,32,2019,68,1973,73,203,314,520,78,95,458,283,59,86,102,126,925,,7.153937
4,Al-Farouq Aminu,28,2019,81,2292,96,280,161,313,150,173,610,104,68,33,72,143,760,,6.353019
5,Alec Burks,27,2019,64,1375,61,168,131,306,116,141,235,128,39,21,65,91,561,,4.000629
6,Alex Len,25,2019,77,1544,74,204,246,444,140,216,424,86,27,69,97,200,854,,4.664130
7,Alize Johnson,22,2019,14,64,1,2,3,14,4,8,19,1,1,3,0,7,13,,2.926138
8,Allen Crabbe,26,2019,43,1133,98,259,39,114,41,56,148,46,23,13,46,102,413,,2.514757
9,Amir Johnson,31,2019,51,529,12,40,67,117,31,41,147,60,16,13,45,99,201,,2.097928
