<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Model-Creation" data-toc-modified-id="Model-Creation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Model Creation</a></span><ul class="toc-item"><li><span><a href="#DataFrame-setup" data-toc-modified-id="DataFrame-setup-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>DataFrame setup</a></span></li><li><span><a href="#Linear-Models-using-Standard-Scaler" data-toc-modified-id="Linear-Models-using-Standard-Scaler-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Linear Models using Standard Scaler</a></span><ul class="toc-item"><li><span><a href="#Train-Test-Split-the-data" data-toc-modified-id="Train-Test-Split-the-data-1.2.1"><span class="toc-item-num">1.2.1&nbsp;&nbsp;</span>Train Test Split the data</a></span></li><li><span><a href="#First-Try" data-toc-modified-id="First-Try-1.2.2"><span class="toc-item-num">1.2.2&nbsp;&nbsp;</span>First Try</a></span></li><li><span><a href="#Second-Try" data-toc-modified-id="Second-Try-1.2.3"><span class="toc-item-num">1.2.3&nbsp;&nbsp;</span>Second Try</a></span></li></ul></li><li><span><a href="#Linear-Models-using-Min-Max-Scaler" data-toc-modified-id="Linear-Models-using-Min-Max-Scaler-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Linear Models using Min Max Scaler</a></span></li></ul></li><li><span><a href="#Map-predictions-to-Data" data-toc-modified-id="Map-predictions-to-Data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Map predictions to Data</a></span></li><li><span><a href="#Predicting-with-unseen-data" data-toc-modified-id="Predicting-with-unseen-data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Predicting with unseen data</a></span></li></ul></div>

In [242]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_squared_log_error

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_columns',100)

# Model Creation

## DataFrame setup

In [229]:
# DataFrame
data = pd.read_pickle('final_df.pickle')

dupes = data[data.duplicated(['pid','age'],keep=False)].sort_values(by=['player','g'],ascending=False)

data = data.drop(dupes.index)

data = pd.concat([data, dupes[dupes.tm == 'TOT']])

data = data.rename(columns={'3p':'fg3',
                   '3pa':'fg3a',
                   '2p':'fg2',
                   '2pa':'fg2a',
                   '3p_pct':'fg3_pct',
                   '2p_pct':'fg2_pct'})

In [230]:
newdf = pd.DataFrame()
for player in list(set(data.player)):
    adf = data[data.player == player].sort_values(by='season', ascending=False)
    adf.ws = adf.ws.shift(1)
    newdf = pd.concat([newdf,adf])

In [231]:
newdf = newdf.sort_values(by=['player','season']).reset_index().drop(['index','rk'],axis=1)

In [232]:
newdf = newdf.drop(newdf[newdf.duplicated(['pid','age'], keep=False)].index)

In [233]:
train = newdf[newdf.season != newdf.to]

In [234]:
train = pd.get_dummies(train, columns=['pos'])

In [235]:
features = ['age','g','mp','fg3','fg3a','fg2','fg2a','ft','fta', 
            'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'pos_C', 
            'pos_C-PF', 'pos_PF','pos_PF-C', 'pos_PF-SF', 'pos_PG', 
            'pos_PG-SF', 'pos_PG-SG', 'pos_SF','pos_SF-PF', 'pos_SF-SG', 
            'pos_SG', 'pos_SG-PF', 'pos_SG-PG','pos_SG-SF']

## Linear Models using Standard Scaler

### Train Test Split the data

In [236]:
X = train
y = train.ws
# this is data to use for train and test
# take random 80% of data to train, 20% to test (train test split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scale = StandardScaler()
X_train[features] = scale.fit_transform(X_train[features])
X_tr_feats = X_train[features]

X_test[features] = scale.fit_transform(X_test[features])
X_ts_feats = X_test[features]

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to

### First Try

In [194]:
# Build a Ridge, Lasso and regular linear regression model. 
# Note that in scikit learn, the regularization parameter is denoted by alpha (and not lambda)
ridge1 = Ridge(alpha=0.5)
ridge1.fit(X_tr_feats, y_train)

lasso1 = Lasso(alpha=0.5)
lasso1.fit(X_tr_feats, y_train)

lin1 = LinearRegression()
lin1.fit(X_tr_feats, y_train)

pass

In [195]:
# Create preditions for training and test sets 
y_h_ridge_train1 = ridge1.predict(X_tr_feats)
y_h_ridge_test1 = ridge1.predict(X_ts_feats)

y_h_lasso_train1 = lasso1.predict(X_tr_feats)
y_h_lasso_test1 = lasso1.predict(X_ts_feats)

y_h_lin_train1 = lin1.predict(X_tr_feats)
y_h_lin_test1 = lin1.predict(X_ts_feats)

In [196]:
# Print errors 
print('Train Error Ridge Model', np.sum((y_train - y_h_ridge_train1)**2))
print('Test Error Ridge Model', np.sum((y_test - y_h_ridge_test1)**2))
print('\n')

print('Train Error Lasso Model', np.sum((y_train - y_h_lasso_train1)**2))
print('Test Error Lasso Model', np.sum((y_test - y_h_lasso_test1)**2))
print('\n')

print('Train Error Unpenalized Linear Model', np.sum((y_train - lin1.predict(X_tr_feats))**2))
print('Test Error Unpenalized Linear Model', np.sum((y_test - lin1.predict(X_ts_feats))**2))

Train Error Ridge Model 45497.70050515531
Test Error Ridge Model 10620.059842835311


Train Error Lasso Model 58616.82322574347
Test Error Lasso Model 13123.715280760278


Train Error Unpenalized Linear Model 45497.618207703425
Test Error Unpenalized Linear Model 10619.928634029651


In [197]:
# First Ridge regression 
print('Training r^2:', ridge1.score(X_tr_feats, y_train))
print('Testing r^2:', ridge1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, ridge1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, ridge1.predict(X_ts_feats)))

Training r^2: 0.5532769046904354
Testing r^2: 0.5450722599644962
Training MSE: 4.737369898495972
Testing MSE: 4.423181942038863


In [198]:
# First Lasso regression 
print('Training r^2:', lasso1.score(X_tr_feats, y_train))
print('Testing r^2:', lasso1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, lasso1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, lasso1.predict(X_ts_feats)))

Training r^2: 0.4244656671022168
Testing r^2: 0.4378240591955329
Training MSE: 6.10337601267633
Testing MSE: 5.465937226472419


In [199]:
# First Linear regression 
print('Training r^2:', lin1.score(X_tr_feats, y_train))
print('Testing r^2:', lin1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, lin1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, lin1.predict(X_ts_feats)))

Training r^2: 0.5532777127350627
Testing r^2: 0.5450778805096075
Training MSE: 4.737361329415184
Testing MSE: 4.423127294472991


In [200]:
# Print coefficients 
print('Ridge parameter coefficients: \n', ridge1.coef_)
print('Lasso parameter coefficients: \n', lasso1.coef_)
print('Linear model parameter coefficients: \n', lin1.coef_)

Ridge parameter coefficients: 
 [-3.04658143e-01 -3.02022956e-01 -2.84006333e-01  1.24911648e+00
 -1.55691025e+00  2.41715754e+00 -3.76337431e+00  1.43151534e+00
 -5.52111742e-01  1.14990452e+00  1.15098474e+00  5.74620278e-01
  3.78189800e-01 -1.20799387e+00 -3.10148256e-01  2.37789774e+00
 -6.93251177e-02  6.13073520e-04  2.38947862e-02  2.52916172e-02
  2.17709389e-02 -1.15711726e-02  0.00000000e+00 -2.81791283e-02
  2.37634815e-02 -1.41538984e-02  6.61284669e-03  3.31702031e-02
  3.44578701e-02  1.64838369e-02 -3.09001681e-02]
Lasso parameter coefficients: 
 [-0.          0.          0.          0.          0.          0.
  0.          0.88479212  0.          0.29479977  0.02719438  0.18807644
  0.          0.          0.          0.41868862 -0.         -0.
  0.          0.          0.          0.          0.         -0.
 -0.         -0.         -0.         -0.          0.         -0.
 -0.        ]
Linear model parameter coefficients: 
 [-3.04846257e-01 -3.02013630e-01 -2.83603780e

### Second Try

In [222]:
# Build a Ridge, Lasso and regular linear regression model. 
# Note that in scikit learn, the regularization parameter is denoted by alpha (and not lambda)
ridge1 = Ridge(alpha=2)
ridge1.fit(X_tr_feats, y_train)

lasso1 = Lasso(alpha=2)
lasso1.fit(X_tr_feats, y_train)

lin1 = LinearRegression()
lin1.fit(X_tr_feats, y_train)

pass

In [223]:
# Create preditions for training and test sets 
y_h_ridge_train1 = ridge1.predict(X_tr_feats)
y_h_ridge_test1 = ridge1.predict(X_ts_feats)

y_h_lasso_train1 = lasso1.predict(X_tr_feats)
y_h_lasso_test1 = lasso1.predict(X_ts_feats)

y_h_lin_train1 = lin1.predict(X_tr_feats)
y_h_lin_test1 = lin1.predict(X_ts_feats)

In [224]:
# Print errors 
print('Train Error Ridge Model', np.sum((y_train - y_h_ridge_train1)**2))
print('Test Error Ridge Model', np.sum((y_test - y_h_ridge_test1)**2))
print('\n')

print('Train Error Lasso Model', np.sum((y_train - y_h_lasso_train1)**2))
print('Test Error Lasso Model', np.sum((y_test - y_h_lasso_test1)**2))
print('\n')

print('Train Error Unpenalized Linear Model', np.sum((y_train - lin1.predict(X_tr_feats))**2))
print('Test Error Unpenalized Linear Model', np.sum((y_test - lin1.predict(X_ts_feats))**2))

Train Error Ridge Model 45498.89187782525
Test Error Ridge Model 10620.65870579119


Train Error Lasso Model 98242.7740367927
Test Error Lasso Model 22494.916051594584


Train Error Unpenalized Linear Model 45497.618207703425
Test Error Unpenalized Linear Model 10619.928634029651


In [225]:
# Second Ridge regression 
print('Training r^2:', ridge1.score(X_tr_feats, y_train))
print('Testing r^2:', ridge1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, ridge1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, ridge1.predict(X_ts_feats)))

Training r^2: 0.5532652070951531
Testing r^2: 0.5450466066842754
Training MSE: 4.73749394812841
Testing MSE: 4.423431364344519


In [226]:
# Second Lasso regression 
print('Training r^2:', lasso1.score(X_tr_feats, y_train))
print('Testing r^2:', lasso1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, lasso1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, lasso1.predict(X_ts_feats)))

Training r^2: 0.03539485243784313
Testing r^2: 0.036393252666627185
Training MSE: 10.229360062140016
Testing MSE: 9.368977947353013


In [227]:
# Second Linear regression 
print('Training r^2:', lin1.score(X_tr_feats, y_train))
print('Testing r^2:', lin1.score(X_ts_feats, y_test))
print('Training MSE:', mean_squared_error(y_train, lin1.predict(X_tr_feats)))
print('Testing MSE:', mean_squared_error(y_test, lin1.predict(X_ts_feats)))

Training r^2: 0.5532777127350627
Testing r^2: 0.5450778805096075
Training MSE: 4.737361329415184
Testing MSE: 4.423127294472991


In [228]:
# Print coefficients 
print('Ridge parameter coefficients: \n', ridge1.coef_)
print('Lasso parameter coefficients: \n', lasso1.coef_)
print('Linear model parameter coefficients: \n', lin1.coef_)

Ridge parameter coefficients: 
 [-3.04102156e-01 -3.02059500e-01 -2.85105722e-01  1.24218036e+00
 -1.54037739e+00  2.37713673e+00 -3.69863692e+00  1.41789264e+00
 -5.28900990e-01  1.15016206e+00  1.15151507e+00  5.73604883e-01
  3.78529457e-01 -1.21026469e+00 -3.10624802e-01  2.34455868e+00
 -6.86731674e-02  6.35659372e-04  2.40260010e-02  2.52625799e-02
  2.17156050e-02 -1.18907712e-02  0.00000000e+00 -2.83266199e-02
  2.36299072e-02 -1.41675698e-02  6.45571201e-03  3.28681583e-02
  3.45140609e-02  1.62995684e-02 -3.10086138e-02]
Lasso parameter coefficients: 
 [-0.          0.          0.          0.          0.          0.
  0.          0.09173426  0.          0.          0.          0.
  0.          0.          0.          0.         -0.         -0.
  0.         -0.         -0.          0.          0.         -0.
  0.         -0.         -0.         -0.          0.         -0.
 -0.        ]
Linear model parameter coefficients: 
 [-3.04846257e-01 -3.02013630e-01 -2.83603780e-01  1.2

## Linear Models using Min Max Scaler

In [126]:
X2 = train[features]
y2 = train.ws
# this is data to use for train and test
# take random 80% of data to train, 20% to test (train test split)
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2, random_state=42)

scale = MinMaxScaler()
transformed1 = scale.fit_transform(X2_train)
X2_train = pd.DataFrame(transformed1, columns = X2_train.columns)

transformed2 = scale.fit_transform(X2_test)
X2_test = pd.DataFrame(transformed2, columns = X2_test.columns)

  return self.partial_fit(X, y)
  return self.partial_fit(X, y)


In [127]:
# Build a Ridge, Lasso and regular linear regression model. 
# Note that in scikit learn, the regularization parameter is denoted by alpha (and not lambda)
ridge2 = Ridge(alpha=0.5)
ridge2.fit(X2_train, y2_train)

lasso2 = Lasso(alpha=0.5)
lasso2.fit(X2_train, y2_train)

lin2 = LinearRegression()
lin2.fit(X2_train, y2_train)

pass

In [128]:
# Create preditions for training and test sets 
y_h_ridge_train2 = ridge2.predict(X2_train)
y_h_ridge_test2 = ridge2.predict(X2_test)

y_h_lasso_train2 = lasso2.predict(X2_train)
y_h_lasso_test2 = lasso2.predict(X2_test)

y_h_lin_train2 = lin2.predict(X2_train)
y_h_lin_test2 = lin2.predict(X2_test)

In [129]:
# Print errors 
print('Train Error Ridge Model', np.sum((y2_train - y_h_ridge_train2)**2))
print('Test Error Ridge Model', np.sum((y2_test - y_h_ridge_test2)**2))
print('\n')

print('Train Error Lasso Model', np.sum((y2_train - y_h_lasso_train2)**2))
print('Test Error Lasso Model', np.sum((y2_test - y_h_lasso_test2)**2))
print('\n')

print('Train Error Unpenalized Linear Model', np.sum((y2_train - lin2.predict(X2_train))**2))
print('Test Error Unpenalized Linear Model', np.sum((y2_test - lin2.predict(X2_test))**2))

Train Error Ridge Model 45595.91341777848
Test Error Ridge Model 11413.067064464383


Train Error Lasso Model 101847.6568211162
Test Error Lasso Model 23360.145143950438


Train Error Unpenalized Linear Model 45497.618207703425
Test Error Unpenalized Linear Model 11879.570240877862


In [130]:
# Ridge regression 
print('Training r^2:', ridge2.score(X2_train, y2_train))
print('Testing r^2:', ridge2.score(X2_test, y2_test))
print('Training MSE:', mean_squared_error(y2_train, ridge2.predict(X2_train)))
print('Testing MSE:', mean_squared_error(y2_test, ridge2.predict(X2_test)))

Training r^2: 0.5523125927397377
Testing r^2: 0.5111024906311405
Training MSE: 4.747596149289721
Testing MSE: 4.7534640001934125


In [131]:
# Lasso regression 
print('Training r^2:', lasso2.score(X2_train, y2_train))
print('Testing r^2:', lasso2.score(X2_test, y2_test))
print('Training MSE:', mean_squared_error(y2_train, lasso2.predict(X2_train)))
print('Testing MSE:', mean_squared_error(y2_test, lasso2.predict(X2_test)))

Training r^2: 0.0
Testing r^2: -0.0006702593496414888
Training MSE: 10.60471228874596
Testing MSE: 9.729339918346705


In [132]:
# Linear regression 
print('Training r^2:', lin2.score(X2_train, y2_train))
print('Testing r^2:', lin2.score(X2_test, y2_test))
print('Training MSE:', mean_squared_error(y2_train, lin2.predict(X2_train)))
print('Testing MSE:', mean_squared_error(y2_test, lin2.predict(X2_test)))

Training r^2: 0.5532777127350627
Testing r^2: 0.4911190593787882
Training MSE: 4.737361329415184
Testing MSE: 4.947759367296069


In [133]:
# Print coefficients 
print('Ridge parameter coefficients: \n', ridge2.coef_)
print('Lasso parameter coefficients: \n', lasso2.coef_)
print('Linear model parameter coefficients: \n', lin2.coef_)

Ridge parameter coefficients: 
 [ -1.81912931  -1.21112345  -1.11711323   8.40152914  -9.04809067
  11.38842882 -19.69570495   8.68059363  -1.97948465   7.37232856
   8.75954793   3.75526801   3.53781633  -8.29474329  -1.59993149
  13.84375075  -0.27228902  -0.09552526  -0.06150769   0.62201791
   0.42788092  -0.1599333    0.          -0.9318184   -0.07185533
  -0.51406899  -0.02006603  -0.05123036   1.82236031   0.28519148
  -0.97915625]
Lasso parameter coefficients: 
 [-0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -0. -0.
  0. -0. -0.  0.  0. -0.  0. -0. -0. -0.  0. -0. -0.]
Linear model parameter coefficients: 
 [-1.87008742e+00 -1.19960213e+00 -1.11715748e+00  1.04350515e+01
 -1.16812336e+01  1.34899456e+01 -2.38488197e+01  9.73743488e+00
 -3.68744643e+00  7.40129979e+00  8.83499118e+00  3.84853205e+00
  3.54098200e+00 -8.21245221e+00 -1.57528142e+00  1.64406831e+01
 -3.41753078e-01 -1.55242390e-01 -1.12500153e-01  6.13937151e-01
  4.22071405e-01 -2.00375979e-01  

# Map predictions to Data

In [243]:
X_train['lasso_pred'] = y_h_lasso_train1
X_train['ridge_pred'] = y_h_ridge_train1
X_train['lin_pred'] = y_h_lin_train1

In [245]:
X_test['lasso_pred'] = y_h_lasso_test1
X_test['ridge_pred'] = y_h_ridge_test1
X_test['lin_pred'] = y_h_lin_test1

In [246]:
preds_df = pd.concat([X_train,X_test])

In [253]:
train.merge(preds_df[['pid','lasso_pred','ridge_pred','lin_pred','season']],on=['pid','season'])

Unnamed: 0,player,age,tm,g,gs,mp,fg,fga,fg_pct,fg3,fg3a,fg3_pct,fg2,fg2a,fg2_pct,efg_pct,ft,fta,ft_pct,orb,drb,trb,ast,stl,blk,tov,pf,pts,pid,per,ts_pct,3par,ftr,orb_pct,drb_pct,trb_pct,ast_pct,stl_pct,blk_pct,tov_pct,usg_pct,ows,dws,ws,ws/48,obpm,dbpm,bpm,vorp,season,year,from,to,pk,experience,ftsy_pts,pos_C,pos_C-PF,pos_PF,pos_PF-C,pos_PF-SF,pos_PG,pos_PG-SF,pos_PG-SG,pos_SF,pos_SF-PF,pos_SF-SG,pos_SG,pos_SG-PF,pos_SG-PG,pos_SG-SF,lasso_pred,ridge_pred,lin_pred
0,A.C. Green,22,LAL,82,1.0,1542,209,388,0.539,1,6,0.167,208,382,0.545,0.540,102,167,0.611,160,221,381,54,49,49,99,229,521,greenac01,11.8,0.564,0.015,0.430,12.4,15.5,14.0,4.2,1.5,1.7,17.7,14.7,1.4,2.0,7.6,0.103,-0.5,0.8,0.3,0.9,1986,1985,1986,2001,23,0,1254.2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.287009,2.903535,2.911407
1,A.C. Green,23,LAL,79,72.0,2240,316,587,0.538,0,5,0.000,316,582,0.543,0.538,220,282,0.780,210,405,615,84,70,80,102,171,852,greenac01,15.7,0.599,0.009,0.480,11.2,18.8,15.3,4.6,1.5,2.0,12.5,14.7,4.3,3.3,7.9,0.163,0.6,1.0,1.7,2.1,1987,1985,1986,2001,23,1,2064.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.378088,6.740016,6.755558
2,A.C. Green,24,LAL,82,64.0,2636,322,640,0.503,0,2,0.000,322,638,0.505,0.503,293,379,0.773,245,465,710,93,87,45,120,204,937,greenac01,14.5,0.581,0.003,0.592,11.1,19.1,15.3,4.5,1.6,1.0,12.9,14.7,4.5,3.4,9.4,0.144,0.2,0.8,1.0,2.0,1988,1985,1986,2001,23,2,2204.5,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.435585,7.004987,7.009978
3,A.C. Green,25,LAL,82,82.0,2510,401,758,0.529,4,17,0.235,397,741,0.536,0.532,282,359,0.786,258,481,739,103,94,55,119,172,1088,greenac01,17.8,0.594,0.022,0.474,12.3,20.0,16.4,5.5,1.8,1.2,11.5,17.0,5.8,3.5,7.7,0.179,1.6,0.6,2.2,2.7,1989,1985,1986,2001,23,3,2457.3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.430262,8.201028,8.216084
4,A.C. Green,26,LAL,82,82.0,2709,385,806,0.478,13,46,0.283,372,760,0.489,0.486,278,370,0.751,262,450,712,90,66,50,116,207,1061,greenac01,14.7,0.548,0.057,0.459,11.5,18.4,15.1,4.6,1.2,1.1,10.7,17.1,4.4,3.3,6.1,0.137,0.3,0.0,0.4,1.6,1990,1985,1986,2001,23,4,2282.4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.423770,6.218455,6.212310
5,A.C. Green,27,LAL,82,21.0,2164,258,542,0.476,11,55,0.200,247,487,0.507,0.486,223,302,0.738,201,315,516,71,59,23,99,117,750,greenac01,13.8,0.556,0.101,0.557,11.3,16.2,13.8,4.5,1.4,0.6,12.8,15.6,3.2,2.9,8.8,0.135,0.0,0.6,0.5,1.4,1991,1985,1986,2001,23,5,1622.7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.380451,4.571136,4.568408
6,A.C. Green,28,LAL,82,53.0,2902,382,803,0.476,12,56,0.214,370,747,0.495,0.483,340,457,0.744,306,456,762,117,91,36,111,141,1116,greenac01,16.7,0.556,0.070,0.569,11.8,18.2,14.9,6.0,1.6,0.8,10.0,16.8,5.8,2.9,8.6,0.145,1.2,0.2,1.4,2.5,1992,1985,1986,2001,23,6,2475.9,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.472604,7.718161,7.708809
7,A.C. Green,29,LAL,82,55.0,2819,379,706,0.537,16,46,0.348,363,660,0.550,0.548,277,375,0.739,287,424,711,116,88,39,116,149,1051,greenac01,16.3,0.603,0.065,0.531,11.5,17.3,14.4,5.9,1.6,0.9,11.8,15.0,6.1,2.6,9.3,0.147,1.6,0.5,2.0,2.9,1993,1985,1986,2001,23,7,2343.2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.422983,7.532588,7.546843
8,A.C. Green,30,PHO,82,55.0,2825,465,926,0.502,8,35,0.229,457,891,0.513,0.506,266,362,0.735,275,478,753,137,70,38,100,142,1204,greenac01,17.0,0.555,0.038,0.391,11.1,18.8,15.0,6.9,1.2,0.8,8.4,17.6,6.7,2.6,6.7,0.157,1.5,-0.3,1.2,2.3,1994,1985,1986,2001,23,8,2537.1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.417528,7.431288,7.429404
9,A.C. Green,31,PHO,82,52.0,2687,311,617,0.504,43,127,0.339,268,490,0.547,0.539,251,343,0.732,194,475,669,127,55,31,114,146,916,greenac01,14.2,0.596,0.206,0.556,8.3,20.4,14.3,6.5,1.0,0.8,12.9,14.2,4.6,2.1,4.4,0.120,0.6,0.2,0.7,1.9,1995,1985,1986,2001,23,9,2053.3,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3.402504,6.149649,6.157391


# Predicting with unseen data

In [255]:
# this is the data to predict on (NOT TRAIN OR TEST) because we cannot validate
df_last = newdf[newdf.season == newdf.to]

In [257]:
df_last = pd.get_dummies(df_last, columns=['pos'])

In [271]:
df_last['pos_PG-SF'] = 0

In [273]:
dflfeats = ['age', 'g', 'mp', 'fg3', 'fg3a', 'fg2', 'fg2a', 
            'ft', 'fta', 'trb', 'ast', 'stl', 'blk', 'tov', 
            'pf', 'pts', 'pos_C', 'pos_C-PF', 'pos_PF', 'pos_PF-C', 
            'pos_PF-SF', 'pos_PG', 'pos_PG-SG', 'pos_SF', 'pos_SF-PF',
            'pos_SF-SG', 'pos_SG', 'pos_SG-PF', 'pos_SG-PG', 'pos_SG-SF','pos_PG-SF']

In [275]:
scale = StandardScaler()
df_last[dflfeats] = scale.fit_transform(df_last[dflfeats])

In [278]:
df_last['ridge_pred'] = ridge1.predict(df_last[dflfeats])

In [280]:
df_last[['pid','season','ridge_pred']]

Unnamed: 0,pid,season,ridge_pred
15,greenac01,2001,3.217192
16,bramlaj01,2000,2.008303
18,engliaj01,1992,3.052378
21,guytoaj01,2003,2.313002
22,hammoaj01,2017,2.285528
28,priceaj01,2015,2.375383
38,brookaa01,2018,1.672656
43,gordoaa01,2019,11.429882
50,grayaa01,2014,1.847255
51,holidaa01,2019,3.727456


In [281]:
base_df = newdf[newdf.season == newdf.to]

In [283]:
predlast_df = base_df.merge(df_last[['pid','season','ridge_pred']], on=['pid','season'])

In [292]:
predlast_df[predlast_df.season==2019][['player', 'age', 'season', 'g', 'mp', 'fg3', 'fg3a', 'fg2', 'fg2a', 
                                       'ft', 'fta', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'ws', 
                                       'ridge_pred']]

Unnamed: 0,player,age,season,g,mp,fg3,fg3a,fg2,fg2a,ft,fta,trb,ast,stl,blk,tov,pf,pts,ws,ridge_pred
7,Aaron Gordon,23,2019,78,2633,121,347,349,699,185,253,574,289,57,56,162,172,1246,,11.429882
9,Aaron Holiday,22,2019,50,646,43,127,62,135,41,50,67,87,21,13,40,71,294,,3.727456
11,Abdel Nader,25,2019,61,694,32,100,59,115,27,36,116,20,20,12,26,68,241,,2.992541
22,Al Horford,32,2019,68,1973,73,203,314,520,78,95,458,283,59,86,102,126,925,,12.043657
27,Al-Farouq Aminu,28,2019,81,2292,96,280,161,313,150,173,610,104,68,33,72,143,760,,10.833504
32,Alec Burks,27,2019,64,1375,61,168,131,306,116,141,235,128,39,21,65,91,561,,6.109456
39,Alex Len,25,2019,77,1544,74,204,246,444,140,216,424,86,27,69,97,200,854,,8.260073
45,Alize Johnson,22,2019,14,64,1,2,3,14,4,8,19,1,1,3,0,7,13,,2.574053
48,Allen Crabbe,26,2019,43,1133,98,259,39,114,41,56,148,46,23,13,46,102,413,,3.745453
63,Amir Johnson,31,2019,51,529,12,40,67,117,31,41,147,60,16,13,45,99,201,,2.912356
