In [41]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from mpl_toolkits.mplot3d import Axes3D
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.neural_network import MLPRegressor


warnings.filterwarnings('ignore')

In [42]:
# Training Data
season_stats = pd.read_csv('/Users/sabol/thinking_files/thinkful_files/unit_3/lesson_7/Seasons_Stats.csv')
season_stats = season_stats.drop(['blanl', 'blank2', 'Unnamed: 0'], axis=1)
season_stats = season_stats[season_stats.Tm.str.contains('TOT') == False]
season_stats['MVP_Candidate'] = np.where(season_stats['MVPSHARES'] > 0, 'Yes', 'No')
season_stats['Player'] = season_stats['Player'].astype(str).str.replace('*', '')
season_stats['PPG'] = season_stats['PTS'] / season_stats['G']
season_stats['ASTPG'] = season_stats['AST'] / season_stats['G']
season_stats['STLPG'] = season_stats['STL'] / season_stats['G']
season_stats['BLKPG'] = season_stats['BLK'] / season_stats['G']
season_stats['PFPG'] = season_stats['PF'] / season_stats['G']
season_stats['TRPG'] = season_stats['TRB'] / season_stats['G']
season_stats['ORPG'] = season_stats['ORB'] / season_stats['G']
season_stats['DRPG'] = season_stats['DRB'] / season_stats['G']
season_stats['MPPG'] = season_stats['MP'] / season_stats['G']
season_stats['TOVPG'] = season_stats['TOV'] / season_stats['G']
season_stats = season_stats[season_stats['G'] >= 41.0]
season_stats = season_stats[season_stats['MPPG'] >= 25.0]
season_stats['Year'] = season_stats['Year'].astype(int)
season_stats = season_stats.dropna()

In [9]:
from sklearn.neural_network import MLPRegressor
Y = season_stats['MVPSHARES']
X = season_stats.drop(['MVPSHARES', 'MVP_Candidate', 
                       'MVP', 'Tm', 'Player', 'Decade', 'Pos'], axis=1)

names_X = X.columns
#names_Y = Y.columns
X_scale = pd.DataFrame(preprocessing.scale(X), columns=names_X)
Y_scale = pd.DataFrame(preprocessing.scale(Y), columns=['MVPSHARES'])

X_train, X_test, Y_train, Y_test = train_test_split(
    X_scale, Y_scale, test_size=0.33, random_state=42)

mlp = MLPRegressor(hidden_layer_sizes=(1000,1000,1000), max_iter=500,
                   activation='logistic')
mlp.fit(X_train, Y_train)

MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(1000, 1000, 1000), learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [10]:
#print(cross_val_score(mlp, X_test, Y_test, cv=5))
print(mlp.score(X_test,Y_test))

0.8292369901062444


In [43]:
feat_to_use = ['WS', 'PER', 'TMWL%', 'PPG', 'BPM']
Y = season_stats['MVPSHARES']
X = season_stats[feat_to_use]
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42)

rf = RandomForestRegressor(bootstrap = True, max_depth = 30, max_features = 'auto',
                           min_samples_leaf = 3, min_samples_split = 2,
                           n_estimators = 400)
rf.fit(X_train, Y_train)

print(cross_val_score(rf, X_test, Y_test, cv=5))
print(rf.score(X_test,Y_test))

[0.62101235 0.77827282 0.6420777  0.68786587 0.75682709]
0.767327699563884


In [54]:
season_2016 = season_stats[season_stats['Year'] == 2016.0]
season_not2016 = season_stats[season_stats['Year'] != 2016.0]
mlp_feat_to_use_rows = season_2016.drop(['MVPSHARES', 'MVP_Candidate', 
                       'MVP', 'Tm', 'Player', 'Decade', 'Pos'], axis=1)
mlp_feat_to_use = mlp_feat_to_use_rows.columns


In [60]:
season_2016 = season_stats[season_stats['Year'] == 2016.0]
season_not2016 = season_stats[season_stats['Year'] != 2016.0]
X_2016 = season_2016[mlp_feat_to_use]
X_2016 = X_2016.reset_index(drop=True)
season_columns = X_2016.columns
player_2016 = pd.DataFrame(season_2016['Player'], columns=['Player'])
player_2016 = player_2016.reset_index(drop=True)
mvp_2016 = pd.DataFrame(season_2016[['MVPSHARES', 'MVP']], columns=['MVPSHARES', 'MVP'])
mvp_2016 = mvp_2016.reset_index(drop=True)
season_2016_scale = pd.DataFrame(preprocessing.scale(X_2016), columns=season_columns)
season_2016_scale = pd.concat([season_2016_scale, player_2016, mvp_2016], axis=1)


X = season_not2016[feat_to_use]
y = season_not2016['MVPSHARES']

Unnamed: 0,Year,Age,G,GS,MP,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,TMWL%,PPG,ASTPG,STLPG,BLKPG,PFPG,TRPG,ORPG,DRPG,MPPG,TOVPG,Player,MVPSHARES,MVP
0,0.0,0.803007,-0.089608,-0.097312,0.365730,-1.234913,-0.370996,0.047807,-0.852505,-0.966996,-0.643485,-0.788939,-0.702200,-2.000447,-0.938475,-1.186602,-0.763106,-0.471284,-1.132377,-0.779356,-0.918530,-0.667548,-1.115604,-1.234272,-1.099692,-0.265544,-0.219171,-0.192222,0.028483,-0.052811,0.538204,-0.290840,-0.212038,-0.375189,-0.108655,-0.717964,-0.810895,0.679174,-0.938166,-0.422428,-0.621192,-0.578942,-1.549631,-0.849963,-0.936265,-0.508304,-0.393441,-0.599071,-0.402683,-0.596610,-1.708279,-0.871391,-0.562051,-0.641384,-0.972331,-0.423859,0.740565,-1.004772,Arron Afflalo,0.000,0
1,0.0,0.803007,0.217928,0.646604,0.084027,1.352771,0.563607,-1.555089,0.026708,1.518738,1.184478,1.421857,-0.879820,-1.335954,1.018963,-1.469043,0.814118,1.013329,1.872507,1.474578,1.796711,-0.101411,0.644826,0.307541,0.228296,1.086913,0.654978,1.230962,-1.354541,-1.394048,-2.894364,1.820373,1.662522,0.773393,0.200671,0.563784,0.370728,0.877810,1.360908,1.081573,1.238440,-0.792607,-1.161388,1.028094,-0.652148,-0.288187,0.691340,1.903487,0.667542,-0.854478,-1.321415,1.009632,-0.475191,1.251152,1.363495,1.085154,-0.132719,-0.763440,LaMarcus Aldridge,0.000,0
2,0.0,1.776142,-0.807193,-0.097312,-1.557533,-0.784881,-1.085692,-1.130294,0.061877,0.843600,-0.137531,0.181104,-0.998233,2.983244,-0.390393,0.100072,-0.979974,-0.979178,-0.297687,-0.870732,-0.630036,-1.059489,0.826939,-0.389946,-0.546363,-1.298465,-1.391810,0.112746,-1.126570,-1.236967,0.313560,-0.778043,-0.748771,-0.397277,-0.727307,-0.890010,-0.762525,-1.395467,0.278991,-0.782541,-0.468763,-1.043977,0.988882,-0.638351,-1.003116,0.298791,-1.354835,0.115945,-1.320079,-1.041440,1.567434,-0.594479,1.011381,-0.294573,0.506928,-0.656595,-1.744290,-0.920566,Tony Allen,0.000,0
3,0.0,-0.413412,1.038025,0.996682,0.288902,-0.829884,-0.316019,1.106964,-0.479719,0.076398,0.596918,0.429255,-0.810745,-0.339216,0.235988,0.225601,-0.960259,-0.510353,0.036189,-0.383395,-0.460333,-0.580450,0.402008,-0.279816,-0.269699,-0.674254,-0.503447,-0.741165,0.560416,0.617807,0.349503,-0.986844,-0.977082,-0.397277,-0.037272,-0.674952,-0.638143,-0.457465,0.188831,0.693216,0.562672,-0.616648,-0.145983,0.287452,-0.301180,0.200961,-0.573381,0.262465,-0.933516,-0.760962,-0.447655,0.125569,-0.379151,0.281931,0.018171,0.384569,-0.749261,-0.597753,Al-Farouq Aminu,0.000,0
4,0.0,0.073155,-0.602169,-2.285301,-0.563889,0.182688,0.041329,0.540570,-0.282775,0.843600,0.042001,0.293900,-1.077176,-1.335954,-0.390393,-1.406278,0.577534,0.036610,-1.215846,-0.413853,-0.290630,0.246981,-1.783353,-0.867174,-0.823028,0.053992,0.242778,-0.517521,0.636406,0.672182,0.394432,-0.267640,-0.171983,-0.441453,-0.156244,0.047644,-0.140618,1.043340,0.684710,-0.267085,0.039333,-1.025124,-1.161388,-0.453190,-0.802563,-1.315399,0.159232,-0.739730,0.470127,-1.035580,-1.172222,-0.402111,-1.308696,0.248231,0.916064,-0.097440,-0.172631,-0.729401,Ryan Anderson,0.000,0
5,0.0,-1.386548,0.833000,0.865403,1.523274,0.542713,0.591095,-1.028343,0.835585,0.107086,0.825414,0.632287,0.294446,-0.006970,1.488748,0.727718,0.104367,0.466366,0.537003,0.560821,0.201507,0.029236,0.887644,0.527800,0.781625,0.915998,0.541268,1.088643,-0.928995,-0.832179,-0.585018,1.425970,1.161838,1.126803,0.367231,0.882070,1.110107,-0.600924,0.414231,1.385197,1.136821,0.684192,0.511044,1.874543,1.169542,2.328757,0.740181,-0.528742,0.438063,0.490847,0.264112,1.636618,2.062568,0.899596,0.265323,1.126650,1.322629,0.901873,Giannis Antetokounmpo,0.000,0
6,0.0,1.046291,0.012904,0.559085,0.772919,0.880237,-0.398484,-0.297694,0.160349,0.015022,0.743808,0.542051,0.481934,-0.671462,-0.312095,-0.590338,1.563299,0.349160,0.203127,0.347611,0.201507,0.813117,-0.144332,0.601220,0.615626,1.317276,1.585983,-0.375203,0.241256,0.382185,0.151816,1.248103,1.534347,-0.507718,-0.727307,1.208958,1.068646,0.557786,0.188831,1.095695,0.852287,0.395117,-0.444631,-0.109321,0.634734,0.421078,1.313419,-0.599071,1.464632,0.414811,-0.488253,-0.108267,0.509222,0.928107,0.206895,1.194737,1.276937,0.696631,Carmelo Anthony,0.000,0
7,0.0,0.803007,0.935512,0.952923,1.615467,-0.784881,0.178770,1.650703,-0.690730,-0.506675,-0.545558,-0.563348,-0.692332,1.820383,-0.546988,-0.527574,-1.196843,0.036610,0.036189,0.043025,-0.324571,0.116334,0.159190,0.197412,0.394295,-0.243251,-0.005964,-0.741165,1.457102,1.524049,0.439361,-0.994578,-1.017137,-0.176396,0.438614,-0.580327,-0.603593,0.050161,-0.276994,-0.027010,-0.113096,-0.302435,2.482124,-0.426739,-0.418169,0.347706,-0.095255,0.045616,-0.433341,-0.457213,2.182383,-0.516107,-0.165283,-0.336609,-0.400189,-0.275789,1.325334,-0.687938,Trevor Ariza,0.000,0
8,0.0,-0.899980,-0.602169,-0.009792,-0.476818,-0.919891,0.398677,0.274365,-0.514887,0.015022,-0.414989,-0.224960,-0.909423,-1.169831,-0.860178,-1.531808,-1.157412,0.036610,-0.297687,-0.109267,0.116656,-0.232058,-0.387150,-0.426656,-0.491031,-0.703978,-0.809044,0.275395,-0.108300,-0.197810,0.547190,-0.677509,-0.780814,0.486247,0.628968,-0.786782,-0.790165,-0.192617,-0.126728,-0.387123,-0.321415,-0.748617,-1.071793,-0.849963,-1.354083,-0.655048,-0.740469,2.331324,-0.624740,-0.718213,-1.063271,-0.850347,-0.432200,-0.176442,-0.014182,-0.239316,-0.014242,-1.389108,Harrison Barnes,0.000,0
9,0.0,2.019426,0.422952,-0.622429,-0.097799,-1.054900,-1.140669,1.384498,-0.606325,-0.015666,0.547955,0.293900,-0.534447,0.159153,0.705773,0.413895,-0.763106,-1.018247,-0.047280,-0.809814,-0.901560,-0.580450,0.462712,-0.206397,-0.269699,-0.934342,-0.595837,-1.452756,0.454029,0.744681,-0.000943,-1.203379,-1.165339,-0.816951,-0.893867,-0.709362,-0.762525,0.281902,-0.021542,0.234249,0.161276,-0.459541,0.033206,0.393258,-0.284467,1.741779,-0.781598,0.115945,-0.986787,-0.541118,-0.092272,0.330009,1.749643,0.061236,-0.083996,0.126134,-0.667167,-0.427956,Matt Barnes,0.000,0


In [56]:
Y = season_not2016['MVPSHARES']
X = season_not2016[mlp_feat_to_use]

names_X = X.columns
#names_Y = Y.columns
X_scale = pd.DataFrame(preprocessing.scale(X), columns=names_X)
Y = pd.DataFrame(preprocessing.scale(Y), columns=['MVPSHARES'])

mlp = MLPRegressor(hidden_layer_sizes=(1000,1000,1000), max_iter=500,
                   activation='logistic')
mlp.fit(X_scale, Y)

MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(1000, 1000, 1000), learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [61]:
test_pred = mlp.predict(season_2016_scale[mlp_feat_to_use])
test_results = pd.DataFrame(season_2016_scale.Player)
test_results['score'] = test_pred
test_results['Year'] = season_2016_scale['Year']
test_results['Share'] = season_2016_scale['MVPSHARES']
test_results['MVP'] = season_2016_scale['MVP']
test_results.sort_values('score', ascending=False).head()

Unnamed: 0,Player,score,Year,Share,MVP
36,Stephen Curry,8.446813,0.0,1.0,1
45,Kevin Durant,7.108617,0.0,0.112,0
77,LeBron James,6.87779,0.0,0.482,0
136,Russell Westbrook,6.567977,0.0,0.371,0
85,Kawhi Leonard,1.898237,0.0,0.484,0
