In [31]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from mpl_toolkits.mplot3d import Axes3D
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

warnings.filterwarnings('ignore')

In [7]:
# Training Data
season_stats = pd.read_csv('/Users/sabol/thinking_files/thinkful_files/unit_3/lesson_7/Seasons_Stats.csv')
season_stats = season_stats.drop(['blanl', 'blank2', 'Unnamed: 0'], axis=1)
season_stats = season_stats[season_stats.Tm.str.contains('TOT') == False]
season_stats['MVP_Candidate'] = np.where(season_stats['MVPSHARES'] > 0, 'Yes', 'No')
season_stats['Player'] = season_stats['Player'].astype(str).str.replace('*', '')
season_stats['PPG'] = season_stats['PTS'] / season_stats['G']
season_stats['ASTPG'] = season_stats['AST'] / season_stats['G']
season_stats['STLPG'] = season_stats['STL'] / season_stats['G']
season_stats['BLKPG'] = season_stats['BLK'] / season_stats['G']
season_stats['PFPG'] = season_stats['PF'] / season_stats['G']
season_stats['TRPG'] = season_stats['TRB'] / season_stats['G']
season_stats['ORPG'] = season_stats['ORB'] / season_stats['G']
season_stats['DRPG'] = season_stats['DRB'] / season_stats['G']
season_stats['MPPG'] = season_stats['MP'] / season_stats['G']
season_stats['TOVPG'] = season_stats['TOV'] / season_stats['G']
season_stats = season_stats[season_stats['G'] >= 41.0]
season_stats = season_stats[season_stats['MPPG'] >= 25.0]
season_stats['Year'] = season_stats['Year'].astype(int)
season_stats = season_stats.dropna()

In [93]:
from sklearn.neural_network import MLPRegressor
Y = season_stats['MVPSHARES']
X = season_stats.drop(['MVPSHARES', 'MVP_Candidate', 
                       'MVP', 'Tm', 'Player', 'Decade', 'Pos'], axis=1)

names_X = X.columns
#names_Y = Y.columns
X_scale = pd.DataFrame(preprocessing.scale(X), columns=names_X)
Y_scale = pd.DataFrame(preprocessing.scale(Y), columns=['MVPSHARES'])

X_train, X_test, Y_train, Y_test = train_test_split(
    X_scale, Y_scale, test_size=0.33, random_state=42)

mlp = MLPRegressor(hidden_layer_sizes=(1000,1000,1000), max_iter=500,
                   activation='logistic')
mlp.fit(X_train, Y_train)

MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(1000, 1000, 1000), learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [94]:
print(cross_val_score(mlp, X_test, Y_test, cv=5))
print(mlp.score(X_test,Y_test))

[0.75052854 0.67048151 0.78241058 0.74116032 0.81797614]
0.7984195485880519


In [65]:
feat_to_use = ['WS', 'PER', 'TMWL%', 'PPG', 'BPM']
Y = season_stats['MVPSHARES']
X = season_stats[feat_to_use]
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=42)

rf = RandomForestRegressor(bootstrap = True, max_depth = 30, max_features = 'auto',
                           min_samples_leaf = 3, min_samples_split = 2,
                           n_estimators = 400)
rf.fit(X_train, Y_train)

print(cross_val_score(rf, X_test, Y_test, cv=5))
print(rf.score(X_test,Y_test))

[0.60743426 0.77578868 0.64165664 0.68823957 0.7536039 ]
0.7685666818130868


In [85]:
season_2016 = season_stats[season_stats['Year'] == 2016.0]
season_not2016 = season_stats[season_stats['Year'] != 2016.0]

X = season_not2016[feat_to_use]
y = season_not2016['MVPSHARES']
rf = RandomForestRegressor(bootstrap = True, max_depth = 80, max_features = 3,
                           min_samples_leaf = 3, min_samples_split = 3,
                           n_estimators = 400)
rf.fit(X,y)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=80,
           max_features=3, max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=3,
           min_samples_split=3, min_weight_fraction_leaf=0.0,
           n_estimators=400, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [86]:
test_pred = rf.predict(season_2016[feat_to_use])
test_results = pd.DataFrame(season_2016.Player)
test_results['score'] = test_pred
test_results['Year'] = season_2016['Year']
test_results['Share'] = season_2016['MVPSHARES']
test_results['MVP'] = season_2016['MVP']
test_results.sort_values('score', ascending=False).head()

Unnamed: 0,Player,score,Year,Share,MVP
23633,Stephen Curry,0.928965,2016,1.0,1
23839,Kawhi Leonard,0.482267,2016,0.484,0
23781,LeBron James,0.458425,2016,0.482,0
24070,Russell Westbrook,0.38261,2016,0.371,0
23654,Kevin Durant,0.342702,2016,0.112,0


In [89]:
Y = season_not2016['MVPSHARES']
X = season_not2016.drop(['MVPSHARES', 'MVP_Candidate', 
                       'MVP', 'Tm', 'Player', 'Decade', 'Pos'], axis=1)

names_X = X.columns
#names_Y = Y.columns
#X_scale = pd.DataFrame(preprocessing.scale(X), columns=names_X)
#Y_scale = pd.DataFrame(preprocessing.scale(Y), columns=['MVPSHARES'])

mlp = MLPRegressor(hidden_layer_sizes=(1000,1000,1000), max_iter=500,
                   activation='logistic')
mlp.fit(X, Y)

MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(1000, 1000, 1000), learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [95]:
test_pred = mlp.predict(season_2016.drop(['MVPSHARES', 'MVP_Candidate', 
                       'MVP', 'Tm', 'Player', 'Decade', 'Pos'], axis=1))
test_results = pd.DataFrame(season_2016.Player)
test_results['score'] = test_pred
test_results['Year'] = season_2016['Year']
test_results['Share'] = season_2016['MVPSHARES']
test_results['MVP'] = season_2016['MVP']
test_results.sort_values('score', ascending=False).head()

Unnamed: 0,Player,score,Year,Share,MVP
23623,DeMarcus Cousins,-0.271689,2016,0.0,0
23635,Anthony Davis,-0.349662,2016,0.0,0
23654,Kevin Durant,-0.418542,2016,0.112,0
23523,LaMarcus Aldridge,-0.420631,2016,0.0,0
23927,Jahlil Okafor,-0.423851,2016,0.0,0


In [None]:
mlp.score