## Dependencies

In [1]:
import pandas as pd
import requests
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import joblib

## Import and Prep Data

In [2]:
# Player data from URL
results = requests.get("http://nhl-app.eba-tmaqptju.us-west-2.elasticbeanstalk.com/api/avgplayerstats/all").json()

In [3]:
# Create Dataframe of player data
df = pd.DataFrame()
pos_list = []
toi_list = []
g_list = []
a_list = []
s_list = []
h_list = []
ppg_list = []
ppa_list = []
pim_list = []
fow_list = []
fot_list = []
ta_list = []
ga_list = []
shg_list = []
sha_list = []
bs_list = []
etoi_list = []
shtoi_list = []
pptoi_list = []
for player in results:
    pos_list.append(player['Position'][-1])
    toi_list.append(player['timeOnIce'])
    a_list.append(player['assists'])
    g_list.append(player['goals'])
    s_list.append(player['shots'])
    h_list.append(player['hits'])
    ppg_list.append(player['powerPlayGoals'])
    ppa_list.append(player['powerPlayAssists'])
    pim_list.append(player['penaltyMinutes'])
    fow_list.append(player['faceOffWins'])
    fot_list.append(player['faceoffTaken'])
    ta_list.append(player['takeaways'])
    ga_list.append(player['Expr1'])
    shg_list.append(player['shortHandedGoals'])
    sha_list.append(player['shortHandedAssists'])
    bs_list.append(player['blocked'])
    etoi_list.append(player['evenTimeOnIce'])
    shtoi_list.append(player['shortHandedTimeOnIce'])
    pptoi_list.append(player['powerPlayTimeOnIce'])
df["pos"] = pos_list
df["toi"] = toi_list
df["g"] = g_list
df["a"] = g_list
df["s"] = s_list
df["h"] = h_list
df["ppg"] = ppg_list
df["ppa"] = ppa_list
df["pim"] = pim_list
df["fow"] = fow_list
df["fot"] = fot_list
df["ta"] = ta_list
df["ga"] = ga_list
df["shg"] = shg_list
df["sha"] = sha_list
df["bs"] = bs_list
df["etoi"] = etoi_list
df["shtoi"] = shtoi_list
df["pptoi"] = pptoi_list

In [4]:
df

Unnamed: 0,pos,toi,g,a,s,h,ppg,ppa,pim,fow,fot,ta,ga,shg,sha,bs,etoi,shtoi,pptoi
0,W,930.412698,0.301587,0.301587,2.253968,0.908730,0.083333,0.071429,0.281746,0.138889,0.472222,0.488095,0.301587,0.000000,0.000000,0.361111,789.420635,5.825397,135.166667
1,W,475.555556,0.000000,0.000000,0.555556,1.277778,0.000000,0.000000,3.611111,0.000000,0.000000,0.111111,0.000000,0.000000,0.000000,0.111111,471.777778,0.333333,3.444444
2,W,473.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,390.000000,0.000000,83.000000
3,D,1019.128079,0.034483,0.034483,0.852217,1.842365,0.000000,0.000000,0.527094,0.000000,0.004926,0.231527,0.034483,0.000000,0.014778,1.507389,911.556650,104.167488,3.403941
4,W,421.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.000000,0.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,383.000000,14.000000,24.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3348,D,1078.284483,0.094828,0.094828,1.172414,0.000000,0.025862,0.077586,0.603448,0.000000,0.000000,0.000000,0.094828,0.000000,0.017241,0.000000,872.448276,84.965517,120.870690
3349,W,666.337209,0.127907,0.127907,1.418605,0.000000,0.000000,0.023256,0.488372,0.081395,0.337209,0.000000,0.127907,0.000000,0.000000,0.000000,624.209302,3.418605,38.709302
3350,D,690.607143,0.000000,0.000000,0.964286,0.750000,0.000000,0.000000,0.285714,0.000000,0.000000,0.178571,0.000000,0.000000,0.000000,0.535714,670.428571,17.857143,2.321429
3351,W,935.103093,0.194158,0.194158,1.718213,0.000000,0.063574,0.061856,0.527491,0.041237,0.118557,0.000000,0.194158,0.006873,0.000000,0.000000,736.018900,38.810997,160.273196


In [5]:
X_df = df.drop(columns=["pos"])

In [6]:
# Establish X, y
X = X_df.values
y = df["pos"]

## Create Training and Testing sets

In [7]:
# Convert Player Positions (C, W, D) to numerical values
label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)

In [8]:
## CLASSES
# 0 - C (Center)
# 1 - D (Defense)
# 2 - W (Winger)

In [9]:
# Create train and test splits
X_train, X_test, y_train, y_test = train_test_split(X, encoded_y, random_state=42)

In [10]:
from sklearn.preprocessing import StandardScaler

# Standardize the columns
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
X_train.shape

(2514, 18)

In [24]:
# Support vector machine linear classifier
from sklearn.svm import SVC 
model = SVC(kernel='linear',probability=True)

In [25]:
# Create the GridSearch estimator along with a parameter object containing the values to adjust
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [3, 4, 5],
              'gamma': [0.000000001, 0.00000001, 0.0000001]}
grid = GridSearchCV(model, param_grid, verbose=3)

In [26]:
# Fit the model using the grid search estimator. 
# This will take the SVC model and try each combination of parameters
grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] C=3, gamma=1e-09 ................................................
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[CV] .................... C=3, gamma=1e-09, score=0.845, total=   0.6s
[CV] C=3, gamma=1e-09 ................................................
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.5s remaining:    0.0s
[CV] .................... C=3, gamma=1e-09, score=0.861, total=   0.6s
[CV] C=3, gamma=1e-09 ................................................
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.1s remaining:    0.0s
[CV] .................... C=3, gamma=1e-09, score=0.839, total=   0.6s
[CV] C=3, gamma=1e-09 ................................................
[CV] .................... C=3, gamma=1e-09, score=0.861, total=   0.6s
[CV] C=3, gamma=1e-09 ................................................
[CV] .................... C=3, gamma=1e-09, score=0.869, tot

GridSearchCV(estimator=SVC(kernel='linear', probability=True),
             param_grid={'C': [3, 4, 5], 'gamma': [1e-09, 1e-08, 1e-07]},
             verbose=3)

In [27]:
# List the best parameters for this dataset
print(grid.best_params_)

{'C': 3, 'gamma': 1e-09}


In [28]:
# Make predictions with the hypertuned model
predictions = grid.predict(X_test_scaled)

In [29]:
print('Test Acc: %.3f' % grid.score(X_test_scaled, y_test))

Test Acc: 0.863


In [30]:
# Calculate classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, predictions,
                            target_names=["Center", "Defense", "Winger"]))

              precision    recall  f1-score   support

      Center       0.82      0.77      0.79       222
     Defense       0.95      0.95      0.95       290
      Winger       0.81      0.85      0.83       327

    accuracy                           0.86       839
   macro avg       0.86      0.86      0.86       839
weighted avg       0.86      0.86      0.86       839



## Save the Model

In [45]:
# Save model
joblib.dump(grid, "position_svm_model.pkl")

['position_svm_model.pkl']