## Dependencies

In [1]:
import numpy as np
import pandas as pd

## Import and Prep Data

In [2]:
# Player data from CSV
stats = pd.read_csv("../resources/game_skater_stats.csv")
stats.columns

Index(['game_id', 'player_id', 'primaryPosition', 'team_id', 'timeOnIce',
       'assists', 'goals', 'shots', 'hits', 'powerPlayGoals',
       'powerPlayAssists', 'penaltyMinutes', 'faceOffWins', 'faceoffTaken',
       'takeaways', 'giveaways', 'shortHandedGoals', 'shortHandedAssists',
       'blocked', 'plusMinus', 'evenTimeOnIce', 'shortHandedTimeOnIce',
       'powerPlayTimeOnIce'],
      dtype='object')

In [3]:
# Group player ids to get total player stats
player_stats_avg = stats.groupby(["player_id","primaryPosition"],as_index=False).mean().fillna(0)

In [4]:
X_df = player_stats_avg.drop(columns=["game_id","team_id","player_id","primaryPosition"])

In [5]:
# Establish X, y
X = X_df.values
y = player_stats_avg["primaryPosition"]

## Create Training and Testing sets

In [6]:
# Import Dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [7]:
# Convert Player Positions (C, W, D) to numerical values
label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)

In [8]:
## CLASSES
# 0 - C (Center)
# 1 - D (Defense)
# 2 - W (Winger)

In [9]:
# Create train and test splits
X_train, X_test, y_uncat_train, y_uncat_test = train_test_split(X, encoded_y, random_state=42)

In [10]:
# Categorize the y -- One-Hot Encoding
y_train = to_categorical(y_uncat_train)
y_test = to_categorical(y_uncat_test)

In [11]:
from sklearn.preprocessing import StandardScaler

# Standardize the columns
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [27]:
X_train.shape

(2514, 19)

## Build a Deep Neural Network

In [12]:
# Import Dependecies
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [13]:
# Create an empty sequential model
model = Sequential()

In [14]:
# Add the first layer where the input dimensions are the columns of the training data
model.add(Dense(19, activation="relu", input_dim=X_train_scaled.shape[1]))

In [15]:
# Add a second hidden layer
model.add(Dense(10, activation="tanh"))

In [16]:
# Add a second hidden layer
model.add(Dense(10, activation="relu"))

In [17]:
# Add output layer
model.add(Dense(units=y_train.shape[1], activation="softmax"))

In [18]:
# Compile the model using categorical_crossentropy for the loss function, the adam optimizer,
# and add accuracy to the training metrics
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [19]:
# Use the training data to fit (train) the model
model.fit(X_train_scaled, y_train, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
79/79 - 1s - loss: 0.9980 - accuracy: 0.5111
Epoch 2/100
79/79 - 0s - loss: 0.7296 - accuracy: 0.7741
Epoch 3/100
79/79 - 0s - loss: 0.4959 - accuracy: 0.8401
Epoch 4/100
79/79 - 0s - loss: 0.4239 - accuracy: 0.8564
Epoch 5/100
79/79 - 0s - loss: 0.3972 - accuracy: 0.8600
Epoch 6/100
79/79 - 0s - loss: 0.3848 - accuracy: 0.8600
Epoch 7/100
79/79 - 0s - loss: 0.3752 - accuracy: 0.8608
Epoch 8/100
79/79 - 0s - loss: 0.3687 - accuracy: 0.8604
Epoch 9/100
79/79 - 0s - loss: 0.3630 - accuracy: 0.8644
Epoch 10/100
79/79 - 0s - loss: 0.3583 - accuracy: 0.8656
Epoch 11/100
79/79 - 0s - loss: 0.3541 - accuracy: 0.8671
Epoch 12/100
79/79 - 0s - loss: 0.3494 - accuracy: 0.8675
Epoch 13/100
79/79 - 0s - loss: 0.3472 - accuracy: 0.8671
Epoch 14/100
79/79 - 0s - loss: 0.3438 - accuracy: 0.8691
Epoch 15/100
79/79 - 0s - loss: 0.3410 - accuracy: 0.8679
Epoch 16/100
79/79 - 0s - loss: 0.3377 - accuracy: 0.8683
Epoch 17/100
79/79 - 0s - loss: 0.3345 - accuracy: 0.8695
Epoch 18/100
79/79 - 0s

<tensorflow.python.keras.callbacks.History at 0x25aaf3ccdc0>

## Evaluate the Model

In [20]:
# Evaluate the model using the training data
model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

27/27 - 0s - loss: 0.4050 - accuracy: 0.8451
Loss: 0.40497153997421265, Accuracy: 0.8450536131858826


In [21]:
# Row to test
row = 3

In [22]:
# Actual Class
y_test[row]

array([0., 0., 1.], dtype=float32)

In [23]:
# Grab just one data point to test with
test = np.expand_dims(X_test[row], axis=0)

In [24]:
# Make a prediction
print(f"Predicted class: {np.argmax(model.predict(test), axis=-1)}")

Predicted class: [1]


## Save the Model

In [25]:
# Save model
# model.save("position_model.h5")