Original file is located at
    https://colab.research.google.com/drive/1aSjpmnt8ETarmQxnMOaok3HXYp71tTSm

In [167]:
!pip install tensorflow-addons
!pip install keras-tuner

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import keras_tuner as kt

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import callbacks
from tensorflow.keras.metrics import mean_squared_error
from tensorflow_addons.metrics.r_square import RSquare

np.set_printoptions(precision=3, suppress=True)



In [168]:
#Upload NBA_Player_StatsV2.csv
raw_dataset = pd.read_csv('/content/NBA_Player_StatsV2.csv')

In [169]:
#Data cleanup including one-hot conversion
dataset = raw_dataset.copy()
dataset = dataset.drop(['Unnamed: 0'], axis = 1)
dataset['Player'] = pd.factorize(dataset.Player)[0] + 1
dataset['Tm'] = pd.factorize(dataset.Tm)[0] + 1
dataset['Pos'] = pd.factorize(dataset.Pos)[0] + 1
dataset.tail()

Unnamed: 0,Player,Pos,Age,Tm,G,MP,FG,FGA,FG%,3P,...,AST,STL,BLK,TOV,PF,PTS,Season,Target_PTS,Target_TRB,Target_AST
10036,1764,1,23.0,3,52.0,18.9,3.6,5.0,0.721,0.0,...,1.8,0.8,1.8,1.0,2.0,8.0,2021,10.0,9.6,2.0
10037,1711,1,25.0,20,41.0,32.3,8.0,15.6,0.514,1.9,...,1.7,0.8,1.2,2.0,2.1,21.0,2021,17.9,10.1,2.3
10038,1212,3,32.0,18,68.0,24.3,5.4,9.7,0.559,0.2,...,4.3,1.1,0.6,2.0,2.2,12.1,2021,6.2,4.0,2.0
10039,1669,2,22.0,17,63.0,33.7,7.7,17.7,0.438,2.2,...,9.4,0.8,0.2,4.1,1.8,25.3,2021,28.4,3.7,9.7
10040,1627,1,23.0,24,72.0,22.3,3.6,5.5,0.652,0.0,...,1.3,0.3,0.9,1.1,2.6,9.0,2021,10.3,8.5,1.6


In [170]:
#80-20 training-testing split
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

In [171]:
#Spliting X and Y
train_features = train_dataset.drop(['Target_PTS', 'Target_TRB', 'Target_AST'], axis=1)
test_features = test_dataset.drop(['Target_PTS', 'Target_TRB', 'Target_AST'], axis=1)

train_labels = train_dataset[['Target_PTS', 'Target_TRB', 'Target_AST']].copy()
test_labels = test_dataset[['Target_PTS', 'Target_TRB', 'Target_AST']].copy()

In [172]:
#normalizing data
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(np.array(train_features))
print(normalizer.mean.numpy())

[[ 800.49     3.009   26.539   16.881   69.316   25.877    4.255    9.136
     0.465    0.58     1.634    0.244    3.674    7.499    0.487    0.496
     2.131    2.806    0.749    1.311    3.273    4.582    2.546    0.871
     0.545    1.605    2.311   11.223 2001.559]]


In [173]:
#model building with hyperparamer tuning: number of units, activation function,
#optional dropout layer, and learning rate
def build_model(hp):
  in_dim = train_features.shape[1]
  out_dim = train_labels.shape[1]

  hp_units1 = hp.Int('units1', min_value=32, max_value=512, step=32)
  hp_units2 = hp.Int('units2', min_value=32, max_value=512, step=32)
  #hp_units3 = hp.Int('units3', min_value=32, max_value=512, step=32)
  hp_activation1 = hp.Choice("activation1", ["relu", "tanh"])
  hp_activation2 = hp.Choice("activation2", ["relu", "tanh"])
  #hp_activation3 = hp.Choice("activation3", ["relu", "tanh"])
  hp_dropout1 = hp.Boolean("dropout1")
  #hp_dropout2 = hp.Boolean("dropout2")
  hp_lr = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")

  model = Sequential(normalizer)
  model.add(Dense(hp_units1, input_dim=in_dim, activation=hp_activation1))
  if hp_dropout1:
    model.add(layers.Dropout(rate=0.25))
  model.add(Dense(hp_units2, activation=hp_activation2))
  #if hp_dropout2:
    #model.add(layers.Dropout(rate=0.25))
  #model.add(Dense(hp_units3, activation=hp_activation3))
  model.add(Dense(out_dim))
  model.compile(loss="mse", optimizer=keras.optimizers.Adam(learning_rate=hp_lr),
                metrics=['msle'])
  
  return model

In [174]:
#tune hyperparameters
tuner = kt.Hyperband(
    build_model,
    objective='val_msle',
    max_epochs=20,
    directory='keras_tuner_dir',
    overwrite=True
)
tuner.search(train_features, train_labels, epochs=20, validation_split=0.2)

Trial 30 Complete [00h 00m 12s]
val_msle: 0.07789571583271027

Best val_msle So Far: 0.06518445163965225
Total elapsed time: 00h 04m 27s
INFO:tensorflow:Oracle triggered exit


In [175]:
#build model
model = tuner.get_best_models(num_models=1)[0]
model.build()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  (None, 29)               59        
 n)                                                              
                                                                 
 dense (Dense)               (None, 448)               13440     
                                                                 
 dense_1 (Dense)             (None, 224)               100576    
                                                                 
 dense_2 (Dense)             (None, 3)                 675       
                                                                 
Total params: 114,750
Trainable params: 114,691
Non-trainable params: 59
_________________________________________________________________


In [176]:
#fit model while minimizing overfitting
earlystopping = callbacks.EarlyStopping(monitor ="val_loss", mode ="min",
                                        verbose = 1, patience = 10,
                                        restore_best_weights = True)
model.fit(train_features, train_labels, epochs=100, batch_size=12,
              validation_split=0.2, callbacks = [earlystopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 13: early stopping


<keras.callbacks.History at 0x7fa1f32b0ed0>

In [177]:
#prediction
y_pred = model.predict(test_features)
y_test = test_labels.to_numpy()

In [178]:
PTS_pred = y_pred[:,0]
PTS_test = y_test[:,0]
TRB_pred = y_pred[:,1]
TRB_test = y_test[:,1]
AST_pred = y_pred[:,2]
AST_test = y_test[:,2]

In [179]:
#calculate mse and r2
print("PTS MSE:%.4f" % mean_squared_error(PTS_test, PTS_pred)) 
print("TRB MSE:%.4f" % mean_squared_error(TRB_test, TRB_pred))
print("AST MSE:%.4f" % mean_squared_error(AST_test, AST_pred))

metric = RSquare()
metric.update_state(PTS_test, PTS_pred)
print("PTS R2:%.4f" % metric.result().numpy())
metric.update_state(TRB_test, TRB_pred)
print("TRB R2:%.4f" % metric.result().numpy())
metric.update_state(AST_test, AST_pred)
print("AST R2:%.4f" % metric.result().numpy())

PTS MSE:8.7964
TRB MSE:1.4546
AST MSE:0.7183
PTS R2:0.7706
TRB R2:0.8477
AST R2:0.8793
