In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf
import tensorflow.keras.layers as tfl

2022-12-07 13:39:38.025669: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from tensorflow.keras.models import Model
import tensorflow.keras.optimizers as optimizers
import tensorflow.keras.metrics as metrics

In [3]:

# Directory holding the pre-cleaned "small" dataset. Every load below is built
# from this one constant, so switching dataset variants is a one-line change.
DATA_DIR = "./cleaned_data/small"

# Training features and labels.
x_train = np.load(f"{DATA_DIR}/x_train.npy")
y_train = np.load(f"{DATA_DIR}/y_train.npy")
# Presumably the mean / std used to normalise the training features
# (inferred from the file names only — confirm against the cleaning script).
train_mu = np.load(f"{DATA_DIR}/train_mu.npy")
train_std = np.load(f"{DATA_DIR}/train_std.npy")
# Validation features and labels.
x_val = np.load(f"{DATA_DIR}/x_val.npy")
y_val = np.load(f"{DATA_DIR}/y_val.npy")


In [4]:
# Label row layout:  y = [no sack, sack, time to sack]
# Feature layout:    x = (M, 23, 11)
# Show the train and validation feature shapes, one per line.
print(x_train.shape, x_val.shape, sep="\n")

(5991, 23, 11)
(1284, 23, 11)


In [5]:
import keras.backend as K

def my_loss(y_true, y_output):
    """Combined loss: class cross-entropy plus time MSE on true-sack rows.

    Args:
        y_true: label batch; the notebook documents each row as
            [no sack, sack, time to sack].
        y_output: model output batch with the same column layout.

    Returns:
        Categorical cross-entropy over every column except the last. When at
        least one row has ``y_true[:, 1] == 1``, the mean squared error of the
        last column, restricted to those rows, is added on top.
    """
    class_loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    time_loss_fn = tf.keras.losses.MeanSquaredError()

    # Classification term: all columns but the last (the time column).
    class_loss = class_loss_fn(y_true[:, 0:-1], y_output[:, 0:-1])

    # Only rows labelled as a sack contribute to the time term.
    is_sack = y_true[:, 1] == 1
    sack_true = y_true[is_sack]

    if len(sack_true) == 0:
        # No sack in this batch: the time term is skipped entirely.
        return class_loss
    else:
        sack_pred = y_output[is_sack]
        return class_loss + time_loss_fn(sack_true[:, -1], sack_pred[:, -1])

def bce_metric(y_true, y_output):
    """Mean binary cross-entropy over every column except the last.

    The last column (time to sack, per the notebook's label layout) is
    excluded; the remaining columns are treated as probabilities, not logits.
    """
    true_probs = y_true[:, 0:-1]
    pred_probs = y_output[:, 0:-1]
    per_element = K.binary_crossentropy(true_probs, pred_probs, from_logits=False)
    return K.mean(per_element)

def mse_metric(y_true, y_output):
    """Mean squared error of the last column, over true-sack rows only.

    Rows count as true sacks when ``y_true[:, 1] == 1``. Returns ``0.0`` when
    the batch contains no such row.
    """
    is_sack = y_true[:, 1] == 1
    sack_true = y_true[is_sack]
    if len(sack_true) == 0:
        return 0.0
    else:
        # Difference between labelled and predicted last column on sack rows.
        time_error = sack_true[:, -1] - y_output[is_sack][:, -1]
        return K.mean(K.square(time_error), axis=-1)
    # return K.mean(K.square(y_pred[:,-1] - y_true[:,-1]), axis=-1)
    
def accuracy_metric(y_true, y_output):
    """Fraction of rows whose predicted class index equals ``y_true[:, 1]``.

    The predicted class is the argmax over every output column except the last.
    """
    class_probs = y_output[:, 0:-1]
    predicted_class = K.cast(K.argmax(class_probs, axis=-1), 'float32')
    hits = K.cast(y_true[:, 1] == predicted_class, 'float32')
    return K.mean(hits)

# https://stackoverflow.com/questions/43547402/how-to-calculate-f1-macro-in-keras
def recall(y_true, y_output):
    """Batch recall for the sack class (label column 1).

    A row is predicted positive when the argmax over every output column
    except the last is 1. Returns true positives divided by labelled
    positives, with ``K.epsilon()`` added to the denominator.
    """
    predicted_sack = K.cast(K.argmax(y_output[:, 0:-1], axis=-1), 'float32')
    hits = K.round(K.clip(y_true[:, 1] * predicted_sack, 0, 1))
    actual = K.round(K.clip(y_true[:, 1], 0, 1))
    score = K.sum(hits) / (K.sum(actual) + K.epsilon())
    return score

def precision(y_true, y_output):
    """Batch precision for the sack class (label column 1).

    A row is predicted positive when the argmax over every output column
    except the last is 1. Returns true positives divided by predicted
    positives, with ``K.epsilon()`` added to the denominator.
    """
    predicted_sack = K.cast(K.argmax(y_output[:, 0:-1], axis=-1), 'float32')
    hits = K.round(K.clip(y_true[:, 1] * predicted_sack, 0, 1))
    flagged = K.round(K.clip(predicted_sack, 0, 1))
    score = K.sum(hits) / (K.sum(flagged) + K.epsilon())
    return score


In [40]:
''' 
Model

how cropping layer works - https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-cropping-layers-with-keras.md
'''

' \nModel\n\nhow cropping layer works - https://github.com/christianversloot/machine-learning-articles/blob/main/how-to-use-cropping-layers-with-keras.md\n'

In [47]:
''' 
Output:
[prob of no sack, prob of sack, time till sack]

If no sack is predicted, the time value doesn't matter (it is recorded as -1 in the training data)
'''

def createModel(input_shape = (23,7)):
    """Build the (uncompiled) sack-prediction network.

    Architecture: Flatten -> Dense(50, relu) -> Dense(3, linear). The three raw
    outputs are then split: the first two pass through a softmax (the notebook
    documents them as [prob of no sack, prob of sack]) and the third is passed
    through unchanged (time till sack). The pieces are concatenated back into
    a single (batch, 3) output.

    Args:
        input_shape: shape of one example, without the batch dimension.

    Returns:
        A Keras ``Model`` mapping ``(batch, *input_shape)`` to ``(batch, 3)``.
    """
    X = tfl.Input(input_shape)  # define the input to the model

    # The layers below infer their input shape from the tensor they are called
    # on. The former hard-coded `input_shape=(23, 7)` kwarg merely duplicated
    # the default and silently disagreed with any other `input_shape` argument.
    flat = tfl.Flatten()(X)     # Flatten to pass into linear layers
    hidden = tfl.Dense(50, activation='relu')(flat)
    raw_out = tfl.Dense(3, activation=None)(hidden)

    # have layer (batch_size, 3). Want to take (b, [0,1]) and turn them into probabilities, and keep (b, [2]) as time
    # https://datascience.stackexchange.com/questions/86740/how-to-slice-an-input-in-keras
    # Cropping1D crops along axis 1, so add a trailing unit axis first.
    intermediate = tfl.Reshape((3,1))(raw_out)

    # Drop the last of the 3 entries -> the two class scores, then softmax them.
    probs = tfl.Cropping1D(cropping=(0,1))(intermediate)
    probs = tfl.Reshape((2,))(probs)
    probs = tfl.Activation('softmax')(probs)

    # Drop the first 2 entries -> the raw time prediction (no activation).
    time_to_sack = tfl.Cropping1D(cropping=(2,0))(intermediate)
    time_to_sack = tfl.Reshape((1,))(time_to_sack)

    # concatenate the probabilities and predicted_time_to_sack back into one layer
    out = tfl.Concatenate(axis=-1)([probs, time_to_sack])

    return Model(inputs=X, outputs=out)
    

In [57]:
model = createModel()

# Model.summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line after the table.
model.summary()

Model: "model_10"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_11 (InputLayer)          [(None, 23, 7)]      0           []                               
                                                                                                  
 flatten_10 (Flatten)           (None, 161)          0           ['input_11[0][0]']               
                                                                                                  
 dense_20 (Dense)               (None, 50)           8100        ['flatten_10[0][0]']             
                                                                                                  
 dense_21 (Dense)               (None, 3)            153         ['dense_20[0][0]']               
                                                                                           

In [58]:
# Adam hyper-parameters.
LEARNING_RATE = 1e-6
BETA_1 = 0.9
BETA_2 = 0.999
EPS = 1e-07

# Better optimizer: Adam driven by an exponentially decaying learning rate
# (decay factor 0.9 per 10000 steps, starting from LEARNING_RATE).
lr_schedule = optimizers.schedules.ExponentialDecay(
    LEARNING_RATE,
    decay_steps=10000,
    decay_rate=0.9,
)

scheduled_opt = optimizers.Adam(
    epsilon=EPS,
    beta_2=BETA_2,
    beta_1=BETA_1,
    learning_rate=lr_schedule,
)

In [59]:
# Fixed-learning-rate Adam.
# NOTE(review): `opt` is not passed to compile() below, which uses
# `scheduled_opt` instead — confirm which optimizer is intended.
opt = optimizers.Adam(learning_rate=LEARNING_RATE, beta_1=BETA_1, beta_2=BETA_2, epsilon=EPS)

# Train on the custom combined loss and track each component separately.
model.compile(
    optimizer=scheduled_opt,
    loss=my_loss,
    metrics=[accuracy_metric, bce_metric, mse_metric, recall, precision],
)

In [60]:
NUM_EPOCHS = 20
# Only feature columns 4 onward are fed to the model: 7 of the 11 columns,
# matching the model's (23, 7) input.
history = model.fit(
    x=x_train[:, :, 4:],
    y=y_train,
    epochs=NUM_EPOCHS,
    validation_data=(x_val[:, :, 4:], y_val),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [30]:
# Share of training plays labelled "no sack" (label column 0), stored as a
# fraction rounded to 3 decimals (0.941 in the recorded run).
train_no_sack = np.round(np.sum(y_train[:,0])/len(y_train), 3)
# The stored value is a fraction, so format it as a percentage to match the
# wording of the message (previously it printed e.g. "0.941" as a "percentage").
print(f"Percentage of training plays with no sack = {train_no_sack:.1%}")

Percentage of training plays with no sack = 0.941


In [31]:
# Predictions for the first five training plays; output columns are
# [prob of no sack, prob of sack, time till sack].
model.predict(x=x_train[:5, :, 4:])



array([[ 0.30376208,  0.69623786,  1.5447886 ],
       [ 0.8638921 ,  0.13610792,  1.7055527 ],
       [ 0.35448074,  0.64551926, -0.32487267],
       [ 0.6310289 ,  0.36897105,  0.13501425],
       [ 0.8446053 ,  0.15539473, -0.7607045 ]], dtype=float32)

In [32]:
# One row per epoch, one column per tracked loss/metric (train and val_*).
metrics_df = pd.DataFrame(data=history.history)


In [33]:
# Last five epochs of the training history.
metrics_df.tail(n=5)

Unnamed: 0,loss,accuracy_metric,bce_metric,mse_metric,recall,precision,val_loss,val_accuracy_metric,val_bce_metric,val_mse_metric,val_recall,val_precision
15,6.631446,0.71374,0.555776,6.05003,0.23289,0.060413,4.734457,0.721799,0.537242,4.102384,0.332114,0.06964
16,7.17853,0.716803,0.553032,6.598741,0.202216,0.061228,4.674902,0.723323,0.533346,4.047948,0.332114,0.06964
17,6.722048,0.720222,0.547747,6.17324,0.221543,0.059844,4.620469,0.724848,0.529365,3.998605,0.319919,0.066232
18,6.708419,0.724639,0.543701,6.138453,0.218883,0.058618,4.56505,0.728659,0.525514,3.948172,0.319919,0.068914
19,5.872385,0.727109,0.541165,5.309736,0.243262,0.06443,4.514002,0.733994,0.521677,3.901999,0.319919,0.069912


In [34]:
# Share of validation plays labelled "no sack" (label column 0), stored as a
# fraction rounded to 3 decimals (0.947 in the recorded run). Despite its name,
# `num_no_sack` is a fraction, not a count.
num_no_sack = np.round(np.sum(y_val[:,0])/len(y_val), 3)
# The stored value is a fraction, so format it as a percentage to match the
# wording of the message (previously it printed e.g. "0.947" as a "percentage").
print(f"Percentage of validation plays with no sack = {num_no_sack:.1%}")

Percentage of validation plays with no sack = 0.947


In [35]:
# evaluate() returns the loss followed by the metrics in the order they were
# passed to compile(): accuracy, bce, mse, recall, precision.
(
    val_loss,
    cat_acc,
    val_bce,
    val_mse,
    val_recall,
    val_precision,
) = model.evaluate(x=x_val[:, :, 4:], y=y_val, verbose=2)

41/41 - 0s - loss: 4.5140 - accuracy_metric: 0.7340 - bce_metric: 0.5217 - mse_metric: 3.9020 - recall: 0.3199 - precision: 0.0699 - 135ms/epoch - 3ms/step


In [36]:

# Report every validation figure, one per line.
print(
    f"val loss = {val_loss}",
    f"categorical accuracy = {cat_acc}",
    f"val_bce = {val_bce}",
    f"val_mse = {val_mse}",
    f"val_recall = {val_recall}",
    f"val_precision = {val_precision}",
    sep="\n",
)

val loss = 4.514001846313477
categorical accuracy = 0.7339938879013062
val_bce = 0.5216765999794006
val_mse = 3.9019994735717773
val_recall = 0.319918692111969
val_precision = 0.0699116513133049


In [38]:
# First output column (prob of no sack) for every validation play.
model.predict(x=x_val[:, :, 4:])[:, 0]



array([0.4653594 , 0.8970736 , 0.6486465 , ..., 0.89433753, 0.8607381 ,
       0.9926577 ], dtype=float32)

In [39]:
# https://www.kdnuggets.com/2021/02/saving-loading-models-tensorflow.html

# Weights only (not the full model); the path is tagged with the epoch count.
model_string = f"models/fifth_model/weights_epochs{NUM_EPOCHS}"
model.save_weights(filepath=model_string)