In [9]:
import tensorflow as tf
import numpy as np
import pandas as pd

ANN Prototype

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

model = Sequential([
    layers.Input(shape = (128,)),
    layers.Normalization(mean = 127, variance = 77.4),
    layers.Dense(200, activation='tanh'), #86
    layers.Dense(160, activation='tanh'), #64
    layers.Dense(140, activation='tanh'), #42
    layers.Dense(100, activation='tanh'), #21
    layers.Dense(60, activation='tanh'),
    layers.Dense(1, activation='relu')
])

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization_2 (Normalizati (None, 128)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 200)               25800     
_________________________________________________________________
dense_13 (Dense)             (None, 160)               32160     
_________________________________________________________________
dense_14 (Dense)             (None, 140)               22540     
_________________________________________________________________
dense_15 (Dense)             (None, 100)               14100     
_________________________________________________________________
dense_16 (Dense)             (None, 60)                6060      
_________________________________________________________________
dense_17 (Dense)             (None, 1)                

Test training

In [18]:
save_to_disk = True # Set to FALSE if we don't want this to overwrite the current model.

TrainFeatures = pd.read_csv('../data/trainFeaturesNewSplit.csv', header = 0, index_col = 0).drop(['video_id','time_stamp'], axis = 'columns')
TrainTargets = pd.read_csv('../data/trainTargetsNewSplit.csv', header = 0, index_col = 0).drop(['video_id','time_stamp'], axis = 'columns')
TrainTargets.iloc[:,0] = TrainTargets.iloc[:,0].astype(float)

EvalFeatures = pd.read_csv('../data/evalFeaturesNewSplit.csv', header = 0, index_col = 0).drop(['video_id','time_stamp'], axis = 'columns')
EvalTargets = pd.read_csv('../data/evalTargetsNewSplit.csv', header = 0, index_col = 0).drop(['video_id','time_stamp'], axis = 'columns')
EvalTargets.iloc[:,0] = EvalTargets.iloc[:,0].astype(float)

# Since our dataset is imbalanced, we need a weighted binary cross entropy loss function.
def weighted_bce(y_true, y_pred):
    weights = (y_true * 8) + 1.
    bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    weighted_bce = tf.math.reduce_mean(bce * weights)
    return weighted_bce

# Here, we choose the loss function to minimize, the optimizer algorithm, and additional loss metrics to display on screen.
#model.compile(loss = weighted_bce, optimizer = 'adam', metrics = ['accuracy', tf.keras.metrics.AUC(name='auc')])
model.compile(loss = weighted_bce, optimizer = 'adam', metrics = ['accuracy'])

# To address the imbalance, need to adjust the class weights.
#cw = {
#    0: 63943 / 52493,
#    1: 63943 / 11450
#}

# Time to train! Epochs = # times the training algorithm goes through the whole set.
# Batch size = # elements the algorithm "sees" at a time between adjusting model parameters.
model.fit(TrainFeatures,TrainTargets, epochs = 10, batch_size = 8)

if save_to_disk:
    # Saving model to JSON and weights to H5.
    model_json = model.to_json()
    with open("../models/model.json", "w") as json_file:
        json_file.write(model_json)
    model.save_weights("../models/model.h5")
    print("Saved model to disk")

# Measure accuracy on the evaluation set.
loss  = model.evaluate(EvalFeatures, EvalTargets)
print('Loss on Validation Set: ', loss)
print('Confusion matrix:')
predictions = np.round(model.predict(EvalFeatures))
predictions = np.minimum(predictions, np.ones((len(predictions),1))) # Sometimes the result can get rounded to 2.
true_labels = EvalTargets.iloc[:,0]

r = tf.keras.metrics.Recall()
r.update_state(true_labels, predictions)
print('Recall: ' + str(r.result().numpy()))

p = tf.keras.metrics.Precision()
p.update_state(true_labels, predictions)
print('Precision: ' + str(p.result().numpy()))

f1 = 2* r.result().numpy() * p.result().numpy() / (r.result().numpy() + p.result().numpy())
print('F1 Score: ' + str(f1))

conf_matrix = tf.math.confusion_matrix(true_labels, predictions)
conf_matrix

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Saved model to disk
Loss on Validation Set:  [1.2561132907867432, 0.8741105794906616]
Confusion matrix:
Recall: 0.7090484
Precision: 0.67785233
F1 Score: 0.6930995391920082


<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[9361,  864],
       [ 746, 1818]])>