In [1]:
# import all necessary packages
import tensorflow as tf
import os
import numpy as np
import time
from sklearn.preprocessing import LabelBinarizer 

In [2]:
# setup constants
# The dataset location is machine-specific, so it can be overridden with the
# VIDEO_DATASET_DIRECTORY environment variable; when that variable is unset
# the original hard-coded location is used.
DATASET_DIRECTORY = os.environ.get(
    'VIDEO_DATASET_DIRECTORY',
    'C:/Users/thoma/Documents/CSU East Bay/2nd Year/Fall 2019/CS 663/Projects/Project 3/Video Dataset (Full)')
TENSORBOARD_LOG_DIRECTORY = "logs"  # directory handed to the TensorBoard callback
NUM_EPOCHS = 20                     # epochs passed to model.fit
SEQUENCE_LENGTH = 40                # rows of each zero-padded feature array
FEATURE_LENGTH = 1280               # columns of each zero-padded feature array
BATCH_SIZE = 5                      # elements per dataset batch

In [3]:
# class names; each one is matched against the name of a feature file's
# parent directory when labels are encoded
LABELS = [
    'MoveLeft',
    'MoveRight',
    'MoveStraight',
]
# fit() returns the fitted binarizer itself, so creation and fitting chain
encoder = LabelBinarizer().fit(LABELS)
encoder

LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)

In [4]:
# keras Sequential classifier, assembled one layer at a time:
#   1) Masking layer with mask_value 0.
#   2) LSTM layer with 512 cells, dropout 0.5 and recurrent_dropout 0.5
#   3) fully connected relu layer with 256 outputs
#   4) dropout layer with rate 0.5
#   5) final fully connected softmax layer with one output per entry of
#      LABELS (the number of classes)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Masking(mask_value=0.))
model.add(tf.keras.layers.LSTM(512, dropout=0.5, recurrent_dropout=0.5))
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(len(LABELS), activation='softmax'))

In [5]:
# compile the model
# The string alias 'top_k_categorical_accuracy' ranks with k=5, which is
# trivially 1.0 when there are only len(LABELS) == 3 classes. An explicit
# TopKCategoricalAccuracy with k=2 makes the metric informative; its default
# name is still 'top_k_categorical_accuracy', so logged metric names do not
# change.
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy',
                       tf.keras.metrics.TopKCategoricalAccuracy(k=2)])

In [6]:
# get all of the filenames and paths for the training and validation datasets
training_file = os.path.join(DATASET_DIRECTORY,'TrainList.txt')
validation_file = os.path.join(DATASET_DIRECTORY,'ValidateList.txt')


def _read_path_list(list_file):
    """Return the whitespace-stripped, non-blank lines of ``list_file``.

    Blank lines (for example a trailing empty line at the end of the file)
    are skipped so that no empty path ends up in the returned list.

    Args:
        list_file: path of a text file with one entry per line.

    Returns:
        list of str, in file order.
    """
    with open(list_file) as f:
        stripped_rows = (row.strip() for row in f)
        return [row for row in stripped_rows if row]


training_list = _read_path_list(training_file)
validation_list = _read_path_list(validation_file)

In [7]:
# function to create a generator that will yield a numpy array of video
# features and the encoded class label
def make_generator(file_list):
    """Build a zero-argument generator function over ``file_list``.

    Args:
        file_list: sequence of paths that ``np.load`` can read. The name of
            each file's parent directory is used as its class label.
            The sequence itself is never modified.

    Returns:
        A callable taking no arguments. Each call returns a fresh generator
        that visits every path once, in a new random order, and yields
        ``(padded_sequence, label)`` where ``padded_sequence`` is a
        ``(SEQUENCE_LENGTH, FEATURE_LENGTH)`` array holding the loaded
        features followed by zero rows, and ``label`` is the first row of
        ``encoder.transform([class_label])``.
    """
    def generator():
        # Shuffle a private copy so the caller's list keeps its order.
        shuffled_paths = list(file_list)
        np.random.shuffle(shuffled_paths)
        for path in shuffled_paths:
            class_label = os.path.basename(os.path.dirname(path))
            # Keep at most SEQUENCE_LENGTH rows: a longer array does not fit
            # the fixed-size buffer below and the assignment would raise.
            features = np.asarray(np.load(path))[:SEQUENCE_LENGTH]

            padded_sequence = np.zeros((SEQUENCE_LENGTH, FEATURE_LENGTH))
            padded_sequence[0:len(features)] = features

            transformed_label = encoder.transform([class_label])
            yield padded_sequence, transformed_label[0]
    return generator

In [8]:
# setup the training and validation datasets
def _make_batched_dataset(file_list):
    """Wrap the generator for ``file_list`` in a batched, prefetching dataset."""
    element_types = (tf.float32, tf.int16)
    # per-element shapes: the padded feature array and a label of length len(LABELS)
    element_shapes = ((SEQUENCE_LENGTH, FEATURE_LENGTH), len(LABELS))
    dataset = tf.data.Dataset.from_generator(
        make_generator(file_list),
        output_types=element_types,
        output_shapes=element_shapes,
    )
    # incomplete final batches are dropped
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    return dataset.prefetch(tf.data.experimental.AUTOTUNE)


train_dataset = _make_batched_dataset(training_list)
valid_dataset = _make_batched_dataset(validation_list)

In [10]:
# create the directory for the Tensorboard logging files
# makedirs with exist_ok=True does nothing when the directory already exists
# (no separate check-then-create step) and also creates missing parent
# directories if the log path is ever nested
os.makedirs(TENSORBOARD_LOG_DIRECTORY, exist_ok=True)

In [11]:
# train on train_dataset for NUM_EPOCHS epochs with valid_dataset as the
# validation data; the TensorBoard callback logs to TENSORBOARD_LOG_DIRECTORY
# so the training results can be viewed in TensorBoard
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=TENSORBOARD_LOG_DIRECTORY,
    update_freq='epoch',
)
model.fit(
    train_dataset,
    epochs=NUM_EPOCHS,
    validation_data=valid_dataset,
    callbacks=[tensorboard_callback],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x205782999b0>

In [12]:
# load the TensorBoard notebook extension.
# if TensorBoard does not begin in the notebook then open a new broswer tab 
# and in the search bar type 'http://localhost:8086' to view the TensorBoard results
%load_ext tensorboard
%tensorboard --logdir="logs" --host localhost --port=8086

ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 14344.

In [13]:
# write the model to an HDF5 (.h5) file whose name carries the current
# date and time as a YYYYMMDD-HHMMSS stamp
timestr = time.strftime("%Y%m%d-%H%M%S")
model_file = "LSTM_MODEL_V1_{}.h5".format(timestr)
model.save(model_file)