In [56]:
import cv2
import numpy as np

import os
import sys

# Location of frames
training_image_src = '/mnt/disks/a/frames'
validation_image_src = '/mnt/disks/b/frames'

"""
The directory is divided into folders by the candidate number
Within each candidate's folder, the frames are further divided by the label
Naming convention of the frames is as follows: [candidate number]_[frame_number]_[label]
Single digit candidate numbers are padded with a 0
Frame numbers are consecutive and not padded
Label can be 0, 5 or 10
"""

# This function helps to extract data and labels and return it as a Numpy array from a given image file
def extract_data_and_label(image_path):
    # We use opencv to read the images as grayscale, this will give us the 2d vector of pixels
    # Note that it returns a numpy array and not a Python list, but Keras uses Numpy arrays anyway
    image = cv2.imread(image_path, cv2.cv2.IMREAD_GRAYSCALE)
    # Because some of the images are corrupt, we got to do this
    if image is None or image.data is None or image.size == 0:
        return None, None

    # Scale the images to a fixed size, second argument is the target dimension, chose an arbitrary
    # value for now, (100, 100). Additional arguments can be provided to fine-tune the scaling.
    image = cv2.resize(image, (100, 100))
    image = image / 255

    """
    !!! Should we extract only the faces? By right CNN is supposed to be able to pick out key features
    on its own, but this could possibly make it more effective. This can be done using opencv
    """

    # Next is to extract the labels for each image, in our case, it is just the last portion of the filename
    file_name = os.path.basename(image_path)
    label = int(os.path.splitext(file_name)[0].split('_')[2])
    # Convert to 0, 1 - we are only using images with labels 0 and 10 now
    label = 0 if label == 0 else 1
#     print(image.shape)
    return image, label

# Time to actually train the model
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, TimeDistributed, LSTM, Lambda, Input
from keras.utils import to_categorical
from keras.models import model_from_json
from keras.regularizers import l1
from keras.losses import BinaryCrossentropy

"""
Gonna have to do the procesing in batches because the images are too big to fit all on the ram at the same time.
To do so, we define a generator function that will help pull data in batches from the disks.

https://mc.ai/train-keras-model-with-large-dataset-batch-training/
"""
def batch_generator(files, batch_size):
    counter = 0
    while True:
        pixels = []
        labels = []

        # print('Generating batch...')
        while len(pixels) < batch_size:
            filename = files[counter]
            data, label = extract_data_and_label(filename)
            
            if data is None and label is None:
                counter = (counter + 1) % len(files)
                continue

            counter = (counter + 1) % len(files)
            pixels.append(data)
            labels.append(label)

        pixels = np.array(pixels)
        labels = np.array(labels)

        """
        Gotta reformat the data (once again) to a format that the Conv2D layer accepts. Conv2D layer
        is just the convulutional layer provided by keras.

        The target format is (w, x, y, z) where w is the number of total images, x and y is the shape of each image
        and z is 1 which signifies that the images are grayscale
        """
        pixels = pixels.reshape(batch_size, 100, 100, 1)

        """
        We one-hot-encode our labels to create 3 cateogories, 0 being mapped awake, 5 being mapped to normal and 10 being
        mapped to sleepy

        !!! Perhaps there can be a better way of encoding the output data? Will this method result in a loss of ordinality?
        """
        labels = to_categorical(labels, num_classes=2)
        # yield is a Python thing for generators
        print(pixels.shape)
        yield pixels, labels


# Let's instantiate our generators for the training and validation set
print('Creating generators...')
from functools import cmp_to_key
def compare(a, b):
    candidate_a, frame_a, label_a = os.path.splitext(os.path.basename(a))[0].split('_')
    candidate_b, frame_b, label_b = os.path.splitext(os.path.basename(b))[0].split('_')
    if candidate_a != candidate_b:
        return int(candidate_a) - int(candidate_b)
    elif label_a != label_b:
        return int(label_a) - int(label_b)
    else:
        return int(frame_a) - int(frame_b)


training_files = []
for root, dirs, files in os.walk(training_image_src):
    temp = []
    for file in files:
        file_path = os.path.join(root, file)
        temp.append(file_path)
    temp = sorted(temp, key=cmp_to_key(compare))
    temp = temp[:300]
    training_files = training_files + temp
    
training_files = list(filter(lambda x: '_5.jpg' not in x, training_files))
# for f in training_files:
#     print(f)


validation_files = []
for root, dirs, files in os.walk(validation_image_src):
    temp = []
    for file in files:
        file_path = os.path.join(root, file)
        temp.append(file_path)
    temp = sorted(temp, key=cmp_to_key(compare))
    temp = temp[:300]
    validation_files = validation_files + temp

validation_files = list(filter(lambda x: '_5.jpg' not in x, validation_files))
# for f in validation_files:
#     print(f)

batch_size = 300
training_generator = batch_generator(training_files, batch_size)
validation_generator = batch_generator(validation_files, batch_size)

"""
Now we create our model. Keras allows you to build models in a sequential manner or a functional manner. Sequential
is easier to understand for me. Its only a syntax difference.
"""
print('Creating model...')
# model_cnn = Sequential()
model = Sequential()

"""
The model is essentially what we learnt in the course, a series of layers of neurons and in this case, convulutions.

We can tweak the attributes of each layer, such as the size, activation function, etc. This is what they mean by
playing with the parameters.

I believe what is passed between layers are just Numpy arrays, so what happens is that a layer will take in a Numpy
array, transform it using its neurons/convulutions and return the resulting Numpy array.

Note that the input shape and output shape of each layer must match.
"""

# model_cnn.add(Conv2D(64, kernel_size=3, activation='relu', input_shape=(100,100,1),  activity_regularizer=l1(0.001)))
# model_cnn.add(MaxPooling2D(pool_size=(2, 2)))
# model_cnn.add(Conv2D(32, kernel_size=3, activation='relu', activity_regularizer=l1(0.001)))
# model_cnn.add(MaxPooling2D(pool_size=(2, 2)))
# # Flattens the 2D data into a 1D Numpy array
# model_cnn.add(Flatten())

# input_lay = Input(shape=(None, 100, 100, 1)) #dimensions of your data
# time_distribute = TimeDistributed(Lambda(lambda x: model_cnn(x)))(input_lay) # keras.layers.Lambda is essential to make our trick work :)
# lstm_lay = LSTM(10)(time_distribute)
# output_lay = Dense(2, activation='softmax')(lstm_lay)


model.add(TimeDistributed(Conv2D(64, kernel_size=3, activation='relu', activity_regularizer=l1(0.001)), input_shape=(300,50,100, 100, 1)))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
model.add(TimeDistributed(Conv2D(32, kernel_size=3, activation='relu', activity_regularizer=l1(0.001))))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
# Flattens the 2D data into a 1D Numpy array
# model.add(TimeDistributed(Flatten()))
# model.add((LSTM(4, return_sequences=True, dropout=0.5)))

# Let's use LSTM instead of a standard MLP layer toend
# model.add(LSTM(10, return_sequences=True))
model.add(Flatten())
model.add(Dense(2, activation='softmax'))
# model = Model(inputs=[input_lay], outputs=[output_lay])
model.compile(optimizer='adam', loss=BinaryCrossentropy(), metrics=['accuracy'])

print('Starting training...')
# Training the model only takes a simple function call
# Epochs is the number of passes over the dataset we want for the training
history = model.fit_generator(training_generator, validation_data=validation_generator, 
                              epochs=2, steps_per_epoch=np.ceil(len(training_files)/batch_size), 
                              validation_steps=np.ceil(len(validation_files)/batch_size), 
                              verbose=1, shuffle=True)


# # Save model to json for future use
# model_json = model.to_json()
# with open("cnn.json", "w") as json_file:
#     json_file.write(model_json)
# # Save weights for future use
# model.save_weights("model.h5")

# You can save model and weights together
model.save("cnnlstm.h5");

# What follows is just a few library calls to plot the results throughout the course of the training
import matplotlib.pyplot as plt

# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.savefig('cnnlstm_accuracy.png')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.savefig('cnnlstm_loss.png')
plt.show()


Creating generators...
Creating model...


ValueError: strides should be of length 1, 3 or 5 but was 2