#### Next two cells are only needed for a Google Colab environment.

In [0]:
# Colab only: attach Google Drive to the runtime's file system.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [0]:
# Colab only: switch the working directory to the project folder on the mounted Drive.
# Presumably needed so that the local `utils` package imported below can be found — confirm.
%cd '/content/drive/My Drive/CodingProjects/skateboard_trick_classification'

In [0]:
import numpy as np

from keras.callbacks import EarlyStopping
from keras.layers import concatenate, Dense, Dropout, GlobalAveragePooling3D, Softmax
from keras.models import load_model, Model
from keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix

from utils import config
from utils.data_generator import DataGenerator
from utils.i3d_inception import Inception_Inflated3d

## Data Generators

In [0]:
# One generator per dataset split. Only the training generator is built with
# is_training=True; each generator is also pointed at the spectrogram directory
# that belongs to the same split.
training_generator = DataGenerator(
    config.VIDEO_TRAINING_DIR,
    config.RGB_TRAINING_BATCH_SIZE,
    is_training=True,
    spectrogram_dir=config.SPECTROGRAM_TRAINING_DIR,
)

validation_generator = DataGenerator(
    config.VIDEO_VALIDATION_DIR,
    config.RGB_VALIDATION_BATCH_SIZE,
    is_training=False,
    spectrogram_dir=config.SPECTROGRAM_VALIDATION_DIR,
)

test_generator = DataGenerator(
    config.VIDEO_TEST_DIR,
    config.RGB_TEST_BATCH_SIZE,
    is_training=False,
    spectrogram_dir=config.SPECTROGRAM_TEST_DIR,
)

# Train RGB+Audio Model

In [0]:
# Input shape without the batch axis. The leading None leaves the first dimension
# unspecified (presumably the number of frames per clip — confirm against DataGenerator).
frame_dims = (config.RGB_FRAME_HEIGHT, config.RGB_FRAME_WIDTH, config.CHANNELS)
input_shape = (None,) + frame_dims

# I3D backbone without its classification head, initialised from the
# 'rgb_imagenet_and_kinetics' weights.
i3d_model = Inception_Inflated3d(
    include_top=False,
    weights='rgb_imagenet_and_kinetics',
    input_shape=input_shape,
    classes=config.RGB_N_CLASSES,
)

# Apply 50% dropout to the backbone output before it is merged with the audio branch.
rgb_dropout = Dropout(0.5)
i3d_model_output = rgb_dropout(i3d_model.output)

In [0]:
# Load the previously saved audio model from disk.
audio_model = load_model(config.AUDIO_MODEL_FILEPATH)

# Freeze every audio layer so that only the RGB branch and the new head are
# updated during the first training stage.
for audio_layer in audio_model.layers:
    audio_layer.trainable = False

# Use the output tensor of the layer named 'audio_predictions' as the audio branch output.
audio_predictions_layer = audio_model.get_layer('audio_predictions')
audio_model_output = audio_predictions_layer.output

In [0]:
# Fuse the RGB and audio branches, then classify with a single dense layer
# followed by a softmax over the RGB_N_CLASSES outputs.
fused_features = concatenate([i3d_model_output, audio_model_output])
class_scores = Dense(config.RGB_N_CLASSES, name='predictions')(fused_features)
class_probabilities = Softmax()(class_scores)

# Two-input model: video clip input plus the audio model's input.
model = Model(
    inputs=[i3d_model.input, audio_model.input],
    outputs=class_probabilities,
)
model.compile(
    optimizer=Adam(lr=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [0]:
# Stage 1: train with the audio layers frozen.
# The epoch count of 100 is only an upper bound: early stopping (patience of 4 epochs)
# ends the run and restores the best weights seen.
early_stopping = EarlyStopping(
    patience=4,
    restore_best_weights=True,
    verbose=1,
)
model.fit_generator(
    training_generator,
    epochs=100,
    validation_data=validation_generator,
    callbacks=[early_stopping],
)

In [0]:
# Compile once more before saving: this removes the training-related information
# from the model, which makes the saved file smaller.
model.compile(
    optimizer=Adam(lr=0.00001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.save(config.RGB_FROZEN_AUDIO_MODEL_FILEPATH)

## Unfreeze Audio Model

In [0]:
# Start the second stage from the checkpoint saved after the frozen-audio training.
frozen_audio_checkpoint = config.RGB_FROZEN_AUDIO_MODEL_FILEPATH
model = load_model(frozen_audio_checkpoint)

In [0]:
# Stage 2: make every layer of the combined model trainable, including the
# audio layers that were frozen during the first stage.
for model_layer in model.layers:
    model_layer.trainable = True

# Recompile after changing the trainable flags, using a learning rate of 0.00001.
model.compile(
    optimizer=Adam(lr=0.00001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [0]:
# Fine-tune the fully trainable model. As before, 100 epochs is an upper bound;
# early stopping (patience of 4 epochs) ends the run and restores the best weights seen.
early_stopping = EarlyStopping(
    patience=4,
    restore_best_weights=True,
    verbose=1,
)
model.fit_generator(
    training_generator,
    epochs=100,
    validation_data=validation_generator,
    callbacks=[early_stopping],
)

In [0]:
# Compile once more before saving: this removes the training-related information
# from the model, which makes the saved file smaller.
model.compile(
    optimizer=Adam(lr=0.00001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.save(config.RGB_AUDIO_MODEL_FILEPATH)

# Evaluate Model

In [0]:
# Evaluate the saved RGB+audio model rather than whatever is left in memory.
final_checkpoint = config.RGB_AUDIO_MODEL_FILEPATH
model = load_model(final_checkpoint)

## Validation Set

In [0]:
# Evaluate on the validation split.
# NOTE(review): this assumes the generator yields samples in the same order as
# `validation_generator.labels` (no shuffling, no dropped partial batch) — confirm
# in utils.data_generator, otherwise y_true and y_pred are misaligned.
y_true = validation_generator.labels
predictions = model.predict_generator(validation_generator)
y_pred = np.argmax(predictions, axis=1)  # predicted class index per sample

# Pin the label set to every known class index. Without `labels`, scikit-learn infers
# the classes from the data: classification_report then raises if a class is absent
# from both y_true and y_pred (its count no longer matches target_names), and the
# confusion matrix silently loses that row and column.
class_indices = list(range(len(config.RGB_CLASS_NAMES)))

report = classification_report(y_true, y_pred,
                               labels=class_indices,
                               target_names=config.RGB_CLASS_NAMES,
                               digits=4)
print(report)
# Rows are true classes and columns are predicted classes, in class_indices order.
print(confusion_matrix(y_true, y_pred, labels=class_indices))

              precision    recall  f1-score   support

    kickflip     0.5769    0.6977    0.6316        43
360_kickflip     0.6667    0.7143    0.6897        42
       50-50     0.6970    0.5111    0.5897        45
   nosegrind     0.4762    0.4651    0.4706        43
  boardslide     0.6512    0.6667    0.6588        42
   tailslide     0.5625    0.5870    0.5745        46
        fail     0.8974    0.8537    0.8750        41

    accuracy                         0.6391       302
   macro avg     0.6468    0.6422    0.6414       302
weighted avg     0.6446    0.6391    0.6386       302

[[30  9  0  2  2  0  0]
 [11 30  0  0  0  0  1]
 [ 1  0 23  8  6  7  0]
 [ 4  2  4 20  5  6  2]
 [ 1  0  3  2 28  7  1]
 [ 3  1  3 10  2 27  0]
 [ 2  3  0  0  0  1 35]]


## Test Set

In [6]:
# Evaluate on the held-out test split.
# NOTE(review): this assumes the generator yields samples in the same order as
# `test_generator.labels` (no shuffling, no dropped partial batch) — confirm
# in utils.data_generator, otherwise y_true and y_pred are misaligned.
y_true = test_generator.labels
predictions = model.predict_generator(test_generator)
y_pred = np.argmax(predictions, axis=1)  # predicted class index per sample

# Pin the label set to every known class index. Without `labels`, scikit-learn infers
# the classes from the data: classification_report then raises if a class is absent
# from both y_true and y_pred (its count no longer matches target_names), and the
# confusion matrix silently loses that row and column.
class_indices = list(range(len(config.RGB_CLASS_NAMES)))

report = classification_report(y_true, y_pred,
                               labels=class_indices,
                               target_names=config.RGB_CLASS_NAMES,
                               digits=4)
print(report)
# Rows are true classes and columns are predicted classes, in class_indices order.
print(confusion_matrix(y_true, y_pred, labels=class_indices))

              precision    recall  f1-score   support

    kickflip     0.6842    0.5200    0.5909        25
360_kickflip     0.6957    0.6400    0.6667        25
       50-50     0.5833    0.5600    0.5714        25
   nosegrind     0.5161    0.6400    0.5714        25
  boardslide     0.5185    0.5600    0.5385        25
   tailslide     0.5357    0.6000    0.5660        25
        fail     0.9130    0.8400    0.8750        25

    accuracy                         0.6229       175
   macro avg     0.6352    0.6229    0.6257       175
weighted avg     0.6352    0.6229    0.6257       175

[[13  7  0  1  1  2  1]
 [ 5 16  1  0  2  0  1]
 [ 0  0 14  3  2  6  0]
 [ 0  0  2 16  5  2  0]
 [ 0  0  5  5 14  1  0]
 [ 0  0  2  6  2 15  0]
 [ 1  0  0  0  1  2 21]]
