# Multi-Modal Regression Model

## Imports

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import sys
sys.path.insert(1, '../')

from tcn import TCN, compiled_tcn
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Input, concatenate
from sklearn.preprocessing import StandardScaler
from data_preparation import prepare_x_data, get_Y_labels, unscale_Y, reshape_Y, reshape_X

In [None]:
# Single StandardScaler instance that is handed to every data_preparation
# helper in this notebook (visual features, audio features, labels) and later
# to unscale_Y.
# NOTE(review): if those helpers call fit/fit_transform on it, each call
# overwrites the previous fit — confirm against data_preparation.
scaler = StandardScaler()

## Data preparation

Data preparation includes: <br>
<ul>
  <li>selecting the necessary features from the source files</li>
  <li>creating a combined dataset for model training</li>
  <li>reshaping the data for model training.</li>
</ul>

In [None]:
# Load the training split of both modalities (visual first, then audio).
# NOTE(review): the positional arguments after the directory look like a CSV
# delimiter and a column offset/count — confirm in data_preparation.
x_train_visual = prepare_x_data('../Data/LLDs_video_openface/train',',', 5, scaler)
x_train_audio = prepare_x_data('../Data/LLDs_audio_eGeMAPS/train',';', 2, scaler)

# Training labels. The arguments 60 and 164 presumably select the label rows
# that belong to the training files — TODO confirm.
y_train = get_Y_labels('../Data/labels_metadata.csv', 60, 164, scaler)
# Reshape the labels using the number of visual training sequences.
y_train = reshape_Y(y_train,len(x_train_visual),1,1)

In [None]:
# Load the 'dev' split of both modalities; it serves as the test data here.
x_test_visual = prepare_x_data('../Data/LLDs_video_openface/dev',',', 5, scaler) 
x_test_audio = prepare_x_data('../Data/LLDs_audio_eGeMAPS/dev',';', 2, scaler)

# Test labels. The arguments 0 and 60 presumably select the label rows that
# belong to the dev files — TODO confirm.
y_test = get_Y_labels('../Data/labels_metadata.csv', 0, 60, scaler)
# Reshape the labels using the number of visual test sequences.
y_test = reshape_Y(y_test,len(x_test_visual),1,1)

In [None]:
# Map each training file's position to its number of rows, per modality.
v_len_dict = {pos: len(seq) for pos, seq in enumerate(x_train_visual)}
a_len_dict = {pos: len(seq) for pos, seq in enumerate(x_train_audio)}

# Position of the longest sequence in each modality (the first one wins on
# ties); the longest sequence is used for hyper-parameter tuning.
v_longest = max(v_len_dict, key=v_len_dict.get)
a_longest = max(a_len_dict, key=a_len_dict.get)

# Report the result as {position: number of rows}.
v_max_len = {v_longest: v_len_dict[v_longest]}
a_max_len = {a_longest: a_len_dict[a_longest]}

print(f'Longest seq audio: {a_max_len}')
print(f'Longest seq video: {v_max_len}')


# Temporal Convolutional Neural Network

Proposed temporal convolutional network (TCN) architecture for the multi-modal dataset. Each modality is processed by its own TCN branch, and the outputs of both branches are concatenated before the final Dense layer.

In [None]:
# Define input layer.
inputA = Input(shape=(None, 465))
inputB = Input(shape=(None, 23))

# Define hidden layer.
tcn_v = TCN(
    nb_filters=213, 
    kernel_size=35, 
    dilations=(128,256,512,1024), 
    padding='causal', 
    nb_stacks=1, 
    use_batch_norm=False, 
    use_layer_norm=False, 
    use_weight_norm=False, 
    use_skip_connections=True, 
    dropout_rate=0.004, 
    return_sequences=False, 
    input_shape=(None, 465))(inputA)

tcn_a = TCN(
    nb_filters=11, 
    kernel_size=34, 
    dilations=(128,256,512,1024), 
    padding='causal',
    nb_stacks=1, 
    use_batch_norm=False, 
    use_layer_norm=False, 
    use_weight_norm=False, 
    use_skip_connections=True,
    dropout_rate=0.003, 
    return_sequences=False, 
    input_shape=(None, 23))(inputB)


# Define output layer.
# output_v = Dense(8, activation='relu')(tcn_v)
# output_a = Dense(8, activation='relu')(tcn_a)

combined_input = concatenate([tcn_v, tcn_a])

output = Dense(1, activation='linear')(combined_input)

# Define optimizer and show summary.
model = Model(inputs=[inputA, inputB], outputs=[output])
model.compile(optimizer=Adam(learning_rate=0.003), loss='mse', metrics=['mae'])
model.summary()

In [None]:
# Render the architecture diagram (with shapes and layer names, without
# dtypes) to TCN_multi.png.
plot_model(
    model=model,
    to_file='TCN_multi.png',
    show_shapes=True,
    show_layer_names=True,
    show_dtype=False,
)

### Model training

Model training follows an iterative approach: the model is trained on one file at a time, and after each file the learned parameters are saved and then loaded again before the next iterative step. This setup is necessary because the source files do not have a uniform size and differ in their number of frames.

In [None]:
# Callback that monitors the training loss with a patience of 10 epochs.
# NOTE(review): the training loop calls fit with epochs=10, so a patience of
# 10 can presumably never trigger — confirm whether a smaller patience (or
# more epochs) was intended.
early_stop = EarlyStopping(monitor='loss', patience=10)

In [None]:
# Fit the model one file at a time.
train_index = 0
val_index = 0
loss = {}
train_eval = []
train_loss = []
train_mae = []

for train_v, train_a in zip(x_train_visual, x_train_audio):
    
    train_v = np.array(train_v).reshape((1, train_v.shape[0], -1))
    train_a = np.array(train_a).reshape((1, train_a.shape[0], -1))

    history = model.fit(x=[train_v, train_a], y=y_train[train_index], epochs=10, shuffle=False, verbose=0, callbacks=[early_stop])
    loss.update({train_index:history.history})

    train_index += 1

    # Save model
    model.save(r'TCN_Multi', include_optimizer=True)

    # Load model
    model = load_model(r'TCN_Multi', custom_objects={'TCN':TCN})

    scores = model.evaluate([train_v, train_a], y_train[val_index], verbose = 0)
    train_eval.append(scores)
    train_loss.append(scores[0])
    train_mae.append(scores[1])

    val_index += 1

In [None]:
# Arithmetic mean of the per-file training scores collected by the fit loop.
avg_train_loss, avg_train_mae = (
    sum(values) / len(values) for values in (train_loss, train_mae)
)
print("Train loss (avg):", avg_train_loss, "Train MAE (avg):", avg_train_mae)

In [None]:
# Per-file training scores, in the order the files were fitted.
plt.xlabel('Iterations')
plt.ylabel('AVG MSE Loss')
plt.plot(train_loss, label='MSE')
plt.plot(train_mae, label='MAE')
# The curves carry labels, but they are only drawn once a legend is requested;
# this matches the evaluation plot later in the notebook.
plt.legend()
plt.show()

### Model evaluation

Model evaluation is performed on a subset taken from the test data.

In [None]:
# The first 30 entries of each test collection form the evaluation subset.
x_val_visual = x_test_visual[:30]
x_val_audio = x_test_audio[:30]
y_val = y_test[:30]

In [None]:
eval_loss = []   # first evaluate() value per file (the compiled loss)
eval_mae = []    # second evaluate() value per file (the compiled metric)
evaluation = []  # raw evaluate() output per file

# Evaluate one file at a time; visual and audio sequences are paired by
# position and matched with the label at the same position.
for sample_idx, (visual_seq, audio_seq) in enumerate(zip(x_val_visual, x_val_audio)):
    model_inputs = [reshape_X(visual_seq), reshape_X(audio_seq)]

    scores = model.evaluate(model_inputs, y_val[sample_idx], verbose = 0)

    evaluation.append(scores)
    eval_loss.append(scores[0])
    eval_mae.append(scores[1])

In [None]:
# Arithmetic mean of the per-file evaluation scores.
avg_eval_loss, avg_eval_mae = (
    sum(values) / len(values) for values in (eval_loss, eval_mae)
)
print("Validation loss (avg):", avg_eval_loss, "Validation MAE (avg):", avg_eval_mae)

In [None]:
# Per-file evaluation scores, in evaluation order.
plt.plot(eval_loss, label="MSE")
plt.plot(eval_mae, label="MAE")
plt.xlabel("Iterations")
plt.ylabel("Scaled YMRS value")
plt.legend()
plt.show()

### YMRS prediction and comparison

Predictions are made on a different subset taken from the test dataset. The actual and predicted YMRS values are then compared.

In [None]:
# Everything after the first 30 test entries forms the prediction subset.
x_pred_visual = x_test_visual[30:]
x_pred_audio = x_test_audio[30:]
y_pred_actual = y_test[30:]

In [None]:
prediction = []  # model.predict() output per file
pred_eval = []   # raw evaluate() output per file
pred_loss = []   # first evaluate() value per file (the compiled loss)
pred_mae = []    # second evaluate() value per file (the compiled metric)

# Predict and score one file at a time; modalities are paired by position.
for sample_idx, (visual_seq, audio_seq) in enumerate(zip(x_pred_visual, x_pred_audio)):
    # Add a leading batch axis of size 1: (1, time steps, features).
    visual_batch = np.array(visual_seq).reshape((1, visual_seq.shape[0], -1))
    audio_batch = np.array(audio_seq).reshape((1, audio_seq.shape[0], -1))

    prediction.append(model.predict([visual_batch, audio_batch]))

    scores = model.evaluate(
        [visual_batch, audio_batch],
        y_pred_actual[sample_idx],
        verbose=0,
    )
    pred_eval.append(scores)
    pred_loss.append(scores[0])
    pred_mae.append(scores[1])


In [None]:
# Arithmetic mean of the per-file scores on the prediction subset.
avg_pred_loss, avg_pred_mae = (
    sum(values) / len(values) for values in (pred_loss, pred_mae)
)
print("Prediction loss (avg):", avg_pred_loss, "Prediction MAE (avg):", avg_pred_mae)

In [None]:
# Bring predictions and targets into a single-column 2-D layout.
column_shape = (-1, 1)
prediction = np.array(prediction).reshape(column_shape)
y_pred_actual = y_pred_actual.reshape(column_shape)

In [None]:
# Pass predictions and targets through unscale_Y, presumably to map them back
# to the original label scale.
# NOTE(review): `scaler` is the same object that was passed to every
# prepare_x_data / get_Y_labels call; this is only correct if it still holds
# the label statistics at this point — confirm in data_preparation.
prediction = unscale_Y(prediction, scaler)
y_pred_actual = unscale_Y(y_pred_actual, scaler)

In [None]:
# Side-by-side table of actual and predicted values after unscaling.
pred_df = pd.DataFrame(data=np.column_stack((y_pred_actual, prediction)), columns=['y_actual','y_pred'])
# Signed error, computed on the float values before y_actual is cast.
pred_df['pred_error'] = pred_df['y_actual'] - pred_df['y_pred']
# Order rows by actual value; reset_index() keeps the previous row position
# in an 'index' column.
pred_df = pred_df.sort_values(by=['y_actual']).reset_index()
# Round before casting to integer: a plain float -> int64 conversion
# truncates toward zero, so an unscaled label such as 11.999999 would be
# stored as 11 instead of 12.
pred_df['y_actual'] = pred_df['y_actual'].round().astype(np.int64)
pred_df

In [None]:
# Scatter of predicted (y-axis) against actual (x-axis) values.
plt.scatter(pred_df['y_actual'], pred_df['y_pred'])
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.show()

In [None]:
# Scatter of the signed error (actual - predicted, y-axis) against the
# actual value (x-axis).
plt.scatter(pred_df['y_actual'], pred_df['pred_error'])
plt.xlabel('Actual')
plt.ylabel('Actual - Predicted')
plt.show()

In [None]:
# Actual and predicted values as two curves over the row positions of pred_df.
plt.plot(pred_df['y_actual'], label="Actual")
plt.plot(pred_df['y_pred'], label="Predicted")
plt.xlabel("Test subject IDs")
plt.ylabel("Target value (YMRS)")
plt.legend()
plt.show()