# GFE Classification: Training & Testing Using Neural Networks

#####  This notebook aims to use a Convolutional Neural Network (CNN) to classify captured facial footage as a specified emotion. Data exploration, training, and modelling will all be discussed below and mitigations will be provided. 
###### (i) Train/test the neural network on GFE data on a single emotion and evaluate performance measures
###### (ii) Repeat test on a different facial expression
###### (iii) Invert the roles of the Users
###### (iv) Use Dimensionality Reduction

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv1D, MaxPooling1D, BatchNormalization
import pandas as pd 
import numpy as np 
from sklearn import preprocessing
from sklearn.metrics import accuracy_score,confusion_matrix, precision_score, recall_score,roc_auc_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import time
import plotly.express as px
from keras.wrappers.scikit_learn import KerasClassifier
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split,GridSearchCV,KFold, cross_val_score
from keras.models import load_model
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold

## Train on User A and Test on User B "Negative" 

##### Load dataset and pre-process

In [None]:
# Grammatical facial expression analysed in this section
emotion = "negative"

# User A: datapoints file plus the matching header-less targets file
df_neg = pd.read_csv(f"grammatical_facial_expression/a_{emotion}_datapoints.txt", sep=" ")
df_neg_target = pd.read_csv(f"grammatical_facial_expression/a_{emotion}_targets.txt", sep=" ", header=None)

# User B: the same two files for the second user
df_neg_userb = pd.read_csv(f"grammatical_facial_expression/b_{emotion}_datapoints.txt", sep=" ")
df_neg_target_userb = pd.read_csv(f"grammatical_facial_expression/b_{emotion}_targets.txt", sep=" ", header=None)

# Append the labels to each user's frame as the trailing 'target' column
df_neg['target'] = df_neg_target
df_neg_userb['target'] = df_neg_target_userb

# Features: every column between the first one and the trailing target
# (the first column is presumably a timestamp/frame id -- TODO confirm).
# Labels: the trailing target column.
X_neg, y_neg = df_neg.iloc[:, 1:-1], df_neg.iloc[:, -1]
X_neg_userb, y_neg_userb = df_neg_userb.iloc[:, 1:-1], df_neg_userb.iloc[:, -1]

# Standardise the features. fit_transform is called once per user, so each
# user's data is scaled with that user's own mean and variance.
scaler = preprocessing.StandardScaler()
X_neg_scaled = scaler.fit_transform(X_neg)
X_neg_scaled_userb = scaler.fit_transform(X_neg_userb)

# One-hot encoder returning a dense array
encoder = OneHotEncoder(sparse=False)

In [None]:
# Shape the data for the Keras model: append a trailing axis of length 1
# so each sample goes from (features,) to (features, 1)
X_neg_scaled_transform = X_neg_scaled[:, :, np.newaxis]
X_neg_scaled_transform_userb = X_neg_scaled_userb[:, :, np.newaxis]

# One-hot encoder returning a dense array
encoder = OneHotEncoder(sparse=False)

# One-hot encode the labels (one column per class) to match the model's
# softmax output; the encoder is re-fitted on each user's targets
y_neg_onehot = encoder.fit_transform(df_neg_target)
y_neg_onehot_userb = encoder.fit_transform(df_neg_target_userb)

## Generate NN
Create NN below and create function to produce performance plots.

In [None]:
def NN_model(activation, dropout_rate=0.2, input_shape=(300, 1)):
    """Build and compile the fully connected baseline classifier.

    The network is a stack of Dense layers (there is no convolutional layer
    in it): Dense(300) -> Dropout -> Dense(100) -> Dropout -> Dense(10)
    -> Flatten -> Dense(10) -> Dense(2) -> softmax.

    Parameters
    ----------
    activation : str
        Activation used by every hidden Dense layer.
    dropout_rate : float, default 0.2
        Rate of the two Dropout layers.
    input_shape : tuple, default (300, 1)
        Shape of one sample (without the batch axis). The default matches
        the arrays produced by the pre-processing cells of this notebook.

    Returns
    -------
    Sequential
        Model compiled with binary cross-entropy loss, the Adam optimizer
        and the accuracy metric.
    """
    model = Sequential()

    # first hidden layer: 300 units; the only layer that needs the input shape
    model.add(Dense(300, activation=activation, input_shape=input_shape))

    # dropout for regularization to limit over-fitting
    model.add(Dropout(dropout_rate))

    # second hidden layer: 100 units (its input shape is inferred from the
    # previous layer, so none is passed here)
    model.add(Dense(100, activation=activation))
    model.add(Dropout(dropout_rate))

    # third hidden layer: 10 units
    model.add(Dense(10, activation=activation))

    # collapse the per-sample output to one vector before the last hidden layer
    model.add(Flatten())
    model.add(Dense(10, activation=activation))

    # two-unit softmax head: one probability per one-hot encoded class
    model.add(Dense(2))
    model.add(Activation('softmax'))

    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=['accuracy'])

    return model

def plot_loss(model, emotion, user):
    """Plot every metric recorded during training as a line per metric.

    Parameters
    ----------
    model : object exposing a ``history`` mapping
        The value returned by ``fit``; ``model.history`` is turned into a
        DataFrame with one row per epoch and one column per metric.
    emotion : str
        Emotion name shown in the figure title.
    user : str
        User name shown in the figure title.

    Returns
    -------
    The plotly figure, with the y axis fixed to the range [0, 1].
    """
    # wide -> long: the row index (epoch number) becomes an 'index' column,
    # then each metric column is melted into (variable, value) pairs
    history_long = (
        pd.DataFrame(model.history)
        .reset_index(level=0)
        .melt(id_vars=['index'])
    )
    history_long.columns = ['Epoch', 'metric', 'Value']

    fig = px.line(
        history_long,
        x='Epoch',
        y="Value",
        color='metric',
        title=f"NN Model Statistics for {emotion}: {user}",
    )
    fig.update_yaxes(range=[0, 1])

    return fig

##### Use GridSearchCV to hyperparameter tune

In [None]:
# Candidate values for the two NN_model arguments being tuned
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
dropout = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
param_grid = {'activation': activation, 'dropout_rate': dropout}

# Wrap the Keras builder so scikit-learn can treat it as an estimator
model = KerasClassifier(build_fn=NN_model, verbose=True, epochs=8, batch_size=100)

# Exhaustive search over the grid with 5-fold cross-validation, run in a single job
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=5)

# Tune on User A's "Negative" data
grid_result = grid.fit(X_neg_scaled_transform, y_neg_onehot)

In [None]:
# Report the winning parameter combination and its mean cross-validated test score
best_cv_score = grid_result.cv_results_['mean_test_score'][grid_result.best_index_]
print(f"Best hyperparameter values: {grid_result.best_params_}")
print(f"Cross fold validation best score {best_cv_score}")

Best hyperparameter values: {'activation': 'relu', 'dropout_rate': 0.3}
Cross fold validation best score 0.8851944327354431

5-fold cross-validation identified 'relu' as the best activation function and a dropout_rate of 0.3 as the best-fitting configuration.

### Train/Test on "Negative"

In [None]:
# Build the network with the hyperparameters chosen by the grid search
model_neg_usera = NN_model(activation='relu', dropout_rate=0.3)

# Exploratory fit on User A, holding out 20% of the data for validation
model_train = model_neg_usera.fit(
    X_neg_scaled_transform,
    y_neg_onehot,
    validation_split=0.2,
    batch_size=100,
    epochs=20,
    verbose=False,
)

# Training/validation curves used to pick the number of epochs
plot_loss(model_train, 'Negative', 'User A')

The performance statistics above indicate that after 5 epochs the validation loss begins to increase, so training beyond that point risks overfitting. 

###### Test Final Model for Negative

In [None]:
# Final "Negative" model for User A: fresh weights, trained on all of
# User A's data (no validation split) for the chosen number of epochs
model_neg_usera = NN_model(activation='relu', dropout_rate=0.3)
final_model = model_neg_usera.fit(
    X_neg_scaled_transform,
    y_neg_onehot,
    batch_size=100,
    epochs=6,
    verbose=False,
)

# Uncomment to persist the weights to the file the next cell loads from
#model_neg_usera.save_weights('models/nn_neg_usera.h5')

In [None]:
# Restore the persisted weights; these replace the weights fitted in the previous cell
model_neg_usera.load_weights("models/nn_neg_usera.h5")

# Predict on User B: round the softmax outputs to 0/1, then keep the
# second column (class 1) so the result lines up with the raw labels
y_pred = np.round(model_neg_usera.predict(X_neg_scaled_transform_userb))[:, 1]

# Score the hard predictions against User B's true labels
acc = accuracy_score(y_neg_userb, y_pred)
prec = precision_score(y_neg_userb, y_pred)
recall = recall_score(y_neg_userb, y_pred)

print("Model Accuracy:", acc)
print("Model Precision:", prec)
print("Model Recall:", recall)

# NOTE(review): the AUC is computed from the rounded labels, not from the predicted probabilities
print("Model AUC Score:", roc_auc_score(y_neg_userb, y_pred))

Testing this model with User B data provides an accuracy of 60.8%, precision of 55.4%, and recall of 57.7%. Compared to the SVM results, this indicates little difference in performance between the two models.

## Train on User A and Test on User B "Emphasis"

##### Load dataset and pre-process

In [None]:
# Grammatical facial expression analysed in this section
emotion = "emphasis"

# User A: datapoints file plus the matching header-less targets file
df_emp = pd.read_csv(f"grammatical_facial_expression/a_{emotion}_datapoints.txt", sep=" ")
df_emp_target = pd.read_csv(f"grammatical_facial_expression/a_{emotion}_targets.txt", sep=" ", header=None)

# User B: the same two files for the second user
df_emp_userb = pd.read_csv(f"grammatical_facial_expression/b_{emotion}_datapoints.txt", sep=" ")
df_emp_target_userb = pd.read_csv(f"grammatical_facial_expression/b_{emotion}_targets.txt", sep=" ", header=None)

# Append the labels to each user's frame as the trailing 'target' column
df_emp['target'] = df_emp_target
df_emp_userb['target'] = df_emp_target_userb

# Features: every column between the first one and the trailing target
# (the first column is presumably a timestamp/frame id -- TODO confirm).
# Labels: the trailing target column.
X_emp, y_emp = df_emp.iloc[:, 1:-1], df_emp.iloc[:, -1]
X_emp_userb, y_emp_userb = df_emp_userb.iloc[:, 1:-1], df_emp_userb.iloc[:, -1]

# Standardise the features. fit_transform is called once per user, so each
# user's data is scaled with that user's own mean and variance.
scaler = preprocessing.StandardScaler()
X_emp_scaled = scaler.fit_transform(X_emp)
X_emp_scaled_userb = scaler.fit_transform(X_emp_userb)

In [None]:
# Shape the data for the Keras model: append a trailing axis of length 1
# so each sample goes from (features,) to (features, 1)
X_emp_scaled_transform = X_emp_scaled[:, :, np.newaxis]
X_emp_scaled_transform_userb = X_emp_scaled_userb[:, :, np.newaxis]

# One-hot encoder returning a dense array
encoder = OneHotEncoder(sparse=False)

# One-hot encode the labels (one column per class) to match the model's
# softmax output; the encoder is re-fitted on each user's targets
y_emp_onehot = encoder.fit_transform(df_emp_target)
y_emp_onehot_userb = encoder.fit_transform(df_emp_target_userb)

#### Train NN model

The previous model architecture will be applied to "Emphasis" to see if the model is flexible enough to perform on multiple emotions.

In [None]:
# Same architecture and tuned hyperparameters as for "Negative"
model_emp_usera = NN_model(activation='relu', dropout_rate=0.3)

# Exploratory fit on User A, holding out 20% of the data for validation
model_training = model_emp_usera.fit(
    X_emp_scaled_transform,
    y_emp_onehot,
    validation_split=0.2,
    batch_size=100,
    epochs=30,
    verbose=False,
)

# Training/validation curves used to pick the number of epochs
plot_loss(model_training, 'Emphasis', 'User A')

The performance statistics above indicate that after 10 epochs the validation loss bottoms out, indicating that the model may begin overfitting after that point.

###### Test Final Model for Emphasis

In [None]:
# Final "Emphasis" model for User A: fresh weights, trained for the chosen
# number of epochs (20% of the data is still held out for validation here)
model_emp_usera = NN_model(activation='relu', dropout_rate=0.3)
final_model = model_emp_usera.fit(
    X_emp_scaled_transform,
    y_emp_onehot,
    batch_size=100,
    epochs=10,
    validation_split=.2,
    verbose=False,
)

# Uncomment to persist the weights to the file the next cell loads from
#model_emp_usera.save_weights('models/nn_emp_usera.h5')

In [None]:
# Restore the persisted weights; these replace the weights fitted in the previous cell
model_emp_usera.load_weights("models/nn_emp_usera.h5")

# Predict on User B: round the softmax outputs to 0/1, then keep the
# second column (class 1) so the result lines up with the raw labels
y_pred = np.round(model_emp_usera.predict(X_emp_scaled_transform_userb))[:, 1]

# Score the hard predictions against User B's true labels
acc = accuracy_score(y_emp_userb, y_pred)
prec = precision_score(y_emp_userb, y_pred)
recall = recall_score(y_emp_userb, y_pred)

print("Model Accuracy:", acc)
print("Model Precision:", prec)
print("Model Recall:", recall)

# NOTE(review): the AUC is computed from the rounded labels, not from the predicted probabilities
print("Model AUC Score:", roc_auc_score(y_emp_userb, y_pred))

Model Accuracy: 0.8727678571428571   
Model Precision: 0.8237410071942446  
Model Recall: 0.8625235404896422    
Model AUC Score: 0.8709911675387941   

The results indicate a well-performing model with 87.3% accuracy, 82.3% precision, and 86.3% recall. The AUC of the ROC is 87.1%, which is good. Overall the model is well fitting, and this suggests that the model is flexible enough to perform on different emotions.

## Train on User B and Test on User A
#### Negative Emotion

In [None]:
# Roles inverted: build the tuned architecture and train it on User B
model_neg_userb = NN_model(activation='relu', dropout_rate=0.3)

# Exploratory fit on User B, holding out 20% of the data for validation
model_train = model_neg_userb.fit(
    X_neg_scaled_transform_userb,
    y_neg_onehot_userb,
    validation_split=0.2,
    batch_size=100,
    epochs=15,
    verbose=False,
)

# Training/validation curves used to pick the number of epochs
plot_loss(model_train, 'Negative', "User B")

Model training on User B is very poor, as validation loss increases throughout each epoch. Use standard epoch = 5.

###### Determine Final Model for Negative User B

In [None]:
# determine final model
# Re-create the network first so the final model starts from freshly
# initialised weights. Calling fit on the existing model_neg_userb would
# continue from the exploratory run above and train for more than the
# 5 epochs chosen for the final model.
model_neg_userb = NN_model(activation='relu', dropout_rate=0.3)
model_neg_userb.fit(X_neg_scaled_transform_userb, y_neg_onehot_userb, batch_size=100, epochs=5, validation_split=.2, verbose=False)

# save final model weights
#model_neg_userb.save_weights('models/nn_neg_userb.h5')

In [None]:
# Restore the persisted weights into the existing model. load_weights
# updates the model in place and does not return a model, so its result
# is not bound to final_model.
model_neg_userb.load_weights("models/nn_neg_userb.h5")

# Predict on User A: round the softmax outputs to 0/1, then keep the
# second column (class 1) so the result lines up with the raw labels
y_pred = model_neg_userb.predict(X_neg_scaled_transform)
y_pred = np.round(y_pred)
y_pred = y_pred[:, 1]

# Score the hard predictions against User A's true labels
acc = accuracy_score(y_neg, y_pred)
prec = precision_score(y_neg, y_pred)
recall = recall_score(y_neg, y_pred)

print("Model Accuracy:", acc)
print("Model Precision:", prec)
print("Model Recall:", recall)

# NOTE(review): the AUC is computed from the rounded labels, not from the predicted probabilities
print("Model AUC Score:", roc_auc_score(y_neg, y_pred))

Model Accuracy: 0.5569395017793595  
Model Precision: 0.519280205655527  
Model Recall: 0.7651515151515151   
Model AUC Score: 0.5688173683140125  

Model accuracy when training on User B and testing on User A for "Negative" results in a poor model with 55.7% accuracy, 51.9% precision, and 76.5% recall.

#### Emphasis Emotion

In [None]:
# Roles inverted: build the tuned architecture and train it on User B
model_emp_userb = NN_model(activation='relu', dropout_rate=0.3)

# Exploratory fit on User B, holding out 20% of the data for validation
model_train = model_emp_userb.fit(X_emp_scaled_transform_userb, y_emp_onehot_userb, validation_split=0.2, batch_size=100, epochs=20, verbose=False)

# produce plot of performance
# plot_loss requires the emotion and user labels for the figure title;
# calling it with the history alone raises a TypeError
plot_loss(model_train, 'Emphasis', 'User B')

Model performance indicates that after 2 epochs the validation loss and accuracy plateau. Therefore use 2 or 3 epochs.

###### Determine Final Model for Emphasis User B

In [None]:
# final model
# Bind the freshly built network to model_emp_userb, the name that is
# fitted here and evaluated in the next cell. Binding it to a different
# name would leave the exploratory model in place and continue its
# training instead of fitting a new model for 2 epochs.
model_emp_userb = NN_model(activation='relu', dropout_rate=0.3)
final_model = model_emp_userb.fit(X_emp_scaled_transform_userb, y_emp_onehot_userb, batch_size=100, epochs=2, validation_split=.2, verbose=False)

# save final model weights
#model_emp_userb.save_weights('models/nn_emp_userb.h5')

In [None]:
# Restore the persisted weights; these replace the weights fitted in the previous cell
model_emp_userb.load_weights("models/nn_emp_userb.h5")

# Predict on User A: round the softmax outputs to 0/1, then keep the
# second column (class 1) so the result lines up with the raw labels
y_pred = np.round(model_emp_userb.predict(X_emp_scaled_transform))[:, 1]

# Score the hard predictions against User A's true labels
acc = accuracy_score(y_emp, y_pred)
prec = precision_score(y_emp, y_pred)
recall = recall_score(y_emp, y_pred)

print("Model Accuracy:", acc)
print("Model Precision:", prec)
print("Model Recall:", recall)

# NOTE(review): the AUC is computed from the rounded labels, not from the predicted probabilities
print("Model AUC Score:", roc_auc_score(y_emp, y_pred))

Model accuracy for "Emphasis" when testing on User A indicates an accuracy of 71.6%, precision of 40.6%, recall of 44.5%, and AUC of 62.2%. Overall this is a poor fitting model.

The neural network that was built above was robust enough to perform well on both "Negative" and "Emphasis" emotions when training on User A and testing on User B. However, when training on User B and testing on User A, the model failed to perform well on both emotions.

# Different Feature Representation: Multiple Input Neural Network

Using the paper provided in the assignment, the data will be fed to the model by facial feature (eye, mouth, nose, etc). There are 10 facial features identified in the paper of the dataset, and each input will have a two-dimensional shape: one row per coordinate axis (x, y, z) and one column per datapoint belonging to that facial feature.

This method will be tested for the emotions "Negative" and "Emphasis" to see if the model is flexible enough to perform on both emotions, and also will be trained and tested on both User A and User B to see if this model can perform well when trained on both users.

In [None]:
# The scaled columns are read as interleaved coordinates: every third
# column starting at offset 0, 1 and 2 is taken as x, y and z respectively
xs = X_neg_scaled[:, 0::3]
ys = X_neg_scaled[:, 1::3]
zs = X_neg_scaled[:, 2::3]

# Stack the three coordinate blocks along a new second axis:
# (rows, 3 coordinate axes, datapoints per axis)
X_neg_scaled_coord = np.stack((xs, ys, zs), axis=1)

# Same re-arrangement for User B
xs_userb = X_neg_scaled_userb[:, 0::3]
ys_userb = X_neg_scaled_userb[:, 1::3]
zs_userb = X_neg_scaled_userb[:, 2::3]

X_neg_scaled_coord_userb = np.stack((xs_userb, ys_userb, zs_userb), axis=1)

##### Split data into 10 facial features based on index

In [None]:
# [start, stop) datapoint index range of each facial feature, listed in the
# order in which the feature arrays are passed to the multi-input model
feature_bounds = [
    (0, 8),     # left eye
    (8, 16),    # right eye
    (16, 26),   # left eyebrow
    (26, 36),   # right eyebrow
    (36, 48),   # nose
    (48, 68),   # mouth
    (68, 87),   # face contour
    (87, 90),   # iris
    (90, 95),   # left line
    (95, 100),  # right line
]

# User A Negative: one slice along the last axis per facial feature
train = [X_neg_scaled_coord[:, :, start:stop] for start, stop in feature_bounds]
(left_eye, right_eye, left_eyebrow, right_eyebrow, nose,
 mouth, face_contour, iris, left_line, right_line) = train

# User B Negative: identical slicing
train_userb = [X_neg_scaled_coord_userb[:, :, start:stop] for start, stop in feature_bounds]
(left_eye_userb, right_eye_userb, left_eyebrow_userb, right_eyebrow_userb, nose_userb,
 mouth_userb, face_contour_userb, iris_userb, left_line_userb, right_line_userb) = train_userb

###### Multiple Input Neural Network

In [None]:
def multi_NN_model(activation="relu", dropout_rate=0.3):
    """Build and compile the ten-input, per-facial-feature classifier.

    Each facial feature gets its own input of shape (3, n_points) and its
    own branch Dense(width) -> Dense(1). The ten branch outputs are
    concatenated, batch-normalised, passed through dropout and a Dense(10)
    layer, flattened, and fed to a two-unit softmax output.

    Parameters
    ----------
    activation : str, default "relu"
        Activation used by every Dense layer except the softmax output.
    dropout_rate : float, default 0.3
        Rate of the Dropout layer applied after batch normalisation.

    Returns
    -------
    Model
        Functional model taking a list of ten arrays, in the order
        left eye, right eye, left eyebrow, right eyebrow, nose, mouth,
        face contour, iris, left line, right line. Compiled with binary
        cross-entropy loss, the Adam optimizer and the accuracy metric.
    """
    # (datapoints per coordinate axis, units of the branch's first Dense layer)
    # NOTE(review): the two "line" branches use 3 units for 5 datapoints while
    # every other branch uses as many units as datapoints. The widths are kept
    # as they were so that previously saved weights still fit -- confirm intent.
    branch_specs = [
        (8, 8),     # left eye
        (8, 8),     # right eye
        (10, 10),   # left eyebrow
        (10, 10),   # right eyebrow
        (12, 12),   # nose
        (20, 20),   # mouth
        (19, 19),   # face contour
        (3, 3),     # iris
        (5, 3),     # left line
        (5, 3),     # right line
    ]

    # create all inputs first, then the branches, so layers are created in
    # the same order as in the hand-written, one-branch-at-a-time version
    branch_inputs = [Input(shape=(3, n_points)) for n_points, _ in branch_specs]

    # each branch ends in a single unit so the concatenation below has one
    # value per facial feature along its last axis
    branch_outputs = []
    for branch_input, (_, width) in zip(branch_inputs, branch_specs):
        hidden = Dense(width, activation=activation)(branch_input)
        branch_outputs.append(Dense(1, activation=activation)(hidden))

    # layer 1: merge the ten branch outputs
    layer1 = concatenate(branch_outputs)

    # normalise the merged activations, then apply dropout to limit over-fitting
    layer1 = BatchNormalization()(layer1)
    layer1 = Dropout(dropout_rate)(layer1)

    # layer 2: dense layer of 10 units, flattened to prepare for the output
    layer2 = Dense(10, activation=activation)(layer1)
    layer2 = Flatten()(layer2)

    # layer 3: two-unit softmax output, one probability per one-hot encoded class
    layer3 = Dense(2, activation="softmax")(layer2)

    model = Model(branch_inputs, outputs=layer3)

    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=['accuracy'])

    return model

def plot_loss(model_training, emotion, user):
    """Plot every metric recorded during training of the multi-input model.

    This definition replaces the earlier ``plot_loss`` in this notebook; the
    only difference is the "Multi-NN" figure title.

    Parameters
    ----------
    model_training : object exposing a ``history`` mapping
        The value returned by ``fit``; ``model_training.history`` is turned
        into a DataFrame with one row per epoch and one column per metric.
    emotion : str
        Emotion name shown in the figure title.
    user : str
        User name shown in the figure title.

    Returns
    -------
    The plotly figure, with the y axis fixed to the range [0, 1].
    """
    # wide -> long: the row index (epoch number) becomes an 'index' column,
    # then each metric column is melted into (variable, value) pairs
    history_long = (
        pd.DataFrame(model_training.history)
        .reset_index(level=0)
        .melt(id_vars=['index'])
    )
    history_long.columns = ['Epoch', 'metric', 'Value']

    fig = px.line(
        history_long,
        x='Epoch',
        y="Value",
        color='metric',
        title=f"Multi-NN Model Statistics for {emotion}: {user}",
    )
    fig.update_yaxes(range=[0, 1])

    return fig

In [None]:
# Multi-input network with its default hyperparameters
model_neg_usera_multi = multi_NN_model()

# Exploratory fit on User A's per-feature arrays, holding out 20% for validation
trained = model_neg_usera_multi.fit(
    train,
    y_neg_onehot,
    epochs=80,
    validation_split=.2,
    verbose=False,
)

# Training/validation curves used to pick the number of epochs
plot_loss(trained, "Negative", "User A")

Model performance of the multi-input NN after training on User A indicates after 20 epochs the model risks overfitting. Therefore use 20 epochs.

In [None]:
# Final multi-input "Negative" model for User A: fresh weights, 20 epochs
model_neg_usera_multi = multi_NN_model()
final_model = model_neg_usera_multi.fit(
    train,
    y_neg_onehot,
    epochs=20,
    validation_split=.2,
    verbose=False,
)

# Uncomment to persist the weights to the file the next cell loads from
#model_neg_usera_multi.save_weights("models/mnn_neg_usera.h5")

In [None]:
# Restore the persisted weights; these replace the weights fitted in the previous cell
model_neg_usera_multi.load_weights("models/mnn_neg_usera.h5")

# Predict on User B's per-feature arrays: round the softmax outputs to 0/1,
# then keep the second column (class 1) so the result lines up with the raw labels
y_pred = np.round(model_neg_usera_multi.predict(train_userb))[:, 1]

# Score the hard predictions against User B's true labels
acc = accuracy_score(y_neg_userb, y_pred)
prec = precision_score(y_neg_userb, y_pred)
recall = recall_score(y_neg_userb, y_pred)

print("Model Accuracy:", acc)
print("Model Precision:", prec)
print("Model Recall:", recall)

# NOTE(review): the AUC is computed from the rounded labels, not from the predicted probabilities
print("Model AUC Score:", roc_auc_score(y_neg_userb, y_pred))

Model Accuracy: 0.6289506953223767    
Model Precision: 0.6075731497418244    
Model Recall: 0.4957865168539326    
Model AUC Score: 0.6168587756683455   

Overall the multi-input NN model performed similarly to the original NN, scoring 62.9% accuracy, 60.8% precision, and 49.6% recall.
### Train/Test Improved Model on Emphasis

In [None]:
# The scaled columns are read as interleaved coordinates: every third
# column starting at offset 0, 1 and 2 is taken as x, y and z respectively
xs_emp = X_emp_scaled[:, 0::3]
ys_emp = X_emp_scaled[:, 1::3]
zs_emp = X_emp_scaled[:, 2::3]

# Stack the three coordinate blocks along a new second axis:
# (rows, 3 coordinate axes, datapoints per axis)
X_emp_scaled_coord = np.stack((xs_emp, ys_emp, zs_emp), axis=1)

# Same re-arrangement for User B Emphasis
xs_emp_userb = X_emp_scaled_userb[:, 0::3]
ys_emp_userb = X_emp_scaled_userb[:, 1::3]
zs_emp_userb = X_emp_scaled_userb[:, 2::3]

X_emp_scaled_coord_userb = np.stack((xs_emp_userb, ys_emp_userb, zs_emp_userb), axis=1)



In [None]:
# [start, stop) datapoint index range of each facial feature, listed in the
# order in which the feature arrays are passed to the multi-input model
feature_bounds = [
    (0, 8),     # left eye
    (8, 16),    # right eye
    (16, 26),   # left eyebrow
    (26, 36),   # right eyebrow
    (36, 48),   # nose
    (48, 68),   # mouth
    (68, 87),   # face contour
    (87, 90),   # iris
    (90, 95),   # left line
    (95, 100),  # right line
]

# User A Emphasis: one slice along the last axis per facial feature
train_emp = [X_emp_scaled_coord[:, :, start:stop] for start, stop in feature_bounds]
(left_eye_emp, right_eye_emp, left_eyebrow_emp, right_eyebrow_emp, nose_emp,
 mouth_emp, face_contour_emp, iris_emp, left_line_emp, right_line_emp) = train_emp

# User B Emphasis: identical slicing
train_emp_userb = [X_emp_scaled_coord_userb[:, :, start:stop] for start, stop in feature_bounds]
(left_eye_emp_userb, right_eye_emp_userb, left_eyebrow_emp_userb, right_eyebrow_emp_userb, nose_emp_userb,
 mouth_emp_userb, face_contour_emp_userb, iris_emp_userb, left_line_emp_userb, right_line_emp_userb) = train_emp_userb

In [None]:
# Multi-input network with its default hyperparameters
model_emp_usera_multi = multi_NN_model()

# Exploratory fit on User A's per-feature arrays, holding out 20% for
# validation (Keras' default verbosity is used for this run)
trained = model_emp_usera_multi.fit(
    train_emp,
    y_emp_onehot,
    epochs=200,
    validation_split=.2,
)

# Training/validation curves used to pick the number of epochs
plot_loss(trained, "Emphasis", "User A")

The above performance chart suggests that the "Multiple Input Neural Network" trained very well on the new dataset. After about 20 epochs the model loss and validation loss begin to flatline along with accuracy. 
To avoid overfitting, use epochs = 20.

In [None]:
# determine final model
# Train for 20 epochs, the stopping point chosen from the User A Emphasis
# training curves to avoid overfitting.
# NOTE(review): shuffle=False is not set by any other fit call in this
# notebook -- confirm it is intended here.
model_emp_usera_multi = multi_NN_model()
final_model = model_emp_usera_multi.fit(train_emp, y_emp_onehot, epochs=20, validation_split=.2, verbose=False, shuffle=False)

# save model weights
#model_emp_usera_multi.save_weights("models/mnn_emp_usera.h5")

In [None]:
# Restore the persisted weights; these replace the weights fitted in the previous cell
model_emp_usera_multi.load_weights("models/mnn_emp_usera.h5")

# Predict on User B's per-feature arrays: round the softmax outputs to 0/1,
# then keep the second column (class 1) so the result lines up with the raw labels
y_pred = np.round(model_emp_usera_multi.predict(train_emp_userb))[:, 1]

# Score the hard predictions against User B's true labels
acc = accuracy_score(y_emp_userb, y_pred)
prec = precision_score(y_emp_userb, y_pred)
recall = recall_score(y_emp_userb, y_pred)

print("Model Accuracy:", acc)
print("Model Precision:", prec)
print("Model Recall:", recall)

# NOTE(review): the AUC is computed from the rounded labels, not from the predicted probabilities
print("Model AUC Score:", roc_auc_score(y_emp_userb, y_pred))

Model Accuracy: 0.8556547619047619    
Model Precision: 0.8703296703296703     
Model Recall: 0.7457627118644068    
Model AUC Score: 0.8365959930785748   

Model accuracy is 85.6%, with 87.0% precision and recall of 74.6%. The AUC is 83.7%. Overall this is very similar to the SVM model that produced 82.4% accuracy with 75% precision and 83.3% recall. Only the AUC for the SVM was far superior with a value of 0.89.

## Train on User B and Test on User A.

###### Train on User B Negative and Test on User A

In [None]:
# Roles inverted: exploratory fit of the multi-input network on User B's
# "Negative" data, holding out 20% for validation (default Keras verbosity)
model_neg_userb_multi = multi_NN_model()
trained = model_neg_userb_multi.fit(
    train_userb,
    y_neg_onehot_userb,
    epochs=80,
    validation_split=.2,
)

# Training/validation curves used to pick the number of epochs
plot_loss(trained, "Negative", "User B")

Model validation accuracy decreases rapidly and validation loss increases after 4-7 epochs, indicating that the model overfits. Overall this model did not train well, and therefore use epochs = 5 to avoid overfitting. 


In [None]:
# Final multi-input "Negative" model for User B: fresh weights, 5 epochs
model_neg_userb_multi = multi_NN_model()
final_model = model_neg_userb_multi.fit(
    train_userb,
    y_neg_onehot_userb,
    epochs=5,
    validation_split=.2,
    verbose=False,
)

# Uncomment to persist the weights to the file the next cell loads from
#model_neg_userb_multi.save_weights("models/mnn_neg_userb.h5")

In [None]:
# Restore the persisted weights; these replace the weights fitted in the previous cell
model_neg_userb_multi.load_weights("models/mnn_neg_userb.h5")

# Predict on User A's per-feature arrays: round the softmax outputs to 0/1,
# then keep the second column (class 1) so the result lines up with the raw labels
y_pred = np.round(model_neg_userb_multi.predict(train))[:, 1]

# Score the hard predictions against User A's true labels
acc = accuracy_score(y_neg, y_pred)
prec = precision_score(y_neg, y_pred)
recall = recall_score(y_neg, y_pred)

print("Model Accuracy:", acc)
print("Model Precision:", prec)
print("Model Recall:", recall)

# NOTE(review): the AUC is computed from the rounded labels, not from the predicted probabilities
print("Model AUC Score:", roc_auc_score(y_neg, y_pred))

Model Accuracy: 0.5907473309608541   
Model Precision: 0.5841584158415841   
Model Recall: 0.44696969696969696   
Model AUC Score: 0.5825452511694122   

The results above indicate that when the model trained on User B and tested on User A, it performed poorly in comparison to the reverse. However, this was expected given that the same occurred when modelling using SVM. Overall accuracy, precision, recall, and AUC resulted in values between 45% and 59%, which suggests the model performs little better than random.

#### Train User B Emphasis and Test on User A

In [None]:
# Roles inverted: exploratory fit of the multi-input network on User B's
# "Emphasis" data, holding out 20% for validation (default Keras verbosity)
model_emp_userb_multi = multi_NN_model()
trained = model_emp_userb_multi.fit(
    train_emp_userb,
    y_emp_onehot_userb,
    epochs=100,
    validation_split=.2,
)

# Training/validation curves used to pick the number of epochs
plot_loss(trained, "Emphasis", "User B")

Use 20 epochs since val_loss begins to increase after and accuracy flatlines.

In [None]:
# Final multi-input "Emphasis" model for User B: fresh weights, 20 epochs
model_emp_userb_multi = multi_NN_model()
model_emp_userb_multi.fit(
    train_emp_userb,
    y_emp_onehot_userb,
    epochs=20,
    validation_split=.2,
    verbose=False,
)

# Uncomment to persist the weights
#model_emp_userb_multi.save_weights('models/mnn_emp_userb.h5')

In [None]:
# Weight loading is disabled here, so this cell evaluates the model exactly
# as it was fitted in the previous cell
#model_emp_userb_multi.load_weights('models/mnn_emp_userb.h5')

# Predict on User A's per-feature arrays: round the softmax outputs to 0/1,
# then keep the second column (class 1) so the result lines up with the raw labels
y_pred = np.round(model_emp_userb_multi.predict(train_emp))[:, 1]

# Score the hard predictions against User A's true labels
acc = accuracy_score(y_emp, y_pred)
prec = precision_score(y_emp, y_pred)
recall = recall_score(y_emp, y_pred)

print("Model Accuracy:", acc)
print("Model Precision:", prec)
print("Model Recall:", recall)

# NOTE(review): the AUC is computed from the rounded labels, not from the predicted probabilities
print("Model AUC Score:", roc_auc_score(y_emp, y_pred))

Model Accuracy: 0.7248752672843906   
Model Precision: 0.4125   
Model Recall: 0.4     
Model AUC Score: 0.6123951537744641   

Overall model performance is poor when training on "Emphasis" User B and testing on "Emphasis" User A.