In [None]:
#EFFECTIVENESS OF VGG-16 ON USER ENGAGEMENT RECOGNITION
#By Matthew Breeds

#Used for building dataframe for the dataset
import numpy as np
import pandas as pd


#Used for preprocessing
import keras
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import decode_predictions
import tensorflow as tf

#Used for building the CNN model
from keras.models import Model
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
from keras.losses import SparseCategoricalCrossentropy
from keras.layers import Flatten

#Used for plotting results
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go



In [None]:
#Mount Google Drive so the dataset files under /content/gdrive are readable
#(Colab-only: google.colab is not available outside a Colab runtime)
from google.colab import drive
drive.mount('/content/gdrive')


Mounted at /content/gdrive


In [None]:
#Define paths to the label CSVs; all four live in the same Labels directory
labels_dir = "/content/gdrive/My Drive/Deep Learning/DAiSEEfinal/Labels/"
path = labels_dir + "TrainLabels.csv"
path_val = labels_dir + "ValidationLabels.csv"
path_test = labels_dir + "TestLabels.csv"
path_all = labels_dir + "AllLabels.csv"

In [None]:
#Read AllLabels.csv and preview it
#(data_all is only previewed here; the visible later cells do not reference it)
data_all = pd.read_csv(path_all)
data_all.head(5) #Show top 5 rows

Unnamed: 0,ClipID,Boredom,Engagement,Confusion,Frustration
0,1100011002.avi,0,2,0,0
1,1100011003.avi,0,2,0,0
2,1100011004.avi,0,3,0,0
3,1100011005.avi,0,3,0,0
4,1100011006.avi,0,3,0,0


In [None]:
#Read the training labels (TrainLabels.csv) and preview them
data = pd.read_csv(path)
data.head() #Show the first rows (head() with no argument shows 5)

Unnamed: 0,ClipID,Boredom,Engagement,Confusion,Frustration
0,1100011002.avi,0,2,0,0
1,1100011003.avi,0,2,0,0
2,1100011004.avi,0,3,0,0
3,1100011005.avi,0,3,0,0
4,1100011006.avi,0,3,0,0


In [None]:
#Keep only the clip IDs; the image-loading cell derives the frame folders and file names from them
x_train = data['ClipID']
x_train.head(5000) #the notebook display elides the middle rows of a Series this long

0       1100011002.avi
1       1100011003.avi
2       1100011004.avi
3       1100011005.avi
4       1100011006.avi
             ...      
4995    4018350244.mp4
4996    4018350247.mp4
4997    4018350251.mp4
4998    4018350254.mp4
4999    4018350256.mp4
Name: ClipID, Length: 5000, dtype: object

In [None]:
#Ground-truth labels: every column of the training frame except the clip identifier
y_train = data.drop(columns=['ClipID'])
y_train.head(5)

Unnamed: 0,Boredom,Engagement,Confusion,Frustration
0,0,2,0,0
1,0,2,0,0
2,0,3,0,0
3,0,3,0,0
4,0,3,0,0


In [None]:
#Read the validation labels (ValidationLabels.csv)
data_val = pd.read_csv(path_val)

#Keep only the clip IDs; the image-loading cell derives the frame folders and file names from them
x_val = data_val['ClipID']
x_val.head(5)



0    4000221001.avi
1    4000221002.avi
2    4000221006.avi
3    4000221008.avi
4    4000221009.avi
Name: ClipID, dtype: object

In [None]:
#Ground-truth labels: every column of the validation frame except the clip identifier
y_val = data_val.drop(columns=['ClipID'])
y_val.head(5)

Unnamed: 0,Boredom,Engagement,Confusion,Frustration
0,0,2,0,0
1,1,3,0,0
2,1,2,0,0
3,0,3,0,0
4,2,2,0,0


In [None]:
#Show the row counts of the clip-ID and label frames side by side (train, then validation)
size_check = tuple(len(part) for part in (x_train, y_train, x_val, y_val))
size_check

(5358, 5358, 1429, 1429)

In [None]:
#Load train images: two frames per clip, each resized to 100x100
train_image_path = "/content/gdrive/MyDrive/Deep Learning/DAiSEEfinal/Train/"
train_img=[]
first = '1.jpg'
second = '2.jpg'
for clip_id in x_train:
    #the first six characters of the clip ID name the parent folder;
    #the clip ID minus its 4-character extension names the clip folder
    folder = clip_id[0:6]
    frame = clip_id[0:-4]
    clip_dir = train_image_path+folder+'/'+frame+'/'
    #load both frames first (files are <clip>1.jpg and <clip>2.jpg) ...
    loaded = [image.load_img(clip_dir+frame+suffix,target_size=(100,100))
              for suffix in (first, second)]
    #... then convert the pixels to NumPy arrays and append them in frame order,
    #so the list holds clip0-frame1, clip0-frame2, clip1-frame1, ...
    train_img.extend(image.img_to_array(picture) for picture in loaded)

In [None]:
#Load validation images: two frames per clip, each resized to 100x100
val_image_path = "/content/gdrive/My Drive/Deep Learning/DAiSEEfinal/Validation/"
val_img=[]
first = '1.jpg'
second = '2.jpg'
for clip_id in x_val:
    #the first six characters of the clip ID name the parent folder;
    #the clip ID minus its 4-character extension names the clip folder
    folder = clip_id[0:6]
    frame = clip_id[0:-4]
    clip_dir = val_image_path+folder+'/'+frame+'/'
    #load both frames first (files are <clip>1.jpg and <clip>2.jpg) ...
    loaded = [image.load_img(clip_dir+frame+suffix,target_size=(100,100))
              for suffix in (first, second)]
    #... then convert the pixels to NumPy arrays and append them in frame order
    val_img.extend(image.img_to_array(picture) for picture in loaded)

In [None]:
# stack the training frames into a single array and apply the VGG16 preprocessing
# NOTE: train_img is rebound to the preprocessed array, so re-running this cell
# on its own would preprocess the data a second time
train_img = preprocess_input(np.array(train_img))
train_img.shape

(10716, 100, 100, 3)

In [None]:
# stack the validation frames into a single array and apply the VGG16 preprocessing
# NOTE: val_img is rebound to the preprocessed array, so re-running this cell
# on its own would preprocess the data a second time
val_img = preprocess_input(np.array(val_img))
val_img.shape

(200, 100, 100, 3)

In [None]:
#Label frame shape before the per-frame duplication: one row per clip, one column per affective state
y_train.shape

(5358, 4)

In [None]:
#Two frames are loaded per training clip, so repeat each label row twice in place
#(row order becomes clip0, clip0, clip1, clip1, ... to line up with train_img)
train_rows_twice = np.repeat(y_train.values, 2, axis=0)
y_train_duped = pd.DataFrame(train_rows_twice, columns=y_train.columns)

In [None]:
#Map each model output name to its own single-column label frame.
#Column order in y_train_duped is Boredom, Engagement, Confusion, Frustration,
#and the dictionary keys must match the names of the model's output layers.
#Frustration is column 3: the previous slice `:3:4` means start=None, stop=3,
#step=4, which picks column 0 (Boredom) instead.
yy_train = {
    "boredom": y_train_duped.iloc[:, 0:1],
    "engagement": y_train_duped.iloc[:, 1:2],
    "confusion": y_train_duped.iloc[:, 2:3],
    "frustration": y_train_duped.iloc[:, 3:4],
}

In [None]:
#Two frames are loaded per validation clip, so repeat each label row twice in place
val_rows_twice = np.repeat(y_val.values, 2, axis=0)
y_val_duped = pd.DataFrame(val_rows_twice, columns=y_val.columns)


In [None]:
#Map each model output name to its own single-column validation label frame.
#Built from y_val_duped (the duplicated validation labels); the previously
#referenced name `yy_val` is not defined anywhere in the notebook.
#Frustration is column 3: the previous slice `:3:4` picked column 0 (Boredom).
yy_validate = {
    "boredom": y_val_duped.iloc[:, 0:1],
    "engagement": y_val_duped.iloc[:, 1:2],
    "confusion": y_val_duped.iloc[:, 2:3],
    "frustration": y_val_duped.iloc[:, 3:4],
}

In [None]:
# load the VGG16 base without its classifier layers, sized for 100x100 3-channel inputs
# (the weights argument is left at the Keras default)
model = VGG16(
    input_shape=(100, 100, 3),
    include_top=False,
)

In [None]:
# freeze every layer of the loaded base so that only layers added afterwards are trained
for base_layer in model.layers:
    base_layer.trainable = False

In [None]:
#Define the classifier that sits on top of the VGG16 base:
#flatten -> fc1 (4096) -> fc2 (512) -> four parallel 4-way softmax heads
#NOTE: `model` is rebound to the extended network at the end of this cell, so
#re-running this cell alone would build on the already-extended model;
#re-run the VGG16 loading cell first
features = Flatten()(model.layers[-1].output)
hidden = Dense(4096, activation="relu", name="fc1")(features)
hidden = Dense(512, activation="relu", name="fc2")(hidden)
#one output head per affective state; the head names double as the keys of the
#label, loss and metric dictionaries used later
heads = [Dense(4, activation="softmax", name=state)(hidden)
         for state in ("boredom", "engagement", "confusion", "frustration")]
model = Model(inputs=model.input, outputs=heads)

In [None]:
# print the layer-by-layer summary (output shapes, parameter counts, connections)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 100, 100, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 100, 100, 64) 1792        input_2[0][0]                    
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 100, 100, 64) 36928       block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 50, 50, 64)   0           block1_conv2[0][0]               
____________________________________________________________________________________________

In [None]:
#Compile the model: one sparse categorical cross-entropy loss and one accuracy
#metric per output head, keyed by the output layer names.
#`learning_rate` replaces the deprecated `lr` argument, which newer Keras
#releases no longer accept.
head_names = ("boredom", "engagement", "confusion", "frustration")
model.compile(optimizer=RMSprop(learning_rate=0.0001),
              loss={head: SparseCategoricalCrossentropy() for head in head_names},
              metrics={head: "accuracy" for head in head_names})

In [None]:
#Train the model on the training frames only
#(no validation data is passed, so `history` records training metrics only)
history = model.fit(
    x=train_img,
    y=yy_train,
    batch_size=50,
    epochs=350,
)

Epoch 1/350
Epoch 2/350
Epoch 3/350
Epoch 4/350
Epoch 5/350
Epoch 6/350
Epoch 7/350
Epoch 8/350
Epoch 9/350
Epoch 10/350
Epoch 11/350
Epoch 12/350
Epoch 13/350
Epoch 14/350
Epoch 15/350
Epoch 16/350
Epoch 17/350
Epoch 18/350
Epoch 19/350
Epoch 20/350
Epoch 21/350
Epoch 22/350
Epoch 23/350
Epoch 24/350
Epoch 25/350
Epoch 26/350
Epoch 27/350
Epoch 28/350
Epoch 29/350
Epoch 30/350
Epoch 31/350
Epoch 32/350
Epoch 33/350
Epoch 34/350
Epoch 35/350
Epoch 36/350
Epoch 37/350
Epoch 38/350
Epoch 39/350
Epoch 40/350
Epoch 41/350
Epoch 42/350
Epoch 43/350
Epoch 44/350
Epoch 45/350
Epoch 46/350
Epoch 47/350
Epoch 48/350
Epoch 49/350
Epoch 50/350
Epoch 51/350
Epoch 52/350
Epoch 53/350
Epoch 54/350
Epoch 55/350
Epoch 56/350
Epoch 57/350
Epoch 58/350
Epoch 59/350
Epoch 60/350
Epoch 61/350
Epoch 62/350
Epoch 63/350
Epoch 64/350
Epoch 65/350
Epoch 66/350
Epoch 67/350
Epoch 68/350
Epoch 69/350
Epoch 70/350
Epoch 71/350
Epoch 72/350
Epoch 73/350
Epoch 74/350
Epoch 75/350
Epoch 76/350
Epoch 77/350
Epoch 78

In [None]:
#Plot per-epoch accuracy for the boredom head.
#The plot is drawn with plotly; the earlier plt.clf() call only produced an
#empty matplotlib figure in the cell output, so it is dropped.
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['boredom_accuracy'],
                    name='Train'))
#the validation curve is only available when model.fit was given validation data
if 'val_boredom_accuracy' in history.history:
    fig.add_trace(go.Scatter(
                        y=history.history['val_boredom_accuracy'],
                        name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for boredom feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

<Figure size 432x288 with 0 Axes>

In [None]:
#Plot per-epoch accuracy for the engagement head.
#The plot is drawn with plotly; the earlier plt.clf() call only produced an
#empty matplotlib figure in the cell output, so it is dropped.
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['engagement_accuracy'],
                    name='Train'))
#the validation curve is only available when model.fit was given validation data
if 'val_engagement_accuracy' in history.history:
    fig.add_trace(go.Scatter(
                        y=history.history['val_engagement_accuracy'],
                        name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for engagement feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

<Figure size 432x288 with 0 Axes>

In [None]:
#Plot per-epoch accuracy for the confusion head.
#The plot is drawn with plotly; the earlier plt.clf() call only produced an
#empty matplotlib figure in the cell output, so it is dropped.
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['confusion_accuracy'],
                    name='Train'))
#the validation curve is only available when model.fit was given validation data
if 'val_confusion_accuracy' in history.history:
    fig.add_trace(go.Scatter(
                        y=history.history['val_confusion_accuracy'],
                        name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for confusion feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

<Figure size 432x288 with 0 Axes>

In [None]:
#Plot per-epoch accuracy for the frustration head.
#The plot is drawn with plotly; the earlier plt.clf() call only produced an
#empty matplotlib figure in the cell output, so it is dropped.
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['frustration_accuracy'],
                    name='Train'))
#the validation curve is only available when model.fit was given validation data
if 'val_frustration_accuracy' in history.history:
    fig.add_trace(go.Scatter(
                        y=history.history['val_frustration_accuracy'],
                        name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for frustration feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

<Figure size 432x288 with 0 Axes>

In [None]:
#Prepare for testing
#Read the test labels (TestLabels.csv) and preview them
data_test = pd.read_csv(path_test)
data_test.head(5) #Show top 5 rows

Unnamed: 0,ClipID,Boredom,Engagement,Confusion,Frustration
0,5000441001.avi,1,2,0,0
1,5000441002.avi,0,2,0,0
2,5000441003.avi,1,2,0,0
3,5000441005.avi,2,2,0,0
4,5000441006.avi,2,2,1,2


In [None]:
#Keep only the clip IDs; the image-loading cell derives the frame folders and file names from them
x_test = data_test['ClipID']
x_test.head(5)

0    5000441001.avi
1    5000441002.avi
2    5000441003.avi
3    5000441005.avi
4    5000441006.avi
Name: ClipID, dtype: object

In [None]:
#Ground-truth labels: every column of the test frame except the clip identifier
y_test = data_test.drop(columns=['ClipID'])
y_test.head(5)

Unnamed: 0,Boredom,Engagement,Confusion,Frustration
0,1,2,0,0
1,0,2,0,0
2,1,2,0,0
3,2,2,0,0
4,2,2,1,2


In [None]:
#Show the row counts of the test clip-ID and label frames side by side
size_check = tuple(len(part) for part in (x_test, y_test))
size_check

(1784, 1784)

In [None]:
#Load test images: only the second frame of each clip is used, resized to 100x100,
#so test_img holds exactly one image per row of the test labels
test_image_path = "/content/gdrive/My Drive/Deep Learning/DAiSEEfinal/Test/"
test_img=[]
first = '1.jpg'   #kept for parity with the train/validation cells; not used below
second = '2.jpg'
for clip_id in x_test:
    #the first six characters of the clip ID name the parent folder;
    #the clip ID minus its 4-character extension names the clip folder
    folder = clip_id[0:6]
    frame = clip_id[0:-4]
    frame_file = test_image_path+folder+'/'+frame+'/'+frame+second
    picture = image.load_img(frame_file,target_size=(100,100))
    #convert the pixels to a NumPy array
    test_img.append(image.img_to_array(picture))

In [None]:
# stack the test frames into a single array and apply the VGG16 preprocessing
# NOTE: test_img is rebound to the preprocessed array, so re-running this cell
# on its own would preprocess the data a second time
test_img = preprocess_input(np.array(test_img))
test_img.shape

(1784, 100, 100, 3)

In [None]:
#Map each model output name to its own single-column test label frame.
#The labels are rebuilt from data_test rather than from y_test, so this cell can
#be re-run: y_test is rebound to a dict here, and a dict has no .iloc.
#Frustration is column 3: the previous slice `:3:4` means start=None, stop=3,
#step=4, which picks column 0 (Boredom) instead.
test_labels = data_test.drop(['ClipID'], axis=1)
y_test = {
    "boredom": test_labels.iloc[:, 0:1],
    "engagement": test_labels.iloc[:, 1:2],
    "confusion": test_labels.iloc[:, 2:3],
    "frustration": test_labels.iloc[:, 3:4],
}

In [None]:
#Evaluate the trained model on the test set
#NOTE: despite its name, y_pred holds the values returned by model.evaluate
#(the results cell reads its entries from index 5 on as accuracies), not per-sample predictions
y_pred = model.evaluate(test_img, y_test)



In [None]:
#Print the per-state test accuracy as a percentage
#(assumes entries 5.. of the evaluate() result are the four accuracies in
#output order, after the losses - TODO confirm against model.metrics_names)
class_names = ['Boredom', 'Engagement', 'Confusion', 'Frustration']
accuracies = y_pred[5:]
print("Accuracy: ")
for position, accuracy in enumerate(accuracies):
    print(f"{class_names[position]}: {accuracy*100}")

Accuracy: 
Boredom: 38.22869956493378
Engagement: 41.03139042854309
Confusion: 47.64573872089386
Frustration: 38.396862149238586
