In [45]:
import numpy as np
import pandas as pd
import scipy.special as sp
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import PIL
import json
import os
import datetime
import PIL.Image

import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten

In [11]:
print(f'Number of GPUs detected: {len(tf.config.list_logical_devices('GPU'))}')

Number of GPUs detected: 0


# Data preparation

In [117]:
df = pd.read_csv('../DL_and_NN_in_Python/fer2013.csv')


print(df.head())
print(f'number of photos in the dataset is: {len(df)}')
      
X = []
Y = []

for row in df.index:
    X.append(list(map(int, df.iloc[row].pixels.split(' '))))
    Y.append(df.iloc[row].emotion)

X = np.array(X) / 255 # normalise pixel values to lie between 0 and 1
Y = np.array(Y)

print(f'check number of features is 48**2: {X.shape[1] == 48**2}')

X, Y = shuffle(X, Y, random_state=42)  # numpy's shuffle is not nice because you can't shuffle two arrays simultaneously

train_proportion = 0.8
train_index = int(train_proportion*len(X))

X_train, X_test = X[:train_index], X[train_index:]
Y_train, Y_test = Y[:train_index], Y[train_index:]
# or just use train_test_split from sklearn.model_selection for the same effect
n_classes = len(set(Y_train))

print(f'Number of samples in training set: {len(X_train)}')
print(f'Number of samples in test set: {len(X_test)}')

N, D,  = X_train.shape
D1 = int(np.sqrt(D))

   emotion                                             pixels     Usage
0        0  70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...  Training
1        0  151 150 147 155 148 133 111 140 170 174 182 15...  Training
2        2  231 212 156 164 174 138 161 173 182 200 106 38...  Training
3        4  24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...  Training
4        6  4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...  Training
number of photos in the dataset is: 35887
check number of features is 48**2: True
Number of samples in training set: 28709
Number of samples in test set: 7178


In [121]:
X_train.resize((N, D1, D1))
X_test.resize((len(X_test), D1, D1))

In [14]:
emotions = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

def show_sample_image(emotion: int):
    emotion_no = emotions.index(emotion)
    df_emotion = df[df.emotion == emotion_no]

    random_no = np.random.random_integers(0, len(df_emotion))
    print(random_no)
    print(df_emotion.iloc[random_no])

    img = np.array(list(map(int, df_emotion.iloc[random_no].pixels.split(' '))), dtype=np.uint8).reshape((48,48))
    img = PIL.Image.fromarray(img).resize((1000, 1000))
    img.show()

show_sample_image('Surprise')

779
emotion                                                    5
pixels     16 15 14 14 15 14 14 11 4 0 0 0 0 6 11 17 34 5...
Usage                                               Training
Name: 7381, dtype: object


  random_no = np.random.random_integers(0, len(df_emotion))


# 1. Use Keras

In [103]:
def construct_keras_seq_dense(hidden_layer_sizes: list[int], activation_function: str = 'relu', reg: float = 1e-4):
    # instantiate the sequential model
    model = tf.keras.models.Sequential()
    
    # flatten the image inputs onto a 1D vector
    model.add(tf.keras.Input(shape = (int(np.sqrt(D)),int(np.sqrt(D))))) # useful if we want to print the summary of the graph in advance
    model.add(Flatten())

    for ii in range(len(hidden_layer_sizes)):
        # we want the st. dev. at each layer to be 1
        # because we assume all vars are IID and normalised such that Var(x_i) = 1
        # then to achieve this goal, we need to initialise all weights with st. dev. 1/sqrt(dim)
        # where dim = number of multiplications taking place at a given layer to produce a single matrix entry
        # for the first layer, this is equal to D - the dimensionality of the inputs
        # for subsequent layers, it is equal to the number of hidden units
        hidden_units = hidden_layer_sizes[ii]
        if ii == 0: 
            initialiser = tf.keras.initializers.RandomNormal(0, 1/np.sqrt(D))
        else:
            initialiser = tf.keras.initializers.RandomNormal(0, 1/np.sqrt(hidden_layer_sizes[ii-1]))
        regulariser = tf.keras.regularizers.L2(l2=reg)

        model.add(Dense(
            hidden_units,
            activation = activation_function,
            use_bias = True,
            kernel_initializer = initialiser,
            bias_initializer = 'zeros',
            kernel_regularizer = regulariser,
            bias_regularizer = regulariser
            )
        )
        
    # add the final layer to project onto n_classes
    model.add(Dense(
        n_classes,
        activation = activation_function,
        use_bias = True,
        kernel_initializer = tf.keras.initializers.RandomNormal(0, 1/np.sqrt(hidden_layer_sizes[-1])),
        bias_initializer = 'zeros',
        kernel_regularizer = regulariser,
        bias_regularizer = regulariser
        )
    )

    print('Instantiiated the following model:')
    print(model.summary())

    return model

In [134]:
loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# this loss function assumes that y_true is NOT one-hot encoded (use CategoricalCrossentropy in this case)
# by default, this loss expects the inpu y_pred to be a probabiltiy distribution (i.e. after softmax)
# however, we usually don't add the softmax to the end of the graph because it is not stable with all loss functions


In [137]:
model_keras_seq_dense = construct_keras_seq_dense([100, 200, 100], 'relu')

optimiser = tf.keras.optimizers.Adam(learning_rate=1e-4)

model_keras_seq_dense.compile(optimizer=optimiser,
                              loss=loss_function,
                              metrics=['accuracy'])

model_keras_seq_dense.fit(X_train, Y_train, epochs=500, batch_size=64)

Instantiiated the following model:


None
Epoch 1/500
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.2540 - loss: 1.9096
Epoch 2/500
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.2989 - loss: 1.8242
Epoch 3/500
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.3249 - loss: 1.7706
Epoch 4/500
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.3274 - loss: 1.7567
Epoch 5/500
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.3605 - loss: 1.6855
Epoch 6/500
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.3641 - loss: 1.6588
Epoch 7/500
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.3710 - loss: 1.6443
Epoch 8/500
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.3805 - loss: 1.6270
Epoch 9/500
[1m449/449[0m

<keras.src.callbacks.history.History at 0x7f323dc1f410>

In [141]:
test_loss, test_acc = model_keras_seq_dense.evaluate(X_test, Y_test)
print(test_acc)

[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4055 - loss: 3.7006
0.4140429198741913


In [158]:
# to make predictions and evaluate the model manually, add a softmax layer:
probability_model = tf.keras.models.Sequential([model_keras_seq_dense, tf.keras.layers.Softmax()])

pY = probability_model.predict(X_test)
Y_pred = tf.argmax(pY, axis=1)
test_acc_manual = np.mean(Y_pred == Y_test)

print(test_acc_manual)
np.isclose(test_acc, test_acc_manual, 1e-5)

[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
0.4140429088882697


True

In [161]:
# model_keras_seq_dense.summary()
model_keras_seq_dense.save('model_keras_seq_dense.keras')

In [166]:
model_loaded = tf.keras.models.load_model('model_keras_seq_dense.keras')
model_loaded.evaluate(X_test, Y_test)

[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4055 - loss: 3.7006


[3.7167587280273438, 0.4140429198741913]