In [2]:
# Importing the library:

import math
import scikitplot
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf


from matplotlib import pyplot
from keras.utils import np_utils
from tensorflow.keras import optimizers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Flatten, Dense, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Dropout, BatchNormalization, LeakyReLU, Activation

Using TensorFlow backend.


In [3]:
# Loading the data:

# Location of the FER2013 CSV inside the Kaggle input directory.
FER2013_CSV = '../input/facial-expression-recognitionferchallenge/fer2013/fer2013/fer2013.csv'
df = pd.read_csv(FER2013_CSV)

In [4]:
# Displaying the data (columns: emotion label, space-separated pixel string, Usage split):

df

Unnamed: 0,emotion,pixels,Usage
0,0,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...,Training
1,0,151 150 147 155 148 133 111 140 170 174 182 15...,Training
2,2,231 212 156 164 174 138 161 173 182 200 106 38...,Training
3,4,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...,Training
4,6,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...,Training
...,...,...,...
35882,6,50 36 17 22 23 29 33 39 34 37 37 37 39 43 48 5...,PrivateTest
35883,3,178 174 172 173 181 188 191 194 196 199 200 20...,PrivateTest
35884,0,17 17 16 23 28 22 19 17 25 26 20 24 31 19 27 9...,PrivateTest
35885,3,30 28 28 29 31 30 42 68 79 81 77 67 67 71 63 6...,PrivateTest


In [5]:
# Mapping each integer emotion label to a human-readable name:

emotion_label_to_text = {
    0: 'anger',
    1: 'disgust',
    2: 'fear',
    3: 'happiness',
    4: 'sadness',
    5: 'surprise',
    6: 'neutral',
}

In [6]:
# Determine the dimensions of an image represented by pixel values:
# each row stores a flat, space-separated pixel string, so for a square image
# the side length is the square root of the number of values.

n_pixels = len(df.pixels[0].split(' '))
math.sqrt(n_pixels)

48.0

In [7]:
# Reshaping every pixel string into a 48x48x1 float32 image so it is compatible with the CNN model:

img_array = np.stack(
    [
        np.array(pixel_string.split(' ')).reshape(48, 48, 1).astype('float32')
        for pixel_string in df.pixels
    ],
    axis=0,
)

In [8]:
# Displaying the shape of the stacked image array:

img_array.shape

(35887, 48, 48, 1)

In [9]:
# Encoding the labels: integer-encode the emotion column, then one-hot encode it.

le = LabelEncoder()
le.fit(df.emotion)
img_labels = np_utils.to_categorical(le.transform(df.emotion))
img_labels.shape

(35887, 7)

In [10]:
# Original label -> encoded label, to confirm what the encoder did.
le_name_mapping = {
    original: encoded
    for original, encoded in zip(le.classes_, le.transform(le.classes_))
}
print(le_name_mapping)

{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6}


In [11]:
# Splitting data into training and validation set
# (stratified on the one-hot labels, 10% held out, fixed seed):

X_train, X_valid, y_train, y_valid = train_test_split(
    img_array,
    img_labels,
    test_size=0.1,
    stratify=img_labels,
    shuffle=True,
    random_state=42,
)
tuple(split.shape for split in (X_train, X_valid, y_train, y_valid))

((32298, 48, 48, 1), (3589, 48, 48, 1), (32298, 7), (3589, 7))

In [12]:
# Assigning values: image dimensions and class count are read from the split arrays.
# X_train is 4-D, so axes 1..3 are unpacked directly into the three size names.

_, img_width, img_height, img_depth = X_train.shape
num_classes = y_train.shape[1]

In [13]:
# Normalizing results, as neural networks are very sensitive to unnormalized data.
# NOTE: the arrays are rebound to their scaled versions, so running this cell
# twice divides by 255 twice -- re-run the split cell first when re-executing.

PIXEL_SCALE = 255.
X_train = X_train / PIXEL_SCALE
X_valid = X_valid / PIXEL_SCALE

In [14]:
# Creating a custom CNN Model 


def build_model(optim):
    """Build, compile and summarise the CNN used for emotion classification.

    The network is three convolutional stages, each made of two
    Conv2D + BatchNormalization pairs followed by max-pooling and dropout,
    then a dense head ending in a softmax over ``num_classes``.

    Reads the module-level names ``img_width``, ``img_height``, ``img_depth``
    and ``num_classes``.

    Args:
        optim: optimizer passed unchanged to ``compile``.

    Returns:
        The compiled ``Sequential`` model (its summary is printed as a side effect).
    """
    # One entry per convolutional stage: (filters, kernel size, dropout rate).
    conv_stages = [
        (64, (5, 5), 0.4),
        (128, (3, 3), 0.4),
        (256, (3, 3), 0.5),
    ]

    net = Sequential(name='CNN')

    conv_index = 0
    for stage_index, (n_filters, kernel, drop_rate) in enumerate(conv_stages, start=1):
        for _ in range(2):
            conv_index += 1
            conv_kwargs = dict(
                filters=n_filters,
                kernel_size=kernel,
                activation='elu',
                padding='same',
                kernel_initializer='he_normal',
                name=f'conv2d_{conv_index}',
            )
            if conv_index == 1:
                # Only the very first layer declares the input shape.
                conv_kwargs['input_shape'] = (img_width, img_height, img_depth)
            net.add(Conv2D(**conv_kwargs))
            net.add(BatchNormalization(name=f'batchnorm_{conv_index}'))

        net.add(MaxPooling2D(pool_size=(2,2), name=f'maxpool2d_{stage_index}'))
        net.add(Dropout(drop_rate, name=f'dropout_{stage_index}'))

    # Dense classification head.
    net.add(Flatten(name='flatten'))
    net.add(Dense(128, activation='elu', kernel_initializer='he_normal', name='dense_1'))
    net.add(BatchNormalization(name='batchnorm_7'))
    net.add(Dropout(0.6, name='dropout_4'))
    net.add(Dense(num_classes, activation='softmax', name='out_layer'))

    net.compile(
        optimizer=optim,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    net.summary()

    return net

In [15]:
# To avoid overfitting, early stopping and learning-rate reduction are added.
# Both callbacks watch the validation accuracy.

MONITORED_METRIC = 'val_accuracy'

# Stop after 11 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor=MONITORED_METRIC,
    patience=11,
    min_delta=0.00005,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 7 epochs without improvement, down to 1e-7.
lr_scheduler = ReduceLROnPlateau(
    monitor=MONITORED_METRIC,
    patience=7,
    factor=0.5,
    min_lr=1e-7,
    verbose=1,
)

callbacks = [early_stopping, lr_scheduler]

In [16]:
# The dataset is small relative to the task, so the training images are
# augmented on the fly with ImageDataGenerator.

augmentation_settings = dict(
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    shear_range=0.15,
    zoom_range=0.15,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**augmentation_settings)
# NOTE(review): no featurewise/ZCA option is enabled above, so this fit() call
# is presumably not needed -- confirm before removing.
train_datagen.fit(X_train)

In [None]:
batch_size = 32  # batch size of 32 performs the best.

epochs = 100

# Candidate optimizers; index 1 (Adam) is the one used below.
optims = [
    optimizers.Nadam(learning_rate=0.001,
                     beta_1=0.9,
                     beta_2=0.999,
                     epsilon=1e-07,
                     name='Nadam'),
    optimizers.Adam(0.001),
]

model = build_model(optims[1])

# steps_per_epoch must be an integer number of batches; rounding up makes the
# final, partially filled batch part of every epoch.
# NOTE(review): fit_generator is deprecated in TensorFlow 2.1+, where
# Model.fit accepts generators directly -- switch once the runtime is confirmed.
history = model.fit_generator(
    train_datagen.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_valid, y_valid),
    steps_per_epoch=math.ceil(len(X_train) / batch_size),
    epochs=epochs,
    callbacks=callbacks,
    use_multiprocessing=True
)  # training the model

Model: "CNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 48, 48, 64)        1664      
_________________________________________________________________
batchnorm_1 (BatchNormalizat (None, 48, 48, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 48, 48, 64)        102464    
_________________________________________________________________
batchnorm_2 (BatchNormalizat (None, 48, 48, 64)        256       
_________________________________________________________________
maxpool2d_1 (MaxPooling2D)   (None, 24, 24, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 24, 24, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 128)       73856   

In [None]:
# Plotting the Accuracy vs Val Accuracy graph:

# x and y are passed by keyword: newer seaborn releases reject positional data
# vectors in lineplot.
sns.lineplot(x=history.epoch, y=history.history['accuracy'], label='train')
sns.lineplot(x=history.epoch, y=history.history['val_accuracy'], label='valid')
pyplot.title('Accuracy vs Val Accuracy graph')
pyplot.xlabel('epoch')
pyplot.ylabel('accuracy')
pyplot.tight_layout()

pyplot.savefig('accuracy_plot.png')  # saving the graph
pyplot.show()

In [None]:
# Plotting the loss vs Val loss graph:

# x and y are passed by keyword: newer seaborn releases reject positional data
# vectors in lineplot.
sns.lineplot(x=history.epoch, y=history.history['loss'], label='train')
sns.lineplot(x=history.epoch, y=history.history['val_loss'], label='valid')
pyplot.title('Loss')
pyplot.xlabel('epoch')
pyplot.ylabel('loss')
pyplot.tight_layout()

pyplot.savefig('loss_plot.png')  # saving the graph
pyplot.show()

In [None]:
# Plotting confusion matrix:

# Sequential.predict_classes was removed in TensorFlow 2.6; taking the argmax
# of the softmax output gives the same class indices on every version.
yhat_valid = np.argmax(model.predict(X_valid), axis=1)
scikitplot.metrics.plot_confusion_matrix(np.argmax(y_valid, axis=1), yhat_valid, figsize=(7,7))

pyplot.savefig("confusion_matrix_dcnn.png")  # saving the plot
pyplot.show()

In [None]:
# Displaying the number of wrong predictions on the validation set:

true_classes = np.argmax(y_valid, axis=1)
n_wrong = np.sum(true_classes != yhat_valid)
print(f'total wrong validation predictions: {n_wrong}\n\n')

In [None]:
# Displaying the classification report (per-class precision, recall and F1):

valid_true_classes = np.argmax(y_valid, axis=1)
print(classification_report(valid_true_classes, yhat_valid))

The confusion matrix clearly demonstrates that while our model performs well for the class "happiness," it performs poorly for the other classes. One likely cause is that the remaining classes have less data. I also discovered, however, that for some photographs from the "sadness" and "neutral" classes it is difficult even for a human to determine whether the subject is dejected or indifferent. Facial expressions differ from person to person: the neutral face of some people conveys sadness.

In [None]:
# Showing correctly classified random samples of the "sadness" and "neutral" classes.
# Wrongly classified samples leave an empty slot in the grid.

# One-hot column indices, following emotion_label_to_text
# (4 -> 'sadness', 6 -> 'neutral').
SAD_CLASS, NEUTRAL_CLASS = 4, 6
N_SAMPLES = 9

np.random.seed(2)
random_sad_imgs = np.random.choice(np.where(y_valid[:, SAD_CLASS] == 1)[0], size=N_SAMPLES)
random_neutral_imgs = np.random.choice(np.where(y_valid[:, NEUTRAL_CLASS] == 1)[0], size=N_SAMPLES)

fig = pyplot.figure(1, (18, 4))

# Row 0 holds the sadness samples, row 1 the neutral samples.
class_rows = [(SAD_CLASS, random_sad_imgs), (NEUTRAL_CLASS, random_neutral_imgs)]
for row, (class_idx, sample_ids) in enumerate(class_rows):
    # The true label comes from the same mapping as the prediction, so the
    # equality check below can actually succeed.
    true_label = emotion_label_to_text[class_idx]
    # One batched forward pass per row; argmax replaces predict_classes,
    # which was removed in TensorFlow 2.6.
    predicted_classes = np.argmax(model.predict(X_valid[sample_ids]), axis=1)

    for col, (sample_id, predicted_class) in enumerate(zip(sample_ids, predicted_classes)):
        ax = pyplot.subplot(2, N_SAMPLES, row * N_SAMPLES + col + 1)
        predicted_label = emotion_label_to_text[int(predicted_class)]
        if true_label != predicted_label:
            ax.axis('off')
            continue

        ax.imshow(X_valid[sample_id, :, :, 0], cmap='gray')
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(f"true:{true_label}, pred:{predicted_label}")
        for side in ('top', 'bottom', 'left', 'right'):
            ax.spines[side].set_color('green')

pyplot.tight_layout()

In [None]:
# Plotting random "sadness" and "neutral" samples with the model's prediction,
# to inspect where the two classes get confused:

# One-hot column indices, following emotion_label_to_text
# (4 -> 'sadness', 6 -> 'neutral').
SAD_CLASS, NEUTRAL_CLASS = 4, 6
N_SAMPLES = 9

np.random.seed(2)
random_sad_imgs = np.random.choice(np.where(y_valid[:, SAD_CLASS]==1)[0], size=N_SAMPLES)
random_neutral_imgs = np.random.choice(np.where(y_valid[:, NEUTRAL_CLASS]==1)[0], size=N_SAMPLES)

fig = pyplot.figure(1, (18, 4))

# Row 0 holds the sadness samples, row 1 the neutral samples; each row keeps
# its own title prefix.
title_rows = [
    ("true:sad, pred:", random_sad_imgs),
    ("t:neut, p:", random_neutral_imgs),
]
for row, (title_prefix, sample_ids) in enumerate(title_rows):
    # One batched forward pass per row; argmax replaces predict_classes,
    # which was removed in TensorFlow 2.6.
    predicted_classes = np.argmax(model.predict(X_valid[sample_ids]), axis=1)

    for col, (sample_id, predicted_class) in enumerate(zip(sample_ids, predicted_classes)):
        ax = pyplot.subplot(2, N_SAMPLES, row * N_SAMPLES + col + 1)
        ax.imshow(X_valid[sample_id, :, :, 0], cmap='gray')
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(f"{title_prefix}{emotion_label_to_text[int(predicted_class)]}")

# Lay out once, after every subplot exists, instead of on each iteration.
pyplot.tight_layout()

In [None]:
# Saving the model for later use or deployment:

# 1) Architecture as YAML text.
# NOTE(review): Model.to_yaml() raises RuntimeError from TensorFlow 2.6 onwards;
# Model.to_json() is the replacement there -- confirm the target runtime.
model_yaml = model.to_yaml()
with open("model.yaml", mode="w") as architecture_file:
    architecture_file.write(model_yaml)

# 2) Full model as a single HDF5 file.
model.save("model.h5")

# Conclusion:

- In conclusion, the model underwent numerous iterations to attain validation accuracy ranging from 73% to 80%. Even with the use of a data generator, the model was only able to reach a certain level of accuracy because of the dataset's label imbalance. To address this, additional augmentation targeted at the under-represented classes could be used to increase the amount of data for them, which might improve model performance during training.

- Convolutional neural networks (CNNs) created specifically for our application utilizing the Keras Sequential API make up the model we employed. It has a number of convolutional layers as well as layers for batch normalization, max pooling, dropout, and dense layers.

- The model's architecture, weights, and optimizer state were all saved in a single H5 file using TensorFlow/Keras. The model's architecture was also stored separately in a YAML file, which describes the network layers and their settings (it does not contain the weights). Saving the model this way makes it simple to load and reuse it in subsequent sessions.