# Import Statements

In [None]:
# Import packages
%config Completer.use_jedi = False
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

from sklearn.model_selection import train_test_split 

import tensorflow as tf
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load Data

In [None]:
# Read the training CSV into a DataFrame and report its (rows, columns) shape
train_csv_path = '../input/challenges-in-representation-learning-facial-expression-recognition-challenge/train.csv'
train = pd.read_csv(train_csv_path)

print('Training Set Size:', train.shape)

In [None]:
# Preview the first five rows of the training data
train.head(n=5)

In [None]:
# Parse each space-separated pixel string into an integer array shaped (-1, 48, 48, 1).
# NOTE: this overwrites the 'pixels' column, so the cell expects the freshly loaded
# string column and should not be run a second time without reloading `train`.
parsed_images = [
    np.fromstring(pixel_string, dtype=int, sep=' ').reshape(-1, 48, 48, 1)
    for pixel_string in train['pixels']
]
train['pixels'] = parsed_images

In [None]:
# Stack the per-image arrays along the first axis and pull the emotion labels out
pixels = np.concatenate(list(train['pixels']), axis=0)
labels = train['emotion'].values

# Report the shape of each array (pixels first, then labels)
for arr in (pixels, labels):
    print(arr.shape)

# Split, Reshape, and Scale Datasets

In [None]:
# Hold out 20% of the samples for validation, stratified on the labels so both
# subsets keep the same class proportions; random_state fixes the shuffle.
split_parts = train_test_split(
    pixels,
    labels,
    stratify=labels,
    test_size=0.2,
    random_state=1,
)
X_train, X_valid, y_train, y_valid = split_parts


# Report the shapes of the resulting arrays
print('X_train Shape:', X_train.shape)
print('y_train Shape:', y_train.shape)
print()
print('X_valid Shape:', X_valid.shape)
print('y_valid Shape:', y_valid.shape)

In [None]:
# Divide by 255 so pixel values fall between 0 and 1 (assumes 8-bit intensities)
Xs_train, Xs_valid = X_train / 255, X_valid / 255

# Image Augmentation

In [None]:
# Random transformations applied on the fly to the training images
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Iterator yielding augmented (images, labels) batches of 64 from the scaled training set
train_loader = train_datagen.flow(x=Xs_train, y=y_train, batch_size=64)

# Build Xception Model
The code for the Xception model is taken from https://colab.research.google.com/github/mavenzer/Autism-Detection-Using_YOLO/blob/master/Tutorial_implementing_Xception_in_TensorFlow_2_0_using_the_Functional_API.ipynb#scrollTo=uy3q-iLm3VV2. We build the architecture by hand because the Xception model that ships with the TensorFlow package requires an image size of at least 71x71, whereas our images are 48x48.

In [None]:
def entry_flow(inputs):
  """Build the entry flow of the Xception-style network.

  A two-layer convolutional stem (32 filters with stride 2, then 64 filters)
  is followed by three residual blocks with 128, 256 and 728 filters. Each
  block applies two ReLU -> SeparableConv2D -> BatchNormalization units and a
  stride-2 max pooling; the block's input is projected with a stride-2 1x1
  convolution and added back.

  Args:
    inputs: Keras tensor the first convolution is applied to.

  Returns:
    The Keras tensor produced by the last residual block.
  """
  # Stem: Conv2D -> BatchNormalization -> ReLU, twice; only the first conv is strided.
  x = inputs
  for filters, stride in [(32, 2), (64, 1)]:
    x = layers.Conv2D(filters, 3, strides=stride, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

  # The three residual blocks differ only in their feature depth.
  for filters in (128, 256, 728):
    block_input = x  # Kept for the shortcut branch

    for _unit in range(2):
      x = layers.Activation('relu')(x)
      x = layers.SeparableConv2D(filters, 3, padding='same')(x)
      x = layers.BatchNormalization()(x)

    x = layers.MaxPooling2D(3, strides=2, padding='same')(x)

    # Stride-2 1x1 projection so the shortcut matches the pooled branch.
    shortcut = layers.Conv2D(
        filters, 1, strides=2, padding='same')(block_input)
    x = layers.add([x, shortcut])

  return x


def middle_flow(x, num_blocks=8):
  """Build the middle flow: a stack of identity-shortcut residual blocks.

  Each block applies three ReLU -> SeparableConv2D(728) -> BatchNormalization
  units and then adds the block's input back onto the result.

  Args:
    x: Keras tensor entering the middle flow; it is added directly to
      728-filter outputs, so it must be compatible with them.
    num_blocks: Number of residual blocks to stack.

  Returns:
    The Keras tensor after the last block (the input itself if
    `num_blocks` is 0).
  """
  for _block in range(num_blocks):
    block_input = x  # Kept for the identity shortcut

    for _unit in range(3):
      x = layers.Activation('relu')(x)
      x = layers.SeparableConv2D(728, 3, padding='same')(x)
      x = layers.BatchNormalization()(x)

    x = layers.add([x, block_input])

  return x


def exit_flow(x, num_classes=7):
  """Build the exit flow and the classification head.

  One residual block (separable convolutions with 728 then 1024 filters,
  stride-2 max pooling, stride-2 1x1 projection shortcut) is followed by two
  SeparableConv2D -> BatchNormalization -> ReLU units (1536 and 2048
  filters), global average pooling and a dense output layer.

  Args:
    x: Keras tensor entering the exit flow.
    num_classes: Number of units in the output layer. A value of 1 selects a
      sigmoid activation; any other value selects softmax.

  Returns:
    The Keras tensor produced by the dense output layer.
  """
  block_input = x  # Kept for the shortcut branch

  for filters in (728, 1024):
    x = layers.Activation('relu')(x)
    x = layers.SeparableConv2D(filters, 3, padding='same')(x)
    x = layers.BatchNormalization()(x)

  x = layers.MaxPooling2D(3, strides=2, padding='same')(x)

  # Stride-2 1x1 projection so the shortcut matches the pooled branch.
  shortcut = layers.Conv2D(
      1024, 1, strides=2, padding='same')(block_input)
  x = layers.add([x, shortcut])

  # Final feature extractors use the conv -> norm -> activation ordering.
  for filters in (1536, 2048):
    x = layers.SeparableConv2D(filters, 3, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)

  x = layers.GlobalAveragePooling2D()(x)
  head_activation = 'sigmoid' if num_classes == 1 else 'softmax'
  return layers.Dense(num_classes, activation=head_activation)(x)

# Seed NumPy and TensorFlow before any layer is created: calling the layers
# below is what initialises their weights, so seeding afterwards would leave
# the initial weights different on every run.
np.random.seed(1)
tf.random.set_seed(1)

# Assemble the full network: entry flow -> middle flow -> exit flow.
inputs = keras.Input(shape=(48, 48, 1))  # Fixed-size 48x48 single-channel images.
outputs = exit_flow(middle_flow(entry_flow(inputs)))
xception = keras.Model(inputs, outputs)

In [None]:
# Seed TensorFlow and NumPy. The layers of `xception` were already created in
# the cell that built it, so these seeds only affect randomness from here on.
tf.random.set_seed(1)
np.random.seed(1)

# Work with the hand-built Xception network under the name `cnn`
cnn = xception
cnn.summary()

# Training Run 1

In [None]:
# Compile with Adam (learning rate 0.001). The sparse categorical loss is used
# because the labels are passed as integer class ids rather than one-hot vectors.
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
cnn.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
%%time
# Training Run #1
h1 = cnn.fit(train_loader, 
             batch_size=64, 
             epochs=50, 
             validation_data=(Xs_valid, y_valid), verbose=1)

In [None]:
# Save history and view plots of loss and accuracy.
# Copy the metric lists instead of aliasing `h1.history`: `history` is extended
# with later runs, and without a copy those extensions would also modify
# `h1.history`, so re-running this cell would no longer show run 1 on its own.
history = {metric: list(values) for metric, values in h1.history.items()}
n_epochs = len(history['loss'])
epochs_run1 = range(1, n_epochs + 1)

plt.figure(figsize=[10,4])
for position, (metric, title) in enumerate([('loss', 'Loss'), ('accuracy', 'Accuracy')], start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs_run1, history[metric], label='Training')
    plt.plot(epochs_run1, history['val_' + metric], label='Validation')
    plt.xlabel('Epoch'); plt.ylabel(title); plt.title(title)
    plt.legend()
plt.show()

We notice that the graphs for the accuracy and loss are not very smooth. This indicates that we need to reduce the learning rate. The purpose of decreasing the learning rate is to allow our model to approach the optimal solution. When the learning rate is too large, the model tends to quickly converge on a suboptimal solution.

# Training Run 2

In [None]:
# Lower the learning rate of the already-compiled optimizer for the next run
reduced_lr = 0.0001
tf.keras.backend.set_value(cnn.optimizer.learning_rate, reduced_lr)

In [None]:
%%time 

# Training Run #2
h2 = cnn.fit(train_loader, 
             batch_size=64, 
             epochs=50, 
             validation_data=(Xs_valid, y_valid), verbose=1)

In [None]:
# Save history and view plots of loss and accuracy.
# Rebuild the combined history from the two History objects rather than
# extending `history` in place, so re-running this cell never duplicates
# epochs. `h1.epoch` has one entry per epoch of run 1; slicing with its length
# keeps only run-1 values in case `h1.history` was extended in place elsewhere.
n_run1 = len(h1.epoch)
history = {
    metric: list(h1.history[metric][:n_run1]) + list(h2.history[metric])
    for metric in h2.history
}

epoch_range = range(1, len(history['loss'])+1)

plt.figure(figsize=[14,4])
for position, (metric, title) in enumerate([('loss', 'Loss'), ('accuracy', 'Accuracy')], start=1):
    plt.subplot(1, 2, position)
    plt.plot(epoch_range, history[metric], label='Training')
    plt.plot(epoch_range, history['val_' + metric], label='Validation')
    plt.xlabel('Epoch'); plt.ylabel(title); plt.title(title)
    plt.legend()
plt.tight_layout()
plt.show()

The validation loss and accuracy stabilized after reducing the learning rate. We will reduce the learning rate one final time to see if we can improve the accuracy any further. This time we will only run 10 epochs, since the training accuracy is increasing faster than the validation accuracy.

# Training Run 3

In [None]:
# Lower the learning rate of the already-compiled optimizer for the final run
reduced_lr = 0.00001
tf.keras.backend.set_value(cnn.optimizer.learning_rate, reduced_lr)

In [None]:
%%time 

# Training Run #3
h3 = cnn.fit(train_loader, 
             batch_size=64, 
             epochs=10, 
             validation_data=(Xs_valid, y_valid), verbose=1)

In [None]:
# Save history and view plots of loss and accuracy.
# Rebuild the combined history from the three History objects rather than
# extending `history` in place, so re-running this cell never duplicates
# epochs. `h1.epoch` has one entry per epoch of run 1; slicing with its length
# keeps only run-1 values in case `h1.history` was extended in place elsewhere.
n_run1 = len(h1.epoch)
history = {
    metric: (list(h1.history[metric][:n_run1])
             + list(h2.history[metric])
             + list(h3.history[metric]))
    for metric in h3.history
}

epoch_range = range(1, len(history['loss'])+1)

plt.figure(figsize=[14,4])
for position, (metric, title) in enumerate([('loss', 'Loss'), ('accuracy', 'Accuracy')], start=1):
    plt.subplot(1, 2, position)
    plt.plot(epoch_range, history[metric], label='Training')
    plt.plot(epoch_range, history['val_' + metric], label='Validation')
    plt.xlabel('Epoch'); plt.ylabel(title); plt.title(title)
    plt.legend()
plt.tight_layout()
plt.show()

# Save Model

In [None]:
# Save the trained model and the accumulated training history.
cnn.save('fer_v05_BZ.h5')
# Use a context manager so the pickle file is flushed and closed right away.
with open('fer_v05_BZ.pkl', 'wb') as history_file:
    pickle.dump(history, history_file)