# Emotion Recognition on the Enterface database


## 1 - Import essentials

In [1]:
from keras.datasets import mnist
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dropout
from keras.models import Sequential
import matplotlib.pylab as plt
from keras import backend as K
import keras

from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
K.tensorflow_backend._get_available_gpus()

import numpy as np
from sklearn.utils import shuffle

import os
import sys
import glob
import imageio
import pathlib
from pathlib import Path

from datetime import datetime


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## 2 - Prepare data

In [2]:
# Root folder that is searched recursively for "subject" directories.
data_dir = r"data/"

# Image size and channel count handed to the network as `input_shape`.
img_x = 257
img_y = 257
input_shape = (img_x, img_y, 3)

# Fraction of the subject folders that goes into the training set.
train_split = 0.9

num_classes = 6
# Not filled by this cell; kept so the names stay defined for other cells.
data_x = []
data_y = []

emotions_dirs = ["anger", "disgust", "fear", "happiness", "sadness", "surprise"]


def _load_split(subject_dirs, first_index, total):
    """Read every PNG found below each directory in `subject_dirs`.

    The label of an image is the stem of its parent directory name.

    Args:
        subject_dirs: directories to scan recursively for ``*.png`` files.
        first_index: number shown in the progress line for the first directory.
        total: overall directory count shown in the progress line.

    Returns:
        A ``(images, labels)`` pair of equally long lists.
    """
    images = []
    labels = []
    for offset, subject_dir in enumerate(subject_dirs):
        print("\rCollecting data: {} of {}".format(first_index + offset, total), end="")
        for filename in glob.iglob(str(subject_dir) + '/**/*.png', recursive=True):
            images.append(imageio.imread(filename))
            labels.append(pathlib.Path(filename).parent.stem)
    return images, labels


# Collect the per-subject folders. The split is done per folder, not per
# image, so all images below one subject folder end up on the same side.
# glob order is not guaranteed, so sort first: otherwise the seeded shuffle
# below would not give the same split on every machine / run.
sentences_list = sorted(
    path for path in Path(data_dir).glob('**/*')
    if path.is_dir() and "subject" in path.stem
)
sentences_list = shuffle(sentences_list, random_state=0)

print(sentences_list[:10])

split_at = int(train_split * len(sentences_list))
train_sentences_list = sentences_list[:split_at]
test_sentences_list = sentences_list[split_at:]

train_x, train_y_labels = _load_split(
    train_sentences_list, 1, len(sentences_list))
test_x, test_y_labels = _load_split(
    test_sentences_list, len(train_sentences_list) + 1, len(sentences_list))
# Final value of the progress counter, kept for cells that may read `i`.
i = len(sentences_list) + 1

# Fit the label -> integer mapping on the training labels only ...
label_encoder = LabelEncoder()
train_y_integer_encoded = label_encoder.fit_transform(train_y_labels)
train_y = keras.utils.to_categorical(train_y_integer_encoded, num_classes)

# ... and reuse that mapping for the test labels. Re-fitting here would
# silently assign different indices whenever the test split does not contain
# exactly the same set of labels as the training split. `transform` raises
# ValueError on a label that was not seen during fitting.
test_y_integer_encoded = label_encoder.transform(test_y_labels)
test_y = keras.utils.to_categorical(test_y_integer_encoded, num_classes)

# Scale pixel values by 1/255. float32 halves the memory footprint compared
# with the float64 default and matches the preprocessing of the MNIST cell.
train_x = np.array(train_x, dtype=np.float32) / 255
test_x = np.array(test_x, dtype=np.float32) / 255

print("\ntest data distribution: ", np.sum(test_y, axis=0))
print("train data distribution: ", np.sum(train_y, axis=0))

print("train input shape: {}".format(train_x.shape))
print("test inshape: {}".format(test_x.shape))

print("train output shape: {}".format(train_y.shape))
print("test output: {}".format(test_y.shape))

[WindowsPath('data/jet/Male/subject 32'), WindowsPath('data/jet/Male/subject 39'), WindowsPath('data/jet/Male/subject 27'), WindowsPath('data/jet/Female/subject 33'), WindowsPath('data/jet/Male/subject 10'), WindowsPath('data/jet/Male/subject 24'), WindowsPath('data/jet/Male/subject 3'), WindowsPath('data/jet/Male/subject 11'), WindowsPath('data/jet/Male/subject 40'), WindowsPath('data/jet/Male/subject 34')]
Collecting data: 43 of 43
test data distribution:  [585. 482. 436. 394. 490. 392.]
train data distribution:  [4127. 3204. 3217. 2673. 3757. 2864.]
train input shape: (19842, 257, 257, 3)
test inshape: (2779, 257, 257, 3)
train output shape: (19842, 6)
test output: (2779, 6)


## 3 - Create model

In [3]:
# Layer stack, in the order it is assembled below:
#   AlphaDropout -> GaussianNoise
#   -> conv(64) -> dropout -> max pool
#   -> conv(256) -> dropout -> max pool
#   -> conv(64) -> dropout -> flatten
#   -> dense(500) -> dense(200) -> softmax(num_classes)

# --- hyper-parameters -------------------------------------------------------
first_filter_height, first_filter_width = 20, 8
first_filter_count = 64
first_strides = [1, 1]

second_filter_height, second_filter_width = 10, 4
second_filter_count = 64
second_strides = [1, 1]

dropout_prob = 0.5   # shared by every Dropout layer in this model
pooling_size = 2
pooling_strides = 2
third_units = 500

# --- named layers (instantiated in the same order as before so that the
# --- auto-generated Keras layer names do not change) -------------------------
first_conv = Conv2D(
    first_filter_count,
    kernel_size=(first_filter_height, first_filter_width),
    strides=first_strides,
    padding='same',
    activation='relu',
    input_shape=input_shape,
)
first_dropout = Dropout(rate=dropout_prob)
first_max_pool = MaxPooling2D(pool_size=pooling_size, strides=pooling_strides)

second_conv = Conv2D(
    second_filter_count,
    kernel_size=(second_filter_height, second_filter_width),
    strides=second_strides,
    padding='same',
    activation='relu',
)
second_dropout = Dropout(rate=dropout_prob)

third_dense = Dense(units=third_units, activation='relu')
fourth_dense = Dense(units=num_classes, activation='softmax')

# --- assemble ---------------------------------------------------------------
# Earlier experiments (disabled) placed an extra 64-filter and an extra
# 128-filter conv/dropout/pool stage between the first and the 256-filter one.
model = Sequential([
    # Input-level regularisation.
    keras.layers.AlphaDropout(rate=0.2, input_shape=input_shape),
    keras.layers.GaussianNoise(stddev=0.2),
    # First conv stage.
    first_conv,
    first_dropout,
    first_max_pool,
    # Wide 256-filter stage; reuses the first stage's strides.
    Conv2D(
        filters=256,
        kernel_size=(10, 4),
        strides=first_strides,
        padding='same',
        activation='relu',
        input_shape=input_shape,
    ),
    Dropout(rate=dropout_prob),
    MaxPooling2D(pool_size=pooling_size, strides=pooling_strides),
    # Last conv stage (no pooling) followed by the classifier head.
    second_conv,
    second_dropout,
    Flatten(),
    third_dense,
    Dense(units=200, activation='relu'),
    fourth_dense,
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(lr=1e-4),
    metrics=['accuracy'],
)

## 4 - Train

In [None]:
class TrainValTensorBoard(keras.callbacks.TensorBoard):
    """TensorBoard callback that writes training and validation metrics to
    separate sub-directories of one log directory.

    Training metrics are handled by the parent callback and go to
    ``<log_dir>/training``. Entries of ``logs`` whose key starts with
    ``val_`` are written by a second writer to ``<log_dir>/validation`` with
    the ``val_`` removed from the tag, so both curves share a tag name.

    Uses the TF1-style ``tf.summary.FileWriter`` / ``tf.Summary`` API.
    """

    def __init__(self, log_dir='./logs', **kwargs):
        """Set up both log locations.

        Args:
            log_dir: parent directory for the two sub-directories.
            **kwargs: forwarded unchanged to ``keras.callbacks.TensorBoard``.
        """
        # Make the original `TensorBoard` log to a subdirectory 'training'
        training_log_dir = os.path.join(log_dir, 'training')
        super().__init__(training_log_dir, **kwargs)

        # Log the validation metrics to a separate subdirectory
        self.val_log_dir = os.path.join(log_dir, 'validation')

    def set_model(self, model):
        """Create the validation writer, then defer to the parent."""
        # Setup writer for validation metrics
        self.val_writer = tf.summary.FileWriter(self.val_log_dir)
        super().set_model(model)

    def on_epoch_end(self, epoch, logs=None):
        """Write ``val_*`` entries with the validation writer and pass the
        remaining entries on to the parent callback.

        Args:
            epoch: epoch index, used as the summary step.
            logs: metric name -> value mapping; may be ``None``.
        """
        logs = logs or {}

        train_logs = {}
        for key, value in logs.items():
            if not key.startswith('val_'):
                train_logs[key] = value
                continue

            summary = tf.Summary()
            summary_value = summary.value.add()
            # Values may be numpy scalars (which have `.item()`) or plain
            # Python numbers (which do not); accept both.
            summary_value.simple_value = (
                value.item() if hasattr(value, 'item') else float(value))
            # Drop the prefix so the tag matches the training metric's tag.
            summary_value.tag = key.replace('val_', '')
            self.val_writer.add_summary(summary, epoch)
        self.val_writer.flush()

        # Pass the remaining logs to `TensorBoard.on_epoch_end`
        super().on_epoch_end(epoch, train_logs)

    def on_train_end(self, logs=None):
        """Finish the parent callback, then close the validation writer."""
        super().on_train_end(logs)
        self.val_writer.close()

In [None]:
batch_size = 40
epochs = 40

# One timestamped TensorBoard log directory per run.
logdir = "_tf_logs/" + datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
tb = TrainValTensorBoard(log_dir=logdir)

# Keep only checkpoints that improve the validation accuracy.
chkpnt_dir = "E:/ckpnt/"
# The checkpoint file is written into this directory, so make sure it exists
# before training starts instead of failing at the first save.
os.makedirs(chkpnt_dir, exist_ok=True)
filepath = chkpnt_dir + "weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

# NOTE(review): the test set doubles as validation data here, so checkpoint
# selection sees the same samples that `evaluate` reports on below. The
# previously tried alternative was `validation_split=0.2`.
model.fit(train_x, train_y,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(test_x, test_y),
          callbacks=[tb, checkpoint])
score = model.evaluate(test_x, test_y, verbose=1)
print(score)

Train on 19842 samples, validate on 2779 samples
Epoch 1/40

Epoch 00001: val_acc improved from -inf to 0.38971, saving model to E:/ckpnt/weights-improvement-01-0.39.hdf5
Epoch 2/40

Epoch 00002: val_acc improved from 0.38971 to 0.39043, saving model to E:/ckpnt/weights-improvement-02-0.39.hdf5
Epoch 3/40

Epoch 00003: val_acc improved from 0.39043 to 0.43973, saving model to E:/ckpnt/weights-improvement-03-0.44.hdf5
Epoch 4/40

Epoch 00004: val_acc did not improve
Epoch 5/40

Epoch 00005: val_acc improved from 0.43973 to 0.47967, saving model to E:/ckpnt/weights-improvement-05-0.48.hdf5
Epoch 6/40

Epoch 00006: val_acc did not improve
Epoch 7/40

Epoch 00007: val_acc did not improve
Epoch 8/40

Epoch 00008: val_acc did not improve
Epoch 9/40

Epoch 00009: val_acc did not improve
Epoch 10/40

Epoch 00010: val_acc did not improve
Epoch 11/40

Epoch 00011: val_acc did not improve
Epoch 12/40

Epoch 00012: val_acc did not improve
Epoch 13/40

Epoch 00013: val_acc did not improve
Epoch 14/

In [6]:
# Per-class sum of the predicted probabilities over all test samples; a
# single dominant entry means nearly every sample is assigned to one class.
pred = model.predict(test_x)
print(pred.sum(axis=0))


[1.6779497e-14 1.5544706e-16 0.0000000e+00 5.2727147e-23 2.7790000e+03
 6.1526601e-15]


In [None]:

# Training settings for the MNIST run.
batch_size = 128
num_classes = 10
epochs = 10

# MNIST images are 28x28 with a single (greyscale) channel.
img_x, img_y = 28, 28
input_shape = (img_x, img_y, 1)

# load_data() already returns separate train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add the trailing channel axis -> (sample_number, x_img_size, y_img_size, 1),
# convert to float32 and scale the pixel values by 1/255.
x_train = x_train.reshape(x_train.shape[0], img_x, img_y, 1).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], img_x, img_y, 1).astype('float32') / 255
print('x_train shape:', x_train.shape)

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the integer class labels, as required by the
# categorical_crossentropy loss used when the model is compiled.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print('y_train shape:', y_train.shape)
# Layer stack: conv -> dropout -> max pool -> conv -> dropout -> flatten
#              -> dense(200) -> softmax(num_classes)

# --- hyper-parameters -------------------------------------------------------
first_filter_height, first_filter_width = 20, 8
first_filter_count = 64
first_strides = [1, 1]

second_filter_height, second_filter_width = 10, 4
second_filter_count = 64
second_strides = [1, 1]

dropout_prob = 0.5   # shared by both Dropout layers
pooling_size = 2
pooling_strides = 2
third_units = 200

# --- layers (instantiated in the same order as before so that the
# --- auto-generated Keras layer names do not change) -------------------------
first_conv = Conv2D(
    first_filter_count,
    kernel_size=(first_filter_height, first_filter_width),
    strides=first_strides,
    padding='same',
    activation='relu',
    input_shape=input_shape,
)
first_dropout = Dropout(rate=dropout_prob)
first_max_pool = MaxPooling2D(pool_size=pooling_size, strides=pooling_strides)

second_conv = Conv2D(
    second_filter_count,
    kernel_size=(second_filter_height, second_filter_width),
    strides=second_strides,
    padding='same',
    activation='relu',
)
second_dropout = Dropout(rate=dropout_prob)

third_dense = Dense(units=third_units, activation='relu')
fourth_dense = Dense(units=num_classes, activation='softmax')

# --- assemble and compile ---------------------------------------------------
model = Sequential([
    first_conv,
    first_dropout,
    first_max_pool,
    second_conv,
    second_dropout,
    Flatten(),
    third_dense,
    fourth_dense,
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(lr=1e-3),
    metrics=['accuracy'],
)



# Train on MNIST; the last 10% of the training samples are held out by Keras
# for validation. No callbacks are attached for this run.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_split=0.1,
    callbacks=[],
)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
y_train shape: (60000, 10)
Train on 54000 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10