# Import libraries

In [1]:
from tensorflow.contrib.keras.python.keras import applications, layers
from tensorflow.contrib.keras.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.contrib.keras.python.keras import optimizers
from tensorflow.contrib.keras.python.keras.models import Sequential, Model
from tensorflow.contrib.keras.python.keras.layers import Dropout, Flatten, Dense, Input, ZeroPadding2D, Conv2D, BatchNormalization, Activation, MaxPooling2D, AveragePooling2D, GlobalAveragePooling2D
from tensorflow.contrib.keras.python.keras.callbacks import EarlyStopping
from tensorflow.contrib.keras.python.keras.optimizers import Adam

# from tensorflow.contrib.keras.python.keras.applications.resnet50 import ResNet50
from tensorflow.contrib.keras.python.keras.applications.imagenet_utils import _obtain_input_shape
from tensorflow.contrib.keras.python.keras import backend as K

from residual_funcs import *

import numpy as np
import matplotlib.pyplot as plt

In [2]:
def get_emotion(ohv):
    """Translate a prediction into its emotion label.

    Parameters
    ----------
    ohv : array with a ``shape`` attribute
        Either a single-element array that already holds the class index,
        or a longer one-hot / score vector whose arg-max is the class index.

    Returns
    -------
    str or None
        The label for class indices 0-6; ``None`` for any other index.
    """
    labels = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

    # A length-1 input carries the class index itself; anything else is
    # reduced to the position of its largest entry.
    indx = np.argmax(ohv) if ohv.shape[0] != 1 else ohv[0]

    # Compare with == (rather than a dict lookup) so array-valued indices
    # behave exactly as they would in an if/elif chain.
    for position, label in enumerate(labels):
        if indx == position:
            return label
    return None

# Create the ResNet50 base model

In [3]:
# Determine proper input shape.
# No explicit shape is supplied (first argument is None); the Keras helper is
# given the default/minimum image sizes and the backend's data format.
# include_top=False because no ImageNet classifier is built on this network.
# NOTE(review): with include_top=False the helper presumably leaves the
# spatial dimensions unspecified -- confirm against the installed Keras version.
input_shape = _obtain_input_shape(None,
                                default_size=224,
                                min_size=197,
                                data_format=K.image_data_format(),
                                include_top=False)

# Symbolic input tensor that the network in the following cell is built on.
img_input = Input(shape=input_shape)

In [4]:
# BatchNormalization has to normalise over the channel axis. The input shape
# above follows K.image_data_format(), so the axis must follow it too:
# the last axis for 'channels_last', axis 1 for 'channels_first'.
bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

# Stem: zero-pad, 7x7 stride-2 convolution, batch norm, ReLU, 3x3 max-pool.
# The explicit layer names ('conv1', 'bn_conv1', ...) are kept unchanged.
x = ZeroPadding2D((3, 3))(img_input)
x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = Activation('relu')(x)
x = MaxPooling2D((3, 3), strides=(2, 2))(x)

# conv_block / identity_block come from residual_funcs.
# NOTE(review): presumably the standard ResNet bottleneck blocks -- confirm
# that they pick their own BatchNormalization axis consistently with bn_axis.

# Stage 2: one conv block (stride 1) followed by two identity blocks.
x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

# Stage 3: one conv block followed by three identity blocks.
x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')

# Stage 4: one conv block followed by five identity blocks.
x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')

# Stage 5: one conv block followed by two identity blocks.
x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

# Final 7x7 average pooling over the stage-5 feature map.
x = AveragePooling2D((7, 7), name='avg_pool')(x)

In [5]:
# The model input is the img_input tensor the layers were built on.
inputs = img_input

# Create model: wraps the graph from `inputs` to the pooled output `x`
# under the name 'resnet50'.
base_model = Model(inputs, x, name='resnet50')

In [6]:
# Load pre-trained weights into the base model from a local HDF5 file.
# NOTE(review): the file name suggests ResNet50 weights without the top
# classifier, in TensorFlow dimension ordering -- confirm the file's origin.
base_model.load_weights('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')

# Fully connected (FC) classification head

In [7]:
# Build the classification head on top of the base model's output.
# Collapse the remaining spatial dimensions with global average pooling.
x = base_model.output
x = GlobalAveragePooling2D()(x)
# One fully-connected ReLU layer followed by 50% dropout.
# NOTE(review): 1204 units looks like a typo for 1024 -- confirm before
# changing it, since that alters the layer shape and any weights saved
# from this model.
x = Dense(1204, activation='relu')(x)
x = Dropout(0.5)(x)
# Softmax output with 7 units, one per emotion class (see get_emotion).
predictions = Dense(7, activation='softmax')(x)

In [8]:
# This is the model we will train: it maps the base model's input to the
# 7-way softmax `predictions`.
model = Model(inputs=base_model.input, outputs=predictions)

In [9]:
# First: train only the top layers (which were randomly initialized),
# i.e. freeze every layer that belongs to base_model. The layers added on
# top of base_model are not touched by this loop.
for layer in base_model.layers:
    layer.trainable = False

# Compile model

In [10]:
# Compile the model (should be done *after* setting layers to non-trainable).
# NOTE(review): decay=10e-6 evaluates to 1e-5; if 1e-6 was intended, the
# literal needs fixing.
opt = Adam(lr=0.0001, decay=10e-6)
# sparse_categorical_crossentropy takes integer class indices as labels
# (not one-hot vectors).
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fit model

In [None]:
# Stop training once the validation loss has not improved for 5 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

# Generator settings shared by the training and validation sets.
# target_size is (height, width). The network is built around 224x224 inputs
# (default_size=224 and a 7x7 final pooling window); the former (224, 244)
# was a typo that stretched every image to a non-square size.
TARGET_SIZE = (224, 224)
# 'sparse' yields integer class indices, which is what the
# sparse_categorical_crossentropy loss expects for the 7 emotion classes
# ('binary' is intended for two-class problems).
CLASS_MODE = 'sparse'
TRAIN_BATCH_SIZE = 128
VAL_BATCH_SIZE = 1

# fer2013 ------------------------------------------------
# TRAIN_DIR = "./data/fer2013/train"
# VAL_DIR = "./data/fer2013/test1"

# afew2017 ------------------------------------------------
TRAIN_DIR = "E:/EmotiW2017/Train_AFEW/face_images"
VAL_DIR = "E:/EmotiW2017/Val_AFEW/face_images"

# NOTE(review): ImageDataGenerator() applies no rescaling or ImageNet-style
# preprocessing; pre-trained ResNet50 weights usually expect it -- confirm.
train_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=TARGET_SIZE,
    batch_size=TRAIN_BATCH_SIZE,
    class_mode=CLASS_MODE)

val_datagen = ImageDataGenerator()
val_generator = val_datagen.flow_from_directory(
    VAL_DIR,
    target_size=TARGET_SIZE,
    batch_size=VAL_BATCH_SIZE,
    class_mode=CLASS_MODE)

# Derive the number of batches per pass from the images actually found, so
# the values stay correct when the dataset or the batch sizes change.
# For 41225 / 19871 images this gives the 323 / 19871 steps used previously.
steps_per_epoch = int(np.ceil(train_generator.samples / float(TRAIN_BATCH_SIZE)))
validation_steps = int(np.ceil(val_generator.samples / float(VAL_BATCH_SIZE)))

model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch,
                    epochs=200, verbose=1, callbacks=[early_stopping],
                    validation_data=val_generator, validation_steps=validation_steps)

Found 41225 images belonging to 7 classes.
Found 19871 images belonging to 7 classes.
Epoch 1/200
Epoch 2/200

In [None]:
# Print the index and name of every base-model layer, to help decide how
# many layers to freeze.
for i, layer in enumerate(base_model.layers):
   print(i, layer.name)

# Evaluate model

In [None]:
# scores = model.evaluate(x_testing, y_testing)
# print('%s: %.2f%%'% (model.metrics_names[1], scores[1]*100))

In [None]:
# model.save('fer2013.h5')
# Persist the model's weights to an HDF5 file. save_weights stores the
# weights only, so the model has to be rebuilt in code before they can be
# loaded back.
model.save_weights('emotiw2017_mxnet_cascade_cnn_face_resnet50_weights.h5')

# Load trained model

In [None]:
# model.load_weights('fer2013_weights.h5')

In [None]:
# scores = model.evaluate(x_testing, y_testing)
# print('%s: %.2f%%'% (model.metrics_names[1], scores[1]*100))

***
# Test trained model

In [None]:
# # 2163
# img_indx = np.uint32(np.random.rand()*(testingset.shape[0] - 1))
# sample = x_testing[img_indx, :]
# sample = sample.reshape(48, 48)

# pred_cls = model.predict_classes(sample.reshape(1, 48, 48, 1))

# plt.imshow(sample, cmap='gray')
# plt.show()
# print('> testing image index: %d\n> true emotion: %s\n> predicted emotion: %s' % (img_indx, get_emotion(y_testing[img_indx, :]), get_emotion(pred_cls)))

***
# Partial accuracy

In [None]:
# for emo_indx in range(0, n_classes):
#     data_for_class = testingset[testingset[:, 2304 + emo_indx] == 1]
#     x_data = data_for_class[:, 0:2304]
#     x_data = x_data.reshape(x_data.shape[0], 48, 48)
#     x_data = np.expand_dims(x_data, axis=4)

#     y_data = data_for_class[:, 2304:2304 + n_classes]

#     scores = model.evaluate(x_data, y_data, batch_size=32, verbose=0)
#     print('> Accuracy %.2f%% for <%s>'% (scores[1]*100, get_emotion(np.array([emo_indx]))))