# Challenges in Representation Learning: Facial Expression Recognition Challenge
### _Learn facial expressions from an image_
#### https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/overview

1. **[Project Background](#1)**
2. **[Data Cleaning](#2)**
3. **[Exploratory Data Analysis](#3)**
4. **[Machine Learning](#5)**

# 1. Project Background <a id="1"></a>

The data consists of 48x48 pixel grayscale images of faces. The faces have been automatically registered so that the face is more or less centered and occupies about the same amount of space in each image. The task is to categorize each face, based on the emotion shown in the facial expression, into one of seven categories (0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral).

# 2. Data Cleaning <a id="2"></a>

### 2.1 Load the raw data

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

from keras import models
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
#from keras.optimizers import RMSprop,Adam
from tensorflow.keras.optimizers import RMSprop
#from keras.utils import to_categorical
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import MultiLabelBinarizer

In [None]:

# Only the combined icml_face_data.csv is read; the loaders for the other CSVs
# in the competition directory are kept below, disabled, for reference.
# train_df_original = pd.read_csv('kaggle/input/challenges-in-representation-learning-facial-expression-recognition-challenge/train.csv')
# test_df_original = pd.read_csv('kaggle/input/challenges-in-representation-learning-facial-expression-recognition-challenge/test.csv')
# example_submission_df_original = pd.read_csv('kaggle/input/challenges-in-representation-learning-facial-expression-recognition-challenge/example_submission.csv')
icml_face_data_df_original = pd.read_csv('/kaggle/input/challenges-in-representation-learning-facial-expression-recognition-challenge/icml_face_data.csv')

In [None]:

# Work on a copy so the frame read from disk stays unmodified when the
# ' pixels' column is overwritten later.
icml_face_data_df = icml_face_data_df_original.copy()

In [None]:

# Preview the first rows of the working frame.
icml_face_data_df.head()

In [None]:
# Row count per split label. Note the column name starts with a space (' Usage').
icml_face_data_df[' Usage'].value_counts()

In [None]:

def pixels_to_array(pixels):
    """Parse a whitespace-separated string of pixel values into a 1-D float64 array."""
    tokens = pixels.split()
    return np.array(tokens, dtype='float64')

def image_reshape(data, height=48, width=48, channels=1):
    """Stack a Series of flat pixel arrays into a 4-D image batch.

    Parameters
    ----------
    data : pandas.Series
        One flat sequence of ``height * width * channels`` values per row.
    height, width, channels : int, optional
        Dimensions of a single image. The defaults (48, 48, 1) reproduce the
        shape this function previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(data), height, width, channels)``.

    Raises
    ------
    ValueError
        If the total number of values does not match the requested shape.
    """
    return np.reshape(data.to_list(), (len(data), height, width, channels))

In [None]:
# Overwrite each row's raw pixel string with its parsed float array.
icml_face_data_df[' pixels'] = icml_face_data_df[' pixels'].apply(pixels_to_array)

In [None]:
# Partition the rows by the split label stored in ' Usage':
# 'Training' -> train, 'PrivateTest' -> validation, 'PublicTest' -> test.
train_df = icml_face_data_df.loc[icml_face_data_df[' Usage'].eq('Training')]
val_df = icml_face_data_df.loc[icml_face_data_df[' Usage'].eq('PrivateTest')]
test_df = icml_face_data_df.loc[icml_face_data_df[' Usage'].eq('PublicTest')]

In [None]:
# Training features as an (N, 48, 48, 1) array; labels stay an integer Series for now.
X_train = image_reshape(train_df[' pixels'])
y_train = train_df['emotion']

In [None]:
# Validation features and integer labels (rows tagged 'PrivateTest').
X_val = image_reshape(val_df[' pixels'])
y_val = val_df['emotion']

In [None]:
# Test features and integer labels (rows tagged 'PublicTest').
X_test = image_reshape(test_df[' pixels'])
y_test = test_df['emotion']

# 3. Exploratory Data Analysis <a id="3"></a>

### 3.1 Label Distribution

In [None]:
# Display names positioned by label id (0=Angry ... 6=Neutral); later cells reuse this list.
emotions = ['Angry','Disgust','Fear','Happy','Sad','Surprise','Neutral']

# Fraction of training rows per label id, as a one-column frame.
# The column is named explicitly: pandas >= 2.0 no longer names the
# value_counts() result after the source column, so relying on an implicit
# 'emotion' column raises KeyError there. Reindexing over every label id keeps
# the bar heights aligned with `emotions` even if a class has no rows.
emotion_prop = (
    (train_df['emotion'].value_counts() / len(train_df))
    .reindex(range(len(emotions)), fill_value=0.0)
    .rename('emotion')
    .to_frame()
)

plt.figure(figsize=[12,6])

plt.bar(x=emotions, height=emotion_prop['emotion'], edgecolor='black')

plt.xlabel('Emotion')
plt.ylabel('Proportion')
plt.title('Proportion of Emotion Labels')
plt.show()

In [None]:
# Per-class loss weights for model.fit. They must be *inversely* related to
# class frequency so rare classes count more; using the raw class proportions
# would do the opposite and up-weight the majority classes.
# 'balanced' gives n_samples / (n_classes * class_count) for each label id.
class_weight = dict(zip(
    range(0, 7),
    compute_class_weight(
        class_weight='balanced',
        classes=np.arange(7),
        y=train_df['emotion'],
    ).tolist(),
))
class_weight

### 3.2 View Sample of Images

In [None]:
# Integer labels of the training split as a plain array, so every lookup
# below is positional and lines up with the rows of X_train.
train_labels = train_df['emotion'].to_numpy()

# Positions of the first five training images of each class (7 x 5 = 35).
# Positions are used rather than DataFrame index labels: X_train is a NumPy
# array, so label-based indices only work while the training rows happen to
# be the first rows of the file.
img_ind_list = []
for i in range(7):
    img_ind_list.extend(np.flatnonzero(train_labels == i)[:5].tolist())

fig = plt.figure(figsize=(30,50))
for i, ind in enumerate(img_ind_list):
    ax = fig.add_subplot(7, 5, i + 1)
    # Drop the trailing channel axis: imshow expects (M, N) for grayscale.
    ax.imshow(X_train[ind, :, :, 0], cmap="gray")
    ax.set_title(emotions[train_labels[ind]])
    ax.axis("off")
plt.show()

# 5. Machine Learning <a id="5"></a>

### 5.1 Split Data

In [None]:
# Cast every split to float32 and divide by 255.
X_train, X_val, X_test = (
    split.astype('float32') / 255
    for split in (X_train, X_val, X_test)
)

In [None]:
# Keep the integer test labels; later cells look samples up by class id.
y_test_original = y_test.copy()

# One-hot encode with a fixed width. Without num_classes, to_categorical
# infers the width from the largest label present in each split, so a split
# missing the highest class would not match the model's 7-unit output.
y_train = to_categorical(y_train, num_classes=len(emotions))
y_val = to_categorical(y_val, num_classes=len(emotions))
y_test = to_categorical(y_test, num_classes=len(emotions))

### 5.2 Build Model
- We define a simple CNN model

In [None]:

# Small CNN: three 3x3 conv layers (32, 64, 64 filters) with 2x2 max-pooling
# after the first two, then a 64-unit dense layer and a 7-way softmax output.
model = models.Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(48, 48, 1)),
    MaxPool2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(7, activation='softmax'),
])

In [None]:
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by current Keras releases.
model.compile(optimizer=Adam(learning_rate=1e-3), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

### 5.3 Train Model

In [None]:
# Train for 15 epochs in mini-batches of 64, scoring the validation split
# after every epoch; the returned History object is kept for plotting.
h1 = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=15,
    class_weight=class_weight,
    validation_data=(X_val, y_val),
)

### 5.4 Evaluate Model

In [None]:
# Training vs. validation curves, one panel per metric (loss, then accuracy).
history = h1.history

epoch_range = range(1, len(history['loss'])+1)

fig = plt.figure(figsize=(14,4))
panels = [
    ('loss', 'val_loss', 'Loss'),
    ('accuracy', 'val_accuracy', 'Accuracy'),
]
for position, (train_key, val_key, title) in enumerate(panels, start=1):
    ax = fig.add_subplot(1, 2, position)
    ax.plot(epoch_range, history[train_key], label='Training')
    ax.plot(epoch_range, history[val_key], label='Validation')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(title)
    ax.set_title(title)
    ax.legend()
plt.show()

In [None]:
#18
# Loss and accuracy of the trained model on the test split.
test_loss, test_acc = model.evaluate(X_test, y_test)
# 0.5188  (NOTE(review): presumably the test accuracy from an earlier run - confirm)

### 5.6 Analyse Wrong Prediction

In [None]:
# Model outputs for every test image; later cells take argmax over axis 1 to get a class id.
pred_test = model.predict(X_test)

In [None]:
# Side-by-side true vs. predicted class ids, with a 0/1 flag marking mistakes.
df_compare = pd.DataFrame({
    'real': y_test.argmax(axis=1),
    'pred': pred_test.argmax(axis=1),
})
df_compare['wrong'] = (df_compare['real'] != df_compare['pred']).astype(int)

In [None]:
# Confusion matrix of true vs. predicted class ids. `labels` is pinned to all
# seven ids so the matrix is always 7x7 and matches `class_names`, even if a
# class never appears in the test labels or the predictions.
conf_mat = confusion_matrix(
    y_test.argmax(axis=1),
    pred_test.argmax(axis=1),
    labels=list(range(len(emotions))),
)

fig, ax = plot_confusion_matrix(conf_mat=conf_mat,
                                show_normed=True,
                                show_absolute=False,
                                class_names=emotions,
                                figsize=(8, 8))
# plt.show() rather than fig.show(): the latter only warns on non-GUI
# (e.g. notebook inline) backends, and the other cells use plt.show().
plt.show()

### 5.7 Analyse Results

In [None]:
# Integer test labels renumbered 0..N-1, kept as a one-column ('emotion')
# frame so row positions line up with X_test.
y_test_reset_index = y_test_original.reset_index(drop=True).to_frame()

# Row position of the first test sample of each class id 0-6.
emotions_ind_list = [
    y_test_reset_index[y_test_reset_index['emotion'] == n].index[0]
    for n in range(7)
]

In [None]:
# For one test image per class, show the image (titled with its true label)
# next to a bar chart of the model's predicted class probabilities.
# All selected images are scored in a single batched predict() call instead
# of one call per image.
sample_probs = model.predict(X_test[emotions_ind_list])

for rand_ind, probs in zip(emotions_ind_list, sample_probs):
    fig = plt.figure(figsize=(20,8))

    ax = fig.add_subplot(1, 2, 1)
    # Drop the trailing channel axis: imshow expects (M, N) for grayscale.
    ax.imshow(X_test[rand_ind, :, :, 0], cmap="gray")
    ax.set_title(emotions[y_test_reset_index['emotion'][rand_ind]])
    ax.axis("off")
    ax_2 = fig.add_subplot(1, 2, 2)
    ax_2.bar(emotions, probs.tolist(), align="center")
    ax_2.set_title('Predict')
    plt.show()