# Labeled Faces in the Wild

### Environment Setup
Please refer to the corresponding section of the report `README.pdf`.


In [None]:
# Imports
import numpy as np

from keras import backend
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Dense, Dropout, Activation, Flatten, Reshape

from sklearn.cross_validation import train_test_split

In [None]:
# Use the Theano-style ('th') image dimension ordering. The rest of this
# notebook stores images channel-first, e.g. the (1, 50, 37) model input shape.
backend.set_image_dim_ordering('th')

In [None]:
def load_train_data():
    """Load the training images and labels from the working directory.

    Reads 'X_train.npy' and 'y_train.npy'. Each image row is reshaped to
    50 x 37 and given a singleton channel axis, so the returned image array
    has shape (num_images, 1, 50, 37).

    Returns:
        A tuple (images, labels); labels are returned exactly as stored.
    """
    labels = np.load('y_train.npy')
    flat_images = np.load('X_train.npy')
    num_images = flat_images.shape[0]
    # Insert the channel axis right after the sample axis.
    images = flat_images.reshape(num_images, 50, 37)[:, np.newaxis]
    return images, labels

def split_train_data(image_data, image_labels, train_size=0.9, random_state=20):
    """Split images and labels into train and evaluation subsets.

    Thin wrapper around ``train_test_split`` that fixes the default split
    ratio and random seed used in this notebook.

    Args:
        image_data: Image array to split.
        image_labels: Labels aligned with ``image_data``.
        train_size: Forwarded to ``train_test_split`` (default 0.9).
        random_state: Seed forwarded to ``train_test_split`` (default 20).

    Returns:
        The result of ``train_test_split``; the caller unpacks it as
        (X_train, X_test, y_train, y_test).
    """
    split_options = {'train_size': train_size, 'random_state': random_state}
    return train_test_split(image_data, image_labels, **split_options)

num_classes = 7 # Define the number of classes in the classification problem

# Load the training data
image_data, image_labels = load_train_data()

# Split the training data into train and test data for evaluation of the model
X_train, X_test, y_train, y_test = split_train_data(image_data, image_labels)

# ndarray.astype returns a converted copy and leaves the original untouched,
# so the result has to be assigned back for the cast to take effect. Casting
# first also keeps the in-place division below valid for integer pixel data.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255 # Normalize values to be in range of 0 to 1
X_test /= 255 # Normalize values to be in range of 0 to 1

# Map each label to an array of 7 with value 1 at the position denoting the class number, and the rest 0
# e.g. label 0 will be represented as [1, 0, 0, 0, 0, 0, 0]
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

In [None]:
# Preprocess image
# Configure on-the-fly augmentation of the training images: rotations within
# a range of 25 (presumably degrees), width/height shifts within a range of
# 0.2 (presumably a fraction of the image size), no horizontal flipping, and
# 'nearest' filling for pixels uncovered by a transform.
datagen = ImageDataGenerator(rotation_range=25, 
                             width_shift_range=0.2, 
                             height_shift_range=0.2, 
                             horizontal_flip=False,
                             fill_mode='nearest')
# NOTE(review): fit() computes data-dependent statistics; none of the options
# above appear to need them, so this call may be redundant - confirm.
datagen.fit(X_train)

In [None]:
def construct_convolutional_neural_net_model(input_shape=(1, 50, 37), num_classes=7):
    """Build and compile the convolutional network used for classification.

    Architecture:
        * three blocks of 3x3 convolution -> ReLU -> 2x2 max pooling, with
          32, 32 and 64 filters respectively;
        * flatten, then two blocks of Dense(512) -> ReLU -> Dropout(0.2);
        * a final Dense layer with one unit per class followed by softmax.

    The model is compiled with categorical cross-entropy loss, the Adam
    optimizer and accuracy as the reported metric.

    Args:
        input_shape: Shape of a single input image. Defaults to
            (1, 50, 37), the channel-first shape used in this notebook.
        num_classes: Number of output classes. Defaults to 7.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # Convolution blocks: 3x3 convolution -> ReLU -> 2x2 max pooling.
    for block_index, nb_filter in enumerate((32, 32, 64)):
        if block_index == 0:
            # Only the first layer has to be told the input shape.
            model.add(Convolution2D(nb_filter, 3, 3, input_shape=input_shape))
        else:
            model.add(Convolution2D(nb_filter, 3, 3))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    # Fully connected classifier head.
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(512))
        model.add(Activation('relu'))
        model.add(Dropout(0.2))

    # One output unit per class, normalised to probabilities by softmax.
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer="adam", metrics=['accuracy'])

    return model

# Construct the compiled neural net model and print a summary of it
model = construct_convolutional_neural_net_model()
model.summary()

In [None]:
def train_model(model, datagen, X_train, y_train, batch_size, nb_epoch):
    """Train ``model`` on batches produced by ``datagen``.

    Args:
        model: Compiled model exposing ``fit_generator``.
        datagen: Generator object exposing ``flow``; supplies the batches.
        X_train: Training images.
        y_train: Training labels aligned with ``X_train``.
        batch_size: Number of samples per generated batch.
        nb_epoch: Number of training epochs.

    Returns:
        None. The model is trained in place.
    """
    batch_stream = datagen.flow(X_train, y_train, batch_size=batch_size)
    # One epoch is defined as as many samples as there are training images.
    epoch_size = len(X_train)
    model.fit_generator(batch_stream,
                        samples_per_epoch=epoch_size,
                        nb_epoch=nb_epoch,
                        verbose=1)
    
# Train the model with a batch size of 64 for 300 epochs
train_model(model, datagen, X_train, y_train, 64, 300)

In [None]:
def get_label(array):
    """Return the class index encoded by a one-hot vector.

    Args:
        array: Sequence of numbers, expected to contain a single 1.

    Returns:
        The index of the first element equal to 1, or None when no element
        equals 1.
    """
    matches = (index for index, value in enumerate(array) if value == 1)
    return next(matches, None)
    
def evaluate_model(model, X_test, y_test):
    """Print overall and per-class evaluation results for ``model``.

    Prints the loss and accuracy reported by ``model.evaluate``, then one
    line per sample with the true and predicted class, and finally the
    per-class counts of correct predictions and of samples.

    Args:
        model: Trained model exposing ``evaluate`` and ``predict``.
        X_test: Evaluation images.
        y_test: One-hot encoded labels, one row per image in ``X_test``.

    Returns:
        None. All results are printed.
    """
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print('Loss: {0}'.format(loss))
    print('Accuracy: {0}'.format(accuracy))

    # Size the tallies from the width of the one-hot labels instead of
    # hard-coding 7, so the report stays correct if the class count changes.
    class_count = np.shape(y_test)[1]
    total = [0] * class_count
    correct = [0] * class_count

    prediction = model.predict(X_test)
    for one_hot_label, probs in zip(y_test, prediction):
        correct_class = get_label(one_hot_label)
        total[correct_class] += 1
        # The predicted class is the one with the highest probability.
        pred_class = np.argmax(probs)
        if correct_class == pred_class:
            correct[correct_class] += 1
        print('%d %d'%(correct_class, pred_class))
    print('Correct: {0}'.format(correct))
    print('Total: {0}'.format(total))
    
# Evaluate the trained model on the held-out part of the training data
evaluate_model(model, X_test, y_test)

In [None]:
def load_test_data(filename):
    """Load test images from a .npy file.

    Each image row is reshaped to 50 x 37 and given a singleton channel
    axis, so the result has shape (num_images, 1, 50, 37).

    Args:
        filename: Path of the .npy file holding the images.

    Returns:
        The reshaped image array.
    """
    flat_images = np.load(filename)
    num_images = flat_images.shape[0]
    # Insert the channel axis right after the sample axis.
    return flat_images.reshape(num_images, 50, 37)[:, np.newaxis]

def output_to_csv(filename, prediction):
    """Write the predicted class of every sample to a CSV file.

    The file starts with the header ``ImageId,PredictedClass`` followed by
    one ``<row index>,<predicted class>`` line per row of ``prediction``.

    Args:
        filename: Path of the CSV file to create or overwrite.
        prediction: Sequence of per-class score rows, one per sample; the
            predicted class is the index of the highest score in the row.

    Returns:
        None.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(filename, 'w') as fo:
        fo.write("ImageId,PredictedClass\n")
        for image_id, probs in enumerate(prediction):
            fo.write("{0},{1}\n".format(image_id, np.argmax(probs)))

# Load the real test data
x_test = load_test_data('X_test.npy')
# Cast to float32 first so the in-place division below also works when the
# file stores integer pixel values (in-place true division cannot write a
# float result back into an integer array).
x_test = x_test.astype('float32')
x_test /= 255 # Normalize values to be in range of 0 to 1

# Predict the real test data with the trained model
prediction = model.predict(x_test)

# Print the result of the prediction to csv file
output_to_csv('result3.csv', prediction)