# Race Prediction on LFW

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Convolution2D,MaxPooling2D,Dense,Dropout,Activation,Flatten
from keras.callbacks import ModelCheckpoint,EarlyStopping
import seaborn as sns
from sklearn.metrics import accuracy_score,classification_report
from sklearn.ensemble import RandomForestRegressor
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn import svm
import h5py
import cv2

Using TensorFlow backend.


## Load Data

In [10]:
lfwdf = pd.read_csv("../Data/lfw_attributes.txt",sep='\t')
imgdf = pd.read_csv("../Data/lfw_grayscale_64.csv",header=None)

In [11]:
y_train = []
for i in range(len(lfwdf)):
    racemax = max(lfwdf["Asian"][i],lfwdf["White"][i],lfwdf["Black"][i],lfwdf["Indian"][i])
    if lfwdf["Asian"][i]==racemax:
        y_train.append(0)
    elif lfwdf["White"][i]==racemax:
        y_train.append(1)
    elif lfwdf["Black"][i]==racemax:
        y_train.append(2)
    else:
        y_train.append(3)
y_train=np.array(y_train)
print(np.unique(y_train,return_counts=True))

(array([0, 1, 2, 3]), array([ 1318, 10651,   675,   499], dtype=int64))


In [12]:
np.random.seed(123)
X_train = (imgdf.iloc[:len(y_train),1:].values.astype(float).reshape(-1,64,64,1))/255
print(X_train.shape)

(13143, 64, 64, 1)


## VGG-16 Model

In [13]:
model = Sequential()
model.add(Convolution2D(32,(3,3),activation='relu',input_shape=(64,64,1),padding='same'))
model.add(Convolution2D(32,(3,3),activation='relu',padding='same'))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
model.add(Convolution2D(64,(3,3),activation='relu',padding='same'))
model.add(Convolution2D(64,(3,3),activation='relu',padding='same'))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
model.add(Convolution2D(128,(3,3),activation='relu',padding='same'))
model.add(Convolution2D(128,(3,3),activation='relu',padding='same'))
model.add(Flatten())
model.add(Dense(1024,activation='relu'))
model.add(Dense(1024,activation='relu'))
model.add(Dense(4,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 64, 64, 32)        320       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 64, 64, 32)        9248      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 16, 16, 128)       73856     
__________

In [29]:
save_callback = ModelCheckpoint("../Models/LFW_GRAYSCALE_64/Race/no-dropout-{epoch:03d}-{val_loss:.4f}.hdf5",verbose=1)
es_callback = EarlyStopping(patience=5,verbose=1)
callback_list = [save_callback,es_callback]

## Training the Model

In [30]:
model.fit(X_train,y_train,batch_size=32,epochs=100,verbose=1,callbacks=callback_list,validation_data=(X_test,y_test))

Train on 9857 samples, validate on 3286 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
 128/9857 [..............................] - ETA: 3:06 - loss: 0.6886 - acc: 0.8047

KeyboardInterrupt: 

In [14]:
model.load_weights("../Models/LFW_GRAYSCALE_64/Race/no-dropout-001-0.7030.hdf5")
y_pred = model.predict(X_train)

In [15]:
y_pred_int = []
for i in range(len(y_pred)):
    for j in range(len(y_pred[i])):
        if y_pred[i][j]==max(y_pred[i]):
            y_pred_int.append(j)
            break

## Results

In [17]:
print(accuracy_score(y_train,y_pred_int))

0.81039336529
