# Race Prediction on LFW

## Import Libraries

In [86]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Convolution2D,MaxPooling2D,Dense,Dropout,Activation,Flatten
from keras.callbacks import ModelCheckpoint,EarlyStopping,TensorBoard
from keras.utils import to_categorical
import seaborn as sns
from sklearn.metrics import accuracy_score,classification_report
from sklearn.ensemble import RandomForestRegressor
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn import svm
import h5py
import cv2
from glob import glob
from tqdm import tqdm,trange
import re

In [101]:
# Training configuration shared by the batch generator and the fit call.
dataset_size = 13143  # total number of samples; matches the generator's default end index
train_size = 10000    # samples [0, train_size) train the model, the rest validate it
num_epochs = 50
batch_size = 16
# Only whole batches are counted; any remainder is not visited.
train_steps = train_size // batch_size
# Derived from the split rather than hard-coded, so it follows train_size.
val_steps = (dataset_size - train_size) // batch_size

## Load Data

In [37]:
# Attribute table, read as tab-separated text. Later cells use its
# "person", "imagenum" and race score columns.
lfwdf = pd.read_csv(
    "../Data/lfw_attributes.txt",
    sep="\t",
)

# JPEG paths matched by the glob pattern under the deep-funneled image folder.
filelist = glob("../Data/lfw-deepfunneled/**/*.jpg")

In [80]:
# Derive one class label per row: the position of the highest-scoring race
# attribute, with Asian -> 0, White -> 1, Black -> 2, Indian -> 3.
race_columns = ["Asian", "White", "Black", "Indian"]

# np.argmax returns the first maximum along the axis, so ties resolve in
# column order, the same precedence an Asian/White/Black/Indian if-chain has.
race_labels = np.argmax(lfwdf[race_columns].values, axis=1)
print(np.unique(race_labels, return_counts=True))

# One-hot targets for the softmax output. num_classes is pinned so the target
# width stays 4 even when the highest label is absent from the data.
y_train = to_categorical(race_labels, num_classes=len(race_columns))

(array([0, 1, 2, 3]), array([ 1318, 10651,   675,   499], dtype=int64))


In [39]:
# Build one image path per attribute row, in row order, so that X_loc[k]
# refers to the same sample as the k-th label.
file_loc = []
for person_name, image_num in zip(lfwdf["person"], lfwdf["imagenum"]):
    # Folder and file names use underscores where the table has spaces.
    person = person_name.strip().replace(" ", "_")
    imgfile = "{}_{:04d}".format(person, image_num)
    # Forward slashes work on Windows as well as POSIX; the previous "\\"
    # separators only resolved on Windows.
    file_loc.append("../Data/lfw-deepfunneled/{}/{}.jpg".format(person, imgfile))
X_loc = file_loc

In [75]:
def load_image(filepath):
    """Read an image file and return it as a 64x64 RGB float32 array in [0, 1].

    The image is cropped by 25 pixels on every side, converted from OpenCV's
    BGR channel order to RGB, resized to 64x64 and scaled by 1/255.

    Parameters
    ----------
    filepath : str
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        float32 array produced by the steps above.

    Raises
    ------
    IOError
        If OpenCV cannot read the file (missing, unreadable or not an image).
    """
    img = cv2.imread(filepath)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with a TypeError on the crop.
    if img is None:
        raise IOError("Could not read image file: {!r}".format(filepath))
    img = img[25:-25,25:-25,:]
    img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    img = cv2.resize(img,(64,64))
    return img.astype(np.float32)/255.0

In [102]:
def get_batches(start_index=0,end_index=13143,batch_size=batch_size):
    """Endlessly yield (X, y) batches for samples in [start_index, end_index).

    Images are loaded from ``X_loc`` with ``load_image`` and paired with the
    matching rows of ``y_train``. Only whole batches are produced: samples left
    over after the last full batch of the range are never yielded. After the
    last batch the generator starts again from ``start_index``, in the same
    order.

    Parameters
    ----------
    start_index : int
        Index of the first sample of the range.
    end_index : int
        Index one past the last sample of the range.
    batch_size : int
        Number of samples per batch.

    Yields
    ------
    tuple
        ``(X, y)`` where ``X`` is the array of loaded images and ``y`` the
        corresponding slice of ``y_train``.

    Raises
    ------
    ValueError
        If the range does not contain at least one full batch.
    """
    num_batches = (end_index - start_index) // batch_size
    # With zero batches the endless loop below would spin forever without
    # ever yielding, hanging whoever consumes the generator.
    if num_batches < 1:
        raise ValueError(
            "range [{}, {}) holds no full batch of size {}".format(
                start_index, end_index, batch_size))
    while True:
        for batch_no in range(num_batches):
            first = start_index + batch_size * batch_no
            last = first + batch_size
            X = np.array([load_image(X_loc[k]) for k in range(first, last)])
            y = y_train[first:last]
            yield X,y

## VGG-style CNN Model

In [104]:
# Three stacks of paired 3x3 convolutions (32, 64 and 128 filters); the first
# two stacks are each followed by 2x2 max pooling. A dense classifier sits on
# top and ends in a 4-way softmax.
model = Sequential([
    # Stack 1: 64x64x3 input.
    Convolution2D(32,(3,3),activation='relu',input_shape=(64,64,3),padding='same'),
    Convolution2D(32,(3,3),activation='relu',padding='same'),
    MaxPooling2D(pool_size=(2,2),strides=(2,2)),
    # Stack 2.
    Convolution2D(64,(3,3),activation='relu',padding='same'),
    Convolution2D(64,(3,3),activation='relu',padding='same'),
    MaxPooling2D(pool_size=(2,2),strides=(2,2)),
    # Stack 3: no pooling follows, so Flatten sees the full 16x16x128 map
    # (32768 features) and the first Dense layer holds a 32768x512 kernel.
    Convolution2D(128,(3,3),activation='relu',padding='same'),
    Convolution2D(128,(3,3),activation='relu',padding='same'),
    Flatten(),
    Dense(512,activation='relu'),
    Dense(4,activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 64, 64, 32)        896       
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 64, 64, 32)        9248      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 32, 32, 64)        18496     
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 16, 16, 128)       73856     
__________

In [88]:
# Checkpoint files; the epoch number and validation loss are templated into
# the file name.
save_callback = ModelCheckpoint(
    filepath="../Models/LFW_GRAYSCALE_64/Race/model-{epoch:03d}-{val_loss:.4f}.hdf5",
    verbose=1,
)
# Stop training once the monitored quantity has not improved for 3 epochs.
es_callback = EarlyStopping(
    patience=3,
    verbose=1,
)
# Event files for TensorBoard.
tb_callback = TensorBoard(
    log_dir='../Data/logdir/RaceNet/',
)
callback_list = [
    save_callback,
    es_callback,
    tb_callback,
]

## Training the Model

In [105]:
# Samples [0, train_size) feed training; the remainder up to 13143 validates.
train_gen = get_batches(0,train_size,batch_size)
val_gen = get_batches(train_size,13143,batch_size)
model.fit_generator(
    train_gen,
    steps_per_epoch=train_steps,
    # Use the configured epoch count instead of a second hard-coded 50.
    epochs=num_epochs,
    verbose=1,
    callbacks=callback_list,
    validation_data=val_gen,
    validation_steps=val_steps,
)

ResourceExhaustedError: OOM when allocating tensor with shape[32768,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: training_2/Adam/Variable_28/Assign = Assign[T=DT_FLOAT, _class=["loc:@training_2/Adam/Variable_28"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training_2/Adam/Variable_28, training_2/Adam/zeros_12)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'training_2/Adam/Variable_28/Assign', defined at:
  File "c:\users\nitk\appdata\local\programs\python\python36\Lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "c:\users\nitk\appdata\local\programs\python\python36\Lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "c:\users\nitk\envs\facegan\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "c:\users\nitk\envs\facegan\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "c:\users\nitk\envs\facegan\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "c:\users\nitk\envs\facegan\lib\site-packages\tornado\platform\asyncio.py", line 112, in start
    self.asyncio_loop.run_forever()
  File "c:\users\nitk\appdata\local\programs\python\python36\Lib\asyncio\base_events.py", line 421, in run_forever
    self._run_once()
  File "c:\users\nitk\appdata\local\programs\python\python36\Lib\asyncio\base_events.py", line 1425, in _run_once
    handle._run()
  File "c:\users\nitk\appdata\local\programs\python\python36\Lib\asyncio\events.py", line 126, in _run
    self._callback(*self._args)
  File "c:\users\nitk\envs\facegan\lib\site-packages\tornado\platform\asyncio.py", line 102, in _handle_events
    handler_func(fileobj, events)
  File "c:\users\nitk\envs\facegan\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\users\nitk\envs\facegan\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "c:\users\nitk\envs\facegan\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "c:\users\nitk\envs\facegan\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "c:\users\nitk\envs\facegan\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\users\nitk\envs\facegan\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "c:\users\nitk\envs\facegan\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "c:\users\nitk\envs\facegan\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "c:\users\nitk\envs\facegan\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "c:\users\nitk\envs\facegan\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "c:\users\nitk\envs\facegan\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "c:\users\nitk\envs\facegan\lib\site-packages\IPython\core\interactiveshell.py", line 2856, in run_ast_nodes
    if self.run_code(code, result):
  File "c:\users\nitk\envs\facegan\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-105-e9e806e58a74>", line 3, in <module>
    model.fit_generator(train_gen,steps_per_epoch=train_steps,epochs=50,verbose=1,callbacks=callback_list,validation_data=val_gen,validation_steps=val_steps)
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\models.py", line 1276, in fit_generator
    initial_epoch=initial_epoch)
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\engine\training.py", line 2080, in fit_generator
    self._make_train_function()
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\engine\training.py", line 990, in _make_train_function
    loss=self.total_loss)
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\optimizers.py", line 458, in get_updates
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\optimizers.py", line 458, in <listcomp>
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\backend\tensorflow_backend.py", line 694, in zeros
    return variable(v, dtype=dtype, name=name)
  File "c:\users\nitk\envs\facegan\lib\site-packages\keras\backend\tensorflow_backend.py", line 395, in variable
    v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name)
  File "c:\users\nitk\envs\facegan\lib\site-packages\tensorflow\python\ops\variables.py", line 233, in __init__
    constraint=constraint)
  File "c:\users\nitk\envs\facegan\lib\site-packages\tensorflow\python\ops\variables.py", line 371, in _init_from_args
    validate_shape=validate_shape).op
  File "c:\users\nitk\envs\facegan\lib\site-packages\tensorflow\python\ops\state_ops.py", line 280, in assign
    validate_shape=validate_shape)
  File "c:\users\nitk\envs\facegan\lib\site-packages\tensorflow\python\ops\gen_state_ops.py", line 61, in assign
    use_locking=use_locking, name=name)
  File "c:\users\nitk\envs\facegan\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "c:\users\nitk\envs\facegan\lib\site-packages\tensorflow\python\framework\ops.py", line 3271, in create_op
    op_def=op_def)
  File "c:\users\nitk\envs\facegan\lib\site-packages\tensorflow\python\framework\ops.py", line 1650, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[32768,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: training_2/Adam/Variable_28/Assign = Assign[T=DT_FLOAT, _class=["loc:@training_2/Adam/Variable_28"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training_2/Adam/Variable_28, training_2/Adam/zeros_12)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [14]:
model.load_weights("../Models/LFW_GRAYSCALE_64/Race/no-dropout-001-0.7030.hdf5")
# X_train is not created by any other cell, so on a fresh kernel this cell
# failed with NameError. Build it from every path in X_loc, in label order.
# NOTE(review): this holds all images in memory at once - confirm that is
# acceptable, and that the weights file matches the model defined above.
X_train = np.array([load_image(path) for path in tqdm(X_loc)])
y_pred = model.predict(X_train)

In [15]:
# Predicted class = index of the largest probability in each row; the first
# index wins on ties. Unlike an element-by-element equality scan, argmax
# always emits exactly one label per row, so the result stays aligned with
# the ground truth. Kept as a plain list of Python ints.
y_pred_int = np.argmax(y_pred, axis=1).tolist()

## Results

In [17]:
# y_train is one-hot encoded by the label cell, while y_pred_int holds class
# indices; accuracy_score needs both in the same form, so collapse one-hot
# rows back to indices (integer labels pass through unchanged).
y_true = np.argmax(y_train, axis=1) if np.ndim(y_train) > 1 else y_train
# NOTE(review): the recorded accuracy below equals 10651/13143, the share of
# label 1 in the class counts printed earlier - check the per-class results
# (e.g. classification_report) before trusting this number.
print(accuracy_score(y_true,y_pred_int))

0.81039336529
