# Age Prediction on LFW

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Convolution2D,MaxPooling2D,Dense,Dropout,Activation,Flatten,BatchNormalization
from keras.callbacks import ModelCheckpoint,EarlyStopping
import seaborn as sns
from sklearn.metrics import accuracy_score,classification_report
from sklearn.ensemble import RandomForestRegressor
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn import svm
import glob
import h5py
import cv2
from tqdm import trange,tqdm
from ipywidgets import interact
import ipywidgets as widgets

Using TensorFlow backend.


## Define constants

In [2]:
num_epochs = 5
batch_size = 32
train_size = 10000
filelist = np.array(glob.glob("../Data/lfw-deepfunneled/**/*.jpg"))

In [3]:
lfwdf = pd.read_csv("../Data/lfw_attributes.txt",sep='\t')

In [4]:
zippy = np.array(list(zip(lfwdf["person"].values,lfwdf["imagenum"].values)))
zippy = [[i[0].replace(' ','_'),"{}_{:04d}".format(i[0].replace(' ','_'),int(i[1]))] for i in zippy]
zippy = np.array(["../Data/lfw-deepfunneled\\{}\\{}.jpg".format(i[0],i[1]) for i in zippy])
lfwdf = lfwdf.assign(merger=zippy)
filelist = pd.DataFrame(filelist)
lfwdf = lfwdf.merge(filelist,left_on=["merger"],right_on=filelist.columns[0],copy=False)
lfwdf.drop(lfwdf.columns[-1],axis=1,inplace=True)
lfwdf.head()

Unnamed: 0,person,imagenum,Male,Asian,White,Black,Baby,Child,Youth,Middle Aged,...,5 o' Clock Shadow,Strong Nose-Mouth Lines,Wearing Lipstick,Flushed Face,High Cheekbones,Brown Eyes,Wearing Earrings,Wearing Necktie,Wearing Necklace,merger
0,Aaron Eckhart,1,1.568346,-1.889043,1.737203,-0.929729,-1.471799,-0.19558,-0.835609,-0.351468,...,1.166118,-1.164916,-1.13999,-2.371746,-1.299932,-0.414682,-1.144902,0.694007,-0.826609,../Data/lfw-deepfunneled\Aaron_Eckhart\Aaron_E...
1,Aaron Guiel,1,0.169851,-0.982408,0.422709,-1.282184,-1.36006,-0.867002,-0.452293,-0.197521,...,-0.39768,0.87416,-0.945431,-0.268649,-0.006244,-0.030406,-0.480128,0.66676,-0.496559,../Data/lfw-deepfunneled\Aaron_Guiel\Aaron_Gui...
2,Aaron Patterson,1,0.997749,-1.364195,-0.157377,-0.756447,-1.891825,-0.871526,-0.862893,0.031445,...,1.884745,-0.999765,-1.359858,-1.912108,-1.095634,0.915126,-0.572332,0.144262,-0.841231,../Data/lfw-deepfunneled\Aaron_Patterson\Aaron...
3,Aaron Peirsol,1,1.122719,-1.997799,1.916144,-2.514214,-2.580071,-1.404239,0.057551,0.000196,...,-0.176089,1.108125,-1.600944,-3.264613,0.813418,0.308631,-0.848693,0.475941,-0.447025,../Data/lfw-deepfunneled\Aaron_Peirsol\Aaron_P...
4,Aaron Peirsol,2,1.078214,-2.008098,1.676211,-2.278056,-2.651845,-1.348408,0.649089,0.017656,...,-0.955283,0.119113,-1.128176,-3.161048,0.08268,-0.439614,-0.359859,-0.760774,-0.410152,../Data/lfw-deepfunneled\Aaron_Peirsol\Aaron_P...


### Load Data

In [5]:
def get_image(img_path):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    if(img.shape[0]!=img.shape[1]):
        img = img[25:225,25:225]
    img = cv2.resize(img,(64,64))
    img = ((img.astype(float).reshape(64,64,1))/255)
    return img

In [6]:
def get_batch(batch_size,step,start_index=0,end_index=train_size):
    i = step
    si = start_index+(i*batch_size)
    ei = start_index+((i+1)*batch_size)
    X = []
    for j in range(si,ei):
        X.append(get_image(lfwdf["merger"][j]))
    X = np.array(X)
    y = lfwdf[["Baby","Child","Youth","Middle Aged","Senior"]][si:ei].values
    for j in range(y.shape[0]):
        max_cmp = max(y[j])
        onehot = [1 if yj == max_cmp else 0 for yj in y[j]]
        y[j]=onehot
    return X,y

## VGG-16 Model

In [7]:
model = Sequential()
model.add(Convolution2D(32,(3,3),activation='relu',input_shape=(64,64,1),padding='same'))
model.add(BatchNormalization())
model.add(Convolution2D(32,(3,3),activation='relu',padding='same'))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
model.add(Convolution2D(64,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(Convolution2D(64,(3,3),activation='relu',padding='same'))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
model.add(Convolution2D(128,(3,3),activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(Convolution2D(128,(3,3),activation='relu',padding='same'))
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(1024,activation='relu'))
model.add(BatchNormalization())
model.add(Dense(1024,activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(5,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 64, 64, 32)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 64, 64, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
batch_normalization_3 (Batch (None, 32, 32, 64)        256       
__________

## Training the Model

In [None]:
for epoch_i in range(num_epochs):
    print("Epoch {}".format(epoch_i+1))
    loss_avg = 0.0
    acc_avg = 0.0
    step = 0
    last_step = 0
    num_steps = train_size//batch_size
    trng = tqdm(range(num_steps),total=num_steps)
    for i in trng:
        X,y = get_batch(batch_size,i)
        loss,acc = model.train_on_batch(X,y)
        loss_avg+=loss
        acc_avg+=acc
        step+=1
        if(step%50)==0:
            trng.set_description("Loss: {:.4f}\tAcc: {:.4f}".format(loss_avg/(step-last_step),acc_avg/(step-last_step)))
            trng.refresh()
            loss_avg = 0.0
            acc_avg = 0.0
            last_step = step
    model.save("../Models/LFW_GRAYSCALE_64/Age/model-{:03d}.hdf5".format(epoch_i+1))

Epoch 1


Loss: 1.7023	Acc: 0.3538: 100%|██████████████████████████████████████████████████████| 312/312 [04:14<00:00,  1.23it/s]


Epoch 2


Loss: 1.3032	Acc: 0.4806: 100%|██████████████████████████████████████████████████████| 312/312 [04:11<00:00,  1.24it/s]


Epoch 3


Loss: 1.2257	Acc: 0.5056:  65%|███████████████████████████████████▎                  | 204/312 [02:39<01:24,  1.28it/s]

## Results