In [1]:
from scipy.io import loadmat
from skimage import io
from skimage.transform import resize
from sklearn.model_selection import train_test_split
import keras

import pandas as pd
import numpy as np

from os.path import join

import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Image
from IPython.display import clear_output

from datetime import datetime
from dateutil.relativedelta import relativedelta
from datetime import timedelta

# NOTE(review): not referenced by any cell visible here — presumably a leftover
# subsampling setting; confirm before relying on or removing it.
sample_size = 2000

Using TensorFlow backend.


In [2]:
class PlotProgress(keras.callbacks.Callback):
    """Keras callback that redraws a training curve after every epoch.

    One logged quantity (``entity``) and its ``val_``-prefixed counterpart are
    recorded at the end of each epoch and plotted against the epoch counter.
    """

    def __init__(self, entity='loss'):
        """Remember which log entry to plot.

        Args:
            entity: Key looked up in the per-epoch ``logs`` dict; the second
                curve is read from ``'val_' + entity``.
        """
        # Let the base Callback initialise its own attributes first.
        super(PlotProgress, self).__init__()
        self.entity = entity

    def on_train_begin(self, logs=None):
        """Reset the recorded history and open a figure when training starts."""
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []

        self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's values and redraw both curves.

        Args:
            epoch: Epoch index supplied by Keras (unused; an internal counter
                is kept instead).
            logs: Dict of values for the finished epoch; may be ``None``.
        """
        # Guard against a missing dict so the lookups below cannot fail.
        logs = logs if logs is not None else {}
        train_key = '{}'.format(self.entity)
        val_key = 'val_{}'.format(self.entity)

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get(train_key))
        self.val_losses.append(logs.get(val_key))
        self.i += 1

        # Replace the previous cell output instead of stacking a plot per epoch.
        clear_output(wait=True)
        plt.plot(self.x, self.losses, label=train_key)
        plt.plot(self.x, self.val_losses, label=val_key)
        plt.legend()
        plt.show()

In [3]:
def get_date(matlab_datenum):
    """Convert a MATLAB serial date number to a Python ``datetime``.

    Args:
        matlab_datenum: MATLAB datenum (days, optionally with a fractional
            part encoding the time of day). Anything accepted by ``float()``.

    Returns:
        datetime: The corresponding date; the fractional part of the input is
        kept as the time of day.

    Raises:
        ValueError: If the whole-day part is not a valid ordinal for
            ``datetime.fromordinal`` (or the input is NaN).
    """
    # Go through float so numpy scalars become plain Python numbers, which
    # timedelta requires.
    datenum = float(matlab_datenum)
    whole_days = int(datenum)
    # Take the fraction from the untruncated value; computing it after the
    # int() conversion would always yield zero and drop the time of day.
    day_fraction = datenum % 1
    # MATLAB datenums run 366 days ahead of Python's proleptic ordinals.
    return (datetime.fromordinal(whole_days)
            + timedelta(days=day_fraction)
            - timedelta(days=366))

def get_age(row):
    """Return the whole-year difference between ``photo_taken`` and ``dob``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the
            ``'photo_taken'`` and ``'dob'`` entries.

    Returns:
        The ``years`` component of the ``relativedelta`` between the two
        values, or ``None`` when it cannot be computed.
    """
    try:
        return relativedelta(row['photo_taken'], row['dob']).years
    except Exception:
        # Best-effort: unusable rows yield None. Catching Exception rather
        # than everything lets KeyboardInterrupt/SystemExit still stop a
        # long-running apply.
        return None

def get_image(path, base='wiki_crop'):
    """Read and return the image located at ``path`` underneath ``base``."""
    image_location = join(base, path)
    return io.imread(image_location)

In [4]:
# Load the MATLAB file and keep only its 'wiki' entry (None if absent).
wiki_mat = loadmat('./wiki_crop/wiki.mat')
raw_metadata = wiki_mat.get('wiki')

In [5]:
# Walk three nesting levels of the loaded structure, collect the innermost
# items into one array, and drop the singleton axes.
flattened_metadata = np.array([
    field
    for outer in raw_metadata
    for record in outer
    for field in record
]).squeeze()

In [6]:
# One row per sample; only the first four metadata fields are kept.
metainfo_columns = ['dob', 'photo_taken', 'full_path', 'gender']
metainfo_df = pd.DataFrame(flattened_metadata.T[:, :4], columns=metainfo_columns)

In [7]:
# Turn the photo year into a datetime pinned to 1 January of that year.
metainfo_df['photo_taken'] = metainfo_df['photo_taken'].apply(
    lambda year: datetime(year, 1, 1))
# Convert the MATLAB date numbers of birth into datetimes.
metainfo_df['dob'] = metainfo_df['dob'].apply(get_date)
# Age in whole years, computed row by row.
metainfo_df['age'] = metainfo_df.apply(get_age, axis=1)
# Keep rows with a positive age, then those with a known gender.
metainfo_df = metainfo_df.loc[metainfo_df['age'] > 0]
metainfo_df = metainfo_df.loc[metainfo_df['gender'].notnull()]

In [8]:
# The date columns are no longer needed once the age has been derived.
metainfo_df = metainfo_df.drop(columns=['dob', 'photo_taken'])
(metainfo_df.head(), metainfo_df.shape)

(                           full_path gender  age
 0  [17/10000217_1981-05-05_2009.jpg]      1   27
 1  [48/10000548_1925-04-04_1964.jpg]      1   38
 2    [12/100012_1948-07-03_2008.jpg]      1   59
 3  [65/10001965_1930-05-23_1961.jpg]      1   30
 4  [16/10002116_1971-05-31_2012.jpg]      0   40, (58440, 3))

In [9]:
# Each full_path entry is a one-element array (see the preview output);
# read the image stored at its first element.
metainfo_df['data'] = metainfo_df['full_path'].apply(
    lambda path_array: get_image(path_array[0])
)

In [13]:
# Keep only images whose array has exactly three axes.
metainfo_df = metainfo_df.loc[
    metainfo_df['data'].apply(lambda image: len(np.shape(image))) == 3
]

In [None]:
# Bring every image to a common 100x100 size.
# `mode` is passed explicitly: the library warns that its default is changing
# from 'constant' to 'reflect', so pinning the current default keeps the
# result the same across scikit-image versions and silences the warning.
metainfo_df['data'] = metainfo_df.data.apply(
    lambda img: resize(img, (100, 100), mode='constant', anti_aliasing=True)
)

  warn("The default mode, 'constant', will be changed to 'reflect' in "


In [None]:
# Number of missing values per column.
metainfo_df.isnull().sum()

In [None]:
# Stack the per-row image arrays into a single array and pull out the two
# target columns (age first, gender second).
faces = np.array(list(metainfo_df['data']))
labels = metainfo_df.loc[:, ['age', 'gender']].values

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible. Both arrays are cast to float32 first.
X_train, X_test, Y_train, Y_test = train_test_split(
    faces.astype('float32'),
    labels.astype('float32'),
    random_state=123,
    test_size=0.2,
)

In [None]:
# Column 0 holds the age target, column 1 the gender target.
y_train_age, y_train_gender = Y_train[:, 0], Y_train[:, 1]
y_test_age, y_test_gender = Y_test[:, 0], Y_test[:, 1]

In [None]:
import keras
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

from keras.utils import plot_model

In [None]:
filter_size = (3, 3)
# NOTE(review): asymmetric pooling window — confirm (2, 2) was not intended.
maxpool_size = (2, 3)
dr = 0.3  # dropout rate shared by both convolutional stages

inputs = Input(shape=X_train[0].shape, name='main_input')

# First convolutional stage: conv -> ReLU -> max-pool -> dropout.
main_branch = Conv2D(16, kernel_size=filter_size, padding="same")(inputs)
main_branch = Activation("relu")(main_branch)
main_branch = MaxPooling2D(pool_size=maxpool_size)(main_branch)
main_branch = Dropout(dr)(main_branch)

# Second convolutional stage. It must consume the first stage's output;
# wiring it to `inputs` would drop the first stage from the graph entirely.
main_branch = Conv2D(8, kernel_size=filter_size, padding="same")(main_branch)
main_branch = Activation("relu")(main_branch)
main_branch = MaxPooling2D(pool_size=maxpool_size)(main_branch)
main_branch = Dropout(dr)(main_branch)

# Shared dense representation consumed by the output heads.
main_branch = Flatten()(main_branch)
main_branch = Dense(16)(main_branch)
main_branch = Activation('relu')(main_branch)

In [None]:
# Two heads on top of the shared trunk: a single-unit ReLU output for age
# and a two-unit softmax output for gender.
age_branch = Dense(units=1, activation='relu', name='age_output')(main_branch)
gender_branch = Dense(units=2, activation='softmax', name='gender_output')(main_branch)

In [None]:
# Single-input model with two outputs, in the order [age, gender].
model = Model(inputs=inputs, outputs=[age_branch, gender_branch])
model.summary()

In [None]:
# Render the architecture to disk, then display the image that was written.
plot_model(model, to_file='model.png')
Image(filename='model.png', retina=True)

In [None]:
# Pass the configured optimizer instance to compile(); giving the string
# 'rmsprop' instead would build a fresh default optimizer and silently
# ignore any settings made on `opt`.
opt = keras.optimizers.RMSprop(lr=0.001)
model.compile(optimizer=opt,
              # One loss per named output, plus a weight for each in the total.
              loss={'age_output': 'mse', 'gender_output': 'sparse_categorical_crossentropy'},
              loss_weights={'age_output': .001, 'gender_output': 1.})

In [None]:
plot_progress = PlotProgress(entity='loss')

# Train on the named input/outputs, holding back 20% of the training data
# for validation. A keyboard interrupt stops training without a traceback.
try:
    model.fit(
        x={'main_input': X_train},
        y={'age_output': y_train_age, 'gender_output': y_train_gender},
        batch_size=32,
        epochs=10,
        verbose=1,
        validation_split=0.2,
        callbacks=[plot_progress],
    )
except KeyboardInterrupt:
    pass

In [None]:
# Share of training samples whose predicted gender class (arg-max over the
# model's second output) matches the label.
(np.argmax(model.predict(X_train)[1], axis=1) == y_train_gender).mean()

In [None]:
# Share of test samples whose predicted gender class (arg-max over the
# model's second output) matches the label.
(np.argmax(model.predict(X_test)[1], axis=1) == y_test_gender).mean()