In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import cv2 
from PIL import Image
import tensorflow as tf 
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Dropout, LayerNormalization
from keras_preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split

In [3]:
# Mount Google Drive inside the Colab runtime at a fixed mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/gdrive"
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/gdrive


Read the fold data from the dataset. These files hold the image labels.

In [None]:
# Read the five tab-separated fold files from the mounted Drive folder.
FOLD_DIR = "/content/gdrive/MyDrive/projectDataAdience"
fold0, fold1, fold2, fold3, fold4 = [
    pd.read_csv(f"{FOLD_DIR}/fold_{fold_idx}_data.txt", sep="\t")
    for fold_idx in range(5)
]

# Stack all folds into a single frame with a fresh 0..n-1 index.
fold_data = pd.concat([fold0, fold1, fold2, fold3, fold4], ignore_index=True)

Preprocess data to only accept data that is necessary and clean for our use 

In [None]:
# Keep only the columns used later, then record where each face image lives on disk.
FACES_DIR = "/content/gdrive/MyDrive/projectDataAdience/faces/"
data = fold_data[['age', 'gender', 'x', 'y', 'dx', 'dy']].copy()

# Path layout: <faces dir>/<user_id>/coarse_tilt_aligned_face.<face_id>.<original_image>
img_path = [
    FACES_DIR + user_id + "/coarse_tilt_aligned_face." + str(face_id) + "." + image_name
    for user_id, face_id, image_name in zip(
        fold_data.user_id, fold_data.face_id, fold_data.original_image
    )
]

data['img_path'] = img_path


Map the age data to age groups. This makes it easier to predict age ranges, as external factors can affect how old someone looks.

In [None]:
# Raw age label -> age bucket. Single-year labels and non-standard ranges that
# appear in the fold files are folded into one of the eight buckets used below.
age_range = [('(0, 2)', '0-2'), ('2', '0-2'), ('3', '0-2'), ('(4, 6)', '4-6'), ('(8, 12)', '8-13'), ('13', '8-13'), ('22', '15-20'), ('(8, 23)','15-20'), ('23', '25-32'), ('(15, 20)', '15-20'), ('(25, 32)', '25-32'), ('(27, 32)', '25-32'), ('32', '25-32'), ('34', '25-32'), ('29', '25-32'), ('(38, 42)', '38-43'), ('35', '38-43'), ('36', '38-43'), ('42', '48-53'), ('45', '38-43'), ('(38, 43)', '38-43'), ('(38, 42)', '38-43'), ('(38, 48)', '48-53'), ('46', '48-53'), ('(48, 53)', '48-53'), ('55', '48-53'), ('56', '48-53'), ('(60, 100)', '60+'), ('57', '60+'), ('58', '60+')]
age_dict = dict(age_range)

# Rows without a usable age are dropped: either the literal string 'None', or
# NaN (some pandas versions parse 'None' as a missing value in read_csv).
# Real index labels are collected rather than positional counters, so the drop
# stays correct even if the frame's index is not 0..n-1.
missing_age = data['age'].isna() | (data['age'] == 'None')
drop_labels = data.index[missing_age].tolist()
data = data.drop(labels=drop_labels, axis=0)

# Fail loudly on a label the table does not cover instead of silently producing NaN.
unmapped_labels = set(data['age']) - set(age_dict)
if unmapped_labels:
    raise KeyError(f"No age bucket defined for labels: {unmapped_labels}")

# Assign the whole column in one step. The earlier per-row `data.age.loc[index] = ...`
# was a chained assignment: pandas may apply it to a temporary object
# (SettingWithCopyWarning), and under copy-on-write it never modifies `data`.
data['age'] = data['age'].map(age_dict)
data.age.value_counts(dropna=False)



Drop undefined gender elements

In [None]:
# Discard rows with any missing value, then keep only faces whose gender is not 'u'.
data = data.dropna()
has_known_gender = data['gender'] != 'u'
data_clean = data.loc[has_known_gender].copy()
data_clean.info()

Map the cleaned gender labels to 0 and 1.
Map the cleaned age-group labels to the integers 0-7, one per age range.

In [None]:
# Integer class id for each gender label: 'f' -> 0, 'm' -> 1.
genders = {'f': 0, 'm': 1}

# Dictionary lookup per row; an unexpected label raises KeyError.
data_clean['gender'] = data_clean['gender'].apply(genders.__getitem__)
data_clean.head()

In [None]:
# Age buckets listed in ascending order; a bucket's position is its class id (0..7).
age_buckets = ['0-2', '4-6', '8-13', '15-20', '25-32', '38-43', '48-53', '60+']
ages = {bucket: class_id for class_id, bucket in enumerate(age_buckets)}

# Dictionary lookup per row; an unexpected bucket raises KeyError.
data_clean['age'] = data_clean['age'].apply(ages.__getitem__)
data_clean.head()

Make training and test datasets and resize images in dataset 
Make split data for gender and age 


In [None]:
def load_face_images(paths, size=(227, 227)):
    """Load images from disk as one stacked array.

    Each file is opened, converted to RGB, and resized to `size`. Forcing RGB
    guarantees that every image has three channels, so the stacked result is a
    regular array even if a file on disk is grayscale or carries an alpha channel.

    Parameters
    ----------
    paths : iterable of str
        File paths of the images to load.
    size : tuple of int
        Target (width, height) passed to `Image.resize`.

    Returns
    -------
    numpy.ndarray
        One entry per path, in the same order as `paths`.
    """
    images = []
    for path in paths:
        # The context manager closes the file as soon as the pixels are read.
        with Image.open(path) as img:
            images.append(np.asarray(img.convert("RGB").resize(size)))
    # Convert to a single np array so it can be passed through the model.
    return np.asarray(images)


X = data_clean[['img_path']]
Y = data_clean[['gender']]

# 70/30 train/test split with a fixed seed so the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

training_imgs = load_face_images(X_train['img_path'])
testing_imgs = load_face_images(X_test['img_path'])

In [None]:
# Same process as the gender split, but with the age labels as the target.
X_age = data_clean[['img_path']]
Y_age = data_clean[['age']]

# 70/30 train/test split with a fixed seed so the split is reproducible.
X_train_age, X_test_age, Y_train_age, Y_test_age = train_test_split(X_age, Y_age, test_size=0.3, random_state=42)

# Both splits must be resized to the same shape the age model expects
# (227x227). The test split was previously resized to 277x277, which does not
# match the model's input_shape.
age_img_size = (227, 227)

training_imgs_age = []
for img_file in X_train_age['img_path']:
    # convert("RGB") guarantees three channels so all arrays stack cleanly;
    # the context manager closes the file once the pixels are read.
    with Image.open(img_file) as face:
        training_imgs_age.append(np.asarray(face.convert("RGB").resize(age_img_size)))

testing_imgs_age = []
for img_file in X_test_age['img_path']:
    with Image.open(img_file) as face:
        testing_imgs_age.append(np.asarray(face.convert("RGB").resize(age_img_size)))

# Convert to np arrays to pass through the model.
training_imgs_age = np.asarray(training_imgs_age)
testing_imgs_age = np.asarray(testing_imgs_age)


Gender model defined with a Sequential model

In [None]:
# Gender classifier. Per the original author's note, the layer stack is based
# on the Levi and Hassner paper.
model = Sequential()

# Three convolution stages, each followed by 2x2 max pooling and layer normalization.
model.add(Conv2D(input_shape=(227, 227, 3), filters=96, kernel_size=(7, 7), strides=4, padding='valid', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
model.add(LayerNormalization())
model.add(Conv2D(filters=256, kernel_size=(5, 5), strides=1, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model.add(LayerNormalization())
model.add(Conv2D(filters=256, kernel_size=(3,3), strides=1, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model.add(LayerNormalization())

# Fully connected head with dropout for regularization.
model.add(Flatten())
model.add(Dense(units=512, activation='relu'))
model.add(Dropout(rate=0.25))
model.add(Dense(units=512, activation='relu'))
model.add(Dropout(rate=0.25))

# Two-way softmax output: one probability per gender class. The previous ReLU
# output did not produce probabilities, so a cross-entropy loss on it was not
# meaningful.
model.add(Dense(units=2, activation='softmax'))

# Stop training once the training loss has not improved for 3 epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# The labels are a single column of integer class ids (0/1), which is what
# sparse categorical cross-entropy expects alongside a softmax output. The model
# has one output, so exactly one loss is given.
model.compile(loss="sparse_categorical_crossentropy", optimizer="Adam", metrics=["accuracy"])
model.summary()

Train gender model 

In [None]:
# Training configuration for the gender model; the test split is used as validation data.
gender_fit_options = dict(
    batch_size=32,
    epochs=15,
    validation_data=(testing_imgs, Y_test),
    callbacks=[callback],
)
History = model.fit(training_imgs, Y_train, **gender_fit_options)

# Persist the trained model to disk.
model.save('gender_model_25e.h5')

Test the model


In [None]:
# Score the gender model on the test split and report its accuracy.
gender_scores = model.evaluate(testing_imgs, Y_test, verbose=2)
test_loss, test_acc = gender_scores
print(test_acc)

Create and train age model

In [None]:
# Age classifier: same convolutional stack as the gender model, with one output
# per age bucket.
age_model = Sequential()

# Three convolution stages, each followed by 2x2 max pooling and layer normalization.
age_model.add(Conv2D(input_shape=(227, 227, 3), filters=96, kernel_size=(7, 7), strides=4, padding='valid', activation='relu'))
age_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
age_model.add(LayerNormalization())
age_model.add(Conv2D(filters=256, kernel_size=(5, 5), strides=1, padding='same', activation='relu'))
age_model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
age_model.add(LayerNormalization())
age_model.add(Conv2D(filters=256, kernel_size=(3,3), strides=1, padding='same', activation='relu'))
age_model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
age_model.add(LayerNormalization())

# Fully connected head with dropout for regularization.
age_model.add(Flatten())
age_model.add(Dense(units=512, activation='relu'))
age_model.add(Dropout(rate=0.25))
age_model.add(Dense(units=512, activation='relu'))
age_model.add(Dropout(rate=0.25))

# Age labels are encoded as eight classes (0..7), so the output layer needs
# eight softmax units. The previous 2-unit ReLU output could not represent
# classes 2..7.
age_model.add(Dense(units=8, activation='softmax'))

# Stop training once the training loss has not improved for 3 epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# Integer class ids with a softmax output call for sparse categorical
# cross-entropy; binary cross-entropy only applies to two-class problems.
age_model.compile(loss="sparse_categorical_crossentropy", optimizer="Adam", metrics=["accuracy"])
age_model.summary()

In [None]:
# Training configuration for the age model; the test split is used as validation data.
age_fit_options = dict(
    batch_size=32,
    epochs=25,
    validation_data=(testing_imgs_age, Y_test_age),
    callbacks=[callback],
)
history_age = age_model.fit(training_imgs_age, Y_train_age, **age_fit_options)

# Persist the trained model to disk.
age_model.save("age_model.h5")

# Score the age model on the test split and report its accuracy.
age_scores = age_model.evaluate(testing_imgs_age, Y_test_age, verbose=2)
test_loss, test_acc = age_scores
print(test_acc)