#### Data Preparation

In [2]:
# Load data

import numpy as np
import pandas as pd

# Read the dataset; the relative path is resolved against the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="../data/people_data.csv")

In [12]:
# Import libraries

import setuptools.dist
import plotly.graph_objects as go
import plotly.express as px
from keras import layers as L
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras import regularizers


In [4]:
# Parse the space-separated pixel strings into float32 vectors, then stack
# them into a (samples, 48, 48, 1) image tensor.

# Values that are not strings (e.g. already-parsed arrays when this cell is
# re-run) are passed through untouched.
df['pixels'] = df['pixels'].apply(
    lambda raw: raw if not isinstance(raw, str) else np.array(raw.split(), dtype="float32")
)

x = np.array(list(df['pixels']))
x = np.reshape(x, (len(x), 48, 48, 1))

#### Gender Prediction Model

In [5]:
# Encode the gender labels as int8 and carve out an 80/20 train/test split.

# Textual labels map to 0/1; values that are not 'Male'/'Female' are left as-is
# by replace(). infer_objects() is applied before the int8 cast — presumably to
# sidestep pandas' object-downcasting warning (confirm).
df['gender'] = (
    df['gender']
    .replace({'Male': 0, 'Female': 1})
    .infer_objects(copy=False)
    .astype('int8')
)

y_gender = df['gender']
(
    x_train_gender,
    x_test_gender,
    y_train_gender,
    y_test_gender,
) = train_test_split(x, y_gender, test_size=0.2, random_state=20)

In [6]:
# Gender classifier: two conv stages followed by a dense head with one sigmoid unit.

model_gender = tf.keras.Sequential([
    L.Input(shape=(48, 48, 1)),

    # Convolutional feature extractor
    L.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    L.BatchNormalization(),
    L.MaxPooling2D(pool_size=(2, 2)),
    L.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    L.MaxPooling2D(pool_size=(2, 2)),

    # Classification head
    L.Flatten(),
    L.Dense(units=64, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(units=1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output.
model_gender.compile(
    optimizer='sgd',
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

class myCallback(tf.keras.callbacks.Callback):
    """Print a notice when the validation loss falls below a threshold.

    The callback only reports; it never sets ``stop_training``, so training
    continues regardless of the notice.

    Args:
        threshold: ``val_loss`` value below which the notice is printed.
    """

    def __init__(self, threshold):
        # Zero-argument super() keeps working even after this class is
        # redefined by re-running the cell.
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check this epoch's ``val_loss`` against the threshold.

        Args:
            epoch: Index of the epoch that just finished (not used here).
            logs: Mapping of metric names to values; ``None`` is treated as empty.
        """
        # Default is None rather than a shared mutable {}.
        if logs is None:
            logs = {}
        current_val_loss = logs.get('val_loss')
        if current_val_loss is not None and current_val_loss < self.threshold:
            print(f"\nReached val_loss of {current_val_loss} which is below {self.threshold}.")
            print("Consider reviewing the model or callback settings.")


# NOTE(review): the training cell rebinds `callback` to an EarlyStopping
# instance before calling fit(), so this instance is never passed to fit().
callback = myCallback(threshold=0.2500)

model_gender.summary()

In [7]:
# Fit the gender model with early stopping on validation loss.

# Stop after 4 epochs without a val_loss improvement of at least 0.0005 and
# roll back to the best weights seen.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0005,
    patience=4,
    restore_best_weights=True,
)

# 12% of the training split is held out for validation.
history_gender = model_gender.fit(
    x_train_gender,
    y_train_gender,
    batch_size=64,
    epochs=20,
    validation_split=0.12,
    callbacks=[callback],
)

Epoch 1/20
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 46ms/step - accuracy: 0.6839 - loss: 0.6168 - val_accuracy: 0.8190 - val_loss: 0.4099
Epoch 2/20
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 46ms/step - accuracy: 0.8089 - loss: 0.4173 - val_accuracy: 0.8502 - val_loss: 0.3447
Epoch 3/20
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 45ms/step - accuracy: 0.8334 - loss: 0.3619 - val_accuracy: 0.8519 - val_loss: 0.3294
Epoch 4/20
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 39ms/step - accuracy: 0.8543 - loss: 0.3339 - val_accuracy: 0.8629 - val_loss: 0.3109
Epoch 5/20
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 40ms/step - accuracy: 0.8607 - loss: 0.3125 - val_accuracy: 0.8493 - val_loss: 0.3131
Epoch 6/20
[1m261/261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 39ms/step - accuracy: 0.8759 - loss: 0.2890 - val_accuracy: 0.8550 - val_loss: 0.3144
Epoch 7/20
[1m2

In [8]:
# Plot training vs. validation loss per epoch for the gender model.

figure_gender = px.line(
    data_frame=history_gender.history,
    y=['loss', 'val_loss'],
    title='Training History',
    labels={'index': 'epoch', 'value': 'loss'},
)

figure_gender.show()

In [11]:
# Evaluate the gender model on the held-out test split.

# Unpack when features and labels arrive bundled in a single tuple.
if isinstance(x_test_gender, tuple):
    x_test_gender, y_test_gender = x_test_gender

# Features: ndarray -> add the channel axis if it is missing -> float32.
x_test_gender = np.array(x_test_gender)
if x_test_gender.ndim == 3:
    x_test_gender = x_test_gender.reshape((-1, 48, 48, 1))
x_test_gender = x_test_gender.astype('float32')

# Labels: flat float32 vector.
y_test_gender = np.array(y_test_gender).flatten().astype('float32')

loss, acc = model_gender.evaluate(x_test_gender, y_test_gender, verbose=0)
print('Test loss: {:.4f}'.format(loss))
print('Test Accuracy: {:.2f}%'.format(acc * 100))

Test loss: 0.2531
Test Accuracy: 89.45%


#### Age Prediction Model

In [13]:
# 80/20 train/test split for the age target (fixed seed).

y_age = df['age']
(
    x_train_age,
    x_test_age,
    y_train_age,
    y_test_age,
) = train_test_split(x, y_age, test_size=0.2, random_state=25)


In [14]:
# Age regressor: three conv stages, an L2-regularised dense layer, one linear output.

model_age = tf.keras.Sequential([
    L.InputLayer(shape=(48, 48, 1)),

    # Convolutional feature extractor
    L.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    L.BatchNormalization(),
    L.MaxPooling2D(pool_size=(2, 2)),
    L.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    L.MaxPooling2D(pool_size=(2, 2)),
    L.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    L.MaxPooling2D(pool_size=(2, 2)),

    # Regression head
    L.Flatten(),
    L.Dense(units=64, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    L.Dropout(rate=0.3),
    L.Dense(units=1, activation='linear'),
])

# Two optimizers are constructed; only `adam` is handed to compile() below.
sgd = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)
adam = tf.keras.optimizers.Adam(learning_rate=0.0001)

# MAE serves as both the training objective and the reported metric.
model_age.compile(
    optimizer=adam,
    loss='mae',
    metrics=['mae'],
)

class myCallback(tf.keras.callbacks.Callback):
    """Print a notice when the validation loss falls below a threshold.

    The callback only reports; it never sets ``stop_training``, so training
    continues regardless of the notice.

    Args:
        threshold: ``val_loss`` value below which the notice is printed.
    """

    def __init__(self, threshold):
        # Zero-argument super() keeps working even after this class is
        # redefined by re-running the cell.
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check this epoch's ``val_loss`` against the threshold.

        Args:
            epoch: Index of the epoch that just finished (not used here).
            logs: Mapping of metric names to values; ``None`` is treated as empty.
        """
        # Default is None rather than a shared mutable {}.
        if logs is None:
            logs = {}
        current_val_loss = logs.get('val_loss')
        if current_val_loss is not None and current_val_loss < self.threshold:
            print(f"\nReached val_loss of {current_val_loss} which is below {self.threshold}.")
            print("Consider reviewing the model or callback settings.")


# NOTE(review): the training cell rebinds `callback` to an EarlyStopping
# instance before calling fit(), so this instance is never passed to fit().
# The model is compiled with loss='mae', so a threshold of 110 looks far above
# any plausible val_loss and would trigger on every epoch — confirm the
# intended value.
callback = myCallback(threshold=110)

model_age.summary()

In [15]:
# Fit the age model with early stopping on validation loss.

# Stop after 7 epochs without a val_loss improvement of at least 0.0005 and
# roll back to the best weights seen.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0005,
    patience=7,
    restore_best_weights=True,
)

# Materialise the training split as NumPy arrays before handing it to Keras.
x_train_age = np.array(x_train_age)
y_train_age = np.array(y_train_age)

# 13% of the training split is held out for validation.
history_age = model_age.fit(
    x_train_age,
    y_train_age,
    batch_size=64,
    epochs=50,
    validation_split=0.13,
    callbacks=[callback],
)

Epoch 1/50
[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 46ms/step - loss: 20.5942 - mae: 20.4734 - val_loss: 13.2756 - val_mae: 13.1612
Epoch 2/50
[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 45ms/step - loss: 12.4679 - mae: 12.3547 - val_loss: 10.3456 - val_mae: 10.2352
Epoch 3/50
[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 49ms/step - loss: 11.0110 - mae: 10.9013 - val_loss: 10.1104 - val_mae: 10.0026
Epoch 4/50
[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 57ms/step - loss: 10.2952 - mae: 10.1879 - val_loss: 9.2546 - val_mae: 9.1489
Epoch 5/50
[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 48ms/step - loss: 9.7353 - mae: 9.6300 - val_loss: 10.5136 - val_mae: 10.4096
Epoch 6/50
[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 48ms/step - loss: 9.3140 - mae: 9.2103 - val_loss: 8.4383 - val_mae: 8.3356
Epoch 7/50
[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [16]:
# Plot training vs. validation loss per epoch for the age model.

figure_age = px.line(
    data_frame=history_age.history,
    y=['loss', 'val_loss'],
    title='Training History',
    labels={'index': 'epoch', 'value': 'loss'},
)

figure_age.show()

In [17]:
# Evaluate the age model on the held-out test split.

# Unpack when features and labels arrive bundled in a single tuple.
if isinstance(x_test_age, tuple):
    x_test_age, y_test_age = x_test_age

x_test_age = np.array(x_test_age)
y_test_age = np.array(y_test_age)

# evaluate() returns the loss followed by the compiled metrics. The model is
# compiled with loss='mae' and metrics=['mae'], so the first value is the MAE
# objective plus the L2 regularization penalty of the dense layer — it is not
# a mean squared error and must not be labelled as one.
loss, mae = model_age.evaluate(x_test_age, y_test_age, verbose=0)

print('Test loss (MAE + L2 penalty): {:.2f}'.format(loss))
print('Test Mean absolute error: {:.2f}'.format(mae))

Test Mean squared error: 6.79
Test Mean absolute error: 6.70


#### Ethnicity Prediction Model

In [18]:
# 80/20 train/test split for the ethnicity target (fixed seed).
y_ethnicity = df['ethnicity']
(
    x_train_ethnicity,
    x_test_ethnicity,
    y_train_ethnicity,
    y_test_ethnicity,
) = train_test_split(x, y_ethnicity, test_size=0.2, random_state=55)

In [19]:
# Ethnicity classifier: three L2-regularised conv stages (each followed by
# batch normalisation) and a dense head ending in a 5-way softmax.
model_ethnicity = tf.keras.Sequential([
    L.Input(shape=(48, 48, 1)),

    # Convolutional feature extractor
    L.Conv2D(
        filters=32, kernel_size=(3, 3), activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    ),
    L.BatchNormalization(),
    L.MaxPooling2D(pool_size=(2, 2)),
    L.Conv2D(
        filters=64, kernel_size=(3, 3), activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    ),
    L.BatchNormalization(),
    L.MaxPooling2D(pool_size=(2, 2)),
    L.Conv2D(
        filters=128, kernel_size=(3, 3), activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    ),
    L.BatchNormalization(),
    L.MaxPooling2D(pool_size=(2, 2)),

    # Classification head
    L.Flatten(),
    L.Dense(
        units=128, activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    ),
    L.Dropout(rate=0.3),
    L.Dense(units=5, activation='softmax'),
])

# Sparse categorical cross-entropy takes integer class labels directly.
opt = tf.keras.optimizers.Adam(learning_rate=0.00005)
model_ethnicity.compile(
    optimizer=opt,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

class myCallback(tf.keras.callbacks.Callback):
    """Print a notice when the validation loss falls below a threshold.

    The callback only reports; it never sets ``stop_training``, so training
    continues regardless of the notice.

    Args:
        threshold: ``val_loss`` value below which the notice is printed.
    """

    def __init__(self, threshold):
        # Zero-argument super() keeps working even after this class is
        # redefined by re-running the cell.
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check this epoch's ``val_loss`` against the threshold.

        Args:
            epoch: Index of the epoch that just finished (not used here).
            logs: Mapping of metric names to values; ``None`` is treated as empty.
        """
        # Default is None rather than a shared mutable {}.
        if logs is None:
            logs = {}
        current_val_loss = logs.get('val_loss')
        if current_val_loss is not None and current_val_loss < self.threshold:
            print(f"\nReached val_loss of {current_val_loss} which is below {self.threshold}.")
            print("Consider reviewing the model or callback settings.")


# NOTE(review): the training cell rebinds `callback` to an EarlyStopping
# instance before calling fit(), so this instance is never passed to fit().
callback = myCallback(threshold=0.2500)

model_ethnicity.summary()

In [20]:
from tensorflow import keras
from keras._tf_keras.keras.preprocessing.image import ImageDataGenerator

# Materialise the training split as NumPy arrays before handing it to Keras.
x_train_ethnicity = np.array(x_train_ethnicity)
y_train_ethnicity = np.array(y_train_ethnicity)

# Augmentation settings: rotation, shifts, shear, zoom, horizontal flip and
# brightness jitter.
# NOTE(review): `datagen_ethnicity` is configured and fitted here, but the
# fit() call below trains on the raw arrays, so none of these transforms are
# applied in this cell — confirm whether datagen_ethnicity.flow(...) was meant
# to be the training input.
datagen_ethnicity = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.8, 1.2],
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen_ethnicity.fit(x_train_ethnicity)

# Stop after 15 epochs without a val_loss improvement of at least 0.001 and
# roll back to the best weights seen.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=15,
    restore_best_weights=True,
)

# 15% of the training split is held out for validation.
history_ethnicity = model_ethnicity.fit(
    x_train_ethnicity,
    y_train_ethnicity,
    batch_size=64,
    epochs=55,
    validation_split=0.15,
    callbacks=[callback],
)

Epoch 1/55
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 69ms/step - accuracy: 0.3683 - loss: 2.3559 - val_accuracy: 0.5782 - val_loss: 1.5424
Epoch 2/55
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 53ms/step - accuracy: 0.5616 - loss: 1.5379 - val_accuracy: 0.6221 - val_loss: 1.3842
Epoch 3/55
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 49ms/step - accuracy: 0.6168 - loss: 1.3944 - val_accuracy: 0.6605 - val_loss: 1.2896
Epoch 4/55
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 50ms/step - accuracy: 0.6687 - loss: 1.2762 - val_accuracy: 0.6840 - val_loss: 1.2304
Epoch 5/55
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 51ms/step - accuracy: 0.6916 - loss: 1.2178 - val_accuracy: 0.7058 - val_loss: 1.1794
Epoch 6/55
[1m252/252[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 60ms/step - accuracy: 0.7229 - loss: 1.1334 - val_accuracy: 0.7149 - val_loss: 1.1515
Epoch 7/55
[1m2

In [21]:
# Plot training vs. validation loss per epoch for the ethnicity model.
figure_ethnicity = px.line(
    data_frame=history_ethnicity.history,
    y=['loss', 'val_loss'],
    title='Training History',
    labels={'index': 'epoch', 'value': 'loss'},
)

figure_ethnicity.show()

In [22]:
# Evaluate the ethnicity model on the held-out test split.

# Unpack when features and labels arrive bundled in a single tuple.
if isinstance(x_test_ethnicity, tuple):
    x_test_ethnicity, y_test_ethnicity = x_test_ethnicity

# Keras is given plain NumPy arrays for both features and labels.
y_test_ethnicity = np.array(y_test_ethnicity)
x_test_ethnicity = np.array(x_test_ethnicity)

loss, acc = model_ethnicity.evaluate(
    x_test_ethnicity,
    y_test_ethnicity,
    verbose=0,
)
print('Test loss: {:.4f}'.format(loss))
print('Test Accuracy: {:.2f}%'.format(acc * 100))

Test loss: 1.0539
Test Accuracy: 75.05%
