In [137]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPooling2D, Flatten, Dense, Dropout, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from keras import layers as L
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns


In [110]:
df = pd.read_csv("../data/people_data.csv")
df = df.drop("img_name",axis=1)
df["pixels"] = df["pixels"].apply(lambda x: np.array(x.split(),dtype="float32"))
df["pixels"] = df["pixels"] / 255
df.tail()

Unnamed: 0,age,ethnicity,gender,pixels
23700,99,0,1,"[0.49803922, 0.39215687, 0.36862746, 0.3176470..."
23701,99,1,1,"[0.09019608, 0.10980392, 0.1254902, 0.13725491..."
23702,99,2,1,"[0.23137255, 0.19607843, 0.14509805, 0.1568627..."
23703,99,2,1,"[0.1764706, 0.42352942, 0.47058824, 0.6117647,..."
23704,99,0,1,"[0.6117647, 0.6313726, 0.627451, 0.64705884, 0..."


In [111]:
x = np.zeros(shape=(23705, 2304))
x = x.reshape(-1, 48, 48, 1)
print(x.shape)  #Input: (batch_size, height, width, channels) e.g., (batch_size, 224, 224, 3)

(23705, 48, 48, 1)


In [112]:
y_gender = df['gender'].values
y_ethnicity = df['ethnicity'].values
y_age = df['age'].values

print(y_gender, y_ethnicity, y_age)

[0 0 0 ... 1 1 1] [2 2 2 ... 2 2 0] [ 1  1  1 ... 99 99 99]


In [129]:
x_train, x_temp, y_gender_train, y_gender_temp, y_ethnicity_train, y_ethnicity_temp, y_age_train, y_age_temp = train_test_split(
    x, y_gender, y_ethnicity, y_age, 
    test_size=0.2, 
    random_state=20,
    shuffle=True,
    #stratify=y_ethnicity
)

x_val, x_test, y_gender_val, y_gender_test, y_ethnicity_val, y_ethnicity_test, y_age_val, y_age_test = train_test_split(
    x_temp, y_gender_temp, y_ethnicity_temp, y_age_temp, 
    test_size=0.25, 
    random_state=42,
    shuffle=True,
    #stratify=y_ethnicity_temp
)




# from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
# from sklearn.preprocessing import KBinsDiscretizer


# n_bins = 10
# est = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
# y_age_binned = est.fit_transform(y_age.reshape(-1, 1)).astype(int).flatten()

# y_combined = np.vstack((y_gender, y_ethnicity, y_age_binned)).T

# msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# for train_idx, temp_idx in msss.split(x, y_combined):
#     x_train, x_temp = x[train_idx], x[temp_idx]
#     y_gender_train, y_gender_temp = y_gender[train_idx], y_gender[temp_idx]
#     y_ethnicity_train, y_ethnicity_temp = y_ethnicity[train_idx], y_ethnicity[temp_idx]
#     y_age_train, y_age_temp = y_age[train_idx], y_age[temp_idx]
#     y_age_binned_train, y_age_binned_temp = y_age_binned[train_idx], y_age_binned[temp_idx]

# msss_val_test = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)

# for val_idx, test_idx in msss_val_test.split(x_temp, y_combined[temp_idx]):
#     x_val, x_test = x_temp[val_idx], x_temp[test_idx]
#     y_gender_val, y_gender_test = y_gender_temp[val_idx], y_gender_temp[test_idx]
#     y_ethnicity_val, y_ethnicity_test = y_ethnicity_temp[val_idx], y_ethnicity_temp[test_idx]
#     y_age_val, y_age_test = y_age_temp[val_idx], y_age_temp[test_idx]

In [124]:
x_train = x_train.astype('float32') / 255.0
x_val = x_val.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

train_datagen = ImageDataGenerator(
    rotation_range=[1, 40],
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
)

train_datagen.fit(x_train)


from sklearn.preprocessing import MinMaxScaler

age_scaler = MinMaxScaler(feature_range=(0, 1))

y_age_train_scaled = age_scaler.fit_transform(y_age_train.reshape(-1, 1))
y_age_val_scaled = age_scaler.transform(y_age_val.reshape(-1, 1))
y_age_test_scaled = age_scaler.transform(y_age_test.reshape(-1, 1))


y_train = {
    'gender': y_gender_train,
    'ethnicity': y_ethnicity_train,
    'age': y_age_train_scaled
}

y_val = {
    'gender': y_gender_val,
    'ethnicity': y_ethnicity_val,
    'age': y_age_val_scaled
}

y_test = {
    'gender': y_gender_test,
    'ethnicity': y_ethnicity_test,
    'age': y_age_test_scaled
}

In [125]:
train_dataset = tf.data.Dataset.from_tensor_slices((
    x_train,
    {
        'gender': y_train['gender'],
        'ethnicity': y_train['ethnicity'],
        'age': y_train['age']
    }
))

val_dataset = tf.data.Dataset.from_tensor_slices((
    x_val,
    {
        'gender': y_val['gender'],
        'ethnicity': y_val['ethnicity'],
        'age': y_val['age']
    }
))

test_dataset = tf.data.Dataset.from_tensor_slices((
    x_test,
    {
        'gender': y_test['gender'],
        'ethnicity': y_test['ethnicity'],
        'age': y_test['age']
    }
))

# Batch and prefetch
BATCH_SIZE = 64

train_dataset = train_dataset.shuffle(buffer_size=1024, seed=42).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [148]:
def build_layers(input):
    inputs = Input(shape=(48, 48, 1))
    x = inputs
    x = Conv2D(16, (3, 3), padding="same")(input)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)
    x = MaxPooling2D(pool_size=(3, 3))(x)
    x = Dropout(0.25)(x)

    x = Conv2D(32, (3, 3), padding="same")(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)

    x = Conv2D(32, (3, 3), padding="same")(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)

    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)

    x = Flatten()(x)
    x = Dense(128)(x)
    x = Activation("relu")(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(5)(x)
    x = Activation("softmax", name="race_output")(x)

    return x

In [149]:
def build_ethnicity_branch(input):
        x = build_layers(input)
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(5)(x)
        x = Activation("softmax", name="race_output")(x)

        return x


def build_age_branch(input):
        x = build_layers(input)
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(1)(x)
        x = Activation("linear", name="age_output")(x)

        return x


def build_gender_branch(input):
        x = Lambda(lambda c: tf.image.rgb_to_grayscale(c))(input)
        x = build_layers(input)
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(2)(x)
        x = Activation("sigmoid", name="gender_output")(x)

        return x

In [153]:
def assemble_model(height, width):
    input_shape = (height, width, 3)
    inputs = Input(shape=input_shape)
    age_branch = build_age_branch(inputs)
    race_branch = build_ethnicity_branch(inputs)
    gender_branch = build_gender_branch(inputs)
    model = Model(
        inputs=inputs,
        outputs = [age_branch, race_branch, gender_branch],
        name="face_net"
        )
    return model

    
model = assemble_model(48, 48)

TypeError: assemble_model() takes 2 positional arguments but 3 were given

In [132]:
# Early Stopping
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    # restore_best_weights=True,
    min_delta=0.0005,
    verbose=2, 
    mode='auto'
)

# Reduce Learning Rate on Plateau
# lr_reduce = ReduceLROnPlateau(
#     monitor='val_loss',
#     factor=0.1,
#     patience=3,
#     verbose=2,
#     min_lr=1e-6,
#     mode="auto",
#     min_delta=0.0001,
#     cooldown=0
# )

In [133]:

# Recompile the model with a lower learning rate
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss={
        'gender': 'binary_crossentropy',
        'ethnicity': 'sparse_categorical_crossentropy',
        'age': 'mean_absolute_error'
    },
    metrics={
        'gender': ['accuracy', tf.keras.metrics.AUC(name='auc')],
        'ethnicity': ['accuracy'],
        'age': ['mae', 'mse']
    },
    loss_weights={
        'gender': 1.0,
        'ethnicity': 1.0,
        'age': 1.0
    }
)

# Define the number of epochs
EPOCHS = 30

# Train the model
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=val_dataset,
    callbacks=[early_stop] #lr_reduce
)

Epoch 1/30
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 29ms/step - age_loss: 3.7915 - age_mae: 0.2484 - age_mse: 0.0964 - ethnicity_accuracy: 0.2871 - ethnicity_loss: 1.5354 - gender_accuracy: 0.5104 - gender_auc: 0.4943 - gender_loss: 0.2055 - loss: 5.5325 - val_age_loss: 0.7303 - val_age_mae: 0.1577 - val_age_mse: 0.0352 - val_ethnicity_accuracy: 0.4253 - val_ethnicity_loss: 1.4623 - val_gender_accuracy: 0.5072 - val_gender_auc: 0.5000 - val_gender_loss: 0.1404 - val_loss: 2.3218
Epoch 2/30
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 28ms/step - age_loss: 0.7082 - age_mae: 0.1740 - age_mse: 0.0419 - ethnicity_accuracy: 0.4257 - ethnicity_loss: 1.4494 - gender_accuracy: 0.5271 - gender_auc: 0.4935 - gender_loss: 0.1385 - loss: 2.2962 - val_age_loss: 0.6971 - val_age_mae: 0.2084 - val_age_mse: 0.0577 - val_ethnicity_accuracy: 0.4253 - val_ethnicity_loss: 1.4582 - val_gender_accuracy: 0.5072 - val_gender_auc: 0.5000 - val_gender_loss: 0.1298 

In [134]:
model.save('../models/combined_model.h5')

