In [67]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from keras import layers as L
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns


In [68]:
df = pd.read_csv("../data/people_data.csv")
df = df.drop("img_name",axis=1)
df["pixels"] = df["pixels"].apply(lambda x: np.array(x.split(),dtype="float32"))
df["pixels"] = df["pixels"] / 255
df.tail()

Unnamed: 0,age,ethnicity,gender,pixels
23700,99,0,1,"[0.49803922, 0.39215687, 0.36862746, 0.3176470..."
23701,99,1,1,"[0.09019608, 0.10980392, 0.1254902, 0.13725491..."
23702,99,2,1,"[0.23137255, 0.19607843, 0.14509805, 0.1568627..."
23703,99,2,1,"[0.1764706, 0.42352942, 0.47058824, 0.6117647,..."
23704,99,0,1,"[0.6117647, 0.6313726, 0.627451, 0.64705884, 0..."


In [69]:
x = np.zeros(shape=(23705, 2304))
x = x.reshape(-1, 48, 48, 1)
print(x.shape)  #Input: (batch_size, height, width, channels) e.g., (batch_size, 224, 224, 3)

(23705, 48, 48, 1)


In [70]:
y_gender = df['gender'].values
y_ethnicity = df['ethnicity'].values
y_age = df['age'].values

print(y_gender, y_ethnicity, y_age)

[0 0 0 ... 1 1 1] [2 2 2 ... 2 2 0] [ 1  1  1 ... 99 99 99]


In [71]:
# x_train, x_temp, y_gender_train, y_gender_temp, y_ethnicity_train, y_ethnicity_temp, y_age_train, y_age_temp = train_test_split(
#     x, y_gender, y_ethnicity, y_age, 
#     test_size=0.2, 
#     random_state=42,
#     shuffle=True,
#     stratify=y_ethnicity
# )

# x_val, x_test, y_gender_val, y_gender_test, y_ethnicity_val, y_ethnicity_test, y_age_val, y_age_test = train_test_split(
#     x_temp, y_gender_temp, y_ethnicity_temp, y_age_temp, 
#     test_size=0.5, 
#     random_state=42,
#     shuffle=True,
#     stratify=y_ethnicity_temp
# )

from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
from sklearn.preprocessing import KBinsDiscretizer


# Bin the Age for Stratification
# Age is continuous, so we discretize it into categories
n_bins = 10  # Number of age bins; adjust as needed
est = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
y_age_binned = est.fit_transform(y_age.reshape(-1, 1)).astype(int).flatten()

# Combine Labels for Multi-Label Stratification
# Combine gender, ethnicity, and binned age into a multi-label format
y_combined = np.vstack((y_gender, y_ethnicity, y_age_binned)).T  # Shape: (num_samples, 3)

# Initialize the Splitter
msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# Perform the Initial Split (Train vs Temp)
for train_idx, temp_idx in msss.split(x, y_combined):
    x_train, x_temp = x[train_idx], x[temp_idx]
    y_gender_train, y_gender_temp = y_gender[train_idx], y_gender[temp_idx]
    y_ethnicity_train, y_ethnicity_temp = y_ethnicity[train_idx], y_ethnicity[temp_idx]
    y_age_train, y_age_temp = y_age[train_idx], y_age[temp_idx]
    y_age_binned_train, y_age_binned_temp = y_age_binned[train_idx], y_age_binned[temp_idx]

# Further Split Temp into Validation and Test
msss_val_test = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)

for val_idx, test_idx in msss_val_test.split(x_temp, y_combined[temp_idx]):
    x_val, x_test = x_temp[val_idx], x_temp[test_idx]
    y_gender_val, y_gender_test = y_gender_temp[val_idx], y_gender_temp[test_idx]
    y_ethnicity_val, y_ethnicity_test = y_ethnicity_temp[val_idx], y_ethnicity_temp[test_idx]
    y_age_val, y_age_test = y_age_temp[val_idx], y_age_temp[test_idx]

In [72]:
x_train = x_train.astype('float32') / 255.0
x_val = x_val.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Proceed with data augmentation
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

train_datagen.fit(x_train)


from sklearn.preprocessing import MinMaxScaler

# Initialize the scaler
age_scaler = MinMaxScaler(feature_range=(0, 1))

# Fit and transform the training age labels
y_age_train_scaled = age_scaler.fit_transform(y_age_train.reshape(-1, 1))
y_age_val_scaled = age_scaler.transform(y_age_val.reshape(-1, 1))
y_age_test_scaled = age_scaler.transform(y_age_test.reshape(-1, 1))


# Training Set
y_train = {
    'gender': y_gender_train,
    'ethnicity': y_ethnicity_train,
    'age': y_age_train_scaled
}

# Validation Set
y_val = {
    'gender': y_gender_val,
    'ethnicity': y_ethnicity_val,
    'age': y_age_val_scaled
}

# Test Set
y_test = {
    'gender': y_gender_test,
    'ethnicity': y_ethnicity_test,
    'age': y_age_test_scaled
}

In [73]:
train_dataset = tf.data.Dataset.from_tensor_slices((
    x_train,
    {
        'gender': y_train['gender'],
        'ethnicity': y_train['ethnicity'],
        'age': y_train['age']
    }
))

val_dataset = tf.data.Dataset.from_tensor_slices((
    x_val,
    {
        'gender': y_val['gender'],
        'ethnicity': y_val['ethnicity'],
        'age': y_val['age']
    }
))

test_dataset = tf.data.Dataset.from_tensor_slices((
    x_test,
    {
        'gender': y_test['gender'],
        'ethnicity': y_test['ethnicity'],
        'age': y_test['age']
    }
))

# Batch and prefetch
BATCH_SIZE = 64

train_dataset = train_dataset.shuffle(buffer_size=1024, seed=42).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D

def build_transfer_learning_model(input_shape=(48, 48, 1)):
    inputs = Input(shape=input_shape)
    
    # Convert grayscale to RGB by duplicating the channel
    x = Conv2D(3, (3,3), padding='same')(inputs)
    
    # Resize images to match the input size of MobileNetV2
    x = tf.image.resize(x, [96, 96])  # MobileNetV2 can handle 96x96
    
    # Load MobileNetV2 with pre-trained ImageNet weights
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_tensor=x)
    base_model.trainable = False  # Freeze the base model initially
    
    # Add custom layers on top
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    
    # Output Heads
    gender_output = Dense(1, activation='sigmoid', name='gender')(x)
    ethnicity_output = Dense(5, activation='softmax', name='ethnicity')(x)
    age_output = Dense(1, activation='linear', name='age')(x)
    
    # Define the Model
    model = Model(inputs=inputs, outputs=[gender_output, ethnicity_output, age_output])
    
    return model

# Instantiate the model
model = build_transfer_learning_model()
model.summary()

In [74]:
def build_model(input_shape=(48, 48, 1)):
    inputs = Input(shape=input_shape)
    x = inputs

    # Block 1
    x = Conv2D(32, (3,3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(32, (3,3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((2,2))(x)
    x = Dropout(0.25)(x)

    x = Conv2D(64, (3,3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(64, (3,3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((2,2))(x)
    x = Dropout(0.25)(x)

    x = Conv2D(128, (3,3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(128, (3,3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((2,2))(x)
    x = Dropout(0.4)(x)

    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Output Heads
    gender_output = Dense(1, activation='sigmoid', name='gender')(x)
    ethnicity_output = Dense(5, activation='softmax', name='ethnicity')(x)
    age_output = Dense(1, activation='linear', name='age')(x)

    model = Model(inputs=inputs, outputs=[gender_output, ethnicity_output, age_output])
    return model

# Instantiate the model
model = build_model()
model.summary()

In [75]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss={
        'gender': 'binary_crossentropy',
        'ethnicity': 'sparse_categorical_crossentropy',  # categorical_crossentropy
        'age': 'mean_absolute_error'
    },
    metrics={
        'gender': ['accuracy', tf.keras.metrics.AUC(name='auc')],
        'ethnicity': ['accuracy'],
        'age': ['mae', 'mse']
    },
    loss_weights={
        'gender': 1.0,
        'ethnicity': 1.0,
        'age': 1.0
    }
)

In [76]:
# Early Stopping
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

# Reduce Learning Rate on Plateau
lr_reduce = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=7,
    verbose=2,
    min_lr=1e-6,
    mode="auto",
    min_delta=0.0001,
    cooldown=0
)

In [77]:
# Define the number of epochs
EPOCHS = 5

# Train the model
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=val_dataset,
    callbacks=[early_stop, lr_reduce]
)

Epoch 1/5
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 270ms/step - age_loss: 8.5316 - age_mae: 20.7829 - age_mse: 652.1686 - ethnicity_accuracy: 0.3596 - ethnicity_loss: 1.5904 - gender_accuracy: 0.5313 - gender_auc: 0.4793 - gender_loss: 20.2649 - loss: 30.3864 - val_age_loss: 7.9066 - val_age_mae: 33.3540 - val_age_mse: 1510.8823 - val_ethnicity_accuracy: 0.4253 - val_ethnicity_loss: 1.5093 - val_gender_accuracy: 0.4772 - val_gender_auc: 0.5000 - val_gender_loss: 34.4159 - val_loss: 41.9818 - learning_rate: 0.0010
Epoch 2/5
[1m214/297[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m23s[0m 278ms/step - age_loss: 8.7992 - age_mae: 17.3861 - age_mse: 467.4575 - ethnicity_accuracy: 0.3534 - ethnicity_loss: 1.5506 - gender_accuracy: 0.5459 - gender_auc: 0.5083 - gender_loss: 16.7999 - loss: 27.1498

KeyboardInterrupt: 

In [38]:
model.save('../models/combined_model.h5')

