In [18]:
import numpy as np
import os
from PIL import Image
import tensorflow as tf
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the path to the dataset
dataset_path = 'UTKface'

# Initialize lists to hold data
file_names = []
ages = []
genders = []
ethnicities = []

# Define the target image size
IMG_SIZE = (224, 224)

# Iterate through the dataset and process each image
for filename in os.listdir(dataset_path):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        # Extract age, gender, and ethnicity from the filename
        try:
            age, gender, ethnicity, _ = filename.split('_')
            age = int(age)
            gender = int(gender)
            ethnicity = int(ethnicity)

            # Append data to lists
            file_names.append(filename)
            ages.append(age)
            genders.append(gender)
            ethnicities.append(ethnicity)

        except ValueError:
            # Skip files that don't follow the expected naming convention
            continue

# Convert lists to numpy arrays
ages = np.array(ages, dtype=np.float32).reshape(-1, 1) / 116.0  # Normalize age values (max age is 116)
genders = np.array(genders, dtype=np.float32).reshape(-1, 1)  # Reshape to (n_samples, 1)
ethnicities = to_categorical(ethnicities, num_classes=5)  # One-hot encode ethnicities
print("Sample of one-hot encoded ethnicities:", ethnicities[:5])


# Train and validation split
split_idx = int(0.8 * len(file_names))
train_file_names, val_file_names = file_names[:split_idx], file_names[split_idx:]
train_ages, val_ages = ages[:split_idx], ages[split_idx:]
train_genders, val_genders = genders[:split_idx], genders[split_idx:]
train_ethnicities, val_ethnicities = ethnicities[:split_idx], ethnicities[split_idx:]

# Custom Data Generator for multi-output (age, gender, ethnicity)
class MultiOutputDataGenerator(Sequence):
    def __init__(self, file_names, ages, genders, ethnicities, batch_size, img_size, dataset_path):
        self.file_names = file_names
        self.ages = ages
        self.genders = genders
        self.ethnicities = ethnicities
        self.batch_size = batch_size
        self.img_size = img_size
        self.dataset_path = dataset_path

    def __len__(self):
        return int(np.ceil(len(self.file_names) / self.batch_size))

    def __getitem__(self, idx):
        batch_files = self.file_names[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_ages = self.ages[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_genders = self.genders[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_ethnicities = self.ethnicities[idx * self.batch_size:(idx + 1) * self.batch_size]
    
        # Initialize batch arrays
        batch_images = np.zeros((len(batch_files), *self.img_size, 3), dtype=np.float32)
    
        for i, file_name in enumerate(batch_files):
            img = Image.open(os.path.join(self.dataset_path, file_name)).convert('RGB')
            img = img.resize(self.img_size)
            img_array = np.array(img) / 255.0  # Normalize
            batch_images[i] = img_array
    
        print(f"Batch images shape: {batch_images.shape}")
        print(f"Batch ages shape: {batch_ages.shape}")
        print(f"Batch genders shape: {batch_genders.shape}")
        print(f"Batch ethnicities shape: {batch_ethnicities.shape}")
    
        # Return images and corresponding outputs as a tuple
        return batch_images, {
            'age_output': np.array(batch_ages, dtype=np.float32), 
            'gender_output': np.array(batch_genders, dtype=np.float32), 
            'ethnicity_output': np.array(batch_ethnicities, dtype=np.float32)
        }


# Create train and validation generators
batch_size = 16

train_generator = MultiOutputDataGenerator(
    train_file_names, train_ages, train_genders, train_ethnicities, batch_size, IMG_SIZE, dataset_path
)

validation_generator = MultiOutputDataGenerator(
    val_file_names, val_ages, val_genders, val_ethnicities, batch_size, IMG_SIZE, dataset_path
)

# Load the VGG16 model without the top layer
vgg16_base = VGG16(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))

# Freeze the VGG16 layers
for layer in vgg16_base.layers:
    layer.trainable = False

# Define the multi-output model
inputs = Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
x = vgg16_base(inputs)
x = Flatten()(x)

# Age output (regression)
age_output = Dense(1, activation='linear', name='age_output')(x)

# Gender output (binary classification)
gender_output = Dense(1, activation='sigmoid', name='gender_output')(x)

# Ethnicity output (multi-class classification)
ethnicity_output = Dense(5, activation='softmax', name='ethnicity_output')(x)

# Define the complete model
model = Model(inputs=inputs, outputs=[age_output, gender_output, ethnicity_output])

# Compile the model
model.compile(
    optimizer='adam',
    loss={
        'age_output': 'mean_squared_error',
        'gender_output': 'binary_crossentropy',
        'ethnicity_output': 'categorical_crossentropy'
    },
    metrics={
        'age_output': 'mae',
        'gender_output': 'accuracy',
        'ethnicity_output': 'accuracy'
    }
)

# Summary of the model
model.summary()

# Set up a callback to save the best model
checkpoint_callback = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min')

# Train the model using the generators
EPOCHS = 20

# Print sample outputs from the generator to check shapes and types
sample_train_images, sample_train_labels = next(iter(train_generator))
print(f"Sample train images shape: {sample_train_images.shape}")
print(f"Sample train labels shapes: {sample_train_labels['age_output'].shape}")
print(f"Sample train labels shapes: {sample_train_labels['gender_output'].shape}")
print(f"Sample train labels shapes: {sample_train_labels['ethnicity_output'].shape}")

# Convert train and validation generators to tf.data.Dataset
train_dataset = tf.data.Dataset.from_generator(
    lambda: train_generator,
    output_signature=(
        tf.TensorSpec(shape=(None, IMG_SIZE[0], IMG_SIZE[1], 3), dtype=tf.float32),
        {
            'age_output': tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
            'gender_output': tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
            'ethnicity_output': tf.TensorSpec(shape=(None, 5), dtype=tf.float32)
        }
    )
)


validation_dataset = tf.data.Dataset.from_generator(
    lambda: validation_generator,
    output_signature=(
        tf.TensorSpec(shape=(None, IMG_SIZE[0], IMG_SIZE[1], 3), dtype=tf.float32),
        {
            'age_output': tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
            'gender_output': tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
            'ethnicity_output': tf.TensorSpec(shape=(None, 5), dtype=tf.float32)
        }
    )
)

# Prefetch data for faster consumption
train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Train the model
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback],
    verbose=1
)


Sample of one-hot encoded ethnicities: [[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]]


Batch images shape: (16, 224, 224, 3)
Batch ages shape: (16, 1)
Batch genders shape: (16, 1)
Batch ethnicities shape: (16, 5)
Sample train images shape: (16, 224, 224, 3)
Sample train labels shapes: (16, 1)
Sample train labels shapes: (16, 1)
Sample train labels shapes: (16, 5)
Epoch 1/20


ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 5), output.shape=(None, 1)

In [12]:
# Test the generator and model output before training
batch_images, batch_labels = train_generator[0]
predictions = model.predict(batch_images)

print(f"Predicted ages shape: {predictions[0].shape}")
print(f"Predicted genders shape: {predictions[1].shape}")
print(f"Predicted ethnicities shape: {predictions[2].shape}")


Batch images shape: (16, 224, 224, 3)
Batch ages shape: (16, 1)
Batch genders shape: (16, 1)
Batch ethnicities shape: (16, 5)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Predicted ages shape: (16, 1)
Predicted genders shape: (16, 1)
Predicted ethnicities shape: (16, 5)
