In [None]:
import numpy as np
import pandas as pd
import os
from pathlib import Path
from PIL import Image
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.callbacks import ModelCheckpoint

# Load and preprocess the dataset
dataset_path = 'UTKface'  # Update this path to your dataset location

# Collect the bare file names of every JPEG directly inside the dataset
# directory. Directory listings come back in filesystem-dependent order, so
# the names are sorted first: without a fixed starting order the seeded
# shuffle below would still produce a different sequence on another machine.
filenames = sorted(path.name for path in Path(dataset_path).glob('*.jpg'))

# Shuffle the filenames (fixed seed so the order is identical on every run)
np.random.seed(42)
np.random.shuffle(filenames)

# Extract labels and load images
ages, genders, ethnicities, image_data = [], [], [], []
IMG_SIZE = (224, 224)
skipped_files = 0  # files ignored because their name/labels were unusable

for filename in filenames:
    if not filename.endswith(('.jpg', '.png')):
        continue

    try:
        # Names are expected to split into exactly four '_'-separated fields:
        # age, gender, ethnicity and a trailing remainder. A different field
        # count or a non-integer label raises ValueError and skips the file.
        age, gender, ethnicity, _ = filename.split('_')
        age = int(age)
        gender = int(gender)
        ethnicity = int(ethnicity)

        # Reject labels the rest of the script cannot represent: gender is
        # trained as a binary target, and ethnicity is one-hot encoded with
        # num_classes=5 further down (an index >= 5 would crash there).
        if gender not in (0, 1) or not 0 <= ethnicity < 5:
            raise ValueError(f'label out of range in {filename!r}')

        # Load and process the image. The context manager closes the file
        # handle right away instead of leaving it to garbage collection.
        with Image.open(os.path.join(dataset_path, filename)) as raw_image:
            img = raw_image.convert('RGB').resize(IMG_SIZE)

        # Normalize pixel values to [0, 1] directly in float32; going through
        # float64 first would double the memory held per image.
        img_array = np.asarray(img, dtype='float32') / 255.0
    except ValueError:
        skipped_files += 1
        continue

    # Append data to lists (only reached when every step above succeeded)
    ages.append(age)
    genders.append(gender)
    ethnicities.append(ethnicity)
    image_data.append(img_array)

if skipped_files:
    print(f'Skipped {skipped_files} unusable file(s) '
          '(malformed name or out-of-range label).')

# Fail early with an actionable message when nothing was loaded (for example
# because dataset_path points to the wrong place); otherwise the failure only
# surfaces later, inside train_test_split, with an unrelated-looking message.
if len(image_data) == 0:
    raise ValueError(
        f'No usable images were loaded from {dataset_path!r}; '
        'check the dataset path and the file naming.'
    )

# Convert lists to numpy arrays
image_data = np.array(image_data, dtype='float32')
ages = np.array(ages, dtype='float32').reshape(-1, 1)  # Regression output, one column
ages /= 116.0  # Normalize age values (max age is 116)
genders = np.array(genders, dtype='float32').reshape(-1, 1)  # Binary output, one column
ethnicities = to_categorical(ethnicities, num_classes=5)  # One-hot encode ethnicity labels

# Hold out 20% of the samples for validation. All four arrays are split
# together so images and labels stay aligned; the fixed random_state makes
# the partition repeatable.
split_arrays = train_test_split(
    image_data,
    ages,
    genders,
    ethnicities,
    test_size=0.2,
    random_state=42,
)
(
    X_train, X_test,
    age_train, age_test,
    gender_train, gender_test,
    eth_train, eth_test,
) = split_arrays

# Pretrained VGG16 convolutional stack (ImageNet weights, classifier removed)
# used as a fixed feature extractor.
# NOTE(review): Keras documents that VGG16 inputs should go through
# vgg16.preprocess_input (RGB->BGR, per-channel mean subtraction, no scaling),
# while the images fed here are scaled to [0, 1]. Confirm whether this
# mismatch is intentional before relying on the frozen features.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Freeze every VGG16 layer so only the task heads are trained
for layer in conv_base.layers:
    layer.trainable = False

# Shared trunk: image input -> VGG16 features -> flat feature vector
input_layer = Input(shape=(224, 224, 3))
X = conv_base(input_layer)
X = Flatten()(X)


def _attach_head(features, drop_rate, out_units, out_activation, out_name):
    """Stack Dense(256, relu) -> Dropout -> named output Dense on `features`.

    Returns the three intermediate tensors (hidden, dropout, output) so the
    caller can keep a handle on each of them.
    """
    hidden = Dense(256, activation='relu')(features)
    regularised = Dropout(drop_rate)(hidden)
    prediction = Dense(out_units, activation=out_activation, name=out_name)(regularised)
    return hidden, regularised, prediction


# Gender head: single sigmoid unit (binary classification)
dense_gender, dropout_gender, gender_output = _attach_head(
    X, 0.4, 1, 'sigmoid', 'gender_output'
)

# Age head: single linear unit (regression)
dense_age, dropout_age, age_output = _attach_head(
    X, 0.3, 1, 'linear', 'age_output'
)

# Ethnicity head: 5-way softmax (multi-class classification)
dense_ethnicity, dropout_ethnicity, ethnicity_output = _attach_head(
    X, 0.3, 5, 'softmax', 'ethnicity_output'
)

# One input, three outputs (order: gender, age, ethnicity)
model = Model(
    inputs=input_layer,
    outputs=[gender_output, age_output, ethnicity_output],
)

# Loss for each task, keyed by the name of its output layer
task_losses = {
    'gender_output': 'binary_crossentropy',
    'age_output': 'mean_squared_error',
    'ethnicity_output': 'categorical_crossentropy',
}

# Metric reported for each task, keyed the same way
task_metrics = {
    'gender_output': 'accuracy',
    'age_output': 'mae',
    'ethnicity_output': 'accuracy',
}

# Compile with Adam at its default settings
model.compile(optimizer='adam', loss=task_losses, metrics=task_metrics)

# Print the layer-by-layer architecture
model.summary()

# Checkpoint callback: write the model to 'best_model.keras' only when the
# validation loss reaches a new minimum.
checkpoint_callback = ModelCheckpoint(
    filepath='best_model.keras',
    mode='min',
    monitor='val_loss',
    save_best_only=True,
)

# Build the tf.data input pipelines
batch_size = 32


def _as_dataset(images, gender_labels, age_labels, ethnicity_labels):
    """Pair each image with a dict of targets keyed by output-layer name."""
    targets = {
        'gender_output': gender_labels,
        'age_output': age_labels,
        'ethnicity_output': ethnicity_labels,
    }
    return tf.data.Dataset.from_tensor_slices((images, targets))


# Training data: shuffled through a 1024-element buffer, then batched
train_dataset = _as_dataset(X_train, gender_train, age_train, eth_train)
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

# Validation data: batched in its stored order (no shuffling)
validation_dataset = _as_dataset(X_test, gender_test, age_test, eth_test)
validation_dataset = validation_dataset.batch(batch_size)

# Fit all three heads jointly; the returned History object keeps the
# per-epoch losses and metrics.
epochs = 20
history = model.fit(
    x=train_dataset,
    epochs=epochs,
    verbose=1,
    callbacks=[checkpoint_callback],
    validation_data=validation_dataset,
)
