In [3]:
# ======================================
# AGE & GENDER PREDICTION - UTKFACE DATASET
# ======================================

import os
import cv2
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# ==============================
# CONFIGURATION
# ==============================
# Folder holding the UTKFace images. Set the UTKFACE_DIR environment variable
# to point somewhere else without editing the notebook; when it is unset the
# original hard-coded location is used.
DATASET_PATH = os.environ.get("UTKFACE_DIR", r"E:\ml project\UTKFace")
# Side length, in pixels, of the square images fed to the network.
IMG_SIZE = 128

# ==============================
# DATA LOADING (paths only)
# ==============================
def load_utkface_dataset(dataset_path):
    """Collect image paths and age/gender labels from file names.

    Only the file names are inspected; no image is opened or decoded here.
    A file is kept when it has a .jpg/.jpeg/.png extension (case-insensitive)
    and its name starts with ``<age>_<gender>_`` where ``age`` is a
    non-negative integer and ``gender`` is 0 (male) or 1 (female). Anything
    else is skipped.

    Args:
        dataset_path: Directory that directly contains the image files.

    Returns:
        A tuple ``(file_paths, ages, genders)`` where ``file_paths`` is a list
        of path strings and ``ages`` / ``genders`` are int32 NumPy arrays of
        the same length, all in sorted-file-name order.

    Raises:
        OSError: If ``dataset_path`` cannot be listed.
    """
    file_paths, ages, genders = [], [], []
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # (and therefore any seeded train/test split) reproducible across runs.
    for filename in sorted(os.listdir(dataset_path)):
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue
        parts = filename.split("_")
        try:
            age = int(parts[0])
            gender = int(parts[1])  # 0 = male, 1 = female
        except (ValueError, IndexError):
            # Name does not follow the <age>_<gender>_... pattern.
            continue
        # The gender head is trained with binary cross-entropy, so labels
        # outside {0, 1} would corrupt the loss; treat them as malformed.
        if age < 0 or gender not in (0, 1):
            continue
        file_paths.append(os.path.join(dataset_path, filename))
        ages.append(age)
        genders.append(gender)
    return file_paths, np.array(ages, dtype=np.int32), np.array(genders, dtype=np.int32)

print("Loading dataset (paths only)...")
file_paths, age_labels, gender_labels = load_utkface_dataset(DATASET_PATH)
# Stop here with an actionable message when nothing was found, rather than
# reporting success and failing later inside train_test_split.
if len(file_paths) == 0:
    raise FileNotFoundError(
        f"No usable UTKFace images found in {DATASET_PATH!r}; check DATASET_PATH."
    )
print("✅ Dataset paths loaded")
print(f"Total Images: {len(file_paths)}")

# ==============================
# SPLIT PATHS (labels stay in memory but are small)
# ==============================
# 80/20 split of paths and both label arrays, kept aligned by passing them to
# a single train_test_split call; random_state pins the shuffle.
paths_train, paths_test, y_age_train, y_age_test, y_gender_train, y_gender_test = train_test_split(
    file_paths, age_labels, gender_labels, test_size=0.2, random_state=42
)

# Create tf.data datasets that load images on the fly to avoid large memory usage
AUTOTUNE = tf.data.AUTOTUNE

def _parse_function(path, age, gender):
    """Turn one (path, age, gender) record into a training example.

    The file at ``path`` (a scalar string tensor) is read, decoded to three
    channels, resized to IMG_SIZE x IMG_SIZE and divided by 255. Both labels
    are cast to float32 and returned in a dict keyed by the names of the
    model's output layers.

    Returns:
        ``(image, labels)`` where ``labels`` has the keys ``"age_output"``
        and ``"gender_output"``.
    """
    raw_bytes = tf.io.read_file(path)
    # expand_animations=False is passed so animated inputs are not expanded
    # into a stack of frames.
    pixels = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)
    resized = tf.image.resize(pixels, [IMG_SIZE, IMG_SIZE])
    scaled = tf.cast(resized, tf.float32) / 255.0
    labels = {
        "age_output": tf.cast(age, tf.float32),
        "gender_output": tf.cast(gender, tf.float32),
    }
    return scaled, labels

batch_size = 64


def _build_dataset(paths, ages, genders, shuffle=False):
    """Build a batched, prefetched tf.data pipeline over (path, age, gender).

    Args:
        paths: Sequence of image file paths.
        ages: Age labels aligned with ``paths``.
        genders: Gender labels aligned with ``paths``.
        shuffle: When True, records are shuffled before the images are read.

    Returns:
        A dataset yielding ``(image_batch, label_dict_batch)`` as produced by
        ``_parse_function``, in batches of ``batch_size``.
    """
    ds = tf.data.Dataset.from_tensor_slices((paths, ages, genders))
    if shuffle:
        # Shuffling happens on the lightweight (path, label) records, before
        # any image is decoded, so a buffer covering the whole set is cheap
        # and gives a uniform shuffle instead of a local one.
        ds = ds.shuffle(buffer_size=max(len(paths), 1))
    ds = ds.map(_parse_function, num_parallel_calls=AUTOTUNE)
    return ds.batch(batch_size).prefetch(AUTOTUNE)


train_ds = _build_dataset(paths_train, y_age_train, y_gender_train, shuffle=True)
# No shuffle for test
test_ds = _build_dataset(paths_test, y_age_test, y_gender_test)

# ==============================
# MODEL ARCHITECTURE
# ==============================
inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))

# Shared feature extractor: three conv -> max-pool stages with 32, 64 and 128
# filters, followed by flatten and dropout.
x = inputs
for n_filters in (32, 64, 128):
    x = Conv2D(n_filters, (3,3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2,2))(x)
x = Flatten()(x)
x = Dropout(0.5)(x)

# Output layers: both heads read the same flattened features. The layer names
# are the keys used for the labels, losses and metrics.
age_output = Dense(1, activation='linear', name='age_output')(x)
gender_output = Dense(1, activation='sigmoid', name='gender_output')(x)

model = Model(inputs=inputs, outputs=[age_output, gender_output])

# ==============================
# COMPILE MODEL
# ==============================
# Per-head loss and metric, keyed by output layer name.
head_losses = {'age_output': 'mae', 'gender_output': 'binary_crossentropy'}
head_metrics = {'age_output': 'mae', 'gender_output': 'accuracy'}

model.compile(optimizer=Adam(learning_rate=0.001), loss=head_losses, metrics=head_metrics)

model.summary()

# ==============================
# TRAIN MODEL
# ==============================
# Train for a fixed number of epochs; test_ds doubles as the validation set,
# so the val_* numbers in the log are computed on the held-out 20% split.
num_epochs = 15
history = model.fit(train_ds, epochs=num_epochs, validation_data=test_ds)

# ==============================
# SAVE MODEL
# ==============================
# The file name ends in .h5, i.e. the model is written as an HDF5 file.
saved_model_file = "age_gender_model.h5"
model.save(saved_model_file)
print("✅ Model saved as age_gender_model.h5")


Loading dataset (paths only)...
✅ Dataset paths loaded
Total Images: 23708


Epoch 1/15
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m288s[0m 960ms/step - age_output_loss: 15.4750 - age_output_mae: 15.4791 - gender_output_accuracy: 0.6733 - gender_output_loss: 0.6144 - loss: 16.0937 - val_age_output_loss: 14.1945 - val_age_output_mae: 14.1685 - val_gender_output_accuracy: 0.7741 - val_gender_output_loss: 0.4752 - val_loss: 14.6461
Epoch 2/15
[1m297/297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m280s[0m 941ms/step - age_output_loss: 12.5150 - age_output_mae: 12.5191 - gender_output_accuracy: 0.7654 - gender_output_loss: 0.5338 - loss: 13.0532 - val_age_output_loss: 10.9878 - va



✅ Model saved as age_gender_model.h5
