# Step 1: Mount Google Drive

In [None]:
from google.colab import drive

# Directory under which Google Drive is exposed to this Colab runtime;
# every project path used later in the notebook lives below it.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


# Step 2: Import Libraries

In [None]:
import tensorflow as tf

# Record the TensorFlow version alongside the results so the run can be reproduced.
tf_version = tf.__version__
print(tf_version)

2.17.0


In [None]:
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf

# Step 3: Define Paths

In [None]:
# Root of the project on the mounted Drive; both paths below hang off it.
PROJECT_DIR = '/content/drive/MyDrive/FairFace_Project'

# Folder holding the filtered image subset (trailing slash kept on purpose).
filtered_images_folder = PROJECT_DIR + '/filtered_images/'

# Destination file for the trained model.
# NOTE(review): `.h5` selects the legacy HDF5 format; Keras documents `.keras`
# as the preferred native format - confirm whether HDF5 is required downstream.
model_save_path = PROJECT_DIR + '/fairface_model.h5'

# Step 4: Load sampled_df DataFrame

In [None]:
# Load the CSV file containing image paths and labels
csv_path = '/content/drive/MyDrive/FairFace_Project/fairface_label_train.csv'
labels_df = pd.read_csv(csv_path)

# Keep only the rows whose image is actually present in filtered_images_folder.
# The folder is listed once and matched by base name, instead of issuing one
# os.path.exists() call per CSV row against the Drive mount. A missing folder
# now raises FileNotFoundError here rather than silently producing an empty
# DataFrame that fails later in a less obvious place.
available_files = set(os.listdir(filtered_images_folder))
is_available = labels_df['file'].map(os.path.basename).isin(available_files)

# The original row index is preserved (no reset_index), as before.
sampled_df = labels_df[is_available]
print("DataFrame loaded and filtered:", sampled_df.shape)

DataFrame loaded and filtered: (1572, 5)


In [None]:
# Preview the filtered label table; the notebook renders it truncated
# (first and last rows only).
sampled_df

Unnamed: 0,file,age,gender,race,service_test
104,train/105.jpg,50-59,Male,East Asian,False
107,train/108.jpg,more than 70,Female,Indian,False
119,train/120.jpg,60-69,Male,Middle Eastern,False
242,train/243.jpg,40-49,Female,Southeast Asian,True
255,train/256.jpg,3-9,Male,East Asian,False
...,...,...,...,...,...
86535,train/86536.jpg,10-19,Male,Latino_Hispanic,True
86639,train/86640.jpg,40-49,Male,East Asian,True
86667,train/86668.jpg,10-19,Female,White,False
86704,train/86705.jpg,30-39,Male,Indian,True


In [None]:
# Report the dataset size, then the class balance of each label column.
print(f"Total images in filtered dataset: {len(sampled_df)}")

distribution_reports = (
    ("Race distribution:\n", 'race'),
    ("Age distribution:\n", 'age'),
    ("Gender distribution:\n", 'gender'),
)
for heading, column in distribution_reports:
    print(heading, sampled_df[column].value_counts())

Total images in filtered dataset: 1572
Race distribution:
 race
White              268
Southeast Asian    250
East Asian         225
Indian             211
Middle Eastern     207
Latino_Hispanic    206
Black              205
Name: count, dtype: int64
Age distribution:
 age
3-9             210
20-29           209
40-49           204
30-39           200
50-59           192
10-19           185
60-69           182
0-2             130
more than 70     60
Name: count, dtype: int64
Gender distribution:
 gender
Male      852
Female    720
Name: count, dtype: int64


# Step 5: Load and Preprocess Images

In [None]:
IMG_SIZE = 128  # Resize target (images become IMG_SIZE x IMG_SIZE)

image_data = []
age_labels = []
gender_labels = []
race_labels = []
skipped_files = []  # paths cv2 could not decode, reported after the loop

# Iterate over the needed columns directly; this avoids building a Series per
# row the way DataFrame.iterrows() does.
label_rows = zip(sampled_df['file'], sampled_df['age'],
                 sampled_df['gender'], sampled_df['race'])

# Load and preprocess each image
for file_name, age, gender, race in tqdm(label_rows, total=len(sampled_df)):
    img_path = os.path.join(filtered_images_folder, os.path.basename(file_name))

    # cv2.imread returns None (it does not raise) when a file is missing or
    # cannot be decoded.
    # NOTE(review): OpenCV loads colour images in BGR channel order; any
    # inference code must feed the model BGR too, or convert here - confirm.
    img = cv2.imread(img_path)
    if img is None:
        skipped_files.append(img_path)
        continue

    # Resize, then scale pixel values to [0, 1]. Casting to float32 first keeps
    # the result float32; `img / 255.0` on the decoded array would yield
    # float64 and double the memory of the full image tensor.
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img.astype(np.float32) / 255.0

    # Append image data and labels together so they stay aligned
    image_data.append(img)
    age_labels.append(age)
    gender_labels.append(gender)
    race_labels.append(race)

# Convert lists to numpy arrays
image_data = np.array(image_data, dtype=np.float32)
age_labels = np.array(age_labels)
gender_labels = np.array(gender_labels)
race_labels = np.array(race_labels)

print(f"Loaded {len(image_data)} images.")
if skipped_files:
    # Surface dropped rows instead of discarding them silently.
    print(f"Skipped {len(skipped_files)} unreadable images, e.g. {skipped_files[:5]}")

100%|██████████| 1572/1572 [08:12<00:00,  3.19it/s]


Loaded 1572 images.


# Step 6: Encode Labels

In [None]:
def _fit_one_hot(labels):
    """Fit a fresh LabelEncoder on `labels`.

    Returns a tuple of (fitted encoder, integer class codes, one-hot matrix
    produced by to_categorical from those codes).
    """
    encoder = LabelEncoder()
    codes = encoder.fit_transform(labels)
    return encoder, codes, to_categorical(codes)


# One independent encoder per task. The fitted encoders are kept so predicted
# class indices can be mapped back to the original label strings.
race_encoder, race_encoded, race_one_hot = _fit_one_hot(race_labels)
gender_encoder, gender_encoded, gender_one_hot = _fit_one_hot(gender_labels)
age_encoder, age_encoded, age_one_hot = _fit_one_hot(age_labels)

print("Encoding completed for age, gender, and race.")

Encoding completed for age, gender, and race.


# Step 7: Split Data into Training and Validation Sets

In [None]:
# Hold out 20% of the samples for validation. Splitting all four arrays in a
# single call keeps images and their three label sets row-aligned; the fixed
# random_state makes the split repeatable.
split_arrays = train_test_split(
    image_data,
    age_one_hot,
    gender_one_hot,
    race_one_hot,
    test_size=0.2,
    random_state=42,
)

# train_test_split returns a (train, validation) pair per input array, in input order.
(X_train, X_val,
 y_train_age, y_val_age,
 y_train_gender, y_val_gender,
 y_train_race, y_val_race) = split_arrays

print(f"Training set: {X_train.shape}, Validation set: {X_val.shape}")

Training set: (1257, 128, 128, 3), Validation set: (315, 128, 128, 3)


# Step 8: Define the Multi-Task CNN Model

In [None]:
# Input layer: one colour image of IMG_SIZE x IMG_SIZE pixels
input_layer = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

# Shared convolutional trunk: every task head reads the same flattened features
x = layers.Conv2D(32, (3, 3), activation='relu')(input_layer)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(64, (3, 3), activation='relu')(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(128, (3, 3), activation='relu')(x)
x = layers.Flatten()(x)

# One softmax head per task, sized from the width of its one-hot label matrix.
# The layer names double as the keys of the loss / metric / target dicts.

# Age prediction branch
age_output = layers.Dense(age_one_hot.shape[1], activation='softmax', name="age_output")(x)

# Gender prediction branch
gender_output = layers.Dense(gender_one_hot.shape[1], activation='softmax', name="gender_output")(x)

# Race prediction branch
race_output = layers.Dense(race_one_hot.shape[1], activation='softmax', name="race_output")(x)

# Define the model with multiple outputs
model = Model(inputs=input_layer, outputs=[age_output, gender_output, race_output])

# Compile the model: each head gets categorical cross-entropy and accuracy
model.compile(optimizer='adam',
              loss={'age_output': 'categorical_crossentropy',
                    'gender_output': 'categorical_crossentropy',
                    'race_output': 'categorical_crossentropy'},
              metrics={'age_output': 'accuracy',
                       'gender_output': 'accuracy',
                       'race_output': 'accuracy'})

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

None


# Step 9: Train the Model

In [None]:
# NOTE(review): `tf` is already imported in the Step 2 cells; this re-import is
# redundant but harmless.
import tensorflow as tf

# Reset Keras' global state.
# NOTE(review): this runs AFTER Step 8 has already built and compiled `model`,
# and nothing in this cell rebuilds it, so the reset does not yield a fresh
# model by itself. Consider moving the reset ahead of the Step 8 definition so
# the notebook works under Restart & Run All - confirm the intended order.
tf.keras.backend.clear_session()

# Manual step expected by the author: re-run the Step 8 model-definition cell
# after executing this one. No code here performs that re-definition.

In [None]:
# The tf.keras.backend.clear_session() call that used to open this cell was
# removed: it reset Keras' global state while the already-built `model` was
# about to be trained, and nothing here rebuilds the model afterwards.

# Names of the three output layers; they key the loss, metric and target dicts.
output_names = ('age_output', 'gender_output', 'race_output')

# Re-compile with the same per-head loss and metric for every output.
model.compile(
    optimizer='adam',
    loss={name: 'categorical_crossentropy' for name in output_names},
    metrics={name: 'accuracy' for name in output_names},
    # NOTE(review): eager execution is kept from the original cell, but Keras
    # documents it as a debugging aid that slows training. The reason it was
    # added is not visible here - try removing it and confirm fit() still runs.
    run_eagerly=True,
)

train_targets = {
    'age_output': y_train_age,
    'gender_output': y_train_gender,
    'race_output': y_train_race,
}
val_targets = {
    'age_output': y_val_age,
    'gender_output': y_val_gender,
    'race_output': y_val_race,
}

# The recorded run shows val_loss near 4.6 at epoch 3 but about 15.3 when the
# final model is evaluated, i.e. later epochs overfit. Stop once val_loss stops
# improving and roll back to the best epoch, so the model that is evaluated and
# saved afterwards is the best one seen rather than the last one.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train; `history` records the per-epoch metrics of both splits.
history = model.fit(
    X_train,
    train_targets,
    validation_data=(X_val, val_targets),
    epochs=10,
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 126ms/step - age_output_accuracy: 0.1352 - gender_output_accuracy: 0.4956 - loss: 5.2546 - race_output_accuracy: 0.1513 - val_age_output_accuracy: 0.1587 - val_gender_output_accuracy: 0.5714 - val_loss: 4.7730 - val_race_output_accuracy: 0.1746
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 134ms/step - age_output_accuracy: 0.1562 - gender_output_accuracy: 0.5620 - loss: 4.7424 - race_output_accuracy: 0.2220 - val_age_output_accuracy: 0.1270 - val_gender_output_accuracy: 0.6254 - val_loss: 4.7482 - val_race_output_accuracy: 0.2349
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 108ms/step - age_output_accuracy: 0.2464 - gender_output_accuracy: 0.6324 - loss: 4.4644 - race_output_accuracy: 0.3150 - val_age_output_accuracy: 0.2032 - val_gender_output_accuracy: 0.5873 - val_loss: 4.6014 - val_race_output_accuracy: 0.2730
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━

# Step 10: Evaluate and Save the Model

In [None]:
# Score the trained model on the held-out validation split. Targets are keyed
# by output-layer name so each head is compared with its own labels.
eval_targets = {
    'age_output': y_val_age,
    'gender_output': y_val_gender,
    'race_output': y_val_race,
}
results = model.evaluate(X_val, eval_targets)
# In the recorded run this is a list of four numbers: the total loss followed
# by one accuracy per head.
print("Evaluation results:", results)

# Persist the model to the Drive location configured in Step 3.
model.save(model_save_path)
print("Model saved successfully to Google Drive!")

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step - age_output_accuracy: 0.2303 - gender_output_accuracy: 0.6132 - loss: 16.1437 - race_output_accuracy: 0.3173




Evaluation results: [15.302157402038574, 0.21587301790714264, 0.6126984357833862, 0.30158731341362]
Model saved successfully to Google Drive!
