In [None]:
import pandas as pd

# Location of the cleaned and encoded label CSV
csv_path = '/content/drive/MyDrive/Trainselect/fairface_filtered_8781.csv'
data = pd.read_csv(csv_path)

# Sanity check: preview the first five rows and report the row count
print(data.head(n=5))
print(f"Number of samples: {len(data)}")


    file  age  gender  race  service_test
0  27327    1       1     2         False
1  63584    0       0     1         False
2  35860    6       0     4          True
3  83767    4       1     2          True
4  66866    1       0     3         False
Number of samples: 8781


In [None]:
from sklearn.model_selection import train_test_split

# Pull the 'file' column and the three target columns out of the frame as arrays
file_paths = data['file'].to_numpy()
labels = data[['age', 'gender', 'race']].to_numpy()

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable
train_files, val_files, train_labels, val_labels = train_test_split(
    file_paths,
    labels,
    test_size=0.2,
    random_state=42,
)

print(f"Training samples: {len(train_files)}, Validation samples: {len(val_files)}")

Training samples: 7024, Validation samples: 1757


In [None]:
import pandas as pd
import os

# Re-read the label CSV so the 'file' column holds the raw file numbers again
csv_path = '/content/drive/MyDrive/Trainselect/fairface_filtered_8781.csv'
data = pd.read_csv(csv_path)

# Directory that holds the image files
images_folder = '/content/drive/MyDrive/Trainselect/Train select'

# Expand every file number into a full path of the form "<folder>/<number>.jpg.jpg"
data['file'] = [
    os.path.join(images_folder, f"{file_number}.jpg.jpg")
    for file_number in data['file']
]

# Show the first rows to confirm the rewritten paths
print(data.head())


                                                file  age  gender  race  \
0  /content/drive/MyDrive/Trainselect/Train selec...    1       1     2   
1  /content/drive/MyDrive/Trainselect/Train selec...    0       0     1   
2  /content/drive/MyDrive/Trainselect/Train selec...    6       0     4   
3  /content/drive/MyDrive/Trainselect/Train selec...    4       1     2   
4  /content/drive/MyDrive/Trainselect/Train selec...    1       0     3   

   service_test  
0         False  
1         False  
2          True  
3          True  
4         False  


In [None]:
# Keep only the rows whose image file is actually present on disk.
# .copy() turns the filtered result into an independent DataFrame, so columns
# added to `data` afterwards do not trigger pandas' SettingWithCopyWarning.
file_exists = data['file'].apply(os.path.exists)
data = data[file_exists].copy()

# Extract file paths and labels
file_paths = data['file'].values
labels = data[['age', 'gender', 'race']].values  # Adjust to match your encoded columns

# Verify the number of valid file paths
print(f"Number of valid file paths: {len(file_paths)}")
print(f"Example file paths: {file_paths[:5]}")


Number of valid file paths: 7686
Example file paths: ['/content/drive/MyDrive/Trainselect/Train select/27327.jpg.jpg'
 '/content/drive/MyDrive/Trainselect/Train select/63584.jpg.jpg'
 '/content/drive/MyDrive/Trainselect/Train select/35860.jpg.jpg'
 '/content/drive/MyDrive/Trainselect/Train select/83767.jpg.jpg'
 '/content/drive/MyDrive/Trainselect/Train select/66866.jpg.jpg']


In [None]:
from sklearn.model_selection import train_test_split

# Build one combined "<age>_<gender>_<race>" key per row to stratify on
data['stratify_label'] = data['age'].astype(str).str.cat(
    [data['gender'].astype(str), data['race'].astype(str)],
    sep='_',
)

# 80/20 split that preserves the proportion of every combined key
train_data, val_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
    stratify=data['stratify_label'],
)

# File paths as plain lists, labels as one {'age', 'gender', 'race'} dict per sample
label_columns = ['age', 'gender', 'race']
train_files = list(train_data['file'])
val_files = list(val_data['file'])
train_labels = train_data[label_columns].to_dict(orient='records')
val_labels = val_data[label_columns].to_dict(orient='records')

# Report the size of each split
print(f"Training samples: {len(train_files)}")
print(f"Validation samples: {len(val_files)}")


Training samples: 6148
Validation samples: 1538


In [None]:
import tensorflow as tf

# Turn the list of per-sample label dicts into one plain list per task
train_labels_age, train_labels_gender, train_labels_race = (
    [record[task] for record in train_labels]
    for task in ('age', 'gender', 'race')
)

val_labels_age, val_labels_gender, val_labels_race = (
    [record[task] for record in val_labels]
    for task in ('age', 'gender', 'race')
)

# Preprocessing function for images and labels
def preprocess_image(file_path, age, gender, race):
    """Load one JPEG image and pair it with its three labels.

    Args:
        file_path: Path of the JPEG file to read.
        age: Age label, passed through unchanged.
        gender: Gender label, passed through unchanged.
        race: Race label, passed through unchanged.

    Returns:
        A tuple ``(img, labels)``. ``img`` is the decoded 3-channel image
        resized to 224x224 and scaled to [-1, 1]. ``labels`` is a dict keyed
        by 'age', 'gender' and 'race', matching the names of the model's
        output layers.
    """
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, (224, 224))  # 224x224 is the input size the models are built for
    # The ImageNet weights of InceptionV3 and MobileNetV2 were trained on
    # pixels scaled to [-1, 1] (what their Keras `preprocess_input` produces),
    # so map [0, 255] to that range instead of [0, 1].
    img = img / 127.5 - 1.0
    return img, {'age': age, 'gender': gender, 'race': race}

# Create training dataset
train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_files, train_labels_age, train_labels_gender, train_labels_race)
)
# Shuffle the lightweight (path, labels) tuples *before* decoding: the buffer
# can then span the whole training set without holding decoded images in
# memory. Decoding runs in parallel; output order stays deterministic.
train_dataset = (
    train_dataset
    .shuffle(max(len(train_files), 1))  # shuffle() requires a buffer size >= 1
    .map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

# Create validation dataset (no shuffling, so evaluation order is stable)
val_dataset = tf.data.Dataset.from_tensor_slices(
    (val_files, val_labels_age, val_labels_gender, val_labels_race)
)
val_dataset = (
    val_dataset
    .map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

# Verify the datasets on a single batch.
# Dedicated loop names avoid overwriting the notebook-level `labels` array.
for image_batch, label_batch in train_dataset.take(1):
    print(f"Image batch shape: {image_batch.shape}")
    print(f"Age labels batch shape: {label_batch['age'].shape}")
    print(f"Gender labels batch shape: {label_batch['gender'].shape}")
    print(f"Race labels batch shape: {label_batch['race'].shape}")


Image batch shape: (32, 224, 224, 3)
Age labels batch shape: (32,)
Gender labels batch shape: (32,)
Race labels batch shape: (32,)


In [None]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
import tensorflow as tf

# Image size fed to the network: 224x224 with 3 channels
input_shape = (224, 224, 3)

# ImageNet-pretrained InceptionV3 without its classification head
base_model = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)

# Keep the pretrained weights fixed
base_model.trainable = False

# Shared trunk: frozen backbone -> global average pooling -> dropout
inputs = Input(shape=input_shape)
x = base_model(inputs, training=False)
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)

# One softmax classifier per task: 8 age classes, 2 gender classes, 7 race classes
age_output = Dense(8, activation='softmax', name='age')(x)
gender_output = Dense(2, activation='softmax', name='gender')(x)
race_output = Dense(7, activation='softmax', name='race')(x)

# Assemble the three-headed model
model = Model(inputs=inputs, outputs=[age_output, gender_output, race_output])

# Every head uses the same loss and metric, keyed by output layer name
task_names = ('age', 'gender', 'race')
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=dict.fromkeys(task_names, 'sparse_categorical_crossentropy'),
    metrics=dict.fromkeys(task_names, 'accuracy'),
)

# Print the layer overview
model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# Re-compile with a lower learning rate for transfer learning.
# Each output head gets sparse categorical cross-entropy and an accuracy metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss={
        head: 'sparse_categorical_crossentropy'
        for head in ('age', 'gender', 'race')
    },
    metrics={
        head: ['accuracy']
        for head in ('age', 'gender', 'race')
    },
)


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Save the model whenever validation loss reaches a new best
best_model_saver = ModelCheckpoint(
    filepath="inception_model_best.keras",
    monitor="val_loss",
    save_best_only=True,
    verbose=1,
)

# Stop after 3 epochs without val_loss improvement and restore the best weights
early_stopper = EarlyStopping(
    monitor="val_loss",
    patience=3,
    verbose=1,
    restore_best_weights=True,
)

# Halve the learning rate after 2 epochs without val_loss improvement
lr_reducer = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=2,
    verbose=1,
)

callbacks = [best_model_saver, early_stopper, lr_reducer]


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# Load the MobileNetV2 model pre-trained on ImageNet, excluding the top layer.
# Passing input_shape explicitly pins the backbone to the 224x224 RGB images
# produced by the data pipeline and avoids Keras' undefined-input_shape warning.
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Freeze the base model
base_model.trainable = False

# Add custom top layers for your specific task.
# The backbone is called with training=False so that its BatchNormalization
# layers stay in inference mode even if the backbone is unfrozen later for
# fine-tuning (otherwise their moving statistics would be overwritten).
inputs = tf.keras.Input(shape=(224, 224, 3))
x = base_model(inputs, training=False)
x = GlobalAveragePooling2D()(x)  # Add a global average pooling layer
x = Dense(256, activation='relu')(x)  # Add a fully connected layer
outputs = {
    'age': Dense(8, activation='softmax', name='age')(x),
    # softmax (not sigmoid): sparse categorical cross-entropy expects the two
    # class scores to form a probability distribution, like the other heads.
    'gender': Dense(2, activation='softmax', name='gender')(x),
    'race': Dense(7, activation='softmax', name='race')(x),
}

# Create the model
model = Model(inputs=inputs, outputs=outputs)

# Compile the model with the initial frozen layers
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),  # Initial higher learning rate
    loss={
        'age': 'sparse_categorical_crossentropy',
        'gender': 'sparse_categorical_crossentropy',
        'race': 'sparse_categorical_crossentropy',
    },
    metrics={
        'age': 'accuracy',
        'gender': 'accuracy',
        'race': 'accuracy',
    }
)





  base_model = MobileNetV2(weights='imagenet', include_top=False)


In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, Callback
import json

# Custom callback to save epoch values
class EpochLogger(Callback):
    """Keras callback that records every epoch's metrics in a JSON file.

    After each epoch the accumulated history (one dict per epoch, holding the
    1-based epoch number plus whatever Keras passes in ``logs``) is rewritten
    to ``log_file``.

    Args:
        log_file: Path of the JSON file to (over)write after every epoch.
    """

    def __init__(self, log_file='epoch_log.json'):
        # Let the Keras base class initialise its own attributes first.
        super().__init__()
        self.log_file = log_file
        self.epoch_data = []

    def on_epoch_end(self, epoch, logs=None):
        """Append this epoch's metrics and rewrite the JSON log file."""
        # Keras passes a 0-based epoch index; store it 1-based.
        epoch_info = {'epoch': epoch + 1}
        if logs:
            epoch_info.update(logs)
        self.epoch_data.append(epoch_info)

        # Rewrite the whole file each time so it is always valid JSON.
        # default=float converts NumPy scalar metric values, which the json
        # module cannot serialise on its own.
        with open(self.log_file, 'w') as file:
            json.dump(self.epoch_data, file, indent=4, default=float)

import glob
import os

# Define checkpoint callback to save weights
checkpoint_filepath = 'model_checkpoints/weights_epoch_{epoch:02d}.weights.h5'
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,  # Save only weights
    monitor='val_loss',      # Monitor validation loss
    mode='min',
    save_best_only=False,    # Save weights after every epoch
    verbose=1
)

# Load weights if available.
# The checkpoints are Keras `.weights.h5` files; tf.train.latest_checkpoint()
# only discovers TensorFlow-format checkpoints, so look for the files directly
# and pick the most recently written one.
saved_weight_files = glob.glob('model_checkpoints/weights_epoch_*.weights.h5')
if saved_weight_files:
    latest_weights = max(saved_weight_files, key=os.path.getmtime)
    try:
        model.load_weights(latest_weights)
        print(f"Loaded weights from {latest_weights}")
    except Exception as e:
        # Best effort: e.g. weights saved from a different architecture.
        # Report the real reason instead of claiming nothing was found.
        print(f"Could not load weights from {latest_weights}: {e}. Starting fresh.")
else:
    latest_weights = None
    print("No weights found to load. Starting fresh.")

# Combine callbacks
epoch_logger = EpochLogger(log_file='epoch_log.json')
callbacks = [model_checkpoint, epoch_logger]


In [None]:
# Fit the model on the training set, validating after every epoch
history = model.fit(
    x=train_dataset,
    validation_data=val_dataset,
    epochs=20,  # Total desired epochs
    callbacks=callbacks,
)

Epoch 1/20
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - age_accuracy: 0.2053 - age_loss: 2.0566 - gender_accuracy: 0.5696 - gender_loss: 0.6971 - loss: 4.7690 - race_accuracy: 0.1681 - race_loss: 2.0153
Epoch 1: saving model to model_checkpoints/weights_epoch_01.weights.h5
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 344ms/step - age_accuracy: 0.2055 - age_loss: 2.0561 - gender_accuracy: 0.5698 - gender_loss: 0.6969 - loss: 4.7680 - race_accuracy: 0.1681 - race_loss: 2.0151 - val_age_accuracy: 0.2692 - val_age_loss: 1.8274 - val_gender_accuracy: 0.6749 - val_gender_loss: 0.5947 - val_loss: 4.3336 - val_race_accuracy: 0.2094 - val_race_loss: 1.8999
Epoch 2/20
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step - age_accuracy: 0.3056 - age_loss: 1.7561 - gender_accuracy: 0.6849 - gender_loss: 0.5926 - loss: 4.1908 - race_accuracy: 0.2452 - race_loss: 1.8421
Epoch 2: saving model to model_checkpoints/weights_e

In [None]:
# Fine-tune the base model
base_model.trainable = True  # Unfreeze the base model

# Keep the BatchNormalization layers frozen: a non-trainable BatchNormalization
# layer runs in inference mode, so the pretrained moving mean/variance are not
# overwritten by statistics from the small fine-tuning batches.
for layer in base_model.layers:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

# Recompile the model with a lower learning rate
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss={
        'age': 'sparse_categorical_crossentropy',
        'gender': 'sparse_categorical_crossentropy',
        'race': 'sparse_categorical_crossentropy',
    },
    metrics={
        'age': 'accuracy',
        'gender': 'accuracy',
        'race': 'accuracy',
    }
)

# Continue the epoch counter where the previous fit() stopped. Restarting at
# epoch 1 would overwrite the frozen-phase checkpoint files
# (weights_epoch_XX.weights.h5) and repeat epoch numbers in the epoch log.
initial_epoch = history.epoch[-1] + 1 if history.epoch else 0

# Train the model
history = model.fit(
    train_dataset,
    initial_epoch=initial_epoch,
    epochs=initial_epoch + 20,  # `epochs` is the final epoch index: 20 more epochs
    validation_data=val_dataset,
    callbacks=callbacks
)

Epoch 1/20
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 246ms/step - age_accuracy: 0.2410 - age_loss: 2.0329 - gender_accuracy: 0.5934 - gender_loss: 0.7621 - loss: 4.8595 - race_accuracy: 0.2377 - race_loss: 2.0644
Epoch 1: saving model to model_checkpoints/weights_epoch_01.weights.h5
[1m193/193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 312ms/step - age_accuracy: 0.2411 - age_loss: 2.0325 - gender_accuracy: 0.5934 - gender_loss: 0.7620 - loss: 4.8585 - race_accuracy: 0.2377 - race_loss: 2.0639 - val_age_accuracy: 0.2692 - val_age_loss: 2.2054 - val_gender_accuracy: 0.6710 - val_gender_loss: 0.6400 - val_loss: 4.7416 - val_race_accuracy: 0.2880 - val_race_loss: 1.8834
Epoch 2/20
[1m192/193[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 127ms/step - age_accuracy: 0.3256 - age_loss: 1.7476 - gender_accuracy: 0.6579 - gender_loss: 0.6355 - loss: 4.1573 - race_accuracy: 0.3207 - race_loss: 1.7743
Epoch 2: saving model to model_checkpoints/weights

In [None]:
# Read the last saved epoch log from the JSON file
epoch_log_path = 'epoch_log.json'

# Retrieve the last epoch's details. On failure, `last_epoch_values` holds a
# human-readable message instead of the metrics dict.
try:
    with open(epoch_log_path, 'r') as file:
        epoch_log = json.load(file)
    if epoch_log:
        last_epoch_values = epoch_log[-1]  # Get the last epoch details
    else:
        # A log with no entries has no "last" epoch to report.
        last_epoch_values = "Epoch log file is empty."
except FileNotFoundError:
    last_epoch_values = "No epoch log file found."
except Exception as e:
    last_epoch_values = f"An error occurred: {e}"

last_epoch_values


{'epoch': 20,
 'age_accuracy': 0.7926154732704163,
 'age_loss': 0.6943234205245972,
 'gender_accuracy': 0.9547820687294006,
 'gender_loss': 0.15857507288455963,
 'loss': 1.4603650569915771,
 'race_accuracy': 0.8262849450111389,
 'race_loss': 0.6137818098068237,
 'val_age_accuracy': 0.35240572690963745,
 'val_age_loss': 1.6984646320343018,
 'val_gender_accuracy': 0.7191157341003418,
 'val_gender_loss': 0.6310276985168457,
 'val_loss': 4.091618537902832,
 'val_race_accuracy': 0.38231468200683594,
 'val_race_loss': 1.7518839836120605}

In [None]:
import pickle

# Bundle the model's JSON architecture and its weights into one dictionary
model_data = dict(
    architecture=model.to_json(),
    weights=model.get_weights(),
)

# Write the bundle to disk as a pickle file
with open("model.pkl", "wb") as pickle_file:
    pickle.dump(model_data, pickle_file)

print("Model saved as model.pkl")


Model saved as model.pkl
