In [1]:
# Import necessary libraries
import cv2
import numpy as np
import os
from glob import glob
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Convolution2D, Activation
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
import tensorflow as tf
import gdown
from deepface.basemodels import VGGFace
from deepface.commons import functions

# Check TensorFlow version for compatibility.
# Only the major version (the text before the first ".") is inspected; any
# major version other than 1 or 2 leaves the imports from the top of this cell
# untouched.
tf_version = int(tf.__version__.split(".", maxsplit=1)[0])
if tf_version == 1:
    # TF 1.x: rebind these names to the standalone `keras` package.
    # NOTE(review): the other layers imported at the top of this cell (Conv2D,
    # MaxPooling2D, Dense, Dropout) stay bound to tensorflow.keras, so this
    # branch mixes the two packages -- confirm that this is intended.
    from keras.models import Model, Sequential
    from keras.layers import Convolution2D, Flatten, Activation
elif tf_version == 2:
    # TF 2.x: the same names come from tensorflow.keras, which the imports at
    # the top of this cell already bound, so this re-import changes nothing.
    from tensorflow.keras.models import Model, Sequential
    from tensorflow.keras.layers import Convolution2D, Flatten, Activation

# Integer race code -> human-readable label, in code order (0..4).
# Code 4 ("Others") covers Hispanic, Latino, Middle Eastern, etc.
race_mapping = dict(enumerate(("White", "Black", "Asian", "Indian", "Others")))





In [2]:
def preprocess_image(image_path):
    """Load an image file and turn it into a normalised RGB array.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image converted from OpenCV's BGR channel order to RGB, resized
        to 200x200, passed through ``img_to_array`` and divided by 255.0.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If the file exists but OpenCV could not decode it.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # without this check the colour conversion below fails with an
        # error that does not mention the offending path.
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path!r}")
        raise ValueError(f"Could not decode image file: {image_path!r}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (200, 200))
    return img_to_array(img) / 255.0

def parse_labels(filename):
    """Read ``(age, gender, race)`` from an underscore-delimited file name.

    Only the base name of ``filename`` is used; its first three
    ``_``-separated fields are converted to ``int`` in order.

    Args:
        filename: File name or path whose base name starts with
            ``<age>_<gender>_<race>_``.

    Returns:
        A tuple ``(age, gender, race)`` of integers.

    Raises:
        ValueError: If one of the first three fields is not an integer.
        IndexError: If the base name has fewer than three fields.
    """
    fields = os.path.basename(filename).split("_")
    return tuple(int(fields[position]) for position in range(3))

# Function to safely parse labels from filename
def safe_parse_labels(filename):
    """Read ``(age, gender, race)`` from a file name without raising on bad names.

    Args:
        filename: File name or path; only its base name is inspected.

    Returns:
        A tuple ``(age, gender, race)`` of integers taken from the first
        three ``_``-separated fields, or ``None`` when the base name has
        fewer than three fields or one of those fields is not an integer.
    """
    fields = os.path.basename(filename).split("_")
    if len(fields) < 3:
        # Not enough fields to hold age, gender and race.
        return None
    try:
        return tuple(int(field) for field in fields[:3])
    except ValueError:
        # A non-numeric field marks the file as not following the format.
        return None

In [3]:
# Data Generators for Training
def _labelled_batch_generator(file_paths, batch_size, label_index):
    """Build an endless generator of ``(images, targets)`` batches.

    File names are parsed once, up front, with ``safe_parse_labels``; files
    whose name does not parse are dropped before sampling, so every yielded
    batch holds exactly ``batch_size`` samples.

    Args:
        file_paths: Sequence of image paths to sample from.
        batch_size: Number of samples per yielded batch.
        label_index: Position of the wanted label in the tuple returned by
            ``safe_parse_labels`` (0 = age, 1 = gender, 2 = race).

    Returns:
        A generator yielding ``(images, targets)`` pairs of float32 arrays.

    Raises:
        ValueError: If no path in ``file_paths`` has a parseable name.
            Raised here rather than looping forever looking for a
            non-empty batch.
    """
    samples = []
    for path in file_paths:
        labels = safe_parse_labels(path)
        if labels is not None:
            samples.append((path, labels[label_index]))

    if not samples:
        raise ValueError(
            "No file in file_paths has a parseable '<age>_<gender>_<race>_...' name."
        )

    def _batches():
        while True:
            # np.random.choice samples with replacement by default, so a
            # path may appear more than once in a batch.
            chosen = np.random.choice(len(samples), size=batch_size)
            images = []
            targets = []
            for sample_index in chosen:
                path, target = samples[sample_index]
                images.append(preprocess_image(path))
                targets.append(target)
            yield np.array(images, dtype='float32'), np.array(targets, dtype='float32')

    return _batches()


def age_data_generator(file_paths, batch_size):
    """Endlessly yield ``(images, ages)`` batches sampled from ``file_paths``.

    Args:
        file_paths: Sequence of image paths named ``<age>_<gender>_<race>_...``.
        batch_size: Number of samples per batch.

    Returns:
        A generator of ``(images, ages)`` float32 array pairs.

    Raises:
        ValueError: If no path in ``file_paths`` has a parseable name.
    """
    return _labelled_batch_generator(file_paths, batch_size, label_index=0)


def gender_data_generator(file_paths, batch_size):
    """Endlessly yield ``(images, genders)`` batches sampled from ``file_paths``.

    Args:
        file_paths: Sequence of image paths named ``<age>_<gender>_<race>_...``.
        batch_size: Number of samples per batch.

    Returns:
        A generator of ``(images, genders)`` float32 array pairs.

    Raises:
        ValueError: If no path in ``file_paths`` has a parseable name.
    """
    return _labelled_batch_generator(file_paths, batch_size, label_index=1)

# Path to your images directory
images_directory = 'E:/capJC/paper/part1'  # Update with your path

# glob returns files in file-system order, which is not stable across
# machines; sorting keeps the seeded split below reproducible.
img_paths = sorted(glob(os.path.join(images_directory, "*.jpg")))
if not img_paths:
    # Fail here with the offending path instead of inside train_test_split.
    raise FileNotFoundError(
        f"No .jpg images found in {images_directory!r}; update images_directory."
    )

# Splitting dataset into training and testing (80% / 20%, fixed seed)
train_paths, test_paths = train_test_split(img_paths, test_size=0.2, random_state=42)

# Define batch size
batch_size = 32  # Adjust this based on your memory constraints

# Training data generators
train_age_generator = age_data_generator(train_paths, batch_size)
train_gender_generator = gender_data_generator(train_paths, batch_size)

# Define the age_model: one conv/pool stage followed by a dense head that
# regresses a single value from a 200x200 RGB image.
age_model = Sequential()
age_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(200, 200, 3)))
age_model.add(MaxPooling2D(2, 2))
age_model.add(Flatten())
age_model.add(Dense(128, activation='relu'))
age_model.add(Dense(1, name='age_output'))  # no activation: linear output
# Mean squared error as the loss, mean absolute error as the reported metric.
age_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Define the gender_model: same backbone as the age model, but the head ends
# in a single sigmoid unit for binary classification.
gender_model = Sequential()
gender_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(200, 200, 3)))
gender_model.add(MaxPooling2D(2, 2))
gender_model.add(Flatten())
gender_model.add(Dense(128, activation='relu'))
gender_model.add(Dense(1, activation='sigmoid', name='gender_output'))
gender_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])







In [4]:
# Load pre-built model for ethnicity prediction
def load_ethnicity_model(url="https://github.com/serengil/deepface_models/releases/download/v1.0/race_model_single_batch.h5"):
    """Build the pre-trained ethnicity classifier and load its weights.

    The network is the VGGFace base model with a new head attached to the
    output of its fourth-from-last layer: a 1x1 convolution with one channel
    per class, flattened and passed through softmax.

    NOTE(review): the head has 6 classes while ``race_mapping`` in this
    notebook has 5 entries, so predicted indices presumably do not line up
    with ``race_mapping`` -- confirm the class order of the weights file.

    Args:
        url: Location the weights file is downloaded from when it is not
            already present in the deepface weights directory.

    Returns:
        The Keras ``Model`` with the downloaded weights loaded.

    Raises:
        FileNotFoundError: If the weights file is still missing after the
            download attempt.
    """
    model = VGGFace.baseModel()
    classes = 6

    # Attach the classification head to the base network.
    base_model_output = Convolution2D(classes, (1, 1), name="predictions")(model.layers[-4].output)
    base_model_output = Flatten()(base_model_output)
    base_model_output = Activation("softmax")(base_model_output)
    race_model = Model(inputs=model.input, outputs=base_model_output)

    # Load weights, downloading them on first use.
    home = functions.get_deepface_home()
    weights_dir = os.path.join(home, ".deepface", "weights")
    output = os.path.join(weights_dir, "race_model_single_batch.h5")
    if not os.path.isfile(output):
        # Make sure the target directory exists before downloading into it.
        os.makedirs(weights_dir, exist_ok=True)
        print("race_model_single_batch.h5 will be downloaded...")
        gdown.download(url, output, quiet=False)
        if not os.path.isfile(output):
            raise FileNotFoundError(
                f"Downloading the weights from {url} did not produce {output!r}"
            )
    race_model.load_weights(output)
    return race_model

ethnicity_model = load_ethnicity_model()

# Calculate steps_per_epoch for training.
# Floor division gives 0 when there are fewer training files than batch_size;
# clamp to 1 so each epoch runs at least one batch.
steps_per_epoch = max(1, len(train_paths) // batch_size)


In [6]:
# Both models are trained on the same schedule.
fit_settings = dict(steps_per_epoch=steps_per_epoch, epochs=10)

# Train the age model
history_age = age_model.fit(train_age_generator, **fit_settings)

# Train the gender model
history_gender = gender_model.fit(train_gender_generator, **fit_settings)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns

# Function to plot training and validation loss
def plot_loss(history, title):
    """Plot the per-epoch training loss, plus validation loss when recorded.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain ``'loss'``.
        title: Title shown above the figure.

    The validation curve is drawn only if ``'val_loss'`` is present, which is
    the case only when ``fit`` was given validation data.
    """
    logs = history.history
    plt.figure()
    plt.plot(logs['loss'], label='Training Loss')
    if 'val_loss' in logs:
        plt.plot(logs['val_loss'], label='Validation Loss')
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Function to plot training and validation accuracy
def plot_accuracy(history, title, is_binary=False):
    """Plot the per-epoch training metric, plus its validation curve if any.

    Args:
        history: Object returned by ``model.fit``.
        title: Title shown above the figure.
        is_binary: When True, plot the accuracy metric. When False, plot
            mean absolute error if it was recorded, otherwise fall back to
            accuracy so existing calls on classifier histories keep working.

    Raises:
        KeyError: If none of the expected metric keys is in the history.
    """
    logs = history.history
    mae_keys = ('mae', 'mean_absolute_error')
    accuracy_keys = ('accuracy', 'acc')  # 'acc' is the older Keras key name
    candidates = accuracy_keys if is_binary else mae_keys + accuracy_keys

    metric_key = next((key for key in candidates if key in logs), None)
    if metric_key is None:
        raise KeyError(
            f"None of {candidates} found in history; available keys: {sorted(logs)}"
        )

    # Label the curves after the metric actually plotted.
    metric_name = 'MAE' if metric_key in mae_keys else 'Accuracy'
    val_key = 'val_' + metric_key

    plt.figure()
    plt.plot(logs[metric_key], label=f'Training {metric_name}')
    if val_key in logs:
        # Only present when fit() was given validation data.
        plt.plot(logs[val_key], label=f'Validation {metric_name}')
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()
    plt.show()

# Ethnicity Confusion Matrix
def generate_confusion_matrix(ethnicity_generator, model, num_classes, steps=None):
    """Predict over batches and draw the ethnicity confusion matrix.

    Args:
        ethnicity_generator: Iterable of ``(images, labels)`` batches, where
            ``labels`` is one-hot encoded (argmax over axis 1 is taken).
        model: Model whose ``predict(images)`` returns per-class scores.
        num_classes: Number of classes; the matrix is always
            ``num_classes x num_classes``, including classes that never occur.
        steps: Optional number of batches to consume. Leave as ``None`` only
            for a finite iterable; an endless generator needs a value here or
            the loop never ends.
    """
    from itertools import islice

    batches = ethnicity_generator if steps is None else islice(ethnicity_generator, steps)

    y_true = []
    y_pred = []
    for images, labels in batches:
        preds = model.predict(images)
        y_true.extend(np.argmax(labels, axis=1))
        y_pred.extend(np.argmax(preds, axis=1))

    # Fix the class set explicitly so the matrix shape always matches the
    # tick labels, even when some class is absent from the data.
    class_ids = list(range(num_classes))
    # Fall back to the numeric id for classes race_mapping does not name.
    class_names = [race_mapping.get(class_id, str(class_id)) for class_id in class_ids]

    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    sns.heatmap(cm, annot=True, fmt='g', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Ethnicity Prediction Confusion Matrix')
    plt.show()

# Assuming 'test_ethnicity_generator' is a valid generator for the test set
# generate_confusion_matrix(test_ethnicity_generator, ethnicity_model, len(race_mapping))

# Note: To use the plot functions and confusion matrix, ensure that the respective history objects and test generators are correctly defined and generated.
