# DEMO NOTEBOOK

**Import all the libraries**

Importing libraries for data handling, visualization, deep learning, image processing, and model evaluation.


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
from sklearn.metrics import mean_absolute_error, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from tensorflow.keras.models import load_model
import subprocess
import gdown
import pickle

**Demo Code - To download files**

I defined a function that downloads a file from Google Drive given its file ID, and used it to fetch all the required model and dataset files. The download is skipped for any file that already exists locally.

In [None]:
def download_file(file_id, output):
    """Download a Google Drive file to ``output`` unless that path already exists.

    Args:
        file_id: Google Drive file ID, interpolated into the ``uc?id=`` URL.
        output: Local path to write to. If it already exists the download
            is skipped.

    Returns:
        None. The outcome is reported with ``print``.
    """
    if os.path.exists(output):
        print(f"{output} already exists, skipping download.")
        return

    url = f"https://drive.google.com/uc?id={file_id}"
    # gdown.download returns the output path on success. A falsy result means
    # nothing was fetched, so success must not be reported in that case.
    # NOTE(review): some gdown releases raise instead of returning None - confirm
    # against the installed version.
    fetched = gdown.download(url, output, quiet=True)
    if fetched:
        print(f"Downloaded {output}")
    else:
        print(f"Failed to download {output} (file id: {file_id})")

# (Google Drive file ID, local filename) pairs, fetched in the order listed.
drive_files = [
    ("1ms24THoput1n58NTo2TjZI1Rt0WTWJF", "face_age"),
    ("19nQRWGdeRbebgohygTslh80_ERMle88r", "test_set.csv"),
    ("1CciU3xti-8lbkKLsVQqhrQF7R6qT5VRi", "best_regression_model.h5"),
    ("1-V4i2p-NWIZjSbgT8mOcoM8yjbnfLx-Z", "best_classification_model.h5"),
    ("1fKaSCp0-tIcj1tDVpQ2ouJVt19fcHq-q", "block1_set.csv"),
    ("1fzbPYJeFhlvAXha2r4yn-qsOcRgzjeKS", "block2_set.csv"),
    ("1_jf1yhsHTLTzbMfiwbBG0zwQcxoIuGJy", "autoencoder_best_model.h5"),
    ("1iow6FC0mol3Zf-ys-uIbpn061zy-i5vH", "autoencoder_history.pkl"),
    ("13ZV3EjAaQtk-Y1r0JW-nxGIhzT-ItExa", "transfer_learning_block2.h5"),
    ("1X3OQh34rBCNOhqBfAaIU6Ty_CYl1hGEu", "backbone_history.pkl"),
    ("1EJAiAxXccTsJrpmeOkHyYr7GUX_WsD0U", "backbone_block1.h5"),
    ("1LDWJVoBltIPkV8za3AieaCElEIOyB0se", "test_set_with_gender.csv"),
    ("1ZPaZ2FKz6m0Bm_vdksB0hpDahvRkI-VE", "biased_model_male_only.h5"),
    ("1iaCM6Y_b7Cumo32SbQTjqHTHNLpsQT7G", "balanced_model.h5"),
]

for drive_id, local_name in drive_files:
    download_file(drive_id, local_name)


**Configure local image base path**

Here, I'm configuring the local image base path where the image file is present

In [None]:
# Build the image folder location under the current user's Downloads directory
home_dir = os.path.expanduser("~")
downloads_path = os.path.join(home_dir, "Downloads")
image_base_path = os.path.join(downloads_path, "face_age")
print(f"Using local image base path: {image_base_path}.")

**Load the test data and check if the image exists**

Load the test data and print a few sample image paths from the test_set.csv file to confirm the path format, then check whether those images exist on disk.

In [None]:
# Read the test-set manifest and preview the stored image paths
df_test = pd.read_csv("test_set.csv")
sample_paths = df_test['image_path'].head()
print("Sample image paths from test_set.csv:")
print(sample_paths)

In [None]:
def _to_full_path(stored_path):
    """Strip every 'face_age\\' / 'face_age/' segment and leading separators, then join onto image_base_path."""
    trimmed = stored_path.replace('face_age\\', '').replace('face_age/', '')
    trimmed = trimmed.lstrip('\\').lstrip('/')
    return os.path.join(image_base_path, trimmed)


df_test['full_path'] = df_test['image_path'].apply(_to_full_path)

# Check if first few paths exist
print("Checking first 5 full paths:")
for candidate in df_test['full_path'].head():
    status = 'Exists' if os.path.exists(candidate) else 'Missing'
    print(f"{candidate}: {status}")

# REGRESSION AND CLASSIFICATION DEMO

**Load the images for Regression and classification**

I loaded test images from file paths in 'df_test'. Each image was processed as follows:

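1. For classification, images were loaded with OpenCV (which returns BGR channel order), resized to 128×128 pixels, and normalized to the [0, 1] range. The loader does not reorder the channels to RGB.
2. For regression, images were loaded with OpenCV, converted to single-channel grayscale, resized to 128×128 pixels, and normalized to the [0, 1] range.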

I then extracted:

1. 'X_test_class' for the classification model, which contains the processed 3-channel color images.
2. 'y_test_class' as the one-hot encoded labels for classification.
3. 'X_test_reg' for the regression model, which contains the processed single-channel grayscale images.
4. 'y_test_reg' as the continuous age labels for regression.

Finally, I confirmed the shapes of the extracted data:

1. 'X_test_class' shape: (978, 128, 128, 3)
2. 'y_test_class' shape: (978, 6)
3. 'X_test_reg' shape: (978, 128, 128, 1)
4. 'y_test_reg' shape: (978,)


In [None]:
import numpy as np
import cv2
import pandas as pd
import os
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import mean_absolute_error

# Function to load images and labels
def load_images(df, img_size=(128, 128), for_classification=True,
                path_column='image_path', num_classes=6):
    """Load and preprocess the images listed in ``df`` together with their labels.

    Rows whose file is missing or cannot be decoded are reported with ``print``
    and skipped, so the returned arrays may be shorter than ``df``.

    Args:
        df: DataFrame with a path column plus 'age_category' (classification)
            or 'age' (regression).
        img_size: Size passed to ``cv2.resize``.
        for_classification: True returns 3-channel images with one-hot labels;
            False returns single-channel grayscale images with float ages.
        path_column: Column holding the path to read (default 'image_path').
        num_classes: Width of the one-hot label matrix (default 6).

    Returns:
        ``(images, one_hot_labels, label_encoder)`` when ``for_classification``
        is True, otherwise ``(images, ages)``. Images are float32 scaled by 1/255.
    """
    images = []
    labels = []
    label_column = 'age_category' if for_classification else 'age'

    for _, row in tqdm(df.iterrows(), total=len(df)):
        path = row[path_column]
        if not os.path.exists(path):
            print(f"Missing: {path}")
            continue

        img = cv2.imread(path)
        if img is None:
            print(f"Failed to load: {path}")
            continue

        if not for_classification:
            # Regression inputs are single-channel grayscale
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # cv2.imread yields BGR and no reordering happens here, so the
        # classification arrays stay in BGR channel order.
        # NOTE(review): confirm this matches what the classifier was trained on.
        img = cv2.resize(img, img_size).astype('float32') / 255.0
        if not for_classification:
            img = np.expand_dims(img, axis=-1)  # (H, W) -> (H, W, 1)

        images.append(img)
        labels.append(row[label_column])

    images = np.array(images, dtype='float32')

    if for_classification:
        # The encoder is fitted on the labels of the rows that actually loaded
        label_encoder = LabelEncoder()
        labels = label_encoder.fit_transform(labels)
        labels = to_categorical(labels, num_classes=num_classes)
        return images, labels, label_encoder
    return images, np.array(labels, dtype='float32')

# Reload the manifest so both loader calls start from the same rows
df_test = pd.read_csv('test_set.csv')

# Classification inputs (3-channel) with one-hot labels and the fitted encoder
X_test_class, y_test_class, label_encoder = load_images(df_test, for_classification=True)

# Regression inputs (single-channel grayscale) with continuous ages
X_test_reg, y_test_reg = load_images(df_test, for_classification=False)

# Report what was loaded
summary_lines = [
    f"Loaded {len(X_test_class)} test images for classification.",
    f"X_test_class shape (for class_model): {X_test_class.shape}",
    f"y_test_class shape: {y_test_class.shape}",
    f"Loaded {len(X_test_reg)} test images for regression.",
    f"X_test_reg shape (for reg_model): {X_test_reg.shape}",
    f"y_test_reg shape: {y_test_reg.shape}",
    f"LabelEncoder classes: {label_encoder.classes_}",
]
print(*summary_lines, sep="\n")


**Load Models**

I've loaded all the trained models here in one place instead of loading them separately in each section: the regression, classification, autoencoder, transfer learning, backbone, biased, and balanced models. The cell ends by printing the confirmation message "All the models has been sucessfully loaded!" once every model has loaded.

In [None]:
# Part 1 models: age regression (used with X_test_reg) and age-category classification
reg_model = load_model("best_regression_model.h5")
class_model = load_model("best_classification_model.h5")
# Autoencoder used in the reconstruction demo
autoencoder = load_model("autoencoder_best_model.h5")
# Models evaluated in the transfer-learning and backbone sections
transfer_model = load_model("transfer_learning_block2.h5")
backbone_model = load_model("backbone_block1.h5")
# Models compared against each other in the gender-bias section
biased_model = load_model("biased_model_male_only.h5")
balanced_model = load_model("balanced_model.h5")

# Only reached if every load_model call above returned without raising
print("All the models has been sucessfully loaded!")

**Age Regression Prediction**

I've used the 'reg_model' to predict ages on the grayscale test data ('X_test_reg'). Then I calculated the Mean Absolute Error (MAE) between the predicted and actual ages.

mae: the average difference in years between predicted and true ages

In [None]:
# Flatten the model output into one predicted age per image
raw_age_predictions = reg_model.predict(X_test_reg, verbose=0)
y_pred_reg = raw_age_predictions.flatten()
mae = mean_absolute_error(y_true=y_test_reg, y_pred=y_pred_reg)
print(f"Age Regression MAE: {mae:.2f} years")

**Age Classification Prediction**

I've used the 'class_model' to predict age categories on the color test data ('X_test_class'). Then I calculated the accuracy by comparing the predicted labels with the true labels.

acc: the percentage of correctly predicted age categories.

In [None]:
y_pred_class = class_model.predict(X_test_class, verbose=0)
# Reduce each output row / one-hot row to its integer class index
y_pred_labels = y_pred_class.argmax(axis=1)
y_true_labels = y_test_class.argmax(axis=1)
acc = accuracy_score(y_true=y_true_labels, y_pred=y_pred_labels)
print(f"Age Classification Accuracy: {acc:.2%}")


**Confusion Matrix**

I plotted the confusion matrix for the age classification model to visualize the prediction performance. The matrix shows how well the model predicted each age category. Each row is the true age, each column is the predicted age, and the numbers show how many times the model made each prediction.  Darker squares mean more predictions in that category.


In [None]:
# Class names in the index order the encoder assigned to the one-hot labels
class_names = label_encoder.classes_
# Fix the label set so the matrix always has one row/column per display label,
# even if a class never occurs in the true or predicted labels.
cm = confusion_matrix(y_true_labels, y_pred_labels, labels=np.arange(len(class_names)))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

# ConfusionMatrixDisplay.plot() opens its own figure unless an Axes is passed.
# Creating the Axes here makes figsize apply and avoids a stray empty figure.
fig, ax = plt.subplots(figsize=(8, 6))
disp.plot(ax=ax, cmap=plt.cm.Blues, xticks_rotation=45)
ax.set_title("Confusion Matrix - Age Classification")
fig.tight_layout()
plt.show()


**Regression Scatter plot for visualisation**

I created a scatter plot to visualize the regression model's performance by comparing true ages with predicted ages. This plot shows how well a model predicts age.  Each point represents a person.  The x-axis is their actual age, and the y-axis is the age the model predicted.  The red line shows perfect predictions.  The closer the points are to the red line, the better the model is.

In [None]:
y_true_reg = y_test_reg  # True continuous ages (1D array)
age_span = [y_true_reg.min(), y_true_reg.max()]

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_true_reg, y_pred_reg, alpha=0.7)
ax.set_xlabel("True Age")
ax.set_ylabel("Predicted Age")
ax.set_title("Regression Performance: Predicted vs. True Age")
# Dashed red identity line: points on it are perfect predictions
ax.plot(age_span, age_span, 'r--')
ax.grid(True)
fig.tight_layout()
plt.show()

**Show Sample Predictions for Regression and classification**

I created a function 'show_predictions()' to display random sample predictions for both the regression and classification tasks. For each sample:

**1. Regression**: Shows the true vs. predicted age values

**2. Classification**: Shows the true vs. predicted age category

In [None]:
def show_predictions(X, y_true, y_pred, task="regression", n=5, class_names=None):
    """Plot ``n`` randomly chosen images with their true and predicted values.

    Args:
        X: Array of images, indexable by integer.
        y_true: True values aligned with ``X`` (ages for regression, integer
            class indices for classification).
        y_pred: Predicted values aligned with ``X``.
        task: "regression" formats the prediction as a number with one decimal.
            Any other value treats the labels as class indices.
        n: Number of samples to draw, capped at ``len(X)``.
        class_names: Optional sequence mapping class index to name. Defaults
            to the sorted categories of the global ``df_test['age_category']``.
    """
    n = min(n, len(X))
    if n == 0:
        print("No samples available to display.")
        return

    is_regression = task == "regression"
    if not is_regression and class_names is None:
        # Built once rather than once per image. This is the same ordering the
        # one-hot column lookup produces.
        class_names = pd.get_dummies(df_test['age_category']).columns

    def _label(value):
        # Integer class indices are translated to names; anything else is shown as-is
        return class_names[value] if isinstance(value, (int, np.integer)) else value

    indices = np.random.choice(len(X), n, replace=False)
    fig, axes = plt.subplots(1, n, figsize=(10, 6), squeeze=False)
    for ax, idx in zip(axes[0], indices):
        img = X[idx]
        if img.ndim == 3 and img.shape[-1] == 1:
            # Single-channel images are drawn in gray instead of the default colormap
            ax.imshow(img[..., 0], cmap='gray')
        else:
            ax.imshow(img)

        if is_regression:
            title = f"True: {y_true[idx]}\nPred: {y_pred[idx]:.1f}"
        else:
            title = f"True: {_label(y_true[idx])}\nPred: {_label(y_pred[idx])}"
        ax.set_title(title)
        ax.axis('off')
    fig.tight_layout()
    plt.show()

# Regression samples: true age next to predicted age
print("\n Sample Regression Predictions:")
show_predictions(X=X_test_reg, y_true=y_test_reg, y_pred=y_pred_reg, task="regression", n=5)

# Classification samples: true category next to predicted category
print("\n Sample Classification Predictions:")
show_predictions(X=X_test_class, y_true=y_true_labels, y_pred=y_pred_labels, task="classification", n=5)


# Autoencoder Demo

**Evaluate the Training and Validation loss**

In Part 2, I captured the full training history and saved it as the autoencoder_history.pkl file. Here it is loaded again for the demo section to print the final loss metrics.

In [None]:
df_block_1 = pd.read_csv("block1_set.csv")

# Load training history
# NOTE: pickle.load can execute code embedded in the file; only open history
# files that you produced yourself.
with open("autoencoder_history.pkl", "rb") as f:
    history = pickle.load(f)

# Print losses recorded at the final epoch
final_train_loss = history['loss'][-1]
final_val_loss = history['val_loss'][-1]
print(f"Autoencoder Training Loss (Final Epoch): {final_train_loss:.4f}")
print(f"Autoencoder Validation Loss (Final Epoch): {final_val_loss:.4f}")

**Demonstrated the autoencoder's ability to reconstruct images by comparing the original images with their reconstructed versions.**

In [None]:
print("Autoencoder Reconstruction Demo:")
try:
    # Never ask for more samples than exist; choice(..., replace=False) would raise
    n_samples = min(5, X_test_class.shape[0])
    if n_samples == 0:
        raise ValueError("No test images available")
    indices = np.random.choice(X_test_class.shape[0], n_samples, replace=False)
    images = X_test_class[indices]
    reconstructions = autoencoder.predict(images, verbose=0)
    if reconstructions.shape != images.shape:
        raise ValueError("Reconstruction shape error")

    # imshow expects float image data in [0, 1]. Clip a display copy so that
    # out-of-range outputs do not trigger clipping warnings.
    display_reconstructions = np.clip(reconstructions, 0.0, 1.0)

    # Top row: inputs. Bottom row: autoencoder outputs for the same images.
    fig, axes = plt.subplots(2, n_samples, figsize=(10, 4), squeeze=False)
    for col in range(n_samples):
        axes[0, col].imshow(images[col])
        axes[0, col].set_title("Original")
        axes[0, col].axis('off')
        axes[1, col].imshow(display_reconstructions[col])
        axes[1, col].set_title("Reconstructed")
        axes[1, col].axis('off')
    fig.suptitle("Autoencoder Reconstructions")
    fig.tight_layout()
    plt.show()
except Exception as e:
    # Best-effort demo cell: report the problem and let the notebook continue
    print(f"Error: {e}")


These images show how well the autoencoder recreates face images. The top row has the original images, and the bottom row has the autoencoder's reconstructions. It's a visual comparison of the input and output of the autoencoder.

# Transfer Learning Demo

**Model Evaluation Summary**

I implemented a function to load and preprocess images from specified paths, ensuring they are resized to (128, 128) and normalized for model input. I loaded the test dataset from a CSV file and constructed full image paths for loading. To prepare the labels for evaluation, I used 'LabelEncoder' to convert categorical labels into a numerical format. The transfer learning model was then evaluated on the test dataset, calculating both test loss and accuracy. Finally, I compared the test accuracy of the current model against the best accuracy from Part 1 to assess the performance.

In [None]:
from tensorflow.keras.utils import to_categorical

def load_images_from_paths(image_paths, image_size=(128, 128)):
    """Read, RGB-convert, resize and scale the images at ``image_paths``.

    Paths that do not exist or that OpenCV cannot decode are reported with
    ``print`` and skipped, so the result may hold fewer images than there are
    paths. Callers pairing the result with per-row labels must account for that.

    Args:
        image_paths: Iterable of file paths.
        image_size: Size passed to ``cv2.resize``.

    Returns:
        float32 array of the loaded images, scaled by 1/255.
    """
    images = []
    for path in image_paths:
        if not os.path.exists(path):
            print(f"Image not found: {path}")
            continue

        img = cv2.imread(path)
        if img is None:
            # imread returns None for unreadable files; cvtColor would raise on it
            print(f"Failed to load: {path}")
            continue

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, image_size)
        images.append(img.astype('float32') / 255.0)
    return np.array(images, dtype='float32')

# Load test CSV
df_test = pd.read_csv('test_set.csv')

# The stored image_path is used unchanged as the path to read
df_test['full_path'] = df_test['image_path']

# Load images
test_paths = df_test['full_path'].values
X_test = load_images_from_paths(test_paths)

# The loader skips files it cannot find, while the labels below come from
# every CSV row. Stop with a clear message instead of evaluating misaligned data.
if len(X_test) != len(df_test):
    raise ValueError(
        f"Loaded {len(X_test)} images for {len(df_test)} CSV rows; "
        "images and labels would be misaligned."
    )

# Encode labels
label_encoder = LabelEncoder()
y_test_encoded = label_encoder.fit_transform(df_test['age_category'])
y_test_class = to_categorical(y_test_encoded, num_classes=6)

test_loss, test_acc = transfer_model.evaluate(X_test, y_test_class)
print(f"Transfer Learning Test Accuracy: {test_acc:.4f}")
# acc is the Part 1 classification accuracy computed earlier in the notebook
print(f"Comparison to Part 1 Best Model: {test_acc - acc:+.4f}")

**Visualize Sample Predictions from Transfer Learning Model**

In [None]:
# Model outputs -> integer class indices
y_pred_probs = transfer_model.predict(X_test)
y_pred_classes = y_pred_probs.argmax(axis=1)
y_true_classes = y_test_class.argmax(axis=1)

# Integer indices -> category names
pred_labels = label_encoder.inverse_transform(y_pred_classes)
true_labels = label_encoder.inverse_transform(y_true_classes)

num_samples = 5  # Number of samples to visualize
random_indices = np.random.choice(len(X_test), size=num_samples, replace=False)

# One panel per sampled image; the title is green when the prediction is right
fig, axes = plt.subplots(1, num_samples, figsize=(12, 6))
for ax, idx in zip(np.atleast_1d(axes), random_indices):
    ax.imshow(X_test[idx])
    is_correct = pred_labels[idx] == true_labels[idx]
    color = 'green' if is_correct else 'red'
    ax.set_title(f"Pred: {pred_labels[idx]}\nTrue: {true_labels[idx]}", color=color)
    ax.axis('off')

fig.tight_layout()
fig.subplots_adjust(top=0.85)
fig.suptitle("Sample Transfer Learning Predictions", fontsize=16)
plt.show()

These are sample predictions made by the transfer learning model. Its test accuracy differs from that of the best Part 1 model by -0.0051.

# Backbone model Reuse Demo

In [None]:
# Backbone Model Demo (ResNet50)
print("Backbone Model Demo (ResNet50):")

# Best validation accuracy recorded in the saved backbone training history
try:
    with open('backbone_history.pkl', 'rb') as f:  # history file
        history = pickle.load(f)
    best_val_accuracy = max(history['val_accuracy'])
    print(f"Best validation accuracy: {best_val_accuracy:.4f}")
except FileNotFoundError:
    print("Error: 'backbone_history.pkl' not found. Please provide the history file.")
    best_val_accuracy = None

# Evaluate Backbone model on test set.
# Predictions must come from backbone_model, on the same X_test that is passed
# to evaluate() below, so the sample labels match the reported accuracy.
y_pred_backbone = backbone_model.predict(X_test, verbose=0)
y_pred_backbone_labels = np.argmax(y_pred_backbone, axis=1)
y_true_labels = np.argmax(y_test_class, axis=1)
test_loss, test_acc = backbone_model.evaluate(X_test, y_test_class)
# acc is the Part 1 classification accuracy computed earlier in the notebook
accuracy_diff = test_acc - acc
print(f"Backbone Model Test Accuracy: {test_acc:.4f}")
print(f"Comparison to Part 1 Best Model ({acc:.4f}): {accuracy_diff:+.4f}")

**Visualize sample predictions from Backbone model**

In [None]:
print("Sample Predictions from Backbone Model (ResNet50):")

n_samples = 6
indices = np.random.choice(len(X_test_class), n_samples, replace=False)

sample_images = X_test_class[indices]
sample_preds = y_pred_backbone_labels[indices]
sample_trues = y_true_labels[indices]

# Show category names when an encoder is available, raw indices otherwise
if 'label_encoder' in globals():
    pred_labels = label_encoder.inverse_transform(sample_preds)
    true_labels = label_encoder.inverse_transform(sample_trues)
else:
    pred_labels = sample_preds
    true_labels = sample_trues

# One panel per sample; the title is green for a match and red for a miss
fig, axes = plt.subplots(1, n_samples, figsize=(15, 4))
for ax, image, truth, guess in zip(np.atleast_1d(axes), sample_images, true_labels, pred_labels):
    ax.imshow(image)
    ax.set_title(f"True: {truth}\nPred: {guess}", color='green' if guess == truth else 'red')
    ax.axis('off')

fig.suptitle("ResNet50 Model: Predicted vs. Actual Age Categories on Test Set", fontsize=14)
fig.tight_layout()
plt.show()


# Gender Bias Analysis Demo

**Demo for Gender Bias analysis:**

I conducted a gender bias analysis to compare the performance of two models:

1. **Biased Model (Male-Only)**: Trained on male-only data
2. **Balanced Model**: Trained on a balanced gender dataset

For each model, I calculated: Overall accuracy and Accuracy for each gender (Male and Female).
  
I then visualized the gender-specific accuracies in a bar chart.

In [None]:
print("Gender Bias Analysis Demo:")

try:
    # Gender annotations; rows are expected in the same order as df_test
    df_test_gender = pd.read_csv("test_set_with_gender.csv")

    if len(df_test_gender) != len(df_test):
        raise ValueError("Gender test set size mismatch")

    # Copy by position (.values) so the gender column lines up row-for-row
    df_test['gender'] = df_test_gender['gender'].values
    gender_labels_mapped = df_test['gender'].map({0: 'Male', 1: 'Female'}).values

    # Integer class index for every test row
    y_true = np.argmax(y_test_class, axis=1)

    def evaluate_model(model, name):
        """Print the overall accuracy of ``model``, then its accuracy per gender group."""
        _, overall_acc = model.evaluate(X_test, y_test_class, verbose=0)
        print(f"{name} Overall Accuracy: {overall_acc:.2%}")
        for group in np.unique(gender_labels_mapped):
            in_group = gender_labels_mapped == group
            _, group_acc = model.evaluate(X_test[in_group], y_test_class[in_group], verbose=0)
            print(f"{name} {group} Accuracy: {group_acc:.2%}")

    # Run evaluations
    for candidate, title in (
        (biased_model, "Biased Model (Male-Only)"),
        (balanced_model, "Balanced Model"),
    ):
        evaluate_model(candidate, title)

except Exception as e:
    print(f"Error: {e}")

**Bar graph visualisation**

In [None]:
# Visualization: grouped bars of per-gender accuracy for both models
genders = np.unique(gender_labels_mapped)
fig, ax = plt.subplots(figsize=(8, 6))
width = 0.35


def _accuracy_by_gender(model):
    """Return the model's accuracy on each gender subset, in the order of ``genders``."""
    scores = []
    for g in genders:
        selector = gender_labels_mapped == g
        # evaluate() returns [loss, accuracy]; keep the accuracy
        scores.append(model.evaluate(X_test[selector], y_test_class[selector], verbose=0)[1])
    return scores


accs_biased = _accuracy_by_gender(biased_model)
accs_balanced = _accuracy_by_gender(balanced_model)

# Two bars per gender, offset half a bar width either side of the tick
positions = np.arange(len(genders))
ax.bar(positions - width/2, accs_biased, width, label='Biased Model', color='orange')
ax.bar(positions + width/2, accs_balanced, width, label='Balanced Model', color='blue')
ax.set_xlabel('Gender')
ax.set_ylabel('Accuracy')
ax.set_title('Gender-Specific Accuracy')
ax.set_xticks(positions)
ax.set_xticklabels(genders)
ax.legend()
fig.tight_layout()
plt.show()

**Sample Predictions for Biased and Balanced models**

I visualized predictions from two models :
**Biased Model** and **Balanced Model** for both **Male** and **Female** test samples.

1. **True label**: Actual age category
2. **Predicted labels**: Age categories predicted by each model

For each image:
1. **Green**: Correct prediction
2. **Red**: Incorrect prediction

The plot displays 3 random samples from both male and female categories, showing the true label along with the predictions from both models.


In [None]:
# Load data and define mappings
df_test_gender = pd.read_csv("test_set_with_gender.csv")
gender_labels_mapped = np.array(['Male' if g == 0 else 'Female' for g in df_test_gender['gender']])
# Take the index -> name mapping from the encoder that produced y_test_class.
# A hand-written table can disagree with the encoder's sorted class order and
# attach the wrong name to an index.
age_categories = dict(enumerate(label_encoder.classes_))

# Get predictions on X_test, the same inputs the accuracy cells evaluate on
y_pred_biased_labels = np.argmax(biased_model.predict(X_test, verbose=0), axis=1)
y_pred_balanced_labels = np.argmax(balanced_model.predict(X_test, verbose=0), axis=1)
y_true_labels = np.argmax(y_test_class, axis=1)

# Select up to 3 random samples per gender (fewer if a group is smaller)
male_indices = np.where(gender_labels_mapped == 'Male')[0]
female_indices = np.where(gender_labels_mapped == 'Female')[0]
selected_indices = np.concatenate([
    np.random.choice(male_indices, min(3, len(male_indices)), replace=False),
    np.random.choice(female_indices, min(3, len(female_indices)), replace=False),
]).astype(int)

fig, axes = plt.subplots(2, 3, figsize=(12, 8))
axes = axes.flatten()

# Hide every panel up front so unused ones stay blank
for ax in axes:
    ax.axis('off')

for ax, idx in zip(axes, selected_indices):
    true_label = age_categories[y_true_labels[idx]]
    biased_pred = age_categories[y_pred_biased_labels[idx]]
    balanced_pred = age_categories[y_pred_balanced_labels[idx]]

    ax.imshow(X_test[idx])
    ax.axis('off')
    ax.set_title(gender_labels_mapped[idx])
    # Text y-positions are in image pixel coordinates, just below the 128-pixel-high image
    ax.text(0, 135, f"True: {true_label}", fontsize=10, color='black')
    ax.text(0, 150, f"Biased: {biased_pred}", fontsize=10,
            color='green' if biased_pred == true_label else 'red')
    ax.text(0, 165, f"Balanced: {balanced_pred}", fontsize=10,
            color='green' if balanced_pred == true_label else 'red')

fig.suptitle('Sample Predictions', fontsize=14)
fig.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()
