In [None]:
#@title Step 1: Install Dependencies
!pip install deepface
!pip install tqdm
!pip install scikit-learn


Collecting deepface
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting lz4>=4.3.3 (from mtcnn>=0.1.0->deepface)
  Downloading lz4-4.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading deepface-0.0.93-py3-none-any.whl (108 kB)
[2K   [90m━━

In [None]:
from google.colab import files
uploaded = files.upload()
!unzip -q dataset.zip


Saving dataset.zip to dataset.zip


In [None]:
#@title Step 3: Prepare the Data and Extract Features
import os
import cv2
import numpy as np
from deepface import DeepFace
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Define dataset path and labels mapping
dataset_path = "dataset"  # make sure your zip unzips to a folder named 'dataset'
label_map = {"man": 0, "woman": 1}

# Parallel lists: embeddings[i] is the feature vector whose class id is labels[i]
embeddings = []
labels = []

# Iterate over the class subfolders ("man" and "woman")
for gender in label_map:
    folder = os.path.join(dataset_path, gender)
    if not os.path.isdir(folder):
        print(f"Folder {folder} not found.")
        continue
    # os.listdir returns entries in arbitrary order. Sorting fixes the row order
    # of X and y, so the seeded train/test split performed later is reproducible.
    for file in tqdm(sorted(os.listdir(folder)), desc=f"Processing {gender} images"):
        file_path = os.path.join(folder, file)
        # Decodability pre-check: cv2.imread returns None for files it cannot
        # read as an image, and those entries are skipped silently.
        img = cv2.imread(file_path)
        if img is None:
            continue

        # You can optionally resize or preprocess here if needed

        try:
            # Extract the embedding with the 'VGG-Face' model.
            # enforce_detection=False allows processing even if a face is not
            # detected, so some rows may not correspond to an actual face.
            representation = DeepFace.represent(img_path=file_path, model_name='VGG-Face', enforce_detection=False)
            # Only the first returned representation is kept for each image
            embeddings.append(representation[0]["embedding"])
            labels.append(label_map[gender])
        except Exception as e:
            # Best effort: report the failing file and continue with the rest
            print(f"Error processing {file_path}: {e}")

# Convert lists to numpy arrays
X = np.array(embeddings)
y = np.array(labels)

print("Embeddings shape:", X.shape)
print("Labels shape:", y.shape)


25-03-28 14:55:28 - Directory /root/.deepface has been created
25-03-28 14:55:28 - Directory /root/.deepface/weights has been created


Processing man images:   0%|          | 0/1173 [00:00<?, ?it/s]

25-03-28 14:55:30 - vgg_face_weights.h5 will be downloaded...


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/vgg_face_weights.h5
To: /root/.deepface/weights/vgg_face_weights.h5

  0%|          | 0.00/580M [00:00<?, ?B/s][A
  4%|▎         | 21.0M/580M [00:00<00:02, 208MB/s][A
 10%|▉         | 56.1M/580M [00:00<00:01, 292MB/s][A
 16%|█▌        | 92.3M/580M [00:00<00:01, 322MB/s][A
 22%|██▏       | 128M/580M [00:00<00:01, 336MB/s] [A
 28%|██▊       | 163M/580M [00:00<00:01, 327MB/s][A
 34%|███▎      | 196M/580M [00:00<00:01, 320MB/s][A
 39%|███▉      | 228M/580M [00:00<00:01, 304MB/s][A
 45%|████▍     | 260M/580M [00:00<00:01, 307MB/s][A
 50%|█████     | 290M/580M [00:00<00:00, 296MB/s][A
 55%|█████▌    | 320M/580M [00:01<00:00, 270MB/s][A
 60%|██████    | 348M/580M [00:01<00:00, 268MB/s][A
 65%|██████▍   | 375M/580M [00:01<00:00, 261MB/s][A
 69%|██████▉   | 402M/580M [00:01<00:00, 255MB/s][A
 74%|███████▍  | 431M/580M [00:01<00:00, 264MB/s][A
 79%|███████▉  | 459M/580M [00:01<00:00, 265MB/s][

Embeddings shape: (2307, 4096)
Labels shape: (2307,)


In [None]:
#@title Step 4: Train a Gender Classifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear-kernel SVM. probability=True enables predict_proba(), which a later
# cell calls. Those probability estimates are fitted with an internal shuffled
# cross-validation, so random_state is fixed to make them repeatable too.
clf = SVC(kernel='linear', probability=True, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Evaluate the classifier on the held-out split
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.987012987012987

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99       232
           1       0.98      0.99      0.99       230

    accuracy                           0.99       462
   macro avg       0.99      0.99      0.99       462
weighted avg       0.99      0.99      0.99       462



In [None]:
#@title Step 5: Save the Classifier (Optional)
import joblib
# Write the fitted classifier to the working directory.
# `clf` must already exist in the kernel (it is created by the training cell);
# otherwise this line raises NameError.
joblib.dump(value=clf, filename="gender_classifier.pkl")
print("Model saved as gender_classifier.pkl")


NameError: name 'clf' is not defined

In [None]:
#@title Step 6: Test an Uploaded Image for Gender Classification
from google.colab import files
import cv2
import numpy as np
from deepface import DeepFace
import joblib
import matplotlib.pyplot as plt

# Upload one or more images; the returned mapping is iterated by file name below
uploaded = files.upload()

# Load the saved classifier once, before the loop, rather than once per image.
# This rebinds the global name `clf`.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code, so only load a gender_classifier.pkl that you created yourself.
clf = joblib.load("gender_classifier.pkl")

# Process each uploaded file
for filename in uploaded.keys():
    # Read the image using OpenCV; imread returns None when it cannot decode the file
    img = cv2.imread(filename)
    if img is None:
        print("Error reading the image file.")
        continue

    # Display the image (convert from BGR to RGB for proper colors)
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img_rgb)
    plt.title("Uploaded Image")
    plt.axis("off")
    plt.show()

    # Extract face embedding using DeepFace
    try:
        # enforce_detection=False allows processing even if no face is detected
        representation = DeepFace.represent(img_path=filename, model_name='VGG-Face', enforce_detection=False)
        # Keep the first representation and reshape it to a single row,
        # the 2-D layout that clf.predict expects for one sample.
        embedding = np.array(representation[0]["embedding"]).reshape(1, -1)
    except Exception as e:
        print("Error extracting embedding:", e)
        continue

    # Predict the gender: class 0 is reported as "man", anything else as "woman"
    prediction = clf.predict(embedding)
    gender = "man" if prediction[0] == 0 else "woman"
    print("Predicted Gender:", gender)


In [None]:
#@title Step 7: Fairness Analysis for Treatment Equality

import numpy as np
from sklearn.metrics import confusion_matrix

# This cell relies on y_test and y_pred left in the kernel by the evaluation
# cell; run the train/test split and prediction cell first if they are missing.

# Re-wrap both as numpy arrays so the element-wise comparisons below are valid
y_test, y_pred = np.array(y_test), np.array(y_pred)

# Class id -> readable group name
gender_map = {
    0: "man",
    1: "woman",
}

def compute_group_metrics(y_true, y_pred, group_label):
    """Compute one-vs-rest confusion counts and error rates for one class.

    `group_label` is treated as the positive class and every other label as
    negative.

    Args:
        y_true: Ground-truth labels (any array-like of equal length to y_pred).
        y_pred: Predicted labels (array-like).
        group_label: The label value to treat as the positive class.

    Returns:
        dict with keys "TP", "FP", "FN", "TN" (counts), "FPR" = FP / (FP + TN),
        "FNR" = FN / (TP + FN) and "Treatment Equality (FP/FN)" = FP / FN.
        A ratio is None when its denominator is zero.
    """
    # Coerce to arrays: with plain Python lists `y_true == group_label` is a
    # single bool rather than an element-wise mask, which silently yields
    # wrong counts.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    true_is_group = y_true == group_label
    pred_is_group = y_pred == group_label

    # True Positive (TP): predicted as the group and truth is the group.
    tp = np.sum(true_is_group & pred_is_group)
    # False Positive (FP): predicted as the group but truth is not the group.
    fp = np.sum(~true_is_group & pred_is_group)
    # False Negative (FN): truth is the group but predicted as the other.
    fn = np.sum(true_is_group & ~pred_is_group)
    # True Negative (TN): truth is not the group and predicted as not the group.
    tn = np.sum(~true_is_group & ~pred_is_group)

    # Calculate error rates. Avoid division by zero.
    fpr = fp / (fp + tn) if (fp + tn) != 0 else None
    fnr = fn / (tp + fn) if (tp + fn) != 0 else None
    # Treatment equality metric: ratio of false positives to false negatives.
    te_ratio = fp / fn if fn != 0 else None

    return {
        "TP": tp, "FP": fp, "FN": fn, "TN": tn,
        "FPR": fpr, "FNR": fnr, "Treatment Equality (FP/FN)": te_ratio
    }

# Per-group metrics, keyed by the readable group name
results = {
    gender_map[group]: compute_group_metrics(y_test, y_pred, group)
    for group in (0, 1)
}

# Print every metric of every group, with a blank line between groups
for gender, metrics in results.items():
    print(f"Metrics for {gender}:")
    for metric_name, value in metrics.items():
        print(f"  {metric_name}: {value}")
    print()

# Pull out the FP/FN ratios; either may be None when that group has no false negatives
te_ratio_man = results["man"]["Treatment Equality (FP/FN)"]
te_ratio_woman = results["woman"]["Treatment Equality (FP/FN)"]

print("Treatment Equality Ratio Comparison:")
if te_ratio_man is None or te_ratio_woman is None:
    print("  Unable to compute treatment equality ratio for one or both groups due to zero false negatives.")
else:
    print(f"  Man: {te_ratio_man:.2f} | Woman: {te_ratio_woman:.2f}")
    diff = abs(te_ratio_man - te_ratio_woman)
    print(f"  Difference in Treatment Equality Ratio: {diff:.2f}")


Metrics for man:
  TP: 228
  FP: 2
  FN: 4
  TN: 228
  FPR: 0.008695652173913044
  FNR: 0.017241379310344827
  Treatment Equality (FP/FN): 0.5

Metrics for woman:
  TP: 228
  FP: 4
  FN: 2
  TN: 228
  FPR: 0.017241379310344827
  FNR: 0.008695652173913044
  Treatment Equality (FP/FN): 2.0

Treatment Equality Ratio Comparison:
  Man: 0.50 | Woman: 2.00
  Difference in Treatment Equality Ratio: 1.50


In [None]:
import numpy as np

# This cell expects y_test and y_pred to already be numpy arrays in the kernel.
# Class id -> readable group name (same mapping as in the earlier fairness cell)
gender_map = {
    0: "man",
    1: "woman",
}

def compute_tpr(y_true, y_pred, group_label):
    """Return the true positive rate (recall) for one class.

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels (array-like, same length as y_true).
        group_label: The label value to treat as the positive class.

    Returns:
        TP / (TP + FN), or None when no sample in y_true has `group_label`.
    """
    # Coerce to arrays: with plain Python lists `y_true == group_label` is a
    # single bool rather than an element-wise mask, which silently yields
    # wrong counts.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    in_group = y_true == group_label
    # True Positive (TP): correctly predicted as the group.
    tp = np.sum(in_group & (y_pred == group_label))
    # False Negative (FN): true group but predicted as other.
    fn = np.sum(in_group & (y_pred != group_label))

    tpr = tp / (tp + fn) if (tp + fn) != 0 else None
    return tpr

# TPR (Equality of Opportunity) per group, keyed by the readable group name
tpr_results = {
    gender_map[group]: compute_tpr(y_test, y_pred, group)
    for group in (0, 1)
}

# One line per group; a None TPR means the group had no true samples
for gender, tpr in tpr_results.items():
    if tpr is None:
        print(f"TPR for {gender}: Not computable")
    else:
        print(f"TPR (Equality of Opportunity) for {gender}: {tpr:.2f}")

# Absolute TPR gap between the two groups, when both rates exist
if tpr_results["man"] is None or tpr_results["woman"] is None:
    print("Unable to compute TPR for one or both groups.")
else:
    diff_tpr = abs(tpr_results["man"] - tpr_results["woman"])
    print("\nEquality of Opportunity Comparison:")
    print(f"  Man TPR: {tpr_results['man']:.2f} | Woman TPR: {tpr_results['woman']:.2f}")
    print(f"  Difference in TPR: {diff_tpr:.2f}")


TPR (Equality of Opportunity) for man: 0.98
TPR (Equality of Opportunity) for woman: 0.99

Equality of Opportunity Comparison:
  Man TPR: 0.98 | Woman TPR: 0.99
  Difference in TPR: 0.01


In [None]:
import numpy as np
from scipy.stats import entropy

# This cell relies on clf (the trained classifier), X_test and y_test left in
# the kernel by the training cell.
# Get predicted probabilities for the test set.
probs = clf.predict_proba(X_test)  # shape: (n_samples, 2)

# For demonstration, compare how the predicted probability of class 1 ("woman")
# is distributed over samples whose true label is 0 ("man") versus 1 ("woman").
# NOTE(review): for an accurate classifier these two distributions are expected
# to be far apart, so a large value mainly reflects class separation — confirm
# this is the comparison the fairness analysis intends.
group0_probs = probs[y_test == 0, 1]  # Predicted probability of class 1 for true "man"
group1_probs = probs[y_test == 1, 1]  # Predicted probability of class 1 for true "woman"

# Count samples in 10 equal-width bins between 0 and 1.
bins = np.linspace(0, 1, 11)
hist0, _ = np.histogram(group0_probs, bins=bins)
hist1, _ = np.histogram(group1_probs, bins=bins)

# Add-one smoothing: KL(P||Q) is infinite as soon as Q has an empty bin where P
# does not, which is what produced `inf` without smoothing. A pseudo-count in
# every bin keeps the result finite; the value is a tunable that slightly
# biases the estimate.
pseudo_count = 1.0
hist0 = hist0 + pseudo_count
hist1 = hist1 + pseudo_count

# Normalize the smoothed counts so that each histogram sums to 1.
hist0 = hist0 / np.sum(hist0)
hist1 = hist1 / np.sum(hist1)

# Compute the KL-Divergence from group0's distribution to group1's distribution.
# The measure is asymmetric: swapping the arguments gives a different value.
kl_divergence = entropy(hist0, hist1)  # KL(P||Q), natural logarithm
print("KL Divergence (man [group 0] vs woman [group 1]):", kl_divergence)


KL Divergence (man [group 0] vs woman [group 1]): inf


In [None]:
import numpy as np

# This cell expects y_test and y_pred to already be numpy arrays in the kernel.
# Class id -> readable group name
gender_map = {
    0: "man",
    1: "woman",
}

def compute_ppv(y_true, y_pred, group_label):
    """Return the positive predictive value (precision) for one class.

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels (array-like, same length as y_true).
        group_label: The label value to treat as the positive class.

    Returns:
        TP / (TP + FP), or None when nothing was predicted as `group_label`.
    """
    # Coerce to arrays: with plain Python lists `y_pred == group_label` is a
    # single bool rather than an element-wise mask, which silently yields
    # wrong counts.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    predicted_as_group = y_pred == group_label
    # True Positives: correct predictions for the group.
    tp = np.sum((y_true == group_label) & predicted_as_group)
    # False Positives: samples from other groups predicted as this group.
    fp = np.sum((y_true != group_label) & predicted_as_group)

    ppv = tp / (tp + fp) if (tp + fp) != 0 else None
    return ppv

# PPV per group, keyed by the readable group name
ppv_results = {
    gender_map[group]: compute_ppv(y_test, y_pred, group)
    for group in (0, 1)
}

# One line per group; a None PPV means nothing was predicted as that group
for gender, ppv in ppv_results.items():
    if ppv is None:
        print(f"PPV for {gender}: Not computable (no predicted positives)")
    else:
        print(f"PPV for {gender}: {ppv:.2f}")

# Absolute PPV gap between the two groups, when both values exist
if ppv_results["man"] is None or ppv_results["woman"] is None:
    print("Unable to compute PPV for one or both groups.")
else:
    diff_ppv = abs(ppv_results["man"] - ppv_results["woman"])
    print("\nPredictive Parity Comparison:")
    print(f"  Man PPV: {ppv_results['man']:.2f} | Woman PPV: {ppv_results['woman']:.2f}")
    print(f"  Difference in PPV: {diff_ppv:.2f}")


PPV for man: 0.99
PPV for woman: 0.98

Predictive Parity Comparison:
  Man PPV: 0.99 | Woman PPV: 0.98
  Difference in PPV: 0.01
