# In [None]:
# =========================================
# Model Evaluation, Fairness & Explainable AI
# =========================================

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.ensemble import GradientBoostingClassifier

# Fairness & Explainability
from fairlearn.metrics import MetricFrame, selection_rate, demographic_parity_difference, equalized_odds_difference
import shap

# ------------------------------
# Part 1: Load Data
# ------------------------------
# Keep only rows where every column used below (features and target) is present.
titanic = sns.load_dataset("titanic").dropna(subset=["age", "fare", "sex", "class", "survived"])

# Work on a copy so the loaded frame keeps its original string labels.
df = titanic.copy()
df["sex"] = df["sex"].map({"male": 0, "female": 1})
# seaborn returns "class" as a pandas categorical, and mapping a categorical
# yields another categorical. Cast to int so the feature matrix contains only
# plain numeric dtypes for the numeric libraries used downstream.
df["class"] = df["class"].map({"First": 1, "Second": 2, "Third": 3}).astype(int)

X = df[["age", "fare", "sex", "class"]]
y = df["survived"]

# Hold out 30% of the rows for evaluation; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# ------------------------------
# Part 2: Train Model
# ------------------------------
# Hyperparameters gathered in one place; random_state is fixed.
gb_params = {"n_estimators": 200, "learning_rate": 0.05, "random_state": 42}
model = GradientBoostingClassifier(**gb_params)
model.fit(X_train, y_train)

# Hard class predictions for the held-out rows.
y_pred = model.predict(X_test)
# Column 1 of predict_proba, kept as a continuous score for ROC-AUC.
y_proba = model.predict_proba(X_test)[:, 1]

# ------------------------------
# Part 3: Evaluation Metrics
# ------------------------------
print("\n=== Classification Report ===")
print(classification_report(y_test, y_pred))
# ROC-AUC is scored on the predicted probabilities, not the hard labels.
print("ROC-AUC:", roc_auc_score(y_test, y_proba))

# confusion_matrix places true labels on rows and predicted labels on columns;
# label the axes so the heatmap can be read without knowing that convention.
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion Matrix")
plt.show()

# ------------------------------
# Part 4: Fairness Auditing
# ------------------------------
# Group the test rows by the encoded "sex" column (fairness by gender).
sensitive_feature = X_test["sex"]

# Accuracy computed separately for each value of the sensitive feature.
mf = MetricFrame(
    metrics=accuracy_score,
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=sensitive_feature,
)
print("\n=== Accuracy by Gender ===")
print(mf.by_group)

# Both disparity summaries take the same arguments, so report them in one loop.
for heading, disparity in (
    ("\n=== Demographic Parity Difference ===", demographic_parity_difference),
    ("\n=== Equalized Odds Difference ===", equalized_odds_difference),
):
    print(heading)
    print(disparity(y_test, y_pred, sensitive_features=sensitive_feature))

# ------------------------------
# Part 5: Explainability (Feature Importance + SHAP)
# ------------------------------
# The model's own importances, sorted ascending and drawn as horizontal bars.
feat_importances = pd.Series(data=model.feature_importances_, index=X.columns)
feat_importances.sort_values().plot.barh(figsize=(6, 4), title="Feature Importance (GB)")
plt.show()

# SHAP values: build the explainer from the fitted model and the training
# features, then explain every test row.
explainer = shap.Explainer(model, X_train)
shap_values = explainer(X_test)

# Global explanation across the whole test set.
shap.summary_plot(shap_values, X_test)

# Local explanation for the first test sample.
first_sample = shap_values[0]
shap.plots.waterfall(first_sample)

# ------------------------------
# Mission Task
# ------------------------------
# 1. Evaluate model using precision, recall, F1 separately for each gender group.
# 2. Train a RandomForestClassifier and compare fairness metrics with GradientBoosting.
# 3. Use SHAP to explain a misclassified example.
# 4. Discuss: How can we mitigate unfairness in the model?
