# Module 12 — Model Evaluation, Explainability & Debugging (Expanded)

This notebook provides practical demos for:

- classification metrics: confusion matrix, precision/recall, ROC-AUC, PR curve
- model explainability: Grad-CAM for CNNs, SHAP for tabular models
- debugging tips and common issues

Uses small datasets and short training runs suitable for classroom demos.

## 1 — Setup (install packages and imports)

In [None]:
!pip -q install -U tensorflow scikit-learn shap matplotlib --quiet

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve, precision_recall_curve, auc
import shap
print('TF version:', tf.__version__)
print('SHAP version:', shap.__version__)


## 2 — Tabular baseline model & evaluation (RandomForest)

In [None]:
from sklearn.model_selection import train_test_split

# Reproducible synthetic binary-classification problem: 500 rows, 10 features
# (5 of them informative), with 30% of the rows held out for evaluation.
X, y = make_classification(
    n_samples=500,
    n_features=10,
    n_informative=5,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Baseline model: a 100-tree random forest (fit returns the estimator itself).
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Hard labels drive the report and confusion matrix; the class-1 probability
# column drives the threshold-free ROC / PR curves.
y_pred = rf.predict(X_test)
y_proba = rf.predict_proba(X_test)[:, 1]

print('Classification report:\n', classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
print('Confusion matrix:\n', cm)

# Threshold-free metrics and the points needed to draw both curves.
roc_auc = roc_auc_score(y_test, y_proba)
print('ROC AUC:', roc_auc)
fpr, tpr, _ = roc_curve(y_test, y_proba)
prec, rec, _ = precision_recall_curve(y_test, y_proba)

# Side-by-side panels: ROC on the left, precision-recall on the right.
fig, (ax_roc, ax_pr) = plt.subplots(1, 2, figsize=(10, 4))

ax_roc.plot(fpr, tpr, label=f'ROC AUC={roc_auc:.2f}')
ax_roc.set(xlabel='FPR', ylabel='TPR', title='ROC Curve')
ax_roc.legend()

ax_pr.plot(rec, prec)
ax_pr.set(xlabel='Recall', ylabel='Precision', title='Precision-Recall Curve')

plt.show()


## 3 — SHAP explanations for tabular model (RandomForest)

In [None]:
# Wrap the forest's predict_proba in a SHAP explainer, using the first 100
# training rows as a small background sample, and explain 20 test rows.
explainer = shap.Explainer(rf.predict_proba, X_train[:100])
shap_values = explainer(X_test[:20])

# predict_proba returns one column per class, so the Explanation carries a
# trailing class axis: (samples, features, classes). beeswarm only accepts a
# 2-D (samples, features) Explanation and raises a ValueError otherwise, so
# select the contributions towards the positive class (index 1) first.
shap_values_pos = shap_values[:, :, 1]

# Summary plot
shap.plots.beeswarm(shap_values_pos)


## 4 — Grad-CAM for CNNs (Keras) — small demo on CIFAR-10 subset

In [None]:
import cv2
from tensorflow.keras.datasets import cifar10
from tensorflow.keras import layers, models

# Load a small CIFAR-10 subset and scale pixels to [0, 1].
# The labels get an `_img` suffix so they do not overwrite the tabular
# `y_train` / `y_test` defined in section 2 (still needed for the SHAP exercise).
(x_train, y_train_img), (x_test, y_test_img) = cifar10.load_data()
x_train = x_train[:2000].astype('float32') / 255.0
y_train_img = y_train_img[:2000]
x_test = x_test[:500].astype('float32') / 255.0
y_test_img = y_test_img[:500]


# Build tiny CNN
def tiny_cnn():
    """Return an uncompiled two-convolution CNN for 32x32x3 images.

    The model ends in a 10-way softmax. The convolution layers are named
    'conv1' and 'conv2' so they can be looked up by name (Grad-CAM below
    uses 'conv2').
    """
    return models.Sequential([
        layers.Input((32, 32, 3)),
        layers.Conv2D(16, 3, activation='relu', name='conv1'),
        layers.MaxPooling2D(),
        layers.Conv2D(32, 3, activation='relu', name='conv2'),
        layers.MaxPooling2D(),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])


model = tiny_cnn()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train_img, epochs=3, batch_size=64, validation_split=0.1)

# pick an image and compute Grad-CAM for its predicted class
img = x_test[5:6]  # slice (not index) to keep the batch dimension
preds = model.predict(img)
pred_class = np.argmax(preds[0])
print('Predicted class:', pred_class)

# Grad-CAM implementation
last_conv_layer_name = 'conv2'

# Auxiliary model returning both the last conv feature maps and the predictions.
# `model.inputs` is already a list, so it is passed as-is rather than nested again.
grad_model = tf.keras.models.Model(
    model.inputs,
    [model.get_layer(last_conv_layer_name).output, model.output],
)
with tf.GradientTape() as tape:
    conv_outputs, predictions = grad_model(img)
    loss = predictions[:, pred_class]  # score of the predicted class

# Gradient of the class score w.r.t. the feature maps, averaged over the spatial
# axes to get one importance weight per channel.
grads = tape.gradient(loss, conv_outputs)[0]
pooled_grads = tf.reduce_mean(grads, axis=(0, 1))
conv_outputs = conv_outputs[0]

# Channel-weighted sum of the feature maps, converted to NumPy exactly once.
# (The original applied np.maximum to the tensor, which already yields an ndarray,
# and then called .numpy() on it, raising AttributeError.)
heatmap = tf.reduce_sum(tf.multiply(pooled_grads, conv_outputs), axis=-1).numpy()
heatmap = np.maximum(heatmap, 0)            # ReLU: keep positive evidence only
heatmap = heatmap / (heatmap.max() + 1e-8)  # scale to [0, 1]; epsilon guards an all-zero map

# Resize heatmap to image size and overlay
height, width = img.shape[1:3]
heatmap = cv2.resize(heatmap, (width, height))  # cv2 expects (width, height)
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
# applyColorMap returns BGR; convert so the colours are right next to the RGB image.
heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
# Clip before casting: the sum can exceed 255 and would wrap around as uint8.
superimposed = np.clip(heatmap * 0.4 + img[0] * 255, 0, 255).astype('uint8')

fig, (ax_orig, ax_cam) = plt.subplots(1, 2, figsize=(8, 4))
ax_orig.imshow(img[0]); ax_orig.set_title('Original'); ax_orig.axis('off')
ax_cam.imshow(superimposed); ax_cam.set_title('Grad-CAM'); ax_cam.axis('off')
plt.show()


## 5 — Debugging checklist & common issues

1. Data leakage: when samples are correlated over time or belong to the same user, split train/test by time or by user so related rows never end up on both sides.
2. Label noise: inspect samples near decision boundary; clean labels or use robust loss.
3. Class imbalance: try class weights or resampling.
4. Learning rate issues: use a learning-rate finder or try common schedulers (e.g. cosine decay, reduce-on-plateau).
5. Overfitting: monitor train vs val curves; use dropout, augmentation, or simplify model.

**In-class exercise:** take a misclassified image from the CNN above, apply Grad-CAM, and discuss what the model focused on. Use SHAP to explain tabular misclassifications.
