# 03 · Interpretability with SHAP (XGBoost)
Global (summary plot) and local (single-row force plot) explanations of a saved XGBoost car-insurance model using SHAP.

**Install:** `pip install shap xgboost kagglehub pandas matplotlib`

In [None]:
import kagglehub, pandas as pd, numpy as np
import xgboost as xgb, shap
from pathlib import Path
import matplotlib.pyplot as plt

def make_dm_ready(X: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``X`` whose text columns are NaN-free categoricals.

    Object and pandas ``StringDtype`` columns are cast to ``category``. Every
    categorical column then gets an explicit ``"__NA__"`` level and has its
    missing entries replaced by it. Columns of any other dtype are returned
    unchanged, and the input frame itself is never modified.

    The function is idempotent: a column that already carries the
    ``"__NA__"`` level is not given it a second time, because
    ``Series.cat.add_categories`` raises ``ValueError`` on duplicate levels.

    Parameters
    ----------
    X : pd.DataFrame
        Feature frame to prepare.

    Returns
    -------
    pd.DataFrame
        New frame with the same columns and row order as ``X``.
    """
    na_level = "__NA__"
    Xc = X.copy()
    for c in Xc.columns:
        col = Xc[c]
        # Text can arrive as plain ``object`` or as the dedicated string dtype.
        if pd.api.types.is_object_dtype(col.dtype) or isinstance(col.dtype, pd.StringDtype):
            col = col.astype("category")
        if isinstance(col.dtype, pd.CategoricalDtype):
            # The level is always added (even without NaNs), exactly once.
            if na_level not in col.cat.categories:
                col = col.cat.add_categories([na_level])
            Xc[c] = col.fillna(na_level)
    return Xc

# --- Load data ---------------------------------------------------------------
path = kagglehub.dataset_download("sagnik1511/car-insurance-data")
# Sort so the chosen file does not depend on filesystem traversal order, and
# fail with a readable message instead of a bare IndexError when none exists.
csv_candidates = sorted(Path(path).rglob("*.csv"))
if not csv_candidates:
    raise FileNotFoundError(f"No CSV file found under {path!r}")
csv = csv_candidates[0]
df = pd.read_csv(csv)

# --- Target and features -----------------------------------------------------
target = "OUTCOME"
# Map textual labels to 0/1; values absent from the mapping fall back to the
# raw column value before the final integer cast.
y = (df[target].astype(str).str.lower()
        .map({"1":1,"0":0,"yes":1,"no":0,"true":1,"false":0})
        .fillna(df[target]).astype(int))
X = df.drop(columns=[target])
Xc = make_dm_ready(X)

# Xc holds pandas `category` columns, so the DMatrix must opt in to them.
dX = xgb.DMatrix(Xc, enable_categorical=True)

# --- Model -------------------------------------------------------------------
# The path is relative to the notebook's working directory; check it up front
# so a wrong working directory yields a clear error.
model_path = Path("../models/car_insurance_xgb.json")
if not model_path.is_file():
    raise FileNotFoundError(f"Model file not found: {model_path.resolve()}")
booster = xgb.Booster()
booster.load_model(str(model_path))

# --- SHAP values -------------------------------------------------------------
explainer = shap.TreeExplainer(booster)
# NOTE(review): the pre-built DMatrix is passed rather than the DataFrame,
# presumably so shap does not rebuild one without enable_categorical -- confirm
# against the installed shap version.
shap_values = explainer.shap_values(dX)
print(shap_values.shape)

ValueError: DataFrame.dtypes for data must be int, float, bool or category. When categorical type is supplied, the experimental DMatrix parameter`enable_categorical` must be set to `True`.  Invalid columns:AGE: category, GENDER: category, RACE: category, DRIVING_EXPERIENCE: category, EDUCATION: category, INCOME: category, VEHICLE_YEAR: category, VEHICLE_TYPE: category

## SHAP summary plot

In [None]:
plt.figure(figsize=(8, 6))
# plot_size=None keeps the figure size requested above; the default ("auto")
# makes summary_plot resize the current figure and discard that figsize.
shap.summary_plot(shap_values, Xc, show=False, plot_size=None)
plt.tight_layout()
plt.show()

NameError: name 'shap_values' is not defined

<Figure size 800x600 with 0 Axes>

## SHAP force plot (single row)

In [0]:
# Positional index of the single observation to explain.
row_idx = 0
# Local explanation: the explainer's base value, this row's SHAP values and
# this row's feature values, drawn with the matplotlib backend.
shap.force_plot(
    explainer.expected_value,
    shap_values[row_idx, :],
    Xc.iloc[row_idx, :],
    matplotlib=True,
)