# Setup

In [1]:
%autoreload 2
%load_ext autoreload
%matplotlib widget

import sys, os, pickle, pdb, shutil, re, math
from copy import deepcopy, copy
from pathlib import Path
from pprint import pprint

from tqdm.notebook import tqdm
import pandas as pd, numpy as np, torch
from sklearn.utils import shuffle
import torchvision.transforms as T
from torch.utils.tensorboard import SummaryWriter

from utils import lime_fit, sample_around

paths = [Path("").parent.absolute() / "shap", Path("").parent.absolute() / "shap_original"]
for path in paths:
    if str(path) not in sys.path:
        sys.path.insert(0, str(path))
import shap, shap_original

DTYPE, DEVICE = torch.float32, torch.device("cuda")
TOPTS = dict(dtype=DTYPE, device=DEVICE)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
df = pd.read_csv(Path("") / "data" / "compas" / "cox-violent-parsed_filt.csv")

# Preprocess the data

# Filter out entries with no indication of recidivism or no compass score
df = df[df["is_recid"] != -1]
df = df[df["decile_score"] != -1]

# Rename recidivism column
df["recidivism_within_2_years"] = df["is_recid"]

# Make the COMPASS label column numeric (0 and 1), for use in our model
df["COMPASS_determination"] = np.where(df["score_text"] == "Low", 0, 1)

df = pd.get_dummies(df, columns=["sex", "race"])

# Get list of all columns from the dataset we will use for model input or output.
input_features = [
    "sex_Female",
    "sex_Male",
    "age",
    "race_African-American",
    "race_Caucasian",
    "race_Hispanic",
    "race_Native American",
    "race_Other",
    "priors_count",
    "juv_fel_count",
    "juv_misd_count",
    "juv_other_count",
]

to_keep = input_features + ["recidivism_within_2_years", "COMPASS_determination"]

to_remove = [col for col in df.columns if col not in to_keep]
df = df.drop(columns=to_remove)

input_columns = df.columns.tolist()
labels = df["COMPASS_determination"]
#df.head()

In [3]:
# Create data structures needing for training and testing.
# The training data doesn't contain the column we are predicting,
# 'COMPASS_determination', or the column we are using for evaluation of our
# trained model, 'recidivism_within_2_years'.
df_for_training = df.drop(columns=["COMPASS_determination", "recidivism_within_2_years"])
train_size = int(len(df_for_training) * 0.8)

train_data = df_for_training[:train_size]
train_labels = labels[:train_size]
test_data = df_for_training[train_size:]
test_labels = labels[train_size:]

test_data_with_labels = df[train_size:]

In [4]:
Xtr, Ytr = train_data.values, train_labels.values
Xts, Yts = test_data.values, test_labels.values
MU, STD = np.mean(Xtr, -2), np.std(Xtr, -2)
normalize_fn = lambda x: (x - MU[None, ...]) / STD[None, ...]
Xtr = normalize_fn(Xtr)
Xts = normalize_fn(Xts)
train_loader = torch.utils.data.DataLoader(list(zip(*[Xtr, Ytr])), batch_size=1024, num_workers=8)
test_loader = torch.utils.data.DataLoader(list(zip(*[Xts, Yts])), batch_size=1024, num_workers=8)

In [5]:
# Create the model

# This is the size of the array we'll be feeding into our model for each example
input_size = len(train_data.iloc[0])

RACE_IDX = [i for (i, z) in enumerate(train_data.columns) if re.match(r"race_.*", z) is not None]

# activation = torch.nn.ReLU()
activation = torch.nn.Softplus(1e2)


def generate_model():
    model = (
        torch.nn.Sequential(
            torch.nn.Linear(input_size, 128),
            copy(activation),
            torch.nn.Linear(128, 128),
            copy(activation),
            torch.nn.Linear(128, 1),
            torch.nn.Sigmoid(),
        )
        .to(DTYPE)
        .to(DEVICE)
    )
    loss_obj = torch.nn.BCELoss()
    lam = 1e-3
    loss_fn = lambda Yp, Y: loss_obj(Yp, Y) + sum(lam * torch.sum(param**2) / 2 for param in model.parameters())

    x0 = torch.as_tensor(Xtr[torch.randint(0, Xtr.shape[0], size=(1000,)), :], device=DEVICE, dtype=DTYPE)
    Xs = sample_around(x0, torch.tensor(STD, device=DEVICE, dtype=DTYPE), N=int(1e2), alf=1e-2)
    Xs = Xs.transpose(0, 1)
    loss_fn_ = loss_fn

    def loss_fn(Yp, Y, penalize=True, penalty="exact", gam=1e1):
        ret = loss_fn_(Yp, Y)
        if penalize:
            Yp = model(Xs)
            W, b = lime_fit(Xs, Yp)
            if penalty == "exact":
                ret = ret + gam * torch.mean(torch.norm(W[..., RACE_IDX, 0], dim=-1))
            elif penalty == "mse":
                ret = ret + gam * torch.mean(torch.norm(W[..., RACE_IDX, 0], dim=-1) ** 2)
            elif penalty == "super-exact":
                ret = ret + gam * torch.mean(torch.sqrt(torch.norm(W[..., RACE_IDX, 0], dim=-1)))
            else:
                raise ValueError(f"penalty [{penalty}] is not supported")
        return ret

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.3)
    return model, loss_fn, optimizer, scheduler, Xs


def accuracy(model, loader):
    correct = 0
    for X, Y in loader:
        X, Y = X.to(DTYPE).to(DEVICE), Y.to(DEVICE)
        Yp = model(X)
        correct += torch.sum((Yp > 0.5).reshape(Y.shape) == Y).detach()
    return correct / len(loader.dataset)

# LIME Experiments

In [15]:
accs, metrics = [], []
for (penalize, penalty, gam) in [
    (False, "exact", 1e1),
    (True, "exact", 1e1),
    (True, "mse", 1e2),
    (True, "super-exact", 1.5e0),
]:
    model, loss_fn, optimizer, scheduler, Xs = generate_model()
    shutil.rmtree(Path("") / "runs")
    writer = SummaryWriter()
    rng = tqdm(range(int(10)))
    for epoch in rng:
        for (i, (X, Y)) in enumerate(train_loader):
            X, Y = X.to(DTYPE).to(DEVICE), Y.to(DTYPE).to(DEVICE)
            loss = loss_fn(model(X).reshape(Y.shape), Y, penalize=penalize, penalty=penalty, gam=gam)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # rng.set_description(f"{loss.detach():.4e}")
            writer.add_scalar("loss/train", float(loss), i + epoch * len(train_loader))
            writer.add_scalar("step_size", float(optimizer.param_groups[0]["lr"]), i + epoch * len(train_loader))
            writer.flush()
        scheduler.step()
        # tqdm.write(f"Accuracy = {1e2 * accuracy(model, test_loader):.3f}%")
        rng.set_description(
            f"Accuracy = (test = {1e2 * accuracy(model, test_loader):.3f}%,"
            + f"train = {1e2 * accuracy(model, train_loader):.3f}%)"
        )
        # tqdm.write(f"Loss =     {loss_obj(model(X).reshape(Y.shape), Y):.5e}")

    W, b = lime_fit(Xs, model(Xs))
    print(f"Penalize = {penalize}")
    if penalize:
        print(f"Penalty = {penalty}")
    metric = torch.mean(torch.norm(W[..., RACE_IDX, 0], dim=-1))
    print(float(metric))
    accs.append(float(1e2 * accuracy(model, test_loader)))
    metrics.append(float(metric))
    print("#" * 80)

  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = False
0.3325492739677429
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = exact
0.00370749831199646
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = mse
0.009267126210033894
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
0.0019427312072366476
################################################################################


In [19]:
def check_gam(gam):
    model, loss_fn, optimizer, scheduler, Xs = generate_model()
    shutil.rmtree(Path("") / "runs")
    writer = SummaryWriter()
    rng = tqdm(range(int(10)))
    for epoch in rng:
        for (i, (X, Y)) in enumerate(train_loader):
            X, Y = X.to(DTYPE).to(DEVICE), Y.to(DTYPE).to(DEVICE)
            loss = loss_fn(model(X).reshape(Y.shape), Y, penalize=True, penalty="exact", gam=gam)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            writer.add_scalar("loss/train", float(loss), i + epoch * len(train_loader))
            writer.add_scalar("step_size", float(optimizer.param_groups[0]["lr"]), i + epoch * len(train_loader))
            writer.flush()
        scheduler.step()
        # tqdm.write(f"Accuracy = {1e2 * accuracy(model, test_loader):.3f}%")
        rng.set_description(
            f"Accuracy = (test = {1e2 * accuracy(model, test_loader):.3f}%,"
            + f"train = {1e2 * accuracy(model, train_loader):.3f}%)"
        )
        # tqdm.write(f"Loss =     {loss_obj(model(X).reshape(Y.shape), Y):.5e}")

    W, b = lime_fit(Xs, model(Xs))
    print(f"Penalize = {penalize}")
    if penalize:
        print(f"Penalty = {penalty}")
        print(f"Gam = {gam}")
    metric = torch.mean(torch.norm(W[..., RACE_IDX, 0], dim=-1))
    print(metric)
    print("#" * 80)
    return metric

In [20]:
gams = 10.0 ** np.linspace(-2, 2, 10)
vals = [check_gam(gam) for gam in gams]

  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 0.01
tensor(0.2527, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 0.027825594022071243
tensor(0.1842, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 0.0774263682681127
tensor(0.1134, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 0.21544346900318834
tensor(0.0638, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 0.5994842503189409
tensor(0.0332, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 1.6681005372000592
tensor(0.0144, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 4.6415888336127775
tensor(0.0067, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 12.915496650148826
tensor(0.0025, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 35.93813663804626
tensor(0.0006, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


  0%|          | 0/10 [00:00<?, ?it/s]

Penalize = True
Penalty = super-exact
Gam = 100.0
tensor(0.0004, device='cuda:0', grad_fn=<MeanBackward0>)
################################################################################


--------------------------------------------------------------------------------

# Testing differentiating through Shapley Values

In [8]:
# Create a SHAP explainer by passing a subset of our training data
model, loss_fn, optimizer, scheduler, Xs = generate_model()
X = torch.as_tensor(train_data.values[:200], **TOPTS)

explainer = shap.DeepExplainer(deepcopy(model), X)
vals1 = explainer.shap_values(X[:10, :])

explainer_original = shap_original.DeepExplainer(deepcopy(model), X)
vals2 = torch.as_tensor(explainer_original.shap_values(X[:10, :]), **TOPTS)

print(torch.norm(vals1 - vals2) / torch.norm(vals1))

tensor(1.4286e-07, device='cuda:0', grad_fn=<DivBackward0>)


In [None]:
vals

In [31]:
# Explain predictions of the model on the first 5 examples from our training set
# to test the SHAP explainer.
shap_values = explainer.shap_values(torch.tensor(train_data.values[:5]).to(DTYPE).to(DEVICE))
shap_values
# print(shap_values[0, :])

array([[-5.71903102e-02, -1.97675225e-04, -1.98997383e-04,
        -1.92532389e-04, -4.12244089e-02, -1.43817724e-09,
         1.28570010e-09, -1.71174586e-04,  1.77759330e-06,
         3.25610164e-11,  0.00000000e+00, -3.68765322e-04],
       [-5.71903102e-02, -1.97675225e-04, -1.98997383e-04,
        -1.92532389e-04, -4.12244089e-02, -1.43817724e-09,
         1.28570010e-09, -1.71174586e-04,  1.77759330e-06,
         3.25610164e-11,  0.00000000e+00, -3.68765322e-04],
       [-6.04670588e-03, -3.85728083e-04, -3.56173958e-04,
        -4.00341756e-04, -8.20497125e-02, -4.35711378e-09,
         2.81565038e-09,  2.28080557e-06,  3.80985762e-06,
         1.35330511e-10,  0.00000000e+00,  1.81077341e-11],
       [ 5.16895279e-02, -7.97657587e-04, -5.81035041e-04,
        -3.81269056e-04, -1.27719998e-01, -1.18468336e-07,
        -3.39043034e-08,  8.59804459e-06,  1.11365889e-05,
         9.25007129e-08,  0.00000000e+00,  1.37961976e-07],
       [ 5.16895279e-02, -7.97657587e-04, -5.8103504

--------------------------------------------------------------------------------