In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell runs, so edits to the project's .py files are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import numpy as np

from counterfactuals.datasets import LawDataset, AdultDataset, GermanCreditDataset
from counterfactuals.discriminative_models import MultilayerPerceptron

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Registry of the available benchmarks, keyed by short name. Each value is a
# tuple of (dataset object, path bound to `disc_model_path`, path bound to
# `gen_model_path`). Note that all three dataset objects are constructed here,
# not only the one selected below.
datasets = dict(
    adult=(
        AdultDataset("../data/adult.csv"),
        "adult_disc_model.pt",
        "adult_flow.pth",
    ),
    law=(
        LawDataset("../data/law.csv"),
        "law_disc_model.pt",
        "law_flow.pth",
    ),
    german=(
        GermanCreditDataset("../data/german_credit.csv"),
        "german_disc_model.pt",
        "german_flow.pth",
    ),
)

# Pick the benchmark the following cells operate on.
selected_entry = datasets["law"]
dataset, disc_model_path, gen_model_path = selected_entry

In [4]:
# Cast both feature splits to float32, storing the converted arrays back on the
# dataset object (train first, then test).
for split_name in ("X_train", "X_test"):
    split_values = getattr(dataset, split_name)
    setattr(dataset, split_name, split_values.astype(np.float32))

In [None]:
# Build the classifier: input width taken from the test features, hidden sizes
# [256, 256], and a final size argument of 1.
# Alternative configuration tried previously:
# disc_model = MultilayerPerceptron(dataset.X_test.shape[1], [512, 512], 2)
disc_model = MultilayerPerceptron(dataset.X_test.shape[1], [256, 256], 1)

# Training call, kept for reference (presumably what wrote the checkpoint at
# `disc_model_path` — confirm). Uncomment to retrain instead of loading.
# disc_model.fit(
#     dataset.train_dataloader(batch_size=128, shuffle=True),
#     dataset.test_dataloader(batch_size=128, shuffle=False),
#     epochs=5000,
#     patience=100,
#     lr=1e-3,
#     checkpoint_path=disc_model_path,
# )

# Restore the saved weights for the selected dataset.
disc_model.load(disc_model_path)
# disc_model.load("german_disc_model_onehot.pt")

# Put the network in inference mode before it is used for predictions below.
# A freshly constructed nn.Module is in training mode and loading a state dict
# does not change that, so any dropout layer would otherwise stay active and
# make the predicted labels stochastic.
# NOTE(review): harmless if `predict` already switches to eval mode — confirm.
# The trailing ';' only suppresses the module repr in the cell output.
disc_model.eval();

  self.load_state_dict(torch.load(path))
Epoch 125, Train: 0.4991, test: 0.4964, patience: 100:   3%|▎         | 126/5000 [00:02<01:44, 46.82it/s]


In [7]:
# Predicted labels for the test split as a flat 1-D NumPy array.
y_pred = disc_model.predict(dataset.X_test).detach().numpy().flatten()

# Flatten the reference labels as well. If `dataset.y_test` is not 1-D (it is
# overwritten with un-flattened `predict` output a few cells further down, so
# this can happen when the cell is re-run), comparing a (n,) array with a
# (n, 1) array would broadcast to (n, n) and report a meaningless accuracy.
y_true = np.asarray(dataset.y_test).reshape(-1)

print("Test accuracy:", (y_pred == y_true).mean())

Test accuracy: 0.759009009009009


In [8]:
# Overwrite the dataset's labels with the classifier's own predictions
# (detached and converted to NumPy), first for the training split ...
train_predictions = disc_model.predict(dataset.X_train)
dataset.y_train = train_predictions.detach().numpy()

# ... then for the test split.
test_predictions = disc_model.predict(dataset.X_test)
dataset.y_test = test_predictions.detach().numpy()

In [14]:
import pandas as pd
import dice_ml

# Load the law data again, this time with transform=False. This rebinds
# `dataset` to a new object, so the float32 cast and the relabelling applied to
# the previous object are not carried over.
dataset = LawDataset("../data/law.csv", transform=False)

X_train = dataset.X_train
y_train = dataset.y_train

# Column names for the DiCE dataframe: "0", "1", ... for each feature column,
# followed by "label" for the outcome.
feature_count = dataset.X_train.shape[1]
features = [str(column_idx) for column_idx in range(feature_count)]
features.append("label")

In [15]:
# Training features with the labels appended as the final column, named
# according to `features`.
label_column = y_train.reshape(-1, 1)
labelled_rows = np.concatenate((X_train, label_column), axis=1)
input_dataframe = pd.DataFrame(labelled_rows, columns=features)

# DiCE data interface over that frame. The continuous features are the
# dataset's `numerical_columns` converted to strings; the outcome is the last
# entry of `features`.
continuous_feature_names = [str(column) for column in dataset.numerical_columns]
dice = dice_ml.Data(
    dataframe=input_dataframe,
    continuous_features=continuous_feature_names,
    outcome_name=features[-1],
)

In [16]:
import torch.nn as nn


class DiscWrapper(nn.Module):
    """Module that passes the output of a wrapped model through a sigmoid."""

    def __init__(self, model):
        """Keep a reference to ``model`` as the ``model`` attribute."""
        super().__init__()
        self.model = model

    def forward(self, x):
        """Return ``torch.sigmoid`` applied to ``self.model(x)``."""
        raw_output = self.model(x)
        return torch.sigmoid(raw_output)

In [17]:
# Wrap the classifier so that its output is passed through a sigmoid.
disc_model_w = DiscWrapper(disc_model)

In [18]:
# Hand the sigmoid-wrapped classifier to DiCE through its "PYT" backend, using
# the "ohe-min-max" transformation function.
model = dice_ml.Model(model=disc_model_w, backend="PYT", func="ohe-min-max")

# Explainer combining the DiCE data interface with that model, using the
# "gradient" method.
exp = dice_ml.Dice(dice, model, method="gradient")

In [19]:
# Test features as a dataframe; columns are every entry of `features` except
# the last one (the outcome name).
query_instance = pd.DataFrame(dataset.X_test, columns=features[:-1])

In [20]:
# Search settings: one counterfactual per query row, targeting the opposite
# class, with post-hoc sparsity disabled (None) and a learning rate of 0.05.
search_options = dict(
    total_CFs=1,
    desired_class="opposite",
    posthoc_sparsity_param=None,
    learning_rate=0.05,
)
# Further options that were tried and are currently disabled:
#   proximity_weight=0, diversity_weight=0, categorical_penalty=0.0

# Runs over every row of `query_instance`; the result is the cell's output.
exp.generate_counterfactuals(query_instance, **search_options)

  0%|          | 1/444 [00:01<13:09,  1.78s/it]

Diverse Counterfactuals found! total time taken: 00 min 01 sec


  0%|          | 2/444 [00:05<21:40,  2.94s/it]

Diverse Counterfactuals found! total time taken: 00 min 03 sec


  1%|          | 3/444 [00:06<15:11,  2.07s/it]

Diverse Counterfactuals found! total time taken: 00 min 01 sec


  1%|          | 4/444 [00:07<10:49,  1.48s/it]

Diverse Counterfactuals found! total time taken: 00 min 00 sec


  1%|          | 5/444 [00:08<11:08,  1.52s/it]

Diverse Counterfactuals found! total time taken: 00 min 01 sec


  1%|▏         | 6/444 [00:09<09:32,  1.31s/it]

Diverse Counterfactuals found! total time taken: 00 min 00 sec


  2%|▏         | 7/444 [00:11<10:13,  1.40s/it]

Diverse Counterfactuals found! total time taken: 00 min 01 sec


  2%|▏         | 8/444 [00:12<10:40,  1.47s/it]

Diverse Counterfactuals found! total time taken: 00 min 01 sec


  2%|▏         | 9/444 [00:13<08:24,  1.16s/it]

Diverse Counterfactuals found! total time taken: 00 min 00 sec


  2%|▏         | 10/444 [00:15<10:36,  1.47s/it]

Diverse Counterfactuals found! total time taken: 00 min 02 sec


  2%|▏         | 11/444 [00:17<12:02,  1.67s/it]

Diverse Counterfactuals found! total time taken: 00 min 02 sec


  3%|▎         | 12/444 [00:21<16:01,  2.23s/it]

Diverse Counterfactuals found! total time taken: 00 min 03 sec


  3%|▎         | 13/444 [00:23<16:02,  2.23s/it]

Diverse Counterfactuals found! total time taken: 00 min 02 sec


  3%|▎         | 13/444 [00:24<13:36,  1.89s/it]


KeyboardInterrupt: 

In [15]:
from sklearn.model_selection import train_test_split

import dice_ml
from dice_ml.utils import helpers

In [20]:
# Load DiCE's bundled adult-income data. It is bound to its own name instead of
# `dataset`: later cells still read `dataset.X_train` / `dataset.y_train` from
# the dataset object created earlier, and rebinding `dataset` here made those
# cells fail when the notebook is run top to bottom.
adult_income_df = helpers.load_adult_income_dataset()
target = adult_income_df["income"]

# 80/20 split, stratified on the income column, with a fixed random_state so the
# split is reproducible.
train_dataset, test_dataset, y_train, y_test = train_test_split(
    adult_income_df, target, test_size=0.2, random_state=0, stratify=target
)

# Feature-only views of each split (outcome column removed).
x_train = train_dataset.drop("income", axis=1)
x_test = test_dataset.drop("income", axis=1)

KeyboardInterrupt: 

In [17]:
# DiCE data interface for the adult-income training split: "age" and
# "hours_per_week" are declared continuous, "income" is the outcome column.
adult_continuous_features = ["age", "hours_per_week"]
d = dice_ml.Data(
    dataframe=train_dataset,
    continuous_features=adult_continuous_features,
    outcome_name="income",
)

In [18]:
# Show the second and third test rows (positional slice) — the rows used as
# query instances below.
x_test.iloc[1:3]

Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week
24228,50,Other/Unknown,Some-college,Married,Other/Unknown,White,Male,40
605,50,Private,Bachelors,Married,Professional,White,Male,40


In [None]:
backend = "PYT"  # needs pytorch installed

# Path returned by DiCE's helper for the adult-income model of this backend.
ML_modelpath = helpers.get_adult_income_modelpath(backend=backend)
m = dice_ml.Model(model_path=ML_modelpath, backend=backend, func="ohe-min-max")
exp = dice_ml.Dice(d, m, method="gradient")

# generate counterfactuals: four per query row, targeting the opposite class
query_rows = x_test[1:3]
dice_exp = exp.generate_counterfactuals(
    query_rows,
    total_CFs=4,
    desired_class="opposite",
)

# highlight only the changes
dice_exp.visualize_as_dataframe(show_only_changes=True)

  self.model = torch.load(self.model_path)
  0%|          | 0/2 [00:00<?, ?it/s]

tensor(1.5248, grad_fn=<DivBackward0>)
tensor(0.5594, grad_fn=<DivBackward0>)
tensor(0.0845, grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0.0076, grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tens

 50%|█████     | 1/2 [00:02<00:02,  2.01s/it]

tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
Diverse Counterfactuals found! total time taken: 00 min 01 sec
tensor(4.4410, grad_fn=<DivBackward0>)
tensor(3.3583, grad_fn=<DivBackward0>)
tensor(2.4770, grad_fn=<DivBackward0>)
tensor(1.6074, grad_fn=<DivBackward0>)
tensor(0.7526, grad_fn=<DivBackward0>)
tensor(0.2586, grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)
tensor(0., grad_fn=<DivBackward0>)





KeyboardInterrupt: 

In [None]:
# Take the feature splits stored on `dataset`.
x_train, x_test = dataset.X_train, dataset.X_test

# Training features with `dataset.y_train` appended as the last column, named
# according to `features`.
label_column = dataset.y_train.reshape(-1, 1)
input_dataframe = pd.DataFrame(
    np.concatenate((x_train, label_column), axis=1),
    columns=features,
)

# DiCE data interface: columns "0", "1" and "2" are declared continuous and
# "label" is the outcome.
d = dice_ml.Data(
    dataframe=input_dataframe,
    continuous_features=["0", "1", "2"],
    outcome_name="label",
)

MultilayerPerceptron(
  (layers): ModuleList(
    (0): Linear(in_features=3, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=256, bias=True)
    (2): Linear(in_features=256, out_features=2, bias=True)
  )
  (relu): ReLU()
  (dropout): Dropout(p=0.2, inplace=False)
  (final_activation): Softmax(dim=1)
  (criterion): CrossEntropyLoss()
)

In [None]:
# NOTE(review): this passes `disc_model` directly, whereas the earlier DiCE
# setup in this notebook wraps it in DiscWrapper — confirm which is intended.
m = dice_ml.Model(model=disc_model, backend=backend, func="ohe-min-max")
exp = dice_ml.Dice(d, m, method="gradient")

# generate counterfactuals for the first four rows (all columns but the last,
# i.e. without the outcome), four per row, targeting the opposite class
query_rows = input_dataframe.iloc[:4, :-1]
dice_exp = exp.generate_counterfactuals(
    query_rows,
    total_CFs=4,
    desired_class="opposite",
)

# highlight only the changes
dice_exp.visualize_as_dataframe(show_only_changes=True)

  0%|          | 0/4 [00:00<?, ?it/s]

tensor(2.8680, grad_fn=<DivBackward0>)
tensor(2.9213, grad_fn=<DivBackward0>)
tensor(2.9018, grad_fn=<DivBackward0>)
tensor(2.9511, grad_fn=<DivBackward0>)
tensor(2.6759, grad_fn=<DivBackward0>)
tensor(2.5812, grad_fn=<DivBackward0>)
tensor(3.0578, grad_fn=<DivBackward0>)
tensor(2.8266, grad_fn=<DivBackward0>)
tensor(2.5059, grad_fn=<DivBackward0>)
tensor(2.4702, grad_fn=<DivBackward0>)
tensor(2.4241, grad_fn=<DivBackward0>)
tensor(2.5391, grad_fn=<DivBackward0>)
tensor(2.3601, grad_fn=<DivBackward0>)
tensor(2.5543, grad_fn=<DivBackward0>)
tensor(2.4518, grad_fn=<DivBackward0>)
tensor(2.5221, grad_fn=<DivBackward0>)
tensor(2.2850, grad_fn=<DivBackward0>)
tensor(2.3903, grad_fn=<DivBackward0>)
tensor(2.1513, grad_fn=<DivBackward0>)
tensor(2.5907, grad_fn=<DivBackward0>)
tensor(2.1453, grad_fn=<DivBackward0>)
tensor(2.4277, grad_fn=<DivBackward0>)
tensor(2.5599, grad_fn=<DivBackward0>)
tensor(2.3559, grad_fn=<DivBackward0>)
tensor(2.3465, grad_fn=<DivBackward0>)
tensor(2.3389, grad_fn=<D

  0%|          | 0/4 [00:10<?, ?it/s]

tensor(2.4818, grad_fn=<DivBackward0>)
tensor(2.0953, grad_fn=<DivBackward0>)
tensor(2.4730, grad_fn=<DivBackward0>)
tensor(2.2817, grad_fn=<DivBackward0>)
tensor(2.4502, grad_fn=<DivBackward0>)
tensor(2.4524, grad_fn=<DivBackward0>)
tensor(2.4668, grad_fn=<DivBackward0>)
tensor(2.4687, grad_fn=<DivBackward0>)
tensor(2.2944, grad_fn=<DivBackward0>)
tensor(2.0727, grad_fn=<DivBackward0>)
tensor(2.3594, grad_fn=<DivBackward0>)
tensor(2.4369, grad_fn=<DivBackward0>)
tensor(2.3806, grad_fn=<DivBackward0>)
tensor(2.3896, grad_fn=<DivBackward0>)
tensor(2.3469, grad_fn=<DivBackward0>)
tensor(2.3889, grad_fn=<DivBackward0>)
tensor(2.2596, grad_fn=<DivBackward0>)
tensor(2.2906, grad_fn=<DivBackward0>)
tensor(2.2735, grad_fn=<DivBackward0>)
tensor(2.3728, grad_fn=<DivBackward0>)
tensor(2.4019, grad_fn=<DivBackward0>)
tensor(2.4577, grad_fn=<DivBackward0>)
tensor(2.2262, grad_fn=<DivBackward0>)
tensor(2.5881, grad_fn=<DivBackward0>)
tensor(2.0830, grad_fn=<DivBackward0>)
tensor(2.3280, grad_fn=<D




KeyboardInterrupt: 