In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import numpy as np

from counterfactuals.datasets import LawDataset, AdultDataset, GermanCreditDataset
from counterfactuals.discriminative_models import MultilayerPerceptron

In [3]:
# Registry of the available experiments, keyed by dataset name. Each entry is a
# tuple of (dataset object, discriminative-model checkpoint path,
# generative-model checkpoint path).
# NOTE: every dataset object is constructed here, not only the selected one.
datasets = dict(
    adult=(AdultDataset("../data/adult.csv"), "adult_disc_model.pt", "adult_flow.pth"),
    law=(LawDataset("../data/law.csv"), "law_disc_model.pt", "law_flow.pth"),
    german=(
        GermanCreditDataset("../data/german_credit.csv"),
        "german_disc_model.pt",
        "german_flow.pth",
    ),
)

# Pick the experiment that the following cells operate on.
dataset, disc_model_path, gen_model_path = datasets["law"]

In [4]:
# Cast both feature splits to float32 and store them back on the dataset.
dataset.X_train, dataset.X_test = (
    dataset.X_train.astype(np.float32),
    dataset.X_test.astype(np.float32),
)

In [5]:
# Build the classifier and restore its weights from `disc_model_path`.
# The constructor arguments are presumably (input size, hidden layer sizes,
# output size) — TODO confirm against MultilayerPerceptron.
#
# Earlier configuration, kept for reference:
# disc_model = MultilayerPerceptron(dataset.X_test.shape[1], [512, 512], 2)
disc_model = MultilayerPerceptron(dataset.X_test.shape[1], [256, 256], 1)
# The training call below is commented out; the weights are loaded from the
# checkpoint file instead.
# disc_model.fit(
#     dataset.train_dataloader(batch_size=128, shuffle=True),
#     dataset.test_dataloader(batch_size=128, shuffle=False),
#     epochs=5000,
#     patience=100,
#     lr=1e-3,
#     checkpoint_path=disc_model_path,
# )
disc_model.load(disc_model_path)
# Alternative checkpoint (disabled):
# disc_model.load("german_disc_model_onehot.pt")

  self.load_state_dict(torch.load(path))


In [6]:
# Predict on the test split, convert the result to a flat NumPy array, and
# report the fraction of predictions equal to the dataset's test labels.
y_pred = disc_model.predict(dataset.X_test)
y_pred = y_pred.detach().numpy().flatten()
print("Test accuracy:", np.mean(y_pred == dataset.y_test))

Test accuracy: 0.759009009009009


In [7]:
# Overwrite the dataset's labels with the classifier's own predictions on each
# split, converted to NumPy arrays.
# NOTE(review): a later cell rebinds `dataset` to a newly constructed
# LawDataset, so these assignments are not present on that object — confirm
# this is intended.
dataset.y_train = disc_model.predict(dataset.X_train).detach().numpy()
dataset.y_test = disc_model.predict(dataset.X_test).detach().numpy()

In [23]:
import pandas as pd
import dice_ml

# NOTE(review): this rebinds `dataset` to a newly constructed LawDataset. The
# float32 casts and prediction-based labels assigned to the previous object are
# therefore not present on this one, and the selection made via
# `datasets[...]` is bypassed — confirm this is intended.
dataset = LawDataset("../data/law.csv")

X_train = dataset.X_train
y_train = dataset.y_train

# Column names: stringified feature indices followed by the outcome column.
features = [str(idx) for idx in range(dataset.X_train.shape[1])] + ["label"]

In [9]:
# Training table handed to DiCE: the feature columns with the labels appended
# as the last column, named according to `features`.
input_dataframe = pd.DataFrame(
    data=np.hstack((X_train, y_train.reshape(-1, 1))),
    columns=features,
)

# DiCE data interface; the dataset's numerical columns (as strings) are declared
# continuous and the last entry of `features` is the outcome column.
dice = dice_ml.Data(
    dataframe=input_dataframe,
    continuous_features=[str(column) for column in dataset.numerical_columns],
    outcome_name=features[-1],
)

In [10]:
import torch.nn as nn


class DiscWrapper(nn.Module):
    """Module that applies a sigmoid to the output of a wrapped model.

    The wrapped model is stored as ``self.model`` and is called unchanged;
    only its output is transformed.
    """

    def __init__(self, model):
        """Store ``model`` as the wrapped sub-module."""
        super().__init__()
        self.model = model

    def forward(self, x):
        """Return ``sigmoid(self.model(x))``."""
        raw_output = self.model(x)
        return torch.sigmoid(raw_output)

In [11]:
# Wrap the classifier so that calling it returns sigmoid-transformed outputs.
disc_model_w = DiscWrapper(disc_model)

In [12]:
# Register the wrapped classifier with DiCE using its PyTorch ("PYT") backend.
model = dice_ml.Model(model=disc_model_w, backend="PYT")

# Counterfactual explainer built from the data interface and the model
# interface, using the "gradient" method.
exp = dice_ml.Dice(dice, model, method="gradient")

In [27]:
# Keep the test rows whose label is non-zero and take the first 10 of them,
# applying the same mask and slice to the features and to the labels.
X_test_origin, y_test_origin = (
    split[dataset.y_test != 0][:10] for split in (dataset.X_test, dataset.y_test)
)

In [28]:
# Query points for DiCE: a frame with the feature column names only (the
# outcome name is excluded), limited to the first 10 rows.
query_instance = pd.DataFrame(X_test_origin, columns=features[:-1]).iloc[:10]

In [29]:
# Request one counterfactual per query row with the desired class set to
# "opposite"; the post-hoc sparsity parameter is passed as None.
cfs = exp.generate_counterfactuals(
    query_instance,
    total_CFs=1,
    desired_class="opposite",
    posthoc_sparsity_param=None,
    learning_rate=0.05,
)
# Additional keyword overrides that are currently disabled:
#   proximity_weight=0, diversity_weight=0, categorical_penalty=0.0

Diverse Counterfactuals found! total time taken: 00 min 01 sec
Diverse Counterfactuals found! total time taken: 00 min 00 sec
Diverse Counterfactuals found! total time taken: 00 min 01 sec
Diverse Counterfactuals found! total time taken: 00 min 18 sec
Diverse Counterfactuals found! total time taken: 00 min 02 sec
Diverse Counterfactuals found! total time taken: 00 min 19 sec
Diverse Counterfactuals found! total time taken: 00 min 02 sec
Diverse Counterfactuals found! total time taken: 00 min 08 sec
Diverse Counterfactuals found! total time taken: 00 min 01 sec
Diverse Counterfactuals found! total time taken: 00 min 01 sec


In [15]:
from sklearn.model_selection import train_test_split

import dice_ml
from dice_ml.utils import helpers

In [16]:
# NOTE: `dataset` and `y_train` are rebound here; from this point they refer to
# the adult income data loaded through the DiCE helpers, not to the objects
# used in the cells above.
dataset = helpers.load_adult_income_dataset()
target = dataset["income"]

# 80/20 split, stratified on the target, with a fixed random seed.
train_dataset, test_dataset, y_train, y_test = train_test_split(
    dataset,
    target,
    test_size=0.2,
    random_state=0,
    stratify=target,
)

# Feature frames: the splits with the outcome column removed.
x_train = train_dataset.drop(columns="income")
x_test = test_dataset.drop(columns="income")