In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import torch

from counterfactuals.cf_methods.ppcef import PPCEF
from counterfactuals.datasets import MoonsDataset
from counterfactuals.discriminative_models import MultilayerPerceptron
from counterfactuals.generative_models import MaskedAutoregressiveFlow
from counterfactuals.losses import BinaryDiscLoss
from counterfactuals.metrics import evaluate_cf

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
dataset = MoonsDataset("../data/moons.csv")
train_dataloader = dataset.train_dataloader(batch_size=1024, shuffle=True)
test_dataloader = dataset.test_dataloader(batch_size=1024, shuffle=False)

In [5]:
train_dataloader = dataset.train_dataloader(batch_size=128, shuffle=True)
test_dataloader = dataset.test_dataloader(batch_size=128, shuffle=False)
disc_model = MultilayerPerceptron(
    input_size=2, hidden_layer_sizes=[256, 256], target_size=1, dropout=0.2
)
# disc_model.fit(
#     train_dataloader,
#     test_dataloader,
#     epochs=5000,
#     patience=300,
#     lr=1e-3,
# )
disc_model.load("../models/MoonsDataset/disc_model_MultilayerPerceptron.pt")

  self.load_state_dict(torch.load(path))


In [6]:
gen_model = MaskedAutoregressiveFlow(
    features=dataset.X_train.shape[1], hidden_features=8, context_features=1
)
gen_train_dataloader = dataset.train_dataloader(
    batch_size=1024, shuffle=True, noise_lvl=0.03
)
gen_model.fit(train_dataloader, test_dataloader, num_epochs=1000)

Epoch 219, Train: -1.4838, test: -1.5296, patience: 20:  22%|██▏       | 219/1000 [00:05<00:20, 38.60it/s]
  self.load_state_dict(torch.load(path))


In [8]:
device = next(gen_model.parameters()).device
print(f"Model is on device: {device}")

Model is on device: cpu


In [9]:
cf_dataloader = dataset.test_dataloader(batch_size=1024, shuffle=False)
log_prob_threshold = torch.quantile(gen_model.predict_log_prob(cf_dataloader), 0.25)

In [16]:
DEVICE = "mps"

In [19]:
cf = PPCEF(
    gen_model=gen_model,
    disc_model=disc_model,
    disc_model_criterion=BinaryDiscLoss(),
    neptune_run=None,
    device=DEVICE,
)
disc_model = disc_model.to(DEVICE)
gen_model = gen_model.to(DEVICE)
deltas, X_orig, y_orig, y_target, logs = cf.explain_dataloader(
    cf_dataloader, alpha=100, log_prob_threshold=log_prob_threshold, epochs=4000
)

Discriminator loss: 0.0000, Prob loss: 0.0000:  52%|█████▏    | 2089/4000 [00:25<00:22, 83.45it/s]


In [None]:
X_cf = X_orig + deltas

In [None]:
evaluate_cf(
    disc_model=disc_model,
    gen_model=gen_model,
    X_cf=X_cf,
    model_returned=np.ones(X_cf.shape[0]),
    continuous_features=dataset.numerical_features,
    categorical_features=dataset.categorical_features,
    X_train=dataset.X_train,
    y_train=dataset.y_train,
    X_test=X_orig,
    y_test=y_orig,
    median_log_prob=log_prob_threshold,
    y_target=y_target,
)

{'coverage': 1.0,
 'validity': 0.9951219512195122,
 'actionability': 0.0,
 'sparsity': 1.0,
 'proximity_categorical_hamming': nan,
 'proximity_categorical_jaccard': 0.25987768229983255,
 'proximity_continuous_manhattan': 0.3320311238149218,
 'proximity_continuous_euclidean': 0.25987768229983255,
 'proximity_continuous_mad': 1.7110702190913407,
 'proximity_l2_jaccard': 0.25987768229983255,
 'proximity_mad_hamming': nan,
 'prob_plausibility': 0.8585365853658536,
 'log_density_cf': 0.75172883,
 'log_density_test': -849.48065,
 'lof_scores_cf': 1.0917826,
 'lof_scores_test': 1.0409402,
 'isolation_forest_scores_cf': 0.01993312725633935,
 'isolation_forest_scores_test': 0.0041604418163049064}