In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

from counterfactuals.cf_methods import PPCEF
from counterfactuals.datasets.file_dataset import FileDataset
from counterfactuals.losses import BinaryDiscLoss
from counterfactuals.metrics.metrics import evaluate_cf
from counterfactuals.models import LogisticRegression, MaskedAutoregressiveFlow

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Prepare the data

# Seed the global NumPy and PyTorch RNGs before anything stochastic runs, so
# that "Restart Kernel -> Run All" yields repeatable results. The shuffled
# DataLoader below draws its permutation seed from PyTorch's global generator.
# Presumably weight initialisation in the models trained later also uses
# these RNGs - TODO confirm against the model implementations.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Single place to tune the mini-batch size used by both loaders.
BATCH_SIZE = 1024

dataset = FileDataset(config_path="../config/datasets/moons.yaml")

# Reuse the train/test split that the dataset object already exposes.
X_train = dataset.X_train
X_test = dataset.X_test
y_train = dataset.y_train
y_test = dataset.y_test

# Wrap features and labels as float32 tensors so they can be batched.
train_dataset = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.float32),
)
test_dataset = TensorDataset(
    torch.tensor(X_test, dtype=torch.float32),
    torch.tensor(y_test, dtype=torch.float32),
)

# Only the training loader is shuffled; the test loader keeps dataset order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [4]:
# Fit the discriminative model (`LogisticRegression`) on the training loader.
# NOTE: the identifier `discrimaiative_model` is misspelled, but the PPCEF and
# evaluate_cf cells further down reference it under exactly this name, so it
# is kept unchanged here.
num_targets = 1
num_inputs = X_train.shape[1]  # size of the second axis of X_train

discrimaiative_model = LogisticRegression(num_inputs=num_inputs, num_targets=num_targets)

# The test loader is passed as the second argument; presumably it drives the
# "test" loss / patience shown in the progress bar - TODO confirm in fit().
discrimaiative_model.fit(
    train_dataloader,
    test_dataloader,
    epochs=10000,
    patience=600,
    lr=0.01,
)

Epoch 2244, Train: 0.2591, test: 0.2769, patience: 600:  22%|██▏       | 2244/10000 [00:05<00:19, 389.46it/s]


In [5]:
# Fit the generative model (`MaskedAutoregressiveFlow`) on the training loader.
# The sizes are re-derived here so this cell does not rely on the values set
# in the discriminative-model cell.
num_targets = 1
num_inputs = X_train.shape[1]  # size of the second axis of X_train

generative_model = MaskedAutoregressiveFlow(
    features=num_inputs,
    context_features=num_targets,
    hidden_features=128,
)

# Same training arguments as the discriminative model's fit() call.
generative_model.fit(
    train_dataloader,
    test_dataloader,
    epochs=10000,
    patience=600,
    lr=0.01,
)

Epoch 2225, Train: 0.3243, test: 0.4486, patience: 600:  22%|██▏       | 2225/10000 [00:41<02:25, 53.54it/s]


In [6]:
# Threshold = median of whatever `predict_log_prob` returns for the test
# loader. It is later passed to `explain_dataloader` (as `log_prob_threshold`)
# and to `evaluate_cf` (as `median_log_prob`).
log_prob_threshold = np.median(
    generative_model.predict_log_prob(test_dataloader)
)

In [7]:
# Build the PPCEF explainer from the two fitted models plus a binary
# discriminator loss.
cf_method = PPCEF(
    gen_model=generative_model,
    disc_model=discrimaiative_model,
    disc_model_criterion=BinaryDiscLoss(),
)

# Run the counterfactual search over the test loader; `results.x_cfs` is read
# by the evaluation cell afterwards.
results = cf_method.explain_dataloader(
    test_dataloader,
    log_prob_threshold=log_prob_threshold,
    alpha=100,
    lr=0.01,
    epochs=10000,
    patience=600,
)

Discriminator loss: 0.0000, Prob loss: 0.2344: 100%|██████████| 10000/10000 [00:32<00:00, 307.03it/s]


In [13]:
# Score the generated counterfactuals with `evaluate_cf`.
#
# NOTE(review): as saved, this cell fails with
#   AttributeError: 'numpy.ndarray' object has no attribute 'numpy'
# i.e. `.numpy()` is being called on a value that is already a NumPy array.
# Which value that is cannot be determined from this notebook - TODO inspect
# the full traceback / `evaluate_cf` and fix before sharing.
# NOTE(review): `np.ones_like(results.x_cfs)` has the same shape and dtype as
# `x_cfs`; presumably `model_returned` is meant to be one flag per sample -
# confirm against `evaluate_cf`'s expected input.
# NOTE(review): the execution count jumps from 7 to 13; re-run with
# "Restart Kernel -> Run All" so the saved output reflects a clean run.
evaluate_cf(
    disc_model=discrimaiative_model,
    gen_model=generative_model,
    X_cf=results.x_cfs,
    model_returned=np.ones_like(results.x_cfs),
    continuous_features=dataset.numerical_features,
    categorical_features=dataset.categorical_features,
    median_log_prob=log_prob_threshold,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

AttributeError: 'numpy.ndarray' object has no attribute 'numpy'