In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

from cel.cf_methods import DICE
from cel.datasets.file_dataset import FileDataset
from cel.metrics.metrics import evaluate_cf
from cel.models import LogisticRegression, MaskedAutoregressiveFlow

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Prepare the data

dataset = FileDataset(config_path="../config/datasets/moons.yaml")
# dataset = AdultDataset()

# Get the split data that's already available
X_train = dataset.X_train
X_test = dataset.X_test
y_train = dataset.y_train
y_test = dataset.y_test

train_dataset = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.float32),
)
test_dataset = TensorDataset(
    torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32)
)

train_dataloader = DataLoader(train_dataset, batch_size=1024, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1024, shuffle=False)

In [4]:
# Train a discriminative model
num_inputs = X_train.shape[1]
num_targets = 1

discrimaiative_model = LogisticRegression(
    num_inputs=num_inputs,
    num_targets=num_targets,
)
discrimaiative_model.fit(train_dataloader, test_dataloader, epochs=10000, patience=600, lr=0.01)

Epoch 1965, Train: 0.2589, test: 0.2771, patience: 600:  20%|█▉        | 1965/10000 [00:05<00:21, 371.11it/s]


In [5]:
# Train a generative model
num_inputs = X_train.shape[1]
num_targets = 1

generative_model = MaskedAutoregressiveFlow(
    features=num_inputs,
    hidden_features=128,
    context_features=num_targets,
)
generative_model.fit(train_dataloader, test_dataloader, epochs=10000, patience=600, lr=0.01)

Epoch 1898, Train: 10.7169, test: 10.9516, patience: 600:  19%|█▉        | 1898/10000 [00:35<02:30, 53.83it/s]   


In [6]:
log_prob_threshold = np.median(generative_model.predict_log_prob(test_dataloader))

In [7]:
cf_method = DICE(
    X_train=X_train,
    y_train=y_train,
    features=dataset.features,
    disc_model=discrimaiative_model,
)

results = cf_method.explain_dataloader(test_dataloader)

  final_cfs_sparse.at[cf_ix, feature] += np.sign(diff)*change
 20%|██        | 1/5 [00:02<00:09,  2.30s/it]

Diverse Counterfactuals found! total time taken: 00 min 00 sec


 60%|██████    | 3/5 [00:04<00:02,  1.08s/it]

No Counterfactuals found for the given configuation,  perhaps try with different values of proximity (or diversity) weights or learning rate... ; total time taken: 00 min 01 sec
Diverse Counterfactuals found! total time taken: 00 min 00 sec


100%|██████████| 5/5 [00:05<00:00,  1.14s/it]

No Counterfactuals found for the given configuation,  perhaps try with different values of proximity (or diversity) weights or learning rate... ; total time taken: 00 min 01 sec
Diverse Counterfactuals found! total time taken: 00 min 00 sec





In [8]:
results

ExplanationResult(x_cfs=array([[0.7659872, 0.027    ],
       [1.       , 0.035    ],
       [1.       , 0.035    ]], dtype=float32), y_cf_targets=array([0., 0., 0.], dtype=float32), x_origs=array([[ 0.76923686, -0.4073719 ],
       [ 1.554943  , -0.28547853],
       [ 1.1425214 , -0.61483914]], dtype=float32), y_origs=array([1., 1., 1.], dtype=float32), logs=None)

In [9]:
evaluate_cf(
    disc_model=discrimaiative_model,
    gen_model=generative_model,
    X_cf=results.x_cfs,
    model_returned=np.ones_like(results.x_cfs),
    continuous_features=dataset.numerical_features,
    categorical_features=dataset.categorical_features,
    median_log_prob=log_prob_threshold,
    X_train=X_train,
    y_train=y_train,
    X_test=results.x_origs,
    y_test=results.y_origs,
)

2025-10-12 18:47:38,951 - counterfactuals.metrics.distances - INFO - Calculating combined distance
2025-10-12 18:47:38,951 - counterfactuals.metrics.distances - INFO - Calculating continuous distance


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices