In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell runs, so edits to the project source are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import torch
import numpy as np

In [None]:
from nflows.flows import MaskedAutoregressiveFlow

from counterfactuals.datasets import (
    AdultDataset,
)
from counterfactuals.discriminative_models import LogisticRegression
from counterfactuals.optimizers.ppcef import PPCEF

from counterfactuals.metrics.metrics import (
    evaluate_cf,
)
from sklearn.metrics import classification_report

# Create dataset

In [None]:
# Seed the global NumPy and PyTorch RNGs before the first stochastic step
# (model initialisation, shuffled data loaders, flow training, counterfactual
# search) so that Restart Kernel -> Run All yields the same results.
# NOTE(review): if the project code uses its own generators or Python's
# `random`, those need seeding separately — confirm.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Dataset under study. The alternatives below are kept for reference; their
# classes are not imported in this notebook (presumably they live in
# counterfactuals.datasets next to AdultDataset — TODO confirm) and must be
# imported before uncommenting.
# dataset = CompasDataset(file_path="../data/compas_two_years.csv")
# dataset = GermanCreditDataset(file_path="../data/german_credit.csv")
dataset = AdultDataset(file_path="../data/adult.csv")

In [None]:
# Discriminative model: logistic regression sized to the number of input
# columns, with 1 as its second constructor argument (presumably the output
# dimension — confirm against LogisticRegression).
# Alternative kept for reference (MultilayerPerceptron is not imported here):
# disc_model = MultilayerPerceptron([dataset.X_train.shape[1], 128, 1])
disc_model = LogisticRegression(dataset.X_train.shape[1], 1)

# Fit on the training split with noise_lvl=0 for 20 epochs.
train_dataloader = dataset.train_dataloader(batch_size=128, shuffle=True, noise_lvl=0)
disc_model.fit(train_dataloader, epochs=20)

# Run inference on the test split once and reuse the result for both the
# classification report and the shape check (previously predicted twice).
test_predictions = disc_model.predict(dataset.X_test)
print(classification_report(dataset.y_test, test_predictions))
test_predictions.shape

# Relabeling: replace the dataset labels with the classifier's predictions

In [None]:
# Overwrite the dataset's labels with the discriminative model's predictions,
# so everything downstream is conditioned on what the classifier outputs rather
# than on the original labels.
# NOTE: this mutates `dataset` in place; re-running the classification report
# cell afterwards would compare the predictions against themselves.
y_pred_train, y_pred_test = (
    disc_model.predict(features) for features in (dataset.X_train, dataset.X_test)
)
dataset.y_train, dataset.y_test = y_pred_train, y_pred_test

# Rebuild the loaders so they are created after the relabeling above.
# noise_lvl - adds noise to the numerical features of the training dataset.
loader_batch_size = 128
train_dataloader = dataset.train_dataloader(
    batch_size=loader_batch_size,
    shuffle=True,
    noise_lvl=1e-5,
)
test_dataloader = dataset.test_dataloader(
    batch_size=loader_batch_size,
    shuffle=False,
)

# Create flow model

In [None]:
# Alternative flow architecture kept for reference; uncomment both lines to use it.
# from nflows.flows import SimpleRealNVP
# flow = SimpleRealNVP(use_volume_preserving=True, features=dataset.X_train.shape[1], hidden_features=4, context_features=1, num_layers=5)

# Generative model: a masked autoregressive flow over all input columns with a
# single context feature (presumably the class label — confirm against PPCEF).
flow_config = dict(
    features=dataset.X_train.shape[1],
    hidden_features=4,
    num_blocks_per_layer=2,
    num_layers=1,
    context_features=1,
)
flow = MaskedAutoregressiveFlow(**flow_config)

# Create cf class, train and test flow model

In [None]:
# Loss handed to PPCEF for the discriminative model.
# NOTE(review): BCELoss expects probabilities in [0, 1]; confirm that
# disc_model outputs probabilities rather than raw logits.
bce_criterion = torch.nn.BCELoss()

# Bundle the flow and the classifier into the PPCEF counterfactual optimizer.
# "model.pt" is passed as the checkpoint path; no Neptune run is attached.
cf = PPCEF(
    gen_model=flow,
    disc_model=disc_model,
    disc_model_criterion=bce_criterion,
    checkpoint_path="model.pt",
    neptune_run=None,
)

In [None]:
# Training schedule for the flow model.
# NOTE(review): patience equals the number of epochs, so patience-based early
# stopping presumably cannot trigger within this run — confirm against
# PPCEF.train_model.
training_options = {
    "epochs": 20,
    "patience": 20,
    "eps": 1e-3,  # eps used together with patience
}

cf.train_model(
    train_loader=train_dataloader, test_loader=test_dataloader, **training_options
)

In [None]:
# Run PPCEF's test routine on the held-out loader; as the cell's last
# expression, any non-None return value is displayed as the cell output.
cf.test_model(test_loader=test_dataloader)

# Search counterfactuals

In [None]:
# Extra keyword arguments forwarded to the search call below.
search_step_kwargs = dict(alpha=20, beta=0.1)

# NOTE: this rebinds `test_dataloader` to a loader with batch_size=16
# (it was 128 above); re-running earlier cells afterwards will pick up this one.
test_dataloader = dataset.test_dataloader(shuffle=False, batch_size=16)

# Search counterfactuals for the test split; the call yields three values:
# the counterfactuals, the original inputs and the original labels.
search_result = cf.search_batch(
    dataloader=test_dataloader,
    epochs=1000,
    lr=0.005,
    **search_step_kwargs,
)
Xs_cf, Xs_orig, ys_orig = search_result

# Evaluate

In [None]:
# Every row of Xs_cf is marked as "returned by the model": an all-True boolean
# mask with one entry per counterfactual.
all_returned_mask = np.ones(Xs_cf.shape[0], dtype=bool)

# Score the counterfactuals; as the last expression of the cell, the result of
# evaluate_cf is displayed as the cell output.
# Note: dataset.y_train / dataset.y_test hold the classifier's predictions here,
# because the relabeling cell overwrote the original labels.
evaluate_cf(
    cf_class=cf,
    disc_model=disc_model,
    X=Xs_orig,
    X_cf=Xs_cf,
    model_returned=all_returned_mask,
    continuous_features=dataset.numerical_features,
    categorical_features=dataset.categorical_features,
    X_train=dataset.X_train,
    y_train=dataset.y_train,
    X_test=dataset.X_test,
    y_test=dataset.y_test,
)