In [1]:
import numpy as np
import torch
from counterfactuals.cf_methods.ppcef import PPCEF
from counterfactuals.generative_models import MaskedAutoregressiveFlow
from counterfactuals.discriminative_models import MultilayerPerceptron
from counterfactuals.losses import BinaryDiscLoss
from counterfactuals.metrics import evaluate_cf


from counterfactuals.datasets import GermanCreditDataset

dataset = GermanCreditDataset("../data/german_credit.csv")
train_dataloader = dataset.train_dataloader(batch_size=128, shuffle=True)
test_dataloader = dataset.test_dataloader(batch_size=128, shuffle=False)

disc_model = MultilayerPerceptron(
    input_size=dataset.X_train.shape[1], hidden_layer_sizes=[256, 256], target_size=1, dropout=0.2
)
disc_model.fit(
    train_dataloader,
    test_dataloader,
    epochs=5000,
    patience=300,
    lr=1e-3,
)

gen_model = MaskedAutoregressiveFlow(
    features=dataset.X_train.shape[1], hidden_features=8, context_features=1
)
gen_model.fit(train_dataloader, test_dataloader, num_epochs=1000)

cf = PPCEF(
    gen_model=gen_model,
    disc_model=disc_model,
    disc_model_criterion=BinaryDiscLoss(),
    neptune_run=None,
)
cf_dataloader = dataset.test_dataloader(batch_size=1024, shuffle=False)
log_prob_threshold = torch.quantile(gen_model.predict_log_prob(cf_dataloader), 0.25)
deltas, X_orig, y_orig, y_target, logs = cf.explain_dataloader(
    cf_dataloader, alpha=100, log_prob_threshold=log_prob_threshold, epochs=4000
)
X_cf = X_orig + deltas
print(X_cf)
evaluate_cf(
    disc_model=disc_model,
    gen_model=gen_model,
    X_cf=X_cf,
    model_returned=np.ones(X_cf.shape[0]),
    continuous_features=dataset.numerical_features,
    categorical_features=dataset.categorical_features,
    X_train=dataset.X_train,
    y_train=dataset.y_train,
    X_test=X_orig,
    y_test=y_orig,
    median_log_prob=log_prob_threshold,
    y_target=y_target,
)

  from .autonotebook import tqdm as notebook_tqdm
  self.load_state_dict(torch.load(path))
Epoch 882, Train: 0.4440, test: 0.4569, patience: 300:  18%|█▊        | 883/5000 [00:09<00:44, 93.22it/s] 
Epoch 84, Train: 9.0371, test: 12.3365, patience: 20:   8%|▊         | 84/1000 [00:02<00:31, 28.84it/s] 
  self.load_state_dict(torch.load(path))
Discriminator loss: 0.0000, Prob loss: 0.0000:  48%|████▊     | 1918/4000 [00:08<00:09, 219.48it/s]


[[ 0.20855986  0.1378095   0.20237193 ...  0.16957149  0.02099049
   1.1299303 ]
 [ 0.33084458  0.24264444  0.85357374 ...  0.8833586   0.09983674
  -0.12132711]
 [ 0.47696322  0.40052953  0.10900552 ...  0.10918819 -0.06130932
   0.02809672]
 ...
 [ 0.1445571   0.12429325  0.7784836  ...  0.9764855   0.05507394
   0.11581844]
 [ 0.25083196  0.18131249  1.1315845  ...  1.1039239   0.11300158
   0.13523567]
 [ 0.59167206  0.29338205  1.1213503  ...  1.0743941   0.10213802
   0.11957524]]


{'coverage': 1.0,
 'validity': 0.65,
 'actionability': 0.14166666666666666,
 'sparsity': 0.8580409356725146,
 'proximity_categorical_hamming': 0.7799560093909939,
 'proximity_categorical_jaccard': 0.6141890278345388,
 'proximity_continuous_manhattan': 0.6556664725228718,
 'proximity_continuous_euclidean': 0.6141890278345388,
 'proximity_continuous_mad': 1.0053072366993714,
 'proximity_l2_jaccard': 0.6141890278345388,
 'proximity_mad_hamming': 1.1710742182558267,
 'prob_plausibility': 1.0,
 'log_density_cf': -10.40897,
 'log_density_test': -14.123992,
 'lof_scores_cf': 1.0274575,
 'lof_scores_test': 1.0363257,
 'isolation_forest_scores_cf': 0.013081027773655189,
 'isolation_forest_scores_test': 0.03140577863894244}

In [2]:
from counterfactuals.datasets import DigitsDataset

dataset = DigitsDataset("../data/digits.csv")
train_dataloader = dataset.train_dataloader(batch_size=128, shuffle=True)
test_dataloader = dataset.test_dataloader(batch_size=128, shuffle=False)

disc_model = MultilayerPerceptron(
    input_size=dataset.X_train.shape[1], hidden_layer_sizes=[256, 256], target_size=1, dropout=0.2
)
disc_model.fit(
    train_dataloader,
    test_dataloader,
    epochs=5000,
    patience=300,
    lr=1e-3,
)

gen_model = MaskedAutoregressiveFlow(
    features=dataset.X_train.shape[1], hidden_features=8, context_features=1
)
gen_model.fit(train_dataloader, test_dataloader, num_epochs=1000)

cf = PPCEF(
    gen_model=gen_model,
    disc_model=disc_model,
    disc_model_criterion=BinaryDiscLoss(),
    neptune_run=None,
)
cf_dataloader = dataset.test_dataloader(batch_size=1024, shuffle=False)
log_prob_threshold = torch.quantile(gen_model.predict_log_prob(cf_dataloader), 0.25)
deltas, X_orig, y_orig, y_target, logs = cf.explain_dataloader(
    cf_dataloader, alpha=100, log_prob_threshold=log_prob_threshold, epochs=4000
)
X_cf = X_orig + deltas
print(X_cf)
evaluate_cf(
    disc_model=disc_model,
    gen_model=gen_model,
    X_cf=X_cf,
    model_returned=np.ones(X_cf.shape[0]),
    continuous_features=dataset.numerical_features,
    categorical_features=dataset.categorical_features,
    X_train=dataset.X_train,
    y_train=dataset.y_train,
    X_test=X_orig,
    y_test=y_orig,
    median_log_prob=log_prob_threshold,
    y_target=y_target,
)

  self.load_state_dict(torch.load(path))
Epoch 4999, Train: -10626744661.3333, test: -10549363712.0000, patience: 159: 100%|██████████| 5000/5000 [02:14<00:00, 37.23it/s]
Epoch 57, Train: -50.5208, test: -21.7906, patience: 20:   6%|▌         | 57/1000 [00:04<01:09, 13.51it/s]
  self.load_state_dict(torch.load(path))
Discriminator loss: 2930.0640, Prob loss: 92.7364: 100%|██████████| 4000/4000 [00:29<00:00, 133.50it/s]     


[[-0.06541917 -0.34846738 -0.3484676  ...  0.58902955 -0.03596798
  -0.348468  ]
 [ 0.          0.          0.375      ...  0.          0.
   0.        ]
 [ 0.          0.          0.25       ...  0.          0.
   0.        ]
 ...
 [ 0.          0.          0.1875     ...  0.          0.
   0.        ]
 [ 0.          0.          0.         ...  1.          0.0625
   0.        ]
 [ 0.          0.          0.3125     ...  0.625       0.0625
   0.        ]]


{'coverage': 1.0,
 'validity': 0.8972222222222223,
 'actionability': 0.5555555555555556,
 'sparsity': 0.44114583333333335,
 'proximity_categorical_hamming': nan,
 'proximity_categorical_jaccard': 0.17294361829826818,
 'proximity_continuous_manhattan': 1.2148535747310887,
 'proximity_continuous_euclidean': 0.17294361829826818,
 'proximity_continuous_mad': 4.719014181145086,
 'proximity_l2_jaccard': 0.17294361829826818,
 'proximity_mad_hamming': nan,
 'prob_plausibility': 0.8944444444444445,
 'log_density_cf': -53.09069,
 'log_density_test': 16.866123,
 'lof_scores_cf': 1.139849,
 'lof_scores_test': 1.0452181,
 'isolation_forest_scores_cf': 0.019408738300851748,
 'isolation_forest_scores_test': 0.018654854560416018}

In [7]:
from counterfactuals.datasets import BlobsDataset

dataset = BlobsDataset("../data/blobs.csv")
train_dataloader = dataset.train_dataloader(batch_size=128, shuffle=True)
test_dataloader = dataset.test_dataloader(batch_size=128, shuffle=False)

disc_model = MultilayerPerceptron(
    input_size=dataset.X_train.shape[1], hidden_layer_sizes=[256, 256], target_size=1, dropout=0.2
)
disc_model.fit(
    train_dataloader,
    test_dataloader,
    epochs=5000,
    patience=300,
    lr=1e-3,
)

gen_model = MaskedAutoregressiveFlow(
    features=dataset.X_train.shape[1], hidden_features=8, context_features=1
)
gen_model.fit(train_dataloader, test_dataloader, num_epochs=1000)

cf = PPCEF(
    gen_model=gen_model,
    disc_model=disc_model,
    disc_model_criterion=BinaryDiscLoss(),
    neptune_run=None,
)
cf_dataloader = dataset.test_dataloader(batch_size=1024, shuffle=False)
log_prob_threshold = torch.quantile(gen_model.predict_log_prob(cf_dataloader), 0.25)
deltas, X_orig, y_orig, y_target, logs = cf.explain_dataloader(
    cf_dataloader, alpha=100, log_prob_threshold=log_prob_threshold, epochs=4000
)

X_orig = X_orig.astype(np.float32)
deltas = deltas.astype(np.float32)
X_cf = X_orig + deltas

print(X_cf)
evaluate_cf(
    disc_model=disc_model,
    gen_model=gen_model,
    X_cf=X_cf,
    model_returned=np.ones(X_cf.shape[0]),
    continuous_features=dataset.numerical_features,
    categorical_features=dataset.categorical_features,
    X_train=dataset.X_train,
    y_train=dataset.y_train,
    X_test=X_orig,
    y_test=y_orig,
    median_log_prob=log_prob_threshold,
    y_target=y_target,
)

  self.load_state_dict(torch.load(path))
Epoch 2383, Train: -2319938.7875, test: -2633079.7917, patience: 300:  48%|████▊     | 2384/5000 [00:56<01:02, 41.89it/s]
Epoch 102, Train: -2.5840, test: -2.5759, patience: 20:  10%|█         | 102/1000 [00:04<00:43, 20.81it/s]
  self.load_state_dict(torch.load(path))
Discriminator loss: 0.0000, Prob loss: 16.4842: 100%|██████████| 4000/4000 [00:16<00:00, 241.17it/s]    


[[0.6848254  0.5007166 ]
 [0.69642645 0.449036  ]
 [0.4738344  0.7422067 ]
 [0.69618946 0.5334375 ]
 [0.6935954  0.5281948 ]
 [0.68659556 0.51020414]
 [0.6904239  0.46120664]
 [0.7017569  0.54154766]
 [0.6841757  0.4876479 ]
 [1.1486564  0.6837827 ]
 [0.68787587 0.5144143 ]
 [0.36684412 0.7533405 ]
 [0.41149533 0.74930394]
 [0.95798486 0.7036364 ]
 [0.68413347 0.48747236]
 [0.36292073 0.7538557 ]
 [0.6908908  0.4596936 ]
 [0.68418837 0.48736665]
 [1.0037599  0.6984266 ]
 [0.6885126  0.5159408 ]
 [1.0132532  0.6977692 ]
 [0.38778174 0.7514899 ]
 [0.68948233 0.46340895]
 [0.70393854 0.54538953]
 [0.684158   0.48703423]
 [1.0838853  0.690157  ]
 [1.1181935  0.68698406]
 [0.39024925 0.7514297 ]
 [0.92602634 0.7066598 ]
 [1.0052577  0.69857895]
 [0.68659794 0.47207546]
 [0.68939096 0.46362922]
 [0.5113932  0.73872197]
 [0.98596793 0.70063215]
 [1.1754625  0.6807574 ]
 [0.2928777  0.7613116 ]
 [0.6846109  0.50124246]
 [0.33304965 0.7571043 ]
 [0.9782733  0.7011778 ]
 [0.69353    0.45456603]


{'coverage': 1.0,
 'validity': 1.0,
 'actionability': 0.0,
 'sparsity': 1.0,
 'proximity_categorical_hamming': nan,
 'proximity_categorical_jaccard': 0.34577512233162705,
 'proximity_continuous_manhattan': 0.4841130584424051,
 'proximity_continuous_euclidean': 0.34577512233162705,
 'proximity_continuous_mad': 2.0497399573524793,
 'proximity_l2_jaccard': 0.34577512233162705,
 'proximity_mad_hamming': nan,
 'prob_plausibility': 0.47333333333333333,
 'log_density_cf': -14.2182865,
 'log_density_test': -31.31396,
 'lof_scores_cf': 1.7909473,
 'lof_scores_test': 1.0961207,
 'isolation_forest_scores_cf': -0.049401894089847014,
 'isolation_forest_scores_test': 0.014531079122414758}