In [None]:
import numpy as np
import torch
from counterfactuals.datasets import NetflixDataset
from counterfactuals.cf_methods.ppcef import PPCEF
from counterfactuals.generative_models import MaskedAutoregressiveFlow
from counterfactuals.discriminative_models import MultilayerPerceptron
from counterfactuals.losses import BinaryDiscLoss
from counterfactuals.metrics import evaluate_cf

# Load the Netflix titles CSV and build the batched loaders that the two
# fit() calls below consume. Only the training loader is shuffled.
DATA_PATH = "../data/netflix_titles.csv"
FIT_BATCH_SIZE = 128

dataset = NetflixDataset(DATA_PATH)
train_dataloader = dataset.train_dataloader(
    batch_size=FIT_BATCH_SIZE,
    shuffle=True,
)
test_dataloader = dataset.test_dataloader(
    batch_size=FIT_BATCH_SIZE,
    shuffle=False,
)

# Discriminative model: an MLP with two hidden layers of 256 units and a
# single output unit, with its input width taken from the training matrix.
num_input_features = dataset.X_train.shape[1]
disc_model = MultilayerPerceptron(
    input_size=num_input_features,
    hidden_layer_sizes=[256, 256],
    target_size=1,
    dropout=0.2,
)

# The test loader is handed to fit() as the second loader.
# NOTE(review): presumably it is used for validation / early stopping governed
# by `patience` -- confirm against MultilayerPerceptron.fit.
disc_fit_options = {"epochs": 5000, "patience": 300, "lr": 1e-3}
disc_model.fit(train_dataloader, test_dataloader, **disc_fit_options)

# Generative model: a masked autoregressive flow over the same feature width
# as the training matrix.
# NOTE(review): context_features=1 presumably means the flow is conditioned
# on the scalar class label -- confirm against MaskedAutoregressiveFlow.
flow_options = {
    "features": dataset.X_train.shape[1],
    "hidden_features": 8,
    "context_features": 1,
}
gen_model = MaskedAutoregressiveFlow(**flow_options)

# Same loader pair as the classifier fit above, capped at 1000 epochs.
gen_model.fit(train_dataloader, test_dataloader, num_epochs=1000)

# Counterfactual explainer wired to the two fitted models. No Neptune run is
# supplied (neptune_run=None).
disc_criterion = BinaryDiscLoss()
cf = PPCEF(
    gen_model=gen_model,
    disc_model=disc_model,
    disc_model_criterion=disc_criterion,
    neptune_run=None,
)

# Counterfactuals are generated for the test split, served in larger
# unshuffled batches than the ones used for fitting.
cf_dataloader = dataset.test_dataloader(batch_size=1024, shuffle=False)

# Threshold = 0.25 quantile of the flow's log-probabilities on that loader.
test_log_probs = gen_model.predict_log_prob(cf_dataloader)
log_prob_threshold = torch.quantile(test_log_probs, 0.25)

explain_options = {
    "alpha": 100,
    "log_prob_threshold": log_prob_threshold,
    "epochs": 4000,
}
deltas, X_orig, y_orig, y_target, logs = cf.explain_dataloader(
    cf_dataloader, **explain_options
)

# Each counterfactual is the original point plus its delta.
X_cf = X_orig + deltas
print("Generated Counterfactuals:", X_cf)

# Gather the inputs for the metric computation over the counterfactuals.
evaluation_inputs = {
    "disc_model": disc_model,
    "gen_model": gen_model,
    "X_cf": X_cf,
    # One entry per counterfactual, all ones.
    # NOTE(review): presumably marks every row as "returned by the method" -- confirm.
    "model_returned": np.ones(X_cf.shape[0]),
    "continuous_features": dataset.numerical_features,
    "categorical_features": dataset.categorical_features,
    "X_train": dataset.X_train,
    "y_train": dataset.y_train,
    # The originals returned by explain_dataloader serve as the test set here.
    "X_test": X_orig,
    "y_test": y_orig,
    # Despite the parameter name, the value passed is the 0.25-quantile
    # log-probability threshold computed earlier, not a median.
    "median_log_prob": log_prob_threshold,
    "y_target": y_target,
}

# Left as the trailing bare expression so the notebook displays the returned
# metrics as the cell output.
evaluate_cf(**evaluation_inputs)

  from .autonotebook import tqdm as notebook_tqdm
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  raw_data[target_column] = raw_data[target_column].map({"Movie": 0, "TV Show": 1})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  raw_data["duration"] = raw_data["duration"].apply(parse_duration)
  self.load_state_dict(torch.load(path))
Epoch 485, Train: 0.3177, test: 0.3454, patience: 300:  10%|▉         | 486/5000 [00:46<07:10, 10.50it/s]
Epoch 81, Train: -65.6134, test: -67.7431, patience: 20:   8%|▊         | 81/1000 [00:24<04:40,  3.27it/s]
  self.load_state_

Generated Counterfactuals: [[ 8.92069519e-01  1.71561509e-01 -1.37060299e-04 ...  3.86449285e-02
  -1.42845107e-04 -1.00449470e-04]
 [ 8.92879367e-01  1.68963432e-01 -1.06772124e-04 ...  3.50932106e-02
   2.67594354e-04  2.00288632e-04]
 [ 8.97637188e-01  1.74661323e-01 -9.32478288e-05 ...  6.49642646e-02
  -1.08920867e-04 -1.26485276e-04]
 ...
 [ 9.47916687e-01  1.15044251e-01  0.00000000e+00 ...  0.00000000e+00
   0.00000000e+00  0.00000000e+00]
 [ 1.00656700e+00  1.15073711e-01  2.38647699e-05 ... -5.43177947e-02
  -7.78357935e-06  3.83529929e-04]
 [ 1.18613565e+00 -1.53614804e-02  1.26865125e-05 ... -1.30682960e-02
   8.01245624e-04  9.10882780e-04]]


{'coverage': 1.0,
 'validity': 0.9693181818181819,
 'actionability': 0.09488636363636363,
 'sparsity': 0.9051136363636364,
 'proximity_categorical_hamming': 0.8142011977508523,
 'proximity_categorical_jaccard': 0.8142011977508523,
 'proximity_continuous_manhattan': 0.8187039034653473,
 'proximity_continuous_euclidean': 0.8142011977508523,
 'proximity_continuous_mad': 1.9302398575096449,
 'proximity_l2_jaccard': 0.8142011977508523,
 'proximity_mad_hamming': 1.9302398575096449,
 'prob_plausibility': 0.7471590909090909,
 'log_density_cf': 69.9904,
 'log_density_test': 37.34087,
 'lof_scores_cf': 46647844.0,
 'lof_scores_test': 1089414.9,
 'isolation_forest_scores_cf': 0.0767617504134978,
 'isolation_forest_scores_test': 0.09178902924334606}