In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import numpy as np
from counterfactuals.datasets import (
    LawDataset,
    MoonsDataset,
    HelocDataset,
    AuditDataset,
    AdultDataset,
)

from counterfactuals.generative_models.kde import KDE
from counterfactuals.generative_models.maf import MaskedAutoregressiveFlow
from counterfactuals.generative_models.nice import NICE
from counterfactuals.generative_models.real_nvp import RealNVP

In [3]:
# dataset = MoonsDataset("../data/moons.csv")
# dataset = AuditDataset("../data/audit.csv")
# dataset = LawDataset("../data/law.csv")
# dataset = HelocDataset("../data/heloc.csv")
# dataset = PolishBankDataset("../data/polish_bankruptcy.csv")
dataset = AdultDataset("../data/adult.csv")
train_dataloader = dataset.train_dataloader(batch_size=128, shuffle=True, noise_lvl=0)
test_dataloader = dataset.test_dataloader(batch_size=128, shuffle=False)

In [4]:
maf = MaskedAutoregressiveFlow(
    features=dataset.X_train.shape[1],
    hidden_features=128,
    context_features=1,
    num_layers=5,
    num_blocks_per_layer=2,
    use_residual_blocks=True,
    use_random_masks=False,
)
maf.fit(train_dataloader, test_dataloader)

Epoch 47, Train: -27.3569, test: -43.2998, patience: 20:  47%|████▋     | 47/100 [00:46<00:52,  1.02it/s] 


In [5]:
nice = NICE(
    features=dataset.X_train.shape[1],
    hidden_features=128,
    context_features=1,
    num_layers=5,
    num_blocks_per_layer=2,
)
nice.fit(train_dataloader, test_dataloader)

Epoch 99, Train: 26.6591, test: 26.6644, patience: 5: 100%|██████████| 100/100 [01:37<00:00,  1.02it/s]


In [6]:
real_nvp = RealNVP(
    features=dataset.X_train.shape[1],
    hidden_features=128,
    context_features=1,
    num_layers=5,
    num_blocks_per_layer=2,
)
real_nvp.fit(train_dataloader, test_dataloader)

Epoch 99, Train: 26.5830, test: 26.5827, patience: 4: 100%|██████████| 100/100 [01:36<00:00,  1.03it/s]


In [7]:
kde = KDE(bandwidth=0.1)
kde.fit(train_dataloader, test_dataloader)
log_prob = kde.predict_log_prob(train_dataloader)
# log_prob = []
# for x, y in train_dataloader:
#     log_prob.append(kde.log_prob(x, y))
print("KDE Train")
med_kde_train = np.median(log_prob).round(2)
mean_kde_train = np.mean(log_prob).round(2)

# log_prob = []
log_prob = kde.predict_log_prob(test_dataloader)
# for x, y in test_dataloader:
#     log_prob.append(kde.log_prob(x, y))
#     kde.predict_log_prob(x, y)
print("KDE Test")
med_kde_test = np.median(log_prob).round(2)
mean_kde_test = np.mean(log_prob).round(2)

print("KDE Train", med_kde_train, mean_kde_train)
print("KDE Test", med_kde_test, mean_kde_test)


# flow = torch.load("../models/gen_model_FLOW_orig_MoonsDataset.pt")
# log_prob = []
# with torch.no_grad():
#     for x, y in train_dataloader:
#         y = y.view(-1, 1)
#         log_prob.append(flow.log_prob(x, y))
# med_flow_train = np.median(torch.hstack(log_prob).numpy()).round(2)
# mean_flow_train = np.mean(torch.hstack(log_prob).numpy()).round(2)

# log_prob = []
# with torch.no_grad():
#     for x, y in test_dataloader:
#         y = y.view(-1, 1)
#         log_prob.append(flow.log_prob(x, y))
# med_flow_test = np.median(torch.hstack(log_prob).numpy()).round(2)
# mean_flow_test = np.mean(torch.hstack(log_prob).numpy()).round(2)

Train log-likelihood: 33.29024124145508
Test log-likelihood: 30.512453079223633
KDE Train


TypeError: mean() received an invalid combination of arguments - got (out=NoneType, dtype=NoneType, axis=NoneType, ), but expected one of:
 * (*, torch.dtype dtype = None)
 * (tuple of ints dim, bool keepdim = False, *, torch.dtype dtype = None)
 * (tuple of names dim, bool keepdim = False, *, torch.dtype dtype = None)


In [None]:
for dataset in [
    MoonsDataset("../data/moons.csv"),
    LawDataset("../data/law.csv"),
    HelocDataset("../data/heloc.csv"),
    AuditDataset("../data/audit.csv"),
]:
    train_dataloader = dataset.train_dataloader(
        batch_size=128, shuffle=True, noise_lvl=0
    )
    test_dataloader = dataset.test_dataloader(batch_size=128, shuffle=False)
    kde = KDE(bandwidth=0.1)
    kde.fit(train_dataloader)
    log_prob = []
    for x, y in train_dataloader:
        log_prob.append(kde.log_prob(x, y))
    med_kde_train = np.median(torch.hstack(log_prob).numpy()).round(2)
    mean_kde_train = np.mean(torch.hstack(log_prob).numpy()).round(2)

    log_prob = []
    for x, y in test_dataloader:
        log_prob.append(kde.log_prob(x, y))
    med_kde_test = np.median(torch.hstack(log_prob).numpy()).round(2)
    mean_kde_test = np.mean(torch.hstack(log_prob).numpy()).round(2)

    flow = torch.load(
        f"../models/gen_model_FLOW_orig_{str(dataset).split(' ')[0].split('.')[-1]}.pt"
    )
    print(
        f"../models/gen_model_FLOW_orig_{str(dataset).split(' ')[0].split('.')[-1]}.pt"
    )
    with torch.no_grad():
        log_prob = []
        with torch.no_grad():
            for x, y in train_dataloader:
                y = y.view(-1, 1)
                log_prob.append(flow.log_prob(x, y).squeeze())
        med_flow_train = np.median(torch.hstack(log_prob).numpy()).round(2)
        mean_flow_train = np.mean(torch.hstack(log_prob).numpy()).round(2)

        log_prob = []
        with torch.no_grad():
            for x, y in test_dataloader:
                y = y.view(-1, 1)
                log_prob.append(flow.log_prob(x, y).squeeze())
        med_flow_test = np.median(torch.hstack(log_prob).numpy()).round(2)
        mean_flow_test = np.mean(torch.hstack(log_prob).numpy()).round(2)

    print(str(dataset))
    print("mean table")
    print(
        f"{mean_kde_train:.2f}, {mean_kde_test:.2f}, {mean_flow_train:.2f}, {mean_flow_test:.2f}"
    )
    print("median table")
    print(
        f"{med_kde_train:.2f}, {med_kde_test:.2f}, {med_flow_train:.2f}, {med_flow_test:.2f}"
    )