In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell runs, so edits to the project's source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import torch
import numpy as np
from counterfactuals.datasets import (
    LawDataset,
    MoonsDataset,
    HelocDataset,
    AuditDataset,
)

from counterfactuals.generative_models.kde import KDE

In [None]:
# Pick the dataset to analyse: exactly one assignment below should be active.
# dataset = MoonsDataset("../data/moons.csv")
# dataset = LawDataset("../data/law.csv")
# dataset = HelocDataset("../data/heloc.csv")
# dataset = PolishBankDataset("../data/polish_bankruptcy.csv")  # not imported in this notebook
dataset = AuditDataset("../data/audit.csv")

# Training batches are shuffled; noise_lvl=0 presumably disables any noise
# augmentation (confirm against the dataset class).
train_dataloader = dataset.train_dataloader(
    batch_size=128,
    shuffle=True,
    noise_lvl=0,
)
# Test batches are not shuffled.
test_dataloader = dataset.test_dataloader(
    batch_size=128,
    shuffle=False,
)

In [None]:
# Fit a kernel density estimator on the training split, then summarise the
# log-probabilities it assigns to the train and test splits (median and mean,
# rounded to two decimals).
kde = KDE(bandwidth=0.1)
kde.fit(train_dataloader)

# Concatenate the per-batch log-probabilities once per split and reuse the
# resulting array for both statistics.
kde_train_log_prob = torch.hstack(
    [kde.log_prob(x, y) for x, y in train_dataloader]
).numpy()
med_kde_train = np.median(kde_train_log_prob).round(2)
mean_kde_train = np.mean(kde_train_log_prob).round(2)

kde_test_log_prob = torch.hstack(
    [kde.log_prob(x, y) for x, y in test_dataloader]
).numpy()
med_kde_test = np.median(kde_test_log_prob).round(2)
mean_kde_test = np.mean(kde_test_log_prob).round(2)

# Report as: label, median, mean.
print("KDE Train", med_kde_train, mean_kde_train)
print("KDE Test", med_kde_test, mean_kde_test)

# The flow-model likelihoods are computed per dataset in the loop cell that
# follows, so the commented-out single-dataset flow evaluation was removed.

In [None]:
def summarize_log_prob(log_prob_fn, dataloader):
    """Return the (median, mean) of the log-probabilities over a dataloader.

    Args:
        log_prob_fn: Callable taking one batch ``(x, y)`` and returning a
            tensor of log-probabilities for that batch.
        dataloader: Iterable yielding ``(x, y)`` batches.

    Returns:
        Tuple ``(median, mean)``, each rounded to two decimals.
    """
    # Concatenate all batches once and reuse the array for both statistics.
    values = torch.hstack([log_prob_fn(x, y) for x, y in dataloader]).numpy()
    return np.median(values).round(2), np.mean(values).round(2)


# For every dataset: fit a KDE on the training split, load the matching
# pre-trained flow model, and print the mean/median log-probabilities of both
# models on the train and test splits.
for dataset in [
    MoonsDataset("../data/moons.csv"),
    LawDataset("../data/law.csv"),
    HelocDataset("../data/heloc.csv"),
    AuditDataset("../data/audit.csv"),
]:
    train_dataloader = dataset.train_dataloader(
        batch_size=128, shuffle=True, noise_lvl=0
    )
    test_dataloader = dataset.test_dataloader(batch_size=128, shuffle=False)

    kde = KDE(bandwidth=0.1)
    kde.fit(train_dataloader)
    med_kde_train, mean_kde_train = summarize_log_prob(kde.log_prob, train_dataloader)
    med_kde_test, mean_kde_test = summarize_log_prob(kde.log_prob, test_dataloader)

    # Checkpoints are named after the dataset class. Use the class name
    # directly instead of parsing the object's repr, which would break if a
    # dataset ever defined its own __str__/__repr__.
    model_path = f"../models/gen_model_FLOW_orig_{type(dataset).__name__}.pt"
    print(model_path)
    # NOTE: torch.load unpickles arbitrary Python objects; only load
    # checkpoints from a trusted source.
    flow = torch.load(model_path)
    # NOTE(review): the model runs in whatever train/eval mode it was saved
    # in; if it has dropout or batch-norm layers, flow.eval() may be needed.

    def flow_log_prob(x, y):
        """Log-probability of a batch under the flow, with y as a column."""
        return flow.log_prob(x, y.view(-1, 1)).squeeze()

    # Gradients are not needed for evaluation; one no_grad scope is enough.
    with torch.no_grad():
        med_flow_train, mean_flow_train = summarize_log_prob(
            flow_log_prob, train_dataloader
        )
        med_flow_test, mean_flow_test = summarize_log_prob(
            flow_log_prob, test_dataloader
        )

    print(str(dataset))
    print("mean table")
    print(
        f"{mean_kde_train:.2f}, {mean_kde_test:.2f}, {mean_flow_train:.2f}, {mean_flow_test:.2f}"
    )
    print("median table")
    print(
        f"{med_kde_train:.2f}, {med_kde_test:.2f}, {med_flow_train:.2f}, {med_flow_test:.2f}"
    )