# Ablation study on Amex data

In [1]:
# imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import pandas as pd
import os
import gc
import cupy
import yaml
from datetime import datetime
import sklearn
from tqdm.auto import tqdm

from src.lib.experimentation import load_amex_numpy_data, undo_min_max_corrupt_func
import pandas as pd
import numpy as np 
import seaborn as sns
from src.lib.plotting import get_figsize, save_plot, update_plot_params


with open(os.path.join("..", "config.yaml")) as f:
    main_cfg = yaml.load(f, Loader=yaml.FullLoader)
    
with open(os.path.join("..", "src", "experiments", "configs", "experiment-config-alpha.yaml")) as f:
    amex_cfg = yaml.load(f, Loader=yaml.FullLoader)

In [2]:
X_train, y_train = load_amex_numpy_data(
    os.path.join(main_cfg['dataset_directory'], "derived", "processed-splits"),
    amex_cfg['amex_dataset']['fill'],
    lambda X, y: undo_min_max_corrupt_func(X, y, 42),
    11,
)
print(X_train.shape)

(458913, 13, 188)


In [3]:
NUM_VAL_ITEMS = int(X_train.shape[0] * 2 / 10)

train_loader = torch.utils.data.DataLoader(
    dataset=torch.utils.data.TensorDataset(
        torch.from_numpy(X_train[NUM_VAL_ITEMS:]).float(),
        torch.from_numpy(y_train[NUM_VAL_ITEMS:]).float(),
    ), 
    **amex_cfg['amex_dataset']['data_loader']
)
val_loader = torch.utils.data.DataLoader(
    dataset=torch.utils.data.TensorDataset(
        torch.from_numpy(X_train[:NUM_VAL_ITEMS]).float(),
        torch.from_numpy(y_train[:NUM_VAL_ITEMS]).float(),
    ), 
    **amex_cfg['amex_dataset']['data_loader']
)

In [9]:
from src.models.adaptive_grunet import AdaptiveGRUNet
from src.preprocessing.normalizing_flows import EDAIN_Layer

TRAIN_EDAIN_MODEL = False

edain_model = AdaptiveGRUNet(
    adaptive_layer=EDAIN_Layer(
        input_dim=177,
        invert_bijector=False,
        outlier_removal_residual_connection=True,
        batch_aware=False,
        init_sigma=0.000001,
        outlier_removal_mode='exp',
    ),
    num_features=188,
    num_cat_columns=11,
    hidden_dim=128,
    layer_dim=2,
    embedding_dim=4,
    time_series_length=13,
    dim_first=False,
)

optim = torch.optim.Adam(
    edain_model.preprocess.get_optimizer_param_list(
        base_lr=1e-3,
        # For local-aware
        # scale_lr=0.0001,
        # shift_lr=0.01,
        # outlier_lr=10.0,
        # power_lr=0.1,
        # below for global-aware
        scale_lr=0.01,
        shift_lr=0.01,
        outlier_lr=100.0,
        power_lr=10.0,
        # # below for EDAIN-KL
        # scale_lr: 0.0001
        # shift_lr: 0.01
        # outlier_lr: 10.0
        # power_lr: 0.001
    ) + [
        {'params' : edain_model.gru.parameters(), 'lr' : 1e-3},
        {'params' : edain_model.feed_forward.parameters(), 'lr' : 1e-3},
        {'params' : edain_model.emb_layers.parameters(), 'lr' : 1e-3 },
    ], lr=1e-3)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optim, milestones=[4, 7], gamma=0.1)

if TRAIN_EDAIN_MODEL:
    history = fit_model(edain_model, F.binary_cross_entropy, train_loader, val_loader, optim, scheduler, num_epochs=20, device_ids=DEV) #torch.device('cpu'))
    torch.save(edain_model.state_dict(), os.path.join(main_cfg['checkpoint_directory'], "edain_amex_checkpoint"))
else:
    edain_model = edain_model.to(torch.device('cuda', 4))
    edain_model.load_state_dict(torch.load(os.path.join(main_cfg['checkpoint_directory'], "edain_amex_checkpoint"), map_location='cuda:4'))
    edain_model.eval()

In [13]:
edain_model.preprocess.transform_list

[AdaptiveOutlierRemoval(),
 AdaptiveShift(),
 AdaptiveScale(),
 AdaptivePowerTransform()]

In [14]:
F.cross_entropy

<function torch.nn.functional.cross_entropy(input: torch.Tensor, target: torch.Tensor, weight: Optional[torch.Tensor] = None, size_average: Optional[bool] = None, ignore_index: int = -100, reduce: Optional[bool] = None, reduction: str = 'mean', label_smoothing: float = 0.0) -> torch.Tensor>

In [21]:
dev = torch.device('cuda', 4)

def evaluate_model(mod, outlier_removal=True, shift=True, scale=True, power_transform=True):
    running_vloss = 0.0
    mod = mod.to(dev)
    with torch.no_grad():
        for i, vdata in tqdm(enumerate(train_loader), desc="Evaluating model"):
            vinputs, vlabels = vdata
            vinputs, vlabels = vinputs.to(dev), vlabels.to(dev)
            
            # apply the preprocessing
            # ...
            if i == 0:
                mod.train()
            elif i == 1:
                mod.eval()
            voutputs = mod(vinputs)
            vloss = F.cross_entropy(voutputs, vlabels, reduction='mean').cpu().item()
            # vmetric = amex_metric_mod(vlabels.cpu().numpy(), voutputs.cpu().numpy())
            if i > 0:
                running_vloss += vloss
            # running_vmetric += vmetric
    avg_vloss = running_vloss / (i + 1)
    return avg_vloss


evaluate_model(edain_model, False, True, True, False)

Evaluating model: 0it [00:00, ?it/s]

1828.3663323258554