# Deep Kernel Learning Classification

In [None]:
%%capture
%pip install lightning-uq-box

## Imports

In [None]:
import os
import tempfile
from collections import defaultdict
from functools import partial

import torch
from lightning import Trainer
from lightning.pytorch import seed_everything
from lightning.pytorch.loggers import CSVLogger

from lightning_uq_box.datamodules import TwoMoonsDataModule
from lightning_uq_box.models import MLP
from lightning_uq_box.uq_methods import DKLClassification
from lightning_uq_box.viz_utils import (
    plot_predictions_classification,
    plot_training_metrics,
    plot_two_moons_data,
)

In [None]:
# Seed the random number generators up front so repeated runs of the
# notebook start from the same state.
seed_everything(seed=2)

In [None]:
# Fresh temporary directory used as the root for everything this notebook
# writes (it is handed to the logger and the trainer further down).
# NOTE: mkdtemp() does not clean up after itself; remove the directory
# manually once the outputs are no longer needed.
my_temp_dir = tempfile.mkdtemp()

In [None]:
# Two-moons toy dataset wrapped in a datamodule, configured with a batch size of 100.
dm = TwoMoonsDataModule(batch_size=100)

In [None]:
# Pull the train/test splits and the evaluation grid off the datamodule so
# the plotting and prediction cells can refer to them directly.
X_train, y_train = dm.X_train, dm.y_train
X_test, y_test = dm.X_test, dm.y_test
test_grid_points = dm.test_grid_points

In [None]:
# Visualize the training and test points before fitting anything.
fig = plot_two_moons_data(X_train, y_train, X_test, y_test)

## Feature Extractor

In [None]:
# Small MLP that is handed to the deep kernel learning model as its
# feature extractor.
feature_extractor = MLP(
    n_inputs=2,  # two input coordinates per point
    n_outputs=13,  # size of the feature vector passed on
    n_hidden=[50],
    activation_fn=torch.nn.ELU(),
)

## Deep Kernel Learning Model

In [None]:
# Deep kernel learning classifier built on top of the feature extractor.
dkl_model = DKLClassification(
    feature_extractor,
    # GP configuration
    gp_kernel="RBF",
    n_inducing_points=20,
    # binary problem: the two moons
    num_classes=2,
    # Adam with lr=1e-2; given as a partial, presumably so the model can
    # bind its own parameters when it builds the optimizer
    optimizer=partial(torch.optim.Adam, lr=1e-2),
)

## Trainer

In [None]:
# Metrics go to CSV files under the temporary directory so they can be
# plotted after training.
logger = CSVLogger(my_temp_dir)

trainer = Trainer(
    # where outputs land
    default_root_dir=my_temp_dir,
    logger=logger,
    # training length and logging frequency
    max_epochs=100,
    log_every_n_steps=1,
    # keep the run lightweight and the notebook output quiet
    enable_checkpointing=False,
    enable_progress_bar=False,
)

In [None]:
# Train the DKL model on the two-moons datamodule.
trainer.fit(model=dkl_model, datamodule=dm)

## Training Metrics

In [None]:
# Plot the logged "train_loss" and "trainAcc" curves. The metrics are read
# from the "lightning_logs" folder inside the directory that was given to
# the CSVLogger.
fig = plot_training_metrics(
    os.path.join(my_temp_dir, "lightning_logs"), ["train_loss", "trainAcc"]
)

## Prediction

In [None]:
# Run the test loop on the held-out split. Per the original note this is
# also the step that saves predictions (presumably via the model's test
# hooks -- confirm against the library).
trainer.test(model=dkl_model, dataloaders=dm.test_dataloader())

## Evaluate Predictions

In [None]:
# The GP cannot take the whole evaluation grid in one go, so the grid is cut
# into pieces of at most `batch_size` points and predicted piece by piece.
batch_size = 200
batches = torch.chunk(
    test_grid_points,
    # ceil(n_points / batch_size) using integer arithmetic
    (test_grid_points.shape[0] + batch_size - 1) // batch_size,
)

# Collect every output of predict_step per key, one list entry per batch.
preds = defaultdict(list)
for batch in batches:
    batch_output = dkl_model.predict_step(batch)
    for key, value in batch_output.items():
        # the "out" entry is skipped; only the remaining entries are
        # concatenated below
        if key == "out":
            continue
        preds[key].append(value)

# Stitch the per-batch pieces back together along the first axis.
preds = {name: torch.cat(pieces, dim=0) for name, pieces in preds.items()}

In [None]:
# Plot the test points together with the predictions made on the grid.
# Class labels are the argmax over the last axis of preds["pred"]; the
# uncertainty values are moved to CPU and converted to NumPy first.
# NOTE(review): the labels are passed as a tensor while the uncertainty is a
# NumPy array -- confirm the plotting helper accepts both.
fig = plot_predictions_classification(
    X_test,
    y_test,
    preds["pred"].argmax(-1),
    test_grid_points,
    preds["pred_uct"].cpu().numpy(),
)