# Dependencies

In [None]:
!pip install torch_uncertainty



In [None]:
!pip install blitz-bayesian-pytorch



In [None]:
from pathlib import Path

import torch
from torch import nn, optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.optim.lr_scheduler import LambdaLR

from torch_uncertainty import TUTrainer
from torch_uncertainty.datamodules import MNISTDataModule
from torch_uncertainty.losses import ELBOLoss
from torch_uncertainty.models.lenet import bayesian_lenet
from torch_uncertainty.models import mc_dropout
from torch_uncertainty.routines import ClassificationRoutine

from blitz.modules import BayesianLinear
from blitz.utils import variational_estimator
import scipy.stats as st

from pathlib import Path
from safetensors.torch import load_file
from torchvision.datasets import MNIST

from google.colab import drive
drive.flush_and_unmount()  # Unmount Google Drive
drive.mount('/content/drive')  # Remount Google Drive
import os


Mounted at /content/drive


# OptuNet Posterior Approximation

In [None]:
# Constants
# Use the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Root directory handed to the MNIST datamodule / dataset.
DATA_PATH = "data"

# Parameters from paper
# NOTE(review): these are presumably meant to be the single source of truth for
# the training run -- verify the training cell reads them rather than repeating
# the literal values.
EPOCHS = 60
BATCH_SIZE = 64
LEARNING_RATE = 0.04
#WEIGHT_DECAY = 2e-4

# Passed as num_workers to the datamodule and the test DataLoader.
NUM_WORKERS = 4
## OptuNet params
# NOTE(review): DROPOUT_RATE is not referenced by any cell visible here -- confirm it is still needed.
DROPOUT_RATE = 0.2 # last layer dropout rate

## Load Data

In [None]:
# Load MNIST data
# The datamodule is built from the config constants; eval_ood=False is passed,
# presumably so that no out-of-distribution evaluation set is prepared (TODO confirm).
root = Path(DATA_PATH)
datamodule = MNISTDataModule(root=root, batch_size=BATCH_SIZE, eval_ood=False, num_workers=NUM_WORKERS)

## OptuNet Model

In [None]:
# The variational_estimator decorator extends the class with ELBO helpers such as
# sample_elbo, which the training routine calls.
# NOTE(review): every layer below is a deterministic nn.Conv2d / nn.Linear, so
# presumably the KL part of the ELBO is zero and repeated forward passes return
# identical outputs -- confirm whether a Bayesian layer (e.g. BayesianLinear)
# was intended for the classifier head.
@variational_estimator
class OptuNet(nn.Module):
    """Tiny convolutional classifier (OptuNet, Section C.2.1 of the paper).

    Layer sequence: Conv2d(1->2, ks=4) -> MaxPool(ks=3, stride=3) -> ReLU ->
    Conv2d(2->10, ks=5, groups=2) -> AvgPool(ks=2) -> ReLU ->
    spatial mean -> Linear(10 -> num_classes).

    Args:
        num_classes: Number of output logits produced by the final linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=4, groups=1, bias=False)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=3)
        self.conv2 = nn.Conv2d(in_channels=2, out_channels=10, kernel_size=5, groups=2, bias=False)
        self.pool2 = nn.AvgPool2d(kernel_size=2)
        # The head width follows num_classes instead of a hard-coded 10, so the
        # constructor argument is actually honoured (identical for MNIST).
        # in_features stays 10 because forward() averages over the spatial axes.
        self.fc1 = nn.Linear(in_features=10, out_features=num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the class logits for a batch of single-channel images."""
        x = self.relu(self.pool1(self.conv1(x)))  # First conv, max pooling, ReLU
        x = self.relu(self.pool2(self.conv2(x)))  # Second conv, avg pooling, ReLU
        # Global average over height and width leaves one value per channel
        # (for a 1x28x28 input the feature map is 10x2x2 at this point).
        x = x.mean(dim=[2, 3])
        x = self.fc1(x)  # Linear layer
        return x


## Train / Test

In [None]:
class CustomClassificationRoutine(ClassificationRoutine):
    """Classification routine whose training loss is the sampled ELBO of the model.

    Args:
        lr_scheduler: Scheduler stepped manually once per training epoch
            (may be None to disable scheduling).
        num_samples: Number of Monte Carlo samples passed to ``sample_elbo``.
        kl_weight: Weight of the complexity (KL) term passed to ``sample_elbo``.
        *args, **kwargs: Forwarded unchanged to ``ClassificationRoutine``.
    """

    def __init__(self, lr_scheduler, num_samples, kl_weight, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.lr_scheduler = lr_scheduler
        self.num_samples = num_samples
        self.kl_weight = kl_weight

    def training_step(self, batch, batch_idx):
        """Compute, log and return the sampled ELBO for one batch."""
        inputs, targets = batch
        # Follow the device this module currently lives on rather than a global
        # constant: the trainer decides the placement, and a global "cuda" would
        # clash with a CPU trainer.
        # assumes the base routine exposes the Lightning `device` property -- TODO confirm
        inputs, targets = inputs.to(self.device), targets.to(self.device)

        # Calculate ELBO using sample_elbo (added by @variational_estimator)
        elbo = self.model.sample_elbo(
            inputs=inputs,
            labels=targets,
            criterion=nn.CrossEntropyLoss(),
            sample_nbr=self.num_samples,
            complexity_cost_weight=self.kl_weight
        )

        self.log("train_elbo", elbo)
        return elbo

    def on_train_epoch_end(self):
        """Advance the learning-rate scheduler once per epoch, if one was given."""
        if self.lr_scheduler is not None:
            self.lr_scheduler.step()



In [None]:

def optim_lenet(model: nn.Module, lr: float = 0.04):
    """Build the plain SGD optimizer used to train the model.

    Args:
        model: Module whose parameters are optimized.
        lr: Learning rate; defaults to 0.04, the value previously hard-coded here.

    Returns:
        A ``torch.optim.SGD`` instance over ``model.parameters()``.
    """
    return optim.SGD(model.parameters(), lr=lr)

# Learning-rate scheduler that decays the learning rate twice during training,
# at epochs 15 and 30, halving it each time (kept for reference; currently disabled).
"""
def scheduler_lenet(optimizer):
    scheduler = MultiStepLR(
        optimizer,
        milestones=[15, 30],  # Epochs at which to decay the learning rate
        gamma=0.5,            # Factor by which to multiply the learning rate
    )
    return scheduler
"""
def warmup_cosine_scheduler(optimizer, warmup_steps, total_steps, min_lr=0, max_lr=0.04):
    """Create a linear-warmup + cosine-decay learning-rate schedule.

    The learning rate rises linearly from 0 to ``max_lr`` over the first
    ``warmup_steps`` scheduler steps, then follows half a cosine from ``max_lr``
    down to ``min_lr``, reaching it at ``total_steps`` and staying there.

    Args:
        optimizer: Optimizer whose parameter groups are scheduled.
        warmup_steps: Number of scheduler steps spent warming up.
        total_steps: Scheduler step at which ``min_lr`` is reached.
        min_lr: Final (absolute) learning rate.
        max_lr: Peak (absolute) learning rate, reached at the end of warmup.

    Returns:
        A ``LambdaLR`` scheduler attached to ``optimizer``.
    """
    import math  # local import: only this helper needs it

    decay_span = float(max(1, total_steps - warmup_steps))

    def target_lr(epoch):
        """Absolute learning rate wanted at scheduler step ``epoch``."""
        if epoch < warmup_steps:
            # Linear warmup: increase from 0 to max_lr
            return max_lr * float(epoch) / float(max(1, warmup_steps))
        # Cosine decay after warmup; progress is clamped so the rate stays at
        # min_lr instead of climbing back up when stepping past total_steps.
        progress = min(1.0, (epoch - warmup_steps) / decay_span)
        return min_lr + 0.5 * (max_lr - min_lr) * (1.0 + math.cos(math.pi * progress))

    def make_factor(base_lr):
        # LambdaLR multiplies the group's base learning rate by the returned
        # value, so the absolute target is converted to a plain-float factor.
        def lr_lambda(epoch):
            if base_lr == 0:
                return 0.0
            return target_lr(epoch) / base_lr

        return lr_lambda

    factors = [
        make_factor(group.get("initial_lr", group["lr"]))
        for group in optimizer.param_groups
    ]
    return LambdaLR(optimizer, factors)

# Run the trainer on the same kind of device that DEVICE selects, so the model
# and the batches end up in the same place on both CPU and GPU runtimes.
accelerator = "gpu" if DEVICE == "cuda" else "cpu"
trainer = TUTrainer(accelerator=accelerator, enable_progress_bar=False, max_epochs=EPOCHS)

# model
#model = load_optunet_model(version=1000)
model = OptuNet(num_classes=datamodule.num_classes)

optimizer = optim_lenet(model)
#scheduler = scheduler_lenet(optimizer)
# Warmup and cosine decay setup; the schedule length follows the epoch budget
# because the routine steps the scheduler once per epoch.
warmup_steps = 5
total_steps = EPOCHS
scheduler = warmup_cosine_scheduler(optimizer, warmup_steps, total_steps)

routine = CustomClassificationRoutine(
    model=model,
    num_classes=datamodule.num_classes,
    loss=None, # computed by sample_elbo in training_step()
    optim_recipe=optimizer,
    lr_scheduler=scheduler,
    num_samples = 3,
    kl_weight=1/100000
    #is_ensemble=True
)

trainer.fit(model=routine, datamodule=datamodule)
results = trainer.test(model=routine, datamodule=datamodule)

#save state dictionary on drive (model parameters)
model_path = "/content/drive/MyDrive/optunet_trained_model_cosine_dec.pth"
torch.save(model.state_dict(), model_path)
print(f"Model_ saved to {model_path}")

INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: 
  | Name             | Type             | Params | Mode 
--------------------------------------------------------------
0 | model            | OptuNet          | 392    | train
1 | format_batch_fn  | Identity         | 0      | train
2 | val_cls_metrics  | MetricCollection | 0      | train
3 | test_cls_metrics | MetricCollection | 0      | train
4 | test_id_entropy  | Entropy          | 0      | train
5 | mixup            | Identity         | 0      | train
--------------------------------------------------------------
392       Trainable params
0         Non-trainable params
392       Total params
0.002     Total es

Model_ saved to /content/drive/MyDrive/optunet_trained_model_cosine_dec.pth


## Confidence Interval Evaluation Function
* Sample predictions from your Bayesian model (OptuNet) 3 times for each input.
* Calculate the mean and standard deviation of these predictions.
* Use these statistics to construct a confidence interval, assuming a Gaussian distribution.




In [None]:
import torch.nn.functional as F

def evaluate_confidence_interval(model, dataloader, confidence=0.95, num_samples=3):
    """Estimate class probabilities and a Gaussian confidence interval around them.

    For every batch the model is evaluated ``num_samples`` times. Each output is
    turned into probabilities with softmax, and the mean and standard deviation
    across the samples are used to build the interval ``mean +/- z * std``.
    Mean and deviation are both taken in probability space so the bounds are
    expressed in the same units as the predictions. The bounds are not clipped
    to [0, 1].

    Args:
        model: Module mapping a batch of inputs to logits. It is put in eval mode.
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        confidence: Two-sided confidence level, strictly between 0 and 1.
        num_samples: Number of forward passes per batch (at least 2, so that the
            standard deviation is defined). Defaults to the original value of 3.

    Returns:
        Tuple ``(mean_probs, targets, lower_bounds, upper_bounds)`` of tensors
        concatenated over all batches.

    Raises:
        ValueError: If ``confidence`` or ``num_samples`` is out of range, or the
            dataloader yields no batches.
    """
    if not 0.0 < confidence < 1.0:
        raise ValueError("confidence must lie strictly between 0 and 1")
    if num_samples < 2:
        raise ValueError("num_samples must be at least 2")

    model.eval()

    # Send the data wherever the model's weights live; a parameter-free model
    # receives the batches unchanged.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    # The z-score only depends on the confidence level, so compute it once.
    # float() keeps the later arithmetic purely between tensors and Python floats.
    z_value = float(st.norm.ppf(1 - (1 - confidence) / 2))

    all_preds = []
    all_targets = []
    lower_bounds = []
    upper_bounds = []

    with torch.no_grad():
        for inputs, targets in dataloader:
            if device is not None:
                inputs = inputs.to(device)
                targets = targets.to(device)

            # Monte Carlo samples of the predictive distribution, as probabilities
            probs = torch.stack(
                [F.softmax(model(inputs), dim=1) for _ in range(num_samples)], dim=0
            )

            preds_mean = probs.mean(dim=0)
            preds_std = probs.std(dim=0)

            all_preds.append(preds_mean)
            all_targets.append(targets)
            lower_bounds.append(preds_mean - z_value * preds_std)
            upper_bounds.append(preds_mean + z_value * preds_std)

    if not all_preds:
        raise ValueError("dataloader yielded no batches")

    # Concatenate results for all batches
    return (
        torch.cat(all_preds),
        torch.cat(all_targets),
        torch.cat(lower_bounds),
        torch.cat(upper_bounds),
    )


In [None]:
# Evaluation-section configuration. These repeat values assigned in the
# constants cell near the top of the notebook -- presumably so this section can
# be run on its own; keep both copies in sync.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DATA_PATH = "data"
BATCH_SIZE = 64
NUM_WORKERS = 4

In [None]:
# Download dataset to the specified path
# Both splits are requested with download=True; the returned dataset objects
# are not stored (the second one is only shown as the cell's output).
MNIST(root=DATA_PATH, train=True, download=True)
MNIST(root=DATA_PATH, train=False, download=True)

Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test

In [None]:
# Rebuild the MNIST datamodule for evaluation with the same arguments as the
# one created for training (this rebinds the `datamodule` name).
datamodule = MNISTDataModule(
    root=Path(DATA_PATH),
    batch_size=BATCH_SIZE,
    eval_ood=False,
    num_workers=NUM_WORKERS
)


In [None]:
from torch.utils.data import DataLoader

In [None]:
# Prepare the test split and wrap it in a plain DataLoader; shuffle=False keeps
# the sample order fixed across repeated passes over the data.
datamodule.setup(stage='test')
test_dataset = datamodule.test  # presumably populated by setup(stage='test') -- TODO confirm the attribute name
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)


In [None]:
from sklearn.metrics import precision_recall_curve, auc
from sklearn.metrics import roc_curve
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.calibration import calibration_curve

In [None]:
def aupr_score(predictions, labels):
    """Print and return the mean one-vs-rest area under the precision-recall curve.

    Args:
        predictions: Tensor of per-class scores, one row per sample and one
            column per class.
        labels: Tensor of integer class labels, one per sample.

    Returns:
        The unweighted mean over classes of the per-class AUPR, as a float.
    """
    # detach() lets tensors that still carry gradient information be converted.
    scores = predictions.detach().cpu().numpy()
    y_true = labels.detach().cpu().numpy()

    per_class_aupr = []
    for class_idx in range(scores.shape[1]):
        # Binarize the labels for the current class (one-vs-rest)
        binary_labels = (y_true == class_idx).astype(int)
        precision, recall, _ = precision_recall_curve(binary_labels, scores[:, class_idx])
        per_class_aupr.append(auc(recall, precision))

    mean_aupr = float(np.mean(per_class_aupr))
    print(f"Mean AUPR: {mean_aupr}")
    return mean_aupr

In [None]:
def fpr95_score(predictions, labels):
    """Print and return the false-positive rate at 95% true-positive rate.

    The ROC curve is built for the task "is the prediction correct?": the
    positive class is a correct prediction and the score is the maximum
    predicted probability.

    Args:
        predictions: Tensor of per-class probabilities, one row per sample.
        labels: Tensor of integer class labels, one per sample.

    Returns:
        The FPR at the first ROC point whose TPR is at least 0.95, or NaN when
        the predictions are all correct or all incorrect (the ROC curve needs
        both outcomes).
    """
    probs = predictions.detach().cpu().numpy()
    y_true = labels.detach().cpu().numpy()

    # Calculate predicted classes and confidences
    predicted_classes = np.argmax(probs, axis=1)  # Class with highest probability
    confidences = np.max(probs, axis=1)           # Confidence scores (max probability)
    is_correct = (predicted_classes == y_true).astype(int)  # 1 for correct, 0 for incorrect

    # Without both outcomes the TPR or FPR is undefined; report NaN instead of
    # failing on an empty index lookup.
    if np.unique(is_correct).size < 2:
        fpr95 = float("nan")
        print(f"FPR95: {fpr95}")
        return fpr95

    fpr, tpr, _ = roc_curve(is_correct, confidences)

    # tpr is non-decreasing, so argmax of the mask is the first point at >= 95%
    idx = int(np.argmax(tpr >= 0.95))
    fpr95 = float(fpr[idx])
    print(f"FPR95: {fpr95}")
    return fpr95

In [None]:
def ace_score(predictions, labels, n_bins=10, strategy="uniform"):
    """Print and return a class-averaged calibration error.

    For each class the one-vs-rest calibration curve is computed and the mean
    absolute gap between the observed fraction of positives and the mean
    predicted probability is taken over the bins; the result is the average of
    that gap over classes.

    Args:
        predictions: Tensor of per-class probabilities, one row per sample.
        labels: Tensor of integer class labels, one per sample.
        n_bins: Number of bins of the calibration curve.
        strategy: Binning strategy forwarded to ``calibration_curve``.
            ``"uniform"`` (the default, and the previous behaviour) uses
            equal-width bins; ``"quantile"`` uses equal-population bins, i.e.
            adaptive binning.

    Returns:
        The class-averaged calibration error as a float.
    """
    # Convert predictions and labels to numpy arrays
    predicted_probs = predictions.detach().cpu().numpy()
    true_labels = labels.detach().cpu().numpy()

    num_classes = predicted_probs.shape[1]

    per_class_error = []
    for class_idx in range(num_classes):
        # One-vs-rest view of the current class
        prob_pred = predicted_probs[:, class_idx]
        prob_true = (true_labels == class_idx).astype(float)

        # Compute calibration curve
        fraction_of_positives, mean_predicted_value = calibration_curve(
            prob_true, prob_pred, n_bins=n_bins, strategy=strategy
        )

        per_class_error.append(np.mean(np.abs(fraction_of_positives - mean_predicted_value)))

    # Average over all classes
    ace = float(np.sum(per_class_error) / num_classes)
    print(f"ACE score: {ace}")
    return ace


In [None]:
def monte_carlo_sampling(model, data_loader, num_samples=100):
    """Collect the model's outputs over ``num_samples`` full passes of the data.

    The model's train/eval mode is deliberately left untouched, so any
    stochastic behaviour the caller has enabled stays active.

    Args:
        model: Module mapping a batch of inputs to outputs.
        data_loader: Iterable yielding ``(inputs, targets)`` batches; the
            targets are ignored.
        num_samples: Number of passes over ``data_loader``.

    Returns:
        numpy array of shape (num_samples, num_examples, num_classes).
    """
    # Feed the batches on the device holding the model's weights so the call
    # works for CPU and GPU models alike; parameter-free models get them as-is.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    predictions = []
    with torch.no_grad():
        for _ in range(num_samples):
            sampled_preds = []
            for inputs, _targets in data_loader:
                if device is not None:
                    inputs = inputs.to(device)
                sampled_preds.append(model(inputs).cpu().numpy())
            predictions.append(np.concatenate(sampled_preds, axis=0))

    return np.array(predictions)  # Shape: (num_samples, num_examples, num_classes)

In [None]:
def target_model_predictions(models, data_loader):
    """Collect the logits of every model over one pass of the data.

    Args:
        models: Iterable of modules; each one is switched to eval mode.
        data_loader: Iterable yielding ``(inputs, targets)`` batches; the
            targets are ignored.

    Returns:
        numpy array of shape (num_models, num_datapoints, num_classes).
    """
    all_predictions = []

    with torch.no_grad():
        for model in models:
            model.eval()
            # Use the device of the model's own weights rather than assuming
            # CUDA, so the function also runs on CPU-only machines.
            first_param = next(model.parameters(), None)
            device = first_param.device if first_param is not None else None

            preds = []
            for inputs, _targets in data_loader:
                if device is not None:
                    inputs = inputs.to(device)
                preds.append(model(inputs).cpu().numpy())  # Logits
            all_predictions.append(np.concatenate(preds, axis=0))  # Combine batches

    return np.array(all_predictions)  # Shape: (num_models, num_datapoints, num_classes)

In [None]:
def calculate_mmd(model, posterior_models, test_dataset, num_samples=100):
    """Run the MMDAgg two-sample test between the model and the target models.

    Predictions of the target (posterior) models and ``num_samples`` Monte Carlo
    passes of ``model`` are each averaged over the datapoint axis, giving one
    vector per model / per pass; the two sets of vectors are compared.

    Args:
        model: Model sampled with ``monte_carlo_sampling``.
        posterior_models: Models evaluated with ``target_model_predictions``.
        test_dataset: Iterable of ``(inputs, targets)`` batches handed to both
            helpers (it is iterated like a data loader).
        num_samples: Number of Monte Carlo passes for ``model``.

    Returns:
        Tuple ``(None, mmd_preds)``; the first slot is a placeholder for the
        weight-space comparison, which is not computed here.
    """
    # NOTE(review): `mmdagg` is not imported in the visible part of this
    # notebook -- confirm it is brought into scope before this function is called.
    target_avg = target_model_predictions(posterior_models, test_dataset).mean(axis=1)
    source_avg = monte_carlo_sampling(model, test_dataset, num_samples=num_samples).mean(axis=1)

    test_settings = dict(
        alpha=0.05,
        kernel="laplace_gaussian",
        number_bandwidths=10,
        weights_type="uniform",
        B1=2000,
        B2=2000,
        B3=50,
        seed=42424242,
    )
    mmd_preds = mmdagg(X=source_avg, Y=target_avg, **test_settings)

    return None, mmd_preds

In [None]:
# Re-create the OptuNet architecture and restore the trained weights.
model = OptuNet(num_classes=datamodule.num_classes)
# load state dictionary
model_path = "/content/drive/MyDrive/optunet_trained_model_cosine_dec.pth"
# map_location lets a checkpoint saved on another device type load on this
# runtime; weights_only=True restricts unpickling to tensors and plain
# containers, which is all a state dict needs.
state_dict = torch.load(model_path, map_location=DEVICE, weights_only=True)
model.load_state_dict(state_dict)
model.to(DEVICE)  # Send model to appropriate device
print("Model loaded successfully!")

# Evaluate on the test split and report the metrics.
preds, targets, lower_bounds, upper_bounds = evaluate_confidence_interval(model, test_dataloader, confidence=0.95)
aupr_score(preds, targets)
fpr95_score(preds, targets)
ace_score(preds, targets)

Model loaded successfully!


  model.load_state_dict(torch.load(model_path))


Mean AUPR: 0.7674309046280484
FPR95: 0.7889972144846796
ACE score: 0.09389052589950675


In [None]:
def load_target_models(version: int, num_models: int = 75):
    """Load the saved target (posterior) OptuNet models from Google Drive.

    Args:
        version: Currently unused; kept so existing calls keep working.
        num_models: Number of checkpoints to load, read from
            ``model_0.pth`` ... ``model_{num_models - 1}.pth``. Defaults to 75,
            the value previously hard-coded here.

    Returns:
        List with one loaded ``OptuNet`` per checkpoint, in index order.

    Raises:
        ValueError: If one of the checkpoint files does not exist.
    """
    posterior_models = []
    for i in range(num_models):
        model = OptuNet(num_classes=datamodule.num_classes)
        path = f"/content/drive/MyDrive/DL, adv/project/our_models/model_{i}.pth"

        path_exists = os.path.exists(path)
        print(f"os.path.exists(path): {path_exists}")

        if not path_exists:
            raise ValueError("File does not exist")

        # NOTE(review): load_file is the safetensors reader while the path ends
        # in ".pth" -- confirm the checkpoints were written in safetensors format.
        state_dict = load_file(path)
        model.load_state_dict(state_dict=state_dict)

        # Keep the loaded model; previously it was dropped and the function
        # returned an empty list from inside the first loop iteration.
        posterior_models.append(model)

    return posterior_models