In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import time
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset
import torch

from bayesian_torch.models.dnn_to_bnn import dnn_to_bnn

import pickle

In [2]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))   # -> (32, 32, 32)
        x = self.pool(F.relu(self.conv2(x)))   # -> (64, 16, 16)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [3]:
def load_data(batch_size=54):
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.3444, 0.3809, 0.4082], std=[0.1809, 0.1331, 0.1137])
    ])

    dataset = datasets.EuroSAT(root='./data', transform=transform, download=True)


    #train_size = int(0.8 * len(dataset))
    #test_size = len(dataset) - train_size
    #train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    with open('datasplit/split_indices.pkl', 'rb') as f:
        split = pickle.load(f)
        train_dataset = Subset(dataset, split['train'])
        test_dataset = Subset(dataset, split['test'])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    return train_loader, test_loader

In [4]:
def evaluate_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()
            total += target.size(0)

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

In [5]:
model_cnn = CNN()
model_cnn.load_state_dict(torch.load("results_eurosat/cnn_model.pth", map_location="cpu"))
model_cnn.eval()  # Set to evaluation mode

CNN(
  (conv1): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=16384, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [6]:
train_loader, test_loader = load_data()
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device= "cpu"
test_acc_cnn = evaluate_model(model_cnn, test_loader)

Test Accuracy: 84.06%


In [None]:
#decision_threshold = 0.8  # Confidence threshold for known/unknown

#bcnn_mean_probs, bcnn_classes, bcnn_confidences, bcnnKnownMask, bcnnUnknownMask = predict_with_indecision(ensemble_predict_reproduce(model_bcnn, input_tensor, n_samples=20, seed=reproduce_seed), alpha=0.8)

#bcnn_confidences, bcnn_preds = bcnn_mean_probs.max(dim=1)

In [7]:
model_bcnn = CNN()

const_bnn_prior_parameters = {
    "prior_mu": 0.0,
    "prior_sigma": 1.0,
    "posterior_mu_init": 0.0,
    "posterior_rho_init": -3.0,
    "type": "Reparameterization",  # Flipout or Reparameterization
    "moped_enable": False,  # True to initialize mu/sigma from the pretrained dnn weights
    "moped_delta": 0.5,
    }
    
dnn_to_bnn(
    model_bcnn,
    const_bnn_prior_parameters
)

model_bcnn.load_state_dict(torch.load("results_eurosat/bayesian_cnn_model.pth", map_location="cpu"))

<All keys matched successfully>

In [None]:
model_bcnn

In [8]:
def evaluate_model_bcnn(model, loader, reproduce_seed=42):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()
            total += target.size(0)

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

In [9]:
from tqdm import tqdm

In [None]:
def complete_evaluate_model_bcnn_ensemble_with_uncertainty(model, 
                                                           loader, 
                                                           reproduce_seed=42, 
                                                           n_samples=20, 
                                                           alpha=0.8,
                                                           accepted_accuracy=0):
    """Returns mean prediction probabilities over n posterior samples"""
    model.eval()
    correct = 0
    total = 0
    unknown_count = 0

    with torch.no_grad():
        for data, target in tqdm(loader, desc="Evaluating"):
            data, target = data.to(device), target.to(device)
            batch_preds = []
            for _ in range(n_samples):
                output = model(data)  # stochastic forward pass
                prob = F.softmax(output, dim=1)
                batch_preds.append(prob)
            mean_probs = torch.stack(batch_preds).mean(dim=0)  # [batch_size, num_classes]

            confidences, pred_classes = mean_probs.max(dim=1)
            known_mask = confidences >= alpha
            unknown_mask = ~known_mask

            correct += (pred_classes == target).sum().item()
            total += target.size(0)
            unknown_count += unknown_mask.sum().item()

        accuracy = 100 * correct / total
        indecision = unknown_count / total

        accuracy_in_presence_of_indecision = correct / (total - unknown_count) if (total - unknown_count) > 0 else 0

        # tolerance measurements
        max_accuracy = 1
        min_accuracy_for_max_penalty = 0

        def compute_tolerance(accuracy, accepted_accuracy, max_accuracy):
            if accuracy >= accepted_accuracy:
                return max(min(accuracy, max_accuracy) - accepted_accuracy, 0) / (max_accuracy - accepted_accuracy)
            else:
                return 0
            
        def compute_penalization(accuracy, accepted_accuracy, min_accuracy_for_max_penalty):
            if accuracy >= accepted_accuracy:
                return 0
            else:
                return max(0, min(1, (accepted_accuracy - accuracy) / (accepted_accuracy - min_accuracy_for_max_penalty)))
        
        tolerance_1 = compute_tolerance(accuracy, accepted_accuracy, max_accuracy)
        penalization_1 = compute_penalization(accuracy, accepted_accuracy, min_accuracy_for_max_penalty)

        # gamma
        accepted_ratio_of_certain = 0.8

        tolerance_2 = compute_tolerance(1-indecision, accepted_ratio_of_certain, max_accuracy)
        penalization_2 = compute_penalization(1-indecision, accepted_ratio_of_certain, 0.5)

        robustness_without_uncertainty = (tolerance_1 - penalization_1) / 2 + 1/2
        robustness_with_uncertainty = (tolerance_2 - penalization_2) / 2 + 1/2

        effectiveness = (accuracy * (1 - indecision)) / (indecision + 1)

        #teta = 0 #minimum accuracy accepted
        beta = accepted_accuracy * accepted_ratio_of_certain / (accepted_ratio_of_certain + 2)

        augmented_robustness = 0 #TODO

        print(f"Test Ensemble Accuracy: {accuracy:.2f}%")
        print(f"Indecision Rate: {indecision:.2f}")
        print(f"Accuracy in presence of indecision: {accuracy_in_presence_of_indecision:.2f}%")
        print(f"Tolerance: {tolerance_1:.2f}")
        print(f"Penalization: {penalization_1:.2f}")
        print(f"Robustness without uncertainty: {robustness_without_uncertainty:.2f}")
        print(f"Robustness with uncertainty: {robustness_with_uncertainty:.2f}")
        print(f"Effectiveness: {effectiveness:.2f}")

        return accuracy


In [11]:
def evaluate_model_bcnn_ensemble_with_uncertainty(model, loader, device="cpu", n_samples=20, alpha=0.8):
    """Returns mean prediction probabilities over n posterior samples"""
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in tqdm(loader, desc="Evaluating"):
            data, target = data.to(device), target.to(device)
            batch_preds = []
            for _ in range(n_samples):
                output = model(data)  # stochastic forward pass
                prob = F.softmax(output, dim=1)
                batch_preds.append(prob)
            mean_probs = torch.stack(batch_preds).mean(dim=0)  # [batch_size, num_classes]

            confidences, pred_classes = mean_probs.max(dim=1)
            known_mask = confidences >= alpha
            unknown_mask = ~known_mask

            correct += (pred_classes == target).sum().item()
            total += target.size(0)

    accuracy = 100 * correct / total
    print(f"Test Ensemble Accuracy: {accuracy:.2f}%")
    return accuracy

In [12]:
test_acc_bcnn = evaluate_model(model_bcnn, test_loader)

Test Accuracy: 61.54%


In [13]:
test_acc_cnn = evaluate_model(model_cnn, test_loader)

Test Accuracy: 84.06%


In [14]:
test_acc_bcnn_ensemble = evaluate_model_bcnn_ensemble_with_uncertainty(model_bcnn, test_loader, n_samples=20, alpha=0.8)

Evaluating: 100%|██████████| 100/100 [01:51<00:00,  1.12s/it]

Test Ensemble Accuracy: 67.63%





In [15]:
test_acc_bcnn_ensemble_complete = complete_evaluate_model_bcnn_ensemble_with_uncertainty(model_bcnn, test_loader, n_samples=20, alpha=0.8)

Evaluating: 100%|██████████| 100/100 [01:58<00:00,  1.19s/it]

Test Ensemble Accuracy: 67.81%
Indecision Rate: 0.74
Accuracy in presence of indecision: 2.58%
Tolerance: 1.00
Penalization: 0.00
Robustness without uncertainty: 1.00
Robustness with uncertainty: 0.00
Effectiveness: 10.27





In [16]:
test_acc_bcnn_ensemble_complete = complete_evaluate_model_bcnn_ensemble_with_uncertainty(model_bcnn, 
                                                                                         test_loader, 
                                                                                         n_samples=20, 
                                                                                         alpha=0.8, 
                                                                                         accepted_accuracy=0)

Evaluating: 100%|██████████| 100/100 [02:08<00:00,  1.28s/it]

Test Ensemble Accuracy: 67.63%
Indecision Rate: 0.74
Accuracy in presence of indecision: 2.65%
Tolerance: 1.00
Penalization: 0.00
Robustness without uncertainty: 1.00
Robustness with uncertainty: 0.00
Effectiveness: 9.90





# BITFLIP SIMULATION

In [17]:
import numpy as np

In [18]:
def flip_bit_in_tensor(tensor, bit_position=1, flip_count=1):
    flat = tensor.view(-1)
    idx = torch.randint(0, flat.numel(), (flip_count,))
    for i in idx:
        val = flat[i].item()
        int_val = np.frombuffer(np.float32(val).tobytes(), dtype=np.uint32)[0]
        flipped = int_val ^ (1 << bit_position)
        flipped_val = np.frombuffer(np.uint32(flipped).tobytes(), dtype=np.float32)[0]
        flat[i] = torch.tensor(flipped_val)
    return tensor

def inject_seu_conv_layer(layer, bit_position=10, flip_count=1):
    with torch.no_grad():
        layer.weight.data = flip_bit_in_tensor(layer.weight.data.clone(), bit_position, flip_count)

In [19]:
# position 30 is the biggest
original_weights = model_cnn.conv1.weight.data.clone()
inject_seu_conv_layer(model_cnn.conv1, bit_position=30, flip_count=1)
changed = not torch.equal(model_cnn.conv1.weight.data, original_weights)
print("Weights changed:", changed)
diff = (model_cnn.conv1.weight.data - original_weights).abs().sum().item()
print("Total absolute difference:", diff)
test_acc_cnn_seu = evaluate_model(model_cnn, test_loader)
with torch.no_grad():
    model_cnn.conv1.weight.data.copy_(original_weights)
#print the difference of accuracy
print("Accuracy difference for CNN after SEU injection: {:.2f}%".format(test_acc_cnn_seu - test_acc_cnn))

Weights changed: True
Total absolute difference: 2.0026096990538524e+37
Test Accuracy: 15.46%
Accuracy difference for CNN after SEU injection: -68.59%


In [20]:
def inject_seu_mu_kernel(layer, bit_position=30, flip_count=1):
    with torch.no_grad():
        layer.mu_kernel.data = flip_bit_in_tensor(layer.mu_kernel.data.clone(), bit_position, flip_count)

In [21]:
# position 30 is the biggest
original_weights = model_bcnn.conv1.mu_kernel.data.clone()
inject_seu_mu_kernel( model_bcnn.conv1, bit_position=30, flip_count=1)
changed = not torch.equal( model_bcnn.conv1.mu_kernel, original_weights)
print("Weights changed:", changed)
diff = ( model_bcnn.conv1.mu_kernel - original_weights).abs().sum().item()
print("Total absolute difference:", diff)
test_acc_bcnn_seu = evaluate_model_bcnn_ensemble_with_uncertainty(model_bcnn, test_loader, n_samples=20, alpha=0.8)
with torch.no_grad():
     model_bcnn.conv1.mu_kernel.copy_(original_weights)
#print the difference of accuracy
print("Accuracy difference after SEU injection: {:.2f}%".format(test_acc_cnn_seu - test_acc_bcnn))

Weights changed: True
Total absolute difference: 4.840707879838647e+37


Evaluating: 100%|██████████| 100/100 [02:26<00:00,  1.47s/it]

Test Ensemble Accuracy: 12.11%
Accuracy difference after SEU injection: -46.07%





In [22]:
with torch.no_grad():
     model_bcnn.conv1.mu_kernel.copy_(original_weights)

In [23]:
print("Accuracy difference after SEU injection: {:.2f}%".format(abs(test_acc_bcnn - test_acc_cnn_seu)))

Accuracy difference after SEU injection: 46.07%


In [24]:
print(dir(model_bcnn.conv1))

['T_destination', '__annotations__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_apply', '_backward_hooks', '_backward_pre_hooks', '_buffers', '_call_impl', '_compiled_call_impl', '_dnn_to_bnn_flag', '_forward_hooks', '_forward_hooks_always_called', '_forward_hooks_with_kwargs', '_forward_pre_hooks', '_forward_pre_hooks_with_kwargs', '_get_backward_hooks', '_get_backward_pre_hooks', '_get_name', '_is_full_backward_hook', '_load_from_state_dict', '_load_state_dict_post_hooks', '_load_state_dict_pre_hooks', '_maybe_warn_non_full_backward_hook', '_modules', '_named_members', '_non_persistent_buffers_set', '_parameters', '_register_loa

In [25]:
test_acc_cnn

84.05555555555556

In [26]:
test_acc_cnn_seu

15.462962962962964

In [27]:
test_acc_bcnn

61.53703703703704

In [28]:
test_acc_bcnn_seu

12.11111111111111