# Example of Using Bayesian Neural Network

This is only an example of how to use Bayesian-Torch. I used CIFAR10 to simplify the example, so DON'T FORGET to change the dataset to MS-ASL. (Note that the data-loading cell below already calls `load_msasl`.)

In [1]:
import torch
import torchvision
from bayesian_torch.models.dnn_to_bnn import dnn_to_bnn, get_kl_loss
from bayesian_torch.layers.variational_layers.linear_variational import LinearReparameterization

In [None]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to
# the CPU. The chosen device is printed so the notebook output records it.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

In [3]:
def get_layer_parameter_count(layer):
    """
    Count the scalar parameters held by a single PyTorch layer.

    Args:
        layer: A PyTorch nn.Module layer.

    Returns:
        int: Sum of ``numel()`` over every tensor yielded by
        ``layer.parameters()``; 0 when the layer has no parameters.
    """
    total = 0
    for param in layer.parameters():
        total += param.numel()
    return total

In [4]:
def uint8_to_float(image_tensor):
    """Rescale a uint8 tensor to a float32 tensor in the range [0, 1].

    The input is cast to float32 first and then divided by 255.0, the
    largest value a uint8 can hold. The result has the same shape as the
    input.
    """
    as_float = image_tensor.to(torch.float32)
    return as_float / 255.0

# Preprocessing applied to every clip before it is fed to the network:
#   1. rescale uint8 pixel values to floats in [0, 1];
#   2. normalize each colour channel.
# The mean/std are the per-channel statistics torchvision documents for the
# pretrained r3d_18 (Kinetics-400) weights used in this notebook. The ImageNet
# image statistics (0.485/0.456/0.406, 0.229/0.224/0.225) do not match what
# the video backbone was trained with.
# NOTE(review): Normalize expects the channel axis at dim -3, i.e. input shaped
# (..., C, H, W) — confirm against the clips returned by the dataset.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Lambda(uint8_to_float),
    torchvision.transforms.Normalize(mean=[0.43216, 0.394666, 0.37645],
                                     std=[0.22803, 0.22145, 0.216989]),
])

In [None]:
from utils.dataset import load_msasl

# Number of MS-ASL classes to keep; also used as the output size of the
# Bayesian classification head built later in the notebook.
num_classes = 10
# Clips are processed one at a time; this value also scales the KL term
# during training.
batch_size = 1

# Load a subset of the MSASL Dataset restricted to `num_classes` classes.
# The loader's return value is unpacked in (test, train, validation) order.
msasl_splits = load_msasl("data", num_classes)
test_dataset, train_dataset, validation_dataset = msasl_splits

## Change the Last Layer to a Bayesian FC Layer
You can also use `dnn_to_bnn()`, as in the example provided by IntelLabs, but I prefer replacing the layer manually as shown below.

In [None]:
# Start from the pretrained 3D ResNet-18 video backbone.
model_bnn = torchvision.models.video.r3d_18(weights='DEFAULT')

# Remember the shape of the original (deterministic) classification head.
ori_in_features, ori_out_features = model_bnn.fc.in_features, model_bnn.fc.out_features

# Hyperparameters of the Bayesian replacement head: it keeps the input width
# of the original head but emits one logit per MS-ASL class.
bayesian_fc_kwargs = dict(
    in_features=ori_in_features,
    out_features=num_classes,
    prior_mean=0,
    prior_variance=1,
    posterior_mu_init=0,
    posterior_rho_init=-3.0,
    bias=True,
)
model_bnn.fc = LinearReparameterization(**bayesian_fc_kwargs)

# Mark the new head as a converted BNN layer. Without this flag the layer
# returns two outputs (out, kl) from the forward pass instead of just `out`.
model_bnn.fc.dnn_to_bnn_flag = True

model_bnn.to(device)

## Variational Bayesian NN Parameters
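- The number of parameters is doubled for each Bayesian layer: $(\text{inFeatures} \times \text{outFeatures} + \text{bias}) \times 2$, where $\text{bias} = \text{outFeatures}$ when `bias=True`.
- Thus, each Bayesian layer has two sets of weights, mu and rho (Section 3.2 of the paper)
- The weights are updated using the KL divergence (Section 3.4 of the paper)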

In [None]:
# Inspect the Bayesian head: its total parameter count and the two
# variational weight tensors (mu and rho) it holds.
bayesian_fc = model_bnn.fc
num_params = get_layer_parameter_count(bayesian_fc)
print("Number of Parameters:\n", num_params)
print("Mu Weight:\n", bayesian_fc.mu_weight)
print("Rho Weight:\n", bayesian_fc.rho_weight)

## Training Step
Training proceeds as usual. The only difference is that you need to compute the KL loss and add it to the classification loss before updating the weights.

In [8]:
# Adam updates every parameter of the network (backbone and Bayesian head)
# with a learning rate of 0.01.
optimizer = torch.optim.Adam(params=model_bnn.parameters(), lr=0.01)
# Classification term of the objective; the KL term is added in the
# training loop.
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Train for one epoch, taking one optimisation step per video clip.
# The objective is the cross-entropy loss plus the KL term of the Bayesian
# layer(s), with the KL term divided by `batch_size`.
for epoch in range(1):  # loop over the dataset multiple times

    # Report the number of training samples rather than the dataset object.
    # NOTE(review): assumes the dataset implements __len__ — confirm for the
    # object returned by load_msasl.
    print("Number of train dataset:", len(train_dataset))

    # Unpack each sample straight from the iterator so every clip is loaded
    # only once per step (indexing the dataset again would load it twice).
    for i, (video, label, _metadata) in enumerate(train_dataset):
        # Keep the target on the same device as the logits, so the
        # cross-entropy and KL terms are computed and summed on one device.
        label = torch.LongTensor([label]).to(device)

        # Transform video
        video = video.unsqueeze(0)     # add a leading batch dimension
        video = transform(video)
        # Swap dims 1 and 2; presumably (N, T, C, H, W) -> (N, C, T, H, W),
        # the layout the video model consumes — TODO confirm the clip layout.
        video = video.transpose(1, 2)
        video = video.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model_bnn(video)
        kl = get_kl_loss(model_bnn)
        loss = criterion(outputs, label) + kl / batch_size
        loss.backward()
        optimizer.step()

        # print statistics (loss of this single step)
        print(f'[Epoch {epoch + 1}, Batch {i + 1:5d}] loss: {loss.item():.3f}')

print('Finished Training')