### Importing Essential Packages

In [2]:
import numpy as np
from sklearn import datasets
import torch
import torch.nn as nn
import torch.optim as optim
import torchbnn as bnn
import matplotlib.pyplot as plt

### Loading the dataset 

In [3]:
# Fetch the Iris dataset bundled with scikit-learn. `return_X_y=False` (the
# default) keeps the result as one object exposing `.data` and `.target`.
dataset = datasets.load_iris(return_X_y=False)

### Separating features and labels, and converting them to tensors

In [4]:
# Pull the feature matrix and the class labels out of the loaded dataset.
data, target = dataset.data, dataset.target

# Features are cast to float32 and labels to int64 (long); the labels are
# later passed as targets to the cross-entropy loss.
data_tensor = torch.as_tensor(data, dtype=torch.float32)
target_tensor = torch.as_tensor(target, dtype=torch.long)

### Defining a simple Bayesian model

`prior_mu` (float) is the mean of the prior normal distribution.

`prior_sigma` (float) is the standard deviation (sigma) of the prior normal distribution.

In [5]:
# Seed PyTorch's global RNG so that a fresh "Restart Kernel -> Run All" starts
# from the same random state; the notebook is otherwise unseeded, so results
# would differ from run to run.
# NOTE(review): presumably torchbnn draws its initial weights and per-forward
# noise from this global RNG -- confirm against the torchbnn source.
SEED = 42
torch.manual_seed(SEED)

# Layer sizes: 4 input features -> 100 hidden units -> 3 output scores.
N_FEATURES, N_HIDDEN, N_CLASSES = 4, 100, 3
# Mean and sigma of the normal prior, shared by both Bayesian layers.
PRIOR_MU, PRIOR_SIGMA = 0, 0.1

# Two Bayesian linear layers with a ReLU non-linearity in between.
model = nn.Sequential(
    bnn.BayesLinear(prior_mu=PRIOR_MU, prior_sigma=PRIOR_SIGMA, in_features=N_FEATURES, out_features=N_HIDDEN),
    nn.ReLU(),
    bnn.BayesLinear(prior_mu=PRIOR_MU, prior_sigma=PRIOR_SIGMA, in_features=N_HIDDEN, out_features=N_CLASSES),
)

### Defining loss function and optimizer

Two loss functions are used here: the cross-entropy loss, which measures how well the predictions fit the labels, and the BKL loss, which computes the KL (Kullback–Leibler) divergence of the network.

In [6]:
# Weight applied to the KL term when it is added to the cross-entropy loss.
klweight = 0.01

# Data-fit term: cross-entropy between the model outputs and the class labels.
cross_entropy_loss = nn.CrossEntropyLoss()

# Regularisation term: KL divergence of the network, mean-reduced, with
# `last_layer_only=False` (i.e. not restricted to the final layer).
klloss = bnn.BKLLoss(last_layer_only=False, reduction='mean')

# Adam over every parameter of the model, with a learning rate of 0.01.
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

### Training the model

The model is trained for 3000 steps (this would have led to overfitting for a traditional network).

In [12]:
# Optimise the combined objective: cross-entropy (data fit) plus the
# KL divergence of the network scaled by `klweight`.
for step in range(3000):
    models = model(data_tensor)  # model outputs (class scores) for every sample
    cross_entropy = cross_entropy_loss(models, target_tensor)
    kl = klloss(model)
    total_cost = cross_entropy + klweight*kl

    optimizer.zero_grad()   # clear gradients left over from the previous step
    total_cost.backward()
    optimizer.step()

# Re-evaluate with the *final* weights. The tensors left over from the loop
# were computed before the last `optimizer.step()`, so reporting them would
# describe the model one update behind. No gradients are needed here.
with torch.no_grad():
    models = model(data_tensor)
    cross_entropy = cross_entropy_loss(models, target_tensor)
    kl = klloss(model)

# Predicted class = index of the largest score in each row.
predicted = models.argmax(dim=1)
final = target_tensor.size(0)                 # number of evaluated samples
correct = (predicted == target_tensor).sum()  # number of matching predictions

# NOTE: this accuracy is measured on the same data the model was trained on.
print('- Accuracy: %f %%' % (100 * float(correct) / final))
print('- CE : %2.2f, KL : %2.2f' % (cross_entropy.item(), kl.item()))

- Accuracy: 98.000000 %
- CE : 0.06, KL : 2.86


Source: https://analyticsindiamag.com/hands-on-guide-to-bayesian-neural-network-in-classification/