# Try Tiny BNN with MNIST

## 1. Dataset Loading

In [1]:
import torchvision
from torchvision import datasets
from torchvision import transforms
from torch.autograd import Variable
from torch.utils.data import DataLoader, random_split

In [2]:
full_data_train = torchvision.datasets.MNIST('./data/', 
                                        train=True, download=True,
                                        transform=torchvision.transforms.Compose
                                        ([
                                            torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize((0.5,), (0.5,))
                                        ]))

# Split the dataset into training and validation subsets
train_size = int(0.8 * len(full_data_train))
val_size = len(full_data_train) - train_size
data_train, data_valid = random_split(full_data_train, [train_size, val_size])

data_test = torchvision.datasets.MNIST('./data/', 
                                       train=False, download=True,
                                       transform=torchvision.transforms.Compose
                                       ([
                                            torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize((0.5,), (0.5,))
                                       ]))

## 2. Define MLP structure

In [3]:
import math
import torch
import torch.nn as nn
from torch.nn import Module
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm, trange

In [4]:
import brevitas.nn as qnn
from brevitas.nn import QuantLinear, QuantReLU, QuantConv2d
from brevitas.quant.binary import SignedBinaryActPerTensorConst
from brevitas.quant.binary import SignedBinaryWeightPerTensorConst
from brevitas.inject.enum import QuantType

In [5]:
input_size = 1*28*28      
hidden1 = 64      
hidden2 = 64
hidden3 = 64
num_classes = 10  

In [6]:
class BiKA_MNIST(Module):
    def __init__(self):
        super(BiKA_MNIST, self).__init__()
        
        self.input = qnn.QuantIdentity(quant_type='binary', scaling_impl_type='const', bit_width=1, min_val=-1.0, max_val=1.0, return_quant_tensor=True)
        
        self.fc0   = qnn.QuantLinear(input_size, hidden1, weight_bit_width=1, weight_quant_type=QuantType.BINARY, bias=False)
        self.bn0   = nn.BatchNorm1d(hidden1)
        self.relu0 = qnn.QuantReLU(bit_width=1, return_quant_tensor=True)
        
        self.fc1   = qnn.QuantLinear(hidden1, hidden2, weight_bit_width=1, weight_quant_type=QuantType.BINARY, bias=False)
        self.bn1   = nn.BatchNorm1d(hidden2)
        self.relu1 = qnn.QuantReLU(bit_width=1, return_quant_tensor=True)
        
        self.fc2   = qnn.QuantLinear(hidden2, hidden3, weight_bit_width=1, weight_quant_type=QuantType.BINARY, bias=False)
        self.bn2   = nn.BatchNorm1d(hidden3)
        self.relu2 = qnn.QuantReLU(bit_width=1, return_quant_tensor=True)
        
        self.out   = qnn.QuantLinear(hidden3, num_classes, weight_bit_width=1, weight_quant_type=QuantType.BINARY, bias=False)

    def forward(self, x):
        
        out = x.reshape(x.shape[0], -1)
        out = self.input(out)
        out = self.relu0(self.bn0(self.fc0(out)))
        out = self.relu1(self.bn1(self.fc1(out)))
        out = self.relu2(self.bn2(self.fc2(out)))
        out = self.out(out)
        
        return out

## 3. Define Training Function

In [7]:
num_of_gpus = torch.cuda.device_count()
print(num_of_gpus)

# Check for GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Setting seeds for reproducibility
torch.manual_seed(0)

2
Using device: cuda:0


<torch._C.Generator at 0x7fe34e2eaad0>

In [8]:
def display_loss_plot(losses, title="Training loss", xlabel="Iterations", ylabel="Loss"):
    x_axis = [i for i in range(len(losses))]
    plt.plot(x_axis,losses)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

In [9]:
def train_and_validate(model, train_loader, val_loader, criterion, learning_rate):
    model.train()
    
    optimizer = optim.Adam(model.parameters(), lr=learning_rate[0])
    
    for epoch in range(100):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            
        # Adjust learning rate at epoch 100
        if epoch+1 == 50:
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate[1]
                print(f"Learning rate changed to {param_group['lr']} at epoch {epoch+1}")
        
        # Adjust learning rate at epoch 150
        if epoch+1 == 75:
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate[2]
                print(f"Learning rate changed to {param_group['lr']} at epoch {epoch+1}")

        # Validation phase
        model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        val_acc = accuracy_score(all_labels, all_preds)
        print(f"Epoch [{epoch+1}/{100}], "
              f"Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Val Accuracy: {val_acc*100:.2f}%")
        
    return val_acc

## 4. Define Evaluation Function

In [10]:
def evaluate_model(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    test_acc = accuracy_score(all_labels, all_preds)
    print(f"Test Accuracy: {test_acc * 100:.2f}%")
    return test_acc

## 5. Train BNN for MNIST

In [11]:
batch_sizes = [64, 128, 256]
learning_rates = [[0.0100, 0.0010, 0.0010],
                  [0.0010, 0.0010, 0.0010],
                  [0.0010, 0.0010, 0.0001],
                  [0.0010, 0.0005, 0.0001]
                 ]

In [12]:
best_acc = 0.0
best_params = None

for batch_size, learning_rate in product(batch_sizes, learning_rates):
    print(f"Training with batch_size={batch_size}, learning_rate={learning_rate}")

    # Data loaders
    train_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(data_valid, batch_size=batch_size, shuffle=False)

    # Initialize the model, loss, and optimizer
    model = BiKA_MNIST().to(device)
    criterion = nn.CrossEntropyLoss()

    # Train and validate
    val_acc = train_and_validate(model, train_loader, val_loader, criterion, learning_rate)

    # Update best parameters
    if val_acc > best_acc:
        best_acc = val_acc
        best_params = (batch_size, learning_rate)

print(f"Best Accuracy: {best_acc*100:.2f}%")
print(f"Best Parameters: Batch Size={best_params[0]}, Learning Rate={best_params[1]}")

Training with batch_size=64, learning_rate=[0.01, 0.001, 0.001]


  return super().rename(names)
  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/200], Train Loss: 1.0652, Val Accuracy: 87.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/200], Train Loss: 2.2904, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50
Epoch [50/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 75
Epoch [75/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/200], Train Loss: 2.3026, Val Accuracy: 10.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/200], Train Loss: 2.3026, Val Accuracy: 10.06%
Training with batch_size=64, learning_rate=[0.001, 0.001, 0.001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/200], Train Loss: 1.4287, Val Accuracy: 81.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/200], Train Loss: 0.5441, Val Accuracy: 87.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/200], Train Loss: 0.4321, Val Accuracy: 87.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/200], Train Loss: 0.4136, Val Accuracy: 88.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/200], Train Loss: 0.4009, Val Accuracy: 87.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/200], Train Loss: 0.3823, Val Accuracy: 88.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/200], Train Loss: 0.3714, Val Accuracy: 89.71%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/200], Train Loss: 0.3658, Val Accuracy: 90.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/200], Train Loss: 0.3478, Val Accuracy: 88.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/200], Train Loss: 0.3456, Val Accuracy: 90.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/200], Train Loss: 0.3287, Val Accuracy: 91.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/200], Train Loss: 0.3378, Val Accuracy: 90.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/200], Train Loss: 0.3324, Val Accuracy: 91.76%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/200], Train Loss: 0.3272, Val Accuracy: 90.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/200], Train Loss: 0.3351, Val Accuracy: 89.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/200], Train Loss: 0.3297, Val Accuracy: 91.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/200], Train Loss: 0.3091, Val Accuracy: 89.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/200], Train Loss: 0.3218, Val Accuracy: 90.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/200], Train Loss: 0.3095, Val Accuracy: 89.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/200], Train Loss: 0.3106, Val Accuracy: 90.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/200], Train Loss: 0.3130, Val Accuracy: 90.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/200], Train Loss: 0.3136, Val Accuracy: 88.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/200], Train Loss: 0.3173, Val Accuracy: 90.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/200], Train Loss: 0.3110, Val Accuracy: 91.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/200], Train Loss: 0.3083, Val Accuracy: 90.56%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/200], Train Loss: 0.3123, Val Accuracy: 91.44%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/200], Train Loss: 0.3171, Val Accuracy: 89.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/200], Train Loss: 0.3090, Val Accuracy: 89.76%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/200], Train Loss: 0.3173, Val Accuracy: 91.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/200], Train Loss: 0.3170, Val Accuracy: 90.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/200], Train Loss: 0.3084, Val Accuracy: 90.37%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/200], Train Loss: 0.3196, Val Accuracy: 91.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/200], Train Loss: 0.3006, Val Accuracy: 90.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/200], Train Loss: 0.3122, Val Accuracy: 89.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/200], Train Loss: 0.3085, Val Accuracy: 90.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/200], Train Loss: 0.3171, Val Accuracy: 90.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/200], Train Loss: 0.3046, Val Accuracy: 89.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/200], Train Loss: 0.3150, Val Accuracy: 90.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/200], Train Loss: 0.3080, Val Accuracy: 90.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/200], Train Loss: 0.3070, Val Accuracy: 90.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/200], Train Loss: 0.3015, Val Accuracy: 90.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/200], Train Loss: 0.3009, Val Accuracy: 90.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/200], Train Loss: 0.3015, Val Accuracy: 90.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/200], Train Loss: 0.3064, Val Accuracy: 91.76%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/200], Train Loss: 0.2919, Val Accuracy: 90.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/200], Train Loss: 0.2942, Val Accuracy: 88.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/200], Train Loss: 0.2933, Val Accuracy: 89.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/200], Train Loss: 0.2907, Val Accuracy: 91.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/200], Train Loss: 0.2900, Val Accuracy: 89.71%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50
Epoch [50/200], Train Loss: 0.2903, Val Accuracy: 91.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/200], Train Loss: 0.2908, Val Accuracy: 91.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/200], Train Loss: 0.2931, Val Accuracy: 91.51%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/200], Train Loss: 0.2865, Val Accuracy: 91.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/200], Train Loss: 0.2864, Val Accuracy: 90.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/200], Train Loss: 0.2951, Val Accuracy: 91.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/200], Train Loss: 0.2822, Val Accuracy: 91.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/200], Train Loss: 0.2845, Val Accuracy: 91.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/200], Train Loss: 0.2915, Val Accuracy: 90.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/200], Train Loss: 0.2833, Val Accuracy: 91.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/200], Train Loss: 0.2742, Val Accuracy: 90.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/200], Train Loss: 0.2847, Val Accuracy: 90.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/200], Train Loss: 0.2839, Val Accuracy: 90.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/200], Train Loss: 0.2909, Val Accuracy: 89.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/200], Train Loss: 0.2797, Val Accuracy: 90.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/200], Train Loss: 0.2854, Val Accuracy: 91.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/200], Train Loss: 0.2769, Val Accuracy: 91.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/200], Train Loss: 0.2854, Val Accuracy: 91.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/200], Train Loss: 0.2881, Val Accuracy: 90.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/200], Train Loss: 0.2823, Val Accuracy: 90.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/200], Train Loss: 0.2798, Val Accuracy: 91.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/200], Train Loss: 0.2837, Val Accuracy: 90.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/200], Train Loss: 0.2735, Val Accuracy: 91.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/200], Train Loss: 0.2753, Val Accuracy: 91.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/200], Train Loss: 0.2768, Val Accuracy: 91.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 75
Epoch [75/200], Train Loss: 0.2811, Val Accuracy: 90.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/200], Train Loss: 0.2757, Val Accuracy: 91.40%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/200], Train Loss: 0.2739, Val Accuracy: 89.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/200], Train Loss: 0.2708, Val Accuracy: 91.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/200], Train Loss: 0.2809, Val Accuracy: 91.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/200], Train Loss: 0.2813, Val Accuracy: 91.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/200], Train Loss: 0.2725, Val Accuracy: 91.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/200], Train Loss: 0.2699, Val Accuracy: 90.40%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/200], Train Loss: 0.2845, Val Accuracy: 89.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/200], Train Loss: 0.2763, Val Accuracy: 90.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/200], Train Loss: 0.2797, Val Accuracy: 89.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/200], Train Loss: 0.2749, Val Accuracy: 89.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/200], Train Loss: 0.2725, Val Accuracy: 91.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/200], Train Loss: 0.2731, Val Accuracy: 91.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/200], Train Loss: 0.2786, Val Accuracy: 91.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/200], Train Loss: 0.2673, Val Accuracy: 90.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/200], Train Loss: 0.2737, Val Accuracy: 90.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/200], Train Loss: 0.2744, Val Accuracy: 89.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/200], Train Loss: 0.2817, Val Accuracy: 91.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/200], Train Loss: 0.2801, Val Accuracy: 91.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/200], Train Loss: 0.2725, Val Accuracy: 90.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/200], Train Loss: 0.2783, Val Accuracy: 91.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/200], Train Loss: 0.2762, Val Accuracy: 92.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/200], Train Loss: 0.2771, Val Accuracy: 91.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/200], Train Loss: 0.2762, Val Accuracy: 91.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/200], Train Loss: 0.2739, Val Accuracy: 91.95%
Training with batch_size=64, learning_rate=[0.001, 0.001, 0.0001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/200], Train Loss: 1.4128, Val Accuracy: 81.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/200], Train Loss: 0.5540, Val Accuracy: 87.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/200], Train Loss: 0.4405, Val Accuracy: 88.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/200], Train Loss: 0.4148, Val Accuracy: 87.76%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/200], Train Loss: 0.4031, Val Accuracy: 89.49%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/200], Train Loss: 0.3864, Val Accuracy: 90.37%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/200], Train Loss: 0.3699, Val Accuracy: 89.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/200], Train Loss: 0.3621, Val Accuracy: 90.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/200], Train Loss: 0.3520, Val Accuracy: 89.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/200], Train Loss: 0.3425, Val Accuracy: 85.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/200], Train Loss: 0.3399, Val Accuracy: 89.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/200], Train Loss: 0.3410, Val Accuracy: 87.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/200], Train Loss: 0.3295, Val Accuracy: 91.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/200], Train Loss: 0.3258, Val Accuracy: 90.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/200], Train Loss: 0.3171, Val Accuracy: 90.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/200], Train Loss: 0.3227, Val Accuracy: 90.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/200], Train Loss: 0.3216, Val Accuracy: 89.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/200], Train Loss: 0.3123, Val Accuracy: 88.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/200], Train Loss: 0.3189, Val Accuracy: 90.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/200], Train Loss: 0.3150, Val Accuracy: 89.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/200], Train Loss: 0.3196, Val Accuracy: 90.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/200], Train Loss: 0.2999, Val Accuracy: 90.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/200], Train Loss: 0.3043, Val Accuracy: 91.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/200], Train Loss: 0.3055, Val Accuracy: 89.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/200], Train Loss: 0.3053, Val Accuracy: 89.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/200], Train Loss: 0.3057, Val Accuracy: 90.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/200], Train Loss: 0.3112, Val Accuracy: 90.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/200], Train Loss: 0.2968, Val Accuracy: 90.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/200], Train Loss: 0.2939, Val Accuracy: 91.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/200], Train Loss: 0.3064, Val Accuracy: 90.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/200], Train Loss: 0.2943, Val Accuracy: 88.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/200], Train Loss: 0.3045, Val Accuracy: 90.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/200], Train Loss: 0.3023, Val Accuracy: 91.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/200], Train Loss: 0.3031, Val Accuracy: 89.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/200], Train Loss: 0.2961, Val Accuracy: 91.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/200], Train Loss: 0.2968, Val Accuracy: 91.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/200], Train Loss: 0.2953, Val Accuracy: 91.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/200], Train Loss: 0.2954, Val Accuracy: 89.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/200], Train Loss: 0.2929, Val Accuracy: 91.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/200], Train Loss: 0.2943, Val Accuracy: 91.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/200], Train Loss: 0.2842, Val Accuracy: 91.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/200], Train Loss: 0.2908, Val Accuracy: 89.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/200], Train Loss: 0.2875, Val Accuracy: 91.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/200], Train Loss: 0.2822, Val Accuracy: 91.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/200], Train Loss: 0.2874, Val Accuracy: 90.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/200], Train Loss: 0.2895, Val Accuracy: 91.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/200], Train Loss: 0.2931, Val Accuracy: 90.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/200], Train Loss: 0.2864, Val Accuracy: 91.10%


  output_tensor = linear(x, quant_weight, quant_bias)


KeyboardInterrupt: 

## 6. Evaluate BNN for MNIST

In [None]:
train_loader = DataLoader(data_train, batch_size=best_params[0], shuffle=True)
val_loader = DataLoader(data_valid, batch_size=best_params[0], shuffle=False)
test_loader = DataLoader(data_test, batch_size=best_params[0], shuffle=False)

model = BiKA_MNIST().to(device)
criterion = nn.CrossEntropyLoss()

train_and_validate(model, train_loader, val_loader, criterion, best_params[1])

In [None]:
print(f"Best Validation Accuracy: {best_acc*100:.2f}%")
print(f"Best Parameters: Batch Size={best_params[0]}, Learning Rate={best_params[1]}")

evaluate_model(model, test_loader)