# Try Tiny BNN with MNIST

## 1. Dataset Loading

In [1]:
import torchvision
from torchvision import datasets
from torchvision import transforms
from torch.autograd import Variable
from torch.utils.data import DataLoader, random_split

In [2]:
full_data_train = torchvision.datasets.MNIST('./data/', 
                                        train=True, download=True,
                                        transform=torchvision.transforms.Compose
                                        ([
                                            torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize((0.5,), (0.5,))
                                        ]))

# Split the dataset into training and validation subsets
train_size = int(0.8 * len(full_data_train))
val_size = len(full_data_train) - train_size
data_train, data_valid = random_split(full_data_train, [train_size, val_size])

data_test = torchvision.datasets.MNIST('./data/', 
                                       train=False, download=True,
                                       transform=torchvision.transforms.Compose
                                       ([
                                            torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize((0.5,), (0.5,))
                                       ]))

## 2. Define MLP structure

In [3]:
import math
import torch
import torch.nn as nn
from torch.nn import Module
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm, trange

In [4]:
import brevitas.nn as qnn
from brevitas.nn import QuantLinear, QuantReLU, QuantConv2d
from brevitas.quant.binary import SignedBinaryActPerTensorConst
from brevitas.quant.binary import SignedBinaryWeightPerTensorConst
from brevitas.inject.enum import QuantType

In [5]:
input_size = 1*28*28      
hidden1 = 512      
hidden2 = 512
hidden3 = 512
num_classes = 10  

In [6]:
class BiKA_MNIST(Module):
    def __init__(self):
        super(BiKA_MNIST, self).__init__()
        
        self.input = qnn.QuantIdentity(quant_type='binary', scaling_impl_type='const', bit_width=1, min_val=-1.0, max_val=1.0, return_quant_tensor=True)
        
        self.fc0   = qnn.QuantLinear(input_size, hidden1, weight_bit_width=1, weight_quant_type=QuantType.BINARY, bias=False)
        self.bn0   = nn.BatchNorm1d(hidden1)
        self.relu0 = qnn.QuantReLU(bit_width=1, return_quant_tensor=True)
        
        self.fc1   = qnn.QuantLinear(hidden1, hidden2, weight_bit_width=1, weight_quant_type=QuantType.BINARY, bias=False)
        self.bn1   = nn.BatchNorm1d(hidden2)
        self.relu1 = qnn.QuantReLU(bit_width=1, return_quant_tensor=True)
        
        self.fc2   = qnn.QuantLinear(hidden2, hidden3, weight_bit_width=1, weight_quant_type=QuantType.BINARY, bias=False)
        self.bn2   = nn.BatchNorm1d(hidden3)
        self.relu2 = qnn.QuantReLU(bit_width=1, return_quant_tensor=True)
        
        self.out   = qnn.QuantLinear(hidden3, num_classes, weight_bit_width=1, weight_quant_type=QuantType.BINARY, bias=False)

    def forward(self, x):
        
        out = x.reshape(x.shape[0], -1)
        out = self.input(out)
        out = self.relu0(self.bn0(self.fc0(out)))
        out = self.relu1(self.bn1(self.fc1(out)))
        out = self.relu2(self.bn2(self.fc2(out)))
        out = self.out(out)
        
        return out

## 3. Define Training Function

In [7]:
num_of_gpus = torch.cuda.device_count()
print(num_of_gpus)

# Check for GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Setting seeds for reproducibility
torch.manual_seed(0)

2
Using device: cuda:0


<torch._C.Generator at 0x7f6cb39bb9f0>

In [8]:
def display_loss_plot(losses, title="Training loss", xlabel="Iterations", ylabel="Loss"):
    x_axis = [i for i in range(len(losses))]
    plt.plot(x_axis,losses)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

In [9]:
def train_and_validate(model, train_loader, val_loader, criterion, learning_rate):
    model.train()
    
    optimizer = optim.Adam(model.parameters(), lr=learning_rate[0])
    
    for epoch in range(100):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            
        # Adjust learning rate at epoch 100
        if epoch+1 == 50:
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate[1]
                print(f"Learning rate changed to {param_group['lr']} at epoch {epoch+1}")
        
        # Adjust learning rate at epoch 150
        if epoch+1 == 75:
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate[2]
                print(f"Learning rate changed to {param_group['lr']} at epoch {epoch+1}")

        # Validation phase
        model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        val_acc = accuracy_score(all_labels, all_preds)
        print(f"Epoch [{epoch+1}/{100}], "
              f"Train Loss: {running_loss/len(train_loader):.4f}, "
              f"Val Accuracy: {val_acc*100:.2f}%")
        
    return val_acc

## 4. Define Evaluation Function

In [10]:
def evaluate_model(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    test_acc = accuracy_score(all_labels, all_preds)
    print(f"Test Accuracy: {test_acc * 100:.2f}%")
    return test_acc

## 5. Train BNN for MNIST

In [11]:
batch_sizes = [64, 128, 256]
learning_rates = [[0.0100, 0.0010, 0.0010],
                  [0.0010, 0.0010, 0.0010],
                  [0.0010, 0.0010, 0.0001],
                  [0.0010, 0.0005, 0.0001]
                 ]

In [12]:
best_acc = 0.0
best_params = None

for batch_size, learning_rate in product(batch_sizes, learning_rates):
    print(f"Training with batch_size={batch_size}, learning_rate={learning_rate}")

    # Data loaders
    train_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(data_valid, batch_size=batch_size, shuffle=False)

    # Initialize the model, loss, and optimizer
    model = BiKA_MNIST().to(device)
    criterion = nn.CrossEntropyLoss()

    # Train and validate
    val_acc = train_and_validate(model, train_loader, val_loader, criterion, learning_rate)

    # Update best parameters
    if val_acc > best_acc:
        best_acc = val_acc
        best_params = (batch_size, learning_rate)

print(f"Best Accuracy: {best_acc*100:.2f}%")
print(f"Best Parameters: Batch Size={best_params[0]}, Learning Rate={best_params[1]}")

Training with batch_size=64, learning_rate=[0.01, 0.001, 0.001]


  return super().rename(names)


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 2.9023, Val Accuracy: 84.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 3.6613, Val Accuracy: 77.36%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 1.7800, Val Accuracy: 83.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 1.3325, Val Accuracy: 77.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.6930, Val Accuracy: 87.93%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.4068, Val Accuracy: 89.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.3442, Val Accuracy: 89.49%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.3987, Val Accuracy: 88.35%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.4785, Val Accuracy: 84.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.5327, Val Accuracy: 82.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.5890, Val Accuracy: 86.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.6452, Val Accuracy: 77.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 1.1775, Val Accuracy: 38.13%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 2.0820, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 2.3018, Val Accuracy: 9.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 2.3017, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 2.3025, Val Accuracy: 9.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 2.3036, Val Accuracy: 9.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 2.3037, Val Accuracy: 9.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 2.3060, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 2.3050, Val Accuracy: 18.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 2.3060, Val Accuracy: 9.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 2.3072, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 2.3071, Val Accuracy: 9.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 2.3100, Val Accuracy: 9.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 2.3069, Val Accuracy: 9.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 2.3078, Val Accuracy: 9.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 2.3056, Val Accuracy: 9.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 2.3097, Val Accuracy: 10.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 2.3095, Val Accuracy: 9.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 2.3051, Val Accuracy: 9.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 2.3090, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 2.3072, Val Accuracy: 18.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 2.3137, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 2.3074, Val Accuracy: 18.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 2.3070, Val Accuracy: 10.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 2.3091, Val Accuracy: 9.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 2.3110, Val Accuracy: 18.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 2.3095, Val Accuracy: 9.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 2.3134, Val Accuracy: 10.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 2.3109, Val Accuracy: 19.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 2.3077, Val Accuracy: 10.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 2.3197, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 2.3089, Val Accuracy: 9.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 2.3080, Val Accuracy: 9.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 2.3130, Val Accuracy: 10.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 2.3104, Val Accuracy: 10.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 2.3113, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 2.3092, Val Accuracy: 18.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50


Epoch [50/100], Train Loss: 2.3133, Val Accuracy: 10.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 2.2922, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 2.2676, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 2.2656, Val Accuracy: 9.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 2.2662, Val Accuracy: 9.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 2.2607, Val Accuracy: 19.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 2.2770, Val Accuracy: 18.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 2.2724, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 2.2708, Val Accuracy: 19.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 2.2657, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 2.2610, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 2.2711, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 2.2769, Val Accuracy: 9.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 2.2635, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 2.2722, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 2.2675, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 2.2675, Val Accuracy: 18.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 2.2721, Val Accuracy: 9.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 2.2731, Val Accuracy: 18.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 2.2720, Val Accuracy: 18.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 2.2742, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 2.2769, Val Accuracy: 18.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 2.2716, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 2.2747, Val Accuracy: 19.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 2.2752, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 75


Epoch [75/100], Train Loss: 2.2778, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 2.2809, Val Accuracy: 18.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 2.2827, Val Accuracy: 18.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 2.2829, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 2.2813, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 2.2835, Val Accuracy: 19.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 2.2842, Val Accuracy: 19.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 2.2867, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 2.2858, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 2.2895, Val Accuracy: 19.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 2.2905, Val Accuracy: 18.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 2.2908, Val Accuracy: 19.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 2.2895, Val Accuracy: 10.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 2.2919, Val Accuracy: 18.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 2.2912, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 2.2920, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 2.2920, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 2.2913, Val Accuracy: 18.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 2.2913, Val Accuracy: 19.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 2.2922, Val Accuracy: 10.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 2.2919, Val Accuracy: 9.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 2.2934, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 2.2897, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 2.2925, Val Accuracy: 18.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 2.2926, Val Accuracy: 11.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 2.2931, Val Accuracy: 19.60%
Training with batch_size=64, learning_rate=[0.001, 0.001, 0.001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 1.5290, Val Accuracy: 78.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.5682, Val Accuracy: 88.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.3690, Val Accuracy: 90.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.3081, Val Accuracy: 91.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.2872, Val Accuracy: 92.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2560, Val Accuracy: 92.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2430, Val Accuracy: 92.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2245, Val Accuracy: 93.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.2253, Val Accuracy: 93.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.2078, Val Accuracy: 91.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.2034, Val Accuracy: 93.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.1970, Val Accuracy: 93.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.2045, Val Accuracy: 93.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.1877, Val Accuracy: 93.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.1992, Val Accuracy: 92.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.1995, Val Accuracy: 93.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.2043, Val Accuracy: 92.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.1970, Val Accuracy: 94.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.1936, Val Accuracy: 93.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.2024, Val Accuracy: 92.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.2026, Val Accuracy: 92.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.1931, Val Accuracy: 93.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.1918, Val Accuracy: 91.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.1950, Val Accuracy: 92.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.1945, Val Accuracy: 93.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.1954, Val Accuracy: 93.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.1923, Val Accuracy: 91.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.1908, Val Accuracy: 93.84%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.1831, Val Accuracy: 92.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.1893, Val Accuracy: 92.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.1912, Val Accuracy: 93.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.2012, Val Accuracy: 93.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.1903, Val Accuracy: 93.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.1895, Val Accuracy: 92.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.1887, Val Accuracy: 92.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.1927, Val Accuracy: 93.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.1881, Val Accuracy: 93.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.1854, Val Accuracy: 92.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.1904, Val Accuracy: 93.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.1920, Val Accuracy: 92.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.1962, Val Accuracy: 92.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.1905, Val Accuracy: 92.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.1994, Val Accuracy: 92.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.2020, Val Accuracy: 91.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.1978, Val Accuracy: 91.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.1934, Val Accuracy: 93.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.2005, Val Accuracy: 90.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.2017, Val Accuracy: 93.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.2047, Val Accuracy: 93.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50


Epoch [50/100], Train Loss: 0.2044, Val Accuracy: 91.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.2013, Val Accuracy: 93.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.2097, Val Accuracy: 93.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.2100, Val Accuracy: 90.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.2024, Val Accuracy: 93.11%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.2034, Val Accuracy: 93.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.2028, Val Accuracy: 93.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.2043, Val Accuracy: 92.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.2099, Val Accuracy: 91.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.2107, Val Accuracy: 90.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.2147, Val Accuracy: 92.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.2166, Val Accuracy: 92.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.2149, Val Accuracy: 93.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.2125, Val Accuracy: 92.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.2156, Val Accuracy: 92.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.2168, Val Accuracy: 92.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.2199, Val Accuracy: 91.40%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.2140, Val Accuracy: 91.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.2168, Val Accuracy: 91.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.2275, Val Accuracy: 92.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.2127, Val Accuracy: 93.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.2242, Val Accuracy: 91.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.2226, Val Accuracy: 92.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.2205, Val Accuracy: 92.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.2273, Val Accuracy: 90.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 75


Epoch [75/100], Train Loss: 0.2152, Val Accuracy: 89.69%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.2251, Val Accuracy: 92.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.2262, Val Accuracy: 92.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.2259, Val Accuracy: 91.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.2277, Val Accuracy: 92.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.2226, Val Accuracy: 92.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.2236, Val Accuracy: 91.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.2286, Val Accuracy: 91.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.2268, Val Accuracy: 92.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.2242, Val Accuracy: 90.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.2272, Val Accuracy: 92.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.2324, Val Accuracy: 91.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.2331, Val Accuracy: 92.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.2365, Val Accuracy: 92.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.2355, Val Accuracy: 91.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.2304, Val Accuracy: 92.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.2379, Val Accuracy: 92.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.2343, Val Accuracy: 91.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.2301, Val Accuracy: 92.25%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.2310, Val Accuracy: 91.39%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.2359, Val Accuracy: 92.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.2329, Val Accuracy: 90.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.2356, Val Accuracy: 92.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.2346, Val Accuracy: 92.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.2283, Val Accuracy: 93.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.2284, Val Accuracy: 92.32%
Training with batch_size=64, learning_rate=[0.001, 0.001, 0.0001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 1.4578, Val Accuracy: 80.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.5589, Val Accuracy: 90.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.3416, Val Accuracy: 89.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.2876, Val Accuracy: 92.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.2681, Val Accuracy: 93.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2519, Val Accuracy: 93.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2436, Val Accuracy: 90.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2310, Val Accuracy: 92.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.2287, Val Accuracy: 93.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.2211, Val Accuracy: 93.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.2066, Val Accuracy: 92.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.2126, Val Accuracy: 92.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.2093, Val Accuracy: 93.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.2150, Val Accuracy: 92.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.1979, Val Accuracy: 93.39%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.2039, Val Accuracy: 92.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.2142, Val Accuracy: 91.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.2027, Val Accuracy: 93.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.1999, Val Accuracy: 93.36%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.1947, Val Accuracy: 91.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.2065, Val Accuracy: 92.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.1916, Val Accuracy: 91.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.1942, Val Accuracy: 92.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.1903, Val Accuracy: 92.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.1912, Val Accuracy: 93.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.2014, Val Accuracy: 92.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.1925, Val Accuracy: 93.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.1980, Val Accuracy: 92.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.1907, Val Accuracy: 92.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.1834, Val Accuracy: 93.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.1900, Val Accuracy: 94.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.1838, Val Accuracy: 92.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.1939, Val Accuracy: 93.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.1937, Val Accuracy: 93.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.1882, Val Accuracy: 92.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.1976, Val Accuracy: 92.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.2061, Val Accuracy: 94.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.1992, Val Accuracy: 94.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.1984, Val Accuracy: 93.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.2015, Val Accuracy: 92.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.1949, Val Accuracy: 93.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.1921, Val Accuracy: 93.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.1991, Val Accuracy: 93.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.2019, Val Accuracy: 92.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.2101, Val Accuracy: 92.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.2090, Val Accuracy: 91.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.2092, Val Accuracy: 92.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.2184, Val Accuracy: 93.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.2065, Val Accuracy: 93.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50


Epoch [50/100], Train Loss: 0.1997, Val Accuracy: 92.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.2038, Val Accuracy: 92.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.2132, Val Accuracy: 91.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.2105, Val Accuracy: 93.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.2049, Val Accuracy: 91.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.2108, Val Accuracy: 92.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.2086, Val Accuracy: 92.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.2131, Val Accuracy: 92.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.2049, Val Accuracy: 92.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.2141, Val Accuracy: 91.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.2051, Val Accuracy: 92.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.2112, Val Accuracy: 92.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.2226, Val Accuracy: 92.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.2159, Val Accuracy: 90.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.2130, Val Accuracy: 93.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.2127, Val Accuracy: 93.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.2176, Val Accuracy: 93.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.2167, Val Accuracy: 92.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.2153, Val Accuracy: 92.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.2214, Val Accuracy: 91.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.2251, Val Accuracy: 91.84%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.2204, Val Accuracy: 93.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.2171, Val Accuracy: 92.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.2307, Val Accuracy: 91.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.2302, Val Accuracy: 91.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0001 at epoch 75


Epoch [75/100], Train Loss: 0.2225, Val Accuracy: 93.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.1242, Val Accuracy: 95.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.1120, Val Accuracy: 94.93%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.1112, Val Accuracy: 94.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.1076, Val Accuracy: 95.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.1091, Val Accuracy: 94.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.1050, Val Accuracy: 95.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.1038, Val Accuracy: 95.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.1059, Val Accuracy: 95.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.1018, Val Accuracy: 94.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.1022, Val Accuracy: 95.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.1009, Val Accuracy: 95.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.1001, Val Accuracy: 94.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.1019, Val Accuracy: 94.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.1007, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.1009, Val Accuracy: 95.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.1003, Val Accuracy: 94.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.0989, Val Accuracy: 95.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.0966, Val Accuracy: 95.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.0986, Val Accuracy: 95.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.0980, Val Accuracy: 95.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.0950, Val Accuracy: 94.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.0989, Val Accuracy: 94.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.0956, Val Accuracy: 94.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.0977, Val Accuracy: 95.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.0942, Val Accuracy: 94.72%
Training with batch_size=64, learning_rate=[0.001, 0.0005, 0.0001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 1.4655, Val Accuracy: 81.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.5283, Val Accuracy: 89.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.3464, Val Accuracy: 90.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.2920, Val Accuracy: 91.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.2739, Val Accuracy: 90.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2486, Val Accuracy: 90.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2349, Val Accuracy: 91.36%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2353, Val Accuracy: 93.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.2245, Val Accuracy: 93.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.2234, Val Accuracy: 92.86%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.2227, Val Accuracy: 93.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.2192, Val Accuracy: 93.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.2228, Val Accuracy: 92.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.2197, Val Accuracy: 92.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.2201, Val Accuracy: 92.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.2222, Val Accuracy: 92.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.2314, Val Accuracy: 91.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.2155, Val Accuracy: 90.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.2118, Val Accuracy: 93.40%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.2089, Val Accuracy: 93.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.2128, Val Accuracy: 92.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.2161, Val Accuracy: 92.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.2179, Val Accuracy: 91.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.2167, Val Accuracy: 91.37%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.2206, Val Accuracy: 92.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.2204, Val Accuracy: 92.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.2163, Val Accuracy: 93.51%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.2098, Val Accuracy: 91.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.2195, Val Accuracy: 93.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.2234, Val Accuracy: 92.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.2236, Val Accuracy: 91.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.2229, Val Accuracy: 91.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.2264, Val Accuracy: 91.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.2321, Val Accuracy: 89.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.2232, Val Accuracy: 92.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.2300, Val Accuracy: 92.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.2300, Val Accuracy: 92.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.2281, Val Accuracy: 92.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.2310, Val Accuracy: 92.37%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.2454, Val Accuracy: 92.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.2364, Val Accuracy: 92.40%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.2299, Val Accuracy: 91.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.2307, Val Accuracy: 93.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.2366, Val Accuracy: 92.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.2404, Val Accuracy: 93.25%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.2394, Val Accuracy: 92.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.2425, Val Accuracy: 92.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.2388, Val Accuracy: 92.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.2362, Val Accuracy: 93.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0005 at epoch 50


Epoch [50/100], Train Loss: 0.2528, Val Accuracy: 91.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.1794, Val Accuracy: 93.51%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.1759, Val Accuracy: 93.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.1782, Val Accuracy: 93.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.1785, Val Accuracy: 92.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.1757, Val Accuracy: 93.71%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.1741, Val Accuracy: 93.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.1749, Val Accuracy: 93.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.1782, Val Accuracy: 93.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.1739, Val Accuracy: 92.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.1726, Val Accuracy: 94.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.1688, Val Accuracy: 93.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.1742, Val Accuracy: 93.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.1751, Val Accuracy: 92.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.1673, Val Accuracy: 92.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.1661, Val Accuracy: 92.93%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.1724, Val Accuracy: 92.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.1720, Val Accuracy: 93.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.1718, Val Accuracy: 94.56%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.1677, Val Accuracy: 93.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.1659, Val Accuracy: 93.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.1736, Val Accuracy: 92.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.1718, Val Accuracy: 93.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.1702, Val Accuracy: 93.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.1616, Val Accuracy: 93.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0001 at epoch 75


Epoch [75/100], Train Loss: 0.1692, Val Accuracy: 92.35%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.1091, Val Accuracy: 95.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.1032, Val Accuracy: 95.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.1052, Val Accuracy: 94.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.1022, Val Accuracy: 95.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.1028, Val Accuracy: 94.86%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.0989, Val Accuracy: 94.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.1003, Val Accuracy: 94.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.1026, Val Accuracy: 94.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.0998, Val Accuracy: 94.84%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.0975, Val Accuracy: 95.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.0965, Val Accuracy: 94.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.0979, Val Accuracy: 94.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.0996, Val Accuracy: 94.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.1001, Val Accuracy: 94.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.0970, Val Accuracy: 94.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.0977, Val Accuracy: 95.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.0999, Val Accuracy: 95.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.0956, Val Accuracy: 94.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.0999, Val Accuracy: 94.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.1005, Val Accuracy: 94.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.0951, Val Accuracy: 94.76%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.0980, Val Accuracy: 95.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.0944, Val Accuracy: 93.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.0966, Val Accuracy: 94.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.0964, Val Accuracy: 95.02%
Training with batch_size=128, learning_rate=[0.01, 0.001, 0.001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 2.3639, Val Accuracy: 68.84%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 6.4008, Val Accuracy: 83.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 3.7762, Val Accuracy: 83.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 2.6697, Val Accuracy: 85.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 2.1562, Val Accuracy: 86.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 1.8208, Val Accuracy: 87.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 1.5539, Val Accuracy: 85.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 1.2620, Val Accuracy: 88.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.8585, Val Accuracy: 87.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.7852, Val Accuracy: 88.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.6840, Val Accuracy: 89.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.5411, Val Accuracy: 90.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.4242, Val Accuracy: 89.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.3303, Val Accuracy: 92.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.2599, Val Accuracy: 92.56%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.2344, Val Accuracy: 92.79%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.2306, Val Accuracy: 92.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.2569, Val Accuracy: 89.56%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.2789, Val Accuracy: 89.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.3175, Val Accuracy: 89.76%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.3795, Val Accuracy: 81.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.4129, Val Accuracy: 85.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.4341, Val Accuracy: 88.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.4359, Val Accuracy: 88.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.4126, Val Accuracy: 86.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.4449, Val Accuracy: 86.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.4467, Val Accuracy: 82.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.4606, Val Accuracy: 85.13%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.4793, Val Accuracy: 81.35%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.4790, Val Accuracy: 81.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.4905, Val Accuracy: 85.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.4965, Val Accuracy: 86.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.4808, Val Accuracy: 87.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.4657, Val Accuracy: 85.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.4772, Val Accuracy: 84.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.4880, Val Accuracy: 84.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.4984, Val Accuracy: 84.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.5009, Val Accuracy: 86.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.4839, Val Accuracy: 85.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.5213, Val Accuracy: 86.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.4890, Val Accuracy: 82.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.5135, Val Accuracy: 85.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.5036, Val Accuracy: 83.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.5189, Val Accuracy: 81.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.5194, Val Accuracy: 83.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.5006, Val Accuracy: 85.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.5400, Val Accuracy: 81.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.5117, Val Accuracy: 85.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.4968, Val Accuracy: 84.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50


Epoch [50/100], Train Loss: 0.5213, Val Accuracy: 86.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.3484, Val Accuracy: 89.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.3277, Val Accuracy: 89.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.3297, Val Accuracy: 89.69%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.3351, Val Accuracy: 90.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.3340, Val Accuracy: 89.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.3305, Val Accuracy: 90.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.3297, Val Accuracy: 89.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.3292, Val Accuracy: 90.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.3246, Val Accuracy: 89.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.3262, Val Accuracy: 90.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.3246, Val Accuracy: 89.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.3280, Val Accuracy: 88.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.3271, Val Accuracy: 89.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.3263, Val Accuracy: 89.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.3237, Val Accuracy: 90.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.3202, Val Accuracy: 90.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.3231, Val Accuracy: 89.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.3229, Val Accuracy: 90.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.3187, Val Accuracy: 90.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.3171, Val Accuracy: 90.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.3147, Val Accuracy: 89.69%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.3194, Val Accuracy: 90.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.3194, Val Accuracy: 90.84%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.3202, Val Accuracy: 90.13%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 75


Epoch [75/100], Train Loss: 0.3177, Val Accuracy: 90.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.3177, Val Accuracy: 90.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.3283, Val Accuracy: 89.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.3191, Val Accuracy: 89.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.3122, Val Accuracy: 90.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.3185, Val Accuracy: 90.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.3165, Val Accuracy: 90.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.3141, Val Accuracy: 90.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.3173, Val Accuracy: 89.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.3149, Val Accuracy: 90.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.3107, Val Accuracy: 89.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.3192, Val Accuracy: 90.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.3178, Val Accuracy: 90.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.3133, Val Accuracy: 90.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.3155, Val Accuracy: 89.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.3129, Val Accuracy: 90.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.3077, Val Accuracy: 90.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.3105, Val Accuracy: 90.35%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.3129, Val Accuracy: 90.35%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.3074, Val Accuracy: 90.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.3124, Val Accuracy: 90.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.3113, Val Accuracy: 89.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.3222, Val Accuracy: 90.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.3102, Val Accuracy: 90.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.3137, Val Accuracy: 90.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.3124, Val Accuracy: 89.44%
Training with batch_size=128, learning_rate=[0.001, 0.001, 0.001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 1.8013, Val Accuracy: 51.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.6734, Val Accuracy: 85.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.3826, Val Accuracy: 88.69%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.3151, Val Accuracy: 90.79%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.2837, Val Accuracy: 92.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2437, Val Accuracy: 93.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2227, Val Accuracy: 93.13%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2082, Val Accuracy: 93.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.1927, Val Accuracy: 93.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.1859, Val Accuracy: 94.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.1774, Val Accuracy: 93.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.1716, Val Accuracy: 92.93%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.1724, Val Accuracy: 94.40%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.1669, Val Accuracy: 92.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.1659, Val Accuracy: 94.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.1560, Val Accuracy: 92.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.1442, Val Accuracy: 93.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.1484, Val Accuracy: 95.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.1484, Val Accuracy: 94.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.1513, Val Accuracy: 94.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.1442, Val Accuracy: 94.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.1474, Val Accuracy: 94.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.1424, Val Accuracy: 95.44%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.1521, Val Accuracy: 94.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.1501, Val Accuracy: 93.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.1379, Val Accuracy: 94.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.1492, Val Accuracy: 94.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.1495, Val Accuracy: 94.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.1447, Val Accuracy: 94.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.1494, Val Accuracy: 94.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.1424, Val Accuracy: 94.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.1503, Val Accuracy: 94.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.1464, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.1431, Val Accuracy: 94.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.1402, Val Accuracy: 93.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.1437, Val Accuracy: 93.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.1414, Val Accuracy: 94.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.1345, Val Accuracy: 94.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.1318, Val Accuracy: 94.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.1387, Val Accuracy: 94.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.1441, Val Accuracy: 94.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.1412, Val Accuracy: 94.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.1449, Val Accuracy: 95.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.1392, Val Accuracy: 94.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.1393, Val Accuracy: 92.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.1375, Val Accuracy: 94.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.1396, Val Accuracy: 93.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.1354, Val Accuracy: 92.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.1395, Val Accuracy: 94.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50


Epoch [50/100], Train Loss: 0.1331, Val Accuracy: 94.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.1447, Val Accuracy: 93.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.1419, Val Accuracy: 94.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.1406, Val Accuracy: 94.71%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.1432, Val Accuracy: 94.44%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.1488, Val Accuracy: 94.44%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.1412, Val Accuracy: 94.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.1430, Val Accuracy: 94.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.1477, Val Accuracy: 93.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.1440, Val Accuracy: 94.39%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.1491, Val Accuracy: 95.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.1480, Val Accuracy: 94.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.1444, Val Accuracy: 94.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.1430, Val Accuracy: 94.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.1563, Val Accuracy: 93.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.1522, Val Accuracy: 94.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.1463, Val Accuracy: 94.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.1454, Val Accuracy: 94.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.1456, Val Accuracy: 93.44%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.1488, Val Accuracy: 94.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.1622, Val Accuracy: 94.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.1443, Val Accuracy: 93.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.1568, Val Accuracy: 93.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.1503, Val Accuracy: 93.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.1525, Val Accuracy: 93.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 75


Epoch [75/100], Train Loss: 0.1507, Val Accuracy: 92.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.1521, Val Accuracy: 93.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.1498, Val Accuracy: 93.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.1580, Val Accuracy: 92.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.1569, Val Accuracy: 94.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.1571, Val Accuracy: 93.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.1575, Val Accuracy: 93.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.1611, Val Accuracy: 94.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.1581, Val Accuracy: 92.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.1530, Val Accuracy: 92.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.1707, Val Accuracy: 92.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.1551, Val Accuracy: 94.13%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.1562, Val Accuracy: 93.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.1626, Val Accuracy: 94.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.1562, Val Accuracy: 94.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.1597, Val Accuracy: 94.25%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.1578, Val Accuracy: 93.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.1525, Val Accuracy: 92.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.1706, Val Accuracy: 93.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.1495, Val Accuracy: 93.84%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.1618, Val Accuracy: 94.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.1562, Val Accuracy: 94.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.1621, Val Accuracy: 93.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.1546, Val Accuracy: 94.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.1596, Val Accuracy: 93.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.1625, Val Accuracy: 93.63%
Training with batch_size=128, learning_rate=[0.001, 0.001, 0.0001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 1.8238, Val Accuracy: 49.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.6953, Val Accuracy: 88.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.4008, Val Accuracy: 89.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.3495, Val Accuracy: 91.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.2959, Val Accuracy: 90.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2555, Val Accuracy: 91.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2353, Val Accuracy: 92.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2256, Val Accuracy: 92.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.2091, Val Accuracy: 93.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.1927, Val Accuracy: 94.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.1858, Val Accuracy: 94.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.1789, Val Accuracy: 94.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.1752, Val Accuracy: 94.25%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.1684, Val Accuracy: 94.25%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.1587, Val Accuracy: 93.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.1569, Val Accuracy: 92.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.1519, Val Accuracy: 93.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.1466, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.1504, Val Accuracy: 94.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.1418, Val Accuracy: 94.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.1449, Val Accuracy: 95.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.1471, Val Accuracy: 94.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.1414, Val Accuracy: 94.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.1404, Val Accuracy: 94.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.1471, Val Accuracy: 92.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.1460, Val Accuracy: 95.13%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.1427, Val Accuracy: 92.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.1346, Val Accuracy: 94.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.1369, Val Accuracy: 94.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.1357, Val Accuracy: 94.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.1388, Val Accuracy: 94.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.1422, Val Accuracy: 92.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.1366, Val Accuracy: 94.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.1376, Val Accuracy: 94.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.1414, Val Accuracy: 93.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.1399, Val Accuracy: 94.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.1370, Val Accuracy: 95.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.1299, Val Accuracy: 94.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.1313, Val Accuracy: 94.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.1390, Val Accuracy: 93.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.1425, Val Accuracy: 94.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.1382, Val Accuracy: 94.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.1423, Val Accuracy: 95.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.1337, Val Accuracy: 94.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.1383, Val Accuracy: 94.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.1398, Val Accuracy: 95.35%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.1383, Val Accuracy: 95.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.1344, Val Accuracy: 95.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.1345, Val Accuracy: 93.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50


Epoch [50/100], Train Loss: 0.1312, Val Accuracy: 94.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.1280, Val Accuracy: 94.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.1344, Val Accuracy: 93.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.1369, Val Accuracy: 94.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.1343, Val Accuracy: 94.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.1266, Val Accuracy: 94.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.1339, Val Accuracy: 94.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.1348, Val Accuracy: 93.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.1279, Val Accuracy: 94.49%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.1405, Val Accuracy: 94.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.1286, Val Accuracy: 94.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.1310, Val Accuracy: 94.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.1188, Val Accuracy: 94.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.1264, Val Accuracy: 94.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.1308, Val Accuracy: 94.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.1309, Val Accuracy: 93.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.1320, Val Accuracy: 94.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.1308, Val Accuracy: 93.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.1346, Val Accuracy: 95.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.1376, Val Accuracy: 94.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.1276, Val Accuracy: 93.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.1419, Val Accuracy: 94.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.1353, Val Accuracy: 93.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.1357, Val Accuracy: 94.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.1371, Val Accuracy: 93.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0001 at epoch 75


Epoch [75/100], Train Loss: 0.1304, Val Accuracy: 94.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.0663, Val Accuracy: 96.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.0514, Val Accuracy: 96.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.0489, Val Accuracy: 96.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.0467, Val Accuracy: 96.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.0441, Val Accuracy: 96.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.0436, Val Accuracy: 96.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.0413, Val Accuracy: 96.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.0428, Val Accuracy: 96.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.0419, Val Accuracy: 96.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.0430, Val Accuracy: 96.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.0398, Val Accuracy: 96.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.0388, Val Accuracy: 96.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.0392, Val Accuracy: 96.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.0379, Val Accuracy: 96.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.0387, Val Accuracy: 96.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.0360, Val Accuracy: 96.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.0374, Val Accuracy: 96.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.0348, Val Accuracy: 96.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.0360, Val Accuracy: 96.39%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.0345, Val Accuracy: 96.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.0357, Val Accuracy: 96.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.0342, Val Accuracy: 96.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.0348, Val Accuracy: 96.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.0331, Val Accuracy: 96.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.0332, Val Accuracy: 96.50%
Training with batch_size=128, learning_rate=[0.001, 0.0005, 0.0001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 1.8581, Val Accuracy: 46.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.7243, Val Accuracy: 87.51%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.4291, Val Accuracy: 89.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.3528, Val Accuracy: 90.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.3040, Val Accuracy: 91.56%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2706, Val Accuracy: 91.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2483, Val Accuracy: 93.37%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2330, Val Accuracy: 91.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.2087, Val Accuracy: 93.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.2066, Val Accuracy: 94.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.1914, Val Accuracy: 93.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.1835, Val Accuracy: 93.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.1826, Val Accuracy: 94.11%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.1664, Val Accuracy: 94.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.1764, Val Accuracy: 94.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.1725, Val Accuracy: 93.56%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.1698, Val Accuracy: 94.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.1757, Val Accuracy: 94.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.1654, Val Accuracy: 93.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.1614, Val Accuracy: 94.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.1606, Val Accuracy: 93.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.1679, Val Accuracy: 94.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.1606, Val Accuracy: 94.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.1639, Val Accuracy: 94.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.1529, Val Accuracy: 94.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.1625, Val Accuracy: 94.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.1607, Val Accuracy: 94.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.1589, Val Accuracy: 92.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.1629, Val Accuracy: 93.37%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.1616, Val Accuracy: 93.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.1614, Val Accuracy: 94.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.1586, Val Accuracy: 93.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.1584, Val Accuracy: 93.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.1535, Val Accuracy: 93.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.1518, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.1548, Val Accuracy: 94.35%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.1475, Val Accuracy: 94.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.1548, Val Accuracy: 94.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.1525, Val Accuracy: 94.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.1564, Val Accuracy: 93.86%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.1513, Val Accuracy: 93.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.1450, Val Accuracy: 94.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.1590, Val Accuracy: 92.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.1479, Val Accuracy: 94.37%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.1485, Val Accuracy: 94.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.1566, Val Accuracy: 93.69%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.1502, Val Accuracy: 91.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.1501, Val Accuracy: 94.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.1497, Val Accuracy: 93.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0005 at epoch 50


Epoch [50/100], Train Loss: 0.1522, Val Accuracy: 94.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.1031, Val Accuracy: 95.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.0942, Val Accuracy: 95.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.0923, Val Accuracy: 94.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.0937, Val Accuracy: 95.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.0903, Val Accuracy: 95.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.0928, Val Accuracy: 95.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.0899, Val Accuracy: 94.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.0925, Val Accuracy: 95.56%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.0907, Val Accuracy: 95.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.0954, Val Accuracy: 94.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.0862, Val Accuracy: 95.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.0854, Val Accuracy: 94.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.0879, Val Accuracy: 95.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.0851, Val Accuracy: 95.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.0837, Val Accuracy: 95.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.0856, Val Accuracy: 94.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.0854, Val Accuracy: 95.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.0859, Val Accuracy: 95.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.0815, Val Accuracy: 95.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.0878, Val Accuracy: 95.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.0846, Val Accuracy: 95.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.0854, Val Accuracy: 95.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.0828, Val Accuracy: 95.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.0826, Val Accuracy: 95.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0001 at epoch 75


Epoch [75/100], Train Loss: 0.0827, Val Accuracy: 95.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.0455, Val Accuracy: 96.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.0365, Val Accuracy: 96.39%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.0352, Val Accuracy: 96.44%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.0347, Val Accuracy: 96.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.0323, Val Accuracy: 96.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.0330, Val Accuracy: 96.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.0310, Val Accuracy: 96.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.0326, Val Accuracy: 96.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.0312, Val Accuracy: 96.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.0315, Val Accuracy: 96.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.0317, Val Accuracy: 96.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.0295, Val Accuracy: 96.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.0299, Val Accuracy: 96.36%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.0321, Val Accuracy: 96.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.0303, Val Accuracy: 96.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.0292, Val Accuracy: 96.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.0313, Val Accuracy: 96.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.0309, Val Accuracy: 96.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.0289, Val Accuracy: 96.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.0296, Val Accuracy: 96.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.0272, Val Accuracy: 96.40%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.0288, Val Accuracy: 96.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.0277, Val Accuracy: 96.44%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.0256, Val Accuracy: 96.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.0304, Val Accuracy: 96.40%
Training with batch_size=256, learning_rate=[0.01, 0.001, 0.001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 2.3655, Val Accuracy: 37.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 11.1451, Val Accuracy: 82.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 5.8616, Val Accuracy: 83.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 4.5801, Val Accuracy: 82.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 3.9774, Val Accuracy: 84.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 3.4068, Val Accuracy: 82.35%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 3.0866, Val Accuracy: 87.51%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 2.7616, Val Accuracy: 87.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 2.6028, Val Accuracy: 88.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 2.3450, Val Accuracy: 87.51%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 1.9604, Val Accuracy: 88.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 1.9324, Val Accuracy: 85.86%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 2.0299, Val Accuracy: 85.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 2.2637, Val Accuracy: 84.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 2.4210, Val Accuracy: 85.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 2.8084, Val Accuracy: 83.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 2.8549, Val Accuracy: 80.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 2.7881, Val Accuracy: 81.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 2.8879, Val Accuracy: 80.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 2.7365, Val Accuracy: 83.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 2.5244, Val Accuracy: 84.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 2.6973, Val Accuracy: 77.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 2.4166, Val Accuracy: 85.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 2.1569, Val Accuracy: 80.11%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 2.2007, Val Accuracy: 79.51%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 2.2741, Val Accuracy: 83.79%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 2.2350, Val Accuracy: 77.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 2.1180, Val Accuracy: 82.25%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 1.8584, Val Accuracy: 83.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 1.9315, Val Accuracy: 83.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 1.9315, Val Accuracy: 81.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 1.9423, Val Accuracy: 80.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 1.7890, Val Accuracy: 82.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 1.8448, Val Accuracy: 83.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 1.7180, Val Accuracy: 81.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 1.7753, Val Accuracy: 83.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 1.4580, Val Accuracy: 84.71%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 1.5048, Val Accuracy: 83.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 1.5326, Val Accuracy: 85.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 1.5679, Val Accuracy: 83.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 1.7815, Val Accuracy: 79.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 1.5183, Val Accuracy: 86.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 1.5471, Val Accuracy: 85.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 1.4015, Val Accuracy: 79.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 1.4332, Val Accuracy: 78.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 1.3618, Val Accuracy: 78.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 1.3904, Val Accuracy: 84.79%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 1.3652, Val Accuracy: 83.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 1.2640, Val Accuracy: 79.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50


Epoch [50/100], Train Loss: 1.2878, Val Accuracy: 79.93%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.6564, Val Accuracy: 87.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.5579, Val Accuracy: 90.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.5125, Val Accuracy: 89.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.4754, Val Accuracy: 90.25%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.4489, Val Accuracy: 90.79%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.4244, Val Accuracy: 89.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.4095, Val Accuracy: 89.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.4030, Val Accuracy: 90.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.3803, Val Accuracy: 90.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.3660, Val Accuracy: 89.86%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.3624, Val Accuracy: 90.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.3388, Val Accuracy: 89.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.3404, Val Accuracy: 91.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.3213, Val Accuracy: 91.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.3128, Val Accuracy: 90.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.3164, Val Accuracy: 91.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.3043, Val Accuracy: 90.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.2971, Val Accuracy: 90.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.2961, Val Accuracy: 91.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.3127, Val Accuracy: 90.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.3317, Val Accuracy: 89.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.4107, Val Accuracy: 89.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.4482, Val Accuracy: 86.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.5312, Val Accuracy: 85.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 75


Epoch [75/100], Train Loss: 0.5291, Val Accuracy: 85.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.5549, Val Accuracy: 85.36%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.6001, Val Accuracy: 78.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.6534, Val Accuracy: 81.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.5882, Val Accuracy: 85.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.4752, Val Accuracy: 87.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.4162, Val Accuracy: 88.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.4107, Val Accuracy: 88.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.3759, Val Accuracy: 90.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.3595, Val Accuracy: 89.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.3548, Val Accuracy: 89.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.3475, Val Accuracy: 89.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.3528, Val Accuracy: 90.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.3386, Val Accuracy: 90.41%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.3150, Val Accuracy: 90.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.3172, Val Accuracy: 90.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.3332, Val Accuracy: 91.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.3120, Val Accuracy: 90.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.3090, Val Accuracy: 90.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.3231, Val Accuracy: 89.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.3226, Val Accuracy: 90.51%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.3070, Val Accuracy: 91.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.3057, Val Accuracy: 90.93%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.3010, Val Accuracy: 91.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.2886, Val Accuracy: 91.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.2801, Val Accuracy: 91.60%
Training with batch_size=256, learning_rate=[0.001, 0.001, 0.001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 2.0220, Val Accuracy: 47.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.8033, Val Accuracy: 87.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.4469, Val Accuracy: 88.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.3492, Val Accuracy: 90.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.2966, Val Accuracy: 91.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2735, Val Accuracy: 92.84%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2357, Val Accuracy: 92.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2219, Val Accuracy: 93.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.2093, Val Accuracy: 92.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.1936, Val Accuracy: 93.79%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.1767, Val Accuracy: 94.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.1765, Val Accuracy: 93.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.1650, Val Accuracy: 94.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.1487, Val Accuracy: 95.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.1592, Val Accuracy: 94.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.1604, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.1424, Val Accuracy: 93.42%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.1348, Val Accuracy: 94.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.1353, Val Accuracy: 94.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.1399, Val Accuracy: 95.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.1387, Val Accuracy: 95.36%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.1339, Val Accuracy: 95.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.1358, Val Accuracy: 94.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.1264, Val Accuracy: 95.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.1202, Val Accuracy: 95.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.1364, Val Accuracy: 94.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.1374, Val Accuracy: 94.69%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.1267, Val Accuracy: 94.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.1298, Val Accuracy: 94.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.1292, Val Accuracy: 93.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.1277, Val Accuracy: 95.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.1233, Val Accuracy: 94.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.1282, Val Accuracy: 94.81%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.1216, Val Accuracy: 95.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.1144, Val Accuracy: 94.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.1231, Val Accuracy: 94.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.1240, Val Accuracy: 94.56%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.1198, Val Accuracy: 95.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.1229, Val Accuracy: 95.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.1246, Val Accuracy: 95.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.1236, Val Accuracy: 95.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.1313, Val Accuracy: 94.35%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.1223, Val Accuracy: 94.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.1256, Val Accuracy: 95.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.1324, Val Accuracy: 95.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.1312, Val Accuracy: 94.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.1200, Val Accuracy: 95.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.1322, Val Accuracy: 93.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.1221, Val Accuracy: 94.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50


Epoch [50/100], Train Loss: 0.1274, Val Accuracy: 94.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.1197, Val Accuracy: 93.86%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.1209, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.1335, Val Accuracy: 94.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.1347, Val Accuracy: 94.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.1183, Val Accuracy: 94.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.1278, Val Accuracy: 95.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.1323, Val Accuracy: 94.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.1238, Val Accuracy: 94.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.1251, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.1282, Val Accuracy: 93.52%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.1275, Val Accuracy: 94.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.1199, Val Accuracy: 92.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.1350, Val Accuracy: 95.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.1217, Val Accuracy: 94.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.1252, Val Accuracy: 94.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.1224, Val Accuracy: 94.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.1206, Val Accuracy: 94.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.1227, Val Accuracy: 94.86%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.1268, Val Accuracy: 95.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.1152, Val Accuracy: 95.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.1188, Val Accuracy: 94.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.1181, Val Accuracy: 94.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.1172, Val Accuracy: 95.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.1194, Val Accuracy: 94.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 75


Epoch [75/100], Train Loss: 0.1139, Val Accuracy: 94.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.1266, Val Accuracy: 94.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.1203, Val Accuracy: 95.69%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.1161, Val Accuracy: 94.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.1259, Val Accuracy: 95.39%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.1252, Val Accuracy: 95.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.1174, Val Accuracy: 94.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.1231, Val Accuracy: 94.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.1123, Val Accuracy: 95.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.1148, Val Accuracy: 95.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.1148, Val Accuracy: 95.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.1178, Val Accuracy: 94.11%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.1166, Val Accuracy: 94.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.1254, Val Accuracy: 94.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.1144, Val Accuracy: 94.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.1162, Val Accuracy: 95.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.1217, Val Accuracy: 94.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.1099, Val Accuracy: 94.93%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.1125, Val Accuracy: 94.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.1158, Val Accuracy: 95.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.1152, Val Accuracy: 95.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.1161, Val Accuracy: 95.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.1092, Val Accuracy: 94.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.1105, Val Accuracy: 95.64%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.1077, Val Accuracy: 94.69%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.1160, Val Accuracy: 94.89%
Training with batch_size=256, learning_rate=[0.001, 0.001, 0.0001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 2.0478, Val Accuracy: 48.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.8200, Val Accuracy: 86.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.4484, Val Accuracy: 89.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.3691, Val Accuracy: 92.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.3093, Val Accuracy: 92.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2689, Val Accuracy: 92.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2433, Val Accuracy: 92.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2240, Val Accuracy: 92.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.2086, Val Accuracy: 93.84%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.1966, Val Accuracy: 93.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.1777, Val Accuracy: 92.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.1710, Val Accuracy: 94.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.1625, Val Accuracy: 94.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.1585, Val Accuracy: 94.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.1520, Val Accuracy: 95.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.1402, Val Accuracy: 94.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.1353, Val Accuracy: 94.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.1318, Val Accuracy: 94.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.1384, Val Accuracy: 94.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.1375, Val Accuracy: 94.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.1371, Val Accuracy: 94.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.1300, Val Accuracy: 94.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.1348, Val Accuracy: 93.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.1445, Val Accuracy: 93.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.1486, Val Accuracy: 93.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.1461, Val Accuracy: 94.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.1435, Val Accuracy: 94.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.1475, Val Accuracy: 95.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.1328, Val Accuracy: 94.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.1527, Val Accuracy: 93.44%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.1451, Val Accuracy: 94.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.1441, Val Accuracy: 93.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.1456, Val Accuracy: 93.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.1436, Val Accuracy: 94.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.1436, Val Accuracy: 94.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.1476, Val Accuracy: 94.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.1429, Val Accuracy: 93.51%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.1548, Val Accuracy: 94.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.1430, Val Accuracy: 94.62%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.1390, Val Accuracy: 94.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.1352, Val Accuracy: 94.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.1351, Val Accuracy: 93.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.1350, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.1416, Val Accuracy: 94.25%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.1484, Val Accuracy: 93.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.1474, Val Accuracy: 93.71%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.1517, Val Accuracy: 93.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.1488, Val Accuracy: 94.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.1469, Val Accuracy: 93.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.001 at epoch 50


Epoch [50/100], Train Loss: 0.1501, Val Accuracy: 93.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.1451, Val Accuracy: 95.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.1273, Val Accuracy: 94.70%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.1450, Val Accuracy: 94.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.1357, Val Accuracy: 95.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.1374, Val Accuracy: 93.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.1511, Val Accuracy: 94.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.1546, Val Accuracy: 93.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.1421, Val Accuracy: 93.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.1419, Val Accuracy: 94.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.1388, Val Accuracy: 93.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.1444, Val Accuracy: 93.15%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.1402, Val Accuracy: 94.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.1377, Val Accuracy: 93.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.1504, Val Accuracy: 94.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.1392, Val Accuracy: 94.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.1360, Val Accuracy: 94.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.1353, Val Accuracy: 94.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.1364, Val Accuracy: 94.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.1317, Val Accuracy: 94.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.1333, Val Accuracy: 94.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.1469, Val Accuracy: 94.66%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.1354, Val Accuracy: 93.27%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.1349, Val Accuracy: 93.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.1338, Val Accuracy: 93.71%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0001 at epoch 75


Epoch [75/100], Train Loss: 0.1317, Val Accuracy: 94.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.0672, Val Accuracy: 96.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.0518, Val Accuracy: 96.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.0484, Val Accuracy: 96.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.0472, Val Accuracy: 96.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.0458, Val Accuracy: 96.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.0433, Val Accuracy: 96.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.0430, Val Accuracy: 96.39%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.0436, Val Accuracy: 96.36%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.0455, Val Accuracy: 96.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.0423, Val Accuracy: 96.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.0405, Val Accuracy: 96.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.0409, Val Accuracy: 96.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.0412, Val Accuracy: 96.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.0386, Val Accuracy: 96.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.0382, Val Accuracy: 96.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.0399, Val Accuracy: 96.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.0396, Val Accuracy: 96.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.0373, Val Accuracy: 96.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.0391, Val Accuracy: 96.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.0358, Val Accuracy: 96.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.0359, Val Accuracy: 96.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.0361, Val Accuracy: 96.48%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.0338, Val Accuracy: 96.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.0379, Val Accuracy: 96.45%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.0356, Val Accuracy: 96.34%
Training with batch_size=256, learning_rate=[0.001, 0.0005, 0.0001]


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 2.0160, Val Accuracy: 50.65%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.8013, Val Accuracy: 86.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.4395, Val Accuracy: 89.69%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.3652, Val Accuracy: 88.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.3080, Val Accuracy: 92.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2692, Val Accuracy: 91.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2491, Val Accuracy: 92.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2196, Val Accuracy: 93.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.2002, Val Accuracy: 93.74%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.1856, Val Accuracy: 93.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.1768, Val Accuracy: 94.47%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.1694, Val Accuracy: 94.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.1658, Val Accuracy: 93.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.1534, Val Accuracy: 94.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.1545, Val Accuracy: 94.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.1484, Val Accuracy: 95.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.1359, Val Accuracy: 94.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.1304, Val Accuracy: 94.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.1181, Val Accuracy: 94.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.1233, Val Accuracy: 94.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.1254, Val Accuracy: 94.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.1256, Val Accuracy: 94.58%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.1243, Val Accuracy: 95.09%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.1278, Val Accuracy: 95.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.1271, Val Accuracy: 93.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.1261, Val Accuracy: 95.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.1292, Val Accuracy: 94.54%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.1206, Val Accuracy: 94.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.1233, Val Accuracy: 95.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.1173, Val Accuracy: 95.79%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.1247, Val Accuracy: 95.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.1225, Val Accuracy: 93.38%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.1266, Val Accuracy: 94.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.1150, Val Accuracy: 94.86%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.1227, Val Accuracy: 94.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.1171, Val Accuracy: 95.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.1187, Val Accuracy: 95.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.1168, Val Accuracy: 95.61%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.1194, Val Accuracy: 94.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.1229, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.1269, Val Accuracy: 95.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.1261, Val Accuracy: 95.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.1214, Val Accuracy: 95.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.1184, Val Accuracy: 95.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.1210, Val Accuracy: 94.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.1262, Val Accuracy: 94.11%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.1255, Val Accuracy: 94.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.1179, Val Accuracy: 95.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.1140, Val Accuracy: 94.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0005 at epoch 50


Epoch [50/100], Train Loss: 0.1170, Val Accuracy: 94.26%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.0784, Val Accuracy: 96.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.0640, Val Accuracy: 96.31%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.0590, Val Accuracy: 95.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.0576, Val Accuracy: 96.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.0595, Val Accuracy: 96.11%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.0588, Val Accuracy: 95.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.0573, Val Accuracy: 96.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.0565, Val Accuracy: 95.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.0547, Val Accuracy: 96.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.0547, Val Accuracy: 96.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.0563, Val Accuracy: 96.13%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.0556, Val Accuracy: 95.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.0539, Val Accuracy: 96.34%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.0577, Val Accuracy: 96.07%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.0575, Val Accuracy: 95.57%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.0534, Val Accuracy: 95.82%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.0588, Val Accuracy: 95.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.0535, Val Accuracy: 96.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.0478, Val Accuracy: 96.16%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.0576, Val Accuracy: 95.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.0542, Val Accuracy: 96.24%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.0500, Val Accuracy: 96.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.0512, Val Accuracy: 96.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.0527, Val Accuracy: 96.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0001 at epoch 75


Epoch [75/100], Train Loss: 0.0458, Val Accuracy: 95.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.0207, Val Accuracy: 96.97%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.0141, Val Accuracy: 97.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.0130, Val Accuracy: 97.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.0121, Val Accuracy: 97.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.0130, Val Accuracy: 97.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.0114, Val Accuracy: 97.12%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.0115, Val Accuracy: 97.20%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.0125, Val Accuracy: 97.13%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.0141, Val Accuracy: 97.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.0122, Val Accuracy: 97.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.0109, Val Accuracy: 97.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.0105, Val Accuracy: 96.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.0134, Val Accuracy: 96.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.0116, Val Accuracy: 97.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.0097, Val Accuracy: 96.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.0117, Val Accuracy: 96.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.0121, Val Accuracy: 96.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.0108, Val Accuracy: 96.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.0119, Val Accuracy: 97.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.0120, Val Accuracy: 96.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.0114, Val Accuracy: 96.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.0132, Val Accuracy: 97.14%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.0093, Val Accuracy: 97.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.0097, Val Accuracy: 96.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.0115, Val Accuracy: 96.87%
Best Accuracy: 96.87%
Best Parameters: Batch Size=256, Learning Rate=[0.001, 0.0005, 0.0001]


## 6. Evaluate BNN for MNIST

In [13]:
train_loader = DataLoader(data_train, batch_size=best_params[0], shuffle=True)
val_loader = DataLoader(data_valid, batch_size=best_params[0], shuffle=False)
test_loader = DataLoader(data_test, batch_size=best_params[0], shuffle=False)

model = BiKA_MNIST().to(device)
criterion = nn.CrossEntropyLoss()

train_and_validate(model, train_loader, val_loader, criterion, best_params[1])

  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [1/100], Train Loss: 2.0332, Val Accuracy: 40.72%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [2/100], Train Loss: 0.7962, Val Accuracy: 87.19%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [3/100], Train Loss: 0.4311, Val Accuracy: 89.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [4/100], Train Loss: 0.3628, Val Accuracy: 89.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [5/100], Train Loss: 0.3130, Val Accuracy: 91.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [6/100], Train Loss: 0.2764, Val Accuracy: 91.43%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [7/100], Train Loss: 0.2557, Val Accuracy: 93.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [8/100], Train Loss: 0.2278, Val Accuracy: 92.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [9/100], Train Loss: 0.2053, Val Accuracy: 93.49%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [10/100], Train Loss: 0.1829, Val Accuracy: 94.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [11/100], Train Loss: 0.1749, Val Accuracy: 93.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [12/100], Train Loss: 0.1714, Val Accuracy: 93.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [13/100], Train Loss: 0.1585, Val Accuracy: 93.50%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [14/100], Train Loss: 0.1516, Val Accuracy: 95.01%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [15/100], Train Loss: 0.1484, Val Accuracy: 93.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [16/100], Train Loss: 0.1475, Val Accuracy: 93.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [17/100], Train Loss: 0.1369, Val Accuracy: 94.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [18/100], Train Loss: 0.1294, Val Accuracy: 95.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [19/100], Train Loss: 0.1319, Val Accuracy: 94.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [20/100], Train Loss: 0.1302, Val Accuracy: 94.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [21/100], Train Loss: 0.1311, Val Accuracy: 95.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [22/100], Train Loss: 0.1247, Val Accuracy: 94.59%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [23/100], Train Loss: 0.1320, Val Accuracy: 95.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [24/100], Train Loss: 0.1275, Val Accuracy: 94.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [25/100], Train Loss: 0.1388, Val Accuracy: 93.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [26/100], Train Loss: 0.1356, Val Accuracy: 94.33%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [27/100], Train Loss: 0.1406, Val Accuracy: 94.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [28/100], Train Loss: 0.1312, Val Accuracy: 94.77%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [29/100], Train Loss: 0.1365, Val Accuracy: 93.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [30/100], Train Loss: 0.1417, Val Accuracy: 94.67%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [31/100], Train Loss: 0.1356, Val Accuracy: 94.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [32/100], Train Loss: 0.1420, Val Accuracy: 94.94%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [33/100], Train Loss: 0.1348, Val Accuracy: 94.49%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [34/100], Train Loss: 0.1352, Val Accuracy: 95.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [35/100], Train Loss: 0.1289, Val Accuracy: 93.11%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [36/100], Train Loss: 0.1438, Val Accuracy: 95.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [37/100], Train Loss: 0.1303, Val Accuracy: 94.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [38/100], Train Loss: 0.1430, Val Accuracy: 92.71%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [39/100], Train Loss: 0.1361, Val Accuracy: 94.46%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [40/100], Train Loss: 0.1284, Val Accuracy: 95.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [41/100], Train Loss: 0.1254, Val Accuracy: 92.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [42/100], Train Loss: 0.1322, Val Accuracy: 95.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [43/100], Train Loss: 0.1335, Val Accuracy: 95.10%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [44/100], Train Loss: 0.1314, Val Accuracy: 94.39%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [45/100], Train Loss: 0.1337, Val Accuracy: 95.05%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [46/100], Train Loss: 0.1242, Val Accuracy: 94.53%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [47/100], Train Loss: 0.1253, Val Accuracy: 94.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [48/100], Train Loss: 0.1306, Val Accuracy: 94.22%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [49/100], Train Loss: 0.1377, Val Accuracy: 94.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0005 at epoch 50


Epoch [50/100], Train Loss: 0.1295, Val Accuracy: 95.06%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [51/100], Train Loss: 0.0809, Val Accuracy: 95.75%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [52/100], Train Loss: 0.0733, Val Accuracy: 96.23%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [53/100], Train Loss: 0.0661, Val Accuracy: 95.56%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [54/100], Train Loss: 0.0649, Val Accuracy: 96.18%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [55/100], Train Loss: 0.0671, Val Accuracy: 95.91%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [56/100], Train Loss: 0.0658, Val Accuracy: 96.29%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [57/100], Train Loss: 0.0641, Val Accuracy: 96.36%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [58/100], Train Loss: 0.0613, Val Accuracy: 96.08%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [59/100], Train Loss: 0.0600, Val Accuracy: 96.17%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [60/100], Train Loss: 0.0621, Val Accuracy: 96.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [61/100], Train Loss: 0.0601, Val Accuracy: 96.21%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [62/100], Train Loss: 0.0610, Val Accuracy: 95.85%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [63/100], Train Loss: 0.0619, Val Accuracy: 95.89%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [64/100], Train Loss: 0.0643, Val Accuracy: 95.76%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [65/100], Train Loss: 0.0624, Val Accuracy: 96.39%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [66/100], Train Loss: 0.0600, Val Accuracy: 95.55%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [67/100], Train Loss: 0.0541, Val Accuracy: 96.28%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [68/100], Train Loss: 0.0576, Val Accuracy: 96.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [69/100], Train Loss: 0.0556, Val Accuracy: 95.93%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [70/100], Train Loss: 0.0548, Val Accuracy: 96.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [71/100], Train Loss: 0.0558, Val Accuracy: 96.25%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [72/100], Train Loss: 0.0588, Val Accuracy: 96.03%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [73/100], Train Loss: 0.0580, Val Accuracy: 96.32%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [74/100], Train Loss: 0.0519, Val Accuracy: 95.90%


  output_tensor = linear(x, quant_weight, quant_bias)


Learning rate changed to 0.0001 at epoch 75


Epoch [75/100], Train Loss: 0.0555, Val Accuracy: 95.30%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [76/100], Train Loss: 0.0260, Val Accuracy: 96.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [77/100], Train Loss: 0.0185, Val Accuracy: 97.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [78/100], Train Loss: 0.0159, Val Accuracy: 96.93%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [79/100], Train Loss: 0.0167, Val Accuracy: 96.83%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [80/100], Train Loss: 0.0153, Val Accuracy: 97.04%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [81/100], Train Loss: 0.0159, Val Accuracy: 96.95%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [82/100], Train Loss: 0.0143, Val Accuracy: 96.63%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [83/100], Train Loss: 0.0155, Val Accuracy: 96.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [84/100], Train Loss: 0.0158, Val Accuracy: 96.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [85/100], Train Loss: 0.0143, Val Accuracy: 96.92%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [86/100], Train Loss: 0.0156, Val Accuracy: 97.00%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [87/100], Train Loss: 0.0130, Val Accuracy: 96.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [88/100], Train Loss: 0.0148, Val Accuracy: 96.88%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [89/100], Train Loss: 0.0140, Val Accuracy: 96.78%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [90/100], Train Loss: 0.0157, Val Accuracy: 97.02%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [91/100], Train Loss: 0.0156, Val Accuracy: 96.87%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [92/100], Train Loss: 0.0157, Val Accuracy: 96.99%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [93/100], Train Loss: 0.0142, Val Accuracy: 96.80%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [94/100], Train Loss: 0.0137, Val Accuracy: 96.96%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [95/100], Train Loss: 0.0145, Val Accuracy: 96.60%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [96/100], Train Loss: 0.0134, Val Accuracy: 96.84%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [97/100], Train Loss: 0.0133, Val Accuracy: 96.73%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [98/100], Train Loss: 0.0121, Val Accuracy: 96.98%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [99/100], Train Loss: 0.0147, Val Accuracy: 96.68%


  output_tensor = linear(x, quant_weight, quant_bias)


Epoch [100/100], Train Loss: 0.0123, Val Accuracy: 96.85%


0.9685

In [14]:
print(f"Best Validation Accuracy: {best_acc*100:.2f}%")
print(f"Best Parameters: Batch Size={best_params[0]}, Learning Rate={best_params[1]}")

evaluate_model(model, test_loader)

Best Validation Accuracy: 96.87%
Best Parameters: Batch Size=256, Learning Rate=[0.001, 0.0005, 0.0001]


  output_tensor = linear(x, quant_weight, quant_bias)


Test Accuracy: 97.01%


0.9701