In [1]:
# !pip install matplotlib torchvision



In [2]:
# Label for this run. In the visible cells it is only referenced by the
# commented-out checkpoint filename in the training loop.
model_name = "ResNetSE"

In [3]:
# NOTE(review): looks like a leftover kernel smoke test — candidate for deletion.
print("hello")

hello


In [4]:
import matplotlib.pyplot as plt

import numpy as np

from collections import namedtuple


import pickle

from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init


In [5]:
import torchvision
import torchvision.transforms as transforms
from torch.optim import SGD, lr_scheduler, Adam
from torch.utils.data import TensorDataset

In [6]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
# Display which device was selected.
device

device(type='cuda')

In [8]:


# SE Block
class SEBlock(nn.Module):
    """Squeeze-and-Excitation block: rescales each channel by a learned gate.

    The input feature map is average-pooled to one value per channel, passed
    through a two-layer bottleneck MLP that ends in a sigmoid, and the
    resulting per-channel gates are multiplied back onto the input.

    Args:
        channels: Number of channels of the input feature map.
        reduction: Bottleneck ratio. The hidden layer has
            ``channels // reduction`` units, clamped to at least 1.
    """

    def __init__(self, channels, reduction=16):
        super().__init__()
        # Without the clamp, channels < reduction yields a zero-width hidden
        # layer, so the gates could not depend on the input at all.
        hidden = max(1, channels // reduction)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channels, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channels),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by its gates; the shape is unchanged."""
        b, c, _, _ = x.size()
        # Squeeze: (b, c, h, w) -> (b, c)
        y = self.avg_pool(x).view(b, c)
        # Excite: per-channel gates reshaped to broadcast over the spatial dims.
        y = self.fc(y).view(b, c, 1, 1)
        return x * y

# Modified BasicBlock with SE
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers, an SE gate, then the skip add.

    The SE block is applied to the residual branch before it is summed with the
    shortcut. The shortcut is the identity unless the stride or the channel
    count changes, in which case it is a 1x1 conv followed by batch norm.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the block.
        stride: Stride of the first conv (and of the projection shortcut).
    """

    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        projected_channels = self.expansion * out_channels

        # Residual branch.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=True,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels,
            kernel_size=3, stride=1, padding=1, bias=True,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.se = SEBlock(out_channels)  # channel re-weighting

        # Skip branch: identity when shapes already match, else a 1x1 projection.
        shape_preserved = stride == 1 and in_channels == projected_channels
        if shape_preserved:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, projected_channels, kernel_size=1, stride=stride, bias=True),
                nn.BatchNorm2d(projected_channels),
            )

    def forward(self, x):
        """Apply the residual branch with SE gating, add the shortcut, then ReLU."""
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))
        hidden = self.bn2(self.conv2(hidden))
        hidden = self.se(hidden)
        hidden += self.shortcut(x)
        return F.relu(hidden)

# Modified ResNet with SE and adjusted depth
class ResNet(nn.Module):
    """Three-stage residual network for small RGB images.

    Layout: 3x3 stem conv (64 channels) -> three stages of ``block`` modules
    with widths 64, 128 and 254 (stages two and three start with stride 2)
    -> global average pooling -> linear classifier.

    Args:
        block: Block class, constructed as ``block(in_channels, out_channels, stride)``.
            It must expose a class attribute ``expansion`` giving the ratio of
            its output channels to ``out_channels``.
        num_blocks: Sequence of three ints, the number of blocks per stage.
        num_classes: Number of output logits.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running input width for the next block; updated by _make_layer.
        self.in_channels = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=True)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        # NOTE(review): 254 rather than the usual 256 — presumably chosen to stay
        # under a parameter budget; confirm it is not a typo.
        self.layer3 = self._make_layer(block, 254, num_blocks[2], stride=2)
        self.linear = nn.Linear(254 * block.expansion, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use stride 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_channels, out_channels, block_stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits ``(N, num_classes)``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Global average pooling. For 32x32 inputs the feature map here is 8x8,
        # so this equals an 8x8 average pool, but other resolutions work too.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# Instantiate the model with adjusted num_blocks [4,5,3]
def ResNetSE():
    """Build the SE-ResNet used in this notebook: stages of 4, 5 and 3 ``BasicBlock`` modules."""
    stage_depths = [4, 5, 3]
    return ResNet(BasicBlock, stage_depths)


# Training Setup

# Build the network, move it to the selected device and report its parameter count.
model = ResNetSE()
model = model.to(device)
n_params = sum(p.numel() for p in model.parameters())
print(f"Parameters: {n_params/1e6:.2f}M")

# Data Augmentation
# Per-channel mean / std handed to Normalize; shared by both pipelines.
_norm_mean = (0.4914, 0.4822, 0.4465)
_norm_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random 32x32 crop (after 4-pixel padding) and random
# horizontal flip, then tensor conversion and normalisation.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(_norm_mean, _norm_std),
    ]
)

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
transform_test = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(_norm_mean, _norm_std),
    ]
)


test_batch_size = 128
train_batch_size = 128


# CIFAR-10 training split: downloaded to ./data if missing, with augmentation applied.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)

# Shuffled mini-batches for training, loaded by 4 worker processes.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=4,
)

# CIFAR-10 test split: downloaded to ./data if missing, without augmentation.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)

# Evaluation batches in a fixed order. Uses test_batch_size; the previous code
# passed train_batch_size here, leaving test_batch_size unused.
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=test_batch_size,
    shuffle=False,
    num_workers=4,
)



# def unpickle(file):

# 	with open(file, 'rb') as fo:
# 		dict = pickle.load(fo, encoding='bytes')
# 	return dict


# def convert_dict_to_namedtuple(d):
#   out = []
#   batch_labbel = d[b'batch_label']
#   for idx, item in enumerate(d[b'data']):
#	 tupl = Cifar10(d[b'data'][idx], d[b'labels'][idx])
#	 out.append(tupl)

#   return out






# from google.colab import drive
# drive.mount('/content/drive')

# # Select three random numbers between 1 & 100
# random_numbers = np.random.randint(1, 101, size=3)
# samples_images = []

# for i in random_numbers:
#   samples_images.append(trainset[i])


# # Create a figure and subplots
# fig, axes = plt.subplots(1, len(samples_images), figsize=(10, 1))  # 1 row, 3 columns

# # Display the images

# for i in range(len(samples_images)):
#   # The transpose(1, 2, 0) operation rearranges the dimensions so that the channels (color information) come last, which is typically the expected format for image display.

#   # reshape(3, 32, 32) assumes the image is a color image (3 channels - Red, Green, Blue) with a width and height of 32 pixels each.

#   axes[i].imshow(samples_images[i][0].cpu().view(3, 32, 32).permute(1, 2, 0))

#   # Label the subplots
#   axes[i].set_title("Label: " + str(samples_images[i][1].item()))

#   # Remove the axis ticks
#   axes[i].axis('off')


# # Adjust spacing between subplots
# plt.tight_layout()

# # Show the plot
# plt.show()





# Training Config
# Loss, optimiser and step-wise learning-rate schedule (x0.1 at each milestone).
# NOTE(review): the training loop steps the scheduler once per epoch for 150
# epochs, so the 150 milestone is only reached after the final epoch — confirm
# whether a second decay during training was intended.
cross_entropy_loss = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=1e-4, weight_decay=1e-3)
scheduler = lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[100, 150],
    gamma=0.1,
)





Parameters: 4.98M


In [9]:
print(f"Parameters: {sum(p.numel() for p in model.parameters())/1e6:.2f}M")

Parameters: 4.98M


In [10]:
# Initialize the model's weights with Xavier initialization
def init_weights_xavier(m):
    """Xavier-uniform initialise a ``Linear``/``Conv2d`` layer's weight and zero its bias.

    Meant to be passed to ``Module.apply``; modules of any other type are left
    untouched. Parameters are modified in place and nothing is returned.
    """
    if not isinstance(m, (nn.Linear, nn.Conv2d)):
        return
    init.xavier_uniform_(m.weight)
    bias = m.bias
    if bias is not None:  # layers built with bias=False have no bias tensor
        init.zeros_(bias)

# Re-initialise every Linear/Conv2d layer of the model in place. `apply` calls the
# function on each sub-module; the value it returns is what this cell displays.
# NOTE(review): re-running this cell after training discards the learned weights.
model.apply(init_weights_xavier)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (se): SEBlock(
        (avg_pool): AdaptiveAvgPool2d(output_size=1)
        (fc): Sequential(
          (0): Linear(in_features=64, out_features=4, bias=True)
          (1): ReLU(inplace=True)
          (2): Linear(in_features=4, out_features=64, bias=True)
          (3): Sigmoid()
        )
      )
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padd

In [11]:
from heapq import heapify, heappush, heappop
# Min-heap of the best test accuracies seen so far; the training loop keeps it to
# at most 5 entries. Seeded with 0 so `top_5_accuracy[0]` is always a valid threshold.
top_5_accuracy = [0]

In [13]:

# Per-epoch metric histories; reset every time this cell runs.
training_loss_history = []
test_loss_history = []
training_accuracy_history = []  # NOTE(review): never filled by the loop below
validation_accuracy_history = []

# NOTE(review): this cell continues from whatever state `model`, `optimizer`,
# `scheduler` and `top_5_accuracy` already hold. Re-running it without first
# re-running the setup cells resumes training instead of restarting it.

NUM_EPOCHS = 150

print(f"Trainable Parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)/1e6:.2f}M")

print("Training started >>> ")


# Training Loop
for epoch in tqdm(range(NUM_EPOCHS)):
    # ---- Training pass ----
    model.train()
    total_loss = 0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device).float().view(-1, 3, 32, 32), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = cross_entropy_loss(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # The learning-rate schedule advances once per epoch.
    scheduler.step()
    epoch_train_loss = total_loss / len(trainloader)
    print(f"Epoch {epoch+1}, Loss: {epoch_train_loss:.3f}", end='\r')
    training_loss_history.append(epoch_train_loss)

    # ---- Validation pass on the test split ----
    model.eval()
    correct = 0
    total = 0
    total_test_loss = 0
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device).view(-1, 3, 32, 32), labels.to(device)
            outputs = model(inputs)
            test_loss = cross_entropy_loss(outputs, labels)
            total_test_loss += test_loss.item()
            # Predicted class = index of the largest logit.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total

    # Keep the five best accuracies in a min-heap: a new value enters only if it
    # beats the current smallest, evicting it once the heap holds five entries.
    # (Checkpoint saving is currently disabled; this is where it would go.)
    if accuracy > top_5_accuracy[0]:
        if len(top_5_accuracy) >= 5:
            heappop(top_5_accuracy)
        heappush(top_5_accuracy, accuracy)

    test_loss_history.append(total_test_loss / len(testloader))
    print(f"Test Accuracy: {accuracy:.2f}%")
    validation_accuracy_history.append(accuracy)

Trainable Parameters: 4.98M
Training started >>> 


  0%|          | 0/150 [00:00<?, ?it/s]

Epoch 1, Loss: 0.565

  1%|          | 1/150 [00:25<1:04:01, 25.78s/it]

Test Accuracy: 75.63%
Epoch 2, Loss: 0.515

  1%|▏         | 2/150 [00:51<1:04:05, 25.98s/it]

Test Accuracy: 81.54%
Epoch 3, Loss: 0.474

  2%|▏         | 3/150 [01:17<1:03:32, 25.94s/it]

Test Accuracy: 78.75%
Epoch 4, Loss: 0.439

  3%|▎         | 4/150 [01:44<1:03:26, 26.07s/it]

Test Accuracy: 76.95%
Epoch 5, Loss: 0.416

  3%|▎         | 5/150 [02:10<1:03:11, 26.15s/it]

Test Accuracy: 81.53%
Epoch 6, Loss: 0.387

  4%|▍         | 6/150 [02:36<1:02:41, 26.12s/it]

Test Accuracy: 80.38%
Epoch 7, Loss: 0.366

  5%|▍         | 7/150 [03:02<1:02:19, 26.15s/it]

Test Accuracy: 82.58%
Epoch 8, Loss: 0.340

  5%|▌         | 8/150 [03:29<1:02:08, 26.26s/it]

Test Accuracy: 83.61%
Epoch 9, Loss: 0.322

  6%|▌         | 9/150 [03:55<1:01:43, 26.26s/it]

Test Accuracy: 82.40%
Epoch 10, Loss: 0.306

  7%|▋         | 10/150 [04:21<1:01:10, 26.22s/it]

Test Accuracy: 82.67%
Epoch 11, Loss: 0.292

  7%|▋         | 11/150 [04:47<1:00:40, 26.19s/it]

Test Accuracy: 82.10%
Epoch 12, Loss: 0.280

  8%|▊         | 12/150 [05:13<1:00:08, 26.15s/it]

Test Accuracy: 84.93%
Epoch 13, Loss: 0.265

  9%|▊         | 13/150 [05:39<59:40, 26.14s/it]  

Test Accuracy: 85.17%
Epoch 14, Loss: 0.256

  9%|▉         | 14/150 [06:06<59:19, 26.17s/it]

Test Accuracy: 84.02%
Epoch 15, Loss: 0.241

 10%|█         | 15/150 [06:32<59:04, 26.26s/it]

Test Accuracy: 86.24%
Epoch 16, Loss: 0.237

 11%|█         | 16/150 [06:58<58:47, 26.32s/it]

Test Accuracy: 83.76%
Epoch 17, Loss: 0.224

 11%|█▏        | 17/150 [07:25<58:12, 26.26s/it]

Test Accuracy: 83.99%
Epoch 18, Loss: 0.215

 12%|█▏        | 18/150 [07:51<57:36, 26.18s/it]

Test Accuracy: 85.07%
Epoch 19, Loss: 0.205

 13%|█▎        | 19/150 [08:16<56:56, 26.08s/it]

Test Accuracy: 82.50%
Epoch 20, Loss: 0.197

 13%|█▎        | 20/150 [08:43<56:32, 26.09s/it]

Test Accuracy: 86.93%
Epoch 21, Loss: 0.188

 14%|█▍        | 21/150 [09:09<56:21, 26.21s/it]

Test Accuracy: 84.08%
Epoch 22, Loss: 0.185

 15%|█▍        | 22/150 [09:35<56:02, 26.27s/it]

Test Accuracy: 86.81%
Epoch 23, Loss: 0.176

 15%|█▌        | 23/150 [10:02<55:33, 26.25s/it]

Test Accuracy: 87.78%
Epoch 24, Loss: 0.173

 16%|█▌        | 24/150 [10:28<55:05, 26.24s/it]

Test Accuracy: 87.88%
Epoch 25, Loss: 0.167

 17%|█▋        | 25/150 [10:54<54:44, 26.27s/it]

Test Accuracy: 87.23%
Epoch 26, Loss: 0.161

 17%|█▋        | 26/150 [11:21<54:29, 26.37s/it]

Test Accuracy: 87.93%
Epoch 27, Loss: 0.158

 18%|█▊        | 27/150 [11:47<53:51, 26.27s/it]

Test Accuracy: 87.74%
Epoch 28, Loss: 0.151

 19%|█▊        | 28/150 [12:13<53:30, 26.31s/it]

Test Accuracy: 86.13%
Epoch 29, Loss: 0.145

 19%|█▉        | 29/150 [12:40<53:16, 26.41s/it]

Test Accuracy: 84.99%
Epoch 30, Loss: 0.141

 20%|██        | 30/150 [13:06<52:47, 26.40s/it]

Test Accuracy: 86.84%
Epoch 31, Loss: 0.138

 21%|██        | 31/150 [13:33<52:17, 26.37s/it]

Test Accuracy: 88.73%
Epoch 32, Loss: 0.137

 21%|██▏       | 32/150 [13:59<51:57, 26.42s/it]

Test Accuracy: 85.85%
Epoch 33, Loss: 0.135

 22%|██▏       | 33/150 [14:26<51:35, 26.46s/it]

Test Accuracy: 85.89%
Epoch 34, Loss: 0.128

 23%|██▎       | 34/150 [14:52<51:13, 26.49s/it]

Test Accuracy: 87.42%
Epoch 35, Loss: 0.126

 23%|██▎       | 35/150 [15:18<50:34, 26.38s/it]

Test Accuracy: 87.03%
Epoch 36, Loss: 0.123

 24%|██▍       | 36/150 [15:45<50:03, 26.35s/it]

Test Accuracy: 86.20%
Epoch 37, Loss: 0.123

 25%|██▍       | 37/150 [16:11<49:31, 26.30s/it]

Test Accuracy: 88.88%
Epoch 38, Loss: 0.118

 25%|██▌       | 38/150 [16:37<49:00, 26.25s/it]

Test Accuracy: 88.16%
Epoch 39, Loss: 0.113

 26%|██▌       | 39/150 [17:03<48:43, 26.34s/it]

Test Accuracy: 88.38%
Epoch 40, Loss: 0.110

 27%|██▋       | 40/150 [17:30<48:13, 26.30s/it]

Test Accuracy: 87.56%
Epoch 41, Loss: 0.109

 27%|██▋       | 41/150 [17:56<47:50, 26.33s/it]

Test Accuracy: 88.60%
Epoch 42, Loss: 0.109

 28%|██▊       | 42/150 [18:22<47:19, 26.29s/it]

Test Accuracy: 89.24%
Epoch 43, Loss: 0.101

 29%|██▊       | 43/150 [18:48<46:48, 26.24s/it]

Test Accuracy: 87.55%
Epoch 44, Loss: 0.101

 29%|██▉       | 44/150 [19:15<46:30, 26.32s/it]

Test Accuracy: 87.16%
Epoch 45, Loss: 0.103

 30%|███       | 45/150 [19:41<46:02, 26.31s/it]

Test Accuracy: 87.58%
Epoch 46, Loss: 0.102

 31%|███       | 46/150 [20:08<45:44, 26.39s/it]

Test Accuracy: 87.78%
Epoch 47, Loss: 0.100

 31%|███▏      | 47/150 [20:34<45:00, 26.22s/it]

Test Accuracy: 88.64%
Epoch 48, Loss: 0.095

 32%|███▏      | 48/150 [21:00<44:45, 26.33s/it]

Test Accuracy: 89.12%
Epoch 49, Loss: 0.097

 33%|███▎      | 49/150 [21:27<44:25, 26.39s/it]

Test Accuracy: 89.12%
Epoch 50, Loss: 0.094

 33%|███▎      | 50/150 [21:53<44:03, 26.44s/it]

Test Accuracy: 85.65%
Epoch 51, Loss: 0.095

 34%|███▍      | 51/150 [22:20<43:39, 26.46s/it]

Test Accuracy: 88.55%
Epoch 52, Loss: 0.089

 35%|███▍      | 52/150 [22:46<42:57, 26.30s/it]

Test Accuracy: 89.39%
Epoch 53, Loss: 0.092

 35%|███▌      | 53/150 [23:12<42:35, 26.35s/it]

Test Accuracy: 87.90%
Epoch 54, Loss: 0.086

 36%|███▌      | 54/150 [23:38<42:00, 26.26s/it]

Test Accuracy: 88.17%
Epoch 55, Loss: 0.090

 37%|███▋      | 55/150 [24:04<41:31, 26.22s/it]

Test Accuracy: 88.45%
Epoch 56, Loss: 0.085

 37%|███▋      | 56/150 [24:31<41:12, 26.31s/it]

Test Accuracy: 86.94%
Epoch 57, Loss: 0.085

 38%|███▊      | 57/150 [24:57<40:45, 26.30s/it]

Test Accuracy: 88.66%
Epoch 58, Loss: 0.084

 39%|███▊      | 58/150 [25:24<40:26, 26.38s/it]

Test Accuracy: 88.65%
Epoch 59, Loss: 0.085

 39%|███▉      | 59/150 [25:50<40:04, 26.43s/it]

Test Accuracy: 88.10%
Epoch 60, Loss: 0.081

 40%|████      | 60/150 [26:17<39:33, 26.37s/it]

Test Accuracy: 89.46%
Epoch 61, Loss: 0.082

 41%|████      | 61/150 [26:43<39:02, 26.32s/it]

Test Accuracy: 89.13%
Epoch 62, Loss: 0.080

 41%|████▏     | 62/150 [27:09<38:40, 26.37s/it]

Test Accuracy: 88.51%
Epoch 63, Loss: 0.080

 42%|████▏     | 63/150 [27:36<38:17, 26.41s/it]

Test Accuracy: 90.19%
Epoch 64, Loss: 0.078

 43%|████▎     | 64/150 [28:02<37:53, 26.43s/it]

Test Accuracy: 89.97%
Epoch 65, Loss: 0.077

 43%|████▎     | 65/150 [28:29<37:30, 26.47s/it]

Test Accuracy: 88.68%
Epoch 66, Loss: 0.076

 44%|████▍     | 66/150 [28:55<37:02, 26.46s/it]

Test Accuracy: 88.63%
Epoch 67, Loss: 0.077

 45%|████▍     | 67/150 [29:21<36:29, 26.38s/it]

Test Accuracy: 88.53%
Epoch 68, Loss: 0.075

 45%|████▌     | 68/150 [29:48<36:05, 26.41s/it]

Test Accuracy: 87.51%
Epoch 69, Loss: 0.078

 46%|████▌     | 69/150 [30:14<35:34, 26.35s/it]

Test Accuracy: 88.91%
Epoch 70, Loss: 0.072

 47%|████▋     | 70/150 [30:41<35:13, 26.42s/it]

Test Accuracy: 89.73%
Epoch 71, Loss: 0.074

 47%|████▋     | 71/150 [31:07<34:44, 26.38s/it]

Test Accuracy: 86.03%
Epoch 72, Loss: 0.072

 48%|████▊     | 72/150 [31:33<34:14, 26.34s/it]

Test Accuracy: 88.78%
Epoch 73, Loss: 0.076

 49%|████▊     | 73/150 [31:59<33:47, 26.33s/it]

Test Accuracy: 89.48%
Epoch 74, Loss: 0.074

 49%|████▉     | 74/150 [32:26<33:23, 26.36s/it]

Test Accuracy: 90.71%
Epoch 75, Loss: 0.074

 50%|█████     | 75/150 [32:53<33:02, 26.43s/it]

Test Accuracy: 86.98%
Epoch 76, Loss: 0.070

 51%|█████     | 76/150 [33:19<32:39, 26.48s/it]

Test Accuracy: 89.26%
Epoch 77, Loss: 0.069

 51%|█████▏    | 77/150 [33:46<32:14, 26.50s/it]

Test Accuracy: 89.50%
Epoch 78, Loss: 0.067

 52%|█████▏    | 78/150 [34:12<31:44, 26.45s/it]

Test Accuracy: 89.83%
Epoch 79, Loss: 0.071

 53%|█████▎    | 79/150 [34:38<31:11, 26.35s/it]

Test Accuracy: 88.94%
Epoch 80, Loss: 0.072

 53%|█████▎    | 80/150 [35:05<30:50, 26.44s/it]

Test Accuracy: 88.90%
Epoch 81, Loss: 0.067

 54%|█████▍    | 81/150 [35:31<30:21, 26.40s/it]

Test Accuracy: 88.43%
Epoch 82, Loss: 0.068

 55%|█████▍    | 82/150 [35:58<29:57, 26.43s/it]

Test Accuracy: 87.30%
Epoch 83, Loss: 0.067

 55%|█████▌    | 83/150 [36:24<29:28, 26.40s/it]

Test Accuracy: 87.70%
Epoch 84, Loss: 0.064

 56%|█████▌    | 84/150 [36:51<29:06, 26.46s/it]

Test Accuracy: 90.67%
Epoch 85, Loss: 0.069

 57%|█████▋    | 85/150 [37:17<28:46, 26.56s/it]

Test Accuracy: 89.31%
Epoch 86, Loss: 0.066

 57%|█████▋    | 86/150 [37:44<28:22, 26.60s/it]

Test Accuracy: 89.08%
Epoch 87, Loss: 0.068

 58%|█████▊    | 87/150 [38:11<27:55, 26.59s/it]

Test Accuracy: 88.41%
Epoch 88, Loss: 0.068

 59%|█████▊    | 88/150 [38:37<27:21, 26.48s/it]

Test Accuracy: 87.56%
Epoch 89, Loss: 0.064

 59%|█████▉    | 89/150 [39:03<26:58, 26.54s/it]

Test Accuracy: 89.98%
Epoch 90, Loss: 0.066

 60%|██████    | 90/150 [39:30<26:32, 26.54s/it]

Test Accuracy: 88.20%
Epoch 91, Loss: 0.063

 61%|██████    | 91/150 [39:57<26:06, 26.55s/it]

Test Accuracy: 89.96%
Epoch 92, Loss: 0.067

 61%|██████▏   | 92/150 [40:23<25:41, 26.58s/it]

Test Accuracy: 90.05%
Epoch 93, Loss: 0.061

 62%|██████▏   | 93/150 [40:50<25:15, 26.58s/it]

Test Accuracy: 90.27%
Epoch 94, Loss: 0.065

 63%|██████▎   | 94/150 [41:17<24:50, 26.62s/it]

Test Accuracy: 90.70%
Epoch 95, Loss: 0.062

 63%|██████▎   | 95/150 [41:43<24:26, 26.67s/it]

Test Accuracy: 90.12%
Epoch 96, Loss: 0.065

 64%|██████▍   | 96/150 [42:10<23:58, 26.64s/it]

Test Accuracy: 89.67%
Epoch 97, Loss: 0.027

 65%|██████▍   | 97/150 [42:36<23:29, 26.60s/it]

Test Accuracy: 92.54%
Epoch 98, Loss: 0.016

 65%|██████▌   | 98/150 [43:03<22:57, 26.49s/it]

Test Accuracy: 92.69%
Epoch 99, Loss: 0.013

 66%|██████▌   | 99/150 [43:29<22:26, 26.41s/it]

Test Accuracy: 92.82%
Epoch 100, Loss: 0.011

 67%|██████▋   | 100/150 [43:55<21:58, 26.38s/it]

Test Accuracy: 92.92%
Epoch 101, Loss: 0.010

 67%|██████▋   | 101/150 [44:21<21:29, 26.32s/it]

Test Accuracy: 92.93%
Epoch 102, Loss: 0.009

 68%|██████▊   | 102/150 [44:47<20:56, 26.17s/it]

Test Accuracy: 93.04%
Epoch 103, Loss: 0.008

 69%|██████▊   | 103/150 [45:13<20:29, 26.15s/it]

Test Accuracy: 92.93%
Epoch 104, Loss: 0.008

 69%|██████▉   | 104/150 [45:40<20:05, 26.21s/it]

Test Accuracy: 92.84%
Epoch 105, Loss: 0.008

 70%|███████   | 105/150 [46:06<19:41, 26.26s/it]

Test Accuracy: 93.00%
Epoch 106, Loss: 0.007

 71%|███████   | 106/150 [46:32<19:18, 26.34s/it]

Test Accuracy: 92.97%
Epoch 107, Loss: 0.007

 71%|███████▏  | 107/150 [46:59<18:54, 26.39s/it]

Test Accuracy: 92.88%
Epoch 108, Loss: 0.007

 72%|███████▏  | 108/150 [47:25<18:22, 26.24s/it]

Test Accuracy: 93.01%
Epoch 109, Loss: 0.006

 73%|███████▎  | 109/150 [47:51<17:59, 26.33s/it]

Test Accuracy: 92.88%
Epoch 110, Loss: 0.006

 73%|███████▎  | 110/150 [48:18<17:35, 26.39s/it]

Test Accuracy: 93.11%
Epoch 111, Loss: 0.007

 74%|███████▍  | 111/150 [48:44<17:02, 26.22s/it]

Test Accuracy: 93.00%
Epoch 112, Loss: 0.007

 75%|███████▍  | 112/150 [49:10<16:39, 26.30s/it]

Test Accuracy: 92.94%
Epoch 113, Loss: 0.007

 75%|███████▌  | 113/150 [49:37<16:12, 26.29s/it]

Test Accuracy: 92.93%
Epoch 114, Loss: 0.006

 76%|███████▌  | 114/150 [50:03<15:49, 26.37s/it]

Test Accuracy: 93.02%
Epoch 115, Loss: 0.007

 77%|███████▋  | 115/150 [50:30<15:23, 26.39s/it]

Test Accuracy: 93.25%
Epoch 116, Loss: 0.006

 77%|███████▋  | 116/150 [50:56<14:58, 26.43s/it]

Test Accuracy: 93.07%
Epoch 117, Loss: 0.006

 78%|███████▊  | 117/150 [51:22<14:31, 26.40s/it]

Test Accuracy: 93.03%
Epoch 118, Loss: 0.007

 79%|███████▊  | 118/150 [51:49<14:05, 26.42s/it]

Test Accuracy: 92.91%
Epoch 119, Loss: 0.006

 79%|███████▉  | 119/150 [52:15<13:39, 26.44s/it]

Test Accuracy: 92.87%
Epoch 120, Loss: 0.006

 80%|████████  | 120/150 [52:42<13:12, 26.43s/it]

Test Accuracy: 93.02%
Epoch 121, Loss: 0.006

 81%|████████  | 121/150 [53:08<12:46, 26.45s/it]

Test Accuracy: 92.96%
Epoch 122, Loss: 0.006

 81%|████████▏ | 122/150 [53:36<12:31, 26.85s/it]

Test Accuracy: 92.91%
Epoch 123, Loss: 0.006

 82%|████████▏ | 123/150 [54:16<13:47, 30.65s/it]

Test Accuracy: 92.81%
Epoch 124, Loss: 0.006

 83%|████████▎ | 124/150 [54:55<14:26, 33.33s/it]

Test Accuracy: 92.86%
Epoch 125, Loss: 0.006

 83%|████████▎ | 125/150 [55:35<14:40, 35.21s/it]

Test Accuracy: 92.86%
Epoch 126, Loss: 0.006

 84%|████████▍ | 126/150 [56:14<14:36, 36.53s/it]

Test Accuracy: 93.01%
Epoch 127, Loss: 0.006

 85%|████████▍ | 127/150 [56:54<14:19, 37.39s/it]

Test Accuracy: 92.94%
Epoch 128, Loss: 0.006

 85%|████████▌ | 128/150 [57:33<13:55, 37.99s/it]

Test Accuracy: 92.89%
Epoch 129, Loss: 0.007

 86%|████████▌ | 129/150 [58:13<13:27, 38.46s/it]

Test Accuracy: 93.13%
Epoch 130, Loss: 0.006

 87%|████████▋ | 130/150 [58:52<12:55, 38.80s/it]

Test Accuracy: 92.89%
Epoch 131, Loss: 0.006

 87%|████████▋ | 131/150 [59:32<12:20, 39.00s/it]

Test Accuracy: 93.06%
Epoch 132, Loss: 0.006

 88%|████████▊ | 132/150 [1:00:11<11:44, 39.13s/it]

Test Accuracy: 92.83%
Epoch 133, Loss: 0.006

 89%|████████▊ | 133/150 [1:00:51<11:07, 39.26s/it]

Test Accuracy: 93.16%
Epoch 134, Loss: 0.006

 89%|████████▉ | 134/150 [1:01:30<10:29, 39.35s/it]

Test Accuracy: 92.87%
Epoch 135, Loss: 0.006

 90%|█████████ | 135/150 [1:02:10<09:50, 39.37s/it]

Test Accuracy: 92.98%
Epoch 136, Loss: 0.006

 91%|█████████ | 136/150 [1:02:49<09:11, 39.38s/it]

Test Accuracy: 93.10%
Epoch 137, Loss: 0.006

 91%|█████████▏| 137/150 [1:03:29<08:32, 39.44s/it]

Test Accuracy: 92.96%
Epoch 138, Loss: 0.006

 92%|█████████▏| 138/150 [1:04:08<07:54, 39.51s/it]

Test Accuracy: 93.10%
Epoch 139, Loss: 0.006

 93%|█████████▎| 139/150 [1:04:48<07:14, 39.48s/it]

Test Accuracy: 93.21%
Epoch 140, Loss: 0.006

 93%|█████████▎| 140/150 [1:05:27<06:35, 39.51s/it]

Test Accuracy: 92.93%
Epoch 141, Loss: 0.007

 94%|█████████▍| 141/150 [1:06:07<05:55, 39.51s/it]

Test Accuracy: 92.59%
Epoch 142, Loss: 0.006

 95%|█████████▍| 142/150 [1:06:46<05:15, 39.48s/it]

Test Accuracy: 93.01%
Epoch 143, Loss: 0.006

 95%|█████████▌| 143/150 [1:07:26<04:36, 39.45s/it]

Test Accuracy: 93.03%
Epoch 144, Loss: 0.006

 96%|█████████▌| 144/150 [1:08:05<03:56, 39.45s/it]

Test Accuracy: 92.87%
Epoch 145, Loss: 0.006

 97%|█████████▋| 145/150 [1:08:45<03:17, 39.47s/it]

Test Accuracy: 92.97%
Epoch 146, Loss: 0.006

 97%|█████████▋| 146/150 [1:09:21<02:34, 38.65s/it]

Test Accuracy: 92.76%
Epoch 147, Loss: 0.005

 98%|█████████▊| 147/150 [1:09:48<01:44, 34.92s/it]

Test Accuracy: 93.02%
Epoch 148, Loss: 0.005

 99%|█████████▊| 148/150 [1:10:14<01:04, 32.39s/it]

Test Accuracy: 93.21%
Epoch 149, Loss: 0.005

 99%|█████████▉| 149/150 [1:10:40<00:30, 30.55s/it]

Test Accuracy: 93.10%
Epoch 150, Loss: 0.005

100%|██████████| 150/150 [1:11:07<00:00, 28.45s/it]

Test Accuracy: 93.25%



