In [34]:
import torch
import torchvision
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms

import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR

from tqdm import tqdm

In [33]:
!nvidia-smi

Mon Apr 14 16:55:54 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.05              Driver Version: 560.35.05      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-80GB          Off |   00000000:01:00.0 Off |                   On |
| N/A   24C    P0             59W /  500W |    1841MiB /  81920MiB |     N/A      Default |
|                                         |                        |            Disabled* |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA A100-SXM4-80GB          Off |   00

In [3]:
import os

# Expose only GPU 0 to this process; this is set before the CUDA query below.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Device string handed to `.to(device)` later on; falls back to the CPU when
# torch reports no usable CUDA device.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

## Data Loading

In [4]:
# # LOCAL CLUSTER DIRECTORY FILE LOADING
# import pickle

# # Function to unpickle the dataset files
# def unpickle(file):
#     with open(file, 'rb') as fo:
#         data_dict = pickle.load(fo, encoding='bytes')
#     return data_dict

# # Function to load all data batches
# def load_data(batch_files):
#     all_data = []
#     all_labels = []
#     for batch_file in batch_files:
#         batch_file = DATASET_PATH + batch_file
#         batch = unpickle(batch_file)
#         all_data.append(batch[b'data'])  # Image data
#         all_labels.append(batch[b'labels'])  # Labels
#     data = np.concatenate(all_data)
#     labels = np.concatenate(all_labels)
#     return data, labels

# # Function to reshape image data
# def reshape_images(data):
#     # Reshape from (10000, 3072) to (10000, 32, 32, 3)
#     data = data.reshape(-1, 3, 32, 32)  # (10000, 3, 32, 32)
#     data = data.transpose(0, 2, 3, 1)  # (10000, 32, 32, 3)
#     return data

# # Load metadata (label names)
# def load_meta(file):
#     file = DATASET_PATH + file
#     meta = unpickle(file)
#     label_names = [name.decode('utf-8') for name in meta[b'label_names']]  # Decode bytes to strings
#     return label_names

# # Example usage
# # Files for the training batches and metadata
# batch_files = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5']
# DATASET_PATH = "/datasets/CS747/cifar-10-batches-py/"
# test_batch_file = 'test_batch'
# meta_file = 'batches.meta'

# # Load training data
# data, labels = load_data(batch_files)
# data = reshape_images(data)  # Reshape image data into (10000, 32, 32, 3)

# # Load test data
# test_data, test_labels = load_data([test_batch_file])
# test_data = reshape_images(test_data)  # Reshape test data into (10000, 32, 32, 3)

# # Load label names (metadata)
# label_names = load_meta(meta_file)

# # Example of accessing one image and its label:
# image_index = 3
# image = data[image_index]  # 32x32x3 image
# label = labels[image_index]
# label_name = label_names[label]

# print(f"Image shape: {image.shape}, Label: {label}, Label name: {label_name}")

In [17]:
# Per-split preprocessing.
# Training images are augmented (random 32x32 crop after 4-pixel padding, then a
# random horizontal flip) before conversion to tensors; test images are only
# converted to tensors.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)
transform_test = transforms.Compose([transforms.ToTensor()])

# Every dataset below is fetched through torchvision into ./data.
_dataset_kwargs = dict(root='./data', download=True)

# MNIST train / test splits, tensor conversion only.
train_mnist = datasets.MNIST(train=True, transform=transforms.ToTensor(), **_dataset_kwargs)
test_mnist = datasets.MNIST(train=False, transform=transforms.ToTensor(), **_dataset_kwargs)

# CIFAR-10 train / test splits. No Normalize step is part of either pipeline
# (a Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) variant is not applied).
full_train_cifar = datasets.CIFAR10(train=True, transform=transform_train, **_dataset_kwargs)
test_cifar = datasets.CIFAR10(train=False, transform=transform_test, **_dataset_kwargs)

In [18]:
# Mini-batch size for the training loaders; the validation and test loaders
# are built with twice this value.
BATCH_SIZE = 128

In [19]:
# Calculate sizes for split (90% training, 10% validation)
num_train = int(0.9 * len(full_train_cifar))  # 45,000 images
num_val = len(full_train_cifar) - num_train    # 5,000 images

# Seed the split so that Restart & Run All always yields the same partition.
SPLIT_SEED = 0
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_split = random_split(
    full_train_cifar, [num_train, num_val], generator=split_generator
)

# A Subset defers to its parent dataset, so taking the validation subset from
# `full_train_cifar` would apply the random crop / flip augmentation to the
# validation images as well. Re-open the same training data with the
# deterministic test-time transform and select the held-out indices from it.
# Same root as `full_train_cifar`; the files are already on disk at this point.
full_train_cifar_eval = datasets.CIFAR10(
    root='./data', download=False, train=True, transform=transform_test
)
val_dataset = torch.utils.data.Subset(full_train_cifar_eval, val_split.indices)

# Only the training loaders shuffle; evaluation loaders use a doubled batch size.
train_loader_cifar = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader_cifar = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE*2, shuffle=False)
test_loader_cifar = DataLoader(dataset=test_cifar, batch_size=BATCH_SIZE*2, shuffle=False)

train_loader_mnist = DataLoader(dataset=train_mnist, batch_size=BATCH_SIZE, shuffle=True)

In [20]:
# Optimiser hyper-parameters, one dict per experiment.

# MNIST CNN settings (no epoch count is defined for this run).
mnist_cnn_config = dict(
    learning_rate=0.01,
    weight_decay=3.5e-5,
)

# CIFAR-10 settings; "epochs" is the number of passes over the training split.
cifar_config = dict(
    learning_rate=0.01,
    weight_decay=3.5e-5,
    epochs=100,
)



In [21]:
from collections import OrderedDict
import torch.nn as nn


class SmallCNN(nn.Module):
    """Compact CNN: four 3x3 convolutions with two max-pools, then a 3-layer MLP head.

    The flattened feature size is hard-coded to 64 * 4 * 4, which is what the
    un-padded convolutions and the two 2x2 pools produce for 1x28x28 inputs
    (28 -> 26 -> 24 -> 12 -> 10 -> 8 -> 4).

    The last linear layer is zero-initialised, so a freshly built model emits
    all-zero logits.

    Args:
        drop: Dropout probability used after the first fully connected layer.
    """

    def __init__(self, drop=0.5):
        super().__init__()

        self.num_channels = 1
        self.num_labels = 10

        # A single in-place ReLU module is registered under every relu* name.
        shared_relu = nn.ReLU(True)

        conv_layers = OrderedDict()
        conv_layers['conv1'] = nn.Conv2d(self.num_channels, 32, 3)
        conv_layers['relu1'] = shared_relu
        conv_layers['conv2'] = nn.Conv2d(32, 32, 3)
        conv_layers['relu2'] = shared_relu
        conv_layers['maxpool1'] = nn.MaxPool2d(2, 2)
        conv_layers['conv3'] = nn.Conv2d(32, 64, 3)
        conv_layers['relu3'] = shared_relu
        conv_layers['conv4'] = nn.Conv2d(64, 64, 3)
        conv_layers['relu4'] = shared_relu
        conv_layers['maxpool2'] = nn.MaxPool2d(2, 2)
        self.feature_extractor = nn.Sequential(conv_layers)

        head_layers = OrderedDict()
        head_layers['fc1'] = nn.Linear(64 * 4 * 4, 200)
        head_layers['relu1'] = shared_relu
        head_layers['drop'] = nn.Dropout(drop)
        head_layers['fc2'] = nn.Linear(200, 200)
        head_layers['relu2'] = shared_relu
        head_layers['fc3'] = nn.Linear(200, self.num_labels)
        self.classifier = nn.Sequential(head_layers)

        self._init_weights()

    def _init_weights(self):
        """Kaiming-normal conv weights, zero conv biases, zeroed output layer."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        output_layer = self.classifier.fc3
        nn.init.constant_(output_layer.weight, 0)
        nn.init.constant_(output_layer.bias, 0)

    def forward(self, input):
        """Return class logits for a batch of images."""
        features = self.feature_extractor(input)
        # Collapse (C, H, W) into the fixed 64 * 4 * 4 vector the head expects.
        flat = features.view(-1, 64 * 4 * 4)
        return self.classifier(flat)

In [22]:
## Resnet Model

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers and a skip path.

    The skip path is the identity unless the block changes resolution
    (``stride != 1``) or channel count, in which case a strided 1x1 conv +
    batch-norm projects the input to the output shape.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution (and of the projection, if any).
    """
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Empty Sequential acts as the identity skip connection.
        self.residual = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.residual = nn.Sequential(
                nn.Conv2d(in_channels, self.expansion * out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_channels)
            )

    def forward(self, x):
        """Return ``relu(F(x) + skip(x))`` where ``F`` is conv-bn-relu-conv-bn."""
        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        # No activation between the second batch-norm and the addition: the
        # standard basic block applies ReLU only after the skip connection is
        # added, so the residual branch can also contribute negative values.
        out = self.bn2(self.conv2(out))
        out = out + self.residual(x)
        return F.relu(out, inplace=True)


class ResNet(nn.Module):
    """ResNet with a 3x3 stem, four residual stages, global pooling and a linear head.

    Stage widths are 64, 128, 256 and 512 channels; stages 1-3 halve the
    spatial resolution with a stride-2 first block.

    Args:
        block: Residual block class. It is instantiated as
            ``block(in_channels, out_channels, stride=...)`` and must expose an
            integer ``expansion`` class attribute.
        num_blocks: Number of blocks in each of the four stages.
        num_classes: Number of output logits.
    """

    def __init__(self, block: type[nn.Module], num_blocks: list[int], num_classes: int=10):
        super().__init__()
        # Running channel count; updated by make_layer as stages are stacked.
        self.in_channels = 64

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer0 = self.make_layer(block, 64, num_blocks[0], stride=1)
        self.layer1 = self.make_layer(block, 128, num_blocks[1], stride=2)
        self.layer2 = self.make_layer(block, 256, num_blocks[2], stride=2)
        self.layer3 = self.make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def make_layer(self, block, out_channels, num_blocks, stride):
        """Build one stage of ``num_blocks`` blocks.

        Only the first block receives ``stride``; the remaining blocks are
        built with the block's default stride. ``self.in_channels`` is advanced
        to the stage's output width as a side effect.
        """
        layers = []

        layers.append(block(self.in_channels, out_channels, stride=stride))
        self.in_channels = out_channels * block.expansion

        for _ in range(1, num_blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for 3-channel images."""
        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out = self.layer0(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Global average pooling. For 32x32 inputs the final map is 4x4, so this
        # equals the previous fixed avg_pool2d(out, 4); other input sizes now
        # also reduce to one value per channel, which is what `linear` expects.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        logits = self.linear(out)
        return logits

def ResNet18():
    """Build a ResNet from BasicBlock with two blocks in each of its four stages."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage)


In [23]:
#model_test = ResNet18().to(device)

In [24]:
# x_rand = torch.randn((1, 3, 32, 32)).cuda()
# print(x_rand.shape)
# output = model_test(x_rand)
# print(output.shape)

In [None]:
def train_one_epoch(model,  dataloader, criterion, optimizer, device, max_grad_norm=None):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; switched to training mode here.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``; its result is
            treated as a per-batch mean when accumulating the epoch loss.
        optimizer: Optimiser stepped once per batch.
        device: Device each batch is moved to.
        max_grad_norm: If truthy, gradients are clipped to this total norm
            before every optimiser step.

    Returns:
        ``(epoch_loss, epoch_acc)`` as Python floats: the sample-weighted mean
        loss and the fraction of correct arg-max predictions.
    """
    model.train()

    running_loss = 0.0
    running_corrects = 0
    total_samples = 0

    for images, labels in tqdm(dataloader, desc="Training", leave=False):

        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()

        if max_grad_norm:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        optimizer.step()

        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        preds = torch.argmax(outputs.detach(), -1)
        # .item() keeps the counter a plain int (as validate() does); without it
        # the returned accuracy is a 0-dim tensor on `device`.
        running_corrects += (preds == labels).sum().item()
        total_samples += batch_size

    epoch_loss = running_loss / total_samples
    epoch_acc = running_corrects / total_samples
    return epoch_loss, epoch_acc

def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    The model is put in eval mode. Each batch loss is weighted by its batch
    size, so the returned loss is a per-sample mean when ``criterion`` returns
    a batch mean.

    Returns:
        ``(epoch_loss, epoch_acc)`` as Python floats, where ``epoch_acc`` is
        the fraction of samples whose arg-max prediction equals the label.
    """
    model.eval()

    loss_sum, hit_count, seen = 0.0, 0.0, 0

    with torch.no_grad():
        progress = tqdm(dataloader, desc = 'Validation', leave=False)
        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)
            loss_sum += batch_loss.item() * batch_images.size(0)

            predicted = logits.argmax(dim=-1)
            hit_count += (predicted == batch_labels).sum().item()
            seen += batch_labels.size(0)

    return loss_sum / seen, hit_count / seen

# def adjust_learning_rate(optimizer, epoch, config):
#     """decrease the learning rate"""
#     lr = config["learning_rate"]

#     if epoch >= 100:
#         lr = config["learning_rate"] * 0.001
#     elif epoch >= 90:
#         lr = config["learning_rate"] * 0.01
#     elif epoch >= 75:
#         lr = config["learning_rate"] * 0.1

#     for param_group in optimizer.param_groups:
#         param_group['lr'] = lr

In [None]:
def save_checkpoint(model, optimizer, epoch, val_loss, checkpoint_dir, top_checkpoints, max_checkpoints=5):
    """
    Save a checkpoint only if it ranks among the ``max_checkpoints`` best so far.

    Ranking is by validation loss, lower is better. When the list is already
    full and the new loss is not strictly better than the current worst entry,
    nothing is written to disk. Otherwise the checkpoint is saved, recorded in
    ``top_checkpoints`` and, if the list now exceeds ``max_checkpoints``, the
    worst entry is dropped and its file deleted.

    Args:
        model (torch.nn.Module): The model to save.
        optimizer (torch.optim.Optimizer): The optimizer state to save.
        epoch (int): Zero-based epoch index; the filename uses ``epoch + 1``.
        val_loss (float): The validation loss metric used for ranking.
        checkpoint_dir (str): Directory to save the checkpoints.
        top_checkpoints (list): List of tuples (val_loss, filename) for current
            top checkpoints. Mutated in place and kept sorted by ascending loss.
        max_checkpoints (int, optional): Maximum number of checkpoints to keep. Defaults to 5.
    """
    def by_loss(entry):
        return entry[0]

    # Keep the list ordered so the last entry is always the current worst.
    top_checkpoints.sort(key=by_loss)

    # Avoid serialising a checkpoint that would be deleted straight away: with
    # a full list, a loss that does not beat the worst kept entry cannot rank
    # (ties lose to the entry that is already stored).
    list_is_full = len(top_checkpoints) >= max_checkpoints
    if list_is_full and (not top_checkpoints or val_loss >= top_checkpoints[-1][0]):
        return

    # Ensure the checkpoint directory exists.
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Construct a filename that contains the epoch and validation loss.
    checkpoint_filename = os.path.join(checkpoint_dir, f"model_epoch_{epoch+1}_valloss_{val_loss:.4f}.pth")

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'val_loss': val_loss,
    }
    torch.save(checkpoint, checkpoint_filename)

    top_checkpoints.append((val_loss, checkpoint_filename))
    top_checkpoints.sort(key=by_loss)

    # If more than max_checkpoints, delete the worst checkpoint (last one in sorted list)
    if len(top_checkpoints) > max_checkpoints:
        worst_checkpoint = top_checkpoints.pop()
        if os.path.exists(worst_checkpoint[1]):
            os.remove(worst_checkpoint[1])
            print(f"Removed checkpoint: {worst_checkpoint[1]}")

In [None]:
def train_model(model, train_loader, val_loader, config, scheduler=None, device="cuda"):
    """Train ``model`` with SGD and a MultiStepLR schedule, validating every epoch.

    After each epoch the metrics are appended to the history, the learning-rate
    schedule is advanced and ``save_checkpoint`` is called with the validation
    loss (it keeps at most 5 checkpoints).

    Args:
        model: Network to train. It is not moved to ``device`` here.
        train_loader: Batches used for optimisation.
        val_loader: Batches used for validation.
        config: Dict with required keys ``"epochs"``, ``"learning_rate"`` and
            ``"weight_decay"``. Optional keys: ``"checkpoint_dir"`` (default
            ``"./checkpoints"``), ``"lr_milestones"`` (default
            ``[75, 90, 100]``) and ``"lr_gamma"`` (default ``0.1``).
        scheduler: Accepted for backward compatibility but not used. The
            optimiser is created inside this function, so the schedule is
            always built here on top of it.
        device: Device the batches are moved to.

    Returns:
        ``(model, history, top_checkpoints)`` where ``history`` maps
        ``train_loss`` / ``train_acc`` / ``val_loss`` / ``val_acc`` to lists of
        per-epoch floats and ``top_checkpoints`` is the list maintained by
        ``save_checkpoint``.
    """
    num_epochs = config["epochs"]

    top_checkpoints = []
    checkpoint_dir = config.get("checkpoint_dir", "./checkpoints")

    history = {
        "train_loss" : [],
        "train_acc" : [],
        "val_loss" : [],
        "val_acc" : []
    }

    criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(model.parameters(),
                    lr = config["learning_rate"],
                    momentum= 0.9,
                    weight_decay=config["weight_decay"])

    # Separate local name: the `scheduler` argument is no longer overwritten.
    lr_scheduler = MultiStepLR(
        optimizer=optimizer,
        milestones=list(config.get("lr_milestones", [75, 90, 100])),
        gamma=config.get("lr_gamma", 0.1),
    )

    for epoch in range(1, num_epochs+1):
        print(f"Epoch {epoch}/{num_epochs}")

        train_loss, train_acc = train_one_epoch(
            model = model, dataloader = train_loader,
            criterion=criterion, optimizer=optimizer,
            device=device, max_grad_norm=1.0
        )

        val_loss, val_acc = validate(model=model, dataloader=val_loader, criterion=criterion, device=device)

        # float() makes the history plain numbers even when a metric arrives
        # as a 0-dim tensor.
        train_loss, train_acc = float(train_loss), float(train_acc)
        val_loss, val_acc = float(val_loss), float(val_acc)

        history["train_loss"].append(train_loss)
        history["train_acc"].append(train_acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

        # `epoch` is already 1-based, and the config key is "epochs".
        print(f"Epoch [{epoch}/{num_epochs}] | "
              f"LR: {lr_scheduler.get_last_lr()[0]:.6f} | "
              f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc*100:.2f}% | "
              f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc*100:.2f}%")

        lr_scheduler.step()

        # save_checkpoint names the file with `epoch + 1`, i.e. it expects a
        # zero-based index; pass epoch - 1 so the filename shows this epoch.
        save_checkpoint(model, optimizer, epoch - 1, val_loss, checkpoint_dir, top_checkpoints, max_checkpoints=5)

    return model, history, top_checkpoints

In [None]:
def main():
    """Train ResNet18 on CIFAR-10, report test metrics and return the history.

    Returns:
        The per-epoch metric history produced by ``train_model``.
    """
    model = ResNet18().to(device)

    # train_model returns three values; the checkpoint list is not needed here.
    model, history, _top_checkpoints = train_model(model,
                                                   train_loader=train_loader_cifar,
                                                   val_loader=val_loader_cifar,
                                                   config= cifar_config
    )

    print("Training Done")
    print('================================================================')

    test_loss, test_acc = validate(model, dataloader=test_loader_cifar, criterion=nn.CrossEntropyLoss(), device=device)
    print(f"Test accuracy: {test_acc}, Test loss: {test_loss}")

    return history


In [None]:
# Entry point: train on CIFAR-10 and print the test metrics via main().
# NOTE(review): the following cell contains the same two lines, so executing
# both cells runs the whole training twice.
if __name__ == '__main__':
    main()

In [32]:
# Entry point: train on CIFAR-10 and print the test metrics via main().
# NOTE(review): the output stored under this cell does not match the print
# statements of the current train_model/main (e.g. "Best model updated ..."
# lines, no "LR:" field), so it was presumably produced by an earlier version
# of the code — re-run to refresh it.
if __name__ == '__main__':
    main()

Epoch 1/100


Training:   0%|          | 0/352 [00:00<?, ?it/s]

                                                           

Train Loss: 1.5860, Train Acc: 41.17%
Val Loss:   1.5004, Val Acc:   44.54%
Best model updated at epoch 1
Epoch 2/100


                                                           

Train Loss: 1.1647, Train Acc: 57.83%
Val Loss:   1.2083, Val Acc:   57.50%
Best model updated at epoch 2
Epoch 3/100


                                                           

Train Loss: 0.9714, Train Acc: 65.34%
Val Loss:   1.1449, Val Acc:   59.84%
Best model updated at epoch 3
Epoch 4/100


                                                           

Train Loss: 0.8460, Train Acc: 69.89%
Val Loss:   1.4956, Val Acc:   53.96%
Epoch 5/100


                                                           

Train Loss: 0.7446, Train Acc: 73.59%
Val Loss:   2.1229, Val Acc:   46.92%
Epoch 6/100


                                                           

Train Loss: 0.6633, Train Acc: 76.64%
Val Loss:   0.8579, Val Acc:   70.32%
Best model updated at epoch 6
Epoch 7/100


                                                           

Train Loss: 0.6140, Train Acc: 78.24%
Val Loss:   1.0093, Val Acc:   66.42%
Epoch 8/100


                                                           

Train Loss: 0.5562, Train Acc: 80.49%
Val Loss:   0.9331, Val Acc:   69.82%
Epoch 9/100


                                                           

Train Loss: 0.5186, Train Acc: 81.90%
Val Loss:   0.8185, Val Acc:   72.96%
Best model updated at epoch 9
Epoch 10/100


                                                           

Train Loss: 0.4831, Train Acc: 83.25%
Val Loss:   0.8790, Val Acc:   71.74%
Epoch 11/100


                                                           

Train Loss: 0.4500, Train Acc: 84.32%
Val Loss:   0.7543, Val Acc:   75.30%
Best model updated at epoch 11
Epoch 12/100


                                                           

Train Loss: 0.4196, Train Acc: 85.38%
Val Loss:   0.7361, Val Acc:   74.76%
Best model updated at epoch 12
Epoch 13/100


                                                           

Train Loss: 0.3994, Train Acc: 86.07%
Val Loss:   0.7447, Val Acc:   75.18%
Epoch 14/100


                                                           

Train Loss: 0.3757, Train Acc: 87.05%
Val Loss:   0.6221, Val Acc:   79.38%
Best model updated at epoch 14
Epoch 15/100


                                                           

Train Loss: 0.3547, Train Acc: 87.65%
Val Loss:   0.4980, Val Acc:   83.06%
Best model updated at epoch 15
Epoch 16/100


                                                           

Train Loss: 0.3355, Train Acc: 88.38%
Val Loss:   0.5582, Val Acc:   81.64%
Epoch 17/100


                                                           

Train Loss: 0.3171, Train Acc: 89.15%
Val Loss:   0.4884, Val Acc:   83.84%
Best model updated at epoch 17
Epoch 18/100


                                                           

Train Loss: 0.3004, Train Acc: 89.53%
Val Loss:   0.5105, Val Acc:   82.94%
Epoch 19/100


                                                           

Train Loss: 0.2842, Train Acc: 90.15%
Val Loss:   0.5001, Val Acc:   84.26%
Epoch 20/100


                                                           

Train Loss: 0.2697, Train Acc: 90.64%
Val Loss:   0.5960, Val Acc:   81.70%
Epoch 21/100


                                                           

Train Loss: 0.2588, Train Acc: 90.96%
Val Loss:   0.5833, Val Acc:   81.32%
Epoch 22/100


                                                           

Train Loss: 0.2489, Train Acc: 91.25%
Val Loss:   0.4889, Val Acc:   84.64%
Epoch 23/100


                                                           

Train Loss: 0.2330, Train Acc: 91.83%
Val Loss:   0.6075, Val Acc:   81.12%
Epoch 24/100


                                                           

Train Loss: 0.2219, Train Acc: 92.33%
Val Loss:   0.4891, Val Acc:   84.38%
Epoch 25/100


                                                           

Train Loss: 0.2173, Train Acc: 92.38%
Val Loss:   0.4646, Val Acc:   85.10%
Best model updated at epoch 25
Epoch 26/100


                                                           

Train Loss: 0.2090, Train Acc: 92.71%
Val Loss:   0.5253, Val Acc:   84.20%
Epoch 27/100


                                                           

Train Loss: 0.2012, Train Acc: 93.09%
Val Loss:   0.4439, Val Acc:   86.10%
Best model updated at epoch 27
Epoch 28/100


                                                           

Train Loss: 0.1907, Train Acc: 93.38%
Val Loss:   0.4432, Val Acc:   85.96%
Best model updated at epoch 28
Epoch 29/100


                                                           

Train Loss: 0.1825, Train Acc: 93.70%
Val Loss:   0.4650, Val Acc:   85.90%
Epoch 30/100


                                                           

Train Loss: 0.1717, Train Acc: 93.97%
Val Loss:   0.4807, Val Acc:   85.44%
Epoch 31/100


                                                           

Train Loss: 0.1659, Train Acc: 94.25%
Val Loss:   0.4889, Val Acc:   85.06%
Epoch 32/100


                                                           

Train Loss: 0.1578, Train Acc: 94.52%
Val Loss:   0.4798, Val Acc:   85.90%
Epoch 33/100


                                                           

Train Loss: 0.1536, Train Acc: 94.53%
Val Loss:   0.5269, Val Acc:   84.54%
Epoch 34/100


                                                           

Train Loss: 0.1460, Train Acc: 94.79%
Val Loss:   0.4935, Val Acc:   85.58%
Epoch 35/100


                                                           

Train Loss: 0.1348, Train Acc: 95.20%
Val Loss:   0.4332, Val Acc:   87.46%
Best model updated at epoch 35
Epoch 36/100


                                                           

Train Loss: 0.1322, Train Acc: 95.27%
Val Loss:   0.5341, Val Acc:   84.62%
Epoch 37/100


                                                           

Train Loss: 0.1304, Train Acc: 95.36%
Val Loss:   0.4683, Val Acc:   86.34%
Epoch 38/100


                                                           

Train Loss: 0.1203, Train Acc: 95.60%
Val Loss:   0.4815, Val Acc:   86.64%
Epoch 39/100


                                                           

Train Loss: 0.1160, Train Acc: 95.94%
Val Loss:   0.4531, Val Acc:   87.18%
Epoch 40/100


                                                           

Train Loss: 0.1119, Train Acc: 96.13%
Val Loss:   0.4732, Val Acc:   86.78%
Epoch 41/100


                                                           

Train Loss: 0.1091, Train Acc: 96.17%
Val Loss:   0.4546, Val Acc:   87.54%
Epoch 42/100


                                                           

Train Loss: 0.1061, Train Acc: 96.29%
Val Loss:   0.5132, Val Acc:   85.80%
Epoch 43/100


                                                           

Train Loss: 0.1000, Train Acc: 96.42%
Val Loss:   0.4577, Val Acc:   87.34%
Epoch 44/100


                                                           

Train Loss: 0.0952, Train Acc: 96.66%
Val Loss:   0.5309, Val Acc:   85.80%
Epoch 45/100


                                                           

Train Loss: 0.0956, Train Acc: 96.61%
Val Loss:   0.4770, Val Acc:   86.98%
Epoch 46/100


                                                           

Train Loss: 0.0918, Train Acc: 96.73%
Val Loss:   0.5207, Val Acc:   86.46%
Epoch 47/100


                                                           

Train Loss: 0.0907, Train Acc: 96.79%
Val Loss:   0.5809, Val Acc:   85.50%
Epoch 48/100


                                                           

Train Loss: 0.0846, Train Acc: 96.98%
Val Loss:   0.4791, Val Acc:   87.18%
Epoch 49/100


                                                           

Train Loss: 0.0849, Train Acc: 96.96%
Val Loss:   0.5747, Val Acc:   85.80%
Epoch 50/100


                                                           

Train Loss: 0.0807, Train Acc: 97.22%
Val Loss:   0.5604, Val Acc:   85.92%
Epoch 51/100


                                                           

Train Loss: 0.0784, Train Acc: 97.27%
Val Loss:   0.5376, Val Acc:   86.22%
Epoch 52/100


                                                           

Train Loss: 0.0745, Train Acc: 97.36%
Val Loss:   0.5095, Val Acc:   87.48%
Epoch 53/100


                                                           

Train Loss: 0.0742, Train Acc: 97.44%
Val Loss:   0.4500, Val Acc:   88.38%
Epoch 54/100


                                                           

Train Loss: 0.0697, Train Acc: 97.56%
Val Loss:   0.4366, Val Acc:   88.60%
Epoch 55/100


                                                           

Train Loss: 0.0732, Train Acc: 97.42%
Val Loss:   0.4481, Val Acc:   87.90%
Epoch 56/100


                                                           

Train Loss: 0.0671, Train Acc: 97.63%
Val Loss:   0.5333, Val Acc:   86.42%
Epoch 57/100


                                                           

Train Loss: 0.0649, Train Acc: 97.74%
Val Loss:   0.4442, Val Acc:   88.52%
Epoch 58/100


                                                           

Train Loss: 0.0643, Train Acc: 97.75%
Val Loss:   0.6357, Val Acc:   85.70%
Epoch 59/100


                                                           

Train Loss: 0.0618, Train Acc: 97.77%
Val Loss:   0.5161, Val Acc:   87.38%
Epoch 60/100


                                                           

Train Loss: 0.0600, Train Acc: 97.88%
Val Loss:   0.4913, Val Acc:   87.74%
Epoch 61/100


                                                           

Train Loss: 0.0584, Train Acc: 97.99%
Val Loss:   0.4903, Val Acc:   88.02%
Epoch 62/100


                                                           

Train Loss: 0.0580, Train Acc: 97.93%
Val Loss:   0.4348, Val Acc:   88.76%
Epoch 63/100


                                                           

Train Loss: 0.0580, Train Acc: 98.00%
Val Loss:   0.5361, Val Acc:   86.88%
Epoch 64/100


                                                           

Train Loss: 0.0574, Train Acc: 97.92%
Val Loss:   0.5318, Val Acc:   87.58%
Epoch 65/100


                                                           

Train Loss: 0.0542, Train Acc: 98.12%
Val Loss:   0.5275, Val Acc:   87.20%
Epoch 66/100


                                                           

Train Loss: 0.0527, Train Acc: 98.08%
Val Loss:   0.4954, Val Acc:   88.36%
Epoch 67/100


                                                           

Train Loss: 0.0502, Train Acc: 98.28%
Val Loss:   0.4858, Val Acc:   88.48%
Epoch 68/100


                                                           

Train Loss: 0.0502, Train Acc: 98.24%
Val Loss:   0.5043, Val Acc:   88.00%
Epoch 69/100


                                                           

Train Loss: 0.0505, Train Acc: 98.22%
Val Loss:   0.4759, Val Acc:   89.36%
Epoch 70/100


                                                           

Train Loss: 0.0496, Train Acc: 98.24%
Val Loss:   0.4959, Val Acc:   88.04%
Epoch 71/100


                                                           

Train Loss: 0.0472, Train Acc: 98.32%
Val Loss:   0.5396, Val Acc:   87.52%
Epoch 72/100


                                                           

Train Loss: 0.0481, Train Acc: 98.34%
Val Loss:   0.5193, Val Acc:   87.96%
Epoch 73/100


                                                           

Train Loss: 0.0450, Train Acc: 98.42%
Val Loss:   0.5281, Val Acc:   88.16%
Epoch 74/100


                                                           

Train Loss: 0.0466, Train Acc: 98.37%
Val Loss:   0.5087, Val Acc:   88.18%
Epoch 75/100


                                                           

Train Loss: 0.0197, Train Acc: 99.38%
Val Loss:   0.4013, Val Acc:   90.56%
Best model updated at epoch 75
Epoch 76/100


                                                           

Train Loss: 0.0113, Train Acc: 99.67%
Val Loss:   0.3991, Val Acc:   90.74%
Best model updated at epoch 76
Epoch 77/100


                                                           

Train Loss: 0.0086, Train Acc: 99.80%
Val Loss:   0.3634, Val Acc:   91.46%
Best model updated at epoch 77
Epoch 78/100


                                                           

Train Loss: 0.0078, Train Acc: 99.77%
Val Loss:   0.3896, Val Acc:   90.96%
Epoch 79/100


                                                           

Train Loss: 0.0069, Train Acc: 99.82%
Val Loss:   0.3936, Val Acc:   90.96%
Epoch 80/100


                                                           

Train Loss: 0.0068, Train Acc: 99.83%
Val Loss:   0.3921, Val Acc:   91.10%
Epoch 81/100


                                                           

Train Loss: 0.0055, Train Acc: 99.90%
Val Loss:   0.4125, Val Acc:   91.06%
Epoch 82/100


                                                           

Train Loss: 0.0051, Train Acc: 99.91%
Val Loss:   0.4024, Val Acc:   91.18%
Epoch 83/100


                                                           

Train Loss: 0.0055, Train Acc: 99.86%
Val Loss:   0.4029, Val Acc:   90.98%
Epoch 84/100


                                                           

Train Loss: 0.0052, Train Acc: 99.88%
Val Loss:   0.3902, Val Acc:   91.10%
Epoch 85/100


                                                           

Train Loss: 0.0047, Train Acc: 99.90%
Val Loss:   0.4085, Val Acc:   90.74%
Epoch 86/100


                                                           

Train Loss: 0.0040, Train Acc: 99.93%
Val Loss:   0.4003, Val Acc:   91.22%
Epoch 87/100


                                                           

Train Loss: 0.0039, Train Acc: 99.94%
Val Loss:   0.4130, Val Acc:   91.02%
Epoch 88/100


                                                           

Train Loss: 0.0040, Train Acc: 99.92%
Val Loss:   0.4211, Val Acc:   91.06%
Epoch 89/100


                                                           

Train Loss: 0.0039, Train Acc: 99.93%
Val Loss:   0.4071, Val Acc:   90.66%
Epoch 90/100


                                                           

Train Loss: 0.0031, Train Acc: 99.94%
Val Loss:   0.3965, Val Acc:   91.26%
Epoch 91/100


                                                           

Train Loss: 0.0031, Train Acc: 99.95%
Val Loss:   0.4118, Val Acc:   90.84%
Epoch 92/100


                                                           

Train Loss: 0.0033, Train Acc: 99.93%
Val Loss:   0.4104, Val Acc:   91.00%
Epoch 93/100


                                                           

Train Loss: 0.0033, Train Acc: 99.93%
Val Loss:   0.4060, Val Acc:   91.62%
Epoch 94/100


                                                           

Train Loss: 0.0035, Train Acc: 99.94%
Val Loss:   0.3964, Val Acc:   91.20%
Epoch 95/100


                                                           

Train Loss: 0.0030, Train Acc: 99.95%
Val Loss:   0.4017, Val Acc:   91.24%
Epoch 96/100


                                                           

Train Loss: 0.0032, Train Acc: 99.95%
Val Loss:   0.3912, Val Acc:   91.34%
Epoch 97/100


                                                           

Train Loss: 0.0029, Train Acc: 99.96%
Val Loss:   0.3915, Val Acc:   91.52%
Epoch 98/100


                                                           

Train Loss: 0.0029, Train Acc: 99.94%
Val Loss:   0.4062, Val Acc:   91.06%
Epoch 99/100


                                                           

Train Loss: 0.0029, Train Acc: 99.96%
Val Loss:   0.4012, Val Acc:   91.12%
Epoch 100/100


                                                           

Train Loss: 0.0031, Train Acc: 99.94%
Val Loss:   0.3986, Val Acc:   91.38%
Training Done


                                                           

Test accuracy: 0.4150451172828674




In [None]:
# Evaluate on the CIFAR-10 test loader; validate() returns (loss, accuracy),
# so the first element is discarded.
# NOTE(review): no cell shown here assigns `model` at notebook scope (main()
# keeps it local and returns only the history), so on a fresh kernel this cell
# would presumably fail with NameError — confirm and expose the trained model.
_, test_acc = validate(model, dataloader=test_loader_cifar, criterion=nn.CrossEntropyLoss(), device=device)
print(f"Test accuracy: {test_acc}")