In [10]:
# You might be required to have numpy2 second version, so if any problems run bellow. Please do not run it now, but when you get numpy version issues.
# pip install "numpy<2"

^C
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/surenmnatsakanyan/Desktop/Warsaw_Courses/Semester2/Deep Learning/CNN_1-Project1/venv/lib/python3.11/site-packages/pip/__main__.py", line 29, in <module>
    from pip._internal.cli.main import main as _main
  File "/Users/surenmnatsakanyan/Desktop/Warsaw_Courses/Semester2/Deep Learning/CNN_1-Project1/venv/lib/python3.11/site-packages/pip/_internal/cli/main.py", line 9, in <module>
    from pip._internal.cli.autocompletion import autocomplete
  File "/Users/surenmnatsakanyan/Desktop/Warsaw_Courses/Semester2/Deep Learning/CNN_1-Project1/venv/lib/python3.11/site-packages/pip/_internal/cli/autocompletion.py", line 10, in <module>
    from pip._internal.cli.main_parser import create_main_parser
  File "/Users/surenmnatsakanyan/Desktop/Warsaw_Courses/Semester2/Deep Learning/CNN_1-Project1/venv/lib/python3.11/site-packages/pip/_

In [2]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms
import numpy as np
from torch.utils.data import Subset
import re
import matplotlib.pyplot as plt
from collections import defaultdict
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [3]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )
    
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

# 2. Define the ResNet18 Architecture
class ResNet18(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet18, self).__init__()
        # Conv1: 7x7, stride=2, padding=3
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        
        # MaxPool: 3x3, stride=2, padding=1
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        # Residual layers
        self.layer1 = self._make_layer(64, 64, 2, stride=1)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, stride=2)
        self.layer4 = self._make_layer(256, 512, 2, stride=2)
        
        # Global Average Pooling & Fully Connected
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)
    
    def _make_layer(self, in_channels, out_channels, blocks, stride):
        layers = []
        # First block can downsample if stride != 1
        layers.append(ResidualBlock(in_channels, out_channels, stride))
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [15]:
DATA_DIR = "archive"  # Should contain train/, valid/, test/
TRAIN_DIR = os.path.join(DATA_DIR, "train")
VAL_DIR = os.path.join(DATA_DIR, "valid")
TEST_DIR = os.path.join(DATA_DIR, "test")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

basic_transform = transforms.ToTensor()
NUM_CLASSES = 10

train_dataset = ImageFolder(TRAIN_DIR, transform=basic_transform)
val_dataset   = ImageFolder(VAL_DIR, transform=basic_transform)
test_dataset  = ImageFolder(TEST_DIR, transform=basic_transform)

In [6]:
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0
    
    # Counter for batch logging
    batch_idx = 0
    
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        
        # Forward
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Metrics
        total_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        # Logging every 60 batches
        batch_idx += 1
        if batch_idx % 60 == 0:
            current_loss = loss.item()
            print(f"Batch {batch_idx}: Loss = {current_loss:.4f}")
    
    avg_loss = total_loss / total
    accuracy = 100.0 * correct / total
    return avg_loss, accuracy


def evaluate(model, loader, criterion):
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            total_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    avg_loss = total_loss / total
    accuracy = 100.0 * correct / total
    return avg_loss, accuracy

In [36]:
# 3. Hyper-Parameters
BATCH_SIZE = 20
LR = 0.001
EPOCHS = 5

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
model = ResNet18(num_classes=NUM_CLASSES).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

best_val_acc = 0.0
for epoch in range(EPOCHS):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    
    print(f"Epoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    
    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "model_bs_20_lr_0001/best_resnet_model_bs_20_lr_0001_5epoch.pth")

Batch 60: Loss = 1.8056
Batch 120: Loss = 2.0977
Batch 180: Loss = 1.7997
Batch 240: Loss = 1.7087
Batch 300: Loss = 1.6217
Batch 360: Loss = 1.9710
Batch 420: Loss = 2.0206
Batch 480: Loss = 2.0764
Batch 540: Loss = 2.1607
Batch 600: Loss = 1.6640
Batch 660: Loss = 1.5909
Batch 720: Loss = 1.3208
Batch 780: Loss = 2.5268
Batch 840: Loss = 1.5386
Batch 900: Loss = 1.6482
Batch 960: Loss = 1.7658
Batch 1020: Loss = 1.7659
Batch 1080: Loss = 2.1486
Batch 1140: Loss = 1.7515
Batch 1200: Loss = 1.5361
Batch 1260: Loss = 1.7780
Batch 1320: Loss = 2.0902
Batch 1380: Loss = 1.5508
Batch 1440: Loss = 1.6111
Batch 1500: Loss = 1.4598
Batch 1560: Loss = 1.9385
Batch 1620: Loss = 1.6070
Batch 1680: Loss = 1.7043
Batch 1740: Loss = 1.5222
Batch 1800: Loss = 1.6921
Batch 1860: Loss = 1.5958
Batch 1920: Loss = 1.6879
Batch 1980: Loss = 1.4940
Batch 2040: Loss = 1.6723
Batch 2100: Loss = 1.4558
Batch 2160: Loss = 1.3828
Batch 2220: Loss = 1.3518
Batch 2280: Loss = 1.9867
Batch 2340: Loss = 1.6502
Bat

In [37]:
model.load_state_dict(torch.load("model_bs_20_lr_0001/best_resnet_model_bs_20_lr_0001_5epoch.pth"))
test_loss, test_acc = evaluate(model, test_loader, criterion)
print(f"\nFinal Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")


Final Test Loss: 1.1093, Test Acc: 60.79%


In [6]:
model.load_state_dict(torch.load("model_bs_20_lr_0001/best_resnet_model_bs_20_lr_0001_5epoch.pth"))
for epoch in range(8):
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    print(f"Additional Epoch [{epoch+1}/8] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    torch.save(model.state_dict(), f"best_resnet18_extended_epoch{epoch+1}.pth")

Batch 60: Loss = 0.4394
Batch 120: Loss = 1.1485
Batch 180: Loss = 0.2155
Batch 240: Loss = 0.4654
Batch 300: Loss = 0.5661
Batch 360: Loss = 0.2631
Batch 420: Loss = 0.7390
Batch 480: Loss = 0.4772
Batch 540: Loss = 0.4001
Batch 600: Loss = 0.5324
Batch 660: Loss = 0.4020
Batch 720: Loss = 0.4426
Batch 780: Loss = 0.3682
Batch 840: Loss = 0.8991
Batch 900: Loss = 0.4453
Batch 960: Loss = 0.4470
Batch 1020: Loss = 0.2539
Batch 1080: Loss = 0.6637
Batch 1140: Loss = 0.2144
Batch 1200: Loss = 0.6035
Batch 1260: Loss = 0.3757
Batch 1320: Loss = 0.3518
Batch 1380: Loss = 0.6322
Batch 1440: Loss = 0.5881
Batch 1500: Loss = 0.4711
Batch 1560: Loss = 0.4010
Batch 1620: Loss = 0.8366
Batch 1680: Loss = 0.6814
Batch 1740: Loss = 0.9387
Batch 1800: Loss = 0.8650
Batch 1860: Loss = 0.4678
Batch 1920: Loss = 0.9068
Batch 1980: Loss = 0.4785
Batch 2040: Loss = 0.6485
Batch 2100: Loss = 0.5417
Batch 2160: Loss = 0.4534
Batch 2220: Loss = 0.4629
Batch 2280: Loss = 0.3838
Batch 2340: Loss = 0.4416
Bat

In [9]:
learning_rates = [0.0001,0.001, 0.01, 0.1]
batch_sizes = [40, 60, 80, 100]
EPOCHS = 10
for lr in learning_rates:
    for bs in batch_sizes:
        train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=2)
        val_loader   = DataLoader(val_dataset, batch_size=bs, shuffle=False, num_workers=2)
        test_loader  = DataLoader(test_dataset, batch_size=bs, shuffle=False, num_workers=2)
        
        # Initialize model and optimizer for current hyper-parameter combination
        model = ResNet18(num_classes=NUM_CLASSES).to(device)
        optimizer = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        
        best_val_acc = 0.0
        checkpoint = f"resnet18_lr{lr}_bs{bs}_best.pth"
        
        for epoch in range(EPOCHS):
            print(f"Epoch {epoch+1}/{EPOCHS} LR {lr} and batch_size {bs}")
            train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
            val_loss, val_acc = evaluate(model, val_loader, criterion)
            print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
                  f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), checkpoint)
        
        model.load_state_dict(torch.load(checkpoint))
        test_loss, test_acc = evaluate(model, test_loader, criterion)
        print(f"Final Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")        

Epoch 1/10 LR 0.0001 and batch_size 40



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.4 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<string>", line 1, in <module>
  File "/usr/local/Cellar/python@3.11/3.11.9_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/usr/local/Cellar/python@3.11/3.11.9_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
  File "/Users/surenmnatsakanyan/Desktop/Warsaw_Courses/Semester2/Deep Learning/CNN_1-Project1/v

In [45]:
# Implementing weight decay
weight_decays = [0.0001, 0.001, 0.01, 0.1]
lr = 0.001
bs = 100
EPOCHS = 13
for wd in weight_decays:
    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=2)
    val_loader   = DataLoader(val_dataset, batch_size=bs, shuffle=False, num_workers=2)
    test_loader  = DataLoader(test_dataset, batch_size=bs, shuffle=False, num_workers=2)
        
    # Initialize model and optimizer for current hyper-parameter combination
    model = ResNet18(num_classes=NUM_CLASSES).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr,  weight_decay=wd)
    criterion = nn.CrossEntropyLoss()
        
    best_val_acc = 0.0
    checkpoint = f"resnet18_lr{lr}_bs{bs}_weightDc{wd}.pth"
        
    for epoch in range(EPOCHS):
        print(f"Epoch {epoch+1}/{EPOCHS} LR {lr} and batch_size {bs} wd {wd}")
        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
        val_loss, val_acc = evaluate(model, val_loader, criterion)
        print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
                  f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
        if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), checkpoint)
        
    model.load_state_dict(torch.load(checkpoint))
    test_loss, test_acc = evaluate(model, test_loader, criterion)
    print(f"Final Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

Epoch 1/13 LR 0.001 and batch_size 100
Batch 60: Loss = 1.8833
Batch 120: Loss = 1.6927
Batch 180: Loss = 1.6174
Batch 240: Loss = 1.6187
Batch 300: Loss = 1.4798
Batch 360: Loss = 1.4686
Batch 420: Loss = 1.4245
Batch 480: Loss = 1.4150
Batch 540: Loss = 1.3438
Batch 600: Loss = 1.3819
Batch 660: Loss = 1.4622
Batch 720: Loss = 1.6318
Batch 780: Loss = 1.3914
Batch 840: Loss = 1.3612
Batch 900: Loss = 1.4747
  Train Loss: 1.5375 | Train Acc: 43.54% | Val Loss: 1.6150 | Val Acc: 42.62%
Epoch 2/13 LR 0.001 and batch_size 100
Batch 60: Loss = 1.3586
Batch 120: Loss = 1.2434
Batch 180: Loss = 1.3241
Batch 240: Loss = 1.2270
Batch 300: Loss = 1.3583
Batch 360: Loss = 1.2931
Batch 420: Loss = 1.4432
Batch 480: Loss = 1.2270
Batch 540: Loss = 1.2552
Batch 600: Loss = 0.9872
Batch 660: Loss = 1.2961
Batch 720: Loss = 1.3244
Batch 780: Loss = 1.3519
Batch 840: Loss = 1.2644
Batch 900: Loss = 1.2218
  Train Loss: 1.2592 | Train Acc: 54.28% | Val Loss: 1.4024 | Val Acc: 48.62%
Epoch 3/13 LR 0.00

In [7]:
class ResNet18_Dropout(nn.Module):
    def __init__(self, num_classes=10, dropout_prob=0.5):
        super(ResNet18_Dropout, self).__init__()
        # Conv1: 7x7, stride=2, padding=3
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        
        # MaxPool: 3x3, stride=2, padding=1
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        # Residual layers
        self.layer1 = self._make_layer(64, 64, 2, stride=1)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, stride=2)
        self.layer4 = self._make_layer(256, 512, 2, stride=2)
        
        # Global Average Pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Dropout layer added before the FC layer
        self.dropout = nn.Dropout(p=dropout_prob)
        # Fully Connected Layer
        self.fc = nn.Linear(512, num_classes)
    
    def _make_layer(self, in_channels, out_channels, blocks, stride):
        layers = []
        layers.append(ResidualBlock(in_channels, out_channels, stride))
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)  # Apply dropout here
        x = self.fc(x)
        return x

In [51]:
lr = 0.001
bs = 100
EPOCHS = 18
model = ResNet18_Dropout(num_classes=NUM_CLASSES).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_dataset, batch_size=bs, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_dataset, batch_size=bs, shuffle=False, num_workers=2)  
checkpoint = f"resnet18_lr{lr}_bs{bs}_drop_out_0.5.pth"
best_val_acc = 0.0
for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1}/{EPOCHS}")
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    
    print(f"Epoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint)
        
model.load_state_dict(torch.load(checkpoint))
test_loss, test_acc = evaluate(model, test_loader, criterion)
print(f"\nFinal Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

Epoch 1/18
Batch 60: Loss = 1.9162
Batch 120: Loss = 2.0143
Batch 180: Loss = 1.8944
Batch 240: Loss = 1.6966
Batch 300: Loss = 1.6603
Batch 360: Loss = 1.5144
Batch 420: Loss = 1.4640
Batch 480: Loss = 1.3634
Batch 540: Loss = 1.5885
Batch 600: Loss = 1.4111
Batch 660: Loss = 1.4478
Batch 720: Loss = 1.4875
Batch 780: Loss = 1.4945
Batch 840: Loss = 1.4675
Batch 900: Loss = 1.4661
Epoch [1/18] Train Loss: 1.6176 | Train Acc: 41.15% | Val Loss: 1.4402 | Val Acc: 46.64%
Epoch 2/18
Batch 60: Loss = 1.2241
Batch 120: Loss = 1.3590
Batch 180: Loss = 1.3764
Batch 240: Loss = 1.1423
Batch 300: Loss = 1.3065
Batch 360: Loss = 1.1892
Batch 420: Loss = 1.3948
Batch 480: Loss = 1.2771
Batch 540: Loss = 1.4246
Batch 600: Loss = 1.4870
Batch 660: Loss = 1.2383
Batch 720: Loss = 1.3962
Batch 780: Loss = 1.3794
Batch 840: Loss = 1.1717
Batch 900: Loss = 1.4169
Epoch [2/18] Train Loss: 1.2982 | Train Acc: 53.12% | Val Loss: 1.2917 | Val Acc: 54.04%
Epoch 3/18
Batch 60: Loss = 0.9653
Batch 120: Loss =

In [60]:
num_train = len(train_dataset)
few_shot_ratio = 0.2
few_shot_size = int(num_train * few_shot_ratio)
few_shot_indices = np.random.choice(num_train, few_shot_size, replace=False)
few_shot_train_dataset = Subset(train_dataset, few_shot_indices)
len(few_shot_train_dataset)

18000

In [62]:
# Few-shot DataLoader: using the reduced dataset
lr = 0.001
bs = 100
EPOCHS = 10
few_shot_train_loader = DataLoader(few_shot_train_dataset, batch_size=bs, shuffle=True, num_workers=2)
val_loader  = DataLoader(val_dataset, batch_size=bs, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=bs, shuffle=False, num_workers=2)
model = ResNet18(num_classes=NUM_CLASSES).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
print(f"Few-Show Learning on 18000 samples")
checkpoint = f"resnet18_lr{lr}_bs{bs}_few_shot-18000.pth"
best_val_acc = 0.0
for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1}/{EPOCHS}")
    train_loss, train_acc = train_one_epoch(model, few_shot_train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    
    print(f"Epoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint)
        
model.load_state_dict(torch.load(checkpoint))
test_loss, test_acc = evaluate(model, test_loader, criterion)
print(f"\nFinal Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

Few-Show Learning on 18000 samples
Epoch 1/10
Batch 60: Loss = 1.8033
Batch 120: Loss = 1.7145
Batch 180: Loss = 1.9002
Epoch [1/10] Train Loss: 1.8200 | Train Acc: 33.17% | Val Loss: 1.8907 | Val Acc: 31.27%
Epoch 2/10
Batch 60: Loss = 1.6396
Batch 120: Loss = 1.6243
Batch 180: Loss = 1.6345
Epoch [2/10] Train Loss: 1.5981 | Train Acc: 41.15% | Val Loss: 1.6817 | Val Acc: 39.10%
Epoch 3/10
Batch 60: Loss = 1.4286
Batch 120: Loss = 1.5280
Batch 180: Loss = 1.4164
Epoch [3/10] Train Loss: 1.4772 | Train Acc: 45.71% | Val Loss: 1.6992 | Val Acc: 40.28%
Epoch 4/10
Batch 60: Loss = 1.4132
Batch 120: Loss = 1.4344
Batch 180: Loss = 1.2734
Epoch [4/10] Train Loss: 1.3653 | Train Acc: 50.02% | Val Loss: 1.8454 | Val Acc: 36.40%
Epoch 5/10
Batch 60: Loss = 1.2202
Batch 120: Loss = 0.9384
Batch 180: Loss = 1.2721
Epoch [5/10] Train Loss: 1.2746 | Train Acc: 54.22% | Val Loss: 1.4987 | Val Acc: 46.19%
Epoch 6/10
Batch 60: Loss = 1.0798
Batch 120: Loss = 1.2046
Batch 180: Loss = 0.8470
Epoch [6/1

In [11]:
#Let's test also Spatial Dropout
class ResNet18_SpatialDropout(nn.Module):
    def __init__(self, num_classes=10, dropout_prob=0.5):
        super(ResNet18_SpatialDropout, self).__init__()
        # Conv1: 7x7, stride=2, padding=3
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1   = nn.BatchNorm2d(64)
        self.relu  = nn.ReLU(inplace=True)
        # MaxPool: 3x3, stride=2, padding=1
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Residual layers
        self.layer1 = self._make_layer(64, 64, blocks=2, stride=1)    # Output: (N, 64, 8, 8)
        self.layer2 = self._make_layer(64, 128, blocks=2, stride=2)   # Output: (N, 128, 4, 4)
        # Apply Spatial Dropout on intermediate feature maps after Layer 2
        self.spatial_dropout = nn.Dropout2d(p=dropout_prob)
        self.layer3 = self._make_layer(128, 256, blocks=2, stride=2)  # Output: (N, 256, 2, 2)
        self.layer4 = self._make_layer(256, 512, blocks=2, stride=2)  # Output: (N, 512, 1, 1)
        # Global Average Pooling and Fully Connected Layer
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)
    
    def _make_layer(self, in_channels, out_channels, blocks, stride):
        layers = []
        # The first block in the layer may downsample and/or change channels.
        layers.append(ResidualBlock(in_channels, out_channels, stride))
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x))) 
        x = self.maxpool(x)                   
        x = self.layer1(x)          
        x = self.layer2(x)               
        x = self.spatial_dropout(x)   
        x = self.layer3(x)           
        x = self.layer4(x)              
        x = self.avgpool(x)                
        x = torch.flatten(x, 1)               
        x = self.fc(x)                         
        return x

In [21]:
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomRotation(degrees=15),
        transforms.ToTensor()
    ]
)
train_dataset = ImageFolder(TRAIN_DIR, transform=train_transform)
val_dataset   = ImageFolder(VAL_DIR, transform=basic_transform)
test_dataset  = ImageFolder(TEST_DIR, transform=basic_transform)
lr = 0.001
bs = 100
train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True, num_workers= 2)
val_loader  = DataLoader(val_dataset, batch_size=bs, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=bs, shuffle=False, num_workers=2)
model = ResNet18_SpatialDropout(num_classes=10, dropout_prob=0.5).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [16]:
EPOCHS = 13
best_val_acc = 0.0
checkpoint = f"resnet18_lr{lr}_bs{bs}_data_aug_spatial_dropout.pth"
for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1}/{EPOCHS}")
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    
    print(f"Epoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint)
        
model.load_state_dict(torch.load(checkpoint))
test_loss, test_acc = evaluate(model, test_loader, criterion)
print(f"\nFinal Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

Epoch 1/13
Batch 60: Loss = 2.0431
Batch 120: Loss = 1.8371
Batch 180: Loss = 1.7491
Batch 240: Loss = 1.9268
Batch 300: Loss = 1.6043
Batch 360: Loss = 1.6432
Batch 420: Loss = 1.7053
Batch 480: Loss = 1.6597
Batch 540: Loss = 1.5237
Batch 600: Loss = 1.5422
Batch 660: Loss = 1.6333
Batch 720: Loss = 1.7824
Batch 780: Loss = 1.6826
Batch 840: Loss = 1.6450
Batch 900: Loss = 1.5491
Epoch [1/13] Train Loss: 1.7680 | Train Acc: 34.10% | Val Loss: 1.7762 | Val Acc: 37.77%
Epoch 2/13
Batch 60: Loss = 1.4788
Batch 120: Loss = 1.7629
Batch 180: Loss = 1.5412
Batch 240: Loss = 1.5569
Batch 300: Loss = 1.6144
Batch 360: Loss = 1.6789
Batch 420: Loss = 1.4230
Batch 480: Loss = 1.4854
Batch 540: Loss = 1.5872
Batch 600: Loss = 1.5668
Batch 660: Loss = 1.4766
Batch 720: Loss = 1.4374
Batch 780: Loss = 1.4660
Batch 840: Loss = 1.3530
Batch 900: Loss = 1.4471
Epoch [2/13] Train Loss: 1.5248 | Train Acc: 44.14% | Val Loss: 1.4378 | Val Acc: 47.57%
Epoch 3/13
Batch 60: Loss = 1.4244
Batch 120: Loss =

In [18]:
# Extra 10 epochs
EPOCHS = 10
best_val_acc = 0.0
model.load_state_dict(torch.load("drop-out/resnet18_lr0.001_bs100_data_aug_spatial_dropout.pth"))
for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1}/{EPOCHS}")
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    
    print(f"Epoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint)
        
model.load_state_dict(torch.load(checkpoint))
test_loss, test_acc = evaluate(model, test_loader, criterion)
print(f"\nFinal Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

Epoch 1/10
Batch 60: Loss = 1.0699
Batch 120: Loss = 1.0164
Batch 180: Loss = 0.8923
Batch 240: Loss = 0.9853
Batch 300: Loss = 0.9955
Batch 360: Loss = 0.9890
Batch 420: Loss = 0.9817
Batch 480: Loss = 0.9759
Batch 540: Loss = 0.8682
Batch 600: Loss = 1.0499
Batch 660: Loss = 1.0242
Batch 720: Loss = 0.8905
Batch 780: Loss = 1.0600
Batch 840: Loss = 0.9122
Batch 900: Loss = 1.0407
Epoch [1/10] Train Loss: 1.0202 | Train Acc: 63.90% | Val Loss: 1.0276 | Val Acc: 63.82%
Epoch 2/10
Batch 60: Loss = 1.0698
Batch 120: Loss = 0.9862
Batch 180: Loss = 1.0656
Batch 240: Loss = 1.1703
Batch 300: Loss = 1.1025
Batch 360: Loss = 0.8921
Batch 420: Loss = 0.8121
Batch 480: Loss = 1.0352
Batch 540: Loss = 1.0161
Batch 600: Loss = 0.9526
Batch 660: Loss = 1.0170
Batch 720: Loss = 1.3744
Batch 780: Loss = 1.0009
Batch 840: Loss = 1.0095
Batch 900: Loss = 0.9669
Epoch [2/10] Train Loss: 1.0024 | Train Acc: 64.52% | Val Loss: 0.9854 | Val Acc: 64.56%
Epoch 3/10
Batch 60: Loss = 0.9003
Batch 120: Loss =

OSError: Caught OSError in DataLoader worker process 1.
Original Traceback (most recent call last):
  File "/usr/local/Cellar/jupyterlab/4.3.1_1/libexec/lib/python3.12/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
           ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/Cellar/jupyterlab/4.3.1_1/libexec/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "/usr/local/Cellar/jupyterlab/4.3.1_1/libexec/lib/python3.12/site-packages/torchvision/datasets/folder.py", line 229, in __getitem__
    sample = self.loader(path)
             ^^^^^^^^^^^^^^^^^
  File "/usr/local/Cellar/jupyterlab/4.3.1_1/libexec/lib/python3.12/site-packages/torchvision/datasets/folder.py", line 268, in default_loader
    return pil_loader(path)
           ^^^^^^^^^^^^^^^^
  File "/usr/local/Cellar/jupyterlab/4.3.1_1/libexec/lib/python3.12/site-packages/torchvision/datasets/folder.py", line 247, in pil_loader
    img = Image.open(f)
          ^^^^^^^^^^^^^
  File "/usr/local/Cellar/jupyterlab/4.3.1_1/libexec/lib/python3.12/site-packages/PIL/Image.py", line 3480, in open
    prefix = fp.read(16)
             ^^^^^^^^^^^
OSError: [Errno 11] Resource deadlock avoided


In [22]:
model.load_state_dict(torch.load("drop-out/resnet18_lr0.001_bs100_data_aug_spatial_dropout_acc_67.pth"))
test_loss, test_acc = evaluate(model, test_loader, criterion)
print(f"\nFinal Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")


Final Test Loss: 0.9168, Test Acc: 67.52%


In [20]:
# Extra ten epochs
EPOCHS = 10
best_val_acc = 0.0
model.load_state_dict(torch.load("drop-out/the-best-resnet18_lr0.001_bs100_data_aug_spatial_dropout.pth"))
checkpoint = f"resnet18_lr{lr}_bs{bs}_data_aug_spatial_dropout.pth"
for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1}/{EPOCHS}")
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    
    print(f"Epoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), checkpoint)
        
model.load_state_dict(torch.load(checkpoint))
test_loss, test_acc = evaluate(model, test_loader, criterion)
print(f"\nFinal Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

Epoch 1/10
Batch 60: Loss = 0.7987
Batch 120: Loss = 0.9822
Batch 180: Loss = 0.9593
Batch 240: Loss = 0.8798
Batch 300: Loss = 0.7022
Batch 360: Loss = 0.9033
Batch 420: Loss = 0.8978
Batch 480: Loss = 0.9722
Batch 540: Loss = 0.9636
Batch 600: Loss = 0.8018
Batch 660: Loss = 0.8686
Batch 720: Loss = 0.8972
Batch 780: Loss = 1.0834
Batch 840: Loss = 0.9211
Batch 900: Loss = 0.9056
Epoch [1/10] Train Loss: 0.9247 | Train Acc: 67.13% | Val Loss: 1.1080 | Val Acc: 62.00%
Epoch 2/10
Batch 60: Loss = 1.0011
Batch 120: Loss = 1.1084
Batch 180: Loss = 0.8666
Batch 240: Loss = 0.9083
Batch 300: Loss = 0.9009
Batch 360: Loss = 1.0323
Batch 420: Loss = 0.7668
Batch 480: Loss = 0.9138
Batch 540: Loss = 0.9409
Batch 600: Loss = 0.8340
Batch 660: Loss = 0.7921
Batch 720: Loss = 0.9307
Batch 780: Loss = 0.9577
Batch 840: Loss = 0.9988
Batch 900: Loss = 0.8040
Epoch [2/10] Train Loss: 0.9177 | Train Acc: 67.49% | Val Loss: 0.9761 | Val Acc: 65.50%
Epoch 3/10
Batch 60: Loss = 0.7785
Batch 120: Loss =

In [None]:
# For final metric if issue arises please try it on Colab
model.load_state_dict(torch.load("drop-out/resnet18_lr0.001_bs100_data_aug_spatial_dropout_72.pth", map_location=torch.device('cpu')))
model.eval()

all_preds = []
all_true = []
all_probs = []  

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        probs = F.softmax(outputs, dim=1)
        preds = probs.argmax(dim=1)
        
        all_preds.extend(preds.cpu().numpy())
        all_true.extend(labels.cpu().numpy())
        all_probs.append(probs.cpu())

# Concatenate all probability tensors
all_probs = torch.cat(all_probs, dim=0)  # Shape: (N, num_classes)

# Compute overall metrics using sklearn
accuracy = accuracy_score(all_true, all_preds)
precision = precision_score(all_true, all_preds, average='macro')
recall = recall_score(all_true, all_preds, average='macro')
f1 = f1_score(all_true, all_preds, average='macro')

print("Classification Report:")
print(classification_report(all_true, all_preds, digits=4))

In [None]:
# For Plotting
# Adjust the path if needed
log_file_path = "logs/weight_decay_logs.txt"
results = defaultdict(lambda: defaultdict(dict))
header_pattern = re.compile(r"Epoch\s+1/\d+\s+LR\s+([\d\.eE+-]+)\s+and\s+batch_size\s+(\d+)\s+wd\s+([\d\.]+)")
train_val_pattern = re.compile(
    r"^\s*Train Loss:\s*([\d\.]+)\s*\|\s*Train Acc:\s*([\d\.]+)%\s*\|\s*Val Loss:\s*([\d\.]+)\s*\|\s*Val Acc:\s*([\d\.]+)%"
)

current_wd = None
current_epoch = 0
with open(log_file_path, "r") as f:
    for line in f:
        header_match = header_pattern.search(line)
        if header_match:
            current_lr = float(header_match.group(1))
            current_bs = int(header_match.group(2))
            current_wd = float(header_match.group(3))
            print(current_wd)
            current_epoch = 0
            continue

        tv_match = train_val_pattern.search(line)
        if tv_match and current_lr is not None and current_bs is not None:
            current_epoch+=1;
            train_loss_str = tv_match.group(1)
            train_acc_str  = tv_match.group(2)
            val_loss_str   = tv_match.group(3)
            val_acc_str    = tv_match.group(4)

            # Convert to floats
            train_acc = float(train_acc_str)
            val_acc   = float(val_acc_str)

            # Store in our results dictionary
            results[current_wd][current_epoch] = {
                "train_acc": train_acc,
                "val_acc":   val_acc
            }
    

In [None]:
num_wds = len(results)
fig, axs = plt.subplots(nrows=num_wds, ncols=1, figsize=(10, 5*num_wds), sharex=True)
if num_wds == 1:
    axs = [axs]

for ax, wd in zip(axs, sorted(results.keys())):
    epochs = sorted(results[wd].keys())
    train_accs = [results[wd][e]["train_acc"] for e in epochs]
    val_accs = [results[wd][e]["val_acc"] for e in epochs]

    ax.plot(epochs, train_accs, label="Train Accuracy", linestyle='-', marker='o')
    ax.plot(epochs, val_accs, label="Validation Accuracy", linestyle='--', marker='x')
    ax.set_title(f"Weight Decay: {wd}")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Accuracy (%)")
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig("weight_decay_parm_validation_test.png")
plt.show()

In [None]:
# Adjust the path if needed
log_file_path = "logs/few-shots-18000.txt"
results = defaultdict(lambda: defaultdict(dict))
header_pattern = re.compile(r"Epoch\s+1/\d+")
train_val_pattern = re.compile(
    r"Epoch\s*\[(\d+)/(\d+)\]\s+Train Loss:\s*([\d\.]+)\s*\|\s*Train Acc:\s*([\d\.]+)%\s*\|\s*Val Loss:\s*([\d\.]+)\s*\|\s*Val Acc:\s*([\d\.]+)%"
)
current_epoch = 0
with open(log_file_path, "r") as f:
    for line in f:
        header_match = header_pattern.search(line)
        if header_match:
            print("in")
            current_epoch = 0
            continue

        tv_match = train_val_pattern.search(line)
        if tv_match and current_lr is not None:
            current_epoch+=1;
            train_acc_str  = tv_match.group(4)
            val_acc_str   = tv_match.group(6)

            # Convert to floats
            train_acc = float(train_acc_str)
            val_acc   = float(val_acc_str)

            # Store in our results dictionary
            results[current_epoch] = {
                "train_acc": train_acc,
                "val_acc":   val_acc
            }
           

In [None]:
epochs = len(results)

plt.figure(figsize=(10,6))
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Few Shots learning(18000samples) lr 0.001 bs 100")
plt.plot(results.keys(), [results[e]["train_acc"] for e in results.keys()], linestyle="-", marker=".") 
plt.plot(results.keys(), [results[e]["val_acc"] for e in results.keys()], linestyle="--", marker="x") 
plt.savefig("few_shots_lr_0001_bs_100.png")
plt.grid(True)
plt.show()

In [None]:
log_file_paths = [
    "logs/data_aug_spatial_dropot_lr_0001_bs_100.txt",
    "logs/data_aug_spatial_dropout_lr_0001_bs_100_extra10.txt",
    "logs/data_aug_spatial_droput_lr_0001_bs_100._extra10_after10.txt",
    "logs/colab_1_st_30ep_data_aug_spatial_drop_out_lr_0001_bs_100.txt",
    "logs/colab_2nd_40ep_data_aug_spatial_drop_out_lr_0001_bs_100.txt"
]
results = defaultdict(lambda: defaultdict(dict))
header_pattern = re.compile(r"Epoch\s+1/10\s+m")
train_val_pattern = re.compile(
    r"Epoch\s*\[(\d+)/(\d+)\]\s+Train Loss:\s*([\d\.]+)\s*\|\s*Train Acc:\s*([\d\.]+)%\s*\|\s*Val Loss:\s*([\d\.]+)\s*\|\s*Val Acc:\s*([\d\.]+)%"
)
current_epoch = 0
for file_path in log_file_paths:
    with open(file_path, "r") as f:
        for line in f:
            tv_match = train_val_pattern.search(line)
            if tv_match is not None:
                current_epoch+=1;
                train_acc_str  = tv_match.group(4)
                val_acc_str   = tv_match.group(6)

            # Convert to floats
                train_acc = float(train_acc_str)
                val_acc   = float(val_acc_str)

            # Store in our results dictionary
                results[current_epoch] = {
                    "train_acc": train_acc,
                    "val_acc":   val_acc
                }
               

In [None]:
results.keys()
epochs = len(results)

plt.figure(figsize=(10, 6))
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Data Augemnt. Spartial Drp. Out lr 0.001 bs 100")
plt.plot(results.keys(), [results[e]["train_acc"] for e in results.keys()], linestyle="-", marker=".")
plt.plot(results.keys(), [results[e]["val_acc"] for e in results.keys()], linestyle="--", marker="x")
plt.savefig("Data_Augument_Spatial_Drp_lr_0001_bs_100.png")
plt.grid(True)
plt.show()

In [None]:
log_file_paths = [
    "logs/5_epoch_trail_bs_20_lr_0001.txt",
    "logs/8_additional_epoch_trail_bs_20_lr_0001.txt",
]

results = defaultdict(lambda: defaultdict(dict))
header_pattern = re.compile(r"Epoch\s+1/10\s+m")
train_val_pattern = re.compile(
    r"Epoch\s*\[(\d+)/(\d+)\]\s+Train Loss:\s*([\d\.]+)\s*\|\s*Train Acc:\s*([\d\.]+)%\s*\|\s*Val Loss:\s*([\d\.]+)\s*\|\s*Val Acc:\s*([\d\.]+)%"
)
current_epoch = 0
for file_path in log_file_paths:
    with open(file_path, "r") as f:
        for line in f:
            tv_match = train_val_pattern.search(line)
            if tv_match is not None:
                current_epoch+=1;
                train_acc_str  = tv_match.group(4)
                val_acc_str   = tv_match.group(6)
                train_acc = float(train_acc_str)
                val_acc   = float(val_acc_str)
                results[current_epoch] = {
                    "train_acc": train_acc,
                    "val_acc":   val_acc
                }
  

In [None]:
epochs = len(results)

plt.figure(figsize=(10,6))
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.title("Batche size 20 learning rate 0.001")
plt.plot(results.keys(), [results[e]["train_acc"] for e in results.keys()], linestyle="-", marker=".") 
plt.plot(results.keys(), [results[e]["val_acc"] for e in results.keys()], linestyle="--", marker="x") 
plt.savefig("Bs_20_lr_0001_accuracy.png")
plt.grid(True)
plt.show()