# Computer Vision Assignment 2
## ResNet18 Training and Network Visualization

This notebook implements the complete CV Assignment 2 with:
- Part 1: Convolutional Blocks of ResNet18 (Baseline, Resized, Modified Architecture)
- Part 2: Network Visualization (Saliency Maps, Adversarial Attacks)

## Setup and Imports

In [1]:
import os
import time
import warnings
from collections import defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
import wandb
from matplotlib.patches import Patch
from PIL import Image
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score
from torch.utils.data import TensorDataset, DataLoader
from torchvision.datasets import ImageFolder
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries used below.
warnings.filterwarnings('ignore')

# Set style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

# Set device
# `device` is a notebook-wide global: later cells pass it to train_model and
# use it to place the dummy input for the spatial-dimension analysis.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
print(f"PyTorch version: {torch.__version__}")

ModuleNotFoundError: No module named 'torch'

In [None]:
# Initialize Wandb
# `wandb_available` is a notebook-wide flag read by train_model and by every
# experiment cell to decide whether to create/log/finish a Wandb run.
try:
    wandb.login()
    wandb_available = True
except Exception as exc:
    # Catch only ordinary errors: a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and make the cell impossible to interrupt.
    wandb_available = False
    print(f"Warning: Wandb login failed ({exc}). Continuing without Wandb logging.")

print("Setup complete!")

## Part 1: Convolutional Blocks of ResNet18

### Section 1.1: Baseline Training ResNet (36×36 images)

#### 1.1.1 Data Loading and Utilities

In [None]:
def load_q1_dataset(data_dir, image_size, split='train'):
    """Load one split of the Q1 dataset from .pt files, resizing if needed.

    Reads ``{split}_data.pt`` and ``{split}_labels.pt`` from ``data_dir``,
    scales pixel values by 1/255, optionally resizes every image to
    ``image_size`` x ``image_size`` and applies ImageNet mean/std normalization.

    Args:
        data_dir: Directory containing the ``*_data.pt`` / ``*_labels.pt`` files.
        image_size: Target side length (images are made square).
        split: File-name prefix of the split to load, e.g. ``'train'`` or ``'test'``.

    Returns:
        ``(dataset, num_classes)`` where ``dataset`` is a ``TensorDataset`` of
        normalized images and long labels, and ``num_classes`` is the number of
        distinct label values found in this split.
    """
    data_path = os.path.join(data_dir, f"{split}_data.pt")
    labels_path = os.path.join(data_dir, f"{split}_labels.pt")

    # NOTE(review): torch.load unpickles the file; only use with trusted data.
    data = torch.load(data_path).float() / 255.0  # Normalize to [0,1]
    labels = torch.load(labels_path).long()

    # Determine number of classes
    # NOTE(review): counted per split; a split missing a class would report fewer.
    num_classes = len(torch.unique(labels))

    print(f"{split.upper()} Data shape: {data.shape}, Labels shape: {labels.shape}")
    print(f"Original image size: {data.shape[2]}x{data.shape[3]}, Num classes: {num_classes}")

    # Resize if needed. Both spatial axes are checked so that non-square inputs
    # whose height already equals `image_size` are still resized.
    # Assumes data is laid out as (N, C, H, W) with 3 channels -- TODO confirm.
    height, width = data.shape[2], data.shape[3]
    if height != image_size or width != image_size:
        print(f"Resizing images from {data.shape[2]}x{data.shape[3]} to {image_size}x{image_size}")
        # Build the transforms once instead of once per image.
        to_pil = transforms.ToPILImage()
        resize = transforms.Resize((image_size, image_size))
        to_tensor = transforms.ToTensor()
        data = torch.stack([to_tensor(resize(to_pil(img))) for img in data])
        print(f"Resized data shape: {data.shape}")

    # ImageNet normalization (per-channel mean/std broadcast over H and W)
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    data = (data - mean) / std

    dataset = TensorDataset(data, labels)
    return dataset, num_classes

def create_dataloader(dataset, batch_size=32, shuffle=True):
    """Wrap ``dataset`` in a single-process ``DataLoader``.

    Args:
        dataset: Any map-style dataset (the notebook passes ``TensorDataset``s).
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the data every epoch.

    Returns:
        A ``DataLoader`` with ``num_workers=0``. Pinned host memory is only
        requested when CUDA is available, since it is only useful for
        host-to-GPU copies.
    """
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=torch.cuda.is_available(),
    )

# Load datasets
# Paths and target sizes shared by the loading cells; the directory is
# relative to the notebook's working directory.
dataset_dir = "CV S26 A2 Datasets/Q1"
image_size_36 = 36
image_size_224 = 224

print("\n=== Loading Q1 Dataset (36x36) ===")
# num_classes is taken from the training split and reused for every model head.
train_dataset_36, num_classes = load_q1_dataset(dataset_dir, image_size_36, 'train')
test_dataset_36, _ = load_q1_dataset(dataset_dir, image_size_36, 'test')

# Create dataloaders
# Only the training loader is shuffled; the test loader keeps a fixed order.
batch_size = 32
train_loader_36 = create_dataloader(train_dataset_36, batch_size=batch_size, shuffle=True)
test_loader_36 = create_dataloader(test_dataset_36, batch_size=batch_size, shuffle=False)

print(f"\nNumber of classes: {num_classes}")
print(f"Number of training batches (36x36): {len(train_loader_36)}")
print(f"Number of test batches (36x36): {len(test_loader_36)}")

In [None]:
# Load 224x224 dataset for later use
# Same files as the 36x36 loaders; load_q1_dataset resizes when the stored
# image height differs from the requested size.
# NOTE(review): the whole resized dataset is held in memory as one tensor --
# confirm this fits for the dataset size in use.
print("\n=== Loading Q1 Dataset (224x224) ===")
train_dataset_224, _ = load_q1_dataset(dataset_dir, image_size_224, 'train')
test_dataset_224, _ = load_q1_dataset(dataset_dir, image_size_224, 'test')

# Reuses the batch_size defined with the 36x36 loaders.
train_loader_224 = create_dataloader(train_dataset_224, batch_size=batch_size, shuffle=True)
test_loader_224 = create_dataloader(test_dataset_224, batch_size=batch_size, shuffle=False)

print(f"\nNumber of training batches (224x224): {len(train_loader_224)}")
print(f"Number of test batches (224x224): {len(test_loader_224)}")

#### 1.1.2 Training Loop and Evaluation Functions

In [None]:
class EarlyStopping:
    """Track the best validation loss and signal when training should stop.

    Call the instance once per epoch with the current loss and the model.
    Whenever the loss strictly improves, the patience counter is reset and a
    snapshot of the model weights is stored in ``best_model``; otherwise the
    counter is incremented. ``should_stop()`` becomes true once the counter
    reaches ``patience``.

    Attributes are documented inline in ``__init__``.
    """
    def __init__(self, patience=10, verbose=True):
        self.patience = patience   # epochs without improvement tolerated
        self.verbose = verbose     # print the counter every 5 stale epochs
        self.counter = 0           # consecutive epochs without improvement
        self.best_score = None     # lowest loss seen so far
        self.best_model = None     # state-dict snapshot taken at best_score

    @staticmethod
    def _snapshot(model):
        """Return a detached copy of the model's state dict.

        ``state_dict().copy()`` would only copy the dict; its tensors would
        still alias the live parameters and keep changing as training goes on.
        Cloning each tensor freezes the weights at this point in time.
        """
        return {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    def __call__(self, val_loss, model):
        """Record ``val_loss`` for this epoch and snapshot ``model`` on improvement."""
        if self.best_score is None or val_loss < self.best_score:
            self.best_score = val_loss
            self.counter = 0
            self.best_model = self._snapshot(model)
        else:
            self.counter += 1
            if self.verbose and self.counter % 5 == 0:
                print(f"EarlyStopping counter: {self.counter}/{self.patience}")

    def should_stop(self):
        """Return True once ``patience`` epochs have passed without improvement."""
        return self.counter >= self.patience

def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimization pass over ``train_loader``.

    Returns:
        ``(mean_loss, accuracy)`` where the loss is the per-sample average of
        the batch losses and accuracy is the fraction of correct top-1
        predictions, both measured while the weights are being updated.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Standard step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so the epoch mean is per-sample.
        loss_sum += batch_loss.item() * batch_x.size(0)
        predicted = torch.max(logits.data, 1)[1]
        n_seen += batch_y.size(0)
        n_correct += (predicted == batch_y).sum().item()

    return loss_sum / n_seen, n_correct / n_seen

def evaluate(model, test_loader, criterion, device):
    """Score ``model`` on ``test_loader`` without updating any weights.

    Returns:
        ``(mean_loss, accuracy, weighted_f1, predictions, labels)``.
        ``predictions`` and ``labels`` are flat Python lists accumulated in
        loader order; the F1 score is computed over them with
        ``average='weighted'``.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    pred_log, label_log = [], []

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)

            # Weight by batch size so the final mean is per-sample.
            loss_sum += batch_loss.item() * batch_x.size(0)
            predicted = torch.max(logits.data, 1)[1]
            n_seen += batch_y.size(0)
            n_correct += (predicted == batch_y).sum().item()

            # Move to CPU before collecting so sklearn can consume the lists.
            pred_log.extend(predicted.cpu().numpy())
            label_log.extend(batch_y.cpu().numpy())

    weighted_f1 = f1_score(label_log, pred_log, average='weighted', zero_division=0)

    return loss_sum / n_seen, n_correct / n_seen, weighted_f1, pred_log, label_log

def train_model(model, train_loader, test_loader, criterion, optimizer, scheduler, 
                 num_epochs=50, device=None, model_name="model", use_wandb=False):
    """Train ``model`` with early stopping and return it with its best weights.

    Each epoch runs ``train_epoch`` then ``evaluate``, records the metrics,
    steps the scheduler (if any) and feeds the evaluation loss to an
    ``EarlyStopping`` tracker with a patience of 10 epochs.

    Args:
        model: Network to train; moved to ``device``.
        train_loader: Loader used for optimization.
        test_loader: Loader used for per-epoch evaluation and early stopping.
            NOTE(review): the same loader drives model selection and the final
            reported metrics, so those metrics are optimistically biased.
        criterion: Loss function.
        optimizer: Optimizer already bound to ``model``'s parameters.
        scheduler: LR scheduler stepped once per epoch, or ``None``.
        num_epochs: Maximum number of epochs.
        device: Target device; defaults to CPU when ``None``.
        model_name: Label used in the summary printout.
        use_wandb: Log per-epoch metrics to Wandb (also requires the
            notebook-level ``wandb_available`` flag to be true).

    Returns:
        ``(model, history, final_acc, final_loss, final_f1, final_preds,
        final_labels, training_time)`` where ``history`` maps metric names to
        per-epoch lists and ``training_time`` is in seconds.
    """
    if device is None:
        device = torch.device('cpu')
    
    model = model.to(device)
    early_stopping = EarlyStopping(patience=10, verbose=False)
    
    history = {
        'train_loss': [],
        'train_acc': [],
        'test_loss': [],
        'test_acc': [],
        'test_f1': []
    }
    
    start_time = time.time()
    
    for epoch in range(num_epochs):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc, test_f1, _, _ = evaluate(model, test_loader, criterion, device)
        
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['test_loss'].append(test_loss)
        history['test_acc'].append(test_acc)
        history['test_f1'].append(test_f1)
        
        if scheduler is not None:
            scheduler.step()
        
        early_stopping(test_loss, model)
        
        # Console output is throttled to every 10th epoch; Wandb gets every epoch.
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}] | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | "
                  f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.4f} | F1: {test_f1:.4f}")
        
        if use_wandb and wandb_available:
            wandb.log({
                'epoch': epoch + 1,
                'train_loss': train_loss,
                'train_acc': train_acc,
                'test_loss': test_loss,
                'test_acc': test_acc,
                'test_f1': test_f1
            })
        
        if early_stopping.should_stop():
            print(f"Early stopping at epoch {epoch+1}")
            break
    
    training_time = time.time() - start_time
    
    # Restore the best checkpoint whether or not early stopping fired, so a run
    # that used all epochs is also evaluated and returned with its best weights.
    # best_model stays None only when no epoch ran (num_epochs == 0).
    if early_stopping.best_model is not None:
        model.load_state_dict(early_stopping.best_model)
    
    # Final evaluation with best model
    final_loss, final_acc, final_f1, final_preds, final_labels = evaluate(model, test_loader, criterion, device)
    
    print(f"\n{'='*80}")
    print(f"Training {model_name} completed in {training_time/60:.2f} minutes")
    print(f"Final Test Accuracy: {final_acc:.4f} | Final Test Loss: {final_loss:.4f} | Final F1 Score: {final_f1:.4f}")
    print(f"{'='*80}\n")
    
    return model, history, final_acc, final_loss, final_f1, final_preds, final_labels, training_time

print("Training utilities defined!")

#### 1.1.3 ResNet18 from Scratch on 36×36 Images

In [None]:
# ResNet18 from scratch on 36x36 images
print("\n" + "="*80)
print("EXPERIMENT 1.1.1: ResNet18 from Scratch (36x36 images)")
print("="*80)

# Unmodified torchvision ResNet18 without pretrained weights; only the final
# fully connected layer is replaced to output `num_classes` logits.
# NOTE(review): `pretrained=` may be deprecated in the installed torchvision
# in favour of `weights=` -- confirm against the environment.
model_scratch_36 = models.resnet18(pretrained=False)
model_scratch_36.fc = nn.Linear(512, num_classes)

# Adam at lr=1e-3; the learning rate is multiplied by 0.1 every 20 epochs.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_scratch_36.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

# One Wandb run per experiment, opened here and closed after training.
if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_36x36_scratch", 
               config={'model': 'ResNet18', 'image_size': 36, 'pretrained': False, 'lr': 0.001})

# train_model returns an 8-tuple; predictions/labels are kept for the
# confusion-matrix cell and the history for the training-curve plot.
model_scratch_36, hist_scratch_36, acc_scratch_36, loss_scratch_36, f1_scratch_36, \
    preds_scratch_36, labels_scratch_36, time_scratch_36 = train_model(
    model_scratch_36, train_loader_36, test_loader_36, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18 (Scratch, 36x36)", use_wandb=wandb_available
)

if wandb_available:
    wandb.finish()

#### 1.1.4 ResNet18 Pretrained on ImageNet (36×36)

In [None]:
print("\n" + "="*80)
print("EXPERIMENT 1.1.2: ResNet18 Pretrained on ImageNet (36x36 images)")
print("="*80)

# Same architecture as the from-scratch run but initialized with
# `pretrained=True`; the replaced `fc` head starts from fresh weights.
model_pretrained_36 = models.resnet18(pretrained=True)
model_pretrained_36.fc = nn.Linear(512, num_classes)

# All parameters are fine-tuned (nothing is frozen), at a 10x smaller
# learning rate than the from-scratch run.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_pretrained_36.parameters(), lr=0.0001)  # Lower LR for pretrained
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_36x36_pretrained",
               config={'model': 'ResNet18', 'image_size': 36, 'pretrained': True, 'lr': 0.0001})

# Results are stored under the `*_pretrained_36` names used by the comparison cells.
model_pretrained_36, hist_pretrained_36, acc_pretrained_36, loss_pretrained_36, f1_pretrained_36, \
    preds_pretrained_36, labels_pretrained_36, time_pretrained_36 = train_model(
    model_pretrained_36, train_loader_36, test_loader_36, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18 (Pretrained, 36x36)", use_wandb=wandb_available
)

if wandb_available:
    wandb.finish()

#### 1.1.5 Spatial Dimension Analysis

In [None]:
"""Analyze spatial dimensions through ResNet18 layers"""

def analyze_spatial_dimensions(model, image_size):
    """Record the output shape of each major ResNet stage for one input size.

    Temporarily attaches forward hooks to ``conv1``, ``maxpool``,
    ``layer1``..``layer4`` and ``avgpool``, pushes a single random
    ``(1, 3, image_size, image_size)`` tensor through the model and collects
    the output shapes.

    Args:
        model: Module exposing the ResNet attribute names listed above.
        image_size: Side length of the square dummy input.

    Returns:
        Dict mapping a display name to the ``torch.Size`` of that stage's
        output, in registration order.

    Side effects:
        The model is left in eval mode. Hooks are always removed before
        returning, so repeated calls do not accumulate hooks on the model.
    """
    activations = {}

    def get_activation(name):
        def hook(module, inputs, output):
            if isinstance(output, torch.Tensor):
                activations[name] = output.shape
        return hook

    stages = [
        ('conv1', model.conv1),
        ('maxpool', model.maxpool),
        ('layer1 (block1)', model.layer1),
        ('layer2 (block2)', model.layer2),
        ('layer3 (block3)', model.layer3),
        ('layer4 (block4)', model.layer4),
        ('avgpool', model.avgpool),
    ]

    # Keep the handles so every hook can be detached afterwards.
    handles = [module.register_forward_hook(get_activation(name)) for name, module in stages]

    try:
        # Place the dummy input on the model's own device so the call works
        # wherever the model currently lives.
        first_param = next(model.parameters(), None)
        target_device = first_param.device if first_param is not None else torch.device('cpu')

        # Forward pass with dummy input
        model.eval()
        with torch.no_grad():
            dummy_input = torch.randn(1, 3, image_size, image_size).to(target_device)
            _ = model(dummy_input)
    finally:
        for handle in handles:
            handle.remove()

    return activations

print("\n" + "="*80)
print("SPATIAL DIMENSION ANALYSIS (36x36 input)")
print("="*80)

# The trained from-scratch 36x36 model is used purely as a shape probe;
# only tensor shapes are read, not activations.
activations_36 = analyze_spatial_dimensions(model_scratch_36, 36)

# One row per hooked stage: full output shape, HxW and channel count.
# NOTE(review): dimension_table is filled here but not read anywhere in the
# visible part of the notebook.
dimension_table = []
for layer_name, shape in activations_36.items():
    spatial_dims = f"{shape[2]}x{shape[3]}" if len(shape) > 2 else "N/A"
    channels = shape[1] if len(shape) > 1 else "N/A"
    print(f"{layer_name:30} | Output Shape: {shape} | Spatial: {spatial_dims} | Channels: {channels}")
    dimension_table.append({'Layer': layer_name, 'Shape': shape, 'Spatial': spatial_dims})

print("\n" + "="*80)
print("SPATIAL DIMENSION ANALYSIS (224x224 input)")
print("="*80)

# Same model probed with a 224x224 dummy input for comparison.
activations_224 = analyze_spatial_dimensions(model_scratch_36, 224)

for layer_name, shape in activations_224.items():
    spatial_dims = f"{shape[2]}x{shape[3]}" if len(shape) > 2 else "N/A"
    channels = shape[1] if len(shape) > 1 else "N/A"
    print(f"{layer_name:30} | Output Shape: {shape} | Spatial: {spatial_dims} | Channels: {channels}")

# Hard-coded summary text; it is not derived from the shapes measured above.
print("\nKEY OBSERVATION:")
print("36x36: Initial stride=2 + maxpool → 9x9")
print("224x224: Initial stride=2 + maxpool → 56x56")
print("This shows why 36x36 images lose information quickly - spatial dimensions reduce too fast.")

### Section 1.2: Training ResNet on Resized Images (224×224)

In [None]:
print("\n" + "="*80)
print("EXPERIMENT 1.2.1: ResNet18 from Scratch (224x224 images)")
print("="*80)

# Same recipe as the 36x36 from-scratch run; only the input resolution
# (via the 224x224 loaders) differs.
model_scratch_224 = models.resnet18(pretrained=False)
model_scratch_224.fc = nn.Linear(512, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_scratch_224.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_224x224_scratch",
               config={'model': 'ResNet18', 'image_size': 224, 'pretrained': False, 'lr': 0.001})

# Results are stored under the `*_scratch_224` names used by the comparison cells.
model_scratch_224, hist_scratch_224, acc_scratch_224, loss_scratch_224, f1_scratch_224, \
    preds_scratch_224, labels_scratch_224, time_scratch_224 = train_model(
    model_scratch_224, train_loader_224, test_loader_224, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18 (Scratch, 224x224)", use_wandb=wandb_available
)

if wandb_available:
    wandb.finish()

In [None]:
print("\n" + "="*80)
print("EXPERIMENT 1.2.2: ResNet18 Pretrained on ImageNet (224x224 images)")
print("="*80)

# Pretrained backbone with a fresh `num_classes` head, fine-tuned end to end
# on the 224x224 loaders.
model_pretrained_224 = models.resnet18(pretrained=True)
model_pretrained_224.fc = nn.Linear(512, num_classes)

# Same lower learning rate (1e-4) as the pretrained 36x36 run.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_pretrained_224.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_224x224_pretrained",
               config={'model': 'ResNet18', 'image_size': 224, 'pretrained': True, 'lr': 0.0001})

# Results are stored under the `*_pretrained_224` names used by the comparison cells.
model_pretrained_224, hist_pretrained_224, acc_pretrained_224, loss_pretrained_224, f1_pretrained_224, \
    preds_pretrained_224, labels_pretrained_224, time_pretrained_224 = train_model(
    model_pretrained_224, train_loader_224, test_loader_224, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18 (Pretrained, 224x224)", use_wandb=wandb_available
)

if wandb_available:
    wandb.finish()

In [None]:
# Cost-Benefit Analysis
# Side-by-side table of the four baseline runs (two resolutions x two
# initializations) followed by relative accuracy/time changes for the
# from-scratch pair. pandas (`pd`) comes from the notebook's import cell.
print("\n" + "="*80)
print("COST-BENEFIT ANALYSIS: 36x36 vs 224x224")
print("="*80)

analysis_data = {
    'Model': ['ResNet18 Scratch 36x36', 'ResNet18 Scratch 224x224', 
              'ResNet18 Pretrained 36x36', 'ResNet18 Pretrained 224x224'],
    'Accuracy': [acc_scratch_36, acc_scratch_224, acc_pretrained_36, acc_pretrained_224],
    'Final Loss': [loss_scratch_36, loss_scratch_224, loss_pretrained_36, loss_pretrained_224],
    'F1 Score': [f1_scratch_36, f1_scratch_224, f1_pretrained_36, f1_pretrained_224],
    'Training Time (min)': [time_scratch_36/60, time_scratch_224/60, 
                            time_pretrained_36/60, time_pretrained_224/60]
}

comparison_df = pd.DataFrame(analysis_data)
print("\n", comparison_df.to_string(index=False))

print("\nKEY FINDINGS:")
# Relative changes in percent. Guard the accuracy ratio: a baseline accuracy of
# exactly 0 would otherwise raise ZeroDivisionError.
if acc_scratch_36 > 0:
    acc_improvement = (acc_scratch_224 - acc_scratch_36) / acc_scratch_36 * 100
else:
    acc_improvement = float('nan')
time_increase = (time_scratch_224 - time_scratch_36) / time_scratch_36 * 100
# Pixel-count ratio between the two resolutions: (224 / 36) ** 2 ~= 38.7.
pixel_ratio = (image_size_224 / image_size_36) ** 2
print(f"1. Accuracy improvement (scratch): {acc_improvement:.2f}%")
print(f"2. Training time increase: {time_increase:.2f}%")
print(f"3. Memory usage increases with image resolution (224x224 = ~{pixel_ratio:.0f}x more pixels)")
print(f"4. 224x224 allows better feature extraction in early layers (56x56 vs 9x9 after maxpool)")

### Section 1.3: Modifying ResNet18 Architecture for 36×36 Images

In [None]:
# Modified architecture versions
# Collects accuracy/loss/F1 per modified model, keyed like 'Mod1_scratch'.
modified_results = {}

print("\n" + "="*80)
print("EXPERIMENT 1.3: ARCHITECTURAL MODIFICATIONS FOR 36x36 IMAGES")
print("="*80)

# Modification 1: smaller, non-strided stem convolution
class ResNet18_Mod1(nn.Module):
    """ResNet18 whose stem conv is replaced by a 5x5, stride-1 convolution.

    The max-pool after the stem is kept. The new ``conv1`` and the new
    ``num_classes`` head are freshly initialized even when ``pretrained=True``;
    every other layer keeps the weights returned by ``models.resnet18``.
    """
    def __init__(self, num_classes, pretrained=False):
        super().__init__()
        backbone = models.resnet18(pretrained=pretrained)
        # Stem: kernel 7 -> 5, stride 2 -> 1 (padding 2 keeps the spatial size).
        backbone.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5,
                                   stride=1, padding=2, bias=False)
        backbone.fc = nn.Linear(in_features=512, out_features=num_classes)
        self.base = backbone

    def forward(self, x):
        """Delegate to the wrapped ResNet18."""
        return self.base(x)

# Modification 2: non-strided 5x5 stem and no max pooling
class ResNet18_Mod2(nn.Module):
    """ResNet18 with a 5x5, stride-1 stem conv and the max-pool disabled.

    ``maxpool`` is swapped for ``nn.Identity`` so the stem performs no spatial
    down-sampling at all. The new ``conv1`` and ``fc`` layers are freshly
    initialized regardless of ``pretrained``.
    """
    def __init__(self, num_classes, pretrained=False):
        super().__init__()
        backbone = models.resnet18(pretrained=pretrained)
        backbone.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5,
                                   stride=1, padding=2, bias=False)
        # Identity keeps the module slot so the torchvision forward() still works.
        backbone.maxpool = nn.Identity()
        backbone.fc = nn.Linear(in_features=512, out_features=num_classes)
        self.base = backbone

    def forward(self, x):
        """Delegate to the wrapped ResNet18."""
        return self.base(x)

# Modification 3: 3x3, stride-1 stem convolution
class ResNet18_Mod3(nn.Module):
    """ResNet18 whose stem conv is replaced by a 3x3, stride-1 convolution.

    No layer is added: ``conv1`` is swapped in place and the max-pool is kept.
    The new ``conv1`` and ``fc`` layers are freshly initialized regardless of
    ``pretrained``.
    """
    def __init__(self, num_classes, pretrained=False):
        super().__init__()
        backbone = models.resnet18(pretrained=pretrained)
        # Stem: kernel 7 -> 3, stride 2 -> 1 (padding 1 keeps the spatial size).
        backbone.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3,
                                   stride=1, padding=1, bias=False)
        backbone.fc = nn.Linear(in_features=512, out_features=num_classes)
        self.base = backbone

    def forward(self, x):
        """Delegate to the wrapped ResNet18."""
        return self.base(x)

# Mod1 keeps the max-pool, so the banner labels this run "(Scratch)" to match
# the other experiment banners.
print("\nModification 1: Kernel size 5, Stride 1 (Scratch)")
print("-" * 60)

model_mod1_scratch = ResNet18_Mod1(num_classes, pretrained=False)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_mod1_scratch.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_Mod1_scratch_36x36",
               config={'model': 'ResNet18_Mod1', 'image_size': 36, 'pretrained': False})

# Predictions and labels are kept (not discarded) so that confusion matrices
# can be drawn for this model later in the notebook.
model_mod1_scratch, hist_mod1_scratch, acc_mod1_scratch, loss_mod1_scratch, f1_mod1_scratch, \
    preds_mod1_scratch, labels_mod1_scratch, time_mod1_scratch = train_model(
    model_mod1_scratch, train_loader_36, test_loader_36, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18_Mod1 (Scratch, 36x36)", use_wandb=wandb_available
)
modified_results['Mod1_scratch'] = {'accuracy': acc_mod1_scratch, 'loss': loss_mod1_scratch, 'f1': f1_mod1_scratch}

if wandb_available:
    wandb.finish()

print("\nModification 1: Kernel size 5, Stride 1 (Pretrained)")
print("-" * 60)

# Pretrained backbone; the replaced conv1 and fc start from fresh weights.
model_mod1_pretrained = ResNet18_Mod1(num_classes, pretrained=True)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_mod1_pretrained.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_Mod1_pretrained_36x36",
               config={'model': 'ResNet18_Mod1', 'image_size': 36, 'pretrained': True})

# `preds_mod1_pretrained` / `labels_mod1_pretrained` are read by the
# confusion-matrix cell; discarding them caused a NameError there.
model_mod1_pretrained, hist_mod1_pretrained, acc_mod1_pretrained, loss_mod1_pretrained, f1_mod1_pretrained, \
    preds_mod1_pretrained, labels_mod1_pretrained, time_mod1_pretrained = train_model(
    model_mod1_pretrained, train_loader_36, test_loader_36, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18_Mod1 (Pretrained, 36x36)", use_wandb=wandb_available
)
modified_results['Mod1_pretrained'] = {'accuracy': acc_mod1_pretrained, 'loss': loss_mod1_pretrained, 'f1': f1_mod1_pretrained}

if wandb_available:
    wandb.finish()

In [None]:
print("\nModification 2: No Max Pooling (Scratch)")
print("-" * 60)

model_mod2_scratch = ResNet18_Mod2(num_classes, pretrained=False)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_mod2_scratch.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_Mod2_scratch_36x36",
               config={'model': 'ResNet18_Mod2', 'image_size': 36, 'pretrained': False})

# Predictions and labels are kept (not discarded) so that a confusion matrix
# can be drawn for this model if it ranks among the best.
model_mod2_scratch, hist_mod2_scratch, acc_mod2_scratch, loss_mod2_scratch, f1_mod2_scratch, \
    preds_mod2_scratch, labels_mod2_scratch, time_mod2_scratch = train_model(
    model_mod2_scratch, train_loader_36, test_loader_36, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18_Mod2 (Scratch, 36x36)", use_wandb=wandb_available
)
modified_results['Mod2_scratch'] = {'accuracy': acc_mod2_scratch, 'loss': loss_mod2_scratch, 'f1': f1_mod2_scratch}

if wandb_available:
    wandb.finish()

print("\nModification 2: No Max Pooling (Pretrained)")
print("-" * 60)

# Pretrained backbone; the replaced conv1 and fc start from fresh weights.
model_mod2_pretrained = ResNet18_Mod2(num_classes, pretrained=True)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_mod2_pretrained.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_Mod2_pretrained_36x36",
               config={'model': 'ResNet18_Mod2', 'image_size': 36, 'pretrained': True})

model_mod2_pretrained, hist_mod2_pretrained, acc_mod2_pretrained, loss_mod2_pretrained, f1_mod2_pretrained, \
    preds_mod2_pretrained, labels_mod2_pretrained, time_mod2_pretrained = train_model(
    model_mod2_pretrained, train_loader_36, test_loader_36, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18_Mod2 (Pretrained, 36x36)", use_wandb=wandb_available
)
modified_results['Mod2_pretrained'] = {'accuracy': acc_mod2_pretrained, 'loss': loss_mod2_pretrained, 'f1': f1_mod2_pretrained}

if wandb_available:
    wandb.finish()

In [None]:
print("\nModification 3: Kernel size 3, Stride 1 (Scratch)")
print("-" * 60)

model_mod3_scratch = ResNet18_Mod3(num_classes, pretrained=False)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_mod3_scratch.parameters(), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_Mod3_scratch_36x36",
               config={'model': 'ResNet18_Mod3', 'image_size': 36, 'pretrained': False})

# Predictions and labels are kept (not discarded) so that a confusion matrix
# can be drawn for this model if it ranks among the best.
model_mod3_scratch, hist_mod3_scratch, acc_mod3_scratch, loss_mod3_scratch, f1_mod3_scratch, \
    preds_mod3_scratch, labels_mod3_scratch, time_mod3_scratch = train_model(
    model_mod3_scratch, train_loader_36, test_loader_36, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18_Mod3 (Scratch, 36x36)", use_wandb=wandb_available
)
modified_results['Mod3_scratch'] = {'accuracy': acc_mod3_scratch, 'loss': loss_mod3_scratch, 'f1': f1_mod3_scratch}

if wandb_available:
    wandb.finish()

print("\nModification 3: Kernel size 3, Stride 1 (Pretrained)")
print("-" * 60)

# Pretrained backbone; the replaced conv1 and fc start from fresh weights.
model_mod3_pretrained = ResNet18_Mod3(num_classes, pretrained=True)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_mod3_pretrained.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

if wandb_available:
    wandb.init(project="CV-Assignment-2", name="ResNet18_Mod3_pretrained_36x36",
               config={'model': 'ResNet18_Mod3', 'image_size': 36, 'pretrained': True})

model_mod3_pretrained, hist_mod3_pretrained, acc_mod3_pretrained, loss_mod3_pretrained, f1_mod3_pretrained, \
    preds_mod3_pretrained, labels_mod3_pretrained, time_mod3_pretrained = train_model(
    model_mod3_pretrained, train_loader_36, test_loader_36, criterion, optimizer, scheduler,
    num_epochs=50, device=device, model_name="ResNet18_Mod3 (Pretrained, 36x36)", use_wandb=wandb_available
)
modified_results['Mod3_pretrained'] = {'accuracy': acc_mod3_pretrained, 'loss': loss_mod3_pretrained, 'f1': f1_mod3_pretrained}

if wandb_available:
    wandb.finish()

### Section 1.4: Comprehensive Comparison and Analysis

In [None]:
print("\n" + "="*80)
print("COMPREHENSIVE COMPARISON OF ALL MODELS")
print("="*80)

# Column-oriented results for all ten runs. The four lists must stay in the
# same order: row i of every column describes the model named in 'Model'[i].
all_results = {
    'Model': [
        'ResNet18 Scratch 36x36',
        'ResNet18 Pretrained 36x36',
        'ResNet18 Scratch 224x224',
        'ResNet18 Pretrained 224x224',
        'Mod1 (K5S1) Scratch 36x36',
        'Mod1 (K5S1) Pretrained 36x36',
        'Mod2 (NoMaxPool) Scratch 36x36',
        'Mod2 (NoMaxPool) Pretrained 36x36',
        'Mod3 (K3S1) Scratch 36x36',
        'Mod3 (K3S1) Pretrained 36x36'
    ],
    'Accuracy': [
        acc_scratch_36, acc_pretrained_36, acc_scratch_224, acc_pretrained_224,
        acc_mod1_scratch, acc_mod1_pretrained,
        acc_mod2_scratch, acc_mod2_pretrained,
        acc_mod3_scratch, acc_mod3_pretrained
    ],
    'Loss': [
        loss_scratch_36, loss_pretrained_36, loss_scratch_224, loss_pretrained_224,
        loss_mod1_scratch, loss_mod1_pretrained,
        loss_mod2_scratch, loss_mod2_pretrained,
        loss_mod3_scratch, loss_mod3_pretrained
    ],
    'F1 Score': [
        f1_scratch_36, f1_pretrained_36, f1_scratch_224, f1_pretrained_224,
        f1_mod1_scratch, f1_mod1_pretrained,
        f1_mod2_scratch, f1_mod2_pretrained,
        f1_mod3_scratch, f1_mod3_pretrained
    ]
}

# Rebinds `comparison_df` (previously the four-row cost-benefit table) to the
# ten-row table sorted by accuracy, best first. Requires pandas as `pd`.
comparison_df = pd.DataFrame(all_results)
comparison_df = comparison_df.sort_values('Accuracy', ascending=False)
print("\n", comparison_df.to_string(index=False))

print("\n" + "="*80)
print("TOP 3 BEST PERFORMING MODELS")
print("="*80)
# `i` is the original (pre-sort) index label and is not used.
for i, row in comparison_df.head(3).iterrows():
    print(f"{row['Model']:40} | Accuracy: {row['Accuracy']:.4f} | F1: {row['F1 Score']:.4f}")

In [None]:
# Visualization of comparisons
# 2x2 figure: accuracy, loss and F1 bar charts for every model in
# `comparison_df`, plus test-accuracy curves for the four baseline runs.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

model_names = comparison_df['Model'].str[:30]  # Truncate for readability
colors = ['green' if p else 'blue' for p in comparison_df['Model'].str.contains('Pretrained')]

# Proxy artists: a single barh() call yields one container, so passing plain
# label strings to legend() cannot map labels to the per-bar colours.
legend_handles = [
    Patch(facecolor='green', alpha=0.7, label='Pretrained'),
    Patch(facecolor='blue', alpha=0.7, label='From Scratch'),
]

# Plot 1: Accuracy comparison
ax = axes[0, 0]
accuracies = comparison_df['Accuracy'].values
ax.barh(model_names, accuracies, color=colors, alpha=0.7)
ax.set_xlabel('Accuracy', fontsize=12)
ax.set_title('Model Accuracy Comparison', fontsize=14, fontweight='bold')
ax.set_xlim([0, 1])
for i, v in enumerate(accuracies):
    ax.text(v, i, f' {v:.3f}', va='center', fontweight='bold')
ax.legend(handles=legend_handles, loc='lower right')

# Plot 2: Loss comparison
ax = axes[0, 1]
losses = comparison_df['Loss'].values
ax.barh(model_names, losses, color=colors, alpha=0.7)
ax.set_xlabel('Loss', fontsize=12)
ax.set_title('Model Loss Comparison (Lower is Better)', fontsize=14, fontweight='bold')
for i, v in enumerate(losses):
    ax.text(v, i, f' {v:.3f}', va='center', fontweight='bold')

# Plot 3: F1 Score comparison
ax = axes[1, 0]
f1_scores = comparison_df['F1 Score'].values
ax.barh(model_names, f1_scores, color=colors, alpha=0.7)
ax.set_xlabel('F1 Score', fontsize=12)
ax.set_title('Model F1 Score Comparison', fontsize=14, fontweight='bold')
ax.set_xlim([0, 1])
for i, v in enumerate(f1_scores):
    ax.text(v, i, f' {v:.3f}', va='center', fontweight='bold')

# Plot 4: Training curves overlay for key models
# Each history gets its own x-axis: early stopping can end runs at different
# epochs, and plotting them against one shared range raises a length mismatch.
ax = axes[1, 1]
curve_histories = [
    ('Scratch 36x36', hist_scratch_36),
    ('Pretrained 36x36', hist_pretrained_36),
    ('Scratch 224x224', hist_scratch_224),
    ('Pretrained 224x224', hist_pretrained_224),
]
for curve_label, curve_hist in curve_histories:
    curve_acc = curve_hist['test_acc']
    ax.plot(range(1, len(curve_acc) + 1), curve_acc, label=curve_label, linewidth=2)
ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Test Accuracy', fontsize=12)
ax.set_title('Training Curves - Test Accuracy Over Epochs', fontsize=14, fontweight='bold')
ax.legend(loc='lower right')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('part1_model_comparison.png', dpi=150, bbox_inches='tight')
print("\nComparison plot saved as 'part1_model_comparison.png'")
plt.show()

In [None]:
# Confusion matrices for top 3 models
# The models are picked from the accuracy-sorted `comparison_df` instead of
# being hard-coded, and each one is paired with its own predictions/labels.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

top_3_models = comparison_df.head(3)

# Display name -> suffix of the `preds_<suffix>` / `labels_<suffix>` globals
# produced by the corresponding training cell.
prediction_suffixes = {
    'ResNet18 Scratch 36x36': 'scratch_36',
    'ResNet18 Pretrained 36x36': 'pretrained_36',
    'ResNet18 Scratch 224x224': 'scratch_224',
    'ResNet18 Pretrained 224x224': 'pretrained_224',
    'Mod1 (K5S1) Scratch 36x36': 'mod1_scratch',
    'Mod1 (K5S1) Pretrained 36x36': 'mod1_pretrained',
    'Mod2 (NoMaxPool) Scratch 36x36': 'mod2_scratch',
    'Mod2 (NoMaxPool) Pretrained 36x36': 'mod2_pretrained',
    'Mod3 (K3S1) Scratch 36x36': 'mod3_scratch',
    'Mod3 (K3S1) Pretrained 36x36': 'mod3_pretrained',
}

# Walk the ranking best-first and keep the first three models whose
# predictions AND labels were retained; models without them are reported and
# skipped rather than silently substituted with another model's data.
model_configs = []
for _, ranked_row in comparison_df.iterrows():
    suffix = prediction_suffixes.get(ranked_row['Model'])
    ranked_preds = globals().get(f'preds_{suffix}')
    ranked_labels = globals().get(f'labels_{suffix}')
    if ranked_preds is None or ranked_labels is None:
        print(f"Skipping '{ranked_row['Model']}': predictions/labels were not kept for this model")
        continue
    model_configs.append((ranked_row, ranked_preds, ranked_labels))
    if len(model_configs) == len(axes):
        break

for idx, ax in enumerate(axes):
    if idx < len(model_configs):
        model_row, preds, true_labels = model_configs[idx]
        cm = confusion_matrix(true_labels, preds)
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax, cbar=False)
        ax.set_title(f"{model_row['Model'][:25]}\nAccuracy: {model_row['Accuracy']:.4f}", fontweight='bold')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')
    else:
        # Fewer than three plottable models: hide the unused panel.
        ax.axis('off')

plt.tight_layout()
plt.savefig('confusion_matrices_top3.png', dpi=150, bbox_inches='tight')
print("Confusion matrices saved as 'confusion_matrices_top3.png'")
plt.show()

In [None]:
# Analysis summary
# Text report built from the metrics of the preceding experiments. Numeric
# lines are computed; explanatory lines are fixed prose.
print("\n" + "="*80)
print("KEY FINDINGS AND ANALYSIS")
print("="*80)

print("\n1. IMPACT OF IMAGE SIZE (36x36 vs 224x224):")
print("-" * 60)
print(f"   Accuracy gain (from scratch): {(acc_scratch_224 - acc_scratch_36):.4f}")
print(f"   Accuracy gain (pretrained): {(acc_pretrained_224 - acc_pretrained_36):.4f}")
print(f"   Reason: 224x224 preserves more spatial information")
print(f"   - 36x36 → 9x9 after initial conv+maxpool (spatial bottleneck)")
print(f"   - 224x224 → 56x56 after initial conv+maxpool (better feature extraction)")

print("\n2. PRETRAINED vs FROM-SCRATCH:")
print("-" * 60)
# The conclusion follows the measured gains instead of assuming an outcome.
pretrain_gain_36 = acc_pretrained_36 - acc_scratch_36
pretrain_gain_224 = acc_pretrained_224 - acc_scratch_224
helps_more_on = 'small' if pretrain_gain_36 > pretrain_gain_224 else 'large'
print(f"   36x36 accuracy gain: {pretrain_gain_36:.4f}")
print(f"   224x224 accuracy gain: {pretrain_gain_224:.4f}")
print(f"   Conclusion: Transfer learning helps more on {helps_more_on} images")
print(f"   Pretrained weights capture low-level features useful on any image dataset")

print("\n3. ARCHITECTURAL MODIFICATIONS:")
print("-" * 60)
print(f"   Baseline (36x36 scratch): {acc_scratch_36:.4f}")
print(f"   Mod1 (K5S1, scratch): {acc_mod1_scratch:.4f} | Improvement: {(acc_mod1_scratch - acc_scratch_36):.4f}")
print(f"   Mod2 (NoMaxPool, scratch): {acc_mod2_scratch:.4f} | Improvement: {(acc_mod2_scratch - acc_scratch_36):.4f}")
print(f"   Mod3 (K3S1, scratch): {acc_mod3_scratch:.4f} | Improvement: {(acc_mod3_scratch - acc_scratch_36):.4f}")
print(f"   Best modification: Reducing initial stride preserves spatial dimensions")

# In the modified pretrained models the replaced first conv layer is newly
# (randomly) initialized while the remaining layers keep pretrained weights.
print("\n4. MIXED WEIGHT INITIALIZATION (Randomly initialized first layer + Pretrained rest):")
print("-" * 60)
print(f"   Mod1 pretrained (modified first layer): {acc_mod1_pretrained:.4f}")
print(f"   Baseline pretrained: {acc_pretrained_36:.4f}")
print(f"   Impact: {'Positive' if acc_mod1_pretrained > acc_pretrained_36 else 'Negative'}")
print(f"   The model can handle different weight distributions in different layers.")
print(f"   The optimizer can adapt during training. It's generally not a problem.")

print("\n5. WHY DO DIFFERENCES ARISE?")
print("-" * 60)
print("   a) Receptive Field: Larger images allow more context for CNNs")
# ResNet18 halves the resolution five times (conv1, maxpool, layer2-4): total
# stride 32. For 36x36 that is 18 -> 9 -> 9 -> 5 -> 3 -> 2 before avgpool.
print("   b) Spatial Resolution: 36x36 → 2x2 after layer4 (total stride 32), 1x1 only after avgpool")
print("   c) Transfer Learning: ImageNet weights capture useful feature patterns")
print("   d) Initial Layer Design: K=7, S=2 is optimized for 224x224, not 36x36")
print(f"      With 36x36: 7x7 kernel spans ~19% of the image width (too coarse)")
print(f"      With 224x224: 7x7 kernel spans ~3% of the image width (just right)")