<a href="https://colab.research.google.com/github/rubymanderna/ML_ECGR5105/blob/main/Assignment_7/Assignment_7_2_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Problem 2 (50pts)
a. Build a ResNet-based Convolutional Neural Network, like the one we built in lectures (with skip connections), to classify the images across all 10 classes in CIFAR-10. For this problem, let's use 10 blocks for ResNet and call it ResNet-10. Use similar dimensions and channels as we used in lectures.
Train your network for 300 epochs. Report your training time, training loss, and evaluation accuracy after 300 epochs. Analyze your results in your report and compare them against problem 1.b on training time, achieved accuracy, and model size.
Make sure to submit your code by providing the GitHub URL of your course repository for this course.

b. Perform three additional training and evaluation runs for your ResNet-10 to assess the impact of regularization on your ResNet-10.
* Weight Decay with lambda of 0.001
* Dropout with p=0.3
* Batch Normalization
Report and compare your training time, training loss, and evaluation accuracy after 300 epochs across these three different training runs.
Analyze your results in your report and compare them against problem 1 on training time and achieved accuracy.

In [None]:
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Define the Residual Block
class ResidualBlock(nn.Module):
    """Residual unit: two 3x3 conv + batch-norm stages plus an additive shortcut.

    The shortcut is an empty ``nn.Sequential`` (which passes its input through
    unchanged) unless the block changes the stride or the channel count; in
    that case it is a strided 1x1 convolution followed by batch norm so the
    shortcut matches the shape of the main path.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the block.
        stride: Stride of the first 3x3 convolution and of the 1x1 shortcut
            projection. Defaults to 1.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()

        # Main path: only the first convolution applies the stride.
        self.conv1 = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Shortcut path: a projection is needed only when shapes would differ.
        shape_preserved = stride == 1 and in_channels == out_channels
        if shape_preserved:
            self.downsample = nn.Sequential()
        else:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        # In-place add of the (possibly projected) input onto the main path.
        main += self.downsample(x)
        return self.relu(main)

# Define the ResNet-10 architecture
class ResNet10(nn.Module):
    """Small residual classifier built from ``ResidualBlock`` stages.

    Layout: a 3x3 stem convolution (3 -> 16 channels) with batch norm and
    ReLU, three stages of two residual blocks each (16, 32 and 64 channels;
    the second and third stages start with a stride-2 block), global average
    pooling to 1x1, and a linear layer producing ``num_classes`` logits.

    Args:
        num_classes: Size of the output logit vector. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Stem.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)

        # Residual stages.
        self.layer1 = self.make_layer(16, 16, num_blocks=2, stride=1)
        self.layer2 = self.make_layer(16, 32, num_blocks=2, stride=2)
        self.layer3 = self.make_layer(32, 64, num_blocks=2, stride=2)

        # Classification head.
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, in_channels, out_channels, num_blocks, stride):
        """Build one stage as an ``nn.Sequential`` of residual blocks.

        The first block maps ``in_channels`` to ``out_channels`` with the
        given ``stride``; the remaining ``num_blocks - 1`` blocks keep the
        channel count and use stride 1.
        """
        head = ResidualBlock(in_channels, out_channels, stride)
        tail = [
            ResidualBlock(out_channels, out_channels, stride=1)
            for _ in range(1, num_blocks)
        ]
        return nn.Sequential(head, *tail)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        features = self.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        pooled = self.avg_pool(features)
        # Flatten everything but the batch dimension before the linear head.
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

# Run on CUDA when PyTorch reports it as available; otherwise use the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessing shared by both splits: convert images to tensors, then
# normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 train and test splits, downloaded into ./data when requested.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batch loaders: only the training split is shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4,
)


# Function to train and evaluate the model with early stopping
def train_and_evaluate(model, criterion, optimizer, num_epochs=3, regularization_name="None",
                       early_stop_threshold=5, train_data=None, eval_data=None, run_device=None):
    """Train ``model`` with per-epoch validation, then report its accuracy.

    Each epoch runs one optimization pass over the training batches and one
    gradient-free pass over the evaluation batches to compute the validation
    loss. Training stops early once the validation loss has failed to improve
    for ``early_stop_threshold`` consecutive epochs. Accuracy is then measured
    on the same evaluation batches using the weights from the last epoch that
    ran (the best-validation weights are not restored).

    Note that the evaluation batches serve both as the early-stopping signal
    and as the source of the final accuracy.

    Args:
        model: Network to train; expected to already be on the target device.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Maximum number of epochs to run.
        regularization_name: Label used in the progress messages.
        early_stop_threshold: Number of consecutive non-improving epochs that
            triggers early stopping. Pass ``None`` to disable early stopping.
        train_data: Iterable of ``(inputs, labels)`` batches supporting
            ``len()``. Defaults to the module-level ``train_loader``.
        eval_data: Iterable of ``(inputs, labels)`` batches supporting
            ``len()``. Defaults to the module-level ``test_loader``.
        run_device: Device the batches are moved to. Defaults to the
            module-level ``device``.

    Returns:
        dict with keys ``"train_loss"`` (mean batch loss of the last epoch
        run, NaN if no epoch ran), ``"val_loss"`` (validation loss of the last
        epoch run, NaN if no epoch ran), ``"accuracy"`` (fraction of correct
        predictions on the evaluation batches), ``"epochs_run"`` and
        ``"training_time_sec"`` (wall-clock seconds spent in the epoch loop,
        including the per-epoch validation passes).
    """
    # Only touch the module-level globals when the caller did not override them.
    train_batches = train_loader if train_data is None else train_data
    eval_batches = test_loader if eval_data is None else eval_data
    target_device = device if run_device is None else run_device

    print(f"Training ResNet-10 with {regularization_name} regularization:")

    best_val_loss = float('inf')
    epochs_no_improve = 0
    epochs_run = 0
    train_loss = float('nan')
    avg_val_loss = float('nan')

    start_time = time.perf_counter()

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for inputs, labels in train_batches:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        epochs_run = epoch + 1
        train_loss = total_loss / len(train_batches)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {train_loss}")

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in eval_batches:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()

        avg_val_loss = val_loss / len(eval_batches)
        print(f"Validation Loss: {avg_val_loss}")

        # Check for early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        if early_stop_threshold is not None and epochs_no_improve == early_stop_threshold:
            print(f"Early stopping at epoch {epoch + 1} as validation loss did not improve for {early_stop_threshold} epochs.")
            break

    elapsed = time.perf_counter() - start_time

    # Final evaluation on the evaluation batches
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in eval_batches:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    # Report measured wall-clock time and the epochs actually run, rather than
    # the requested epoch budget (which is wrong whenever early stopping fires).
    print(f"Training Time: {elapsed:.2f} s ({epochs_run} epochs), Accuracy: {accuracy}\n")

    return {
        "train_loss": train_loss,
        "val_loss": avg_val_loss,
        "accuracy": accuracy,
        "epochs_run": epochs_run,
        "training_time_sec": elapsed,
    }

# Initialize one ResNet-10 per experiment so the runs do not share weights.
model_no_regularization = ResNet10(num_classes=10).to(device)
model_weight_decay = ResNet10(num_classes=10).to(device)
model_dropout = ResNet10(num_classes=10).to(device)
model_batch_norm = ResNet10(num_classes=10).to(device)

criterion = nn.CrossEntropyLoss()

# Training loop for ResNet-10 without regularization with early stopping
optimizer_no_regularization = optim.Adam(model_no_regularization.parameters(), lr=0.001, weight_decay=0)
train_and_evaluate(model_no_regularization, criterion, optimizer_no_regularization, num_epochs=300, regularization_name="No Regularization", early_stop_threshold=5)

# Training loop for ResNet-10 without regularization for the full 300 epochs.
# A freshly initialized model is used so this run does not continue from the
# weights trained above, and early_stop_threshold=None disables early stopping
# (omitting the argument would silently fall back to the default of 5).
model_no_regularization_full = ResNet10(num_classes=10).to(device)
optimizer_no_regularization_full = optim.Adam(model_no_regularization_full.parameters(), lr=0.001, weight_decay=0)
train_and_evaluate(model_no_regularization_full, criterion, optimizer_no_regularization_full, num_epochs=300, regularization_name="No Regularization", early_stop_threshold=None)

# Training loop for ResNet-10 with Weight Decay (L2 regularization) with early stopping
optimizer_weight_decay = optim.Adam(model_weight_decay.parameters(), lr=0.001, weight_decay=0.001)
train_and_evaluate(model_weight_decay, criterion, optimizer_weight_decay, num_epochs=300, regularization_name="Weight Decay", early_stop_threshold=5)

# Training loop for ResNet-10 with Dropout with early stopping
# Add dropout in front of the fully connected layer. The replacement head is
# created after the model was moved to `device`, so it must be moved as well;
# otherwise its weights stay on the CPU and the forward pass fails on CUDA.
model_dropout.fc = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(model_dropout.fc.in_features, model_dropout.fc.out_features),
).to(device)
optimizer_dropout = optim.Adam(model_dropout.parameters(), lr=0.001)
train_and_evaluate(model_dropout, criterion, optimizer_dropout, num_epochs=300, regularization_name="Dropout", early_stop_threshold=5)

# Training loop for ResNet-10 with Batch Normalization with early stopping
# NOTE(review): ResNet10 builds BatchNorm2d layers in every variant, so this
# run has the same configuration as the "No Regularization" baseline above.
# Isolating the effect of batch norm would need a baseline without those layers.
optimizer_batch_norm = optim.Adam(model_batch_norm.parameters(), lr=0.001)
train_and_evaluate(model_batch_norm, criterion, optimizer_batch_norm, num_epochs=300, regularization_name="Batch Normalization", early_stop_threshold=5)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 77011744.64it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Training ResNet-10 with No Regularization regularization:




Epoch 1/300, Loss: 1.2863207671343517
Validation Loss: 1.125054174547742
Epoch 2/300, Loss: 0.9100795724355352
Validation Loss: 0.9267733343847239
Epoch 3/300, Loss: 0.7493533038193613
Validation Loss: 0.815650786563849
Epoch 4/300, Loss: 0.6432233471852129
Validation Loss: 0.6873523572068305
Epoch 5/300, Loss: 0.5686703488573699
Validation Loss: 0.6592048192100161
Epoch 6/300, Loss: 0.5094161417211414
Validation Loss: 0.617073956378706
Epoch 7/300, Loss: 0.4620920832047377
Validation Loss: 0.6377087274364605
Epoch 8/300, Loss: 0.4161109011953749
Validation Loss: 0.7349901172765501
Epoch 9/300, Loss: 0.3782496522835758
Validation Loss: 0.689163667001542
Epoch 10/300, Loss: 0.3477563195197326
Validation Loss: 0.6302216660444904
Epoch 11/300, Loss: 0.3103035500516062
Validation Loss: 0.6360743996823669
Early stopping at epoch 11 as validation loss did not improve for 5 epochs.
Training Time: 300 epochs, Accuracy: 0.7985

Training ResNet-10 with No Regularization regularization:
Epoch 1/3