Q1) Implement L2 regularization on cat-dog classification neural network. Train the model on the dataset, and observe the impact of the regularization on the weight parameters. (Do not use data augmentation).
a. L2 regularization using optimizer’s weight decay
b. L2 regularization using loop to find L2 norm of weights

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Define transformations (No Data Augmentation)
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Load dataset (Change path accordingly)
train_dataset = datasets.ImageFolder(root="/home/mustafa/dllab/w7prac/cats_and_dogs_filtered/train", transform=transform)
test_dataset = datasets.ImageFolder(root="/home/mustafa/dllab/w7prac/cats_and_dogs_filtered/validation", transform=transform)

# Dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Check number of classes
num_classes = len(train_dataset.classes)
print(f"Classes: {train_dataset.classes}, Number: {num_classes}")


Classes: ['cats', 'dogs'], Number: 2


In [3]:
class CatDogCNN(nn.Module):
    def __init__(self, num_classes=2):
        super(CatDogCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 16 * 16, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
    
    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)  
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Instantiate model
model = CatDogCNN(num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


CatDogCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=32768, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=2, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
)

In [4]:
# Define loss and optimizer (L2 regularization using weight_decay)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # L2 Regularization

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")


Epoch [1/10], Loss: 0.6976
Epoch [2/10], Loss: 0.6924
Epoch [3/10], Loss: 0.6836
Epoch [4/10], Loss: 0.6557
Epoch [5/10], Loss: 0.6162
Epoch [6/10], Loss: 0.5897
Epoch [7/10], Loss: 0.5386
Epoch [8/10], Loss: 0.5275
Epoch [9/10], Loss: 0.4832
Epoch [10/10], Loss: 0.4551


In [5]:
# Define optimizer without weight_decay
optimizer = optim.Adam(model.parameters(), lr=0.001)

lambda_l2 = 1e-4  # L2 Regularization factor

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Compute L2 norm manually
        l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())
        loss += lambda_l2 * l2_norm

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")


Epoch [1/10], Loss: 0.4429
Epoch [2/10], Loss: 0.4066
Epoch [3/10], Loss: 0.3618
Epoch [4/10], Loss: 0.3428
Epoch [5/10], Loss: 0.3034
Epoch [6/10], Loss: 0.2703
Epoch [7/10], Loss: 0.2530
Epoch [8/10], Loss: 0.2008
Epoch [9/10], Loss: 0.1955
Epoch [10/10], Loss: 0.1623


In [6]:
def compute_weight_norms(model):
    norms = {name: param.norm().item() for name, param in model.named_parameters() if param.requires_grad}
    return norms

print("Weight Norms with weight_decay:", compute_weight_norms(model))


Weight Norms with weight_decay: {'conv1.weight': 3.1693837642669678, 'conv1.bias': 0.5760012269020081, 'conv2.weight': 5.506408214569092, 'conv2.bias': 0.30440694093704224, 'conv3.weight': 4.9195122718811035, 'conv3.bias': 0.25522398948669434, 'fc1.weight': 21.993850708007812, 'fc1.bias': 0.22109541296958923, 'fc2.weight': 1.2369147539138794, 'fc2.bias': 0.03193413093686104}


Q2) 2. Implement L1 regularization on cat-dog classification neural network. Train the model on the dataset, and observe the impact of the regularization on the weight parameters. (Do not use data augmentation).
a. L1 regularization using optimizer’s weight decay
b. L1regularization using loop to find L1 norm of weights

In [None]:
# Define optimizer (without weight_decay)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define loss function
criterion = nn.CrossEntropyLoss()

# L1 Regularization parameter
lambda_l1 = 1e-4  # Adjust this value to control regularization strength

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Compute L1 norm manually
        l1_norm = sum(p.abs().sum() for p in model.parameters())
        loss += lambda_l1 * l1_norm  # Add L1 regularization term

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")


Q3)  Implement dropout regularization on cat-dog classification neural network. Train the model with and without dropout on a dataset, and compare the performance and overfitting tendencies. 

In [None]:
class CatDogCNN(nn.Module):
    def __init__(self, num_classes=2, use_dropout=False):
        super(CatDogCNN, self).__init__()
        self.use_dropout = use_dropout
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 16 * 16, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)  # Dropout with 50% probability
    
    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)  
        x = self.relu(self.fc1(x))
        if self.use_dropout:
            x = self.dropout(x)  # Apply dropout only if enabled
        x = self.fc2(x)
        return x


Q4) Implement your own version of the dropout layer by using Bernoulli distribution and compare
the performance with the library.

In [None]:


class CustomDropout(nn.Module):
    def __init__(self, p=0.5):
        super(CustomDropout, self).__init__()
        self.p = p

    def forward(self, x):
        if not self.training:  # No dropout in evaluation mode
            return x
        # Generate Bernoulli mask (same shape as x)
        mask = torch.bernoulli(torch.full_like(x, self.p)) / self.p
        return x * mask  # Apply dropout


Q5) Implement early stopping as a form of regularization. Train a neural network and monitor the validation loss. Stop training when the validation loss starts increasing, and compare the performance with a model trained without early stopping.

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Define transformations (NO data augmentation)
transform = transforms.Compose([
    transforms.Resize((128, 128)),  
    transforms.ToTensor()
])

# Load dataset
train_dataset = datasets.ImageFolder(root="/home/mustafa/dllab/w7prac/cats_and_dogs_filtered/train", transform=transform)
val_dataset = datasets.ImageFolder(root="/home/mustafa/dllab/w7prac/cats_and_dogs_filtered/validation", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Define a simple CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 64 * 64, 2)  # Assuming image size is 128x128

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = torch.flatten(x, start_dim=1)
        x = self.fc1(x)
        return x

# Instantiate model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Early stopping parameters
patience = 5  # Stop if no improvement for 5 epochs
best_val_loss = float("inf")
epochs_no_improve = 0
best_model_weights = None

# Training loop with early stopping
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    # Validation phase
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    
    print(f"Epoch {epoch+1}: Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

    # Early stopping check
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        best_model_weights = model.state_dict()  # Save best model weights
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs!")
            model.load_state_dict(best_model_weights)  # Restore best model
            break


Epoch 1: Train Loss: 2.5658, Val Loss: 0.6800
Epoch 2: Train Loss: 0.6118, Val Loss: 0.6224
Epoch 3: Train Loss: 0.5235, Val Loss: 0.6462
Epoch 4: Train Loss: 0.4373, Val Loss: 0.6012
Epoch 5: Train Loss: 0.3630, Val Loss: 0.6134
Epoch 6: Train Loss: 0.2964, Val Loss: 0.6511
Epoch 7: Train Loss: 0.2354, Val Loss: 0.6603
Epoch 8: Train Loss: 0.1859, Val Loss: 0.7334
Epoch 9: Train Loss: 0.1487, Val Loss: 0.7196
Early stopping triggered after 9 epochs!
