Implement L2 regularization on cat-dog classification neural network. Train the model on the dataset, and observe the impact of the regularization on the weight parameters. (Do not use data augmentation).

a. L2 regularization using optimizer’s weight decay

In [1]:
import warnings

warnings.filterwarnings("ignore")

In [2]:
import PIL.Image as Image
import pandas as pd
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision.models import AlexNet_Weights
from torchvision import transforms as T

In [3]:
EPOCHS = 10
BATCH_SIZE_TRAIN = 16
BATCH_SIZE_TEST = 100
LR = 0.001
WEIGHT_DECAY = 0.001
LOG_INTERVAL = 10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
TRANSFORM = T.Compose([
    T.ToTensor(),
    T.Resize([224, 224]),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
CLASSES = {'dogs': 0, 'cats': 1}

In [4]:
def get_df(path, classes=['dogs', 'cats']):
    paths = pd.DataFrame({'class': [], 'path': []})
    for c in classes:
        df = pd.DataFrame({
            'class': c,
            'path': glob.glob(path + c + '/*')
        })

        paths = pd.concat([paths, df])

    paths.reset_index(inplace=False)

    return paths


class CatDogDataset(Dataset):
    def __init__(self, df, classes, transform=None):
        self.paths = df
        self.classes = classes
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        row = self.paths.iloc[idx]
        img = Image.open(row['path'])
        if self.transform is not None:
            return self.transform(img), self.classes[row['class']]
        else:
            return img, self.classes[row['class']]


def get_model():
    model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', weights=AlexNet_Weights.DEFAULT)
    model.features.requires_grad = False
    model.classifier = nn.Sequential(
        *model.classifier[:-1],
        nn.Linear(4096, 2, bias=True)
    )
    return model

In [5]:
def train(model, train_loader, criterion, optimizer, device="cpu"):
    model.train()
    running_loss = 0.0
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * len(data)
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).sum()
    running_loss /= len(train_loader.dataset)
    acc = 100. * correct / len(train_loader.dataset)
    return acc, running_loss

def test(model, test_loader, criterion, device="cpu"):
    model.eval()
    running_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            running_loss += criterion(output, target).item() * len(data)
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
    running_loss /= len(test_loader.dataset)
    acc = 100. * correct / len(test_loader.dataset)
    return acc, running_loss

In [6]:
train_dataset = CatDogDataset(get_df('./data/cats_and_dogs_filtered/train/'), CLASSES, TRANSFORM)
test_dataset = CatDogDataset(get_df('./data/cats_and_dogs_filtered/validation/'), CLASSES, TRANSFORM)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE_TRAIN, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE_TEST, shuffle=True)

In [7]:
modela = get_model()

modela = modela.to(DEVICE)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


In [8]:
print(modela)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [9]:
criterion = nn.CrossEntropyLoss()
optimizera = optim.SGD(modela.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)

In [10]:
for epoch in range(1, EPOCHS + 1):
    print(f"Epoch: {epoch}")

    print("\tTraining: ")
    train_acc, train_loss = train(modela, train_loader, criterion, optimizera, DEVICE)
    print("\t\tAccuracy: {:.4}%".format(train_acc))
    print("\t\tLoss: {:.4}".format(train_loss))

    print("\tValidation: ")
    test_acc, test_loss = test(modela, test_loader, criterion, DEVICE)
    print("\t\tAccuracy: {:.4}%".format(test_acc))
    print("\t\tLoss: {:.4}".format(test_loss))

Epoch: 1
	Training: 
		Accuracy: 91.1%
		Loss: 0.2129
	Validation: 
		Accuracy: 94.9%
		Loss: 0.1248
Epoch: 2
	Training: 
		Accuracy: 95.4%
		Loss: 0.1177
	Validation: 
		Accuracy: 95.2%
		Loss: 0.1008
Epoch: 3
	Training: 
		Accuracy: 96.85%
		Loss: 0.0925
	Validation: 
		Accuracy: 96.4%
		Loss: 0.0888
Epoch: 4
	Training: 
		Accuracy: 97.05%
		Loss: 0.07402
	Validation: 
		Accuracy: 96.3%
		Loss: 0.08454
Epoch: 5
	Training: 
		Accuracy: 98.2%
		Loss: 0.05555
	Validation: 
		Accuracy: 96.5%
		Loss: 0.08318
Epoch: 6
	Training: 
		Accuracy: 98.55%
		Loss: 0.04512
	Validation: 
		Accuracy: 96.1%
		Loss: 0.08872
Epoch: 7
	Training: 
		Accuracy: 98.3%
		Loss: 0.0422
	Validation: 
		Accuracy: 96.1%
		Loss: 0.08744
Epoch: 8
	Training: 
		Accuracy: 98.85%
		Loss: 0.03598
	Validation: 
		Accuracy: 96.3%
		Loss: 0.0863
Epoch: 9
	Training: 
		Accuracy: 99.3%
		Loss: 0.03099
	Validation: 
		Accuracy: 96.4%
		Loss: 0.08352
Epoch: 10
	Training: 
		Accuracy: 98.95%
		Loss: 0.03094
	Validation: 
		Accu

b. L2 regularization using loop to find L2 norm of weights

In [11]:
REGULARIZATION = 2

In [12]:
def train_with_regularization(model, train_loader, criterion, optimizer, regularization=2, lambda_val=0.001, device="cpu"):
    model.train()
    running_loss = 0.0
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        running_loss += loss.item() * len(data)
        norm = sum(torch.norm(p, regularization) for p in model.parameters())
        loss = loss + lambda_val * norm
        loss.backward()
        optimizer.step()
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).sum()
    running_loss /= len(train_loader.dataset)
    acc = 100. * correct / len(train_loader.dataset)
    return acc, running_loss

In [13]:
modelb = get_model()

modelb = modelb.to(DEVICE)

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


In [14]:
print(modelb)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [15]:
optimizerb = optim.SGD(modelb.parameters(), lr=LR)

In [16]:
for epoch in range(1, EPOCHS + 1):
    print(f"Epoch: {epoch}")

    print("\tTraining: ")
    train_acc, train_loss = train_with_regularization(modelb, train_loader, criterion, optimizerb, REGULARIZATION, WEIGHT_DECAY, DEVICE)
    print("\t\tAccuracy: {:.4}%".format(train_acc))
    print("\t\tLoss: {:.4}".format(train_loss))

    print("\tValidation: ")
    test_acc, test_loss = test(modelb, test_loader, criterion, DEVICE)
    print("\t\tAccuracy: {:.4}%".format(test_acc))
    print("\t\tLoss: {:.4}".format(test_loss))

Epoch: 1
	Training: 
		Accuracy: 90.85%
		Loss: 0.2171
	Validation: 
		Accuracy: 94.1%
		Loss: 0.1377
Epoch: 2
	Training: 
		Accuracy: 95.95%
		Loss: 0.1113
	Validation: 
		Accuracy: 95.5%
		Loss: 0.1019
Epoch: 3
	Training: 
		Accuracy: 96.55%
		Loss: 0.0862
	Validation: 
		Accuracy: 95.3%
		Loss: 0.1141
Epoch: 4
	Training: 
		Accuracy: 97.45%
		Loss: 0.07516
	Validation: 
		Accuracy: 95.4%
		Loss: 0.09334
Epoch: 5
	Training: 
		Accuracy: 97.85%
		Loss: 0.05586
	Validation: 
		Accuracy: 96.3%
		Loss: 0.0867
Epoch: 6
	Training: 
		Accuracy: 98.25%
		Loss: 0.05063
	Validation: 
		Accuracy: 96.1%
		Loss: 0.0875
Epoch: 7
	Training: 
		Accuracy: 98.85%
		Loss: 0.0378
	Validation: 
		Accuracy: 96.5%
		Loss: 0.08001
Epoch: 8
	Training: 
		Accuracy: 98.85%
		Loss: 0.03285
	Validation: 
		Accuracy: 96.1%
		Loss: 0.09089
Epoch: 9
	Training: 
		Accuracy: 99.25%
		Loss: 0.02813
	Validation: 
		Accuracy: 96.6%
		Loss: 0.08209
Epoch: 10
	Training: 
		Accuracy: 99.05%
		Loss: 0.02852
	Validation: 
		