Lab 7 - Regularization

Q1

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets

transform=transforms.Compose([
    transforms.Resize((128,128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229,0.224,0.225])
])

train_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/train", transform=transform)
test_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/validation", transform=transform)

train_loader=data.DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader=data.DataLoader(test_dataset,batch_size=32,shuffle=False)

class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.net=nn.Sequential(
            nn.Conv2d(3,16,3,1),
            nn.Conv2d(16,32,3,1),
            nn.Flatten(),
            nn.Linear(124*124*32,512),
            nn.Linear(512,2)
        )

    def forward(self,x):
        return self.net(x)


def train_weight_decay(model,train_loader,criterion,optimizer,epochs,loss_list):
    for epoch in range(epochs):
        model.train()
        epoch_loss=0.0

        for images,labels in train_loader:
            optimizer.zero_grad()

            outputs=model(images)
            loss=criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss+=loss.item()

        loss_list.append(epoch_loss/len(train_loader))
        print(f"epoch {epoch+1}/{epochs}, loss = {loss_list[-1]:.4f}")

def train_manual(model, train_loader, criterion, optimizer, epochs, loss_list, lambda_l2=0.01):
    for epoch in range(epochs):
        model.train()
        epoch_loss=0.0

        for images, labels in train_loader:
            optimizer.zero_grad()

            outputs=model(images)
            loss=criterion(outputs, labels)

            l2_penalty=0
            for param in model.parameters():
                l2_penalty+=torch.sum(param**2)

            total_loss=loss+lambda_l2*l2_penalty
            total_loss.backward()
            optimizer.step()

            epoch_loss+=total_loss.item()

        loss_list.append(epoch_loss / len(train_loader))
        print(f"epoch {epoch + 1}/{epochs}, loss with l2 penalty = {loss_list[-1]:.4f}")

model=CNN()
criterion=nn.CrossEntropyLoss()

optimizer_weight_decay=optim.SGD(model.parameters(),lr=0.01,weight_decay=0.001)
optimizer_no_weight_decay=optim.SGD(model.parameters(),lr=0.01)
epochs=10
loss_wd=[]
loss_manual=[]

print("training model with optimizer weight decay...")
train_weight_decay(model,train_loader,criterion,optimizer_weight_decay,epochs,loss_wd)

print("training model with manual L2 regularization...")
train_manual(model,train_loader,criterion,optimizer_weight_decay,epochs,loss_manual)

training model with optimizer weight decay...


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


epoch 1/10, loss = 0.7828
epoch 2/10, loss = 0.6043
epoch 3/10, loss = 0.5477
epoch 4/10, loss = 0.4663
epoch 5/10, loss = 0.3704
epoch 6/10, loss = 0.2686
epoch 7/10, loss = 0.1975
epoch 8/10, loss = 0.1201
epoch 9/10, loss = 0.0691
epoch 10/10, loss = 0.0379
training model with manual L2 regularization...
epoch 1/10, loss with l2 penalty = 1.8940
epoch 2/10, loss with l2 penalty = 1.8402
epoch 3/10, loss with l2 penalty = 1.7920
epoch 4/10, loss with l2 penalty = 1.7462
epoch 5/10, loss with l2 penalty = 1.7018
epoch 6/10, loss with l2 penalty = 1.6587
epoch 7/10, loss with l2 penalty = 1.6170
epoch 8/10, loss with l2 penalty = 1.5765
epoch 9/10, loss with l2 penalty = 1.5369
epoch 10/10, loss with l2 penalty = 1.4985


Q2

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets

transform=transforms.Compose([
    transforms.Resize((128,128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229,0.224,0.225])
])

train_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/train", transform=transform)
test_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/validation", transform=transform)

train_loader=data.DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader=data.DataLoader(test_dataset,batch_size=32,shuffle=False)

class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.net=nn.Sequential(
            nn.Conv2d(3,16,3,1),
            nn.Conv2d(16,32,3,1),
            nn.Flatten(),
            nn.Linear(124*124*32,512),
            nn.Linear(512,2)
        )

    def forward(self,x):
        return self.net(x)

def train_manual(model, train_loader, criterion, optimizer, epochs, loss_list, lambda_l1=0.01):
    for epoch in range(epochs):
        model.train()
        epoch_loss=0.0

        total_correct=0
        total_samples=0

        for images, labels in train_loader:
            optimizer.zero_grad()

            outputs=model(images)
            loss=criterion(outputs, labels)

            l1_penalty=0
            for param in model.parameters():
                l1_penalty+=torch.sum(torch.abs(param))

            total_loss=loss+lambda_l1*l1_penalty
            total_loss.backward()
            optimizer.step()

            epoch_loss+=total_loss.item()

            _,predicted=torch.max(outputs,1)
            total_samples += labels.size(0)
            total_correct+=(predicted==labels).sum().item()

        accuracy=100*total_correct/total_samples
        loss_list.append(epoch_loss / len(train_loader))
        print(f"epoch {epoch + 1}/{epochs}, loss with L1 penalty = {loss_list[-1]:.4f}, accuracy = {accuracy:.2f}%")

model=CNN()
criterion=nn.CrossEntropyLoss()

optimizer_weight_decay=optim.SGD(model.parameters(),lr=0.01,weight_decay=0.001)
optimizer_no_weight_decay=optim.SGD(model.parameters(),lr=0.01)
epochs=2
loss_manual=[]

print("training model with manual L1 regularization...")
train_manual(model,train_loader,criterion,optimizer_weight_decay,epochs,loss_manual)

training model with manual L1 regularization...
epoch 1/2, loss with L1 penalty = 264.0457, accuracy = 51.50%
epoch 2/2, loss with L1 penalty = 128.6703, accuracy = 52.50%


Q3

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms

train_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/train", transform=transform)
test_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/validation", transform=transform)

train_loader=data.DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader=data.DataLoader(test_dataset,batch_size=32,shuffle=False)

train_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/train", transform=transform)
test_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/validation", transform=transform)

train_loader=data.DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader=data.DataLoader(test_dataset,batch_size=32,shuffle=False)

class CNN(nn.Module):
    def __init__(self,with_dropout=True):
        super(CNN,self).__init__()
        if with_dropout:
            self.net=nn.Sequential(
                nn.Conv2d(3,16,3,1),
                nn.Conv2d(16,32,3,1),
                nn.Flatten(),
                nn.Linear(124*124*32,512),
                nn.Dropout(0.5),
                nn.Linear(512,2)
            )
        else:
            self.net = nn.Sequential(
                nn.Conv2d(3, 16, 3, 1),
                nn.Conv2d(16, 32, 3, 1),
                nn.Flatten(),
                nn.Linear(124 * 124 * 32, 512),
                nn.Linear(512, 2)
            )

    def forward(self,x):
        return self.net(x)

def epoch_train(model, train_loader, criterion, optimizer, epochs, loss_list, lambda_l1=0.01):
    for epoch in range(epochs):
        model.train()
        epoch_loss=0.0

        total_correct=0
        total_samples=0

        for images, labels in train_loader:
            optimizer.zero_grad()

            outputs=model(images)
            loss=criterion(outputs, labels)

            l1_penalty=0
            for param in model.parameters():
                l1_penalty+=torch.sum(torch.abs(param))

            loss.backward()
            optimizer.step()

            epoch_loss+=loss.item()

            _,predicted=torch.max(outputs,1)
            total_samples += labels.size(0)
            total_correct+=(predicted==labels).sum().item()

        accuracy=100*total_correct/total_samples
        loss_list.append(epoch_loss / len(train_loader))
        print(f"epoch {epoch + 1}/{epochs}, loss = {loss_list[-1]:.4f}, accuracy = {accuracy:.2f}%")

model_dropout=CNN(with_dropout=True)
model_no_dropout=CNN(with_dropout=False)
criterion=nn.CrossEntropyLoss()

optimizer=optim.SGD(model_dropout.parameters(),lr=0.01)

epochs=2
loss_manual=[]

print("training model with dropout regularization...")
epoch_train(model_dropout,train_loader,criterion,optimizer,epochs,loss_manual)

print("training model without dropout regularization...")
epoch_train(model_no_dropout,train_loader,criterion,optimizer,epochs,loss_manual)

training model with dropout regularization...
epoch 1/2, loss = 0.7985, accuracy = 55.20%
epoch 2/2, loss = 0.5857, accuracy = 70.90%
training model without dropout regularization...
epoch 1/2, loss = 0.6932, accuracy = 52.60%
epoch 2/2, loss = 0.6929, accuracy = 52.60%


Q4

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = datasets.ImageFolder(root="./cats_and_dogs_filtered/train", transform=transform)
test_dataset = datasets.ImageFolder(root="./cats_and_dogs_filtered/validation", transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

class CustomDropout(nn.Module):
    def __init__(self,rate):
        super(CustomDropout,self).__init__()
        self.rate=rate

    def forward(self,x):
        if self.training:
            mask=(torch.rand_like(x)<(1-self.rate)).float()
            x=x*mask/(1-self.rate)
        return x

class CustomCNN(nn.Module):
    def __init__(self):
        super(CustomCNN,self).__init__()
        self.net=nn.Sequential(
            nn.Conv2d(3,16,3,1),
            CustomDropout(0.5),
            nn.Conv2d(16,32,3,1),
            nn.Flatten(),
            nn.Linear(124*124*32,512),
            nn.Linear(512,2)
        )

    def forward(self,x):
        return self.net(x)

class BuiltInCNN(nn.Module):
    def __init__(self):
        super(BuiltInCNN,self).__init__()
        self.net=nn.Sequential(
            nn.Conv2d(3,16,3,1),
            nn.Dropout(0.5),
            nn.Conv2d(16,32,3,1),
            nn.Flatten(),
            nn.Linear(124*124*32,512),
            nn.Linear(512,2)
        )

    def forward(self,x):
        return self.net(x)


def train(model, train_loader, criterion, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0.0
        total_correct = 0
        total_samples = 0

        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

        accuracy = 100 * total_correct / total_samples
        print(f"epoch {epoch + 1}/{epochs}, loss: {epoch_loss / len(train_loader):.4f}, accuracy: {accuracy:.2f}%")


def test(model, test_loader):
    model.eval()
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

    accuracy = 100 * total_correct / total_samples
    return accuracy

custom_model=CustomCNN()
builtin_model=BuiltInCNN()

criterion=nn.CrossEntropyLoss()
optimizer_custom = optim.SGD(custom_model.parameters(), lr=0.01)
optimizer_builtin = optim.SGD(builtin_model.parameters(), lr=0.01)

epochs = 5
print("training custom model with custom dropout...")
train(custom_model, train_loader, criterion, optimizer_custom, epochs)

print("training model with built-in dropout...")
train(builtin_model, train_loader, criterion, optimizer_builtin, epochs)

print("\ntesting custom model with custom dropout...")
custom_model_accuracy = test(custom_model, test_loader)
print(f"custom model accuracy: {custom_model_accuracy:.2f}%")

print("\ntesting model with built-in dropout...")
builtin_model_accuracy = test(builtin_model, test_loader)
print(f"built-in model accuracy: {builtin_model_accuracy:.2f}%")

training custom model with custom dropout...
epoch 1/5, loss: 0.8576, accuracy: 54.10%
epoch 2/5, loss: 0.6170, accuracy: 65.95%
epoch 3/5, loss: 0.5525, accuracy: 73.20%
epoch 4/5, loss: 0.4786, accuracy: 78.90%
epoch 5/5, loss: 0.3835, accuracy: 85.20%
training model with built-in dropout...
epoch 1/5, loss: 0.8435, accuracy: 56.70%
epoch 2/5, loss: 0.6048, accuracy: 67.30%
epoch 3/5, loss: 0.5326, accuracy: 74.65%
epoch 4/5, loss: 0.4619, accuracy: 80.00%
epoch 5/5, loss: 0.3630, accuracy: 85.15%

testing custom model with custom dropout...
custom model accuracy: 56.00%

testing model with built-in dropout...
built-in model accuracy: 55.30%


Q5

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets

transform=transforms.Compose([
    transforms.Resize((128,128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229,0.224,0.225])
])

train_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/train", transform=transform)
test_dataset=datasets.ImageFolder(root="./cats_and_dogs_filtered/validation", transform=transform)

train_loader=data.DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader=data.DataLoader(test_dataset,batch_size=32,shuffle=False)

class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.net=nn.Sequential(
            nn.Conv2d(3,16,3,1),
            nn.Conv2d(16,32,3,1),
            nn.Flatten(),
            nn.Linear(124*124*32,512),
            nn.Linear(512,2)
        )

    def forward(self,x):
        return self.net(x)

def train_es(model,train_loader, val_loader,criterion,optimizer,epochs, patience):
    best_val_loss=float("inf")
    static_epochs=0

    for epoch in range(epochs):
        model.train()
        epoch_loss=0.0

        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs=model(images)
            loss=criterion(outputs,labels)
            loss.backward()
            optimizer.step()

            epoch_loss+=loss.item()

        model.eval()
        val_loss=0.0
        total_correct=0
        total_samples=0

        with torch.no_grad():
            for images, labels in val_loader:
                outputs=model(images)
                loss=criterion(outputs,labels)
                val_loss+=loss.item()

                _, predicted = torch.max(outputs, 1)
                total_samples += labels.size(0)
                total_correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        accuracy = 100 * total_correct / total_samples
        print(f"epoch {epoch + 1}/{epochs}, train loss: {epoch_loss / len(train_loader):.4f}, validation loss: {val_loss:.4f}, accuracy: {accuracy:.2f}%")

        if val_loss<best_val_loss:
            best_val_loss=val_loss
            static_epochs=0
        else:
            static_epochs+=1

        if static_epochs>=patience:
            print(f"early stopping at epoch {epoch+1}")
            break

def train_no_es(model, train_loader, criterion, optimizer, epochs):
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0

        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Validation phase
        model.eval()
        val_loss = 0.0
        total_correct = 0
        total_samples = 0
        with torch.no_grad():
            for images, labels in test_loader:
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                total_samples += labels.size(0)
                total_correct += (predicted == labels).sum().item()

        val_loss /= len(test_loader)
        accuracy = 100 * total_correct / total_samples
        print(f"epoch {epoch+1}/{epochs}, train loss: {epoch_loss/len(train_loader):.4f}, validation loss: {val_loss:.4f}, accuracy: {accuracy:.2f}%")


model_es=CNN()
model_no_es=CNN()

criterion = nn.CrossEntropyLoss()
optimizer_with_early_stopping = optim.SGD(model_es.parameters(), lr=0.01)
optimizer_without_early_stopping = optim.SGD(model_no_es.parameters(), lr=0.01)

epochs = 10
patience = 3  # Number of epochs to wait before stopping if no improvement in validation loss

print("training model with early stopping...")
train_es(model_es, train_loader, test_loader, criterion, optimizer_with_early_stopping, epochs, patience)

print("\ntraining model without early stopping...")
train_no_es(model_no_es, train_loader, criterion, optimizer_without_early_stopping, epochs)


training model with early stopping...


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


epoch 1/10, train loss: 0.8051, validation loss: 0.6765, accuracy: 58.80%
epoch 2/10, train loss: 0.6031, validation loss: 0.6733, accuracy: 59.30%
epoch 3/10, train loss: 0.5330, validation loss: 0.7581, accuracy: 57.10%
epoch 4/10, train loss: 0.4364, validation loss: 0.7765, accuracy: 56.20%
epoch 5/10, train loss: 0.3299, validation loss: 0.9492, accuracy: 56.00%
early stopping at epoch 5

training model without early stopping...
epoch 1/10, train loss: 0.7753, validation loss: 0.6992, accuracy: 58.30%
epoch 2/10, train loss: 0.5901, validation loss: 0.6899, accuracy: 56.40%
epoch 3/10, train loss: 0.5062, validation loss: 0.7657, accuracy: 56.50%
epoch 4/10, train loss: 0.4204, validation loss: 0.8097, accuracy: 56.50%
epoch 5/10, train loss: 0.3272, validation loss: 1.0154, accuracy: 55.00%
epoch 6/10, train loss: 0.2245, validation loss: 1.2001, accuracy: 55.70%
epoch 7/10, train loss: 0.1460, validation loss: 1.3875, accuracy: 52.20%
epoch 8/10, train loss: 0.0934, validation l