# Lab 6 - Transfer Learning

### Q1

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

def MNIST_CNN():
    model = nn.Sequential(
        nn.Conv2d(1, 32, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(32),
        nn.MaxPool2d(2, 2),
        nn.Dropout(0.25),
        nn.Conv2d(32, 64, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(64),
        nn.MaxPool2d(2, 2),
        nn.Dropout(0.25),
        nn.Flatten(),
        nn.Linear(64 * 7 * 7, 128),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(128, 10)
    )
    return model

model = MNIST_CNN().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

torch.save(model.state_dict(), './model_weights.pth')
print("Model weights saved to 'model_weights.pth'")

Epoch 1, Loss: 0.4766126511209428
Epoch 2, Loss: 0.34617566657282395
Epoch 3, Loss: 0.31446842814305187
Epoch 4, Loss: 0.29103892898635825
Epoch 5, Loss: 0.2779804645602637
Model weights saved to 'model_weights.pth'


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os

# Define transformation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load datasets
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define CNN Model (same as before)
def MNIST_CNN():
    model = nn.Sequential(
        nn.Conv2d(1, 32, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(32),
        nn.MaxPool2d(2, 2),
        nn.Dropout(0.25),
        nn.Conv2d(32, 64, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.BatchNorm2d(64),
        nn.MaxPool2d(2, 2),
        nn.Dropout(0.25),
        nn.Flatten(),
        nn.Linear(64 * 7 * 7, 128),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(128, 10)
    )
    return model

# Initialize the model and move it to the appropriate device
model = MNIST_CNN().to(device)

# Check if saved weights exist
if os.path.exists('./model_weights.pth'):
    model.load_state_dict(torch.load('./model_weights.pth'))
    print("Model weights loaded successfully!")
else:
    print("No pre-trained model found, please train the model first.")
    # Optionally: You could exit here or train the model if weights don't exist.
    exit()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()

# Fine-tune only the last layer
optimizer = optim.Adam(model[-1].parameters(), lr=0.001)  # Fine-tuning only last layer

# Fine-tuning loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")

# Testing the model after fine-tuning
correct = 0
total = 0
with torch.no_grad():
    model.eval()
    for data in test_loader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on the test images: {100 * correct / total:.2f}%')

Model weights loaded successfully!
Epoch 1, Loss: 0.25409031931017
Epoch 2, Loss: 0.25084169825781255
Epoch 3, Loss: 0.25027809344502144
Epoch 4, Loss: 0.2522076402288447
Epoch 5, Loss: 0.2532311462556947
Epoch 6, Loss: 0.24756420165030305
Epoch 7, Loss: 0.25057930184770494
Epoch 8, Loss: 0.25061074967609287
Epoch 9, Loss: 0.2491190151603364
Epoch 10, Loss: 0.25017006182943835
Accuracy on the test images: 91.14%


### Q2

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

train_dir = './cats_and_dogs_filtered/train'
valid_dir = './cats_and_dogs_filtered/validation'

data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

image_datasets = {
    'train': datasets.ImageFolder(train_dir, data_transforms['train']),
    'valid': datasets.ImageFolder(valid_dir, data_transforms['valid'])
}

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=32, shuffle=True),
    'valid': DataLoader(image_datasets['valid'], batch_size=32, shuffle=False)
}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
class_names = image_datasets['train'].classes

model_ft = torch.hub.load('pytorch/vision', 'alexnet', weights='IMAGENET1K_V1')

num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs, len(class_names))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.0001)


def train_model(model, criterion, optimizer, dataloaders, dataset_sizes, num_epochs=10, device='cpu'):
   for e in range(num_epochs):

       model.train()
       running_loss = 0.0
       running_corrects = 0

       for inputs, labels in dataloaders['train']:
           inputs = inputs.to(device)
           labels = labels.to(device)
           optimizer.zero_grad()

           outputs = model(inputs)
           _, preds = torch.max(outputs, 1)
           loss = criterion(outputs, labels)

           loss.backward()
           optimizer.step()

           running_loss += loss.item() * inputs.size(0)
       running_corrects += torch.sum(preds == labels.data)

       train_loss = running_loss / dataset_sizes['train']
       train_acc = running_corrects.double() / dataset_sizes['train']

       print(f'Train Loss at epoch {e}: {train_loss:.4f} Acc: {train_acc:.4f}')

       model.eval()
       running_corrects = 0

       for inputs, labels in dataloaders['valid']:
           inputs = inputs.to(device)
           labels = labels.to(device)

           with torch.no_grad():
               outputs = model(inputs)
               _, preds = torch.max(outputs, 1)
               running_corrects += torch.sum(preds == labels.data)

       val_acc = running_corrects.double() / dataset_sizes['valid']

       print(f'Valid Acc at epoch {e}: {val_acc:.4f}')

   return model


model_ft = train_model(model_ft, criterion, optimizer_ft, dataloaders, dataset_sizes, num_epochs=5, device=device)

def evaluate_model(model, dataloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the {} test images: {} %'.format(dataset_sizes['valid'], 100 * correct / total))


evaluate_model(model_ft, dataloaders['valid'])

Downloading: "https://github.com/pytorch/vision/zipball/main" to /home/student/.cache/torch/hub/main.zip
Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /home/student/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100.0%


Train Loss at epoch 0: 0.2305 Acc: 0.0075
Valid Acc at epoch 0: 0.9660
Train Loss at epoch 1: 0.0505 Acc: 0.0080
Valid Acc at epoch 1: 0.9550
Train Loss at epoch 2: 0.0413 Acc: 0.0080
Valid Acc at epoch 2: 0.9570
Train Loss at epoch 3: 0.0109 Acc: 0.0080
Valid Acc at epoch 3: 0.9670
Train Loss at epoch 4: 0.0063 Acc: 0.0080
Valid Acc at epoch 4: 0.9590
Accuracy of the network on the 1000 test images: 100.0 %


### Q3

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import os

model = CNNClassifier().to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

os.makedirs("./checkpoints", exist_ok=True)

EPOCHS = 2

for epoch in range(EPOCHS):
    print(f'EPOCH {epoch + 1}')

    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total_preds += labels.size(0)
        correct_preds += (predicted == labels).sum().item()

    avg_loss = running_loss / len(train_loader)
    accuracy = 100 * correct_preds / total_preds
    print(f'Average Loss: {avg_loss}, Accuracy: {accuracy}%')

    checkpoint = {
        "last_loss": avg_loss,
        "last_accuracy": accuracy, 
        "last_epoch": epoch + 1,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
    }

    torch.save(checkpoint, "./checkpoints/checkpoint.pt")
    print(f'Checkpoint saved at epoch {epoch + 1}')

EPOCH 1
Average Loss: 2.195639673580747, Accuracy: 38.24166666666667%
Checkpoint saved at epoch 1
EPOCH 2
Average Loss: 1.5454540382316118, Accuracy: 62.055%
Checkpoint saved at epoch 2


In [24]:
model = CNNClassifier().to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

checkpoint = torch.load("./checkpoints/checkpoint.pt")

model.load_state_dict(checkpoint["model_state"])
optimizer.load_state_dict(checkpoint["optimizer_state"])

last_loss = checkpoint["last_loss"]
last_accuracy = checkpoint["last_accuracy"]
last_epoch = checkpoint["last_epoch"]
print(f'Resuming training from epoch {last_epoch} with last loss {last_loss} and accuracy {last_accuracy}%')

NEW_EPOCHS = 10

for epoch in range(last_epoch, NEW_EPOCHS):
    print(f'EPOCH {epoch + 1}')

    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, labels)

        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total_preds += labels.size(0)
        correct_preds += (predicted == labels).sum().item()

    avg_loss = running_loss / len(train_loader)
    accuracy = 100 * correct_preds / total_preds
    print(f'Average Loss: {avg_loss}, Accuracy: {accuracy}%')

    checkpoint = {
        "last_loss": avg_loss,
        "last_accuracy": accuracy,  # Saving the accuracy
        "last_epoch": epoch + 1,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
    }

    torch.save(checkpoint, "./checkpoints/checkpoint.pt")
    print(f'Checkpoint saved at epoch {epoch + 1}')

Resuming training from epoch 2 with last loss 1.5454540382316118 and accuracy 62.055%
EPOCH 3
Average Loss: 0.8642180255735352, Accuracy: 72.69166666666666%
Checkpoint saved at epoch 3
EPOCH 4
Average Loss: 0.6956186698062587, Accuracy: 75.47166666666666%
Checkpoint saved at epoch 4
EPOCH 5
Average Loss: 0.6406074377583034, Accuracy: 76.94333333333333%
Checkpoint saved at epoch 5
EPOCH 6
Average Loss: 0.6070120503653341, Accuracy: 78.01333333333334%
Checkpoint saved at epoch 6
EPOCH 7
Average Loss: 0.5828836154518351, Accuracy: 79.01166666666667%
Checkpoint saved at epoch 7
EPOCH 8
Average Loss: 0.5630985355771172, Accuracy: 79.65166666666667%
Checkpoint saved at epoch 8
EPOCH 9
Average Loss: 0.5457978423661006, Accuracy: 80.22166666666666%
Checkpoint saved at epoch 9
EPOCH 10
Average Loss: 0.5311935498897455, Accuracy: 80.80666666666667%
Checkpoint saved at epoch 10
