1. Perform classification on FashionMNIST, fashion apparels dataset, using a pretrained model which is trained on MNIST handwritten digit classification dataset.

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR


# Define a more complex CNN model with Batch Normalization and ReLU activation
class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        # Convolutional layer 1
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)  # Batch normalization after conv1
        # Convolutional layer 2
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)  # Batch normalization after conv2
        # Convolutional layer 3 (additional layer for more complexity)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)  # Batch normalization after conv3
        # Fully connected layers
        self.fc1 = nn.Linear(128 * 3 * 3, 512)
        self.fc2 = nn.Linear(512, 10)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)  # Dropout regularization to reduce overfitting


    def forward(self, x):
            # Conv1 -> BatchNorm -> ReLU -> MaxPool
            x = self.pool(torch.relu(self.bn1(self.conv1(x))))
            # Conv2 -> BatchNorm -> ReLU -> MaxPool
            x = self.pool(torch.relu(self.bn2(self.conv2(x))))
            # Conv3 -> BatchNorm -> ReLU -> MaxPool
            x = self.pool(torch.relu(self.bn3(self.conv3(x))))

            # Flatten the tensor to feed into fully connected layers
            x = x.view(-1, 128 * 3 * 3)  # This assumes the size is 128x7x7

            # Fully connected layer 1 -> ReLU activation
            x = torch.relu(self.fc1(x))

            # Apply dropout after the fully connected layer to prevent overfitting
            x = self.dropout(x)

            # Fully connected layer 2 (output layer)
            x = self.fc2(x)
            return x


# Set up data loaders for FashionMNIST with data augmentation
transform_train = transforms.Compose([
    transforms.RandomRotation(10),  # Random rotation between -10 and 10 degrees
    transforms.RandomHorizontalFlip(),  # Randomly flip the images
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Use FashionMNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize the model, loss function, optimizer, and learning rate scheduler
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNClassifier().to(device)  # Move the model to the GPU or CPU

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=5, gamma=0.5)  # Reduce the learning rate by a factor of 0.5 every 5 epochs

# Training the model
num_epochs = 5  # Increased number of epochs for better training
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # Zero the gradients

        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward(


)  # Backward pass
        optimizer.step()  # Update weights

        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    scheduler.step()  # Update the learning rate
    train_accuracy = 100 * correct / total
    print(
        f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader):.4f}, Accuracy: {train_accuracy:.2f}%")

# Evaluate on the test set
model.eval()  # Set model to evaluation mode
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)

        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")

# Save the trained model
torch.save(model.state_dict(), "/home/mustafa/dllab/week6/model.pth")
print("Optimized model saved to mmodel.pt")

Epoch 1/5, Loss: 0.2484, Accuracy: 91.99%
Epoch 2/5, Loss: 0.1106, Accuracy: 96.62%
Epoch 3/5, Loss: 0.0862, Accuracy: 97.30%
Epoch 4/5, Loss: 0.0749, Accuracy: 97.72%
Epoch 5/5, Loss: 0.0669, Accuracy: 97.96%
Test Accuracy: 98.73%
Optimized model saved to mmodel.pt


In [6]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the CNN model (same as in MNIST)
class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        # Convolutional layer 1
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)  # Batch normalization after conv1
        # Convolutional layer 2
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)  # Batch normalization after conv2
        # Convolutional layer 3 (additional layer for more complexity)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)  # Batch normalization after conv3
        # Fully connected layers
        self.fc1 = nn.Linear(128 * 3 * 3, 512)
        self.fc2 = nn.Linear(512, 10)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)  # Dropout regularization to reduce overfitting


    def forward(self, x):
            # Conv1 -> BatchNorm -> ReLU -> MaxPool
            x = self.pool(torch.relu(self.bn1(self.conv1(x))))
            # Conv2 -> BatchNorm -> ReLU -> MaxPool
            x = self.pool(torch.relu(self.bn2(self.conv2(x))))
            # Conv3 -> BatchNorm -> ReLU -> MaxPool
            x = self.pool(torch.relu(self.bn3(self.conv3(x))))

            # Flatten the tensor to feed into fully connected layers
            x = x.view(-1, 128 * 3 * 3)  # This assumes the size is 128x3x3

            # Fully connected layer 1 -> ReLU activation
            x = torch.relu(self.fc1(x))

            # Apply dropout after the fully connected layer to prevent overfitting
            x = self.dropout(x)

            # Fully connected layer 2 (output layer)
            x = self.fc2(x)
            return x


# Step 1: Load FashionMNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

mnist_testset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(mnist_testset, batch_size=1, shuffle=False)

# Step 2: Load the pre-trained model from disk
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
path = torch.load("/home/mustafa/dllab/week6/model.pth")

# Load the entire model (which includes both architecture and trained parameters)
model=CNNClassifier()
model.load_state_dict(path)
model.to(device)  # Move the model to the correct device (GPU/CPU)

# Step 3: Print model state_dict (inspect parameters)
print("Model's state_dict:")
for param_tensor in model.state_dict().keys():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())
print()

# Step 4: Evaluate the model on the FashionMNIST test set
model.eval()  # Set model to evaluation mode

correct = 0
total = 0
for i, (inputs, labels) in enumerate(test_loader):
    inputs = inputs.to(device)
    labels = labels.to(device)

    # Perform forward pass
    outputs = model(inputs)

    # Get predicted class label (highest value in the output layer)
    _, predicted = torch.max(outputs, 1)


    # Calculate total number of labels
    total += labels.size(0)

    # Calculate total correct predictions
    correct += (predicted == labels).sum()

# Calculate and print accuracy
accuracy = 100.0 * correct / total
print("The overall accuracy is {:.2f}%".format(accuracy))

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Using downloaded and verified file: ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz
Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Using downloaded and verified file: ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Using downloaded and verified file: ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz
Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-label

100.0%
  path = torch.load("/home/mustafa/dllab/week6/model.pth")


Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Model's state_dict:
conv1.weight 	 torch.Size([32, 1, 3, 3])
conv1.bias 	 torch.Size([32])
bn1.weight 	 torch.Size([32])
bn1.bias 	 torch.Size([32])
bn1.running_mean 	 torch.Size([32])
bn1.running_var 	 torch.Size([32])
bn1.num_batches_tracked 	 torch.Size([])
conv2.weight 	 torch.Size([64, 32, 3, 3])
conv2.bias 	 torch.Size([64])
bn2.weight 	 torch.Size([64])
bn2.bias 	 torch.Size([64])
bn2.running_mean 	 torch.Size([64])
bn2.running_var 	 torch.Size([64])
bn2.num_batches_tracked 	 torch.Size([])
conv3.weight 	 torch.Size([128, 64, 3, 3])
conv3.bias 	 torch.Size([128])
bn3.weight 	 torch.Size([128])
bn3.bias 	 torch.Size([128])
bn3.running_mean 	 torch.Size([128])
bn3.running_var 	 torch.Size([128])
bn3.num_batches_tracked 	 torch.Size([])
fc1.weight 	 torch.Size([512, 1152])
fc1.bias 	 torch.Size([512])
fc2.weight 	 torch.Size([10, 512])
fc2.bias 	 torch.Size([10])

The overall accuracy is 4.75%


2. Learn the AlexNet architecture and apply transfer learning to perform the classification task. Using the pre-trained AlexNet, classify images from the cats_and_dogs_filtered dataset downloaded from the below link. Finetune the classifier given in AlexNet as a two- class classifier. Perform pre-processing of images as per the requirement.

In [11]:
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

base_dir = 'cats_and_dogs_filtered'  # Adjust based on your extracted folder structure

train_dir = os.path.join(base_dir, 'train')
valid_dir = os.path.join(base_dir, 'validation')

print("Train directory:", train_dir)
print("Validation directory:", valid_dir)

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(227),  # Crop the image to 227x227
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Pre-trained AlexNet normalization
])

train_dataset = datasets.ImageFolder(train_dir, transform=transform)
valid_dataset = datasets.ImageFolder(valid_dir, transform=transform)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

model = models.alexnet(pretrained=True)

for param in model.parameters():
    param.requires_grad = False

model.classifier[6] = nn.Linear(in_features=4096, out_features=2)  # Change to 2 output classes

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.classifier.parameters(), lr=0.001, momentum=0.9)

epochs = 10  # Number of epochs
for epoch in range(epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct/total:.2f}%")

model.eval()  # Set model to evaluation mode
correct = 0
total = 0

with torch.no_grad():  # No need to track gradients during validation
    for inputs, labels in valid_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Validation Accuracy: {accuracy:.2f}%")

Train directory: cats_and_dogs_filtered/train
Validation directory: cats_and_dogs_filtered/validation


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /home/mustafa/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100.0%


Epoch [1/10], Loss: 0.1682, Accuracy: 93.05%
Epoch [2/10], Loss: 0.0975, Accuracy: 96.40%
Epoch [3/10], Loss: 0.0842, Accuracy: 96.90%
Epoch [4/10], Loss: 0.0605, Accuracy: 97.75%
Epoch [5/10], Loss: 0.0566, Accuracy: 97.70%
Epoch [6/10], Loss: 0.0584, Accuracy: 97.75%
Epoch [7/10], Loss: 0.0497, Accuracy: 98.30%
Epoch [8/10], Loss: 0.0436, Accuracy: 98.30%
Epoch [9/10], Loss: 0.0459, Accuracy: 98.35%
Epoch [10/10], Loss: 0.0354, Accuracy: 99.00%
Validation Accuracy: 96.30%


3. Implement check points in PyTorch by saving model state_dict, optimizer state_dict, epochs and loss during training so that the training can be resumed at a later point. Also, illustrate the use of check point to save the best found parameters during training.

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os


class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(128 * 3 * 3, 512)  # This will be updated dynamically
        self.fc2 = nn.Linear(512, 10)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # Apply convolutions and pooling
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)  # Dynamically calculate the size for flattening
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


checkpoint_dir = './checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

# Load the MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

model = CNNModel()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

checkpoint_path = './checkpoints/checkpoint.pt'
start_epoch = 0
best_loss = float('inf')

if os.path.exists(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    start_epoch = checkpoint['last_epoch']
    best_loss = checkpoint['last_loss']
    print(f"Resuming training from epoch {start_epoch}...")

num_epochs = 10 

for epoch in range(start_epoch, num_epochs):
    model.train() 
    running_loss = 0.0

    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

    if (epoch + 1) % 2 == 0:
        checkpoint = {
            "last_loss": avg_loss,
            "last_epoch": epoch + 1,
            "model_state": model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
        }
        torch.save(checkpoint, checkpoint_path)
        print(f"Checkpoint saved at epoch {epoch + 1}")

    if avg_loss < best_loss:
        best_loss = avg_loss
        best_model_path = './checkpoints/best_model.pt'
        torch.save(model.state_dict(), best_model_path)
        print(f"Best model saved with loss {best_loss:.4f}")

print("Training completed.")

Epoch [1/10], Loss: 0.1772
Best model saved with loss 0.1772
Epoch [2/10], Loss: 0.0499
Checkpoint saved at epoch 2
Best model saved with loss 0.0499
Epoch [3/10], Loss: 0.0364
Best model saved with loss 0.0364
Epoch [4/10], Loss: 0.0310
Checkpoint saved at epoch 4
Best model saved with loss 0.0310
Epoch [5/10], Loss: 0.0248
Best model saved with loss 0.0248
Epoch [6/10], Loss: 0.0195
Checkpoint saved at epoch 6
Best model saved with loss 0.0195
Epoch [7/10], Loss: 0.0190
Best model saved with loss 0.0190
Epoch [8/10], Loss: 0.0156
Checkpoint saved at epoch 8
Best model saved with loss 0.0156
Epoch [9/10], Loss: 0.0144
Best model saved with loss 0.0144
Epoch [10/10], Loss: 0.0123
Checkpoint saved at epoch 10
Best model saved with loss 0.0123
Training completed.


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os


# Define the CNN model (same as above)
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(128 * 3 * 3, 512)
        self.fc2 = nn.Linear(512, 10)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))
        x = x.view(-1, 128 * 3 * 3)  # Flattening the output of the conv layers
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


# Checkpoint directory
checkpoint_dir = './checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

# Load the MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

model = CNNModel()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Check for a pre-existing checkpoint
checkpoint_path = './checkpoints/checkpoint.pt'
start_epoch = 0
best_loss = float('inf')

if os.path.exists(checkpoint_path):
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    start_epoch = checkpoint['last_epoch']
    best_loss = checkpoint['last_loss']
    print(f"Resuming training from epoch {start_epoch}...")

# Training loop (same as saving the checkpoint)
num_epochs = 13  # Total number of epochs for training

for epoch in range(start_epoch, num_epochs):
    model.train()
    running_loss = 0.0

    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

    if (epoch + 1) % 2 == 0:
        checkpoint = {
            "last_loss": avg_loss,
            "last_epoch": epoch + 1,
            "model_state": model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
        }
        torch.save(checkpoint, checkpoint_path)
        print(f"Checkpoint saved at epoch {epoch + 1}")

    # Save the best model based on validation loss
    if avg_loss < best_loss:
        best_loss = avg_loss
        best_model_path = './checkpoints/best_model.pt'
        torch.save(model.state_dict(), best_model_path)
        print(f"Best model saved with loss {best_loss:.4f}")

print("Training completed.")

  checkpoint = torch.load(checkpoint_path)


Resuming training from epoch 10...
Epoch [11/13], Loss: 0.0112
Best model saved with loss 0.0112
Epoch [12/13], Loss: 0.0112
Checkpoint saved at epoch 12
Best model saved with loss 0.0112
Epoch [13/13], Loss: 0.0104
Best model saved with loss 0.0104
Training completed.
