## Imports

In [None]:
import os
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt


## Loading Data
- Load the dataset using your own custom code

In [None]:
import zipfile

from google.colab import drive

# Make Google Drive visible inside the Colab VM.
drive.mount('/content/drive')

# Archive on Drive that holds the team dataset, and the local folder it is unpacked into.
zip_file_path = '/content/drive/MyDrive/CS6910/Assignment2/ImageClassification/team_16.zip'
extract_to_path = '/content/dataset/'

# Unpack the whole archive; the context manager closes the file afterwards.
with zipfile.ZipFile(zip_file_path, mode='r') as dataset_archive:
    dataset_archive.extractall(path=extract_to_path)






# Load dataset
#
# Preprocessing applied to every image:
#   1. resize to 224x224 (the input size the ImageNet backbones are used with here),
#   2. convert to a float tensor in [0, 1],
#   3. normalise each channel with the ImageNet mean / std (this matches what the
#      pre-trained backbones were trained on; it does not make *this* dataset exactly
#      zero-mean / unit-variance).
# Batches of 32 are used; only the training loader is shuffled.

data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Read from the folder the archive was actually extracted into instead of a path
# relative to the current working directory, so the cell does not silently depend
# on the notebook being run from /content.
dataset_root = os.path.join(extract_to_path, 'team_16')

train_dataset = datasets.ImageFolder(root=os.path.join(dataset_root, 'train'), transform=data_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataset = datasets.ImageFolder(root=os.path.join(dataset_root, 'test'), transform=data_transform)
# Evaluation metrics do not depend on sample order, so the test split is not shuffled.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Visualizing normalized images

In [None]:
# Display one pre-processed training image exactly as the network receives it.
# The tensor is channel-first (C, H, W); imshow expects channel-last (H, W, C).
sample_image = train_loader.dataset[1000][0]
plt.imshow(sample_image.permute(1, 2, 0))
plt.show()

## MLFFNN Definition and Helper Functions


In [None]:
class ImageClassification:
    """Train, evaluate and visualise an image classifier.

    Args:
        model: ``nn.Module`` mapping a batch of images to class logits.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the trainable parameters of ``model``.
        deep_cnn: Short tag used in checkpoint and plot file names.

    Attributes:
        train_losses: Mean per-batch training loss, one entry per epoch.
        train_accuracies: Training accuracy, one entry per epoch.
    """

    def __init__(self, model, criterion, optimizer, deep_cnn='vgg'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.deep_cnn = deep_cnn
        self.train_losses = []
        self.train_accuracies = []

    def _device(self):
        """Return the device of the model's parameters (CPU if it has none)."""
        first_param = next(self.model.parameters(), None)
        return first_param.device if first_param is not None else torch.device('cpu')

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will contain ``path`` if it is missing."""
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def train(self, train_loader, num_epochs=5):
        """Run ``num_epochs`` passes over ``train_loader``.

        After every epoch the mean batch loss and the accuracy are appended to
        ``train_losses`` / ``train_accuracies``, printed, and the model weights
        are written to ``models/Q1/<deep_cnn>_epoch_<k>.pth``.
        """
        device = self._device()
        for epoch in range(num_epochs):
            tqdm_train_loader = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0
            for i, (images, labels) in enumerate(tqdm_train_loader):
                images, labels = images.to(device), labels.to(device)
                self.optimizer.zero_grad()
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()
                predicted = torch.argmax(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                # Show the running mean of the batch losses in the progress bar.
                tqdm_train_loader.set_postfix({'Running Loss': running_loss / (i + 1)})

            train_loss = running_loss / len(train_loader)
            train_accuracy = correct / total

            self.train_losses.append(train_loss)
            self.train_accuracies.append(train_accuracy)

            print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')

            # Checkpoint after every epoch.
            self.save_model(f'models/Q1/{self.deep_cnn}_epoch_{epoch+1}.pth')

    def evaluate(self, test_loader):
        """Return ``(mean batch loss, accuracy)`` of the model on ``test_loader``.

        The model is put in eval mode and no gradients are tracked.
        """
        device = self._device()
        self.model.eval()
        running_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                running_loss += loss.item()
                predicted = torch.argmax(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        test_accuracy = correct / total
        test_loss = running_loss / len(test_loader)
        return test_loss, test_accuracy

    def test(self, test_loader):
        """Evaluate on ``test_loader`` and print the loss and accuracy."""
        test_loss, test_accuracy = self.evaluate(test_loader)
        print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

    def _collect_predictions(self, loader):
        """Return ``(y_true, y_pred)`` lists of class indices for every sample in ``loader``."""
        device = self._device()
        self.model.eval()
        y_true = []
        y_pred = []
        with torch.no_grad():
            for images, labels in loader:
                images, labels = images.to(device), labels.to(device)
                predicted = torch.argmax(self.model(images), 1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())
        return y_true, y_pred

    def _plot_confusion_matrix(self, loader, split):
        """Draw, save and show the confusion matrix for ``loader``.

        Args:
            loader: DataLoader whose ``dataset.classes`` lists the class names.
            split: ``'Train'`` or ``'Test'``; used in the title and file name.
        """
        class_names = loader.dataset.classes
        y_true, y_pred = self._collect_predictions(loader)
        # Pin the label set so the matrix always has one row/column per class name,
        # even when a class never occurs in this split.
        cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))
        # Row-normalise; rows without any true sample stay at 0 instead of becoming NaN.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm_norm = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        plt.figure(figsize=(10, 10))
        # Colour encodes raw counts; the cell text shows the per-class percentage.
        annot_labels = np.array([[f'{norm*100:.2f}%' for norm in row] for row in cm_norm])
        sns.heatmap(cm, annot=annot_labels, fmt='', xticklabels=class_names, yticklabels=class_names, cmap='YlGnBu')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.title(f'Confusion Matrix ({split})')
        out_path = f'plots/confusion_matrix_{split.lower()}_{self.deep_cnn}.pdf'
        self._ensure_parent_dir(out_path)
        plt.savefig(out_path, format='pdf', bbox_inches='tight')
        plt.show()

    def plot_cm_train(self, train_loader):
        """Plot the confusion matrix on the training split."""
        self._plot_confusion_matrix(train_loader, 'Train')

    def plot_cm_test(self, test_loader):
        """Plot the confusion matrix on the test split."""
        self._plot_confusion_matrix(test_loader, 'Test')

    def plot_losses(self):
        """Plot the per-epoch training loss and save it under ``plots/``."""
        plt.figure(figsize=(10, 5))
        plt.plot(self.train_losses, label='Train Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.title('Train Loss')
        out_path = f'plots/losses_{self.deep_cnn}.pdf'
        self._ensure_parent_dir(out_path)
        plt.savefig(out_path, format='pdf', bbox_inches='tight')
        plt.show()

    def plot_accuracies(self):
        """Plot the per-epoch training accuracy and save it under ``plots/``."""
        plt.figure(figsize=(10, 5))
        plt.plot(self.train_accuracies, label='Train Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.title('Train Accuracy')
        out_path = f'plots/accuracies_{self.deep_cnn}.pdf'
        self._ensure_parent_dir(out_path)
        plt.savefig(out_path, format='pdf', bbox_inches='tight')
        plt.show()

    def save_model(self, path):
        """Write the model's ``state_dict`` to ``path``, creating missing directories."""
        self._ensure_parent_dir(path)
        torch.save(self.model.state_dict(), path)

    def load_model(self, path):
        """Load a ``state_dict`` from ``path`` onto the model's current device."""
        self.model.load_state_dict(torch.load(path, map_location=self._device()))



## VGGNet setup for Feature Extraction

In [None]:
# Pre-trained VGG16 with its fully connected classifier dropped: only the
# convolutional `features` stack is kept, moved to the target device and
# switched to inference mode.
vgg = models.vgg16(pretrained=True).features.to(device).eval()

# Freeze every backbone parameter so it acts as a fixed feature extractor.
vgg.requires_grad_(False)

# Show the layer stack of the feature extractor.
print(vgg)

# Multi-layer feed-forward classifier placed on top of the VGGNet features
class MLFFNN_VGG(nn.Module):
    """Two-hidden-layer classifier fed by the module-level ``vgg`` feature extractor.

    Args:
        input_size: Length of the flattened ``vgg`` output for one image.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        features = vgg(x)
        # Collapse everything except the batch dimension into one feature vector.
        flat = torch.flatten(features, start_dim=1)
        hidden = torch.relu(self.fc1(flat))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

## Training and evaluation using VGGNet features

In [None]:

# Initialize model, loss, and optimizer

# Length of the flattened VGG feature map for one 224x224 image. It is measured by
# pushing a dummy image through the frozen backbone so it always matches the
# extractor (512 * 7 * 7 = 25088 for VGG16 at this resolution).
with torch.no_grad():
    input_size = vgg(torch.zeros(1, 3, 224, 224, device=device)).flatten(1).size(1)
hidden_size = 256
num_classes = len(train_dataset.classes) # = 5
model = MLFFNN_VGG(input_size, hidden_size, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
# Only the classifier head is registered on `model`, so only it is optimised.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train and test the model
image_classification_vgg = ImageClassification(model, criterion, optimizer, deep_cnn='vgg')
image_classification_vgg.train(train_loader, num_epochs=10)
image_classification_vgg.test(test_loader)

# Plot confusion matrices, losses, and accuracies
image_classification_vgg.plot_cm_train(train_loader)
image_classification_vgg.plot_cm_test(test_loader)
image_classification_vgg.plot_losses()
image_classification_vgg.plot_accuracies()

## GoogLeNet setup for Feature Extraction

In [None]:
# Load the ImageNet pre-trained GoogLeNet and print its original architecture
googlenet = models.googlenet(pretrained=True)
print(googlenet)

# Use the network itself as the feature extractor by replacing the final
# classifier with an identity, so that forward() returns the pooled, flattened
# features. Rebuilding the model from `children()` would skip the
# `transform_input` step that GoogLeNet.forward applies (torchvision enables it
# for the pre-trained weights), i.e. the backbone would see inputs in a different
# normalisation than the one its weights expect.
googlenet.fc = nn.Identity()
googlenet = googlenet.to(device)
# Inference mode: BatchNorm uses running statistics and dropout is a no-op.
googlenet.eval()

# Freeze the GoogLeNet parameters
for param in googlenet.parameters():
    param.requires_grad = False

# Callable used by the classifier head; output is one feature vector per image.
googlenet_features = googlenet

# Multi-layer feed-forward classifier placed on top of the GoogLeNet features
class MLFFNN_GoogLeNet(nn.Module):
    """Two-hidden-layer classifier fed by the module-level ``googlenet_features``.

    Args:
        input_size: Length of the flattened ``googlenet_features`` output for one image.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        features = googlenet_features(x)
        # Collapse everything except the batch dimension into one feature vector.
        flat = torch.flatten(features, start_dim=1)
        hidden = torch.relu(self.fc1(flat))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)



## Training and Evaluation using GoogLeNet Features

In [None]:
# Initialize model, loss, and optimizer

# Length of the flattened GoogLeNet feature vector for one 224x224 image (1024).
# It is measured from the frozen extractor instead of relying on a value left over
# from the VGG section, which would not match the first linear layer.
with torch.no_grad():
    input_size = googlenet_features(torch.zeros(1, 3, 224, 224, device=device)).flatten(1).size(1)
hidden_size = 256
num_classes = len(train_dataset.classes) # = 5
model = MLFFNN_GoogLeNet(input_size, hidden_size, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
# Only the classifier head is registered on `model`, so only it is optimised.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


# Train and test the model
image_classification_googlenet = ImageClassification(model, criterion, optimizer, deep_cnn='googlenet')
image_classification_googlenet.train(train_loader, num_epochs=10)
image_classification_googlenet.test(test_loader)


# Plot confusion matrix, losses, and accuracies
image_classification_googlenet.plot_cm_train(train_loader)
image_classification_googlenet.plot_cm_test(test_loader)
image_classification_googlenet.plot_losses()
image_classification_googlenet.plot_accuracies()
