In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import pandas as pd
import os
import random
import numpy as np
import copy

In [None]:
# Workspace setting that PyTorch's reproducibility notes ask for so that cuBLAS
# can run deterministically; it is set together with the switches below.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

# Request deterministic kernels. With warn_only=True an op that has no
# deterministic implementation emits a warning instead of raising.
torch.use_deterministic_algorithms(True, warn_only=True)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

def set_seed(seed):
  """Seed every random number generator this notebook relies on.

  Seeds, in order: Python's `random`, NumPy's global RNG, PyTorch's default
  generator, and the generators of all CUDA devices.

  Args:
    seed: integer seed handed unchanged to each library.
  """
  seeders = (
      random.seed,
      np.random.seed,
      torch.manual_seed,
      torch.cuda.manual_seed_all,
  )
  for seeder in seeders:
    seeder(seed)


In [None]:
# -nc (no-clobber): skip the download when dataset.tar.gz is already present, so
# re-running the notebook does not fetch a second copy saved as dataset.tar.gz.1.
!wget -nc https://www.cse.iitb.ac.in/~pjyothi/cs335/dataset.tar.gz

--2024-08-10 16:34:13--  https://www.cse.iitb.ac.in/~pjyothi/cs335/dataset.tar.gz
Resolving www.cse.iitb.ac.in (www.cse.iitb.ac.in)... 103.21.127.134
Connecting to www.cse.iitb.ac.in (www.cse.iitb.ac.in)|103.21.127.134|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 48946920 (47M) [application/x-gzip]
Saving to: ‘dataset.tar.gz’


2024-08-10 16:34:20 (8.25 MB/s) - ‘dataset.tar.gz’ saved [48946920/48946920]



In [None]:
!mv dataset.tar.gz?rlkey=vajo7g4w8nl1q92ikv8qu75qu dataset.tar.gz

In [None]:
# Extract quietly: the verbose (-v) flag prints one line per file and floods the
# notebook output without adding information.
!tar -xzf dataset.tar.gz

dataset/
dataset/kaggle/
dataset/kaggle/686.jpg
dataset/kaggle/670.jpg
dataset/kaggle/239.jpg
dataset/kaggle/728.jpg
dataset/kaggle/536.jpg
dataset/kaggle/547.jpg
dataset/kaggle/692.jpg
dataset/kaggle/669.jpg
dataset/kaggle/557.jpg
dataset/kaggle/1.jpg
dataset/kaggle/416.jpg
dataset/kaggle/197.jpg
dataset/kaggle/776.jpg
dataset/kaggle/694.jpg
dataset/kaggle/462.jpg
dataset/kaggle/323.jpg
dataset/kaggle/605.jpg
dataset/kaggle/464.jpg
dataset/kaggle/113.jpg
dataset/kaggle/266.jpg
dataset/kaggle/261.jpg
dataset/kaggle/103.jpg
dataset/kaggle/85.jpg
dataset/kaggle/574.jpg
dataset/kaggle/224.jpg
dataset/kaggle/121.jpg
dataset/kaggle/19.jpg
dataset/kaggle/202.jpg
dataset/kaggle/550.jpg
dataset/kaggle/148.jpg
dataset/kaggle/334.jpg
dataset/kaggle/517.jpg
dataset/kaggle/665.jpg
dataset/kaggle/242.jpg
dataset/kaggle/223.jpg
dataset/kaggle/238.jpg
dataset/kaggle/56.jpg
dataset/kaggle/274.jpg
dataset/kaggle/491.jpg
dataset/kaggle/88.jpg
dataset/kaggle/543.jpg
dataset/kaggle/317.jpg
dataset/kaggle/

In [None]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print("device is {}".format(device))

device is cuda


In [None]:
def read_image_tensor(image_folder,transform,num_images=None):
    """Load `0.jpg`, `1.jpg`, ... from `image_folder` into a single stacked tensor.

    Args:
        image_folder: directory whose images are named by consecutive integer
            index starting at 0 (`<index>.jpg`).
        transform: callable applied to every decoded image after it has been
            converted to float.
        num_images: number of images to read. When None, the number of `.jpg`
            files found in `image_folder` is used.

    Returns:
        The transformed images stacked along a new leading dimension and moved
        to the notebook-level `device`.

    Raises:
        RuntimeError: if there is no image to load.
    """
    if num_images is None:
        # Count only image files so stray entries (hidden files, checkpoint
        # folders, ...) do not make the loop ask for an index that has no file.
        num_images = sum(
            1 for name in os.listdir(image_folder) if name.lower().endswith(".jpg")
        )
    if num_images <= 0:
        raise RuntimeError(f"No images to read from {image_folder!r}")

    images = [
        transform(
            torchvision.io.read_image(os.path.join(image_folder, f"{i}.jpg")).float()
        )
        for i in range(num_images)
    ]
    return torch.stack(images).to(device)

In [None]:
def get_labels(csv_file):
    """Read the `label` column of a CSV file as a float32 tensor.

    Args:
        csv_file: path to a CSV file that contains a `label` column.

    Returns:
        1-D float32 tensor with one entry per CSV row, moved to the
        notebook-level `device`.
    """
    df = pd.read_csv(csv_file)
    # Hand torch a plain NumPy array rather than a pandas Series.
    labels = df['label'].astype(float).to_numpy()
    return torch.tensor(labels, dtype=torch.float32).to(device)

In [None]:
# Every image is resized to a common resolution so the images can be stacked.
img_size = (256, 256)
base_transform = transforms.Compose([transforms.Resize(img_size)])

# NOTE(review): images are scaled by 1/256 and no ImageNet mean/std normalisation
# is applied — confirm this is the preprocessing intended for the pretrained VGG.
train_X = read_image_tensor("dataset/train/", base_transform) / 256
valid_X = read_image_tensor("dataset/test/", base_transform) / 256

# Labels come from the CSV file that sits next to each image folder.
train_Y = get_labels("dataset/train.csv")
valid_Y = get_labels("dataset/test.csv")

In [None]:
batch_size = 64

# Training batches are reshuffled every epoch.
train_dataset = TensorDataset(train_X, train_Y)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation batches keep the dataset order.
valid_dataset = TensorDataset(valid_X, valid_Y)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Sanity check: number of validation labels.
valid_dataset.tensors[1].shape[0]

400

In [None]:
# Utility for reporting how many parameters of a model training will update.
def count_parameters(model):
    """Return the total element count of `model`'s parameters that require gradients."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

In [None]:
set_seed(42)

# Pretrained VGG-11 fetched through torch.hub
# (see https://pytorch.org/hub/pytorch_vision_vgg/).
# NOTE(review): recent torchvision releases favour a `weights=` argument over
# `pretrained=True` — confirm against the installed version before switching.
baseModel = torch.hub.load('pytorch/vision', 'vgg11', pretrained=True)

# Freeze the whole backbone so that none of its weights receive gradients.
baseModel.requires_grad_(False)

# With everything frozen, the count printed here should be 0.
print("Number of trainable params in base model is ", count_parameters(baseModel))

Downloading: "https://github.com/pytorch/vision/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://download.pytorch.org/models/vgg11-8a719046.pth" to /root/.cache/torch/hub/checkpoints/vgg11-8a719046.pth
100%|██████████| 507M/507M [00:15<00:00, 34.8MB/s]


Number of trainable params in base model is  0


In [None]:
import torch.nn as nn

class Classifier(nn.Module):
    """A base network whose `classifier` head is swapped for a small trainable MLP.

    The new head is Linear -> ReLU -> Dropout(0.5) -> Linear, followed by a
    sigmoid so the module outputs probabilities suitable for `nn.BCELoss`.

    Note: `baseModel` is modified in place — its `classifier` attribute is
    overwritten — and is stored (not copied) as `self.baseModel`.

    Args:
        baseModel: network exposing a `classifier` container whose first entry
            is an `nn.Linear`, and whose forward pass ends by applying
            `classifier`.
        numOutputNeurons: number of outputs produced by the new head.
    """

    def __init__(self, baseModel, numOutputNeurons):
        super().__init__()

        self.baseModel = baseModel

        # The replacement head is fed whatever the original classifier was fed,
        # so it must accept the input width of the classifier's FIRST layer
        # (not the width going into its last layer).
        head_in_features = baseModel.classifier[0].in_features
        self.baseModel.classifier = nn.Sequential(
            nn.Linear(head_in_features, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(1024, numOutputNeurons)
        )

        # Maps the head's outputs to probabilities.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid probabilities of shape (batch, numOutputNeurons)."""
        # baseModel's own forward pass already runs the replaced classifier,
        # so the head must not be applied a second time here.
        out = self.baseModel(x)
        probs = self.sigmoid(out)
        return probs


In [None]:
# Turn the device name into a torch.device and build the classifier on it.
device = torch.device(device)
model = Classifier(baseModel=baseModel, numOutputNeurons=1).to(device)

print("Model is ")
print(model)
print("Trainable params of new model with classifier head is ", count_parameters(model))

Model is 
Classifier(
  (baseModel): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (12): ReLU(inplace=True)
      (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (14): ReLU(inplace=True)
      (15): MaxPo

In [None]:
# Training setup: epoch budget, Adam over the model's parameters, and binary
# cross-entropy on the probabilities the model outputs.
num_epochs = 30
optimizer = optim.Adam(model.parameters(), lr=1e-4)
loss_func = nn.BCELoss()

In [None]:
def train_model(model, train_loader, valid_loader, num_epochs, loss_function, optimizer):
    """Train `model` and return a copy taken at the best validation accuracy.

    After every epoch the model is scored on `valid_loader` using a 0.5
    threshold on its outputs; whenever the accuracy strictly improves, a deep
    copy of the model is kept as the best checkpoint.

    Args:
        model: module mapping an input batch to probabilities.
        train_loader: iterable of `(inputs, labels)` training batches.
        valid_loader: iterable of `(inputs, labels)` validation batches.
        num_epochs: number of passes over `train_loader`.
        loss_function: callable `(output, target) -> loss`.
        optimizer: optimizer that updates `model`'s parameters.

    Returns:
        Deep copy of `model` from the best-scoring epoch, or None when
        `num_epochs` is 0.
    """
    set_seed(42)
    # Start below any reachable accuracy so the first epoch always produces a
    # checkpoint, even when the model scores 0%.
    best_acc = float("-inf")
    best_checkpoint = None

    for epoch in range(num_epochs):
        model.train()

        total_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            output = model(inputs)

            # Labels are reshaped to match the model's output shape.
            loss = loss_function(output, labels.view(output.shape))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        average_loss = total_loss / max(len(train_loader), 1)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {average_loss:.4f}")
        model.eval()

        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in valid_loader:
                outputs = model(inputs)
                pred = (outputs > 0.5)*1
                # .item() keeps the counters as plain Python ints instead of
                # tensors living on the model's device.
                correct += (pred == labels.view(pred.shape)).sum().item()
                total += labels.size(0)

        accur = 100 * correct / total if total else 0.0
        print(f"Test Accuracy after Epoch {epoch+1}: {accur:.2f}%")
        if accur > best_acc:
            print("Saving best checkpoint")
            best_acc = accur
            best_checkpoint = copy.deepcopy(model)

    print("Training done.")
    return best_checkpoint

In [None]:
# Train the classifier built above; train_model returns the model copy that
# reached the best validation accuracy.
best_checkpoint = train_model(model, train_loader, valid_loader, num_epochs, loss_func, optimizer)

RuntimeError: ignored

In [None]:
def evaluate(model, test_loader):
  """Predict a binary label for every instance served by `test_loader`.

  Args:
    model: module mapping an input batch to probabilities.
    test_loader: iterable of batches; each batch is either a tensor or a
      list/tuple whose first element is the input tensor (labels, if any,
      are ignored).

  Returns:
    Flat list of int predictions (0 or 1), in loader order, using the same
    0.5 threshold that train_model uses for validation accuracy.
  """
  set_seed(42)
  model.eval()
  predictions = []

  with torch.no_grad():
    for batch in test_loader:
      # A DataLoader over a TensorDataset yields a list of tensors per batch,
      # even when the dataset holds only inputs.
      inputs = batch[0] if isinstance(batch, (list, tuple)) else batch
      probs = model(inputs)
      predictions.extend((probs > 0.5).int().view(-1).tolist())
  return predictions


## Evaluate using best checkpoint on Kaggle test set

In [None]:
# Same preprocessing as for the training/validation images.
img_size = (256, 256)
base_transform = transforms.Compose([transforms.Resize(img_size)])
batch_size = 64

# The Kaggle images have no labels, so the dataset holds inputs only.
kaggle_X = read_image_tensor("dataset/kaggle/", base_transform) / 256
kaggle_dataset = TensorDataset(kaggle_X)
kaggle_loader = DataLoader(kaggle_dataset, batch_size=batch_size, shuffle=False)

kaggle_predictions = evaluate(best_checkpoint, kaggle_loader)

# Row i of the submission carries the prediction for the i-th image served.
ids = list(range(len(kaggle_predictions)))
pred_dict = {"id": ids, "label": kaggle_predictions}
df = pd.DataFrame(pred_dict)
df.to_csv("./submission.csv", index=False)



## Training the last few layers of vgg-11 along with the classifier

In [None]:
set_seed(42)
# Load a fresh pretrained VGG-11: the earlier baseModel had all of its parameters
# frozen and its classifier overwritten when it was wrapped by Classifier.
baseModel = torch.hub.load('pytorch/vision', 'vgg11', pretrained=True)

In [None]:
# Trainable parameters before freezing (the whole network).
print(count_parameters(baseModel))

# Freeze everything, then re-enable gradients for the last convolutional layer
# only. Classifier overwrites baseModel.classifier with a new head, so unfreezing
# layers inside the original classifier would have no effect on training.
baseModel.requires_grad_(False)
conv_layers = [layer for layer in baseModel.features if isinstance(layer, nn.Conv2d)]
for layer in conv_layers[-1:]:
    layer.requires_grad_(True)

# Trainable parameters after freezing (only the last convolutional layer).
print(count_parameters(baseModel))

In [None]:
# Wrap the partially unfrozen backbone with a new head and move it to the device.
device = torch.device(device)
model = Classifier(baseModel=baseModel, numOutputNeurons=1).to(device)

print("Model is ")
print(model)
print("Trainable params of new model with classifier head is ", count_parameters(model))

In [None]:
# Training setup for the fine-tuning run: a new optimizer bound to the new
# model's parameters, with the same loss, learning rate and epoch budget.
num_epochs = 30
optimizer = optim.Adam(model.parameters(), lr=1e-4)
loss_func = nn.BCELoss()

In [None]:
# Fine-tuning run; the best model copy is stored under its own name
# (best_checkpoint_ft), separate from best_checkpoint.
best_checkpoint_ft = train_model(model, train_loader, valid_loader, num_epochs, loss_func, optimizer)

In [None]:
# Score the Kaggle images again, this time with the fine-tuned checkpoint.
batch_size = 64
kaggle_loader = DataLoader(kaggle_dataset, batch_size=batch_size, shuffle=False)
kaggle_predictions = evaluate(best_checkpoint_ft, kaggle_loader)

# Writes to the same ./submission.csv path as the earlier Kaggle cell,
# replacing that file.
ids = list(range(len(kaggle_predictions)))
pred_dict = {"id": ids, "label": kaggle_predictions}
df = pd.DataFrame(pred_dict)
df.to_csv("./submission.csv", index=False)

# Submit submission.csv to Kaggle