In [1]:
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import numpy as np
from sklearn.utils import shuffle # for shuffling
import os
import cv2
import random
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

from sklearn.model_selection import train_test_split
import gc

In [2]:
# class labels
classesList = ["Amphibia", "Animalia", "Arachnida", "Aves", "Fungi", "Insecta", "Mammalia", "Mollusca", "Plantae", "Reptilia"]

In [3]:
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip -O nature_12K.zip
!unzip -q nature_12K.zip
!rm nature_12K.zip

--2024-04-07 07:16:52--  https://storage.googleapis.com/wandb_datasets/nature_12K.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 209.85.147.207, 142.250.125.207, 142.250.136.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|209.85.147.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3816687935 (3.6G) [application/zip]
Saving to: 'nature_12K.zip'


2024-04-07 07:17:07 (242 MB/s) - 'nature_12K.zip' saved [3816687935/3816687935]



In [4]:
dtype = torch.float
device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
# Loading pretrained ResNet50 model
model = models.resnet50(pretrained=True)

# Freezing all the parameters in the model
for param in model.parameters():
    param.requires_grad = False

# Getting the number of inputs for the final layer
num_features = model.fc.in_features  
model.fc = nn.Linear(num_features, 10) 

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 155MB/s] 


In [6]:
import wandb
# !wandb login
wandb.login()
# login into your wandb account

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [7]:
# resnet50 takes input dimension of 224*224
resize_width = 224
resize_height = 224

In [8]:
def load_data(train_dir, test_dir, batchSize):
    
    # Transformation
    transform = transforms.Compose([
        transforms.Resize((resize_width, resize_height)), # Resizing the image
        transforms.ToTensor(), # Converting image to tensor
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize the images
    ])

    # Dataset
    TrainDataset = datasets.ImageFolder(root=train_dir, transform=transform)
    class_to_idx = TrainDataset.class_to_idx

    # Initialize lists to hold indices for training and validation
    train_indices = []
    val_indices = []

    # Spliting indices for each class
    for class_name, class_index in class_to_idx.items():
        # Find indices of images in the current class
        class_indices = [i for i, (_, label) in enumerate(TrainDataset.samples) if label == class_index]
        # Split these indices into training and validation
        _train_indices, _val_indices = train_test_split(class_indices, test_size=0.2, random_state=42)
        # Append to the main list
        train_indices.extend(_train_indices)
        val_indices.extend(_val_indices)

    # creating subsets for training and validation
    # based on the indices we took from splitting 
    train_subset = Subset(TrainDataset, train_indices)
    val_subset = Subset(TrainDataset, val_indices)

    # Create data loaders
    trainData_loader = DataLoader(train_subset, batch_size=batchSize, shuffle=True, num_workers=2, pin_memory=True)
    valData_loader = DataLoader(val_subset, batch_size=batchSize, shuffle=True, num_workers=2, pin_memory=True)

    TestDataset = datasets.ImageFolder(root=test_dir, transform=transform)
    # DataLoader with shuffling
    TestData_loader = DataLoader(TestDataset,num_workers=2, batch_size=batchSize, pin_memory=True)
    
    return trainData_loader, valData_loader, TestData_loader
    

In [9]:

def train(model, criterion, optimizer, num_epochs, train_loader, val_loader):
    for epoch in range(num_epochs):
        # activating the model in train mode
        model.train()
        
        for ind, (inputs, labels) in enumerate(tqdm(train_loader, desc=f'Training Progress {epoch+1}')):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        find_accuracy(model, criterion, train_loader, "train")
        find_accuracy(model, criterion, val_loader, "validation")
        

def find_accuracy(model, criterion, dataLoader, dataName):
#     making the model in evaluation mode
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in dataLoader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    
    print(f'{dataName} Loss: {val_loss/len(dataLoader)}, '
          f'{dataName} Accuracy: {100*correct/total}%\n')
    wandb.log({f"{dataName}_loss": val_loss/len(dataLoader)})
    wandb.log({f"{dataName}_accuracy": 100*correct/total})

In [10]:
trainDataLoader, valDataLoader, testDataLoader = load_data(train_dir = 'inaturalist_12K/train', test_dir = 'inaturalist_12K/val', batchSize = 64)

wandb.init(project="Assignment 2")
wandb.run.name = "Training on the pretrained"
wandb.run.save()


optimizer = optim.NAdam(model.parameters(), lr=1e-4, weight_decay=0.005)
# Get the number of inputs for the final layer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.nn.DataParallel(model,device_ids = [0]).to(device)
criterion = nn.CrossEntropyLoss()


train(model, criterion, optimizer, 10, trainDataLoader, valDataLoader)
wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33mcs23m065[0m ([33mcs23m065_iitm[0m). Use [1m`wandb login --relogin`[0m to force relogin


Training Progress 1: 100%|██████████| 125/125 [00:55<00:00,  2.26it/s]


train Loss: 1.6073386850357056, train Accuracy: 62.282785348168524%

validation Loss: 1.608281321823597, validation Accuracy: 63.1%



Training Progress 2: 100%|██████████| 125/125 [00:45<00:00,  2.73it/s]


train Loss: 1.2542275347709655, train Accuracy: 69.796224528066%

validation Loss: 1.2536559738218784, validation Accuracy: 69.65%



Training Progress 3: 100%|██████████| 125/125 [00:47<00:00,  2.63it/s]


train Loss: 1.0778322172164918, train Accuracy: 71.70896362045255%

validation Loss: 1.0819353349506855, validation Accuracy: 72.4%



Training Progress 4: 100%|██████████| 125/125 [00:45<00:00,  2.75it/s]


train Loss: 0.9797352046966553, train Accuracy: 72.78409801225153%

validation Loss: 0.9819287341088057, validation Accuracy: 73.05%



Training Progress 5: 100%|██████████| 125/125 [00:46<00:00,  2.68it/s]


train Loss: 0.9199437885284424, train Accuracy: 73.1466433304163%

validation Loss: 0.9337042141705751, validation Accuracy: 74.25%



Training Progress 6: 100%|██████████| 125/125 [00:46<00:00,  2.67it/s]


train Loss: 0.8715750722885132, train Accuracy: 73.9967495936992%

validation Loss: 0.88818084821105, validation Accuracy: 74.45%



Training Progress 7: 100%|██████████| 125/125 [00:45<00:00,  2.78it/s]


train Loss: 0.8451165175437927, train Accuracy: 74.59682460307539%

validation Loss: 0.8616251163184643, validation Accuracy: 74.95%



Training Progress 8: 100%|██████████| 125/125 [00:45<00:00,  2.75it/s]


train Loss: 0.8109210572242737, train Accuracy: 75.10938867358419%

validation Loss: 0.8354829605668783, validation Accuracy: 75.6%



Training Progress 9: 100%|██████████| 125/125 [00:45<00:00,  2.77it/s]


train Loss: 0.787678471326828, train Accuracy: 75.90948868608577%

validation Loss: 0.812268789857626, validation Accuracy: 75.5%



Training Progress 10: 100%|██████████| 125/125 [00:44<00:00,  2.81it/s]


train Loss: 0.7618500356674194, train Accuracy: 76.34704338042255%

validation Loss: 0.7980984877794981, validation Accuracy: 76.85%



VBox(children=(Label(value='0.001 MB of 0.050 MB uploaded\r'), FloatProgress(value=0.027982496545370796, max=1…

0,1
train_accuracy,▁▅▆▆▆▇▇▇██
train_loss,█▅▄▃▂▂▂▁▁▁
validation_accuracy,▁▄▆▆▇▇▇▇▇█
validation_loss,█▅▃▃▂▂▂▁▁▁

0,1
train_accuracy,76.34704
train_loss,0.76185
validation_accuracy,76.85
validation_loss,0.7981


In [11]:
# checking top2 accuracy which is a metrix used in the model(top-k in general)
def find_top2_accuracy(model, criterion, dataLoader, dataName):
    model.eval()
    val_loss = 0.0
    correct_top1 = 0
    correct_top2 = 0
    total = 0
    
    with torch.no_grad():
        for inputs, labels in dataLoader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item()

            # Top-1 accuracy
            _, predicted_top1 = outputs.max(1)
            correct_top1 += predicted_top1.eq(labels).sum().item()

            # Top-2 accuracy
            _, predicted_top2 = outputs.topk(2, 1, True, True)
            correct_top2 += predicted_top2.eq(labels.view(-1, 1).expand_as(predicted_top2)).sum().item()

            total += labels.size(0)
    
    print(f'{dataName} Loss: {val_loss / len(dataLoader)}, '
          f'{dataName} Top-1 Accuracy: {100 * correct_top1 / total}%, '
          f'{dataName} Top-2 Accuracy: {100 * correct_top2 / total}%')


In [12]:
find_top2_accuracy(model, criterion, testDataLoader, "test ")

test  Loss: 0.7928383331745863, test  Top-1 Accuracy: 75.15%, test  Top-2 Accuracy: 87.5%


In [None]:
# for generating the 30 images and their class labels with their predictions
import torch
import matplotlib.pyplot as plt
import numpy as np

# Identify the computing device used by the model
compute_device = next(model.parameters()).device

model.eval()  # Switch model to evaluation mode

# Prepare to collect a limited number of image samples for each category
samples_limit = 3
category_samples = {category: [] for category in range(10)}  # Assuming categories are labeled 0-9

# Ensure no gradient computations for efficiency
with torch.no_grad():
    for batch_images, batch_labels in testDataLoader:
        batch_images, batch_labels = batch_images.to(compute_device), batch_labels.to(compute_device)  # Match model's device
        # Check if sufficient samples have been collected
        if all(len(samples) >= samples_limit for samples in category_samples.values()):
            break
        for image, label in zip(batch_images, batch_labels):
            current_label = label.item()
            if len(category_samples[current_label]) < samples_limit:
                # Predict the label for each image
                prediction = model(image.unsqueeze(0)).argmax(1).item()
                # Store the CPU-based image and its predicted label
                category_samples[current_label].append((image.cpu(), prediction))

# Setting up the visualization
figure, axes = plt.subplots(10, 3, figsize=(10, 33))  # Allocate a grid for the sample images

for category_id, images in category_samples.items():
    for index, (image, predicted) in enumerate(images):
        plot_axis = axes[category_id, index]
        # Reformat image for plotting
        image_to_plot = image.numpy().transpose((1, 2, 0))
        normalize_mean = np.array([0.485, 0.456, 0.406])
        normalize_std = np.array([0.229, 0.224, 0.225])
        image_to_plot = normalize_std * image_to_plot + normalize_mean
        image_to_plot = np.clip(image_to_plot, 0, 1)
        plot_axis.imshow(image_to_plot)
        plot_axis.set_title(f'Real: {classesList[category_id]}, Guess: {classesList[predicted]}')
        plot_axis.axis('off')

plt.tight_layout()

# Save and display the image grid
plt.savefig('/kaggle/working/predictions_overview.png', dpi=300)
plt.show()


In [None]:
# for generating the confusion matrix
import torch
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for data, labels in testDataLoader:
        data = data.to(device)
        outputs = model(data)
        
        _, predicted = torch.max(outputs, 1)
        
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Compute the confusion matrix
cm = confusion_matrix(all_labels, all_preds)

# Plotting the confusion matrix
fig, ax = plt.subplots(figsize=(10,10))
sns.heatmap(cm, annot=True, fmt='d', ax=ax, cmap='Blues', xticklabels=classesList, yticklabels=classesList)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
plt.show()