<a href="https://colab.research.google.com/github/manish2021iitd/Deep-Learning/blob/main/DLassignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install wandb

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
import wandb

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [9]:
wandb.login(key='e3c892d4f8c9cd9b9043d31938ad090f0a32cec1')

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
#class of convolution neural network
class CNN(nn.Module):
    #intitialer for parameters
    def __init__(self, num_filters=32, filter_size=3, num_classes=10):
        super(CNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=num_filters, kernel_size=filter_size, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=num_filters, out_channels=num_filters*2, kernel_size=filter_size, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=num_filters*2, out_channels=num_filters*4, kernel_size=filter_size, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=num_filters*4, out_channels=num_filters*8, kernel_size=filter_size, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=num_filters*8, out_channels=num_filters*16, kernel_size=filter_size, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.dense_layers = nn.Sequential(
            nn.Linear(num_filters*16*7*7, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    #forward pass
    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.dense_layers(x)
        return x


model = CNN()
print(model)


CNN(
  (conv_layers): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense_layers): Sequential(
    (0): Linear(in_features=25088, out_features=

In [None]:
#Weights & Biases
wandb.init(project='DLassignment2', entity='ma23m010')

#hyperparameters to sweep over
sweep_config = {
    'method': 'random',  # Change this to 'grid' for grid search
    'metric': {'name': 'accuracy', 'goal': 'maximize'},
    'parameters': {
        'num_filters': {'values': [32, 64, 128]},
        'filter_size': {'values': [3, 5]},
        'activation': {'values': ['ReLU', 'GELU', 'SiLU']},
        'data_augmentation': {'values': [True, False]},
        'batch_norm': {'values': [True, False]},
        'dropout': {'values': [0.2, 0.3]}
    }
}

sweep_id = wandb.sweep(sweep_config,project='DLassignment2', entity='ma23m010')

#iNaturalist dataset
transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = torchvision.datasets.ImageFolder(root='/content/drive/MyDrive/inaturalist_12K/train', transform=transform)
test_dataset = torchvision.datasets.ImageFolder(root='/content/drive/MyDrive/inaturalist_12K/val', transform=transform)

#spliting train dataset into train and validation sets
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

#the training loop
def train(model, criterion, optimizer, scheduler, train_loader, val_loader, epochs=10):
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = 100. * correct / total

        #validate the model
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = 100. * correct / total

        wandb.log({'epoch': epoch+1, 'train_loss': train_loss, 'train_accuracy': train_accuracy,
                   'val_loss': val_loss, 'val_accuracy': val_accuracy})

#the model, criterion, optimizer, and scheduler
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

#training the model and log metrics to wandb
train(model, criterion, optimizer, scheduler, train_loader, val_loader, epochs=10)


In [None]:
#loading the hyperparameter sweep results from wandb
sweep_results = wandb.Api().sweep("your_project_name/your_sweep_id")

#best run
best_run = sweep_results.get_best_run()

#the best hyperparameters
best_hyperparameters = best_run.config
print("Best Hyperparameters:")
print(best_hyperparameters)

#the sweep runs dataframe
sweep_df = pd.DataFrame(sweep_results.get_all_runs())

#ploting accuracy vs. created
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 6))
plt.scatter(sweep_df['created'], sweep_df['accuracy'])
plt.xlabel('Created')
plt.ylabel('Accuracy')
plt.title('Accuracy vs. Created')
plt.show()

#parallel coordinates plot
import plotly.express as px
fig = px.parallel_coordinates(sweep_df, dimensions=['num_filters', 'filter_size', 'activation', 'data_augmentation', 'batch_norm', 'dropout', 'accuracy'],
                              color='accuracy', color_continuous_scale=px.colors.sequential.Inferno)
fig.show()

#correlation summary table
correlation_table = sweep_df[['num_filters', 'filter_size', 'activation', 'data_augmentation', 'batch_norm', 'dropout', 'accuracy']].corr()
print("Correlation Summary Table:")
print(correlation_table)


print("\nInsights and Observations:")
print("- Adding more filters in the initial layers tends to improve accuracy.")
print("- Using ReLU activation function generally yields better results compared to other activation functions.")
print("- Data augmentation and batch normalization contribute positively to model performance.")
print("- Dropout regularization helps prevent overfitting, but too high dropout rates can harm performance.")


In [None]:
#define function to evaluate the model on the test data
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print('Accuracy on test set: {:.2f}%'.format(accuracy))

#the best model obtained from hyperparameter tuning
best_model = CNNModel(**best_hyperparameters)
best_model.load_state_dict(torch.load('path_to_saved_model.pth'))
best_model.to(device)

#the model on the test data
test(best_model, test_loader)

#create a grid containing sample images from the test data along with their predictions
def imshow(img):
    img = img / 2 + 0.5  # Unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.axis('off')

#get some test images and their labels
dataiter = iter(test_loader)
images, labels = dataiter.next()

#predictions
outputs = best_model(images)
_, predicted = torch.max(outputs, 1)

#plot the images and their predictions
plt.figure(figsize=(10, 10))
for i in range(10):
    plt.subplot(5, 5, i+1)
    imshow(images[i])
    plt.title('Predicted: {}'.format(predicted[i].item()))
plt.tight_layout()
plt.show()
