In [20]:
import numpy as np
import wandb
import copy
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, TensorDataset, ConcatDataset
import torchvision.transforms as transforms

In [3]:
def read_images(path):
    """
    Read images from a folder using PyTorch's ImageFolder and DataLoader,
    apply transformations, and return NumPy arrays for images and labels.

    Args:
    - path (str): Path to the folder containing images.

    Returns:
    - X (np.array): NumPy array of images with shape (num_images, channels, height, width).
    - y (np.array): NumPy array of labels with shape (num_images,).
    """
    data_transform = transforms.Compose([transforms.Resize((224,224)), transforms.ToTensor()])
    dataset = ImageFolder(path, transform=data_transform)
        
    data = DataLoader(dataset, batch_size=32) 
    
    X = [] 
    y = []
    
    for image, label in tqdm(data):
        X.append(image) 
        y.append(label) 
        
    # Concatenate the lists of arrays along the batch dimension (axis=0)
    X = np.concatenate(X, axis=0)
    y = np.concatenate(y, axis=0)
        
    return X, y

In [4]:
def shuffle_data(X, y):  
    """
    Shuffle data samples and their corresponding labels.

    Parameters:
    - X (numpy.ndarray): NumPy array containing data samples.
    - y (numpy.ndarray): NumPy array containing corresponding labels.

    Returns:
    - X_shuffled (numpy.ndarray): NumPy array containing shuffled data samples.
    - y_shuffled (numpy.ndarray): NumPy array containing corresponding shuffled labels.
    """
    
    # Combine X, y into a list of tuples
    data = list(zip(X, y))

    # Shuffle the combined data
    random.shuffle(data)

    # Unpack the shuffled data back into separate arrays
    X_shuffled, y_shuffled = zip(*data)

    # Convert the shuffled lists to NumPy arrays 
    X_shuffled = np.array(X_shuffled)
    y_shuffled = np.array(y_shuffled)
    
    return X_shuffled, y_shuffled

In [5]:
def create_dataloader(X, y, batch_size, shuffle=True):
    """
    Create a PyTorch DataLoader from input data and labels.

    Parameters:
    - X (numpy.ndarray): Input data array.
    - y (numpy.ndarray): Labels array.
    - batch_size (int, optional): Batch size for DataLoader (default=32).
    - shuffle (bool, optional): Whether to shuffle the data (default=True).

    Returns:
    - DataLoader: PyTorch DataLoader for the input data and labels.
    """
    # Convert NumPy arrays to PyTorch tensors
    X_tensor = torch.from_numpy(X)
    y_tensor = torch.from_numpy(y)

    # Create a TensorDataset from X_train_tensor and y_train_tensor
    dataset = TensorDataset(X_tensor, y_tensor)

    # Define batch size and create DataLoader
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    return loader

In [11]:
# setting the path to dataset
train_path = "/Users/pratikkadlak/Pratik/DeepLearning/DL_Assignment_2/inaturalist_12K/train"
test_path = "/Users/pratikkadlak/Pratik/DeepLearning/DL_Assignment_2/inaturalist_12K/val"

# reading the images 
X_train, y_train = read_images(train_path)
X_test, y_test = read_images(test_path)

# shuffling the data
X_train, y_train = shuffle_data(X_train, y_train)

# making data loaders
train_loader = create_dataloader(X_train, y_train, 32)
test_loader = create_dataloader(X_test, y_test, 32)

  0%|          | 0/313 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

# Question 2

## GoogleNet

In [None]:
"""
Fine-tune a pre-trained GoogLeNet model on a custom dataset.

This code block loads a pre-trained GoogLeNet model, modifies the final fully connected (FC) layer
to have 10 output classes, defines a loss function (CrossEntropyLoss) and optimizer (SGD), and trains
the model on a custom dataset using the specified number of epochs.

The training loop iterates through each epoch, performing forward and backward passes, and evaluates
the model's accuracy on a test dataset after each epoch. The trained model is saved to a file named
'googlenet_model.pth' after training.

"""

# Define GoogLeNet model
model = models.googlenet(pretrained=True)  # Load pre-trained weights
num_classes = 10
model.fc = nn.Linear(model.fc.in_features, num_classes)  # Modify final FC layer

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Access GPU if available
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model.to(device)

# Train the model
# num_epochs = 10
# for epoch in range(num_epochs):
#     model.train()
#     for images, labels in train_loader:
#         images, labels = images.to(device), labels.to(device)
#         optimizer.zero_grad()
#         outputs = model(images)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()

#     # Evaluate the model
#     model.eval()
#     correct = 0
#     total = 0
#     with torch.no_grad():
#         for images, labels in test_loader:
#             images, labels = images.to(device), labels.to(device)
#             outputs = model(images)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()

#     accuracy = 100 * correct / total
#     print(f'Epoch {epoch+1}/{num_epochs}, Test Accuracy: {accuracy:.2f}%')


# Train the model
num_epochs = 10  # Number of training epochs
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    for images, labels in train_loader:  # Iterate over batches of training data
        images, labels = images.to(device), labels.to(device)  # Move data to device (CPU or GPU)
        optimizer.zero_grad()  # Clear previous gradients
        outputs = model(images)  # Forward pass: compute predicted outputs
        loss = criterion(outputs, labels)  # Calculate the loss
        loss.backward()  # Backward pass: compute gradients
        optimizer.step()  # Update model parameters based on gradients

    # Evaluate the model after each epoch
    model.eval()  # Set the model to evaluation mode (disables dropout and batch normalization)
    correct = 0  # Initialize number of correctly predicted samples
    total = 0  # Initialize total number of samples
    with torch.no_grad():  # Disable gradient tracking for evaluation
        for images, labels in test_loader:  # Iterate over batches of test data
            images, labels = images.to(device), labels.to(device)  # Move data to device
            outputs = model(images)  # Forward pass: compute predicted outputs
            _, predicted = torch.max(outputs.data, 1)  # Get predicted labels
            total += labels.size(0)  # Update total count of samples
            correct += (predicted == labels).sum().item()  # Count correct predictions

    accuracy = 100 * correct / total  # Calculate accuracy percentage
    print(f'Epoch {epoch+1}/{num_epochs}, Test Accuracy: {accuracy:.2f}%')  # Print test accuracy after each epoch



# Save the trained model
torch.save(model.state_dict(), 'googlenet_model.pth')

## 1. Freezing all layers except the last layer:
- Freeze all layers except the final classification layer.
- Fine-tune only the weights of the last layer during training.

In [None]:
"""
Train a model using a pre-trained GoogLeNet architecture for a specific classification task.

1. Load the pre-trained GoogLeNet model and modify the last layer for the desired number of output classes.
2. Freeze all layers except the last layer to only train the new classifier layer.
3. Define the optimizer (Adam) and loss function (CrossEntropyLoss).
4. Train the model using the specified data loader for a certain number of epochs.

Args:
- train_loader (torch.utils.data.DataLoader): DataLoader for training data.
- num_epochs (int): Number of training epochs (default is 10).
"""


# num_classes is the number of classes in your dataset
num_classes = 10

# Load pre-trained GoogLeNet
model = models.googlenet(pretrained=True)

# Freeze all layers except the last layer
for param in model.parameters():
    param.requires_grad = False

# Modify the last layer for your specific classification task
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes) 

# Access GPU if available
device = "mps" if torch.backends.mps.is_available() else "cpu"
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train the model
def train_model(model, criterion, optimizer, train_loader, num_epochs=10):
    """
    Train a neural network model using the specified criterion, optimizer, and data loader for a certain number of epochs.

    Args:
    - model (torch.nn.Module): The neural network model to train.
    - criterion (torch.nn.Module): The loss function used for optimization.
    - optimizer (torch.optim.Optimizer): The optimizer for updating model parameters.
    - train_loader (torch.utils.data.DataLoader): DataLoader for training data.
    - num_epochs (int): Number of training epochs (default is 10).

    Returns:
    - None
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0
        total_predictions = 0
        for images, labels in tqdm(train_loader):
            images , labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.size(0)
        
        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = correct_predictions / total_predictions
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

train_model(model, criterion, optimizer, train_loader)

In [16]:
def test_model(model, criterion, test_loader):
    """
    Test a neural network model using the specified criterion and data loader.

    Args:
    - model (torch.nn.Module): The trained neural network model to evaluate.
    - criterion (torch.nn.Module): The loss function used for evaluation.
    - test_loader (torch.utils.data.DataLoader): DataLoader for test data.

    Returns:
    - None
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    with torch.no_grad():
        for images, labels in tqdm(test_loader):  # assuming you have a DataLoader for test data
            images , labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.size(0)
        
    test_loss = running_loss / len(test_loader)
    test_accuracy = correct_predictions / total_predictions
    print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

test_model(model, criterion, test_loader)

  0%|          | 0/63 [00:00<?, ?it/s]

Test Loss: 0.9131, Test Accuracy: 0.6975


## 2. Fine-tuning up to a certain number of layers:

- Freeze the initial layers (e.g., convolutional layers) and fine-tune only the later layers (e.g., fully connected layers).
- Experiment with different values of 'k' to find the optimal number of layers to fine-tune.

In [None]:
# Load pre-trained GoogLeNet
model = models.googlenet(pretrained=True)

# Define the number of layers to fine-tune (k)
k = 5  # Example: Fine-tune the last 5 layers

# Freeze layers up to k
if k > 0:
    for i, child in enumerate(model.children()):
        if i < k:
            for param in child.parameters():
                param.requires_grad = False
        else:
            break

# Modify the classifier for your specific classification task
num_ftrs = model.fc.in_features
num_classes = 10  # Change this to your actual number of classes
model.fc = nn.Linear(num_ftrs, num_classes)

# Move the model to GPU if available
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = model.to(device)

# Define optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train the model
def train_model(model, criterion, optimizer, train_loader, num_epochs=10):
    """
    Load a pre-trained GoogLeNet model and fine-tune it for a specific classification task.

    Parameters:
    - model (nn.Module): Pre-trained GoogLeNet model.
    - criterion (torch.nn.modules.loss._Loss): Loss function.
    - optimizer (torch.optim.optimizer.Optimizer): Optimizer for training.
    - train_loader (torch.utils.data.DataLoader): DataLoader for training data.
    - num_epochs (int): Number of training epochs (default is 10).

    Returns:
    - None
    """
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_preds = 0
        total_preds = 0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct_preds += (predicted == labels).sum().item()
            total_preds += labels.size(0)
        
        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = correct_preds / total_preds
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')

train_model(model, criterion, optimizer, train_loader)

In [18]:
def test_model(model, criterion, test_loader):
    """
    Evaluate a trained neural network model using the specified criterion and data loader for test data.

    Args:
    - model (torch.nn.Module): The trained neural network model to evaluate.
    - criterion (torch.nn.Module): The loss function used for evaluation.
    - test_loader (torch.utils.data.DataLoader): DataLoader for test data.

    Returns:
    - None
    """
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    with torch.no_grad():
        for images, labels in tqdm(test_loader):  # assuming you have a DataLoader for test data
            images , labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.size(0)
        
    test_loss = running_loss / len(test_loader)
    test_accuracy = correct_predictions / total_predictions
    print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

test_model(model, criterion, test_loader)

  0%|          | 0/63 [00:00<?, ?it/s]

Test Loss: 1.4563, Test Accuracy: 0.5440


## 3. Feature extraction using pre-trained models:

- Use pre-trained models like GoogLeNet, InceptionV3, ResNet50, etc., as feature extractors.
- Remove the final classification layer and use the extracted features as inputs to a smaller model (e.g., a simple feedforward neural network).
- Train the smaller model on the extracted features to classify images.

In [19]:
# Set device (GPU if available, otherwise CPU)
device = "mps" if torch.backends.mps.is_available() else "cpu"

# Load pre-trained GoogLeNet without the final classification layer
googlenet = models.googlenet(pretrained=True).to(device)
googlenet = nn.Sequential(*list(googlenet.children())[:-1])  # Remove the final layer

# Define a smaller feedforward neural network for classification
class SimpleClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(SimpleClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

# Extract features using GoogLeNet
features_list = []
labels_list = []
with torch.no_grad():
    for images, labels in tqdm(train_loader):
        images = images.to(device)
        features = googlenet(images).squeeze()  # Remove the batch dimension
        features_list.append(features)
        labels_list.append(labels)

# Concatenate features and labels
features = torch.cat(features_list, dim=0).to(device)
labels = torch.cat(labels_list, dim=0).to(device)

# Define the input size for the classifier based on the extracted features
input_size = features.size(1)

# Initialize the simple classifier and move it to the device
classifier = SimpleClassifier(input_size, hidden_size=128, num_classes=10).to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

  0%|          | 0/313 [00:00<?, ?it/s]

In [20]:
# Train the classifier on the extracted features
num_epochs = 10
for epoch in range(num_epochs):
    classifier.train()  # Set the model to training mode
    optimizer.zero_grad()
    outputs = classifier(features)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Calculate accuracy
    _, predicted = torch.max(outputs, 1)
    correct = (predicted == labels).sum().item()
    accuracy = correct / labels.size(0) * 100

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}, Accuracy: {accuracy:.2f}%')

Epoch [1/10], Loss: 2.311309337615967, Accuracy: 10.91%
Epoch [2/10], Loss: 2.238849639892578, Accuracy: 26.92%
Epoch [3/10], Loss: 2.169893264770508, Accuracy: 37.78%
Epoch [4/10], Loss: 2.092411994934082, Accuracy: 46.00%
Epoch [5/10], Loss: 2.009640693664551, Accuracy: 52.67%
Epoch [6/10], Loss: 1.9256505966186523, Accuracy: 56.91%
Epoch [7/10], Loss: 1.8435523509979248, Accuracy: 59.24%
Epoch [8/10], Loss: 1.7641732692718506, Accuracy: 60.76%
Epoch [9/10], Loss: 1.6886913776397705, Accuracy: 61.57%
Epoch [10/10], Loss: 1.6167616844177246, Accuracy: 62.22%


In [21]:
# Initialize lists to store predicted labels and ground truth labels
predicted_labels = []
true_labels = []

# Switch the model to evaluation mode
classifier.eval()

# Iterate over the test_loader
with torch.no_grad():
    for images, labels in tqdm(test_loader):
        images = images.to(device)
        features = googlenet(images).squeeze()  # Extract features using GoogLeNet
        outputs = classifier(features)  # Get predictions from the classifier
        _, predicted = torch.max(outputs, 1)  # Get the predicted labels
        predicted_labels.extend(predicted.cpu().numpy())  # Append predicted labels to the list
        true_labels.extend(labels.cpu().numpy())  # Append true labels to the list

# Convert lists to NumPy arrays for easier analysis
predicted_labels = np.array(predicted_labels)
true_labels = np.array(true_labels)

# Calculate accuracy
accuracy = np.mean(predicted_labels == true_labels) * 100
print(f'Testing Accuracy: {accuracy:.2f}%')


  0%|          | 0/63 [00:00<?, ?it/s]

Testing Accuracy: 62.30%


## Question 3

- Fine Tuning the Feature Extracted Model

In [6]:
def read_images(path, batch_size):
    """
    Read images from a specified path using PyTorch's DataLoader and apply transformations.

    Args:
    - path (str): The path to the directory containing the images.
    - batch_size (int): The batch size for DataLoader.

    Returns:
    - X (numpy.ndarray): Array of images.
    - y (numpy.ndarray): Array of corresponding labels.
    """
    data_transform = transforms.Compose([transforms.Resize((299,299)), transforms.ToTensor()])
    dataset = ImageFolder(path, transform=data_transform)
        
    data = DataLoader(dataset, batch_size=batch_size) 
    
    X = [] 
    y = []
    
    for image, label in tqdm(data):
        X.append(image) 
        y.append(label) 
        
    # Concatenate the lists of arrays along the batch dimension (axis=0)
    X = np.concatenate(X, axis=0)
    y = np.concatenate(y, axis=0)
        
    return X, y

In [7]:
train_path = "/Users/pratikkadlak/Pratik/DeepLearning/DL_Assignment_2/inaturalist_12K/train"
test_path = "/Users/pratikkadlak/Pratik/DeepLearning/DL_Assignment_2/inaturalist_12K/val"

X_train, y_train = read_images(train_path, 32)
X_test, y_test = read_images(test_path, 32)

X_train, y_train = shuffle_data(X_train, y_train)

train_loader = create_dataloader(X_train, y_train, 32)
test_loader = create_dataloader(X_test, y_test, 32)

  0%|          | 0/313 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

In [8]:
def augment_data():
    """
    Augment data in a DataLoader using various transformations and return an augmented DataLoader.

    Args:
    - train_loader (DataLoader): DataLoader containing the original training data.

    Returns:
    - aug_loader (DataLoader): Augmented DataLoader with transformed data for training.
    """
    
    # Create a copy of the original train_loader
    train_loader_copy = copy.deepcopy(train_loader)
    
    # Define data augmentation transformations
    augmented_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
        transforms.RandomRotation(10),  # Randomly rotate the image by up to 10 degrees
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Randomly adjust brightness, contrast, saturation, and hue
        transforms.ToTensor(),  # Convert the image to a PyTorch tensor
    ])
    
    # Apply the transformations to the images in train_loader
    train_loader_copy.dataset.transform = augmented_transform

    augmented_dataset = ConcatDataset([train_loader.dataset, train_loader_copy.dataset])
    aug_loader = DataLoader(augmented_dataset, batch_size=train_loader.batch_size, shuffle=True)
    return aug_loader

In [13]:
# Define a smaller feedforward neural network for classification
class SimpleClassifier(nn.Module):
    """
    A simple feedforward neural network for classification tasks.

    Args:
    - input_size (int): The size of the input features.
    - activation_func (str): The activation function to use. Options: "ReLU", "SiLU", "GELU", "Mish".
    - apply_dropout (str): Whether to apply dropout. Options: "Yes", "No".
    - prob (float): Dropout probability.
    - hidden_size (int): The size of the hidden layer.
    - num_classes (int): The number of output classes.

    Returns:
    - None
    """
        
    def __init__(self, input_size, activation_func, apply_dropout, prob, hidden_size, num_classes):
        super(SimpleClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        
        # trying different activation func
        if activation_func == "ReLU": self.activation = nn.ReLU()
        elif activation_func == "SiLU": self.activation = nn.SiLU()
        elif activation_func == "GELU": self.activation = nn.GELU()
        elif activation_func == "Mish": self.activation = nn.Mish()
               
        self.apply_drop = apply_dropout
        # Adding Dropout
        self.dropout = nn.Dropout(p=prob)
        
        
        # Output Layer
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = self.fc1(x)
        x = self.activation(x)
        
        if self.apply_drop == "Yes":
            x = self.dropout(x)
            
        x = self.fc2(x)
        return x

In [14]:
# Set device (GPU if available, otherwise CPU)
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = "mps" if torch.backends.mps.is_available() else "cpu"

In [15]:
def extract_features():
    """
    Extract features using a pre-trained GoogLeNet model without the final classification layer.

    Returns:
    - googlenet (torch.nn.Module): Pre-trained GoogLeNet model without the final layer.
    - features (torch.Tensor): Extracted features from the images.
    - labels (torch.Tensor): Corresponding labels for the extracted features.
    """
    # Load pre-trained GoogLeNet without the final classification layer
    googlenet = models.googlenet(pretrained=True).to(device)
    googlenet = nn.Sequential(*list(googlenet.children())[:-1])  # Remove the final layer

    # Extract features using GoogLeNet
    features_list = []
    labels_list = []
    with torch.no_grad():
        for images, labels in tqdm(train_loader):
            images = images.to(device)
            features = googlenet(images).squeeze()  # Remove the batch dimension
            features_list.append(features)
            labels_list.append(labels)

    # Concatenate features and labels
    features = torch.cat(features_list, dim=0).to(device)
    labels = torch.cat(labels_list, dim=0).to(device)
    
    return googlenet, features, labels

In [16]:
def evaluate_model(googlenet, classifier, test_loader):
    """
    Evaluate a classifier model using features extracted by a pre-trained GoogLeNet model.

    Args:
    - googlenet (torch.nn.Module): Pre-trained GoogLeNet model without the final layer.
    - classifier (torch.nn.Module): Classifier model to evaluate.
    - test_loader (torch.utils.data.DataLoader): DataLoader for test data.

    Returns:
    - float: Accuracy of the classifier model on the test data.
    """
    # Initialize lists to store predicted labels and ground truth labels
    predicted_labels = []
    true_labels = []

    # Switch the model to evaluation mode
    classifier.eval()

    # Iterate over the test_loader
    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            images = images.to(device)
            features = googlenet(images).squeeze()  # Extract features using GoogLeNet
            outputs = classifier(features)  # Get predictions from the classifier
            _, predicted = torch.max(outputs, 1)  # Get the predicted labels
            predicted_labels.extend(predicted.cpu().numpy())  # Append predicted labels to the list
            true_labels.extend(labels.cpu().numpy())  # Append true labels to the list

    # Convert lists to NumPy arrays for easier analysis
    predicted_labels = np.array(predicted_labels)
    true_labels = np.array(true_labels)

    # Calculate accuracy
    accuracy = np.mean(predicted_labels == true_labels) * 100
    # print(f'Testing Accuracy: {accuracy:.2f}%')
    return accuracy

In [17]:
def train_model(config):
    """
    Train a simple classifier model using extracted features from a pre-trained GoogLeNet model.

    Args:
    - config: A configuration object containing hyperparameters and settings for training.

    Returns:
    - None
    """
    if config.data_augment == "Yes":
        data_loader = augment_data()
        train_loader = data_loader
    
    googlenet, features, labels = extract_features()

    # Define the input size for the classifier based on the extracted features
    input_size = features.size(1)
    
    features, labels = features.to(device), labels.to(device)

    # Initialize the simple classifier and move it to the device
    classifier = SimpleClassifier(input_size, config.activation_func, config.dropout, config.prob, config.hidden_units, num_classes=10).to(device)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()

    # Trying Different Optimizers 
    if config.optimizer == "SGD": optimizer = torch.optim.SGD(classifier.parameters(), lr=0.001) 
    elif config.optimizer == "Adam": optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001) 
    elif config.optimizer == "NAdam": optimizer = torch.optim.NAdam(classifier.parameters(), lr=0.001) 
    elif config.optimizer == "RMSprop": optimizer = torch.optim.RMSprop(classifier.parameters(), lr=0.001) 
        
    # optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
    # optimizer = torch.optim.NAdam(classifier.parameters(), lr=0.001)
    # optimizer = torch.optim.SGD(classifier.parameters(), lr=0.001, momentum=0.9)
    # Best Optimizer working is Adam for this problem so trying to change parameters values
    # optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001, weight_decay=0.0005)
    
    run_name = f"epoch_{config.epoch}_opt_{config.optimizer}_act_{config.activation_func}_augment_{config.data_augment}_dropout_{config.dropout}_prob_{config.prob}_hu_{config.hidden_units}"


    # Train the classifier on the extracted features
    num_epochs = config.epoch # for 100 epoch this gives accuracy trian_accuracy of 89.52 %
    for epoch in range(num_epochs):
        classifier.train()  # Set the model to training mode
        optimizer.zero_grad()
        outputs = classifier(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Calculate accuracy
        _, predicted = torch.max(outputs, 1)
        correct = (predicted == labels).sum().item()
        train_accuracy = correct / labels.size(0) * 100
        test_accuracy = evaluate_model(googlenet, classifier, test_loader)

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}, Accuracy: {train_accuracy:.2f}%, Test Accuracy: {test_accuracy}')
        wandb.log({"train_accuracy":train_accuracy, 'train_loss':loss.item(), 'test_accuracy':test_accuracy})
        
    wandb.run.name = run_name
    wandb.run.save()
    wandb.run.finish()

In [18]:
sweep_config = {
"name": "PartB_FineTuning",
"metric": {
    "name":"test_accuracy",
    "goal": "maximize"
},
"method": "bayes",
"parameters": {
        "epoch": {
            "values": [10, 20, 30]
        },
        "activation_func": {
            "values": ["ReLU", "GELU", "SiLU", "Mish"]
        },
        "data_augment": {
            "values": ["Yes", "No"]
        },
        "dropout": {
            "values": ["Yes", "No"]
        },
        "prob": {
            "values": [0.2, 0.3]
        },
        "hidden_units": {
            "values": [256, 512, 1024]
        },
        "optimizer": {
            "values": ["SGD", "Adam", "NAdam", "RMSprop"]
        }
    
    }
}

In [21]:
def train():
    """
    Initialize a Weights & Biases run and train a CNN model using the configured hyperparameters.

    Uses the `wandb.sweep` function to create a sweep with the specified configuration,
    then runs the training process using the `wandb.agent` function.
    """
    with wandb.init(project="DL_Assignment_2") as run:
        config = wandb.config
        train_model(config)

sweep_id = wandb.sweep(sweep_config, project = "DL_Assignment_2")
wandb.agent(sweep_id, train, count = 1)
wandb.finish()

Create sweep with ID: 5rf38dbi
Sweep URL: https://wandb.ai/space_monkeys/DL_Assignment_2/sweeps/5rf38dbi


[34m[1mwandb[0m: Agent Starting Run: 3usaecgk with config:
[34m[1mwandb[0m: 	activation_func: GELU
[34m[1mwandb[0m: 	data_augment: No
[34m[1mwandb[0m: 	dropout: Yes
[34m[1mwandb[0m: 	epoch: 20
[34m[1mwandb[0m: 	hidden_units: 256
[34m[1mwandb[0m: 	optimizer: RMSprop
[34m[1mwandb[0m: 	prob: 0.3
[34m[1mwandb[0m: Currently logged in as: [33mkadlakpratik[0m ([33mspace_monkeys[0m). Use [1m`wandb login --relogin`[0m to force relogin




  0%|          | 0/313 [00:00<?, ?it/s]

  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [1/20], Loss: 2.315261125564575, Accuracy: 8.96%, Test Accuracy: 23.3


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [2/20], Loss: 2.509401798248291, Accuracy: 22.09%, Test Accuracy: 31.0


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [3/20], Loss: 2.740562677383423, Accuracy: 24.01%, Test Accuracy: 10.0


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [4/20], Loss: 6.202073097229004, Accuracy: 11.86%, Test Accuracy: 20.849999999999998


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [5/20], Loss: 5.133575916290283, Accuracy: 17.11%, Test Accuracy: 19.8


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [6/20], Loss: 3.2188847064971924, Accuracy: 18.85%, Test Accuracy: 28.15


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [7/20], Loss: 2.477202892303467, Accuracy: 24.66%, Test Accuracy: 30.5


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [8/20], Loss: 2.066556930541992, Accuracy: 28.31%, Test Accuracy: 48.5


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [9/20], Loss: 1.8021153211593628, Accuracy: 40.21%, Test Accuracy: 59.45


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [10/20], Loss: 1.610175609588623, Accuracy: 49.86%, Test Accuracy: 60.050000000000004


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [11/20], Loss: 1.5725457668304443, Accuracy: 50.87%, Test Accuracy: 55.85


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [12/20], Loss: 1.5699949264526367, Accuracy: 51.04%, Test Accuracy: 50.8


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [13/20], Loss: 1.6169414520263672, Accuracy: 46.55%, Test Accuracy: 51.0


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [14/20], Loss: 1.615235686302185, Accuracy: 47.62%, Test Accuracy: 53.400000000000006


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [15/20], Loss: 1.5598338842391968, Accuracy: 50.05%, Test Accuracy: 61.550000000000004


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [16/20], Loss: 1.4394842386245728, Accuracy: 56.96%, Test Accuracy: 61.75000000000001


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [17/20], Loss: 1.4018408060073853, Accuracy: 57.88%, Test Accuracy: 63.65


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [18/20], Loss: 1.3793177604675293, Accuracy: 60.26%, Test Accuracy: 58.95


  0%|          | 0/63 [00:00<?, ?it/s]

Epoch [19/20], Loss: 1.3815555572509766, Accuracy: 56.46%, Test Accuracy: 61.150000000000006


  0%|          | 0/63 [00:00<?, ?it/s]



Epoch [20/20], Loss: 1.3974430561065674, Accuracy: 57.94%, Test Accuracy: 57.99999999999999


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
test_accuracy,▃▄▁▂▂▃▄▆▇█▇▆▆▇███▇█▇
train_accuracy,▁▃▃▁▂▂▃▄▅▇▇▇▆▆▇███▇█
train_loss,▂▃▃█▆▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
test_accuracy,58.0
train_accuracy,57.93579
train_loss,1.39744
