**Task 3:** *Implement and train a CNN 100K model with convolutional, pooling, and FC layers with up to 100K
weights. Perform hyperparameter tuning.*

In [40]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import FashionMNIST
from sklearn.model_selection import train_test_split

In [41]:
# Root folder for the FashionMNIST files.
# NOTE: machine-specific absolute path kept from the original notebook; point it
# at a local folder when running on another machine.
DATA_ROOT = 'C:\\Users\\Sarayu G\\582\\'

# Mean / std of the FashionMNIST training pixels after ToTensor() scaling to [0, 1].
# The previously used (0.1307, 0.3081) are the statistics of the MNIST *digits*
# dataset, so the inputs were not actually standardised.
FASHION_MNIST_MEAN = (0.2860,)
FASHION_MNIST_STD = (0.3530,)

# Seed for the stratified split. Without it the train/validation partition changes
# on every run (the global seeds are only set later, in the training cells).
SPLIT_SEED = 42

# One preprocessing pipeline shared by the training and test datasets
preprocess = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(FASHION_MNIST_MEAN, FASHION_MNIST_STD),
])

train_dataset = torchvision.datasets.FashionMNIST(
    DATA_ROOT, train=True, download=True, transform=preprocess)

test_dataset = torchvision.datasets.FashionMNIST(
    DATA_ROOT, train=False, download=True, transform=preprocess)


# Hold out a stratified 10% of the training data as a validation set
train_indices, val_indices, _, _ = train_test_split(
    range(len(train_dataset)),
    train_dataset.targets,
    stratify=train_dataset.targets,
    test_size=0.1,
    random_state=SPLIT_SEED,
)

# Generate training and validation subsets based on indices
train_split = Subset(train_dataset, train_indices)
val_split = Subset(train_dataset, val_indices)


# Batch sizes
train_batch_size = 900   # used for the training and validation loaders
test_batch_size = 1000   # evaluation only, so it can be larger than the train batch size


# Dataloaders that iterate over batches for training, validation and testing.
# Only the training loader is shuffled: evaluation metrics do not depend on the
# sample order, and a fixed order keeps evaluation deterministic.
train_batches = DataLoader(train_split, batch_size=train_batch_size, shuffle=True)
val_batches = DataLoader(val_split, batch_size=train_batch_size, shuffle=False)
test_batches = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

num_train_batches = len(train_batches)
num_val_batches = len(val_batches)
num_test_batches = len(test_batches)


#print(num_train_batches)
#print(num_val_batches)
#print(num_test_batches)

In [42]:
import torch.nn.functional as F

class CNN(nn.Module):
    """Small CNN classifier: two (conv -> ReLU -> 2x2 max-pool) stages and a two-layer FC head.

    With the default arguments this is the "CNN 100K" variant: 96,658 trainable
    parameters for ``input_channels=1`` and ``output_dim=10``.

    Args:
        input_channels: number of channels of the input images.
        output_dim: number of output logits (one per class).
        conv_channels: output channels of the first and second convolution.
        fc_hidden: width of the hidden fully connected layer.
        image_size: height and width of the (square) input images. Each pooling
            stage halves it, so the FC head sees ``(image_size // 2) // 2`` pixels
            per side.
    """

    def __init__(self, input_channels, output_dim, conv_channels=(8, 16), fc_hidden=120, image_size=28):
        super(CNN, self).__init__()
        first_channels, second_channels = conv_channels
        # 3x3 convolutions with padding=1 keep the spatial size; only pooling shrinks it
        pooled_side = (image_size // 2) // 2
        self.flat_features = second_channels * pooled_side * pooled_side

        # Layers are created in the same order as before so that seeded
        # initialisation yields the same weights.
        self.conv1 = nn.Conv2d(input_channels, first_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(first_channels, second_channels, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(self.flat_features, fc_hidden)
        self.fc2 = nn.Linear(fc_hidden, output_dim)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        """Map a batch of images (N, C, H, W) to class logits (N, output_dim)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, k), this
        # cannot silently fold extra pixels into the batch dimension when the
        # input has an unexpected size; fc1 raises a shape error instead.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [44]:
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
import random
import time

# Seed every RNG in use so weight initialisation and batch shuffling are repeatable
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Data dimensions and hyperparameters
input_channels = 1       # batches are reshaped to (N, 1, 28, 28) below
output_dim = 10          # one logit per class
learning_rate = 0.003
epochs = 15
max_weights = 100000     # parameter budget of the "CNN 100K" variant

# Build the CNN defined in the previous cell
model = CNN(input_channels, output_dim)


# Count total parameters in the model
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters in the model:", total_params)

# Enforce the parameter budget *before* training. The check used to sit at the
# end of the epoch loop, so an oversized model was still trained for a full
# epoch and then silently abandoned.
if total_params > max_weights:
    raise ValueError(
        f"Model has {total_params} parameters, exceeding the budget of {max_weights}"
    )

# Define loss function and optimizer
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history
train_loss_list = np.zeros((epochs,))
validation_loss_list = np.zeros((epochs,))
validation_accuracy_list = np.zeros((epochs,))
test_accuracy = 0

start_time = time.time()

# Iterate over epochs and train the CNN model
for epoch in tqdm(range(epochs)):
    # ---- training pass ----
    model.train()
    epoch_loss = 0.0
    num_batches = 0
    for train_features, train_labels in train_batches:
        optimizer.zero_grad()
        train_features = train_features.reshape(-1, 1, 28, 28)
        outputs = model(train_features)
        loss = loss_func(outputs, train_labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1

    average_epoch_loss = epoch_loss / num_batches
    train_loss_list[epoch] = average_epoch_loss

    # ---- validation pass ----
    # Switch to eval mode once per epoch rather than once per batch
    model.eval()
    val_loss = 0
    val_acc = 0
    total_samples = 0
    with torch.no_grad():
        for val_features, val_labels in val_batches:
            val_features = val_features.reshape(-1, 1, 28, 28)
            val_outputs = model(val_features)
            loss = loss_func(val_outputs, val_labels)
            val_loss += loss.item()
            # Predicted class = index of the largest logit
            _, predicted = torch.max(val_outputs, 1)
            val_acc += (predicted == val_labels).sum().item()
            total_samples += val_labels.size(0)
    average_val_loss = val_loss / len(val_batches)
    average_val_acc = val_acc / total_samples * 100
    # Record average validation loss and accuracy for the epoch
    validation_loss_list[epoch] = average_val_loss
    validation_accuracy_list[epoch] = average_val_acc

    #print(f"Epoch {epoch + 1}/{epochs}, Validation Accuracy: {average_val_acc}%")

end_time = time.time()
training_time = end_time - start_time

# Print total training time
#print("Total training time:", training_time, "seconds")

Total parameters in the model: 96658


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [05:35<00:00, 22.38s/it]


In [45]:
# Running accuracy (in %) after each test batch; the last entry is the final test accuracy
test_accuracy_list = []

total_correct = 0
total_samples = 0

# Switch to evaluation mode once, before iterating (it does not change per batch)
model.eval()

# No gradients are needed for inference
with torch.no_grad():
    for test_features, test_labels in test_batches:
        # Make sure the batch has the (N, 1, 28, 28) layout the conv layers expect
        test_features = test_features.reshape(-1, 1, 28, 28)

        # Compute test outputs (class logits)
        test_outputs = model(test_features)

        # Predicted label = index of the largest logit
        _, predicted = torch.max(test_outputs, 1)

        # Accumulate correct predictions and sample counts
        total_correct += (predicted == test_labels).sum().item()
        total_samples += test_labels.size(0)

        # Accuracy over all batches seen so far (cumulative, not per-batch)
        test_accuracy = total_correct / total_samples * 100
        test_accuracy_list.append(test_accuracy)

#print("Test Accuracy:", test_accuracy, "%")

In [46]:
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's 'whitegrid' theme with fonts scaled by 1.5 to the plots drawn in this notebook
sns.set_theme(style='whitegrid', font_scale=1.5)

# Create subplots
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

# Plot training loss
#axes[0].plot(train_loss_list, linewidth=3)
#axes[0].set_ylabel("Training Loss")
#axes[0].set_xlabel("Epochs")

# Plot validation accuracy
#axes[1].plot(validation_accuracy_list, linewidth=3, color='gold')
#axes[1].set_ylabel("Validation Accuracy")
#axes[1].set_xlabel("Epochs")

# Remove the top and right spines from the plots
#sns.despine()

# Display the plots
#plt.show()

In [48]:
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

#axes[0].plot(train_loss_list, label='Training loss', linewidth=3)
#axes[0].plot(validation_loss_list, label='Validation loss', linewidth=3)
#axes[0].set_title('Training Loss vs Validation loss')
#axes[0].set_xlabel('Epochs')
#axes[0].set_ylabel('loss')
#axes[0].legend()

#axes[1].plot(test_accuracy_list, linewidth=3, color='gold')
#axes[1].set_title('Running Test Accuracy')
#axes[1].set_ylabel("Accuracy (%)")
#axes[1].set_xlabel("Test batches")

# Remove the top and right spines from the plots
#sns.despine()

# Display the plots
#plt.show()

**Task 4:** *Reduce the number of weights in the CNN 100K model to create CNN 50K, CNN 20K, and CNN 10K.
Train these models similarly to CNN 100K.*

For CNN 50K:

In [55]:
import torch.nn.functional as F

class CNN(nn.Module):
    """Small CNN classifier: two (conv -> ReLU -> 2x2 max-pool) stages and a two-layer FC head.

    With the default arguments this is the "CNN 50K" variant: 48,958 trainable
    parameters for ``input_channels=1`` and ``output_dim=10``.

    Args:
        input_channels: number of channels of the input images.
        output_dim: number of output logits (one per class).
        conv_channels: output channels of the first and second convolution.
        fc_hidden: width of the hidden fully connected layer.
        image_size: height and width of the (square) input images. Each pooling
            stage halves it, so the FC head sees ``(image_size // 2) // 2`` pixels
            per side.
    """

    def __init__(self, input_channels, output_dim, conv_channels=(8, 16), fc_hidden=60, image_size=28):
        super(CNN, self).__init__()
        first_channels, second_channels = conv_channels
        # 3x3 convolutions with padding=1 keep the spatial size; only pooling shrinks it
        pooled_side = (image_size // 2) // 2
        self.flat_features = second_channels * pooled_side * pooled_side

        # Layers are created in the same order as before so that seeded
        # initialisation yields the same weights.
        self.conv1 = nn.Conv2d(input_channels, first_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(first_channels, second_channels, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(self.flat_features, fc_hidden)
        self.fc2 = nn.Linear(fc_hidden, output_dim)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        """Map a batch of images (N, C, H, W) to class logits (N, output_dim)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, k), this
        # cannot silently fold extra pixels into the batch dimension when the
        # input has an unexpected size; fc1 raises a shape error instead.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [56]:
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
import random
import time

# Seed every RNG in use so weight initialisation and batch shuffling are repeatable
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Data dimensions and hyperparameters
input_channels = 1       # batches are reshaped to (N, 1, 28, 28) below
output_dim = 10          # one logit per class
learning_rate = 0.003
epochs = 15
# Parameter budget of the "CNN 50K" variant. The value 100000 had been carried
# over from the CNN 100K cell, which made the budget check meaningless here.
max_weights = 50000

# Build the CNN defined in the previous cell
model = CNN(input_channels, output_dim)


# Count total parameters in the model
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters in the model:", total_params)

# Enforce the parameter budget *before* training. The check used to sit at the
# end of the epoch loop, so an oversized model was still trained for a full
# epoch and then silently abandoned.
if total_params > max_weights:
    raise ValueError(
        f"Model has {total_params} parameters, exceeding the budget of {max_weights}"
    )

# Define loss function and optimizer
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history
train_loss_list = np.zeros((epochs,))
validation_loss_list = np.zeros((epochs,))
validation_accuracy_list = np.zeros((epochs,))
test_accuracy = 0

start_time = time.time()

# Iterate over epochs and train the CNN model
for epoch in tqdm(range(epochs)):
    # ---- training pass ----
    model.train()
    epoch_loss = 0.0
    num_batches = 0
    for train_features, train_labels in train_batches:
        optimizer.zero_grad()
        train_features = train_features.reshape(-1, 1, 28, 28)
        outputs = model(train_features)
        loss = loss_func(outputs, train_labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1

    average_epoch_loss = epoch_loss / num_batches
    train_loss_list[epoch] = average_epoch_loss

    # ---- validation pass ----
    # Switch to eval mode once per epoch rather than once per batch
    model.eval()
    val_loss = 0
    val_acc = 0
    total_samples = 0
    with torch.no_grad():
        for val_features, val_labels in val_batches:
            val_features = val_features.reshape(-1, 1, 28, 28)
            val_outputs = model(val_features)
            loss = loss_func(val_outputs, val_labels)
            val_loss += loss.item()
            # Predicted class = index of the largest logit
            _, predicted = torch.max(val_outputs, 1)
            val_acc += (predicted == val_labels).sum().item()
            total_samples += val_labels.size(0)
    average_val_loss = val_loss / len(val_batches)
    average_val_acc = val_acc / total_samples * 100
    # Record average validation loss and accuracy for the epoch
    validation_loss_list[epoch] = average_val_loss
    validation_accuracy_list[epoch] = average_val_acc

    #print(f"Epoch {epoch + 1}/{epochs}, Validation Accuracy: {average_val_acc}%")

end_time = time.time()
training_time = end_time - start_time

# Parameters per second of wall-clock training time
efficiency = total_params / training_time
#print("Weights per second:", efficiency)

# Print total training time
#print("Total training time:", training_time, "seconds")

Total parameters in the model: 48958


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [05:27<00:00, 21.83s/it]


In [51]:
# Running accuracy (in %) after each test batch; the last entry is the final test accuracy
test_accuracy_list = []

total_correct = 0
total_samples = 0

# Switch to evaluation mode once, before iterating (it does not change per batch)
model.eval()

# No gradients are needed for inference
with torch.no_grad():
    for test_features, test_labels in test_batches:
        # Make sure the batch has the (N, 1, 28, 28) layout the conv layers expect
        test_features = test_features.reshape(-1, 1, 28, 28)

        # Compute test outputs (class logits)
        test_outputs = model(test_features)

        # Predicted label = index of the largest logit
        _, predicted = torch.max(test_outputs, 1)

        # Accumulate correct predictions and sample counts
        total_correct += (predicted == test_labels).sum().item()
        total_samples += test_labels.size(0)

        # Accuracy over all batches seen so far (cumulative, not per-batch)
        test_accuracy = total_correct / total_samples * 100
        test_accuracy_list.append(test_accuracy)

#print("Test Accuracy:", test_accuracy, "%")

In [52]:
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's 'whitegrid' theme with fonts scaled by 1.5 to the plots drawn in this notebook
sns.set_theme(style='whitegrid', font_scale=1.5)

# Create subplots
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

# Plot training loss
#axes[0].plot(train_loss_list, linewidth=3)
#axes[0].set_ylabel("Training Loss")
#axes[0].set_xlabel("Epochs")

# Plot validation accuracy
#axes[1].plot(validation_accuracy_list, linewidth=3, color='gold')
#axes[1].set_ylabel("Validation Accuracy")
#axes[1].set_xlabel("Epochs")

# Remove the top and right spines from the plots
#sns.despine()

# Display the plots
#plt.show()

In [53]:
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

#axes[0].plot(train_loss_list, label='Training loss', linewidth=3)
#axes[0].plot(validation_loss_list, label='Validation loss', linewidth=3)
#axes[0].set_title('Training Loss vs Validation loss')
#axes[0].set_xlabel('Epochs')
#axes[0].set_ylabel('loss')
#axes[0].legend()

**Task 4:**

For CNN 20K:

In [57]:
import torch.nn.functional as F

class CNN(nn.Module):
    """Small CNN classifier: two (conv -> ReLU -> 2x2 max-pool) stages and a two-layer FC head.

    With the default arguments this is the "CNN 20K" variant: 18,481 trainable
    parameters for ``input_channels=1`` and ``output_dim=10``.

    Args:
        input_channels: number of channels of the input images.
        output_dim: number of output logits (one per class).
        conv_channels: output channels of the first and second convolution.
        fc_hidden: width of the hidden fully connected layer.
        image_size: height and width of the (square) input images. Each pooling
            stage halves it, so the FC head sees ``(image_size // 2) // 2`` pixels
            per side.
    """

    def __init__(self, input_channels, output_dim, conv_channels=(4, 8), fc_hidden=45, image_size=28):
        super(CNN, self).__init__()
        first_channels, second_channels = conv_channels
        # 3x3 convolutions with padding=1 keep the spatial size; only pooling shrinks it
        pooled_side = (image_size // 2) // 2
        self.flat_features = second_channels * pooled_side * pooled_side

        # Layers are created in the same order as before so that seeded
        # initialisation yields the same weights.
        self.conv1 = nn.Conv2d(input_channels, first_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(first_channels, second_channels, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(self.flat_features, fc_hidden)
        self.fc2 = nn.Linear(fc_hidden, output_dim)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        """Map a batch of images (N, C, H, W) to class logits (N, output_dim)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, k), this
        # cannot silently fold extra pixels into the batch dimension when the
        # input has an unexpected size; fc1 raises a shape error instead.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [58]:
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
import random
import time

# Seed every RNG in use so weight initialisation and batch shuffling are repeatable
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Data dimensions and hyperparameters
input_channels = 1       # batches are reshaped to (N, 1, 28, 28) below
output_dim = 10          # one logit per class
learning_rate = 0.003
epochs = 15
# Parameter budget of the "CNN 20K" variant. The value 100000 had been carried
# over from the CNN 100K cell, which made the budget check meaningless here.
max_weights = 20000

# Build the CNN defined in the previous cell
model = CNN(input_channels, output_dim)


# Count total parameters in the model
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters in the model:", total_params)

# Enforce the parameter budget *before* training. The check used to sit at the
# end of the epoch loop, so an oversized model was still trained for a full
# epoch and then silently abandoned.
if total_params > max_weights:
    raise ValueError(
        f"Model has {total_params} parameters, exceeding the budget of {max_weights}"
    )

# Define loss function and optimizer
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history
train_loss_list = np.zeros((epochs,))
validation_loss_list = np.zeros((epochs,))
validation_accuracy_list = np.zeros((epochs,))
test_accuracy = 0

start_time = time.time()

# Iterate over epochs and train the CNN model
for epoch in tqdm(range(epochs)):
    # ---- training pass ----
    model.train()
    epoch_loss = 0.0
    num_batches = 0
    for train_features, train_labels in train_batches:
        optimizer.zero_grad()
        train_features = train_features.reshape(-1, 1, 28, 28)
        outputs = model(train_features)
        loss = loss_func(outputs, train_labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1

    average_epoch_loss = epoch_loss / num_batches
    train_loss_list[epoch] = average_epoch_loss

    # ---- validation pass ----
    # Switch to eval mode once per epoch rather than once per batch
    model.eval()
    val_loss = 0
    val_acc = 0
    total_samples = 0
    with torch.no_grad():
        for val_features, val_labels in val_batches:
            val_features = val_features.reshape(-1, 1, 28, 28)
            val_outputs = model(val_features)
            loss = loss_func(val_outputs, val_labels)
            val_loss += loss.item()
            # Predicted class = index of the largest logit
            _, predicted = torch.max(val_outputs, 1)
            val_acc += (predicted == val_labels).sum().item()
            total_samples += val_labels.size(0)
    average_val_loss = val_loss / len(val_batches)
    average_val_acc = val_acc / total_samples * 100
    # Record average validation loss and accuracy for the epoch
    validation_loss_list[epoch] = average_val_loss
    validation_accuracy_list[epoch] = average_val_acc

    #print(f"Epoch {epoch + 1}/{epochs}, Validation Accuracy: {average_val_acc}%")

end_time = time.time()
training_time = end_time - start_time

# Parameters per second of wall-clock training time
efficiency = total_params / training_time
#print("Weights per second:", efficiency)

# Print total training time
#print("Total training time:", training_time, "seconds")

Total parameters in the model: 18481


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [04:33<00:00, 18.20s/it]


In [59]:
# Running accuracy (in %) after each test batch; the last entry is the final test accuracy
test_accuracy_list = []

total_correct = 0
total_samples = 0

# Switch to evaluation mode once, before iterating (it does not change per batch)
model.eval()

# No gradients are needed for inference
with torch.no_grad():
    for test_features, test_labels in test_batches:
        # Make sure the batch has the (N, 1, 28, 28) layout the conv layers expect
        test_features = test_features.reshape(-1, 1, 28, 28)

        # Compute test outputs (class logits)
        test_outputs = model(test_features)

        # Predicted label = index of the largest logit
        _, predicted = torch.max(test_outputs, 1)

        # Accumulate correct predictions and sample counts
        total_correct += (predicted == test_labels).sum().item()
        total_samples += test_labels.size(0)

        # Accuracy over all batches seen so far (cumulative, not per-batch)
        test_accuracy = total_correct / total_samples * 100
        test_accuracy_list.append(test_accuracy)

#print("Test Accuracy:", test_accuracy, "%")

In [60]:
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's 'whitegrid' theme with fonts scaled by 1.5 to the plots drawn in this notebook
sns.set_theme(style='whitegrid', font_scale=1.5)

# Create subplots
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

# Plot training loss
#axes[0].plot(train_loss_list, linewidth=3)
#axes[0].set_ylabel("Training Loss")
#axes[0].set_xlabel("Epochs")

# Plot validation accuracy
#axes[1].plot(validation_accuracy_list, linewidth=3, color='gold')
#axes[1].set_ylabel("Validation Accuracy")
#axes[1].set_xlabel("Epochs")

# Remove the top and right spines from the plots
#sns.despine()

# Display the plots
#plt.show()

**Task 4:**

For CNN 10K:

In [61]:
import torch.nn.functional as F

class CNN(nn.Module):
    """Small CNN classifier: two (conv -> ReLU -> 2x2 max-pool) stages and a two-layer FC head.

    With the default arguments this is the "CNN 10K" variant: 9,212 trainable
    parameters for ``input_channels=1`` and ``output_dim=10``.

    Args:
        input_channels: number of channels of the input images.
        output_dim: number of output logits (one per class).
        conv_channels: output channels of the first and second convolution.
        fc_hidden: width of the hidden fully connected layer.
        image_size: height and width of the (square) input images. Each pooling
            stage halves it, so the FC head sees ``(image_size // 2) // 2`` pixels
            per side.
    """

    def __init__(self, input_channels, output_dim, conv_channels=(4, 8), fc_hidden=22, image_size=28):
        super(CNN, self).__init__()
        first_channels, second_channels = conv_channels
        # 3x3 convolutions with padding=1 keep the spatial size; only pooling shrinks it
        pooled_side = (image_size // 2) // 2
        self.flat_features = second_channels * pooled_side * pooled_side

        # Layers are created in the same order as before so that seeded
        # initialisation yields the same weights.
        self.conv1 = nn.Conv2d(input_channels, first_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(first_channels, second_channels, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(self.flat_features, fc_hidden)
        self.fc2 = nn.Linear(fc_hidden, output_dim)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        """Map a batch of images (N, C, H, W) to class logits (N, output_dim)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, k), this
        # cannot silently fold extra pixels into the batch dimension when the
        # input has an unexpected size; fc1 raises a shape error instead.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [62]:
import torch
import torch.nn as nn
import numpy as np
from tqdm import tqdm
import random
import time

# Seed every RNG in use so weight initialisation and batch shuffling are repeatable
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Data dimensions and hyperparameters
input_channels = 1       # batches are reshaped to (N, 1, 28, 28) below
output_dim = 10          # one logit per class
learning_rate = 0.003
epochs = 15
# Parameter budget of the "CNN 10K" variant. The value 100000 had been carried
# over from the CNN 100K cell, which made the budget check meaningless here.
max_weights = 10000

# Build the CNN defined in the previous cell
model = CNN(input_channels, output_dim)


# Count total parameters in the model
total_params = sum(p.numel() for p in model.parameters())
print("Total parameters in the model:", total_params)

# Enforce the parameter budget *before* training. The check used to sit at the
# end of the epoch loop, so an oversized model was still trained for a full
# epoch and then silently abandoned.
if total_params > max_weights:
    raise ValueError(
        f"Model has {total_params} parameters, exceeding the budget of {max_weights}"
    )

# Define loss function and optimizer
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Per-epoch history
train_loss_list = np.zeros((epochs,))
validation_loss_list = np.zeros((epochs,))
validation_accuracy_list = np.zeros((epochs,))
test_accuracy = 0

start_time = time.time()

# Iterate over epochs and train the CNN model
for epoch in tqdm(range(epochs)):
    # ---- training pass ----
    model.train()
    epoch_loss = 0.0
    num_batches = 0
    for train_features, train_labels in train_batches:
        optimizer.zero_grad()
        train_features = train_features.reshape(-1, 1, 28, 28)
        outputs = model(train_features)
        loss = loss_func(outputs, train_labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        num_batches += 1

    average_epoch_loss = epoch_loss / num_batches
    train_loss_list[epoch] = average_epoch_loss

    # ---- validation pass ----
    # Switch to eval mode once per epoch rather than once per batch
    model.eval()
    val_loss = 0
    val_acc = 0
    total_samples = 0
    with torch.no_grad():
        for val_features, val_labels in val_batches:
            val_features = val_features.reshape(-1, 1, 28, 28)
            val_outputs = model(val_features)
            loss = loss_func(val_outputs, val_labels)
            val_loss += loss.item()
            # Predicted class = index of the largest logit
            _, predicted = torch.max(val_outputs, 1)
            val_acc += (predicted == val_labels).sum().item()
            total_samples += val_labels.size(0)
    average_val_loss = val_loss / len(val_batches)
    average_val_acc = val_acc / total_samples * 100
    # Record average validation loss and accuracy for the epoch
    validation_loss_list[epoch] = average_val_loss
    validation_accuracy_list[epoch] = average_val_acc

    #print(f"Epoch {epoch + 1}/{epochs}, Validation Accuracy: {average_val_acc}%")

end_time = time.time()
training_time = end_time - start_time

# Print total training time
#print("Total training time:", training_time, "seconds")

Total parameters in the model: 9212


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [04:39<00:00, 18.66s/it]


In [63]:
# Running accuracy (in %) after each test batch; the last entry is the final test accuracy
test_accuracy_list = []

total_correct = 0
total_samples = 0

# Switch to evaluation mode once, before iterating (it does not change per batch)
model.eval()

# No gradients are needed for inference
with torch.no_grad():
    for test_features, test_labels in test_batches:
        # Make sure the batch has the (N, 1, 28, 28) layout the conv layers expect
        test_features = test_features.reshape(-1, 1, 28, 28)

        # Compute test outputs (class logits)
        test_outputs = model(test_features)

        # Predicted label = index of the largest logit
        _, predicted = torch.max(test_outputs, 1)

        # Accumulate correct predictions and sample counts
        total_correct += (predicted == test_labels).sum().item()
        total_samples += test_labels.size(0)

        # Accuracy over all batches seen so far (cumulative, not per-batch)
        test_accuracy = total_correct / total_samples * 100
        test_accuracy_list.append(test_accuracy)

#print("Test Accuracy:", test_accuracy, "%")

In [64]:
import seaborn as sns
import matplotlib.pyplot as plt

#sns.set_theme(style='whitegrid', font_scale=1.5)

#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

# Plot training loss
#axes[0].plot(train_loss_list, linewidth=3)
#axes[0].set_ylabel("Training Loss")
#axes[0].set_xlabel("Epochs")

# Plot validation accuracy
#axes[1].plot(validation_accuracy_list, linewidth=3, color='gold')
#axes[1].set_ylabel("Validation Accuracy")
#axes[1].set_xlabel("Epochs")

#sns.despine()
#plt.show()

In [65]:
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

#axes[0].plot(train_loss_list, label='Training loss', linewidth=3)
#axes[0].plot(validation_loss_list, label='Validation loss', linewidth=3)
#axes[0].set_title('Training Loss vs Validation loss')
#axes[0].set_xlabel('Epochs')
#axes[0].set_ylabel('loss')
#axes[0].legend()

**Bonus:** *Pick a CNN variant from above and then pick several input samples from different
classes. Visualize some of the feature maps of the convolutional layers for these samples (e.g. display
the feature maps in a grid of nxn).*

In [54]:
import matplotlib.pyplot as plt

# Set model to evaluation mode
model.eval()

# Pick the first test image of each requested class, so the samples are
# guaranteed to come from different classes. Samples are read straight from
# `test_dataset` rather than from whatever batch the last evaluation loop
# happened to leave behind in `test_features` / `test_labels`.
sample_classes = [0, 1, 2, 3]
all_test_targets = torch.as_tensor(test_dataset.targets)
sample_indices = [
    int((all_test_targets == class_id).nonzero()[0])
    for class_id in sample_classes
]
num_samples = len(sample_indices)

# conv1 feature maps for each selected sample, keyed by dataset index
feature_maps_by_index = {}

# Extract feature maps without tracking gradients
with torch.no_grad():
    for idx in sample_indices:
        input_image, label = test_dataset[idx]      # (1, 28, 28) tensor and its class id
        input_image = input_image.unsqueeze(0)      # add the batch dimension -> (1, 1, 28, 28)
        # Output of the first convolutional layer (before ReLU and pooling)
        feature_maps = model.conv1(input_image)
        feature_maps_by_index[idx] = feature_maps

        # Visualization
        #num_feature_maps = feature_maps.size(1)
        #fig, axes = plt.subplots(1, num_feature_maps, figsize=(12, 2))

        #for i in range(num_feature_maps):
            #axes[i].imshow(feature_maps[0, i].detach().cpu(), cmap='gray')
            #axes[i].axis('off')
            #axes[i].set_title(f'Feature Map {i+1}')

        #plt.suptitle(f'Input Sample {idx} (Class: {label})')
        #plt.show()