**Task 3:** 
(a) Consider different optimizers including RMSProp, Adam and SGD with different learning rates.
You can find the corresponding functions in the torch.optim library. Log your training loss,
validation and test accuracy. Compare these optimizers, and observe which optimizer is most
suitable for your FCN.

In [5]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import FashionMNIST
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [6]:
# Load Fashion-MNIST, carve a validation set out of the training data, and wrap
# every split in a DataLoader.
# The dataset is downloaded into `data_root` on first use (change this to your
# own data folder name).
data_root = 'C:\\Users\\Sarayu G\\582\\'

# Both splits share one preprocessing pipeline: convert each image to a tensor,
# then standardize the single channel with a fixed mean / std.
# NOTE(review): (0.1307, 0.3081) look like the statistics usually quoted for
# MNIST digits; confirm whether Fashion-MNIST statistics were intended.
normalize_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])

train_dataset = torchvision.datasets.FashionMNIST(
    data_root, train=True, download=True, transform=normalize_transform)
test_dataset = torchvision.datasets.FashionMNIST(
    data_root, train=False, download=True, transform=normalize_transform)


# Hold out a stratified 10% of the training data for validation.
# The seed is fixed so the same samples land in the validation set every time
# the notebook is run, which keeps results comparable across sessions.
split_seed = 0
train_indices, val_indices, _, _ = train_test_split(
    range(len(train_dataset)),
    train_dataset.targets,
    stratify=train_dataset.targets,
    test_size=0.1,
    random_state=split_seed,
)

# Generate training and validation subsets based on indices
train_split = Subset(train_dataset, train_indices)
val_split = Subset(train_dataset, val_indices)


# Batch sizes
train_batch_size = 900   # used for the training and validation loaders
test_batch_size = 1000   # evaluation only, so it can be larger than the train batch size


# DataLoaders that iterate over batches for training, validation and testing.
# Only the training loader is shuffled: evaluation metrics do not depend on
# sample order, so shuffling the other two would be wasted work.
train_batches = DataLoader(train_split, batch_size=train_batch_size, shuffle=True)
val_batches = DataLoader(val_split, batch_size=train_batch_size, shuffle=False)
test_batches = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

num_train_batches = len(train_batches)
num_val_batches = len(val_batches)
num_test_batches = len(test_batches)


#print(num_train_batches)
#print(num_val_batches)
#print(num_test_batches)

In [7]:
class FCN(nn.Module):
    """Fully connected network: a stack of Linear+ReLU layers and a linear head.

    Args:
        input_dim: Number of features in each (flattened) input sample.
        output_dim: Number of output units; they are returned without any
            activation applied.
        hidden_dims: Non-empty sequence giving the width of each hidden layer.
    """

    def __init__(self, input_dim, output_dim, hidden_dims):
        super().__init__()
        # The first hidden layer maps from the input; each later one maps
        # between consecutive hidden widths.
        stack = [nn.Linear(input_dim, hidden_dims[0])]
        stack.extend(
            nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(hidden_dims, hidden_dims[1:])
        )
        self.hidden_layers = nn.ModuleList(stack)
        self.output_layer = nn.Linear(hidden_dims[-1], output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply every hidden layer followed by ReLU, then the output layer."""
        activations = x
        for linear in self.hidden_layers:
            activations = self.relu(linear(activations))
        return self.output_layer(activations)

In [8]:
import numpy as np
from tqdm import tqdm

# Optimizers to compare; each one is trained once per learning rate below.
optimizers = {
    'SGD': optim.SGD,
    'RMSprop': optim.RMSprop,
    'Adam': optim.Adam,
}
input_dim = 784
output_dim = 10
learning_rates = [0.001, 0.01, 0.05, 0.1]
epochs = 15

# One dict per (optimizer, learning rate) run.
results = []

# The loss module holds no trainable state, so one instance serves every run.
loss_func = nn.CrossEntropyLoss()

# Iterate over optimizer configurations
for optimizer_name, optimizer_class in optimizers.items():
    for lr in learning_rates:
        # Fresh model per configuration so no run starts from trained weights.
        model = FCN(input_dim=input_dim, output_dim=output_dim, hidden_dims=[400, 400])
        optimizer = optimizer_class(model.parameters(), lr=lr)

        # Per-epoch metrics for this run.
        train_loss_list = np.zeros((epochs,))
        validation_loss_list = np.zeros((epochs,))
        validation_accuracy_list = np.zeros((epochs,))

        for epoch in tqdm(range(epochs)):
            # ---- training pass ----
            model.train()
            train_loss_sum = 0.0
            train_seen = 0
            for features, labels in train_batches:
                optimizer.zero_grad()
                outputs = model(features.reshape(-1, input_dim))
                loss = loss_func(outputs, labels)
                loss.backward()
                optimizer.step()

                # Weight each batch-mean loss by its batch size so the epoch
                # figure is a true per-sample mean even if the last batch is
                # smaller than the others.
                batch_size = labels.size(0)
                train_loss_sum += loss.item() * batch_size
                train_seen += batch_size
            train_loss_list[epoch] = train_loss_sum / train_seen

            # ---- validation pass ----
            # Switch to eval mode once per pass instead of once per batch.
            model.eval()
            val_loss_sum = 0.0
            val_correct = 0
            val_seen = 0
            with torch.no_grad():
                for features, labels in val_batches:
                    val_outputs = model(features.reshape(-1, input_dim))
                    batch_size = labels.size(0)
                    val_loss_sum += loss_func(val_outputs, labels).item() * batch_size
                    _, predicted = torch.max(val_outputs, 1)
                    val_correct += (predicted == labels).sum().item()
                    val_seen += batch_size
            validation_loss_list[epoch] = val_loss_sum / val_seen
            validation_accuracy_list[epoch] = val_correct / val_seen * 100

        # ---- test pass (once, after the final epoch) ----
        model.eval()
        total_correct = 0
        total_samples = 0
        with torch.no_grad():
            for features, labels in test_batches:
                test_outputs = model(features.reshape(-1, input_dim))
                _, predicted = torch.max(test_outputs, 1)
                total_correct += (predicted == labels).sum().item()
                total_samples += labels.size(0)
        test_accuracy = total_correct / total_samples * 100

        # Store results
        results.append({
            'Optimizer': optimizer_name,
            'Learning Rate': lr,
            'Train Loss': train_loss_list,
            'Validation Loss': validation_loss_list,
            'Validation Accuracy': validation_accuracy_list,
            'Test Accuracy': test_accuracy,
        })


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [05:07<00:00, 20.47s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [06:34<00:00, 26.30s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [05:32<00:00, 22.15s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [04:03<00:00, 16.26s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [03:50<00:00, 15.35s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [04:16<00:00, 17.08s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [04:30<00:00, 18.01s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [04:31<00:00, 18.08s/it]
100%|███████████████████████████████████

In [90]:
# Print results
#for result in results:
    #print("Optimizer:", result['Optimizer'])
    #print("Learning Rate:", result['Learning Rate'])
    #print ("Validation Accuracy:", result['Validation Accuracy'])
    #print("Test Accuracy:", result['Test Accuracy'])
    #print("Training loss:", result['Train Loss'])
    #print()
    

In [88]:
import seaborn as sns
import matplotlib.pyplot as plt

#sns.set(style='whitegrid', font_scale=1.5)

# Filter results for Adam optimizer with learning rate 0.001
#adam_results = [result for result in results if result['Optimizer'] == 'Adam' and result['Learning Rate'] == 0.001]

# Extract data
#adam_train_loss = adam_results[0]['Train Loss']
#adam_validation_accuracy = adam_results[0]['Validation Accuracy']
#adam_validation_loss = adam_results[0]['Validation Loss']

# Create subplots with 1 row and 2 columns
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

# Plot training loss
#axes[0].plot(adam_train_loss, label='Training Loss', linewidth=3)
#axes[0].set_title('Training Loss for Adam (lr=0.001)')
#axes[0].set_xlabel('Epochs')
#axes[0].set_ylabel('Loss')

# Plot validation accuracy
#axes[1].plot(adam_validation_accuracy, label='Validation Accuracy', linewidth=3, color='gold')
#axes[1].set_title('Validation Accuracy for Adam (lr=0.001)')
#axes[1].set_ylabel('Validation Accuracy')

#plt.tight_layout()
#plt.show()

**Task 3:**
(b) Analyze the overfitting/underfitting situation of your model. Include Dropout regularization and
discuss whether this improves performance.

In [18]:
import torch.nn.functional as F

class FCNWithDropout(nn.Module):
    """Fully connected ReLU network with dropout after every hidden layer.

    Args:
        input_dim: Number of features in each (flattened) input sample.
        output_dim: Number of output units; returned without an activation.
        hidden_dims: Non-empty sequence giving the width of each hidden layer.
        dropout_rate: Probability passed to ``nn.Dropout``.
    """

    def __init__(self, input_dim, output_dim, hidden_dims, dropout_rate):
        super().__init__()
        linears = [nn.Linear(input_dim, hidden_dims[0])]
        linears.extend(
            nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(hidden_dims, hidden_dims[1:])
        )
        self.hidden_layers = nn.ModuleList(linears)
        self.output_layer = nn.Linear(hidden_dims[-1], output_dim)
        self.relu = nn.ReLU()
        # A single Dropout module is reused after each hidden layer.
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Run Linear -> ReLU -> Dropout per hidden layer, then the output layer."""
        hidden = x
        for linear in self.hidden_layers:
            hidden = self.dropout(self.relu(linear(hidden)))
        return self.output_layer(hidden)

In [89]:
# Train the Adam (lr=0.001) configuration again, this time with dropout.
# `epochs` and the three DataLoaders come from the earlier cells.
dropout_rate = 0.5
model_with_dropout = FCNWithDropout(input_dim=784, output_dim=10, hidden_dims=[400, 400], dropout_rate=dropout_rate)
optimizer = optim.Adam(model_with_dropout.parameters(), lr=0.001)
# Create the loss here instead of relying on whatever `loss_func` an earlier
# cell happened to leave behind.
loss_func = nn.CrossEntropyLoss()

# Per-epoch metrics
train_loss_list_with_dropout = []
validation_accuracy_list_with_dropout = []
validation_loss_list_with_dropout = []

for epoch in tqdm(range(epochs)):
    # ---- training pass (dropout active) ----
    model_with_dropout.train()
    train_loss_sum = 0.0
    train_seen = 0
    for features, labels in train_batches:
        optimizer.zero_grad()
        outputs = model_with_dropout(features.reshape(-1, 784))
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch-mean loss by its batch size so the epoch figure is
        # a true per-sample mean even if the last batch is smaller.
        batch_size = labels.size(0)
        train_loss_sum += loss.item() * batch_size
        train_seen += batch_size
    train_loss_list_with_dropout.append(train_loss_sum / train_seen)

    # ---- validation pass (dropout disabled by eval mode) ----
    model_with_dropout.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0
    with torch.no_grad():
        for features, labels in val_batches:
            val_outputs = model_with_dropout(features.reshape(-1, 784))
            batch_size = labels.size(0)
            val_loss_sum += loss_func(val_outputs, labels).item() * batch_size
            _, predicted = torch.max(val_outputs, 1)
            val_correct += (predicted == labels).sum().item()
            val_seen += batch_size
    validation_loss_list_with_dropout.append(val_loss_sum / val_seen)
    validation_accuracy_list_with_dropout.append(val_correct / val_seen * 100)

# ---- test pass (once, after the final epoch) ----
model_with_dropout.eval()
total_correct = 0
total_samples = 0
with torch.no_grad():
    for features, labels in test_batches:
        test_outputs = model_with_dropout(features.reshape(-1, 784))
        _, predicted = torch.max(test_outputs, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
test_accuracy_with = total_correct / total_samples * 100
#print("Test Accuracy:", test_accuracy_with, "%")


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [04:07<00:00, 16.48s/it]


In [87]:
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

# Plot training loss
#axes[0].plot(adam_train_loss, label='Without Dropout', linewidth=3)
#axes[0].plot(train_loss_list_with_dropout, label='With Dropout', linewidth=3)
#axes[0].set_title('Training Loss for Adam (lr=0.001) with vs without dropout')
#axes[0].set_xlabel('Epochs')
#axes[0].set_ylabel('Training Loss')
#axes[0].legend()

# Plot validation accuracy
#axes[1].plot(adam_validation_accuracy, label='Without Dropout', linewidth=3, color='gold')
#axes[1].plot(validation_accuracy_list_with_dropout, label='With Dropout', linewidth=3)
#axes[1].set_title('Validation Accuracy for Adam (lr=0.001) with vs without dropout')
#axes[1].set_ylabel('Validation Accuracy')
#axes[1].legend()

#plt.tight_layout()
#plt.show()

In [86]:
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

#axes[0].plot(adam_train_loss, label='Training loss', linewidth=3)
#axes[0].plot(adam_validation_loss, label='Validation loss', linewidth=3)
#axes[0].set_title('Training Loss vs Validation loss without dropout')
#axes[0].set_xlabel('Epochs')
#axes[0].set_ylabel('loss')
#axes[0].legend()

#axes[1].plot(train_loss_list_with_dropout, label='Training loss with dropout', linewidth=3)
#axes[1].plot(validation_loss_list_with_dropout, label='Validation loss with dropout', linewidth=3)
#axes[1].set_title('Training Loss vs Validation Loss with dropout')
#axes[1].set_xlabel('Epochs')
#axes[1].set_ylabel('loss')
#axes[1].legend()

**Task 3:** (c) Consider different initializations, such as Random Normal, Xavier Normal, Kaiming (He) Uniform.

In [41]:
import torch.nn.init as init

class FCN(nn.Module):
    """Fully connected ReLU network with a selectable weight initialization.

    Args:
        input_dim: Number of features in each (flattened) input sample.
        output_dim: Number of output units; returned without an activation.
        hidden_dims: Non-empty sequence giving the width of each hidden layer.
        initialization: One of ``'random_normal'``, ``'xavier_normal'`` or
            ``'kaiming_uniform'``. ``None`` keeps whatever ``nn.Linear`` set up
            when the layers were constructed.

    Raises:
        ValueError: If ``initialization`` is neither ``None`` nor one of the
            supported names. A misspelled name used to be ignored silently,
            which made a run look like it used the requested scheme when it
            did not.
    """

    def __init__(self, input_dim, output_dim, hidden_dims, initialization='xavier_normal'):
        super().__init__()
        layers = [nn.Linear(input_dim, hidden_dims[0])]
        layers.extend(
            nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(hidden_dims, hidden_dims[1:])
        )
        self.hidden_layers = nn.ModuleList(layers)
        self.output_layer = nn.Linear(hidden_dims[-1], output_dim)
        self.relu = nn.ReLU()

        # Dispatch on the requested scheme instead of an if/elif chain.
        initializers = {
            'random_normal': self.initialize_weights_random_normal,
            'xavier_normal': self.initialize_weights_xavier_normal,
            'kaiming_uniform': self.initialize_weights_kaiming_uniform,
        }
        if initialization is not None:
            if initialization not in initializers:
                raise ValueError(
                    f"Unknown initialization {initialization!r}; "
                    f"expected one of {sorted(initializers)} or None"
                )
            initializers[initialization]()

    def _apply_weight_init(self, weight_init):
        """Run ``weight_init`` on every weight matrix and zero every bias."""
        for layer in list(self.hidden_layers) + [self.output_layer]:
            weight_init(layer.weight)
            init.constant_(layer.bias, 0.0)

    def initialize_weights_random_normal(self):
        """Draw all weights from N(0, 1) and set all biases to zero."""
        self._apply_weight_init(lambda weight: init.normal_(weight, mean=0.0, std=1.0))

    def initialize_weights_xavier_normal(self):
        """Apply Xavier-normal initialization to all weights; zero all biases."""
        self._apply_weight_init(init.xavier_normal_)

    def initialize_weights_kaiming_uniform(self):
        """Apply Kaiming-uniform (fan_in, ReLU gain) to all weights; zero all biases.

        NOTE(review): the output layer gets the ReLU gain as well even though
        ``forward`` applies no ReLU after it; kept as in the original.
        """
        self._apply_weight_init(
            lambda weight: init.kaiming_uniform_(weight, mode='fan_in', nonlinearity='relu')
        )

    def forward(self, x):
        """Apply every hidden layer followed by ReLU, then the output layer."""
        for layer in self.hidden_layers:
            x = self.relu(layer(x))
        return self.output_layer(x)


In [42]:
input_dim = 784
output_dim = 10
learning_rate = 0.001
epochs = 15
hidden_dims = [400, 400]

# One dict per initialization scheme.
results = []

# Weight-initialization schemes to compare (optimizer and learning rate are fixed).
initializations = ['random_normal', 'xavier_normal', 'kaiming_uniform']

for initialization in initializations:
    # Fresh model, optimizer and loss for every scheme.
    model = FCN(input_dim=input_dim, output_dim=output_dim, hidden_dims=hidden_dims, initialization=initialization)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_func = nn.CrossEntropyLoss()

    train_loss_list = []
    validation_loss_list = []
    validation_accuracy_list = []

    # The progress bar is labelled with the scheme being trained.
    for epoch in tqdm(range(epochs), desc=f'Initialization: {initialization}'):
        # ---- training pass ----
        model.train()
        train_loss_sum = 0.0
        train_seen = 0
        for features, labels in train_batches:
            optimizer.zero_grad()
            outputs = model(features.reshape(-1, input_dim))
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight each batch-mean loss by its batch size so the epoch figure
            # is a true per-sample mean even if the last batch is smaller.
            batch_size = labels.size(0)
            train_loss_sum += loss.item() * batch_size
            train_seen += batch_size
        train_loss_list.append(train_loss_sum / train_seen)

        # ---- validation pass ----
        # Switch to eval mode once per pass instead of once per batch.
        model.eval()
        val_loss_sum = 0.0
        val_correct = 0
        val_seen = 0
        with torch.no_grad():
            for features, labels in val_batches:
                val_outputs = model(features.reshape(-1, input_dim))
                batch_size = labels.size(0)
                val_loss_sum += loss_func(val_outputs, labels).item() * batch_size
                _, predicted = torch.max(val_outputs, 1)
                val_correct += (predicted == labels).sum().item()
                val_seen += batch_size
        validation_loss_list.append(val_loss_sum / val_seen)
        validation_accuracy_list.append(val_correct / val_seen * 100)

    # ---- test pass (once, after the final epoch) ----
    model.eval()
    total_correct = 0
    total_samples = 0
    with torch.no_grad():
        for features, labels in test_batches:
            test_outputs = model(features.reshape(-1, input_dim))
            _, predicted = torch.max(test_outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)
    test_accuracy = total_correct / total_samples * 100

    # Store results
    results.append({
        'Initialization': initialization,
        'Train Loss': train_loss_list,
        'Validation Loss': validation_loss_list,
        'Validation Accuracy': validation_accuracy_list,
        'Test Accuracy': test_accuracy,
    })


Optimization: random_normal: 100%|█████████████████████████████████████████████████████| 15/15 [07:58<00:00, 31.90s/it]
Optimization: xavier_normal: 100%|█████████████████████████████████████████████████████| 15/15 [08:00<00:00, 32.03s/it]
Optimization: kaiming_uniform: 100%|███████████████████████████████████████████████████| 15/15 [07:59<00:00, 31.97s/it]


In [85]:
# Print results
#for result in results:
    #print("Initialization:", result['Initialization'])
    #print("Validation Loss:", result['Validation Loss'])
    #print ("Validation Accuracy:", result['Validation Accuracy'])
    #print("Test Accuracy:", result['Test Accuracy'])
    #print("Training loss:", result['Train Loss'])
    #print()

In [84]:
#fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(16, 18))

#for i, result in enumerate(results):
    #initialization = result['Initialization']
    #axes[i, 0].plot(result['Train Loss'], label='Training loss', linewidth=3)
    #axes[i, 0].plot(result['Validation Loss'], label='Validation loss', linewidth=3)
    #axes[i, 0].set_title(f'Training Loss vs Validation Loss - {initialization}')
    #axes[i, 0].set_xlabel('Epochs')
    #axes[i, 0].set_ylabel('Loss')
    #axes[i, 0].legend()

    #axes[i, 1].plot(result['Validation Accuracy'], label='Validation Accuracy', linewidth=3)
    #axes[i, 1].set_title(f'Validation Accuracy - {initialization}')
    #axes[i, 1].set_xlabel('Epochs')
    #axes[i, 1].set_ylabel('Accuracy (%)')
    #axes[i, 1].legend()

#plt.tight_layout()
#plt.show()

**Task 3:** (d) Include normalization such as Batch Normalization.

In [73]:
import torch.nn.functional as F

class FCNWithBatchNorm(nn.Module):
    """Fully connected network with Linear -> BatchNorm1d -> ReLU hidden blocks.

    Args:
        input_dim: Number of features in each (flattened) input sample.
        output_dim: Number of output units; returned without an activation.
        hidden_dims: Non-empty sequence giving the width of each hidden layer.
    """

    def __init__(self, input_dim, output_dim, hidden_dims):
        super().__init__()
        linears = []
        norms = []
        fan_in = input_dim
        # One Linear and one matching BatchNorm1d per hidden width.
        for width in hidden_dims:
            linears.append(nn.Linear(fan_in, width))
            norms.append(nn.BatchNorm1d(width))
            fan_in = width
        self.hidden_layers = nn.ModuleList(linears)
        self.batch_norms = nn.ModuleList(norms)
        self.output_layer = nn.Linear(hidden_dims[-1], output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Normalize each hidden pre-activation before ReLU, then apply the output layer."""
        hidden = x
        for linear, norm in zip(self.hidden_layers, self.batch_norms):
            hidden = self.relu(norm(linear(hidden)))
        return self.output_layer(hidden)

In [74]:
input_dim = 784
output_dim = 10
learning_rate = 0.001
epochs = 15
hidden_dims = [400, 400]

model = FCNWithBatchNorm(input_dim=input_dim, output_dim=output_dim, hidden_dims=hidden_dims)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_func = nn.CrossEntropyLoss()

# Keyed by model name.
results = {}

# Per-epoch metrics
train_loss_list = []
validation_loss_list = []
validation_accuracy_list = []

for epoch in tqdm(range(epochs)):
    # ---- training pass ----
    model.train()
    train_loss_sum = 0.0
    train_seen = 0
    for features, labels in train_batches:
        optimizer.zero_grad()
        outputs = model(features.reshape(-1, input_dim))
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch-mean loss by its batch size so the epoch figure is
        # a true per-sample mean even if the last batch is smaller.
        batch_size = labels.size(0)
        train_loss_sum += loss.item() * batch_size
        train_seen += batch_size
    train_loss_list.append(train_loss_sum / train_seen)

    # ---- validation pass ----
    # Switch to eval mode once per pass instead of once per batch.
    model.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0
    with torch.no_grad():
        for features, labels in val_batches:
            val_outputs = model(features.reshape(-1, input_dim))
            batch_size = labels.size(0)
            val_loss_sum += loss_func(val_outputs, labels).item() * batch_size
            _, predicted = torch.max(val_outputs, 1)
            val_correct += (predicted == labels).sum().item()
            val_seen += batch_size
    validation_loss_list.append(val_loss_sum / val_seen)
    validation_accuracy_list.append(val_correct / val_seen * 100)

# ---- test pass (once, after the final epoch) ----
model.eval()
total_correct = 0
total_samples = 0
with torch.no_grad():
    for features, labels in test_batches:
        test_outputs = model(features.reshape(-1, input_dim))
        _, predicted = torch.max(test_outputs, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
test_accuracy = total_correct / total_samples * 100

# Store results
results['FCNWithBatchNorm'] = {
    'Train Loss': train_loss_list,
    'Validation Loss': validation_loss_list,
    'Validation Accuracy': validation_accuracy_list,
    'Test Accuracy': test_accuracy,
}


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [03:41<00:00, 14.79s/it]


In [83]:
#for model_name, result in results.items():
    #print("Model:", model_name)
    #print("Validation Loss:", result['Validation Loss'])
    #print ("Validation Accuracy:", result['Validation Accuracy'])
    #print("Test Accuracy:", result['Test Accuracy'])
    #print("Training loss:", result['Train Loss'])
    #print()

In [82]:
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

# Plot training loss
#axes[0].plot(train_loss_list, label='Training Loss', linewidth=3)
#axes[0].set_title('Training Loss for Adam (lr=0.001) with BatchNorm')
#axes[0].set_xlabel('Epochs')
#axes[0].set_ylabel('Loss')

# Plot validation accuracy
#axes[1].plot(validation_accuracy_list, label='Validation Accuracy', linewidth=3, color='gold')
#axes[1].set_title('Validation Accuracy for Adam (lr=0.001) with BatchNorm')
#axes[1].set_ylabel('Validation Accuracy')

#plt.tight_layout()
#plt.show()

In [81]:
#fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))

#axes[0].plot(train_loss_list, label='Training loss', linewidth=3)
#axes[0].plot(validation_loss_list, label='Validation loss', linewidth=3)
#axes[0].set_title('Training Loss vs Validation Loss')
#axes[0].set_xlabel('Epochs')
#axes[0].set_ylabel('loss')
#axes[0].legend()