In [3]:
import time
import numpy as np
import torch
import torchvision
from torch.autograd import Variable
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from IPython.display import Image, display

In [5]:
display(Image(url='https://production-media.paperswithcode.com/datasets/4fdf2b82-2bc3-4f97-ba51-400322b228b1.png'))

In [7]:
# Constants
IMAGE_WIDTH = 32
IMAGE_HEIGHT = 32
COLOR_CHANNELS = 3
EPOCHS = 300
LEARNING_RATES = [.00001, 0.0001, 0.001, 0.01, 0.1]
KEEP_RATES = [.5, .65, .8]
WEIGHT_DECAY_RATES = [.0005, .005, .05]
BATCH_SIZE = 32
BATCH_IMAGE_COUNT = 10000
CLASSES = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
N_CLASSES = len(CLASSES)

In [8]:
import torch

class Net(torch.nn.Module):
    def __init__(self, n_hidden_nodes, n_hidden_layers, activation, keep_rate=0):
        super(Net, self).__init__()
        self.n_hidden_nodes = n_hidden_nodes  # Number of hidden nodes in each layer
        self.n_hidden_layers = n_hidden_layers  # Number of hidden layers
        self.activation = activation  # Activation function (either 'sigmoid' or 'relu')
        
        # Set a default value for keep_rate if not provided
        if not keep_rate:
            keep_rate = 0.5
        self.keep_rate = keep_rate  # Dropout keep rate
        
        # Set up the first perceptron layer and add dropout
        self.fc1 = torch.nn.Linear(IMAGE_WIDTH * IMAGE_HEIGHT * COLOR_CHANNELS,
                                   n_hidden_nodes)  # Fully connected layer 1
        self.fc1_drop = torch.nn.Dropout(1 - keep_rate)  # Dropout layer for the first hidden layer
        
        # If there are 2 hidden layers, set up the second perceptron layer and dropout
        if n_hidden_layers == 2:
            self.fc2 = torch.nn.Linear(n_hidden_nodes,
                                       n_hidden_nodes)  # Fully connected layer 2
            self.fc2_drop = torch.nn.Dropout(1 - keep_rate)  # Dropout layer for the second hidden layer

        self.out = torch.nn.Linear(n_hidden_nodes, N_CLASSES)  # Output layer

    def forward(self, x):
        x = x.view(-1, IMAGE_WIDTH * IMAGE_WIDTH * COLOR_CHANNELS)  # Flatten the input
        if self.activation == "sigmoid":
            sigmoid = torch.nn.Sigmoid()
            x = sigmoid(self.fc1(x))  # Apply sigmoid activation to the first hidden layer
        elif self.activation == "relu":
            x = torch.nn.functional.relu(self.fc1(x))  # Apply ReLU activation to the first hidden layer
        x = self.fc1_drop(x)  # Apply dropout to the first hidden layer
        
        # If there are 2 hidden layers, process the second hidden layer in a similar manner
        if self.n_hidden_layers == 2:
            if self.activation == "sigmoid":
                x = sigmoid(self.fc2(x))  # Apply sigmoid activation to the second hidden layer
            elif self.activation == "relu":
                x = torch.nn.functional.relu(self.fc2(x))  # Apply ReLU activation to the second hidden layer
            x = self.fc2_drop(x)  # Apply dropout to the second hidden layer
        
        return torch.nn.functional.log_softmax(self.out(x))  # Apply log softmax to the output layer


### NLL Loss
- The negative log-likelihood loss, often abbreviated as NLL loss, is a commonly used loss function in classification problems, especially for problems where the target labels are discrete and represent class probabilities. It is also closely related to the cross-entropy loss.

- In a classification problem, you have a set of predicted probabilities for each class, typically produced by a softmax activation function in the output layer of a neural network. The NLL loss measures the dissimilarity between these predicted probabilities and the true target probabilities (one-hot encoded for the true class).

Mathematically, the NLL loss for a single data point is defined as:

###  $$NLL(\text{prediction}, \text{target}) = -\sum_{i} \text{target}[i] \cdot \log(\text{prediction}[i])$$


In [9]:
def train(epoch, model, train_loader, optimizer, log_interval=100, cuda=None):
    # Set the model in training mode
    model.train()
    
    correct = 0  # Initialize a variable to keep track of correct predictions
    
    # Iterate over batches in the training loader
    for batch_idx, (data, target) in enumerate(train_loader):
        if cuda:
            data, target = data.cuda(), target.cuda()  # Move data and target tensors to GPU if available
        
        data, target = Variable(data), Variable(target)  # Wrap data and target in Variable objects (used in older PyTorch versions)

        optimizer.zero_grad()  # Clear the gradients from the previous iteration
        output = model(data)  # Forward pass: compute the model's predictions
        
        # Calculate the predicted class labels
        pred = output.data.max(1)[1]  # Get the index of the maximum log-probability
        
        # Update the count of correct predictions
        correct += pred.eq(target.data).cpu().sum()
        
        # Calculate the accuracy for this batch
        accuracy = 100. * correct / len(train_loader.dataset)
        
        # Compute the negative log-likelihood loss (used for classification)
        loss = torch.nn.functional.nll_loss(output, target)
        
        loss.backward()  # Backpropagate the gradients
        optimizer.step()  # Update the model's parameters using the computed gradients


In [10]:
def validate(loss_vector, accuracy_vector, model, validation_loader, cuda=None):
    # Set the model in evaluation mode
    model.eval()
    
    val_loss, correct = 0, 0  # Initialize variables for loss and correct predictions
    
    # Iterate over batches in the validation loader
    for data, target in validation_loader:
        if cuda:
            data, target = data.cuda(), target.cuda()  # Move data and target tensors to GPU if available
        
        data, target = Variable(data, volatile=True), Variable(target)  # Wrap data and target in Variable objects (used in older PyTorch versions)

        output = model(data)  # Forward pass: compute the model's predictions
        val_loss += torch.nn.functional.nll_loss(output, target).data  # Compute the negative log-likelihood loss (used for classification)
        
        # Calculate the predicted class labels
        pred = output.data.max(1)[1]  # Get the index of the maximum log-probability
        
        # Update the count of correct predictions
        correct += pred.eq(target.data).cpu().sum()

    val_loss /= len(validation_loader)  # Calculate the average validation loss
    
    loss_vector.append(val_loss)  # Append the validation loss to a list
    
    accuracy = 100. * correct / len(validation_loader.dataset)  # Calculate the accuracy as a percentage
    accuracy_vector.append(accuracy)  # Append the accuracy to a list
    
    # Print the validation results
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        val_loss, correct, len(validation_loader.dataset), accuracy))


In [11]:
def main():
    # Check if CUDA (GPU) is available
    cuda = torch.cuda.is_available()
    print('Using PyTorch version:', torch.__version__, 'CUDA:', cuda)

    # Define a data transformation for preprocessing
    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert images to PyTorch tensors
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize image data
    ])

    # Load the training dataset (CIFAR-10)
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                              shuffle=True, num_workers=0, pin_memory=False)

    # Load the validation dataset (CIFAR-10)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform)
    validation_loader = torch.utils.data.DataLoader(testset, batch_size=4,
                                             shuffle=False, num_workers=0, pin_memory=False)

    # Define neural network architecture and configuration
    hidden_nodes = 100  # Number of hidden nodes in each layer
    layers = 2  # Number of hidden layers
    model = Net(hidden_nodes, layers, "sigmoid")  # Create a neural network model
    if cuda:
        model.cuda()  # Move the model to GPU if CUDA is available

    # Define the optimizer for training (Stochastic Gradient Descent)
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATES[1])  # Learning rate specified elsewhere

    loss_vector = []  # List to store training loss values
    acc_vector = []   # List to store training accuracy values
    
    EPOCHS = 25  # Number of training epochs

    # Training loop over multiple epochs
    for epoch in range(1, EPOCHS + 1):
        print('Epoch {}'.format(epoch))
        
        # Train the model on the training dataset
        train(epoch, model, train_loader, optimizer, cuda=cuda)
        
        # Validate the model on the validation dataset and collect loss and accuracy data
        validate(loss_vector, acc_vector, model, validation_loader, cuda=cuda)
        
        if epoch == 25:  # Stop training after a specific number of epochs (25 in this case)
            break


In [9]:
main()

Using PyTorch version: 1.12.1 CUDA: False
Files already downloaded and verified
Files already downloaded and verified
Epoch 1


  return torch.nn.functional.log_softmax(self.out(x))
  data, target = Variable(data, volatile=True), Variable(target)



Validation set: Average loss: 2.2055, Accuracy: 2685/10000 (27%)

Epoch 2

Validation set: Average loss: 2.1463, Accuracy: 2856/10000 (29%)

Epoch 3

Validation set: Average loss: 2.1028, Accuracy: 2976/10000 (30%)

Epoch 4

Validation set: Average loss: 2.0683, Accuracy: 3026/10000 (30%)

Epoch 5

Validation set: Average loss: 2.0404, Accuracy: 3043/10000 (30%)

Epoch 6

Validation set: Average loss: 2.0174, Accuracy: 3126/10000 (31%)

Epoch 7

Validation set: Average loss: 1.9980, Accuracy: 3120/10000 (31%)

Epoch 8

Validation set: Average loss: 1.9816, Accuracy: 3170/10000 (32%)

Epoch 9

Validation set: Average loss: 1.9675, Accuracy: 3201/10000 (32%)

Epoch 10

Validation set: Average loss: 1.9554, Accuracy: 3226/10000 (32%)

Epoch 11

Validation set: Average loss: 1.9446, Accuracy: 3248/10000 (32%)

Epoch 12

Validation set: Average loss: 1.9350, Accuracy: 3317/10000 (33%)

Epoch 13

Validation set: Average loss: 1.9265, Accuracy: 3318/10000 (33%)

Epoch 14

Validation set: Ave