<a href="https://colab.research.google.com/github/mahfouz72/softmax-and-neural-network-mnist/blob/Neural-network/softmax_and_neural_network_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# @title Imports
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import torch
import torch.optim as optim
import matplotlib.pyplot as plt
import time

In [2]:
# @title Global variables
# Hyperparameters and dataset dimensions shared by the cells below.
learning_rate = 0.01  # step size handed to the optimizers
epochs = 25  # number of full passes over the training loader
batch_size = 64  # batch size used by every DataLoader
input_size = 28 * 28  # length of one flattened 28x28 image
number_of_classes = 10  # width of each model's output layer
l2_value = 0.000  # NOTE(review): looks like an L2 strength, but no visible cell reads it — confirm where it is applied

In [4]:
# @title Data Preparation

transform = transforms.ToTensor()

# Training split is consumed through the torchvision dataset (and `transform`).
train_dataset = datasets.MNIST(
    root='data/MNIST', train=True, transform=transform, download=True
)
# The train=False split is divided below into validation and test halves.
test_and_validation_dataset = datasets.MNIST(
    root='data/MNIST', train=False, transform=transform, download=True
)

# Pull the held-out split out as NumPy arrays. The pixel array is read from
# `.data` directly and scaled by 1/255 by hand here
# (presumably to match the range produced by `transform` — confirm).
test_labels = test_and_validation_dataset.targets.numpy()
test_data = test_and_validation_dataset.data.numpy() / 255.0

# 50/50 validation/test split with a fixed seed so the partition is repeatable.
val_data, test_data, val_labels, test_labels = train_test_split(
    test_data,
    test_labels,
    test_size=0.5,
    random_state=123,
)

# Wrap the NumPy arrays as (float features, long labels) tensor datasets.
val_dataset = TensorDataset(
    torch.from_numpy(val_data).float(),
    torch.from_numpy(val_labels).long(),
)
test_dataset = TensorDataset(
    torch.from_numpy(test_data).float(),
    torch.from_numpy(test_labels).long(),
)

# Only the training loader is shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [None]:
# @title Softmax Regression Model

class SoftmaxRegression(torch.nn.Module):
    """Single linear layer mapping input features to per-class scores.

    The layer returns raw scores; no softmax is applied inside the module.
    """

    def __init__(self, input_size, number_of_classes):
        """Create the linear layer.

        Args:
            input_size: Number of input features per sample.
            number_of_classes: Number of output scores per sample.
        """
        super().__init__()
        self.linear = torch.nn.Linear(
            in_features=input_size, out_features=number_of_classes
        )

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        scores = self.linear(x)
        return scores

# Softmax-regression model, trained with plain SGD on a cross-entropy loss.
model = SoftmaxRegression(
    input_size=input_size,
    number_of_classes=number_of_classes,
)
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)
loss_function = torch.nn.CrossEntropyLoss()

In [None]:
# @title Training

# Per-epoch history; each list gains exactly one entry per epoch.
train_losses, val_losses, val_accuracies = [], [], []

for epoch in range(epochs):
    # ---- training pass: one optimizer step per batch ----
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images = images.view(-1, input_size)  # flatten each image to a vector
        outputs = model(images)
        loss = loss_function(outputs, labels)

        # Clear old gradients, back-propagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    train_losses.append(train_loss / len(train_loader))

    # ---- validation pass: no gradient tracking, no weight updates ----
    model.eval()
    val_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.view(-1, input_size)
            outputs = model(images)
            loss = loss_function(outputs, labels)
            val_loss += loss.item()

            # Predicted class = index of the largest score in each row.
            predicted_classes = outputs.argmax(dim=1)
            correct_predictions += (predicted_classes == labels).sum().item()
            total_predictions += labels.size(0)

    val_losses.append(val_loss / len(val_loader))
    val_accuracies.append(correct_predictions / total_predictions)

    print(f"Epoch {epoch+1}/{epochs}, "
          f"Train Loss: {train_losses[-1]:.4f}, "
          f"Val Loss: {val_losses[-1]:.4f}, "
          f"Val Accuracy: {val_accuracies[-1] * 100:.4f}%")

In [5]:
# @title Neural Network Model

class NeuralNetwork(torch.nn.Module):
    """Fully connected classifier: input -> 128 -> 64 -> number_of_classes.

    A ReLU non-linearity follows each of the two hidden layers. The final
    layer returns raw scores; no softmax is applied inside the module.
    """

    def __init__(self, input_size, number_of_classes):
        """Build the three linear layers.

        Args:
            input_size: Number of features per flattened sample.
            number_of_classes: Number of output scores per sample.
        """
        super().__init__()
        self.fc1 = torch.nn.Linear(input_size, 128)  # input_size -> 128 hidden units
        self.relu1 = torch.nn.ReLU()                 # non-linearity after the first layer
        self.fc2 = torch.nn.Linear(128, 64)
        self.relu2 = torch.nn.ReLU()
        self.fc3 = torch.nn.Linear(64, number_of_classes)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, input_size)`` and return the class scores."""
        # Flatten using the size this instance was built with (kept on fc1),
        # not a module-level global, so the model works for any input_size
        # and does not depend on notebook state.
        x = x.view(-1, self.fc1.in_features)
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        return self.fc3(x)

# Initialize NN model, optimizer, and loss function
nn_model = NeuralNetwork(
    input_size=input_size,
    number_of_classes=number_of_classes,
)
# Adam optimizer is used for training the model. It adjusts the weights of
# the network to minimize the loss.
nn_optimizer = optim.Adam(params=nn_model.parameters(), lr=learning_rate)
# torch.nn.CrossEntropyLoss(): combines the softmax activation and negative
# log-likelihood loss. It's suitable for multi-class classification tasks
# like MNIST.
nn_loss_function = torch.nn.CrossEntropyLoss()


In [6]:
"""
The train_nn function runs one training pass of a neural network over a data loader. Here's a breakdown:
model: The neural network to be trained.
loader: The data loader (e.g., train_loader) that provides batches of data.
optimizer: The optimizer (e.g., nn_optimizer) used to update the model's weights.
loss_function: The loss function (e.g., nn_loss_function) that measures the error.

"""
# @title NN Training Loop

def train_nn(model, loader, optimizer, loss_function):
    """Train ``model`` for one full pass over ``loader``.

    Args:
        model: The neural network to be trained; it is switched to training
            mode before the first batch.
        loader: Iterable of ``(images, labels)`` batches (e.g. ``train_loader``).
            It must also support ``len()``, which gives the number of batches.
        optimizer: The optimizer (e.g. ``nn_optimizer``) used to update the
            model's weights after each batch.
        loss_function: The loss (e.g. ``nn_loss_function``) that measures the
            error between the model output and ``labels``.

    Returns:
        The mean of the per-batch losses over the pass.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    for images, labels in loader:
        # Flatten each sample to a vector. Keying on the batch dimension
        # (instead of a notebook-global feature count) keeps the function
        # usable with any input size and independent of cell execution order.
        images = images.view(images.size(0), -1)

        # Forward pass
        outputs = model(images)
        loss = loss_function(outputs, labels)

        # Backward pass: clear old gradients, back-propagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(loader)

In [7]:
"""
The evaluate_nn function evaluates the performance of a trained neural network on a given dataset (e.g., validation or test data). Here's a breakdown:
model: The trained neural network to be evaluated.
loader: The data loader (e.g., val_loader or test_loader) that provides batches of data for evaluation.
loss_function: The loss function (e.g., nn_loss_function) used to compute the loss.

"""
# @title NN Evaluation Loop

def evaluate_nn(model, loader, loss_function):
    """Evaluate ``model`` on every batch of ``loader`` without updating it.

    Args:
        model: The trained neural network to be evaluated; it is switched to
            evaluation mode before the first batch.
        loader: Iterable of ``(images, labels)`` batches (e.g. ``val_loader``
            or ``test_loader``). It must also support ``len()``.
        loss_function: The loss (e.g. ``nn_loss_function``) used to compute
            the per-batch loss.

    Returns:
        A ``(mean_loss, accuracy)`` tuple: the mean of the per-batch losses
        and the fraction of samples whose highest-scoring class equals the
        label.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches or no samples.
    """
    model.eval()
    correct = 0
    total = 0
    total_loss = 0.0
    with torch.no_grad():  # evaluation only: no gradient tracking
        for images, labels in loader:
            # Flatten each sample to a vector. Keying on the batch dimension
            # (instead of a notebook-global feature count) keeps the function
            # usable with any input size and independent of cell order.
            images = images.view(images.size(0), -1)

            # Forward pass
            outputs = model(images)
            total_loss += loss_function(outputs, labels).item()

            # Predicted class = index of the largest score in each row.
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total
    return total_loss / len(loader), accuracy


In [10]:
# @title Train and Evaluate Neural Network

# Training the NN
print("Training Neural Network...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() follows the wall clock and can jump if the system
# clock is adjusted during the run.
# Note: the timed region also includes the per-epoch validation pass.
start_time = time.perf_counter()
for epoch in range(epochs):
    train_loss = train_nn(nn_model, train_loader, nn_optimizer, nn_loss_function)
    val_loss, val_accuracy = evaluate_nn(nn_model, val_loader, nn_loss_function)
    print(f"Epoch {epoch+1}/{epochs}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, Val Accuracy = {val_accuracy:.4f}")
end_time = time.perf_counter()

# Testing the NN on the held-out test loader (outside the timed region).
test_loss, test_accuracy = evaluate_nn(nn_model, test_loader, nn_loss_function)
print(f"Test Accuracy: {test_accuracy:.4f}")
training_time = end_time - start_time  # elapsed seconds
print(f"Training time: {training_time:.4f}")


Training Neural Network...
Epoch 1/25: Train Loss = 0.2370, Val Loss = 0.2162, Val Accuracy = 0.9422
Epoch 2/25: Train Loss = 0.1426, Val Loss = 0.1379, Val Accuracy = 0.9626
Epoch 3/25: Train Loss = 0.1246, Val Loss = 0.1289, Val Accuracy = 0.9636
Epoch 4/25: Train Loss = 0.1033, Val Loss = 0.1751, Val Accuracy = 0.9590
Epoch 5/25: Train Loss = 0.1020, Val Loss = 0.1923, Val Accuracy = 0.9574
Epoch 6/25: Train Loss = 0.0930, Val Loss = 0.1606, Val Accuracy = 0.9650
Epoch 7/25: Train Loss = 0.0908, Val Loss = 0.1528, Val Accuracy = 0.9670
Epoch 8/25: Train Loss = 0.0904, Val Loss = 0.1788, Val Accuracy = 0.9646
Epoch 9/25: Train Loss = 0.0747, Val Loss = 0.1728, Val Accuracy = 0.9658
Epoch 10/25: Train Loss = 0.0848, Val Loss = 0.2049, Val Accuracy = 0.9552
Epoch 11/25: Train Loss = 0.0711, Val Loss = 0.1685, Val Accuracy = 0.9682
Epoch 12/25: Train Loss = 0.0641, Val Loss = 0.1463, Val Accuracy = 0.9722
Epoch 13/25: Train Loss = 0.0575, Val Loss = 0.1769, Val Accuracy = 0.9668
Epoch 1