# 👀 Multilayer perceptron (MLP)

In [None]:
# Machine-specific absolute path; used below as the base for sys.path and the
# data directory. Edit it before running the notebook on another machine.
working_dir = "/home/mary/work/repos/generative_deep_Learning_2nd_edition_pytorch"

In [None]:
import sys
import os

# Put `working_dir` on sys.path so that `from notebooks.utils import display`
# can be resolved; the membership check avoids duplicate entries on re-runs.
notebooks_path = os.path.abspath(working_dir)
if notebooks_path not in sys.path:
    sys.path.append(notebooks_path)

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

import torch.nn as nn
import torch.optim as optim
from torchsummary import summary

# from torchinfo import summary

import numpy as np 
import matplotlib.pyplot as plt

from notebooks.utils import display

## 0. Parameters <a name="parameters"></a>

In [None]:
# Number of target classes (size of the model output and of the one-hot vectors)
NUM_CLASSES = 10
# Mini-batch size passed to every DataLoader in this notebook
batch_size = 32

## 1. Prepare the Data <a name="prepare"></a>

In [None]:
# Directory passed as `root` to the torchvision CIFAR-10 datasets
data_dir = f"{working_dir}/data"

### 1.1 Manual method <a name="manual"></a>

In this method we will try to mimic, as closely as possible, the steps used to load the CIFAR-10 data in Keras. If you are not interested in this approach, you can skip ahead to the PyTorch-style method.

In [None]:
# Load the CIFAR-10 training split without any transform (download=True fetches it into data_dir)
trainset = torchvision.datasets.CIFAR10(root=data_dir, train=True, download=True)
# Inspect the dataset type and its first sample (indexed below as [0] = image, [1] = label)
print(type(trainset))
print(trainset[0])
# Same for the test split
testset = torchvision.datasets.CIFAR10(root=data_dir, train=False, download=True)

We will define a transform that converts each image to a tensor. The `ToTensor` transform also internally scales the color channels to the range 0-1 and permutes the image shape to [C, H, W]. If we want a transform that only converts to a tensor or a NumPy array, we can implement a custom one using `Lambda`.
The list of available transforms can be found here (https://pytorch.org/vision/0.9/transforms.html#functional-transforms)
- Note: PyTorch uses a channel-first format (i.e. [C, H, W]), whereas TensorFlow uses a channel-last format (i.e. [H, W, C]). The channel-first format aligns with PyTorch's conventions and optimizes the memory layout for efficient computation, especially on GPUs. This format is optimized for PyTorch's back-end libraries (e.g., cuDNN) and GPU acceleration. This arrangement allows faster access to data during operations on individual channels, which is common in convolutional neural networks (CNNs).

In [None]:
# Single-step pipeline: convert each image to a tensor
transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# Transform every image and stack the results along a new leading (sample) dimension
x_train = torch.stack([transform(image) for image, _ in trainset], dim=0)
x_test = torch.stack([transform(image) for image, _ in testset], dim=0)

# Collect the labels of each split into one tensor
y_train = torch.as_tensor([label for _, label in trainset])
y_test = torch.as_tensor([label for _, label in testset])

In [None]:
# Sanity check: shape of the stacked training images and of the test labels
print(x_train.shape)
print(y_test.shape)

In [None]:
# Convert the integer labels to one-hot vectors of length NUM_CLASSES
y_train = F.one_hot(y_train, NUM_CLASSES)
y_test = F.one_hot(y_test, NUM_CLASSES)

In [None]:
# Pass the first ten training images to the project's `display` helper
display(x_train[:10])
# ...and print the matching labels (one-hot encoded by this point)
print(y_train[:10])

From here we have different options for using x_train and y_train to train the model:
- We can use them as tensors in the training loop and handle the batches using tensor slicing.
- We can use TensorDataset to convert the training samples and labels into a PyTorch dataset (which is re-inventing the wheel, since the loaded trainset was already a PyTorch Dataset) and then use DataLoader to get a data loader that also handles the batch size and can use several workers for large datasets.
- We can implement our own custom Dataset and DataLoader by inheriting from those classes (if we want to do something special).

Here we will use the second option

In [None]:
# Wrap the image and label tensors so each index yields an (image, label) pair
train_dataset = TensorDataset(x_train, y_train)
# Shuffled mini-batches loaded by two worker processes
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

### 1.2 Pytorch method <a name="pytorch"></a>

Note the labels here will not be one hot encoded

In [None]:
# Transformations applied to every image when it is read from the dataset
# (only the conversion to a tensor here)
transform = transforms.Compose([transforms.ToTensor()])

def one_hot_encode(label):
    """Return ``label`` as a float one-hot tensor with NUM_CLASSES entries."""
    label_tensor = torch.tensor(label)
    encoded = F.one_hot(label_tensor, num_classes=NUM_CLASSES)
    return encoded.float()

# Download (if needed) and load the CIFAR-10 training and test datasets.
# nn.CrossEntropyLoss consumes integer class labels directly, so the targets are
# left untouched; pass target_transform=one_hot_encode to the datasets instead
# if one-hot encoded targets are required.
trainset = torchvision.datasets.CIFAR10(root=data_dir, train=True, download=True,
                                        transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# Use the same root as the training split. The previous './data' was resolved
# against the current working directory, so the test split could end up in
# (and be downloaded to) a different folder than the training split.
testset = torchvision.datasets.CIFAR10(root=data_dir, train=False, download=True,
                                       transform=transform)

# No shuffling for evaluation, so batches come in dataset order
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

In [None]:
# Load the first batch of images (and their labels) from the shuffled training loader
dataiter = iter(trainloader)
images, lables = next(dataiter)

In [None]:
# Inspect the batch: its Python type and its shape. The type is printed
# explicitly because a bare expression that is not the last line of a cell
# produces no output.
print(type(images))
print(images.shape)

In [None]:
# Pass the first ten images of the batch to the project's `display` helper
display(images[:10])
# ...and print their labels (no target_transform was set on this dataset)
print(lables[:10])

## 2. Build the model <a name="build"></a>

### Method 1 without including the activations in the model itself

In [None]:
class MLPModel_1(nn.Module):
    """Multilayer perceptron whose ReLU activations are applied functionally in ``forward``.

    Layer sizes are 32*32*3 -> 200 -> 150 -> NUM_CLASSES. The output is the
    raw result of the last linear layer (no softmax), because
    ``nn.CrossEntropyLoss`` already combines the softmax with the loss.
    """

    def __init__(self):
        super().__init__()
        # Only the linear layers are registered; activations live in forward()
        self.input = nn.Linear(32*32*3, 200)
        self.fc1 = nn.Linear(200, 150)
        self.fc2 = nn.Linear(150, NUM_CLASSES)

    def forward(self, x):
        """Flatten every sample of ``x`` and return the un-normalised class scores."""
        batch = x.shape[0]
        flat = x.view(batch, -1)
        hidden = torch.relu(self.input(flat))
        hidden = torch.relu(self.fc1(hidden))
        return self.fc2(hidden)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MLPModel_1().to(device)
# Show the parameter names, then the module structure
print(model.state_dict().keys())
print(model)

In [None]:
# torchsummary report for one input of size (3, 32, 32), i.e. [C, H, W]
summary(model, (3, 32, 32))

### Method 2 with including the activations in the model itself

In [None]:
class MLPModel_2(nn.Module):
    """Multilayer perceptron whose activations are registered as sub-modules.

    Layer sizes are 32*32*3 -> 200 -> 150 -> NUM_CLASSES. ``activ_3``
    (softmax) is registered but deliberately not applied in ``forward``:
    ``nn.CrossEntropyLoss`` already combines the softmax with the loss. If the
    softmax were applied here, NLLLoss (negative log-likelihood loss) could be
    used instead.
    """

    def __init__(self):
        super().__init__()
        # Linear layers interleaved with their activation modules
        self.input = nn.Linear(32*32*3, 200)
        self.activ_1 = nn.ReLU()
        self.fc1 = nn.Linear(200, 150)
        self.activ_2 = nn.ReLU()
        self.fc2 = nn.Linear(150, NUM_CLASSES)
        self.activ_3 = nn.Softmax(dim=1)

    def forward(self, x):
        """Flatten every sample of ``x`` and return the un-normalised class scores."""
        batch = x.shape[0]
        flat = x.view(batch, -1)
        hidden = self.activ_1(self.input(flat))
        hidden = self.activ_2(self.fc1(hidden))
        return self.fc2(hidden)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Rebinds `model`: the cells below operate on this MLPModel_2 instance
model = MLPModel_2().to(device)
# Show the parameter names, then the module structure
print(model.state_dict().keys())
print(model)

In [None]:
# torchsummary report for one input of size (3, 32, 32), i.e. [C, H, W]
summary(model, (3, 32, 32))

## 3. Train the model <a name="train"></a>

In [None]:
# Training hyperparameters: number of passes over the training data and the Adam learning rate
num_epoch = 10
learning_rate = 0.0005

In [None]:
# Define the loss function
loss_fn = nn.CrossEntropyLoss()
# CrossEntropyLoss combines the softmax with the loss function, so there is
# no need to add a softmax layer to the model

In [None]:
# Define the optimizer: Adam over the parameters of the current `model`
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
def fit(model, train_dataloader, optimizer, loss_fn, epochs=10):
    """Train ``model`` for ``epochs`` passes over ``train_dataloader``.

    After every epoch one line is printed with the mean per-batch loss and the
    accuracy over all samples seen in that epoch.

    Args:
        model: the ``nn.Module`` to train; it is moved to CUDA when available,
            otherwise to the CPU.
        train_dataloader: iterable yielding ``(images, labels)`` batches.
            Labels are compared directly against the arg-max of the model
            output, so they are expected to be class indices.
        optimizer: optimizer already bound to ``model``'s parameters.
        loss_fn: callable ``loss_fn(predictions, labels)`` returning a scalar
            tensor.
        epochs: number of passes over the data.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Epochs are numbered from 1 so the last line reads "Epoch N / N"
    for epoch in range(1, epochs + 1):
        # set the model to training mode
        model.train()
        running_loss = 0.0
        num_batches = 0
        num_samples = 0
        correct = 0

        for images, labels in train_dataloader:
            # move the training data to the same device as the model
            images, labels = images.to(device), labels.to(device)

            # zero the gradients accumulated by the previous step
            optimizer.zero_grad()
            predictions = model(images)
            loss = loss_fn(predictions, labels)
            # back-propagate and update the weights
            loss.backward()
            optimizer.step()

            # accumulate the loss as a plain float; batches are counted here
            # rather than via len(), so loaders without __len__ also work
            running_loss += loss.item()
            num_batches += 1

            # accuracy: index of the highest score per sample vs. the label
            _, pred_label = torch.max(predictions, 1)
            num_samples += labels.size(0)
            correct += (pred_label == labels).sum().item()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError
        avg_loss = running_loss / max(num_batches, 1)
        accuracy = correct / max(num_samples, 1)
        print(f"Epoch {epoch} / {epochs}: loss= {avg_loss:.4f}, accuracy= {accuracy:.4f}")

            

In [None]:
# Train the current model on the PyTorch-style loader for num_epoch epochs
fit(model, trainloader, optimizer, loss_fn, num_epoch)

## 4. Evaluation <a name="evaluate"></a>

In [None]:
def evaluate(model, dataloader, loss_fn, device):
    """Compute the mean per-batch loss and the accuracy of ``model`` on ``dataloader``.

    The model is switched to eval mode and gradients are disabled. The model
    is not moved by this function; only the batches are sent to ``device``.

    Args:
        model: the ``nn.Module`` to evaluate.
        dataloader: sized iterable yielding ``(images, labels)`` batches.
            Labels are compared directly against the arg-max of the model
            output, so they are expected to be class indices.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning a scalar
            tensor.
        device: device the batches are moved to before the forward pass.

    Returns:
        ``(val_loss, val_acc)`` as plain Python floats: the sum of the batch
        losses divided by ``len(dataloader)``, and the fraction of correctly
        classified samples.

    Raises:
        ZeroDivisionError: if ``dataloader`` is empty.
    """
    # set the model to eval mode
    model.eval()
    corr_predictions = 0
    total = 0
    total_loss = 0.0
    # disable the gradient calculations
    with torch.no_grad():
        for images, lables in dataloader:
            images, lables = images.to(device), lables.to(device)
            # use the model to predict the labels
            outputs = model(images)
            # accumulate the loss as a float (as fit() does) so the result is
            # not a 0-dim tensor living on `device`
            total_loss += loss_fn(outputs, lables).item()

            # index of the highest score per sample
            _, pred_lables = torch.max(outputs, 1)

            corr_predictions += (pred_lables == lables).sum().item()
            total += lables.size(0)

    val_loss = total_loss / len(dataloader)
    val_acc = corr_predictions / total

    return val_loss, val_acc


In [None]:
# Evaluate the trained model on the test loader and report loss and accuracy
val_loss, val_acc = evaluate(model, testloader, loss_fn, device)
print(f"validation loss: {val_loss}, validation_acc: {val_acc}")

Try individual images

In [None]:
# Class names in label order: CLASSES[k] is the name of class index k
CLASSES = np.array(
    (
        "airplane", "automobile", "bird", "cat", "deer",
        "dog", "frog", "horse", "ship", "truck",
    )
)

# Take the first batch from the (unshuffled) test loader
dataiter = iter(testloader)
images, lables = next(dataiter)

In [None]:
# Run the model on the test batch and take, per sample, the index of the highest score
output = model(images.to(device))
pred_index = torch.max(output.detach().cpu(), 1).indices
predicted_class = CLASSES[pred_index]

# The labels are used directly as indices into CLASSES
gt_classes = CLASSES[lables]

In [None]:
# Draw n_to_show randomly chosen images of the batch in one row, each captioned
# with its predicted and its actual class name
n_to_show = 10
indices = np.random.choice(range(len(images)), n_to_show)

fig = plt.figure(figsize=(15, 3))
fig.subplots_adjust(hspace=0.4, wspace=0.4)

for i, idx in enumerate(indices):
    # [C, H, W] tensor -> [H, W, C] array, the layout imshow expects
    img = images[idx].permute(1, 2, 0).numpy()
    ax = fig.add_subplot(1, n_to_show, i + 1)
    ax.axis("off")
    # (vertical offset in axes coordinates, caption text), drawn below the image
    captions = (
        (-0.35, "pred = " + str(predicted_class[idx])),
        (-0.7, "act = " + str(gt_classes[idx])),
    )
    for y_offset, caption in captions:
        ax.text(
            0.5,
            y_offset,
            caption,
            fontsize=10,
            ha="center",
            transform=ax.transAxes,
        )
    ax.imshow(img)