# Neural Net
Basic neural net with fully connected layers. \
Demo on MNIST dataset. \
GPU, Datasets, DataLoader, Transforms, Neural Net, Training and Evaluation

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Device Configuration
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper Parameters
input_size = 28 * 28    # one flattened MNIST image (28 x 28 pixels = 784 values)
hidden_size = 500       # width of the single hidden layer
num_classes = 10        # number of output classes
num_epochs = 2          # passes over the training set
batch_size = 100        # samples per mini-batch
learning_rate = 1e-3    # optimizer step size

# MNIST dataset
# Both splits live under ../data and are converted to tensors by ToTensor().
# Only the training split is created with download=True; the test split is
# created without it.
train_dataset = torchvision.datasets.MNIST(
    root='../data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='../data',
    train=False,
    transform=transforms.ToTensor(),
)
    

# mnist
# Pixel statistics of the training images, computed on the stored (un-transformed)
# image tensor. The "/255" variants rescale the statistics; the expected values
# are quoted in the trailing comments.
#
# `.data` is used instead of the deprecated `.train_data` alias (which emits a
# warning on every access), and the float conversion of the full image tensor
# is done once instead of once per printed line.
train_pixels = train_dataset.data.float()

# Statistics over every pixel of every image (no reduction axis given).
overall_mean = train_pixels.mean()
overall_std = train_pixels.std()

print("train_dataset: details")
print(f'train_dataset size     :{list(train_pixels.size())}')#  [60000, 28, 28]
print(f'train_dataset mean     :{overall_mean}')
print(f'train_dataset mean/255 :{overall_mean/255}') # 0.1306604762738429
print(f'train_dataset std-dev  :{overall_std}')
print(f'train_dataset std-dev/255:{overall_std/255}') # 0.30810780717887876

# Same statistics again, this time naming all three axes explicitly
# (image index, row, column) as the reduction dimensions.
axes_mean = train_pixels.mean(dim=(0, 1, 2))
axes_std = train_pixels.std(dim=(0, 1, 2))

print("\n train_dataset: details")
print(f'train_dataset size     :{list(train_pixels.size())}')#  [60000, 28, 28]
print(f'train_dataset mean     :{axes_mean}')
print(f'train_dataset mean/255 :{axes_mean/255}') # 0.1306604762738429
print(f'train_dataset std-dev  :{axes_std}')
print(f'train_dataset std-dev/255:{axes_std/255}') # 0.30810780717887876

# The float copy of the whole training set is only needed for the printout.
del train_pixels




# Data loader
# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Pull the first test batch and show its first six images in a 2 x 3 grid.
examples = iter(test_loader)
example_data, example_targets = next(examples)

for i in range(6):
    plt.subplot(2, 3, i + 1)
    # Index 0 on the second axis selects the first channel of image i for imshow.
    plt.imshow(example_data[i, 0], cmap='gray')
plt.show()

print(f'Num_MNIST_samples = {len(train_dataset)}')


## Fully Connected Neural Network with one hidden layer
Note: in PyTorch, when you call an instance of an `nn.Module`, it always internally calls the `forward` method. \
\
This behavior is due to the `__call__` method defined within the `nn.Module` class. When you invoke an `nn.Module` object like a function (e.g., `model(input)`), the `__call__` method is executed, which in turn calls the `forward` method with the provided input.\
\
It's important to note that you should not directly call the forward method yourself (e.g., model.forward(input)). Calling the module instance ensures that all the necessary hooks and mechanisms within PyTorch are properly executed, including pre-forward hooks, the actual forward pass, and post-forward hooks. Directly calling forward bypasses these mechanisms, potentially leading to unexpected behavior, especially when using features like hooks or when working with models in training mode.

In [None]:
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    """Two linear layers with a ReLU in between; the output is raw logits.

    No softmax (or any other activation) is applied after the last layer:
    nn.CrossEntropyLoss expects unnormalized scores (logits) as input and
    class indices (not one-hot vectors) as targets.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Create the layers.

        input_size  -- number of features per sample
        hidden_size -- number of units in the hidden layer
        num_classes -- number of output scores per sample
        """
        super().__init__()
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu1 = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the logits for `x`: l2(relu1(l1(x)))."""
        hidden = self.relu1(self.l1(x))
        # Deliberately no softmax here; see the class docstring.
        return self.l2(hidden)

# Build the network and move its parameters to the selected device.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)

# Loss
# By definition, cross entropy is computed from the probabilities produced by a
# softmax layer and a one-hot encoded label vector.
# In PyTorch this is different: nn.CrossEntropyLoss takes unnormalized raw
# values (logits) as input and class indices (not one-hot vectors) as targets.
# Hence the model has no softmax layer.
loss_handle = nn.CrossEntropyLoss()
# Optimizer for back propagation and weight updates
# optimizer = torch.optim.SGD(model.parameters(), lr= learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# len(train_loader) is the number of batches per epoch, not the number of
# samples, so it is labelled as such. It is measured BEFORE a batch is drawn so
# that the second measurement below really is a before/after comparison.
train_loader_len = len(train_loader)
print(f'Num_train_batches = {train_loader_len}')

# Draw one batch from a fresh iterator to inspect the tensor shapes.
img, label = next(iter(train_loader))
print('img.shape   =',img.shape)
print('label.shape =',label.shape)

print('Checking if using the iterator and next reduced the train loader length by one')
train_loader_len = len(train_loader)
print(f'Num_train_batches = {train_loader_len}')

# This cell trains for 5 epochs, overriding the value set with the other
# hyper parameters.
num_epochs = 5
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # The loader yields `batch_size` images at a time. Move them to the
        # device and flatten each image to the 28*28 = 784 values the network
        # takes as input, giving a (batch_size, 784) tensor.
        images = images.to(device).reshape(-1, 28*28)
        labels = labels.to(device)

        # Forward pass: one row of 10 scores (one per class) for every sample.
        # The scores are raw logits, which is what nn.CrossEntropyLoss expects
        # together with class-index labels, so no softmax is applied.
        model_outputs = model(images)
        loss = loss_handle(model_outputs, labels)

        # Backward pass, weight update, then clear the gradients: backward()
        # adds to the stored gradients rather than replacing them, so they are
        # reset before the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Report progress only on every 100th step.
        if (i+1) % 100 != 0:
            continue
        print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{train_loader_len}], Loss: {loss.item():.4f}')

        # Not executed, kept as a note: the predicted class of each sample is
        # the index of the largest score in its row, e.g.
        #     max_values, max_indices = torch.max(model_outputs, dim=1)
        #     y_predicteds = max_indices
        # and the shapes of images, labels, model_outputs, max_indices and
        # y_predicteds can be printed on the first step (i == 0 and epoch == 0)
        # for inspection.

In [None]:
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    """Linear -> ReLU -> Linear classifier that outputs raw logits.

    The final layer has no softmax or other activation because
    nn.CrossEntropyLoss takes unnormalized scores (logits) as input and class
    indices (not one-hot vectors) as targets.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Set up a hidden layer of `hidden_size` units and a `num_classes`-wide output layer."""
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)    # input  -> hidden
        self.relu1 = nn.ReLU()                          # hidden non-linearity
        self.l2 = nn.Linear(hidden_size, num_classes)   # hidden -> class scores

    def forward(self, x):
        """Map `x` through l1, relu1 and l2 and return the resulting logits."""
        activations = self.l1(x)
        activations = self.relu1(activations)
        logits = self.l2(activations)
        # No softmax on purpose: the loss function works on the logits directly.
        return logits

# Build a fresh network and move its parameters to the selected device.
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# model.parameters() returns a generator, so printing it directly only shows
# "<generator object ...>". List each parameter's name and shape instead.
print('model.parameters()=', [(name, tuple(param.shape)) for name, param in model.named_parameters()])
# Loss
# nn.CrossEntropyLoss works on raw logits and class-index targets.
loss_handle = nn.CrossEntropyLoss()
# Optimizer for back propagation and weight updates
# optimizer = torch.optim.SGD(model.parameters(), lr= learning_rate)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# len(train_loader) is the number of batches per epoch, not the number of
# samples, so it is labelled as such. It is measured BEFORE a batch is drawn so
# that the second measurement below really is a before/after comparison.
train_loader_len = len(train_loader)
print(f'Num_train_batches = {train_loader_len}')

# Draw one batch from a fresh iterator to inspect the tensor shapes.
img, label = next(iter(train_loader))
print('img.shape   =',img.shape)
print('label.shape =',label.shape)

print('Checking if using the iterator and next reduced the train loader length by one')
train_loader_len = len(train_loader)
print(f'Num_train_batches = {train_loader_len}')


for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every image in the batch to 28*28 = 784 values, giving a
        # (batch_size, 784) tensor, and place images and labels on the device.
        images = images.to(device).reshape(-1, 28*28)
        labels = labels.to(device)

        # Forward pass: 10 raw scores (logits) per sample, one per class.
        # nn.CrossEntropyLoss consumes these logits together with class-index
        # labels, so no softmax is needed.
        model_outputs = model(images)
        loss = loss_handle(model_outputs, labels)

        # Back propagate, update the weights, then zero the gradients.
        # Gradients accumulate across backward() calls, so they must be
        # cleared before the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Log the loss on every 100th step only.
        if (i+1) % 100 != 0:
            continue
        print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{train_loader_len}], Loss: {loss.item():.4f}')

        # Not executed, kept as a note: to obtain class predictions, take the
        # index of the largest score in each row of the output, e.g.
        #     max_values, max_indices = torch.max(model_outputs, dim=1)
        #     y_predicteds = max_indices
        # The shapes of images, labels, model_outputs, max_indices and
        # y_predicteds can be printed on the first step (i == 0 and epoch == 0)
        # for inspection.

In [None]:
# Test the model
# Evaluation only measures accuracy; it must never change the weights. There is
# therefore no loss.backward() / optimizer.step() here (that would train on the
# test set), and the forward passes run under torch.no_grad() so that no
# autograd graph is built.
test_loader_len = len(test_loader)
# len(test_loader) is the number of batches, not the number of samples.
print(f'Num_test_batches = {test_loader_len}')

n_correct = 0
n_samples = len(test_loader.dataset)

# Switch to inference mode. This network has no dropout or batch-norm layers,
# so it makes no numerical difference here, but it keeps the cell correct if
# such layers are added later.
model.eval()
with torch.no_grad():
    for batch_idx, (images, labels) in enumerate(test_loader):
        # Same preprocessing as in training: flatten to (batch, 784) on the device.
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # The model returns one row of num_classes logits per sample.
        model_outputs = model(images)
        # The predicted class is the index of the largest logit in each row.
        # Comparing the raw logits with the labels, as the cell did before,
        # does not measure accuracy and the shapes do not even match.
        y_predicteds = model_outputs.argmax(dim=1)

        # Show the shapes once instead of on every batch.
        if batch_idx == 0:
            print('labels.shape()=',labels.shape)
            print('y_predicteds.shape()=',y_predicteds.shape)

        n_correct += (y_predicteds == labels).sum().item()

acc = n_correct / n_samples
print(f'Accuracy of the network on the {n_samples} test images: {100*acc} %')

        
