### Preliminaries

In [4]:
# import libraries
import torch 
import numpy as np
from torch import nn 
from torch import optim
from torchvision import datasets, transforms 
from torch.utils.data import random_split, DataLoader

# Check CUDA availability once, report it, and pick the matching device:
# the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    print('CUDA is available!')
    device = torch.device('cuda')
else:
    print('No cuda')
    device = torch.device('cpu')

import matplotlib.pyplot as plt
# %matplotlib inline

import os
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort that can
# occur when several libraries each load their own OpenMP copy — confirm it is still
# needed on this machine, since it silences a safety check rather than fixing the cause.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"


CUDA is available!


### Example 1: DNN for MNIST
5 main components, followed by the two steps that put them to use:
- model 
- load data 
- training loop 
- validation loop 
- hparams, optimizer, and loss
- start the training process
- test on the test set 

#### 1- Build Model architecture

In [10]:
# DNN model 
# Reference only (kept commented out): the same network written with explicit
# layer attributes instead of nn.Sequential.
# class DNN_conventional(nn.Module):
#     def __init__(self, input_size, output_size): # constructor method for the subclass 
#         super().__init__() # super allows you to call methods of the superclass in your subclass 
#         # building the DNN model layer by layer (the conventional, non-sequential way)
#         self.l1 = nn.Linear(input_size, 64)
#         self.l2 = nn.Linear(64,64)
#         self.l3 = nn.Linear(64,output_size)
#         self.relu = nn.ReLU()
#
#     def forward(self, x):
#         x = x.view(x.size(0), -1) # vectorize the input
#         out = self.l1(x)
#         out = self.relu(out)
#         out = self.l2(out)
#         out = self.relu(out)
#         out = self.l3(out)
#         return out
            
 
class DNN(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    The network is ``Linear -> ReLU -> Linear -> ReLU -> Linear`` and returns raw
    (un-normalized) scores, one per output unit.

    Args:
        input_size: number of features per sample after flattening.
        output_size: number of scores produced per sample (one per class).
        hidden_size: width of both hidden layers (defaults to 64).
    """

    def __init__(self, input_size, output_size, hidden_size=64):
        super().__init__()  # register this object as an nn.Module before adding layers
        # sequential way: layers are applied in the order they are listed
        self.dnn = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
        )

    def forward(self, x):
        """Flatten each sample of ``x`` to a vector and return its scores.

        Args:
            x: tensor whose first dimension is the batch; all remaining
                dimensions are flattened and must multiply to ``input_size``.

        Returns:
            Tensor of shape ``(batch_size, output_size)``.
        """
        batch_size = x.size(0)
        # reshape (rather than view) also accepts non-contiguous inputs such as
        # transposed or sliced tensors, copying only when it has to
        flat = x.reshape(batch_size, -1)
        return self.dnn(flat)

In [9]:
# Model dimensions.
# Each MNIST image is 28 x 28 pixels, so a flattened sample has 28 * 28 = 784 features.
input_size = 28 * 28

# One output score per class; MNIST has 10 classes.
output_size = 10

# Build the network and move its parameters to the selected device.
my_dnn = DNN(input_size, output_size)
my_dnn = my_dnn.to(device)
my_dnn

DNN(
  (dnn): Sequential(
    (0): Linear(in_features=784, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)

#### 2- Load the data

In [12]:
def load_MNIST(val_size=0.2, batch_size=32, data_dir='data'):
    """Download MNIST and return train / validation / test data loaders.

    The official training set is randomly split into a training part and a
    validation part; the official test set is kept separate.

    Args:
        val_size: fraction of the training set held out for validation,
            with ``0 <= val_size < 1``.
        batch_size: number of samples per batch for all three loaders.
        data_dir: directory the dataset is downloaded to / read from.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.

    Raises:
        ValueError: if ``val_size`` is outside ``[0, 1)``.
    """
    if not 0 <= val_size < 1:
        raise ValueError(f'val_size must be in [0, 1), got {val_size}')

    # Data transforms
    transform = transforms.Compose([
        transforms.ToTensor(),  # convert PIL image to Tensor
        # single-channel normalization with mean 0.5 and std 1: this only shifts
        # the values by 0.5, it does not rescale them
        transforms.Normalize((0.5,), (1.0,)),
    ])

    # Load MNIST train data
    train_data = datasets.MNIST(data_dir, train=True, download=True, transform=transform)

    # Split train data into train and val
    num_val_samples = int(val_size * len(train_data))
    num_train_samples = len(train_data) - num_val_samples
    train, val = random_split(train_data, [num_train_samples, num_val_samples])

    # Load test data
    test_data = datasets.MNIST(data_dir, train=False, download=True, transform=transform)

    # Pass the data to the dataloaders. Only the training loader shuffles, so each
    # epoch sees the batches in a new order; evaluation order does not matter.
    train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val, batch_size=batch_size)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    return train_loader, val_loader, test_loader

# loading mnist data
train_dl, val_dl, test_dl = load_MNIST()

#### 3- Training loop

In [13]:
def model_train(epochs, optimizer, criterion, model, train_loader=None):
    """Train ``model`` and validate it after every epoch.

    For each batch: forward pass, loss, gradient reset, backward pass and one
    optimizer step. After each epoch the training metrics are printed and
    ``model_evaluate`` is called on the validation phase.

    Args:
        epochs: number of passes over the training data.
        optimizer: optimizer that updates the parameters of ``model``.
        criterion: loss function called as ``criterion(scores, labels)``.
        model: the network to train.
        train_loader: iterable of ``(data, labels)`` batches. Defaults to the
            notebook-level ``train_dl`` so existing calls keep working.

    Returns:
        The same ``model`` object, trained in place.
    """
    loader = train_dl if train_loader is None else train_loader

    # loop through each epoch
    for epoch in range(epochs):
        batch_losses = []
        num_correct = 0
        num_seen = 0

        # Put the model that was passed in (not a global) into train mode at the
        # start of every epoch, since the validation pass is expected to switch
        # it to eval mode.
        model.train()

        for data, labels in loader:
            # send data and labels to the selected device
            data = data.to(device)
            labels = labels.to(device)

            # forward pass: compute predicted outputs by passing inputs to the model
            scores = model(data)

            # compute the loss function
            loss = criterion(scores, labels)

            # clear gradients left over from the previous step
            optimizer.zero_grad()

            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()

            # perform a single optimization step based on the computed gradients
            optimizer.step()

            # Keep metrics as plain Python numbers so nothing stays attached to
            # the graph or to the device.
            batch_losses.append(loss.item())
            predictions = scores.detach().argmax(dim=1)
            num_correct += predictions.eq(labels).sum().item()
            num_seen += labels.size(0)

        # Loss: mean of the per-batch losses. Accuracy: correct / total, which
        # stays exact even when the last batch is smaller than the others.
        train_loss = sum(batch_losses) / len(batch_losses) if batch_losses else float('nan')
        train_acc = num_correct / num_seen if num_seen else float('nan')

        # print for each epoch
        print(f'Epoch: {epoch + 1} \n \t Train Loss:{train_loss:0.2f} \t Train Acc:{train_acc:0.2f}')

        # for each epoch evaluate the model on the validation data
        val_loss, val_acc = model_evaluate(criterion, model, phase = 'Val')
        print(f'\t Val Loss:{val_loss:0.2f} \t\t Val Acc:{val_acc:0.2f}')
    return model

#### 4- Validation loop

In [14]:
def model_evaluate(criterion, model, phase = 'Val', dataloader=None):
    """Compute the mean loss and accuracy of ``model`` without updating it.

    Args:
        criterion: loss function called as ``criterion(scores, labels)``.
        model: the network to evaluate; it is switched to eval mode.
        phase: which notebook-level loader to use when ``dataloader`` is not
            given. ``'val'``, ``'validate'`` or ``'validation'`` selects
            ``val_dl``; ``'test'`` selects ``test_dl``. Matching ignores case.
        dataloader: optional iterable of ``(data, labels)`` batches that
            overrides ``phase``.

    Returns:
        Tuple ``(mean_loss, mean_acc)`` of 0-dim tensors: the mean of the
        per-batch losses and the fraction of correctly classified samples.

    Raises:
        ValueError: if ``dataloader`` is None and ``phase`` is not recognised.
    """
    if dataloader is None:
        # Resolve the phase explicitly: a typo must not silently evaluate on
        # the test set.
        phase_key = str(phase).strip().lower()
        if phase_key in ('val', 'validate', 'validation'):
            dataloader = val_dl
        elif phase_key == 'test':
            dataloader = test_dl
        else:
            raise ValueError(f"Unknown phase {phase!r}; expected 'Val' or 'test'")

    batch_losses = []
    num_correct = 0
    num_seen = 0

    # switch the model that was passed in (not a global) to eval mode
    model.eval()

    # no gradients are needed for evaluation
    with torch.no_grad():
        for data, labels in dataloader:
            # send data and labels to the selected device
            data = data.to(device)
            labels = labels.to(device)

            # forward pass: compute predicted outputs by passing inputs to the model
            scores = model(data)

            # compute the loss function
            loss = criterion(scores, labels)

            batch_losses.append(loss.item())
            num_correct += scores.argmax(dim=1).eq(labels).sum().item()
            num_seen += labels.size(0)

    # Accuracy is correct / total rather than a mean of per-batch accuracies,
    # so a smaller final batch does not get extra weight.
    mean_loss = torch.tensor(batch_losses).mean()
    mean_acc = torch.tensor(num_correct / num_seen if num_seen else float('nan'))

    return mean_loss, mean_acc

#### 5- Hparams and Optimizer

In [16]:
# Hyperparameters: number of passes over the training data
num_epochs = 5

# Optimizer: AdamW over all parameters of the model, with learning rate 1e-2
params = my_dnn.parameters()
optimizer = optim.AdamW(params, lr=1e-2)

# Loss: cross entropy between the model scores and the integer class labels
criterion = nn.CrossEntropyLoss()

#### 6- Now let's start training and testing!

In [17]:
# Run the training loop; the returned object is the same network, now trained.
train_model = model_train(
    epochs=num_epochs,
    optimizer=optimizer,
    criterion=criterion,
    model=my_dnn,
)

Epoch: 1 
 	 Train Loss:0.41 	 Train Acc:0.87
	 Val Loss:0.28 		 Val Acc:0.92
Epoch: 2 
 	 Train Loss:0.29 	 Train Acc:0.91
	 Val Loss:0.29 		 Val Acc:0.91
Epoch: 3 
 	 Train Loss:0.27 	 Train Acc:0.92
	 Val Loss:0.25 		 Val Acc:0.93
Epoch: 4 
 	 Train Loss:0.27 	 Train Acc:0.92
	 Val Loss:0.32 		 Val Acc:0.91
Epoch: 5 
 	 Train Loss:0.25 	 Train Acc:0.93
	 Val Loss:0.29 		 Val Acc:0.92


In [20]:
# Final check: score the trained network on the held-out test split.
test_metrics = model_evaluate(criterion=criterion, model=train_model, phase='test')
test_loss, test_acc = test_metrics
print(f'Test Loss:{test_loss.item():0.2f}')
print(f'Test Acc:{test_acc.item():0.2f}')

Test Loss:0.29
Test Acc:0.92
