<a href="https://colab.research.google.com/github/amanjain487/tsai-eva6/blob/main/Assignments/3/Digit_Recognizer_and_Sum_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [63]:
from __future__ import print_function 
# this is not a import statement actually. it tells python compiler to use functions from advanced versions.
# example - python 3.0+ uses print as function, so if we want to use print as function in lower releases, say python 2.6, then above statement does that.
 
import torch 
# a Tensor library like NumPy, with strong GPU support
 
import torch.nn as nn 
# a neural networks library deeply integrated with autograd designed for maximum flexibility
 
import torch.nn.functional as F 
# functional operations required for NN
# like, convolution operation, activation function, los function, pooling and so on...
 
import torch.optim as optim 
# package which implements various optimization algorithms - specifically for updating parameter values
 
from torchvision import datasets 
# contains various famous datasets, which can be loaded to train and test to model
 
from torchvision import transforms 
# common image transformation functions - for pre and post processing images
# like normalization, convert to grayscale, flip, crop and so on..

In [64]:
class MNIST_Model(nn.Module): 
  # define a class named "Net" which takes nn.Module as parameter
  # nn.Module is a base module class upon which our model builds
  # nn.Module is parent class and Net is child class in this case.
 
    def __init__(self): 
      # this function is called as soon as an object is created of this class
      # consider it as constructor of this class
      # it takes itself as parameter
      # it is a constructor of Net class
 
        super().__init__() 
        # lets you avoid referring to base class explicitly.
        # .init() is a constructor of base/parent class
        # in our case, this line is constructor of class "nn.Module"
 
                                                         #input - 28x28x1                        RF = 1x1
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)      #input - 28x28x1    Output - 28x28x32   RF - 3x3
        # create 2d convolutional layer number 1
        # 1st parameter = number of channels in input
        # 2nd parameter = number of channels in output
        # 3rd paramter = size of kernel (always square)
        # padding is specified
 
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)     #input - 28x28x1    Output - 28x28x64   RF - 5x5
        # create 2d convolutional layer number 2
 
        self.pool1 = nn.MaxPool2d(2, 2)                  #input - 28x28x64   Output - 1414x64    RF - 10x10
        # create pooling layer number 1
        # max pooling is done
        # parameters are kernel size and stride
        # 1st parameter = kernel size
        # 2nd parameter = stride
        # padding can also be specified
 
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)    #input - 14x14x64   Output - 14x14x128  RF - 12x12
        # create 2d convolutional layer number 3
 
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)   #input - 14x14x128  Output - 14x14x256  RF - 14x14
        # create 2d convolutional layer number 4
 
        self.pool2 = nn.MaxPool2d(2, 2)                  #input - 14x14x256  Output - 7x7x256    RF - 28x28
        # create pooling layer number 2
        # max pooling is done
 
        self.conv5 = nn.Conv2d(256, 512, 3)              #input - 7x7x256    Output - 5x5x512    RF - 30x30
        # create 2d convolutional layer number 5
 
        self.conv6 = nn.Conv2d(512, 1024, 3)             #input - 5x5x512    Output - 3x3x1024   RF - 32x32
        # create 2d convolutional layer number 63
 
        self.conv7 = nn.Conv2d(1024, 10, 3)              #input - 3x3x1024   Output - 1x1x10     RF - 34x34
        # create 2d convolutional layer number 7
 

    def forward(self, x): 
      # defines the network structure
      # define how model is going to run from input to output
      # input is passed as parameter
      # consider it as forward pass which predicts output at last layer
      # initially, x is a vector/matrix of input pixels
 
        x = self.pool1(F.relu(self.conv2(F.relu(self.conv1(x))))) 
        # apply kernel 1 on input
        # followed by relu activation function
        # followed by convolution with kernel 2 in layer 2
        # followed by relu activation function
        # finally, apply max pooling
 
        x = self.pool2(F.relu(self.conv4(F.relu(self.conv3(x))))) 
        # apply convolution with kernel 3 in layer 3
        # followed by relu activation function
        # followed by convolution with kernel 4 in layer 4
        # followed by relu activation function
        # finally, apply max pooling
 
        x = F.relu(self.conv6(F.relu(self.conv5(x)))) 
        # apply convolution with kernel 5 in layer 5
        # followed by relu activation function
        # followed by convolution with kernel 6 in layer 6
        # followed by relu activation function
 
        x = (self.conv7(x)) 
        # apply convolution with kernel 7 in layer 7
 
        x = x.view(-1, 10) 
        # behaves like -1 in numpy.reshape
        # arrange x such that it can have any number of rows but 10 columns
 
        return F.log_softmax(x) 
        # apply softmax function
        # followed by log
        # finally return the output.

In [65]:
!pip install torchsummary 
# install torchsummary which provides functions like summary
 
from torchsummary import summary 
# summary provides information complementary to what is provided by print(your_model) 
 
use_cuda = torch.cuda.is_available() 
# check if CUDA is available or not
# CUDA is a parallel computing platform for general computing on GPUs (graphics processing units)
# CUDA enables developers to speed up compute-intensive applications by using the power of GPUs for the parallelizable part of the computation.
 
device = torch.device("cuda" if use_cuda else "cpu") 
# use gpu if cuda is available
# else use cpu
 
mnist_model = MNIST_Model().to(device) 
# pass the entire model to gpu if available or cpu if gpu is not available
 
summary(mnist_model, input_size=(1, 28, 28)) 
#print all the details of model
# input shape must be passed, based on which
# it will show at each layer what wil be the output shape
# it also displays number of parameters at each layer, and how many of those are actually trainable

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 28, 28]          18,496
         MaxPool2d-3           [-1, 64, 14, 14]               0
            Conv2d-4          [-1, 128, 14, 14]          73,856
            Conv2d-5          [-1, 256, 14, 14]         295,168
         MaxPool2d-6            [-1, 256, 7, 7]               0
            Conv2d-7            [-1, 512, 5, 5]       1,180,160
            Conv2d-8           [-1, 1024, 3, 3]       4,719,616
            Conv2d-9             [-1, 10, 1, 1]          92,170
Total params: 6,379,786
Trainable params: 6,379,786
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 1.51
Params size (MB): 24.34
Estimated Total Size (MB): 25.85
-------------------------------------



In [66]:
torch.manual_seed(1) 
# Sets the seed for generating random numbers

mnist_batch_size = 128 
# how many instances of training examples utilized in one iteration

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} 
# number_of_workers means how many process are there to load data into RAM
# to speed up transfer between cpu and gpu, we set pin memory as true

mnist_train_loader = torch.utils.data.DataLoader(# load training data
  datasets.MNIST('../data', # path to MNIST dataset in torchvision
                 train=True, # bool denoting whether it is training data or not
                 download=True, # to download data or not
                 transform=transforms.Compose([
                                               transforms.ToTensor(),
                                               # The output of torchvision datasets are PILImage images of range [0, 1].
                                               # We transform them to Tensors of normalized range [-1, 1]
                                               
                                               transforms.Normalize((0.1307,), (0.3081,)) 
                                               # 0.1307 is mean of MNIST dataset
                                               # 0.3081 is std deviation of MNIST Dataset
                                               ])
                 ),
                 batch_size=mnist_batch_size, # assign batch size 
                 shuffle=True, # should the data be shuffled at every iteration or not
                 **kwargs # parameters such as num_workers, pin_memory if we use cuda
                 )
 
mnist_test_loader = torch.utils.data.DataLoader( #load test data
    datasets.MNIST('../data', #path to MNIST dataset in torchvision
                   train=False, # bool denoting whether it is training data or not
                   transform=transforms.Compose([
                                                 transforms.ToTensor(), 
                                                 # The output of torchvision datasets are PILImage images of range [0, 1].
                                                 # We transform them to Tensors of normalized range [-1, 1]
                                                 
                                                 transforms.Normalize((0.1307,), (0.3081,)) 
                                                 # 0.1307 is mean of MNIST dataset
                                                 # 0.3081 is std deviation of MNIST Dataset
                                                 ])
                   ),
                   batch_size=mnist_batch_size, # assign batch size 
                   shuffle=True, # should the data be shuffled at every iteration or not
                   **kwargs # parameters such as num_workers, pin_memory if we use cuda
                   )


In [67]:
from tqdm.notebook import tqdm 
# lets any process or loop show smart progress meter

def mnist_train(model, device, train_loader, optimizer, epoch): 
  # function to train the model

    model.train() 
    # tell the model that we are training the model

    pbar = tqdm(train_loader) 
    # create progress bar for training

    for batch_idx, (data, target) in enumerate(pbar): 
      # enumerate all batches
      # batch_idx = batch number
      # data = x_train
      # target = y_train

        data, target = data.to(device), target.to(device) 
        # send x_train and y_train to gpu if available else to cpu

        optimizer.zero_grad() 
        # set the gradients to zero before starting to do backpropragation
        # zero_grad clears old gradients from the last step (otherwise you’d just accumulate the gradients from all loss.backward() calls).

        output = model(data) 
        # pass data to model for forward pass

        loss = F.nll_loss(output, target) 
        # calaculate loss function
        # nll_loss = negative log likelihood loss
        # output = prediction by model
        # target = real value

        loss.backward() 
        # computes the derivative of the loss w.r.t. the parameters (or anything requiring gradients) using backpropagation.

        optimizer.step() 
        # causes the optimizer to take a step based on the gradients of the parameters.

        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}') 
        # format how progress bar should look and what should it display


def mnist_test(model, device, test_loader): 
  # function to test the model

    model.eval() 
    # tell the model that we are testing/evaluating the model

    test_loss = 0 
    # to find average loss

    correct = 0 
    # required for calculating accuracy
    # accuracy = correct predictions / total predictions

    with torch.no_grad(): 
      # temporarily disabling gradient calculation

        for data, target in test_loader: 
          # data = x_test, target = y_test
            
            data, target = data.to(device), target.to(device)
             # send x_text and y_test to gpu if available else to cpu

            output = model(data) 
            # pass data to model for forward pass

            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # sum up batch loss
            # in train function, it computes average loss for each batch
            # but here, it computes and add loss of each entry
            # that is why we use reduction='sum' -> adds the loss for each test data

            pred = output.argmax(dim=1, keepdim=True)  
            # get the index of the max log-probability
            # get index of largest value which is probability that the test data is that number

            correct += pred.eq( # computes element wise equality 
                               target.view_as(pred) # view target as pred
                               ).sum().item() # compare predictions and target
                               # count number of correct predictions and fetch that number and add it to variable correct

    test_loss /= len(test_loader.dataset) 
    # calculate average loss

    accuracy = 100. * correct / len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        accuracy))
    #print loss and accuracy of predictions

In [68]:
mnist_optimizer = optim.SGD(# Implements stochastic gradient descent
                      # SGD randomly picks one data point from the whole data set to compute derivatives at each iteration to reduce the computations
                      
                      mnist_model.parameters(), 
                      # fetches all the parameters from model

                      lr=0.01, 
                      # learning rate -> determines the step size at each iteration while moving toward a minimum of a loss function

                      momentum=0.9 # technique to improve training speed and accuracy
                      ) 

for mnist_epoch in range(5): 
  #epoch means how many times entire dataset is passed to model for training

    mnist_train(mnist_model, device, mnist_train_loader, mnist_optimizer, mnist_epoch) 
    # train model defined by us in gpu if available, else cpu
    # train loader loads mnist dataset, optimizer used here is SGD, epoch is 1

    mnist_test(mnist_model, device, mnist_test_loader) # test model trained in above line
    # device is gpu if available, else cpu
    # test loader loads test data from mnist dataset

HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))





Test set: Average loss: 0.0527, Accuracy: 9835/10000 (98%)



HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))



Test set: Average loss: 0.0368, Accuracy: 9881/10000 (99%)



HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))



Test set: Average loss: 0.0326, Accuracy: 9894/10000 (99%)



HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))



Test set: Average loss: 0.0351, Accuracy: 9874/10000 (99%)



HBox(children=(FloatProgress(value=0.0, max=469.0), HTML(value='')))



Test set: Average loss: 0.0287, Accuracy: 9909/10000 (99%)



In [69]:
class Sum_Model(nn.Module): 
  # define a class named "Net" which takes nn.Module as parameter
  # nn.Module is a base module class upon which our model builds
  # nn.Module is parent class and Net is child class in this case.
 
    def __init__(self): 
      # this function is called as soon as an object is created of this class
      # consider it as constructor of this class
      # it takes itself as parameter
      # it is a constructor of Net class
 
        super().__init__() 
        # lets you avoid referring to base class explicitly.
        # .init() is a constructor of base/parent class
        # in our case, this line is constructor of class "nn.Module"


        self.fc1 = nn.Linear(in_features = 2, out_features = 8)
        # 1st fully connected layer
        # 2 input features -> 2 numbers to add
        # 8 output features
        # 8*2 = 16 weights
        self.fc2 = nn.Linear(in_features = 8, out_features = 16)
        # 2nd fully connected layer
        # 8 input features
        # 16 output features
        # 16*8 = 128 weights
        self.fc3 = nn.Linear(in_features = 16, out_features = 8)
        # 3rd fully connected layer
        # in - 16
        # out - 8
        # 128 weights
        self.out = nn.Linear(in_features = 8, out_features = 1)
        # final output layer
        # in - 8
        # out - 1 => sum of 2 numbers
        # 8 weights

    def forward(self, x): 
      # defines the network structure
      # define how model is going to run from input to output
      # input is passed as parameter
      # consider it as forward pass which predicts output at last layer
      # initially, x is a tensor of 2 numbers to add
 
        x = self.fc1(x)
        # pass the input through 1st fully connected layer
        x = self.fc2(x)
        # pass output of 1st fully connected layer to 2nd fully connected layer as input
        x = self.fc3(x)
        # pass output of 2nd fully connected layer as input to 3rd fully connected layer
        x = self.out(x)
        # pass output of 3rd fully connected layer as input to output layer
        return x 
        # return the predicted number
        

In [70]:
from random import randint
import numpy as np

# function to create training set for addition of 2 numbers
# parameters are - number of training instances to create, how many numbers to add(addition of 2 or 3 numbers), largest inidivual number for addition
def random_sums(n, nos, largest):
  X, y = list(), list()
  # create 2 empty list
  # run a loop through n to create n training instances
  for i in range(n):
    # create input numbers list
    # list will contain number of numbers to add
    in_pattern = [randint(1,largest) for _ in range(nos)]
    # create label for those inputs i.e., sum of above random numbers
    out_pattern = sum(in_pattern)

    # append each instance in above created list
    # inputs in X
    # expected outputs in y
    X.append(in_pattern)
    y.append(out_pattern)

  # format as NumPy arrays
  X,y = np.array(X), np.array(y)
  # normalize the numbers
  X = X.astype('float') / float(largest * nos)
  y = y.astype('float') / float(largest * nos)
  # return the numpy arrays
  return X, y



In [71]:
# input parameters for our case

# create 1000 training instances
n = 1000
# we need to perform addition of 2 numbers
nos = 2
# largest number in our case would be 9
largest = 9


In [72]:
# create dataset class for our sum dataset
import pandas as pd
from torch.utils.data import Dataset

class SUM(Dataset):
  def __init__(self, csv_file):
    self.data = pd.read_csv(csv_file)

  def __getitem__(self, index):
      r = self.data.iloc[index]
      input = torch.tensor(r[1:3])
      label = torch.tensor(r[3])
      # returns input numbers and output as tensors
      return input, label

  def __len__(self):
    return len(self.data)

In [73]:
# create dataset
import pandas as pd

# create training instances
X, y = random_sums(n, nos, largest)

# create dataframe for inputs
df1 = pd.DataFrame(X, columns = ['in1', 'in2'])

# create dataframe for outputs
df2 = pd.DataFrame(y, columns = ['label'])

# combine input and output in single dataframe
result = pd.concat([df1, df2], axis=1, join='inner')

# save the dataframe as csv file, so that we can load it later using DataSet class
result.to_csv('sum_dataset.csv')

In [74]:
# open and load the dataset from csv file created above
sum_train_set = SUM("sum_dataset.csv")

In [75]:
print(len(sum_train_set))
print(sum_train_set[0])

1000
(tensor([0.1111, 0.2778], dtype=torch.float64), tensor(0.3889, dtype=torch.float64))


In [76]:
sum_model = Sum_Model().to(device) 
# initialize model and load it in gpu if available, else cpu

# print summary of sum model
summary(sum_model, input_size=(1, 2))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 8]              24
            Linear-2                [-1, 1, 16]             144
            Linear-3                 [-1, 1, 8]             136
            Linear-4                 [-1, 1, 1]               9
Total params: 313
Trainable params: 313
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [77]:
torch.manual_seed(1) 
# Sets the seed for generating random numbers

sum_batch_size = 2 
# how many instances of training examples utilized in one iteration

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} 
# number_of_workers means how many process are there to load data into RAM
# to speed up transfer between cpu and gpu, we set pin memory as true

sum_train_loader = torch.utils.data.DataLoader(# load training data
                 sum_train_set,
                 batch_size=sum_batch_size, # assign batch size 
                 shuffle=True, # should the data be shuffled at every iteration or not
                 **kwargs # parameters such as num_workers, pin_memory if we use cuda
                 )


In [78]:
from tqdm.notebook import tqdm 
# lets any process or loop show smart progress meter

def sum_train(model, device, train_loader, optimizer, epoch): 
  # function to train the model

    model.train() 
    # tell the model that we are training the model

    pbar = tqdm(sum_train_loader) 
    # create progress bar for training

    for batch_idx, (data, target) in enumerate(pbar): 
      # enumerate all batches
      # batch_idx = batch number
      # data = x_train
      # target = y_train
        data = data.type(torch.FloatTensor)
        target = target.type(torch.FloatTensor)
        data, target = data.to(device), target.to(device) 
        # send x_train and y_train to gpu if available else to cpu

        optimizer.zero_grad() 
        # set the gradients to zero before starting to do backpropragation
        # zero_grad clears old gradients from the last step (otherwise you’d just accumulate the gradients from all loss.backward() calls).
        output = model(data) 
        # pass data to model for forward pass

        loss = F.mse_loss(output, target) 
        # calaculate loss function
        # mse_loss = mean squared error loss
        # output = prediction by model
        # target = real value

        loss.backward() 
        # computes the derivative of the loss w.r.t. the parameters (or anything requiring gradients) using backpropagation.

        optimizer.step() 
        # causes the optimizer to take a step based on the gradients of the parameters.

        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}') 
        # format how progress bar should look and what should it display



In [79]:
sum_optimizer = optim.Adam(# Implements Adam optimizer for sparse gradients on noisy problems
                      
                      sum_model.parameters(), 
                      # fetches all the parameters from model

                      lr=0.001, 
                      # learning rate -> determines the step size at each iteration while moving toward a minimum of a loss function
                      ) 

for sum_epoch in range(1, 50): 
  #epoch means how many times entire dataset is passed to model for training

    sum_train(sum_model, device, sum_train_loader, sum_optimizer, sum_epoch) 
    # train model defined by us in gpu if available, else cpu
    # train loader loads mnist dataset, optimizer used here is SGD, epoch is 1

HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))






HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




In [80]:
# function to create tensors for testing
# parameters are - number of training instances, number of numbers to add, largest allowed number
def random_sum_tensors(n, nos, largest):
  # create empty lists
  X, y = list(), list()
  for i in range(n):
    # create input numbers list
    # list will contain number of numbers to add
    in_pattern = [randint(1,largest) for _ in range(nos)]
    # create label for those inputs i.e., sum of above random numbers
    out_pattern = sum(in_pattern)

    # append each instance in above created list
    # inputs in X
    # expected outputs in y
    X.append(in_pattern)
    y.append(out_pattern)

  # format as NumPy arrays
  X,y = np.array(X), np.array(y)

  # normalize
  X = X.astype('float') / float(largest * nos)
  y = y.astype('float') / float(largest * nos)
  # return X, y as tensors
  return torch.tensor(X), torch.tensor(y)

# covert normalized value to original value
def invert(value, nos, largest):
  # move the value to cpu, detach the grad calculation and then convert to numpy
  value = value.cpu().detach().numpy()
  return round(value * float(largest * nos))

In [81]:
from math import sqrt
# import mean_squared_error function to calculate errors for test set
from sklearn.metrics import mean_squared_error

# generate test set using random_sum_tensors() function
X, y = random_sum_tensors(n, nos, largest)
# convert X to FloatTesnor
X = X.type(torch.FloatTensor)
# move X,y to cuda if available
X, y = X.to(device), y.to(device)

# pass the input to model and get predictions
result = sum_model(X)

# calculate error
expected = [invert(x, nos, largest) for x in y]
predicted = [invert(x, nos, largest) for x in result[:,0]]
rmse = sqrt(mean_squared_error(expected, predicted))
print('RMSE: %f' % rmse)

RMSE: 1.844451


In [104]:
label = 0
image_prediction = 1
addition_prediction = 1
actual_output = 2
c = 0
#while int(label) != image_prediction or addition_prediction != actual_output:
c += 1
# load single image as input for evaluating the model
images = next(iter(mnist_train_loader))[0]
image_labels = next(iter(mnist_train_loader))[1]
# random index to access the image from a batch
index = randint(1,127)

# access a single image and its label
image, label = images[index], image_labels[index]
# second input for our model - random number
random_number = randint(1,9)

# actual output
actual_output = int(label) + random_number

# reshape image to pass to our model
image = image.reshape(1,1,28,28).to(device)

# pass image to mnist model
image_prediction = mnist_model(image)

# find index with largest confidence, giving the output
image_prediction = torch.argmax(image_prediction).item()

# create input of sum model - predicted number, random number
X= [image_prediction, random_number]
X = np.array(X)
  # normalize
X = X.astype('float') / float(largest * nos)

# pass input to sum model
addition_prediction = sum_model(torch.tensor(X).type(torch.FloatTensor).to(device))

# denormalize/invert normalization
addition_prediction = invert(addition_prediction[0], nos, largest)
print("MNIST Model expected output is : ", int(label))
print("MNIST Model output is : ", image_prediction)
print("Sum Model expected Output is : ", addition_prediction)
print("Sum Model output is : ",actual_output)



MNIST Model expected output is :  4
MNIST Model output is :  4
Sum Model expected Output is :  10
Sum Model output is :  11
