In [1]:
# Imports
import torch
from torch import nn
from torch.nn import functional as F

# Prologue
import dlc_practical_prologue as prologue

In [3]:
# Switch off autograd globally: the modules in this notebook compute their
# gradients by hand in their own backward() methods.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x1f245974a30>

# 1. Modules

## Base class

In [5]:
class Module(object):
    """
    Common interface implemented by every module in this framework.
    """

    def forward(self, input_):
        """
        Run the forward pass of the module on ``input_``.

        Concrete modules override this method and return a tensor or a tuple
        of tensors. The base implementation only raises
        ``NotImplementedError``.
        """
        raise NotImplementedError

    def backward(self, gradswrtoutput):
        """
        Run the backward pass of the module.

        Concrete modules override this method: it receives the gradient of
        the loss with respect to the module's output, accumulates the gradient
        with respect to the parameters, and returns a tensor or a tuple of
        tensors holding the gradient of the loss with respect to the module's
        input. The base implementation only raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def param(self):
        """
        List the module's parameters.

        Returns:
            list: Pairs of (parameter tensor, gradient tensor of the same
            size). The base class has no parameters, so the list is empty.
        """
        return []

## Activation Functions

### TanH

In [6]:
class TanH(Module):
    """Hyperbolic tangent activation module."""

    def __init__(self):
        super().__init__()

    def forward(self, input_: torch.Tensor) -> torch.Tensor:
        """Apply tanh element-wise and cache what the backward pass needs.

        Parameters:
            input_ (Tensor): The tensor to which tanh is applied

        Returns:
            Tensor: tanh of the input (also stored in ``self.out_``)
        """
        self.inp = input_
        activated = torch.tanh(input_)
        self.out_ = activated
        return activated

    def backward(self, gradwrtoutput: torch.Tensor) -> torch.Tensor:
        """
        Propagate a gradient through the tanh activation.

        Parameters:
            gradwrtoutput (Tensor): gradient of the loss with respect to the
                output of this module

        Returns:
            Tensor: The gradient of the loss with respect to the input
        """
        # d/dx tanh(x) = 1 - tanh(x)^2, computed from the cached output.
        local_grad = 1 - self.out_.pow(2)
        return gradwrtoutput * local_grad

    def param(self):
        """Return an empty list: the activation has no parameters."""
        return []

### ReLU

In [19]:
class ReLU(Module):
    """Module to apply the Rectified Linear function"""

    def __init__(self):
        super().__init__()

    def forward(self, input_: torch.Tensor) -> torch.Tensor:
        """Returns the tensor after applying ReLU to the input and saves the
        input to help in backward pass computation.

        Parameters:
            input_ (Tensor): The tensor on which the ReLU should be applied

        Returns:
            Tensor: The tensor obtained after applying the ReLU on the input
        """
        self.inp = input_
        self.out_ = self.inp.clamp(min=0)
        return self.out_

    def backward(self, gradwrtoutput: torch.Tensor) -> torch.Tensor:
        """
        Returns the gradient of loss with respect to the input on applying ReLU

        Parameters:
            gradwrtoutput (Tensor): gradient with respect to the output

        Returns:
            Tensor: The gradient of the loss with respect to the input
        """
        # The derivative of ReLU is 1 where the input was strictly positive
        # and 0 elsewhere (0 is used at exactly 0), so the incoming gradient
        # is simply masked by the sign of the cached input.
        mask = (self.inp > 0).type_as(self.out_)
        return gradwrtoutput * mask

    def param(self):
        """Return an empty list: the activation has no parameters."""
        return []

### Sigmoid

In [20]:
# Not required by the assignment

class sigmoid(Module):
    """Logistic sigmoid activation module."""

    def __init__(self):
        super().__init__()

    def forward(self, input_: torch.Tensor) -> torch.Tensor:
        """Apply the sigmoid element-wise and cache what backward needs.

        Parameters:
            input_ (Tensor): The tensor to which the sigmoid is applied

        Returns:
            Tensor: sigmoid of the input (also stored in ``self.out_``)
        """
        self.inp = input_
        activated = torch.sigmoid(input_)
        self.out_ = activated
        return activated

    def backward(self, gradwrtoutput: torch.Tensor) -> torch.Tensor:
        """
        Propagate a gradient through the sigmoid activation.

        Parameters:
            gradwrtoutput (Tensor): gradient of the loss with respect to the
                output of this module

        Returns:
            Tensor: The gradient of the loss with respect to the input
        """
        # d/dx s(x) = s(x) - s(x)^2, computed from the cached output.
        squared = self.out_**2
        local_grad = self.out_ - squared
        return gradwrtoutput * local_grad

    def param(self):
        """Return an empty list: the activation has no parameters."""
        return []

## Losses

### MSE

In [21]:
class MSELoss(Module):
    """Module to calculate the Mean Squared Error."""

    def __init__(self):
        super().__init__()

    def forward(self, input_: torch.Tensor,
                target: torch.Tensor) -> torch.Tensor:
        """Returns the MSE Loss between input_ and target

        Parameters:
            input_ (Tensor): First tensor to calculate the MSE.
            target (Tensor): Second tensor to calculate the MSE.

        Returns:
            Tensor: The Mean Squared Loss between input_ and target,
            averaged over every element.
        """
        # Cached for the backward pass; note the orientation target - input_.
        self.error = target - input_
        self.out_ = self.error.pow(2).mean()
        return self.out_

    def backward(self, gradswrtoutput=1):
        """
        Returns the gradient of the loss with respect to ``input_``.

        Parameters:
            gradswrtoutput: upstream gradient scaling the result (1 when the
                loss is the final node of the computation)

        Returns:
            Tensor: gradient of the mean squared error with respect to the
            ``input_`` given to the last ``forward`` call
        """
        # loss = mean((target - input_)^2), hence
        # d loss / d input_ = -2 * (target - input_) / number_of_elements.
        # The minus sign comes from the orientation of self.error, and the
        # divisor is the element count because forward() averages over all
        # elements, not only over the first dimension.
        return (gradswrtoutput * -2 * self.error) / self.error.numel()

## Optimizer

In [22]:
class Optimizer(object):
    """
    Common interface implemented by every optimizer.
    """

    def step(self):
        """
        Perform one optimization step.

        Concrete optimizers override this method; the base implementation
        only raises ``NotImplementedError``.
        """
        raise NotImplementedError

### SGD

In [23]:
class SGD(Optimizer):
    """
    Module to perform Stochastic Gradient Descent
    """

    def __init__(self, params, lr=0.01):
        """
        Stores the parameters to optimise and the learning rate.

        Parameters
            params (list): (parameter tensor, gradient tensor) pairs of the
                network
            lr (float): The learning rate
        """
        super().__init__()

        self.params = params
        self.lr = lr

    def step(self):
        """
        Performs a single optimization step: every parameter tensor is
        updated in place with ``weight <- weight - lr * gradient``.
        """
        for weight, gradient in self.params:
            if weight is None or gradient is None:
                # Pairs without a tensor on either side carry nothing to
                # update, so they are skipped.
                continue
            weight.add_(-self.lr * gradient)

## Layers

### Linear 

In [34]:
class Linear(Module):
    """
    Module that implements a linear (fully connected) layer computing
    ``input_ @ w + b``.

    Attributes set by the constructor:
        w, gradw: weight tensor of shape (in_features, out_features) and its
            accumulated gradient
        b, gradb: bias tensor of shape (out_features,) and its accumulated
            gradient, or ``None`` for both when the layer has no bias
    """

    def __init__(self, in_features, out_features, bias=True):
        """
        Initialises the layer by creating empty weight and bias tensors
        and initialising them using a Normal distribution.

        Parameters:
            in_features (int): size of the last dimension of the input
            out_features (int): size of the last dimension of the output
            bias (bool): whether the layer adds a learnable bias
        """
        super().__init__()

        self.in_features, self.out_features = in_features, out_features

        self.w = torch.empty(in_features, out_features)
        self.gradw = torch.empty(in_features, out_features)

        if bias:
            self.b = torch.empty(out_features)
            self.gradb = torch.empty(out_features)
        else:
            self.b = None
            self.gradb = None

        self.initWeights()

    def initWeights(self):
        """
        Initialises the weight and bias parameters of the layer with samples
        from a standard Normal distribution and resets their gradients to 0.
        """
        self.w.normal_()
        self.gradw.fill_(0)

        if self.b is not None:
            self.b.normal_()
            self.gradb.fill_(0)

    def forward(self, input_):
        """
        Computes the forward pass of the layer by multiplying the input with
        the weights and adding the bias (when the layer has one).

        The input is cached in ``self.inp`` for the backward pass and the
        result is stored in ``self.output``.
        """
        self.inp = input_

        self.output = self.inp.matmul(self.w)
        if self.b is not None:
            self.output = self.output.add(self.b)

        return self.output

    def backward(self, gradwrtoutput):
        """
        Accumulates the gradients of the weights and bias and returns the
        gradient of the loss with respect to the layer's input.

        Parameters:
            gradwrtoutput (Tensor): gradient of the loss with respect to the
                output of the last ``forward`` call

        Returns:
            Tensor: gradient of the loss with respect to the input
        """
        # Collapse any leading dimensions so that a single sample (1-D input)
        # and batched inputs are handled by the same matrix products.
        flat_inp = self.inp.reshape(-1, self.in_features)
        flat_grad = gradwrtoutput.reshape(-1, self.out_features)

        # Gradients are accumulated (not overwritten); call zero_grad()
        # between optimisation steps.
        self.gradw.add_(flat_inp.t().matmul(flat_grad))

        if self.b is not None:
            self.gradb.add_(flat_grad.sum(0))

        return gradwrtoutput.matmul(self.w.t())

    def param(self):
        """
        Returns the parameters of the layer as a list of
        (parameter, gradient) pairs.
        """
        params = [(self.w, self.gradw)]
        if self.b is not None:
            params.append((self.b, self.gradb))
        return params

    def zero_grad(self):
        """
        Sets the accumulated gradients to zero
        """
        self.gradw.zero_()

        if self.b is not None:
            self.gradb.zero_()

### Sequential

In [None]:
class Sequential(Module):
    """
    Module to hold the layers and build the Network
    """

    def __init__(self, *args):
        """
        Stores the given modules in the order in which they are applied.
        """
        super().__init__()

        # A list to hold all layers of the network
        self.modules = list(args)

    def forward(self, input_):
        """
        Feeds ``input_`` through every module in order and returns the output
        of the last one. The input is cached in ``self.inp`` and the result
        in ``self.output``.
        """
        self.inp = input_
        # in case of no layers, the input itself is returned as output
        output = input_

        for module in self.modules:
            output = module.forward(output)

        self.output = output

        return self.output

    def backward(self, gradwrtoutput):
        """
        Propagates ``gradwrtoutput`` through the modules in reverse order and
        returns the gradient of the loss with respect to the network input
        (also stored in ``self.grad``).
        """
        for module in reversed(self.modules):
            gradwrtoutput = module.backward(gradwrtoutput)

        self.grad = gradwrtoutput

        return self.grad

    def param(self):
        """
        List of parameters of all modules
        """
        params = []
        for module in self.modules:
            params.extend(module.param())

        return params

    def zero_grad(self):
        """
        Resets the accumulated gradients of every module that provides a
        ``zero_grad`` method; modules without one are left untouched.
        """
        for module in self.modules:
            reset = getattr(module, "zero_grad", None)
            if callable(reset):
                reset()