# Project 2 : Mini deep learning framework

The objective of this project is to design a mini "deep learning framework" using only pytorch's
tensor operations and the standard math library, hence in particular without using autograd or the
neural-network modules.

The framework should use no pre-existing neural-network Python toolbox and should work only with basic PyTorch tensor operations.

____________________________________________________________________

In [None]:
import math
import torch # but don't use nn

# autograd globally off: the project statement forbids autograd, so gradients
# are produced only by the hand-written backward() methods of the modules
torch.set_grad_enabled(False)

In [None]:
class Module(object):
    '''Base class for every layer, container and loss of the framework.

    Subclasses override forward() and backward(). A subclass that owns
    parameters stores them in the attributes `weights` / `biases` and the
    matching gradients in `gradWeights` / `gradBiases`; zeroGrad() and param()
    discover them by these attribute names.
    '''

    def __init__(self):
        '''Constructor of the Module class.'''
        # attributes needed for all modules
        self.output = torch.Tensor()  # output of module (after calling forward method)
        self.gradInput = torch.Tensor()  # gradient with respect to input to model (result of backprop)
        self.type = str()  # human-readable layer name, set by subclasses

    def __call__(self, *inp, **kwargs):
        '''Makes layer callable like a function and directly returns the result of forward().'''
        return self.forward(*inp, **kwargs)

    def forward(self, *inp, **kwargs):
        '''Should get for input, and return, a tensor or a tuple of tensors.

        The base implementation ignores its arguments and returns the stored output.
        '''
        return self.output

    def backward(self, *gradwrtoutput):
        '''Should get as input a tensor or a tuple of tensors containing the gradient of the
        loss with respect to the module's output, compute the gradient wrt the parameters, and
        return a tensor or a tuple of tensors containing the gradient of the loss wrt the
        module's input.

        The base implementation ignores its arguments and returns the stored gradInput.
        '''
        return self.gradInput

    def zeroGrad(self):
        '''Sets all the gradients to zero (in place).'''
        for gradName in ('gradInput', 'gradWeights', 'gradBiases'):
            grad = getattr(self, gradName, None)
            if grad is not None:
                # zero_() instead of `*= 0.`: NaN * 0 and inf * 0 are NaN, so a
                # multiplication cannot clear a buffer that holds such values
                grad.zero_()

    def param(self):
        '''Return a list of pairs, each composed of a parameter tensor and a gradient tensor
        of same size. This list is empty for parameterless modules (e.g. ReLU).'''
        pairs = []
        for paramName, gradName in (('weights', 'gradWeights'), ('biases', 'gradBiases')):
            if hasattr(self, paramName):
                pairs.append((getattr(self, paramName), getattr(self, gradName)))
        return pairs


The framework should implement at least the modules Linear (fully connected layer), ReLU, Tanh,
Sequential (to combine several modules in a basic sequential structure), and LossMSE (to compute the MSE loss).

In [None]:
class Linear(Module):
    '''Fully connected linear layer computing `weights @ inp + biases`.

    `weights` has shape (out_features, in_features) and `biases` (out_features, 1), so
    forward() expects a 2-D input of shape (in_features, n_samples), one sample per column.
    '''

    def __init__(self, in_features, out_features, bias=True):
        '''Create the layer.

        in_features  -- number of rows of the input fed to forward()
        out_features -- number of rows of the produced output
        bias         -- when False no `biases` / `gradBiases` attributes are created
        '''
        super(Linear, self).__init__()
        # torch.empty / torch.Tensor(size) return uninitialised memory, so the
        # parameters are drawn uniformly in +-1/sqrt(in_features) instead
        bound = 1. / math.sqrt(in_features) if in_features > 0 else 0.
        self.weights = torch.empty(out_features, in_features).uniform_(-bound, bound)
        self.gradWeights = torch.zeros(out_features, in_features)
        if bias:
            self.biases = torch.empty(out_features, 1).uniform_(-bound, bound)
            # same shape as the biases (a gradient always matches its parameter)
            self.gradBiases = torch.zeros(out_features, 1)
        self.inp = torch.Tensor()  # to keep track of past input values
        self.type = 'Linear'

    def forward(self, inp):
        '''Return `weights @ inp` (+ biases broadcast over the columns) and remember `inp`.'''
        self.inp = inp  # needed by backward() to form the weight gradient
        self.output = torch.mm(self.weights, inp)
        if hasattr(self, 'biases'):
            self.output.add_(self.biases)
        return self.output

    def backward(self, gradOutput):
        '''Backpropagate `gradOutput` (gradient of the loss wrt this layer's output).

        Stores the gradients wrt weights and biases for the input seen by the last
        forward() call (overwriting the previous ones) and returns the gradient wrt
        that input.
        '''
        self.gradInput = torch.mm(self.weights.t(), gradOutput)
        self.gradWeights = torch.mm(gradOutput, self.inp.t())
        if hasattr(self, 'biases'):
            # the bias is shared by every column, so its gradient is the sum over
            # columns; this also yields a fresh tensor rather than an alias of
            # the caller's gradOutput
            self.gradBiases = gradOutput.sum(dim=1, keepdim=True)
        return self.gradInput
    

In [None]:
class lossMSE(Module):
    '''Compute MSE loss: sum over the first dimension of (inp - target)^2 / (2 * len(inp)).'''

    # the constructor cannot be omitted: it adds `inp` and sets `type`
    def __init__(self):
        super(lossMSE, self).__init__()
        self.inp = torch.Tensor()  # to keep track of past input values
        self.type = 'MSE loss'

    def forward(self, inp, target):
        '''Calculate MSE loss between prediction `inp` and `target` and remember `inp`.

        The squared errors are summed along the first dimension only, so the result
        keeps the remaining dimensions of `inp`.
        '''
        self.inp = inp
        # tensor-level sum along dim 0 gives the same result as the Python builtin
        # sum() over the rows, without a Python-level loop
        self.output = (inp - target).pow(2).sum(dim=0) / (2 * len(inp))
        return self.output

    def backward(self, target):
        '''Return the gradient of the loss wrt the prediction given to the last forward().'''
        # derivative of (inp - target)^2 / (2 n) is (inp - target) / n; the 1/n
        # factor keeps the gradient consistent with the value returned by forward()
        self.gradInput = (self.inp - target) / len(self.inp)
        return self.gradInput

In [None]:
class ReLU(Module):
    '''Rectified linear unit activation layer: max(0, x) element-wise.'''

    def __init__(self):
        super(ReLU, self).__init__()
        self.type = 'ReLU'

    def forward(self, inp):
        '''Return a copy of `inp` in which every negative entry is replaced by 0.'''
        self.output = inp.clone()
        self.output[self.output < 0] = 0  # kill all negative terms
        return self.output

    def backward(self, gradOutput):
        '''Return `gradOutput` with the entries zeroed where the last forward() output is not positive.

        Must be called after forward(): the mask comes from the stored output, which is
        positive exactly where the forward input was positive.
        '''
        self.gradInput = gradOutput.clone()
        # the ReLU derivative is 1 for positive inputs and 0 otherwise, so the
        # upstream gradient passes through unchanged or is blocked
        self.gradInput[self.output <= 0] = 0
        return self.gradInput

In [None]:
class Sigmoid(Module):
    '''Sigmoid activation layer: 1 / (1 + exp(-x)) element-wise.'''

    def __init__(self):
        super(Sigmoid, self).__init__()
        self.type = 'Sigmoid'

    @staticmethod  # can be used without creating a sigmoid object
    def sigmoidFct(inp):
        '''Sigmoid function.

        Accepts a tensor (applied element-wise, returns a tensor) or a plain Python
        number (returns a float).
        '''
        if torch.is_tensor(inp):
            # 1 / (1 + exp(-x)): when exp overflows to inf the result is 0, whereas
            # exp(x) / (1 + exp(x)) would give inf / inf = nan
            return inp.neg().exp().add(1.).reciprocal()
        # scalar path: pick the form whose exponent is non-positive so that
        # math.exp cannot raise OverflowError
        if inp >= 0:
            return 1. / (1. + math.exp(-inp))
        expInp = math.exp(inp)
        return expInp / (1. + expInp)

    def forward(self, inp):
        '''Apply the sigmoid to `inp` and store the result for backward().'''
        self.output = self.sigmoidFct(inp)
        return self.output

    def backward(self, gradOutput):
        '''Return the gradient wrt the input of the last forward() call.'''
        # chain rule: the derivative of the sigmoid is s * (1 - s), evaluated at the
        # stored forward output s, and it scales the upstream gradient
        self.gradInput = gradOutput * self.output * (1. - self.output)
        return self.gradInput

In [None]:
class Sequential(Module):
    '''Container to store several layers sequentially.

    forward() feeds the input through the modules in order; backward() feeds the
    gradient through them in reverse order.
    '''

    def __init__(self, *args):
        '''Store the given modules in order and print a summary of the container.'''
        super(Sequential, self).__init__()
        self.modules = []
        self.size = 0
        self.type = 'Sequential containter'
        for arg in args:
            self.add(arg)
        print(self)

    def __str__(self):
        '''Return one header line followed by one line per layer (index and type).'''
        parts = ['New neural net\n']
        parts.extend('   Layer ' + str(ind) + ': ' + module.type + '\n'
                     for ind, module in enumerate(self.modules))
        return ''.join(parts)

    def add(self, module, index=None):
        '''Add new layer at position index. By default is added as new last layer.

        Raises ValueError when index is outside [0, number of modules].
        '''
        if index is None:
            index = self.size
        if index < 0 or index > self.size:
            raise ValueError('Supplied index is out of range for number of modules in this sequence.')
        self.modules.insert(index, module)
        self.size += 1
        # check if module was added as first or last layer; size has already been
        # incremented, so the last valid position is size - 1
        if index == self.size - 1:
            self.output = self.modules[-1].output
        if index == 0:
            self.gradInput = self.modules[0].gradInput

    def forward(self, inp):
        '''Feed a copy of `inp` through every module in order and return the final output.'''
        temp = inp.clone()
        for module in self.modules:
            temp = module(temp)  # feed forward loop
            module.output = temp
        self.output = temp
        return self.output

    def backward(self, gradOutput):
        '''Feed a copy of `gradOutput` through every module in reverse order and return
        the gradient wrt the container's input.'''
        temp = gradOutput.clone()
        for module in reversed(self.modules):
            temp = module.backward(temp)  # backpropagation
            module.gradInput = temp
        self.gradInput = temp
        return self.gradInput
            

### Testing the modules 

In [None]:
# Construction smoke test: build a four-module container (its constructor
# prints the layer summary) and inspect its bookkeeping attributes.
# NOTE(review): lossMSE.forward takes (inp, target) while Sequential.forward
# passes a single tensor to each module, so this container can be built but
# calling it would fail at the last module.
secondSeq = Sequential(Linear(2, 1), ReLU(), Sigmoid(), lossMSE())
print(secondSeq.size)
print(secondSeq.modules)
print(secondSeq.modules[0].param())
# an empty container is also accepted
tripSeq = Sequential()

In [None]:
# ReLU check on a vector containing positive, zero and negative entries
relutest = ReLU()
inp = torch.Tensor([3., 2., 1., 0., -1.])
print(inp)
print(relutest(inp))
# the input vector is reused here as the upstream gradient
print(relutest.backward(inp))
# ReLU defines neither weights nor biases
print(relutest.param())

In [None]:
# MSE check on two column vectors of shape (2, 1): prediction 6 and target 1
# everywhere, i.e. an error of 5 on every entry
mseTest = lossMSE()
inp = torch.full((2, 1), 6.)
target = torch.full((2, 1), 1.)
print(mseTest(inp, target))
print(mseTest.backward(target))
# the loss defines neither weights nor biases
print(mseTest.param())

In [None]:
# End-to-end check: one Linear layer followed by the MSE loss, then backprop
mseTest = lossMSE()

# single sample stored as a column: 2 input features, 5 target values
inp = torch.full((2, 1), 2.)
target = torch.full((5, 1), 1.)

# layer sizes are taken from the number of rows of input and target
linTest = Linear(len(inp), len(target))

pred = linTest(inp)
print(pred)
print(target)
loss = mseTest(pred, target)
print(loss)
# the gradient returned by the loss is fed into the layer's backward pass
linBackward = linTest.backward(mseTest.backward(target))
# todo : check this makes sense!!