In [158]:
import torch
from torch import empty
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fcb6850f390>

In [159]:
class Module(object) :
    
    def forward (self , *input) :
        raise NotImplementedError
    def backward (self , *gradwrtoutput):
        raise NotImplementedError

    def param(self):
        return []

In [160]:
class ReLU(Module):
    
    def __init__(self):
        self.input = None
        return
        
    def forward(self, input) :
        self.input = input
        return input.clamp(min=0)
    def backward(self, grad_output):
        assert(self.input is not None)
        assert(grad_output.size == self.input.size)
        
        grad_input = grad_output.clone()
        grad_input[self.input < 0] = 0
        return grad_input
        
    __call__ = forward

In [162]:
class Tanh(Module):
    
    def forward(self, input):
        return torch.tanh(input)
    def backward (self, *gradwrtoutput):
        raise NotImplementedError

In [315]:
class LossMSE(Module):
    def __init__(self):
        return
    
    def forward(self, input, target):
        """
        Compute the mean square error loss between 
        the input tensor and the target tensor.

        Args:
        input -- tensor of size (N, *)
        target -- tensor of size (N, *)

        Returns:
        loss -- mse loss between input and target, loss = 
        """
    
        assert(input.size() == target.size())

        N = input.size(0)
        loss = (target-input).pow(2).sum()
        loss = 1/(2*N) * loss
        return loss
    
    def backward(self, input, target):
        assert(input.size() == target.size())
        N = input.size(0)
        return 1/N * (input-target)

In [340]:
class Linear(Module):
   

    def __init__(self, in_features, out_features, bias=True):
        mean = 0
        std = 1
        self.in_features = in_features
        self.out_features = out_features
        self.params = []
        
        self.weight = torch.empty(size=(out_features, in_features)).normal_(mean=mean, std=std)
        self.dw = torch.zeros(size=(out_features, in_features))
        self.params.append((self.weight, self.dw))
        if bias:
            self.bias = torch.empty(out_features).normal_(mean=mean, std=std)
            self.db = torch.zeros(out_features)
            self.params.append((self.bias, self.db))
        else:
            self.bias = None
            self.db = None
        
      
        
        
    def forward(self, input):
        """
        Forward the input data by applying a linear transformation on it.
        
        Args:
        input -- tensor of size (N, *, in_features)
        
        Returns:
        output -- tensor of size (N, *, out_features),  output = input @ weight.T + bias
        """
        
        assert(input.size(-1) == self.in_features)
        
        self.input = input.clone() # Required information for the backward pass.
        
        output = input @ self.weight.T
        if self.bias is not None:
            output += self.bias
        return output
        
    def backward(self, grad_output):
        """
        Compute the derivative w.r.t. the input of the layer
        given the derivate w.r.t. to the output of the layer.
        
        Args:
        grad_output -- tensor of size (N, *, out_features)
        
        Returns 
        grad_input -- tensor of size (N, * , in_features), grad_input = grad_output @ weight
        """

        assert(grad_output.size(-1) == self.out_features)
        
        grad_input = grad_output @ self.weight
        
        if self.bias is not None:
            self.db += grad_output.sum(axis=0)
        self.dw += grad_output.T @  self.input
        
        return grad_input
       
    def zero_grad(self):
        """
        Sets the gradient w.r.t. the parametes to zero i.e.
        """
        
        self.dw = torch.zeros(size=(self.out_features, self.in_features))
        
        if self.bias is not None:
            self.db = torch.zeros(self.out_features)
        return
    
    def param(self):
        """
        Returns:
        params -- a list of pairs, each composed of a parameter tensor, 
        and a gradient tensor of same size.
        """
        
        # We just return a copy as we don't want the user
        # to be able to change the params of the model through this method.
        params = self.params.clone() 
        return params 
    

    def update_params(self, step_size):
        """
        Update the parameters of the linear layer going 
        in the opposite direction of the gradient.
        
        Args:
        step_size -- the size of an update step
        """
        
        self.weight -= step_size * self.dw
        if self.bias is not None:
            self.bias -= step_size * self.db
        return

In [161]:
class Sequential(Module):
    
    def __init__(self, *modules):
        

SyntaxError: unexpected EOF while parsing (<ipython-input-161-6d202781e7a6>, line 2)

# Little example with 1 linear Layer

In [342]:
linearLayer = Linear(2,3, bias=True)
loss_module = LossMSE()

#generate data
x = torch.empty(10,2).normal_()
real_w = torch.empty(3,2).normal_()
real_b = torch.empty(3).normal_()
y = x @ real_w.T + real_b
print(real_w)
print(real_b)

tensor([[-0.6002,  0.4820],
        [-0.0446,  1.4544],
        [-0.2405, -0.0255]])
tensor([ 0.1009, -1.1129,  0.3157])


In [345]:
linearLayer.param()[0][0][0] = 10

In [346]:
linearLayer.weight

tensor([[10.0000, 10.0000],
        [-0.9219, -0.3727],
        [ 1.2722, -0.0885]])

In [337]:
#train model
epoch = 1000
for e in range(epoch):
    output = linearLayer.forward(x)
    loss = loss_module.forward(output, y)
    if e%100 == 0:
        print("weight: {0}".format(linearLayer.weight))
        print("bias: {0}".format(linearLayer.bias))
        print("loss: {0}".format(loss))
    dloss = loss_module.backward(output, y)
    linearLayer.backward(dloss)
    linearLayer.update_params(0.1)
    linearLayer.zero_grad()

weight: tensor([[ 0.6695,  0.6480],
        [-0.7930,  0.1627],
        [-1.5000,  1.4641]])
bias: tensor([0.6385, 1.8439, 0.5255])
loss: 1.429478822638497e-12
weight: tensor([[ 0.6695,  0.6480],
        [-0.7930,  0.1627],
        [-1.5000,  1.4641]])
bias: tensor([0.6385, 1.8439, 0.5255])
loss: 1.429478822638497e-12
weight: tensor([[ 0.6695,  0.6480],
        [-0.7930,  0.1627],
        [-1.5000,  1.4641]])
bias: tensor([0.6385, 1.8439, 0.5255])
loss: 1.429478822638497e-12
weight: tensor([[ 0.6695,  0.6480],
        [-0.7930,  0.1627],
        [-1.5000,  1.4641]])
bias: tensor([0.6385, 1.8439, 0.5255])
loss: 1.429478822638497e-12
weight: tensor([[ 0.6695,  0.6480],
        [-0.7930,  0.1627],
        [-1.5000,  1.4641]])
bias: tensor([0.6385, 1.8439, 0.5255])
loss: 1.429478822638497e-12
weight: tensor([[ 0.6695,  0.6480],
        [-0.7930,  0.1627],
        [-1.5000,  1.4641]])
bias: tensor([0.6385, 1.8439, 0.5255])
loss: 1.429478822638497e-12
weight: tensor([[ 0.6695,  0.6480],
    

In [338]:
print(real_w)
print(real_b)

tensor([[ 0.6695,  0.6480],
        [-0.7930,  0.1627],
        [-1.5000,  1.4641]])
tensor([0.6385, 1.8439, 0.5255])
