In [1]:
import math
import torch
from torch import empty
import numpy as np
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x1d45650fbc8>

In [9]:
a = np.arange(0,10,1)
for i, number in enumerate(a):
    print(a[-i-1])

9
8
7
6
5
4
3
2
1
0


In [3]:
a = torch.tensor([[2,3,4],[3,2,3],[2,2,2]])
b = torch.tensor([[1,1,1],[2,2,2],[1,1,1]])

print(a*b)
print(a.mul(b))
print(a.matmul(b))

tensor([[2, 3, 4],
        [6, 4, 6],
        [2, 2, 2]])
tensor([[2, 3, 4],
        [6, 4, 6],
        [2, 2, 2]])
tensor([[12, 12, 12],
        [10, 10, 10],
        [ 8,  8,  8]])


In [26]:
M = torch.empty(5,3,dtype=float).normal_(0,1)
print(M.size())
print(M.dtype)
print(M)
x = torch.empty([5,1],dtype=float).normal_(0,1)
print(x)
print(x.t().mm(M))

torch.Size([5, 3])
torch.float64
tensor([[ 1.4316,  1.3041,  0.5361],
        [-1.2588, -0.4268, -0.9840],
        [-0.9309,  0.6688, -1.0878],
        [-1.3049, -1.3529, -0.0602],
        [-0.7150,  1.0859,  0.6840]], dtype=torch.float64)
tensor([[ 1.4177],
        [ 1.4022],
        [ 0.4705],
        [ 0.0183],
        [-0.0564]], dtype=torch.float64)
tensor([[-0.1569,  1.4790, -1.1713]], dtype=torch.float64)


In [32]:
test = torch.empty(3,1)
print(test)
print(torch.zeros(3,1).type())
print(torch.zeros(3))
print(torch.zeros(3,1))

tensor([[ 0.0000e+00],
        [ 0.0000e+00],
        [-2.1604e+16]])
torch.FloatTensor
tensor([0., 0., 0.])
tensor([[0.],
        [0.],
        [0.]])


In [8]:
s = torch.empty(5,1).normal_(0,1)
print(s)
print((s>0))
print((s>0).float())


tensor([[ 0.5529],
        [-0.9859],
        [ 0.8265],
        [-0.0341],
        [ 2.0729]])
tensor([[ True],
        [False],
        [ True],
        [False],
        [ True]])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.]])


In [10]:
print(torch.zeros(5,2))
print(torch.zeros(4,1))
print(torch.zeros(4))

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.],
        [0.],
        [0.],
        [0.]])
tensor([0., 0., 0., 0.])


In [5]:
class Module() :
    def __init__(self):
        #???
        pass
    def forward(self, *input):
        raise NotImplementedError
    def backward(self,*gradwrtoutput):
        raise NotImplementedError
    def param(self):
        return []
    def zero_grad(self):
        pass 
    #From stackoverflow : Must use pass to have an empty statement otherwise problem.
    #Is it really useful to have it in Module? I think this is only used by Linear.
    

class Linear(Module):
    """
        One fully connected layer.
        Input : input_dimension, output_dimension [int]
        Those corresponds to the numbers of nodes in a given input layer
        and the number of nodes in the output layer
        Weights and Bias are initialized using the Xavier initialization
        see course 5.5 slide 14 (What is the gain here??)
    """
    def __init__(self,in_dim,out_dim):
        Module.__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        std = math.sqrt(2./(in_dim+out_dim))
        self.w = torch.empty(out_dim,in_dim).normal_(0,std)
        self.b = torch.empty(out_dim).normal_(0,std)
        self.dw = torch.zeros(out_dim,in_dim)
        self.db = torch.zeros(out_dim)
    
     def param(self):
        return torch.Tensor([self.w,self.dw,self.b,self.db])

    def zero_grad(self):
        """
            Function to set grad to zero. Used in gradient step
        """
        self.dw.zero_()
        self.db.zero_()
        
   

    def forward(self,x):
        """
            results of FW pass of layer, returns s = x*w + b
            using formula from Practical 03
        """
        s = self.w.mv(x)+self.b
        return s
    
    def backward(self,x,dl_ds):
        """
            Backward pass, see practical 03 and slide 9,11 of course 3.6
            Accumulates gradient according to chain rule, with mat product.
            dl_ds is the derivative of loss wrt to s = x*w + b
            dl_ds = dl_dx.mul(sigma'(s)) which is given to him the
            backward of activation functions.
        """
        self.dw.add_(dl_ds.view(-1, 1).mm(x.view(1, -1)))
        self.db.add_(dl_ds.view(-1))
        dl_dx_prev = self.w.t().mm(dl_ds)
        return dl_dx_prev
        
#ACTIVATION FUNCTIONS
class ReLU(Module):
    def __init__(self):
        Module.__init__(self)
    
    
    def forward(self,s):
        return torch.max(x,torch.zeros(x.size()))
    
    def backward(self,s,dl_dx):
        """
            Def of backprop : dl_ds = dl_dx * dsigma(s)
            dsigma for relu is f : (1, x>0, 
                                    0, x<0)
            this should work as it gives a logical tensor then into float
            not sure about dimensions.....
        """
        return dl_dx * (s>0).float()
        

class Tanh(Module):
    
    def __init__(self):
        Module.__init__(self)
        
    def forward(self,x):
        return x.tanh()
    
    def backward(self,s,dl_dx):
        """ dtanh = 1/cosh^2"""
        return dl_dx* (1/(torch.pow(torch.cosh(s),2)))
        
        
#LOSS         
class MSELoss(Module):
    """ SEE PRACTICAL 03 UPDATE DOC LATER"""
    def __init__(self):
        Module.__init__(self)
    
    def forward(self,x,t):
        return (x - t).pow(2).sum()
    
    def backward(self, x, t):
        #This is dl_dx. (dloss wrt to x for MSE loss is 2(x-t))
        return 2 * (x - t)
    # MAYBEWRONG UPDATE THIS LATER
    
class CrossEntropyLoss(Module):        
    def __init__(self):
        Module.__init__(self)
        
    def softmax(x):
        """
            Computes softmax with shift to be numerically stable for
            large numbers or floats takes exp(x-max(x)) instead of exp(x)
        """
            #this is really stablesoftmax(x)
            #rather than softamx(x)
        z = x- x.max()
        exps = torch.exp(z)
        return (exps/torch.sum(exps))
    
    def forward(self,x,t):
        """
            With pj = exp(aj)/sum(exp(ak))
            Loss = -Sum_j (yj) log(pj), 
            with t the target being the y in the formula
            and pj = softmax(x)_j
            log(p)*t does the element-wise product then we sum
        """
        p = self.softmax(x)
        sumResult = -torch.sum(torch.log(p)*t)
        return sumResult
    
    def backward(self,x,t):
        """
            computes dLoss 
            dl/dx_i = pi-yi from the slides
        """
        p = self.softmax(x)
        return p-t
        

IndentationError: unindent does not match any outer indentation level (<ipython-input-5-6596a7c7dac7>, line 36)