In [14]:
from torch import FloatTensor, LongTensor, Tensor

In [19]:
class Module(object):
    """Common interface shared by the layers and losses defined in this notebook.

    The base class carries no state. ``forward`` and ``backward`` must be
    overridden by subclasses; calling them here raises ``NotImplementedError``.
    """

    def forward(self, *input):
        """Run the forward pass on ``input``. Not implemented in the base class."""
        raise NotImplementedError()

    def backward(self, *gradwrtoutput):
        """Propagate ``gradwrtoutput`` backwards. Not implemented in the base class."""
        raise NotImplementedError()

    def param(self):
        """Return the module's parameters; the base module has none, hence an empty list."""
        return list()

### ReLU Module

ReLU function: 
\begin{equation}
f(x) = \max(0, x)
\end{equation}

The derivative of ReLU is:

\begin{equation} 
f'(x)=
    \begin{cases}
      1, & \text{if}\ x>0 \\
      0, & \text{otherwise}
    \end{cases}
\end{equation}

In [115]:
class ReLU(Module):
    """Rectified linear unit activation: f(x) = max(0, x), applied element-wise."""

    def __init__(self):
        # Pre-activation input saved by forward() and reused by backward().
        self.z = None

    def forward(self, *input_):
        """Apply ReLU to a single input tensor.

        Args:
            *input_: exactly one positional argument, the pre-activation tensor.

        Returns:
            A new tensor in which every negative entry is replaced by 0.
            The caller's tensor is not modified.
        """
        # *input_ packs the positional arguments into a tuple; the tensor is its first item.
        z = input_[0]
        # Keep a private copy so later changes to the caller's tensor cannot affect backward().
        self.z = z.clone()
        # clamp() returns a fresh tensor instead of overwriting the input in place.
        return z.clamp(min=0)

    def backward(self, *gradwrtoutput):
        """Back-propagate through ReLU.

        Args:
            *gradwrtoutput: exactly one positional argument, the gradient of the
                loss with respect to this module's output.

        Returns:
            The gradient with respect to the module's input: the incoming
            gradient where the saved input was > 0, and 0 elsewhere.
        """
        da = gradwrtoutput[0]
        # g'(z): 1 where z > 0, 0 otherwise (cast so it can multiply `da`).
        g_prime = (self.z > 0).type_as(da)
        # dz[l]
        return da.mul(g_prime)

    def param(self):
        """ReLU has no trainable parameters."""
        return []

### Tanh Module

In [114]:
class tanh(Module):
    """Hyperbolic tangent activation, applied element-wise."""

    def __init__(self):
        # Pre-activation input saved by forward() and reused by backward().
        self.z = None

    def forward(self, *input_):
        """Apply tanh to a single input tensor.

        Args:
            *input_: exactly one positional argument, the pre-activation tensor.

        Returns:
            A new tensor holding tanh of every entry.
        """
        # *input_ packs the positional arguments into a tuple; the tensor is its first item.
        z = input_[0]
        self.z = z
        return z.tanh()

    def backward(self, *gradwrtoutput):
        """Back-propagate through tanh.

        Args:
            *gradwrtoutput: exactly one positional argument, the gradient of the
                loss with respect to this module's output.

        Returns:
            The gradient with respect to the module's input,
            ``da * (1 - tanh(z)^2)``.
        """
        da = gradwrtoutput[0]
        # g'(z) = 1 - tanh(z)^2
        g_prime = 1 - self.z.tanh().pow(2)
        # dz[l]
        return da.mul(g_prime)

    def param(self):
        """tanh has no trainable parameters."""
        return []

### Linear Module TODO
A fully connected (affine) layer: $z = Wx + b$.

In [118]:
class Linear(Module):
    """Fully connected layer computing ``z = w.mm(x) + b``.

    NOTE(review): the matrix products below assume ``w`` is
    (out_features, in_features), the input holds one sample per column
    (in_features, n_samples) and ``b`` is (out_features, 1) so that it
    broadcasts over the columns -- confirm against the callers.
    """

    def __init__(self, init_weights, init_biases):
        """Store the initial parameters and create zeroed gradient accumulators.

        Args:
            init_weights: tensor used as the weight matrix ``w``.
            init_biases: tensor used as the bias ``b``.
        """
        self.w = init_weights
        self.b = init_biases
        self.z = None
        self.x_previous_layer = None
        # Sum the gradients wrt w / b over successive backward() calls in these
        # tensors; they share the size and type of the parameters.
        self.grad_w_sum = self.w.clone().zero_()
        self.grad_b_sum = self.b.clone().zero_()

    def forward(self, *input_):
        """Compute the affine map for a single input tensor.

        Args:
            *input_: exactly one positional argument, the output of the
                previous layer.

        Returns:
            ``w.mm(x) + b``; the value is also cached in ``self.z``.
        """
        # *input_ packs the positional arguments into a tuple; the tensor is its first item.
        x = input_[0]
        self.x_previous_layer = x
        self.z = self.w.mm(x) + self.b
        return self.z

    def backward(self, *gradwrtoutput):
        """Accumulate parameter gradients and return the gradient wrt the input.

        Args:
            *gradwrtoutput: exactly one positional argument, the gradient of the
                loss with respect to this layer's output ``z``.

        Returns:
            ``w.t().mm(dz)``, the gradient with respect to the layer's input.
        """
        dz = gradwrtoutput[0]
        # dL/dw = dz . x^T -- the transpose is what makes the shapes agree with w.
        dw = dz.mm(self.x_previous_layer.t())
        # b is shared by every column of the input, so its gradient is the sum of
        # dz over the columns (this equals dz itself for a single column).
        db = dz.sum(1).view_as(self.b)
        # In-place adds: the out-of-place add() would discard the result.
        self.grad_w_sum.add_(dw)
        self.grad_b_sum.add_(db)
        da_previous_layer = self.w.t().mm(dz)
        return da_previous_layer

    def param(self):
        """Return ``(parameter, accumulated gradient)`` pairs for ``w`` and ``b``."""
        return [(self.w, self.grad_w_sum), (self.b, self.grad_b_sum)]

### Sequential Module TODO
Combines several modules into a basic sequential structure.

In [117]:
class Sequential(Module):
    """Container that chains several modules one after another."""

    def __init__(self, *input_):
        """Store the modules in the order they are given.

        Args:
            *input_: the modules to chain; the first one receives the input of
                ``forward``, the last one produces its output.
        """
        self.modules = input_

    def forward(self, *input):
        """Feed ``input`` through every module in order.

        Args:
            *input: positional arguments handed to the first module's ``forward``.

        Returns:
            The output of the last module (or the input itself when the
            container is empty).
        """
        args = input
        for module in self.modules:
            result = module.forward(*args)
            # Normalise to a tuple so the next module can be called with *args.
            args = result if isinstance(result, tuple) else (result,)
        return args[0] if len(args) == 1 else args

    def backward(self, *gradwrtoutput):
        """Propagate ``gradwrtoutput`` through the modules in reverse order.

        Args:
            *gradwrtoutput: positional arguments handed to the last module's
                ``backward``.

        Returns:
            The gradient returned by the first module's ``backward`` (or the
            incoming gradient itself when the container is empty).
        """
        grads = gradwrtoutput
        for module in reversed(self.modules):
            result = module.backward(*grads)
            grads = result if isinstance(result, tuple) else (result,)
        return grads[0] if len(grads) == 1 else grads

    def param(self):
        """Return the concatenation of every contained module's ``param()`` list."""
        collected = []
        for module in self.modules:
            collected.extend(module.param())
        return collected

### MSE Loss Function TODO

In [None]:
class LossMSE(Module):
    """Placeholder for the mean-squared-error loss; nothing is implemented yet."""

    def forward(self, *input):
        """TODO: compute the loss. Currently always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def backward(self, *gradwrtoutput):
        """TODO: compute the loss gradient. Currently always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def param(self):
        """TODO: currently returns an empty list."""
        return list()

### Testing

In [95]:
# Two 5x2 float tensors filled in place with samples from a normal
# distribution (mean 0, standard deviation 5); normal_() returns the tensor itself.
a = FloatTensor(5, 2).normal_(0, 5)
b = FloatTensor(5, 2).normal_(0, 5)
# Last expression of the cell: displays b.
b


 3.8565 -4.1135
-2.1110  7.3292
-3.4445 -7.4141
-2.4421  5.9425
-0.2927  0.4942
[torch.FloatTensor of size 5x2]

In [96]:
# Show both random tensors, one after the other.
print(a, b, sep="\n")


 3.0222 -3.8155
-6.9493  2.2446
-5.4683 -2.5238
 5.4729  0.5047
-5.3991  2.7134
[torch.FloatTensor of size 5x2]


 3.8565 -4.1135
-2.1110  7.3292
-3.4445 -7.4141
-2.4421  5.9425
-0.2927  0.4942
[torch.FloatTensor of size 5x2]



In [97]:
# Smoke test: run a forward pass of ReLU on the random tensor `a` and show the type of the result.
# NOTE(review): this cell's execution count (97) is lower than that of the ReLU class cell (115),
# so the recorded output presumably came from an earlier ReLU definition -- re-run after
# Restart Kernel -> Run All to confirm.
x = ReLU().forward(a)
type(x)

torch.FloatTensor