In [1]:
import numpy as np 

In [2]:
class ReLU:
    def __init__(self) -> None:
        pass
    def forward (self, x):
        """
            Y = ReLU(x)
            
            parameters:
                * x: input of relu
                    * size: (batch_size, in_dim)
                    
            output:
                * y: y = relu(x)
                    * size: (batch_size, in_dim) 
        """
        return np.maximum(0,x)
    
    def backward (self, x):
        """
            dY/dx 
            
            parameters:
                * X: input of relu
                    * size: (batch_size, in_dim)
                    
            outputs:
                * dy: dy/dx
                    * size: (batch_size, in_dim)
        """
        dy = np.array(x, dtype=float)
        dy[dy>0]=1 
        dy[dy==0]=0.5
        dy[dy<0]=0
       
        return dy
    
    def __repr__(self):
        return f"ReLU"

In [3]:
relu = ReLU()
print(relu)

ReLU


# Linear Layer

In [4]:
class LinearLayer:
   
   def __init__(self,in_dim,out_dim, act_func) -> None:
      self.weight = np.random.randn(out_dim , in_dim+1) * np.sqrt(2/(in_dim +out_dim ))
      self.act_func = act_func
      self.in_dim = in_dim
      self.out_dim = out_dim
      self.score = np.zeros (out_dim)
      self.activated = np.zeros (out_dim)
      
      
   def forward (self, x):
      x_b = np.ones((x.shape[0], self.in_dim+1))
      x_b[: , :self.in_dim] = x
      
      self.score = np.dot(self.weight , x_b.T).T
      self.activated = self.act_func.forward(self.score)
      return self.activated
   
   def backward (self, dloss_da , activated_prev):
      
      da_ds = self.act_func.backward(self.score)
      
      dloss_ds = np.multiply (dloss_da, da_ds)
      
      ds_dw = activated_prev
      
      dloss_dw = np.dot (dloss_ds.T, ds_dw)
      
      dloss_da_prev = np.dot (dloss_ds,self.weight)
      
      dloss_da_prev = dloss_da_prev[:, :-1]
      
      return dloss_dw, dloss_da_prev
   
   def update(self,dloss_dw, lr):
      self.weight = self.weight - np.multiply(lr, dloss_dw)
   
   def __repr__(self):
      return f"LinearLayer(in_dim={self.in_dim}, out_dim={self.out_dim}, activation={self.act_func})"


In [5]:
X = np.array([
    [-10, 5, 0, -5, 10],
    [4, -1, 0, 1, -4],
    [-5 , -3, 0, 3, 6]
], dtype=np.float64)

X.shape, X.dtype

((3, 5), dtype('float64'))

In [6]:
relu = ReLU()
linear_layer = LinearLayer(
    in_dim = 5,
    out_dim = 4,
    act_func = relu,
)


In [7]:
print(linear_layer)

LinearLayer(in_dim=5, out_dim=4, activation=ReLU)


## Test `.forward()`

In [35]:
linear_layer.forward(X).shape

(3, 4)

## Test `.backward()`

In [36]:
activated_prev = np.array([
    [-10, 5, 0, -5, 10],
    [4, -1, 0, 1, -4],
    [-5 , -3, 0, 3, 6]
], dtype=np.float64)

dloss_da = np.array([
    [-10, 5, 0, 2],
    [4, -1, 1, 3],
    [-5 , -3, 6, 4]
], dtype=np.float64)

In [37]:
linear_layer.backward(dloss_da, activated_prev)

(array([[ 16.,  -4.,   0.,   4., -16.],
        [-35.,  34.,   0., -34.,  32.],
        [  0.,   0.,   0.,   0.,   0.],
        [ -8., -15.,   0.,  15.,  12.]]),
 array([[-0.9935543 , -0.35606727,  5.03234059,  1.24950395,  0.54719047],
        [ 1.79389205, -0.62453631,  1.43614817,  2.37924421, -0.47955066],
        [ 0.16321562,  0.60394208, -2.36083875,  1.98477711, -0.97724301]]))

# MLP

In [38]:
class MLP:
    def __init__ (self,n_nodes, act_func_list, lr):
        self.n_nodes = n_nodes
        self.act_func_list = act_func_list
        self.lr = lr
        self.layers = self.get_layers()
        
        
    def get_layers(self):
        layers = []
        for i in range(len(self.act_func_list)):
            in_dim = self.n_nodes[i]
            out_dim = self.n_nodes[i+1]
            act_func = self.act_func_list[i]
            
            layer = LinearLayer(in_dim ,out_dim,act_func)
            layers.append(layer)
                
        return layers
   
    def forward(self,x):
        y_current = x
        for i in range (len(self.layers) ):
           y_next = self.layers[i].forward(y_current)
           y_current = y_next
           
        return y_current
    
    def backward(self, dloss_dy):
        dloss_da = dloss_dy
        for i in reversed(range(len(self.act_func_list))):
            a_prev = self.layers[i-1].activated
            
            dloss_dw, dloss_da_prev = self.layers[i].backward(dloss_da , a_prev)
            dloss_da = dloss_da_prev
            
            self.layers[i].update(dloss_dw,self.lr)
    
    def __repr__ (self):
        mess = ''
        for i in range (len(self.act_func_list)):
            # self.layers[i]
            # mess= mess + f"{i}: LinearLayer(in_dim={self.n_nodes[i]}, out_dim={self.n_nodes[i+1]},activation={self.act_func_list[i]})\n"
            mess = mess + f"{i}: {self.layers[i]}\n"
        return mess
    
    

In [41]:
for i in reversed(range(10)):
    print(i)

9
8
7
6
5
4
3
2
1
0


In [24]:
n_nodes = [128, 64, 40 ,16 ,8, 3]
relu = ReLU()
act_func_list = [relu , relu ,relu,relu,relu]
lr = 2.5

In [25]:
mlp = MLP(n_nodes , act_func_list, lr)

In [26]:
print (mlp)

0: LinearLayer(in_dim=128, out_dim=64, activation=ReLU)
1: LinearLayer(in_dim=64, out_dim=40, activation=ReLU)
2: LinearLayer(in_dim=40, out_dim=16, activation=ReLU)
3: LinearLayer(in_dim=16, out_dim=8, activation=ReLU)
4: LinearLayer(in_dim=8, out_dim=3, activation=ReLU)



## Test `.forward()`

In [27]:
X = np.random.rand(4, 128)
X.shape

(4, 128)

In [28]:
Y = mlp.forward(X)

Y.shape

(4, 3)

In [29]:
mlp.layers

[LinearLayer(in_dim=128, out_dim=64, activation=ReLU),
 LinearLayer(in_dim=64, out_dim=40, activation=ReLU),
 LinearLayer(in_dim=40, out_dim=16, activation=ReLU),
 LinearLayer(in_dim=16, out_dim=8, activation=ReLU),
 LinearLayer(in_dim=8, out_dim=3, activation=ReLU)]

In [45]:
ReLU()

ReLU

In [44]:
!python ./test.py

ReLU


Traceback (most recent call last):
  File "e:\UT\Summer 02\Deep Learning (Reshad)\HWs\01\codes\test.py", line 8, in <module>
    relu(relu.forward(np.random.randn(3, 4)))
TypeError: 'ReLU' object is not callable
