In [4]:
import numpy as np

# Activations

- ## ReLU

In [149]:
class ReLU:
    def __init__(self) -> None:
        pass
    def forward (self, x):
        """
            Y = ReLU(x)
            
            parameters:
                * x: input of relu
                    * size: (batch_size, in_dim)
                    
            output:
                * y: y = relu(x)
                    * size: (batch_size, in_dim) 
        """
        return np.maximum(0,x)
    
    def backward (self, x):
        """
            dY/dx 
            
            parameters:
                * X: input of relu
                    * size: (batch_size, in_dim)
                    
            outputs:
                * dy: dy/dx
                    * size: (batch_size, in_dim)
        """
        dy = np.array(x, dtype=float)
        dy[dy>0]=1 
        dy[dy==0]=0.5
        dy[dy<0]=0
       
        return dy
    

In [181]:
X = np.array([
    [-10, 5, 0, -5, 10],
    [4, -1, 0, 1, -4],
    [-5 , -3, 0, 3, 6]
], dtype=np.float64)

X.shape, X.dtype

((3, 5), dtype('float64'))

In [182]:
Y_expected = np.array([
    [0, 5, 0, 0, 10],
    [4, 0, 0, 1, 0],
    [0, 0, 0, 3, 6]
])

dY_expected = np.array([
    [0, 1, 0.5, 0, 1],
    [1, 0, 0.5, 1, 0],
    [0, 0, 0.5, 1, 1]
])

Y_expected.shape, dY_expected.shape

((3, 5), (3, 5))

In [183]:
relu = ReLU()

Y = relu.forward(X)

print(Y==Y_expected)
print(Y.shape)

[[ True  True  True  True  True]
 [ True  True  True  True  True]
 [ True  True  True  True  True]]
(3, 5)


In [184]:
dY = relu.backward(X) 

print(dY==dY_expected)
print(dY.shape)

[[ True  True  True  True  True]
 [ True  True  True  True  True]
 [ True  True  True  True  True]]
(3, 5)


- ## Leaky ReLU

In [205]:
class LeakyReLU:
    def __init__(self,alpha) -> None:
        self.alpha = alpha
        
    def forward (self, x):
        """
            Y = LeakyReLU(x)
            
            parameters:
                * x: input of leaky-relu
                    * size: (batch_size, in_dim)
                    
            output:
                * y: y = leaky-relu(x)
                    * size: (batch_size, in_dim) 
        """
        # return np.maximum(x , self.alpha*x)
        return np.maximum(x, np.multiply(self.alpha,x))         # Self.alpha * x does not calculate the multiplication accurately!!!!!!
    
    def backward (self, x):
        """
            dY/dx 
            
            parameters:
                * X: input of leaky-relu
                    * size: (batch_size, in_dim)
                    
            outputs:
                * dy: dy/dx
                    * size: (batch_size, in_dim)
        """
        dy = np.array(X, dtype=float)
        dy[dy>0]=1 
        dy[dy==0]=(1+self.alpha)/2
        dy[dy<0]=self.alpha
       
        return dy
    

In [1]:
Y_expected = np.array([
    [-1, 5, 0, -0.5, 10],
    [4, -0.1, 0, 1, -0.4],
    [-0.5, -0.3, 0, 3, 6]
])

dY_expected = np.array([
    [0.1, 1, 0.55, 0.1, 1],
    [1, 0.1, 0.55, 1, 0.1],
    [0.1, 0.1, 0.55, 1, 1]
])

NameError: name 'np' is not defined

In [222]:
X

array([[-10.,   5.,   0.,  -5.,  10.],
       [  4.,  -1.,   0.,   1.,  -4.],
       [ -5.,  -3.,   0.,   3.,   6.]])

In [219]:
alpha = 0.1
leaky_relu = LeakyReLU(alpha)

In [220]:
Y = leaky_relu.forward(X)

print(Y==Y_expected)
print(Y.shape)

[[ True  True  True  True  True]
 [ True  True  True  True  True]
 [ True False  True  True  True]]
(3, 5)


In [221]:
dY = leaky_relu.backward(X) 

print(dY==dY_expected)
print(dY.shape)

[[ True  True  True  True  True]
 [ True  True  True  True  True]
 [ True  True  True  True  True]]
(3, 5)


- ## Sigmoid

In [230]:
class Sigmoid:
    def __init__(self) -> None:
        pass
    def forward (self, x):
        """
            Y = Sigmoid(x)
            
            parameters:
                * x: input of Sigmoid
                    * size: (batch_size, in_dim)
                    
            output:
                * y: y = Sigmoid(x)
                    * size: (batch_size, in_dim) 
        """
        
        return 1/(1+np.exp(-x))
    
    def backward (self, x):
        """
            dY/dx 
            
            parameters:
                * X: input of Sigmoid
                    * size: (batch_size, in_dim)
                    
            outputs:
                * dy: dy/dx
                    * size: (batch_size, in_dim)
        """
        s=self.forward(x)
        
       
        return np.multiply(s,1-s)
    

In [231]:
sigmoid=Sigmoid()

In [232]:
sigmoid.forward(X)

array([[4.53978687e-05, 9.93307149e-01, 5.00000000e-01, 6.69285092e-03,
        9.99954602e-01],
       [9.82013790e-01, 2.68941421e-01, 5.00000000e-01, 7.31058579e-01,
        1.79862100e-02],
       [6.69285092e-03, 4.74258732e-02, 5.00000000e-01, 9.52574127e-01,
        9.97527377e-01]])

In [233]:
sigmoid.backward(X)

array([[4.53958077e-05, 6.64805667e-03, 2.50000000e-01, 6.64805667e-03,
        4.53958077e-05],
       [1.76627062e-02, 1.96611933e-01, 2.50000000e-01, 1.96611933e-01,
        1.76627062e-02],
       [6.64805667e-03, 4.51766597e-02, 2.50000000e-01, 4.51766597e-02,
        2.46650929e-03]])

- ## Tanh

In [249]:
class tanh:
    def __init__(self) -> None:
        pass
    def forward (self, x):
        """
            Y = tanh(x)
            
            parameters:
                * x: input of tanh
                    * size: (batch_size, in_dim)
                    
            output:
                * y: y = tanh(x)
                    * size: (batch_size, in_dim) 
        """
        
        return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))
    
    def backward (self, x):
        """
            dY/dx 
            
            parameters:
                * X: input of tanh
                    * size: (batch_size, in_dim)
                    
            outputs:
                * dy: dy/dx
                    * size: (batch_size, in_dim)
        """
        tanh=self.forward(x)
        
       
        return 1 - np.power(tanh, 2)
    

- ## Softmax

In [272]:
class Softmax:
    def __init__(self) -> None:
        pass
    def forward (self, x):
        """
            Y = Softmax(x)
            
            parameters:
                * x: input of Softmax
                    * size: (batch_size, in_dim)
                    
            output:
                * y: y = Softmax(x)
                    * size: (batch_size, in_dim) 
        """
        exps = np.exp(x)
        # shiftx = x - np.max(x, axis=1)
        # exps = np.exp(shiftx)
        return exps/ np.sum(exps , axis=1)[:,np.newaxis]
    
    def backward (self, x):
        """
            dY/dx 
            
            parameters:
                * X: input of Softmax
                    * size: (batch_size, in_dim)
                    
            outputs:
                * dy: dy/dx
                    * size: (batch_size, in_dim)
        """
        softmax = self.forward(x)
        output = np.multiply(softmax , np.eye(x.shape[1]) - softmax.T)
        return output
    

In [273]:
softmax = Softmax()

In [274]:
Y = softmax.forward(X)

print(Y.shape)
print(Y)

(3, 5)
[[2.04726568e-09 6.69254707e-03 4.50940274e-05 3.03841167e-07
  9.93262053e-01]
 [9.30080183e-01 6.26683098e-03 1.70350128e-02 4.63059657e-02
  3.12007142e-04]
 [1.58700177e-05 1.17264451e-04 2.35531946e-03 4.73078561e-02
  9.50203690e-01]]


In [None]:
dY = softmax.backward(X)

print(dY.shape)
print(dY)

In [257]:
exps = np.exp(X)
exps

array([[4.53999298e-05, 1.48413159e+02, 1.00000000e+00, 6.73794700e-03,
        2.20264658e+04],
       [5.45981500e+01, 3.67879441e-01, 1.00000000e+00, 2.71828183e+00,
        1.83156389e-02],
       [6.73794700e-03, 4.97870684e-02, 1.00000000e+00, 2.00855369e+01,
        4.03428793e+02]])

In [268]:
np.sum(exps, axis=1)[:, np.newaxis].shape

(3, 1)

In [261]:
exps.shape

(3, 5)

In [264]:
exps / np.sum(exps, axis=1)

ValueError: operands could not be broadcast together with shapes (3,5) (3,) 

In [8]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])