In [1]:
import numpy as np
from numba import njit
from scipy.special import expit as sigmoid

In [2]:
@njit
def relu(x):
    return np.maximum(0,x)


@njit
def d_relu(output):
    return 1 * (output > 0)

@njit
def leaky_relu(x):
    return np.where(x > 0, x, 0.01*x)    


@njit
def d_leaky_relu(output):
    return np.where(output > 0, 1, 0.01)     


In [3]:
class activation_layer:
    def __init__(self, size):
        
        self.size = size
        self.prev_size = size
        self.params = 0
    
    
class relu_activation_layer(activation_layer):

    def gradient(self):
        return np.diag(d_relu(self.out).flatten())
    

    def forward(self, x):
        assert x.shape == (self.size, 1), f"input and layer size incompatible, {x.shape} passed"
        
        self.out = relu(x)
        self.gradient_to_prev = self.gradient()
        
        return self.out
    


class sigmoid_activation_layer(activation_layer):
    
    def gradient(self):
        return np.diag(self.out.flatten()*(1-self.out.flatten()))
    

    def forward(self, x):
        assert x.shape == (self.size, 1), f"input and layer size incompatible, {x.shape} passed"
        
        self.out = sigmoid(x)
        self.gradient_to_prev = self.gradient()
        
        return self.out

class leaky_relu__activation_layer(activation_layer):

    def gradient(self):
        return np.diag(d_leaky_relu(self.out).flatten())
    

    def forward(self, x):
        assert x.shape == (self.size, 1), f"input and layer size incompatible, {x.shape} passed"
        
        self.out = leaky_relu(x)
        self.gradient_to_prev = self.gradient()
        
        return self.out
    

In [4]:
relu_layer = relu_activation_layer(size = 3)
sigm_layer = sigmoid_activation_layer(size = 3)
l_relu_layer = leaky_relu__activation_layer(size = 3)

In [5]:
nodes = np.linspace(-1,1,3).reshape(-1,1)
nodes

array([[-1.],
       [ 0.],
       [ 1.]])

In [6]:
%%timeit
relu_layer.forward(nodes)

The slowest run took 14.15 times longer than the fastest. This could mean that an intermediate result is being cached.
7.04 µs ± 10.2 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
%%timeit
sigm_layer.forward(nodes)

2.52 µs ± 30.1 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [8]:
%%timeit
l_relu_layer.forward(nodes)

  return np.where(x > 0, x, 0.01*x)


The slowest run took 13.24 times longer than the fastest. This could mean that an intermediate result is being cached.
8.3 µs ± 12 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


  return np.where(output > 0, 1, 0.01)


In [9]:
l_relu_layer.forward(nodes)

array([[-0.01],
       [ 0.  ],
       [ 1.  ]])