In [468]:
import numpy as np

In [1105]:
class Layer():
    
    def __init__(self,no_of_neurons,previous_layer=None,activation=None):
        # number of neurons in current layer
        self.n = no_of_neurons
        # layer that sends input into this layer
        self.previous_layer = previous_layer
        # layer that recieves this layer's output
        self.next_layer = None
        if (self.previous_layer is not None):
            # sort the hierarchy
            previous_layer.next_layer = self
            # initialize the weights between the layers
            previous_layer.initialize_weights()
            # set the activation function
            self.set_activation(activation)
        else:
            # this is the input layer
            self.set_activation('none')
    
    def initialize_weights(self):
        if (self.next_layer is not None):
            # weights
            self.weights = np.random.normal(0,0.1,(self.n,self.next_layer.n))
            # bias separately
            self.bias = np.random.normal(0,0.1,self.next_layer.n)
    
    def set_activation(self,func='ReLU'):
        if (func=='ReLU'):
            # set ReLU and the derivative
            self.func = ReLU
            self.deriv = dReLU
        elif (func=='none'):
            self.func = lambda x:x
            self.deriv = lambda x:1

    def feed_forward(self,z):
        # remember z and a for later
        self.z = z
        self.a = self.func(z)
        if (self.next_layer is not None):
            return self.next_layer.feed_forward(self.a @ self.weights + self.bias)
        else:
            return self.a
    
    def update_weights(self,learning_rate,error):
        self.weights -= self.weights*learning_rate*error
        self.bias -= self.bias*learning_rate*error
        
    def back_propagate(self,y=None,learning_rate=0.001):
        
        if (self.next_layer is None):
            self.error = self.a - y
            self.delta = self.error * self.deriv(self.z)
        else:
            thetaT = np.hstack((self.weights,self.bias[:,None])).T
            self.error = thetaT @ self.next_layer.delta
            self.delta = self.error * self.deriv(self.z)
            # there is something wrong here:
            #self.weights -= learning_rate * (self.a @ self.delta[:-1])
            #self.bias -= learning_rate * (self.a * self.delta[-1])
        if (self.previous_layer is not None):
            self.previous_layer.back_propagate(learning_rate=learning_rate)
        
def ReLU(x):
    # ReLU function
    return np.maximum(x,0)

def dReLU(x):
    # derivative of ReLU
    if (x<=0):
        return 0
    else:
        return 1

In [1106]:
a = Layer(2)

In [1107]:
b = Layer(2,a,activation='none')

In [1130]:
x = np.array([1,4])
y = np.array([x.sum(),3*x[0]+2*x[1]])
a.feed_forward(x)

array([nan, nan])

In [1129]:
for i in range(1000):
    b.back_propagate(y)
    a.feed_forward(x)



In [1127]:
for i in range(1000):
    x = np.random.random(size=2)
    y = np.array([x.sum(),3*x[0]+2*x[1]])
    a.feed_forward(x)
    b.back_propagate(y)

In [906]:
a.feed_forward([0.6,0.2])

array([6.68571747, 2.78078495])

In [461]:
a.bias

array([-0.0020577 ,  0.08637008])

In [462]:
a.weights

array([[-0.00404494,  0.00867371],
       [ 0.02071155, -0.00183865]])