In [2]:
import numpy as np

def activation(z):
    """Hyperbolic-tangent activation, applied element-wise to ``z``."""
    squashed = np.tanh(z)
    return squashed

def derivative_activation(z):
    """Derivative of tanh evaluated at ``z``: ``1 - tanh(z)**2`` (element-wise)."""
    tanh_z = np.tanh(z)
    return 1 - tanh_z**2

class Neuron:
    """A single fully connected unit trained with per-sample gradient descent.

    Usage cycle for one sample: ``forward`` -> ``update_loss`` (one or more
    times) -> ``backward`` -> ``update_weights``.

    Attributes:
        W: weight vector, one entry per input.
        b: scalar bias.
        alpha: learning rate used by ``update_weights``.
        loss: upstream gradient accumulated via ``update_loss`` since the
            last ``forward`` call.
        X: input vector cached by the last ``forward`` call.
        grad: derivative of the activation at the last pre-activation value.
        dl_dw, dl_db: gradients computed by the last ``backward`` call.
    """

    def __init__(self, activation, derivative_activation, alpha, num):
        """Create a neuron with ``num`` inputs and random weights/bias in [0, 1).

        Args:
            activation: callable applied to the pre-activation value.
            derivative_activation: callable returning the activation's
                derivative at the pre-activation value.
            alpha: learning rate.
            num: number of neurons in the previous layer, or the input
                vector size for a first-layer neuron.
        """
        self.W = np.random.random(num) # num is the number of neurons in previous layer or input vector size
        self.b = np.random.random()
        self.alpha = alpha # learning rate
        self.loss = 0
        # Start from zero input/gradients so that backward() and
        # update_weights() are harmless no-ops before the first forward().
        self.X = np.zeros(num)
        self.grad = 0
        self.dl_dw = np.zeros(num)
        self.dl_db = 0
        self.activation = activation
        self.derivative_activation = derivative_activation

    def __repr__(self):
        return f'Weights={self.W} Bias={self.b} loss={self.loss}'

    def forward(self, X):
        """Compute the activation for input ``X`` and reset per-sample state.

        Args:
            X: input vector of size ``num`` (any array-like of numbers).

        Returns:
            ``activation(W . X + b)``.
        """
        # Coerce to a float array so list/tuple inputs also work in backward().
        self.X = np.asarray(X, dtype=float)
        self.loss = 0
        self.dl_dw = np.zeros_like(self.X)
        self.dl_db = 0
        z = np.dot(self.W, self.X) + self.b
        # Cache the local derivative now; backward() multiplies it in later.
        self.grad = self.derivative_activation(z)
        return self.activation(z)

    def update_loss(self, loss):
        """Accumulate an upstream gradient contribution for this neuron."""
        self.loss += loss

    def backward(self):
        """Compute weight/bias gradients from the accumulated upstream gradient.

        Returns:
            The gradient with respect to each input
            (``loss * grad * W``), to be passed to the previous layer.
        """
        delta = self.loss * self.grad
        self.dl_dw = delta * self.X
        self.dl_db = delta
        return delta * self.W

    def update_weights(self):
        """Take one gradient-descent step using the last computed gradients."""
        self.W = self.W - self.alpha * self.dl_dw
        self.b = self.b - self.alpha * self.dl_db
    

In [40]:
from joblib import Parallel, delayed

# Build a 2-2-1 fully connected network; each layer is a plain list of Neurons.
# NOTE(review): weights are drawn from np.random without a seed, so every run
# starts from a different initialisation.
learning_rate = 0.1
layer1 = [Neuron(activation, derivative_activation, learning_rate, 2) for _ in range(2)]
layer2 = [Neuron(activation, derivative_activation, learning_rate, 2) for _ in range(2)]
layer3 = [Neuron(activation, derivative_activation, learning_rate, 2)]
network = [layer1, layer2, layer3]

# XOR truth table: inputs and their labels.
X_input = np.array([[0,0],[0,1],[1,1],[1,0]])
Y_label = np.array([0,1,0,1])
prev_loss = 0
iters = 5000
epsilon = 0.00000001

for it in range(iters):

    epoch_loss = 0.0
    for xinput, Y in zip(X_input, Y_label):

        # Forward pass: the outputs of one layer are the inputs of the next.
        # Neurons are called directly (not via worker processes) because the
        # calls mutate the neuron objects in place.
        X = xinput
        for layer in network:
            X = np.array([neuron.forward(X) for neuron in layer])

        # Squared-error loss for this sample.
        epoch_loss += np.sum(np.power(Y - X, 2))

        # d(loss)/d(output), one entry per output neuron.
        loss = 2 * (X - Y)

        # Backward pass: hand every neuron its upstream gradient, then sum the
        # per-neuron input gradients to get the upstream gradient of the
        # previous layer.
        for layer in reversed(network):
            for neuron, unit_loss in zip(layer, loss):
                neuron.update_loss(unit_loss)
            loss = np.sum([neuron.backward() for neuron in layer], axis=0)

        # Apply the gradient step only after all gradients are computed, so
        # backward() always sees the weights used in the forward pass.
        for layer in network:
            for neuron in layer:
                neuron.update_weights()

    # Convergence is judged once per epoch on the mean per-sample loss, and
    # stops the whole training loop rather than just the current epoch.
    loss_value = epoch_loss / len(X_input)
    converged = loss_value < epsilon/1000 and abs(loss_value-prev_loss) < epsilon
    prev_loss = loss_value
    if converged:
        break

print('Final Loss=',prev_loss)

Final Loss= 4.13952524789251e-05


In [34]:
def predict(X, net=None):
    """Run a forward pass through a layered network and return its output.

    Args:
        X: input vector for the first layer.
        net: optional sequence of layers, each a sequence of objects with a
            ``forward(X)`` method. Defaults to the notebook-level ``network``.

    Returns:
        A NumPy array with one entry per neuron of the last layer (or ``X``
        unchanged if the network has no layers).

    Note:
        This calls ``forward`` on every neuron; for the ``Neuron`` class in
        this notebook that also overwrites the neuron's cached input and
        resets its accumulated loss.
    """
    layers = network if net is None else net
    for layer in layers:
        X = np.array([neuron.forward(X) for neuron in layer])
    return X

In [39]:
# Show the rounded network output for every training input.
# Adding 0.0 turns a rounded "-0." into "0." without discarding the sign of
# genuinely negative outputs (taking abs() would display -1 as 1).
for X in X_input:
    print(X, np.round(predict(X)) + 0.0)

[0 0] [0.]
[0 1] [1.]
[1 1] [0.]
[1 0] [1.]
