In [84]:
import numpy as np

def activation(z):
    """Hyperbolic-tangent activation.

    Delegates to ``np.tanh``, so ``z`` may be a scalar or an array (the
    function is applied element-wise).
    """
    squashed = np.tanh(z)
    return squashed

def derivative_activation(z):
    """Derivative of the tanh activation evaluated at ``z``.

    Uses the identity d/dz tanh(z) = 1 - tanh(z)**2; works element-wise on
    arrays because it is built on ``np.tanh``.
    """
    tanh_z = np.tanh(z)
    return 1 - tanh_z ** 2


In [71]:
import numpy as np

class Neuron:
    """One unit of a layer: ``activation(W . X + b)`` plus its own gradient step.

    Attributes:
        W: weight vector of length ``num`` drawn with ``np.random.random``.
        b: scalar bias drawn with ``np.random.random``.
        alpha: learning rate applied in ``update_weights``.
        loss: upstream gradient accumulated through ``update_loss``;
            reset to 0 by every ``forward`` call.
        X: the input passed to the most recent ``forward`` call.
        grad: activation derivative at the most recent pre-activation value.
    """

    def __init__(self, activation, derivative_activation, alpha, num):
        """Create a neuron expecting ``num`` inputs.

        ``num`` is the number of neurons in the previous layer, or the input
        vector size for a first-layer neuron.
        """
        # Callables applied to the pre-activation value.
        self.activation = activation
        self.derivative_activation = derivative_activation
        self.alpha = alpha  # learning rate

        # Per-sample state, filled in by forward() / update_loss().
        self.X = []
        self.grad = 0
        self.loss = 0

        # Weights are drawn before the bias so the random stream is consumed
        # in a fixed order.
        self.W = np.random.random(num)
        self.b = np.random.random()

    def __repr__(self):
        return f'Weights={self.W} Bias={self.b} loss={self.loss}'

    def forward(self, X):
        """Return the activation for input vector ``X`` (length ``num``).

        Also caches ``X`` and the activation derivative for the next
        ``backward`` call, and clears the accumulated loss.
        """
        self.X = X
        self.loss = 0
        pre_activation = np.dot(self.W, self.X) + self.b
        self.grad = self.derivative_activation(pre_activation)
        output = self.activation(pre_activation)
        return output

    def update_loss(self, loss):
        """Add ``loss`` to the upstream gradient accumulated since ``forward``."""
        self.loss += loss

    def backward(self):
        """Apply one gradient step and return the gradient w.r.t. the input.

        The returned gradient is computed with the weights as they were
        before this step's update.
        """
        # Gradient w.r.t. the pre-activation; it is also the bias gradient.
        delta = self.loss * self.grad
        dl_dW = delta * self.X
        dl_dX = delta * self.W
        self.update_weights(dl_dW, delta)
        return dl_dX

    def update_weights(self, dl_dW, dl_db):
        """Move ``W`` and ``b`` against their gradients, scaled by ``alpha``."""
        weight_step = self.alpha * dl_dW
        bias_step = self.alpha * dl_db
        self.W = self.W - weight_step
        self.b = self.b - bias_step
    

In [94]:
class NeuralNetwork:
    """Fully connected feed-forward network assembled from ``Neuron`` objects.

    ``self.network`` is a list of layers and every layer is a list of
    neurons. Each neuron of a layer receives the full output vector of the
    previous layer (or the raw input vector for the first layer).
    """

    def __init__(self, activation, derivative_activation, input_size, structure, learning_rate):
        """Build the layers.

        Args:
            activation: callable applied to each neuron's pre-activation.
            derivative_activation: derivative of ``activation``.
            input_size: length of the input vectors fed to the first layer.
            structure: sequence with the number of neurons of each layer,
                first layer first; the last entry is the output size.
            learning_rate: step size handed to every neuron.

        Raises:
            ValueError: if ``structure`` is empty.
        """
        if len(structure) == 0:
            raise ValueError('structure must contain at least one layer size')
        self.network = []
        fan_in = input_size  # number of inputs each neuron of the next layer takes
        for layer_size in structure:
            self.network.append(
                [Neuron(activation, derivative_activation, learning_rate, fan_in) for _ in range(layer_size)]
            )
            fan_in = layer_size

    def train(self, X_input, Y_label, iters):
        """Run per-sample gradient descent on the squared error.

        Args:
            X_input: iterable of input vectors.
            Y_label: labels indexed like ``X_input``; each label is a scalar
                or a vector matching the output layer.
            iters: maximum number of passes over ``X_input``.

        Training stops early once a sample's loss is below ``epsilon / 1000``
        and differs from the previous sample's loss by less than ``epsilon``.
        The loss of the last evaluated sample is printed when training ends.
        """
        final_loss = 0
        prev_loss = np.inf  # no previous sample yet, so the first one never "converges"
        epsilon = 0.00000001
        converged = False
        for _ in range(iters):
            for i, xinput in enumerate(X_input):
                Y_hat = self.predict(xinput)
                error = Y_hat - Y_label[i]

                loss_value = sum(np.power(error, 2))
                if loss_value < epsilon / 1000 and abs(loss_value - prev_loss) < epsilon:
                    final_loss = loss_value
                    converged = True
                    break
                final_loss = loss_value
                prev_loss = loss_value

                # d(loss)/d(Y_hat_k) = 2 * (Y_hat_k - Y_k): one entry per output neuron.
                output_grad = 2 * error
                if output_grad.shape != Y_hat.shape:
                    # The label broadcast against fewer outputs, so every term
                    # of the loss depends on the same output(s): sum them.
                    output_grad = np.full(Y_hat.shape, output_grad.sum())
                self._backpropagate(output_grad)
            if converged:
                # Leave the epoch loop too; otherwise training would resume.
                break

        print(f'Training concluded with a loss of {final_loss}.')

    def _backpropagate(self, output_grad):
        """Send ``output_grad`` backwards through all layers, updating weights."""
        layer_grad = output_grad
        for layer in reversed(self.network):
            input_grads = []
            for neuron, unit_grad in zip(layer, layer_grad):
                neuron.update_loss(unit_grad)
                input_grads.append(neuron.backward())
            # Each input of this layer feeds every neuron in it, so the
            # gradient for the previous layer is the sum over this layer.
            layer_grad = np.sum(input_grads, axis=0)

    def predict(self, X):
        """Return the output-layer activations for input vector ``X``.

        Every neuron's ``forward`` is called, so this also refreshes the
        per-neuron state that a following backward pass relies on.
        """
        activations = X
        for layer in self.network:
            activations = np.array([neuron.forward(activations) for neuron in layer])
        return activations

In [95]:
# XOR truth table: four 2-bit inputs and the label expected for each.
X_input = np.array([[0, 0], [0, 1], [1, 1], [1, 0]])
Y_label = np.array([0, 1, 0, 1])

# Three-layer network (2 -> 2 -> 1 neurons) fed with 2-dimensional inputs:
#   [*] -> [*] \
#               [*]
#   [*] -> [*] /
network = NeuralNetwork(
    activation,
    derivative_activation,
    input_size=2,
    structure=[2, 2, 1],
    learning_rate=0.1,
)
network.train(X_input, Y_label, 5000)

# Show the magnitude of the output, rounded to the nearest integer, for
# every training input.
for X in X_input:
    rounded = np.round(np.abs(network.predict(X)))
    print(X, rounded)

Training concluded with a loss of 4.783018253836828e-05.
[0 0] [0.]
[0 1] [1.]
[1 1] [0.]
[1 0] [1.]
