In [31]:
import numpy as np

In [32]:
class MLP:
    
    def __init__(self, X, y, lr=0.1, no_of_epochs=100, input_layer=2, hidden_layer=2, output_layer = 1):
        self.X = X
        self.y = y
        self.lr = lr
        self.no_of_epochs = no_of_epochs 

        self.weight_I_H = np.random.uniform(size = (input_layer,hidden_layer))
        self.weight_H_O = np.random.uniform(size = (hidden_layer,output_layer))

        self.bias_I_H = np.random.uniform(size=(1,hidden_layer))
        self.bias_H_O = np.random.uniform(size=(1,output_layer))

        self.loss = []

    def forward_prop(self,batch):

        self.hidden_output = self.sigmoid(np.dot(batch , self.weight_I_H)+self.bias_I_H)
        self.output = self.sigmoid(np.dot(self.hidden_output , self.weight_H_O) + self.bias_H_O)
        return self.output
    def back_prop(self):
        error = self.y-self.output
        loss = (error**2)/2
        self.loss.append(np.sum(loss))
        grad1 = self.X.T @ (((error * self.sigmoid_derivative(self.output)) * self.weight_H_O.T) * self.sigmoid_derivative(self.hidden_output))
        self.weight_I_H += self.lr * grad1

        grad2 = self.hidden_output.T @ (error * self.sigmoid_derivative(self.output))
        self.weight_H_O += self.lr * grad2

        self.bias_I_H += np.sum(self.lr * ((error * self.sigmoid_derivative(self.output)) * self.weight_H_O.T) * self.sigmoid_derivative(self.hidden_output), axis=0)
        self.bias_H_O += np.sum(self.lr * error * self.sigmoid_derivative(self.output), axis=0)


    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        # return self.sigmoid(x) * self.sigmoid(1 - x)
        return x*(1-x)

    def predict(self, X):
        pred = self.forward_prop(X)
        return np.array([1 if i >= 0.5 else 0 for i in pred])

    def train(self):
        for epoch in range(self.no_of_epochs):
            self.forward_prop(self.X)
            self.back_prop()
            if epoch % 500 == 0:
                print("[Epoch]:",epoch,"Loss: ", self.loss[epoch])

In [33]:
X = np.array(
    [[0,0],
     [0,1],
     [1,0],
     [1,1]]
)

y = np.array(
    [[0],
     [1],
     [1],
     [0]]
)

In [34]:
xor_mlp = MLP(X, y, 0.2, 10000)
xor_mlp.train()

[Epoch]: 0 Loss:  0.7476381492229618
[Epoch]: 500 Loss:  0.4995034267485938
[Epoch]: 1000 Loss:  0.49703543074200407
[Epoch]: 1500 Loss:  0.47961469243904015
[Epoch]: 2000 Loss:  0.39846647786523637
[Epoch]: 2500 Loss:  0.3230153033332077
[Epoch]: 3000 Loss:  0.20914146425775515
[Epoch]: 3500 Loss:  0.04987298722527287
[Epoch]: 4000 Loss:  0.021985086473687458
[Epoch]: 4500 Loss:  0.013370930740062448
[Epoch]: 5000 Loss:  0.009424187201215814
[Epoch]: 5500 Loss:  0.00720887549691671
[Epoch]: 6000 Loss:  0.005805830692826646
[Epoch]: 6500 Loss:  0.00484359913485435
[Epoch]: 7000 Loss:  0.004145472634776795
[Epoch]: 7500 Loss:  0.003617313076632968
[Epoch]: 8000 Loss:  0.003204619050173335
[Epoch]: 8500 Loss:  0.002873759118168672
[Epoch]: 9000 Loss:  0.0026029062466384092
[Epoch]: 9500 Loss:  0.002377302832431112


In [35]:
preds = xor_mlp.predict(X)

for i in range(len(X)):
    print(X[i][0],'XOR',X[i][1],'=',preds[i])

0 XOR 0 = 0
0 XOR 1 = 1
1 XOR 0 = 1
1 XOR 1 = 0
