In [1]:
import numpy as np

In [8]:
class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initializing weights
        # self.w1_2_3_4 = np.random.random((self.input_size, self.hidden_size))
        self.w1_2_3_4 = [[1, 10],[1, 10]]
        # self.w5_6 = np.random.random((self.hidden_size, self.output_size))
        self.w5_6 = [[-40], [40]]

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward(self, X):
        # Propagate inputs through the network
        self.z1_2 = np.dot(X, self.w1_2_3_4)        # 1 x 2
        self.h = self.sigmoid(self.z1_2)            # 1 x 2
        self.z3 = np.dot(self.h, self.w5_6)         # 1 x 1
        self.o = self.sigmoid(self.z3)              # 1 x 1
        return self.o

    def mse_loss(self, y_true, y_pred):
        # MSE loss
        return np.mean((y_true - y_pred) ** 2)

    def backward(self, X, y, y_pred, learning_rate):
        # chain rule
        dc_do1 = -2 * (y - y_pred)
        do1_dz3 = y_pred * (1 - y_pred)
        dz3_dw5_6 = self.h
        dc_dw5_6 = dc_do1 * do1_dz3 * dz3_dw5_6         # 1 x 2
        self.w5_6 = self.w5_6 + learning_rate * -dc_dw5_6.T # 2 x 1
        dc_dw1_2_3_4 = dc_do1 * do1_dz3 * np.dot(self.w5_6 * (self.h * (1 - self.h)).T,X)

        self.w1_2_3_4 = self.w1_2_3_4 + learning_rate * -dc_dw1_2_3_4.T

    def train(self, X_train, y_train, epochs, learning_rate):
        # for epoch in range(epochs):
        for epoch in range(epochs):
            for i in range(len(X_train)):
                # Forward pass
                y_pred = self.forward([X_train[i]])
                # Compute and print loss
                loss = self.mse_loss([y_train[i]], y_pred)
                # Backward pass
                self.backward([X_train[i]], [y_train[i]], y_pred, learning_rate)
            if np.mod(epoch,100)==0:
                print('epoch=',epoch, 'loss=',loss)

In [2]:
# Data Generation
X_train = np.random.randint(0, 2, (100,2))
y_train = (X_train[:,0]!=X_train[:,1]).astype(int)

In [5]:
X_train[1]

array([0, 0])

In [6]:
y_train[1]

0

In [9]:
mlp = MLP(input_size=2, hidden_size=2, output_size=1)

In [10]:
# training
mlp.train(X_train, y_train, epochs=1000, learning_rate=0.1)

epoch= 0 loss= 9.31341347885779e-06
epoch= 100 loss= 0.0008112620699512502
epoch= 200 loss= 0.0007272852413584552
epoch= 300 loss= 0.0007022283058618446
epoch= 400 loss= 0.0006910202559692084
epoch= 500 loss= 0.0006844206289513337
epoch= 600 loss= 0.0006794681182430631
epoch= 700 loss= 0.0006750247749540448
epoch= 800 loss= 0.0006706245447735783
epoch= 900 loss= 0.0006660830809736016


In [11]:
# prediction
test_input = np.array([[0, 0]])
predicted_output = mlp.forward(test_input)
print("Predicted Output:", test_input, predicted_output)
test_input = np.array([[1, 0]])
predicted_output = mlp.forward(test_input)
print("Predicted Output:", test_input, predicted_output)
test_input = np.array([[0, 1]])
predicted_output = mlp.forward(test_input)
print("Predicted Output:", test_input, predicted_output)
test_input = np.array([[1, 1]])
predicted_output = mlp.forward(test_input)
print("Predicted Output:", test_input, predicted_output)

Predicted Output: [[0 0]] [[0.01250288]]
Predicted Output: [[1 0]] [[0.96881318]]
Predicted Output: [[0 1]] [[0.97454612]]
Predicted Output: [[1 1]] [[0.03978315]]
