<a href="https://colab.research.google.com/github/sidhu2690/ai-from-scratch/blob/main/01_multi_layer_perceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np

In [68]:
def sigmoid(x):
  """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without floating-point overflow.

  Args:
    x: scalar or array-like of real numbers.

  Returns:
    A NumPy scalar for scalar input, otherwise an array with the shape of ``x``.
  """
  x = np.asarray(x)
  # exp(-|x|) always lies in (0, 1], so it cannot overflow the way exp(-x)
  # does for large negative x.
  e = np.exp(-np.abs(x))
  # Two algebraically equal forms of the sigmoid:
  #   x >= 0 : 1 / (1 + exp(-x))
  #   x <  0 : exp(x) / (1 + exp(x))
  result = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
  # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
  # real arrays untouched.
  return result[()]

def sigmoid_deriv(x):
  """Derivative of the sigmoid at ``x``, i.e. sigmoid(x) * (1 - sigmoid(x))."""
  activation = sigmoid(x)
  complement = 1 - activation
  return activation * complement

In [69]:
def initialisation(input_size, hidden_size, output_size):
  """Draw standard-normal parameters for a network with one hidden layer.

  Returns:
    Tuple ``(w1, b1, w2, b2)`` with shapes ``(input_size, hidden_size)``,
    ``(1, hidden_size)``, ``(hidden_size, output_size)`` and ``(1, output_size)``.
  """
  shapes = (
      (input_size, hidden_size),   # w1
      (1, hidden_size),            # b1
      (hidden_size, output_size),  # w2
      (1, output_size),            # b2
  )
  # The generator is consumed left to right, so the draws happen in the
  # order w1, b1, w2, b2.
  w1, b1, w2, b2 = (np.random.randn(*shape) for shape in shapes)
  return w1, b1, w2, b2

In [70]:
def forward_pass(x, w1, b1, w2, b2):
  """Run ``x`` through two affine layers, each followed by a sigmoid.

  Row-vector convention: the input multiplies the weights from the left
  (``x @ w1``), so ``w1`` is laid out as (inputs, hidden) and ``w2`` as
  (hidden, outputs).

  Returns:
    The activation of the second (output) layer.
  """
  hidden = sigmoid(x @ w1 + b1)
  output = sigmoid(hidden @ w2 + b2)
  return output

In [71]:
# Untrained network with randomly drawn parameters: 2 inputs -> 10 hidden -> 1 output.
w1, b1, w2, b2 = initialisation(2, 10, 1)

# XOR truth table. Y holds the targets; the forward pass below does not use it.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])

# Feed the four input rows through the network one at a time.
for row in X:
  print(forward_pass(row, w1, b1, w2, b2))

[[0.36436158]]
[[0.41194022]]
[[0.3339375]]
[[0.37287611]]


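Now let's build an `MLP` class that bundles the parameters, the forward pass, backpropagation, and a training loop. Note that, unlike the functions above, the class uses a column-vector convention: each weight matrix has shape `(outputs, inputs)` and every sample is passed in as a column vector.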

In [72]:
class MLP:
  """Perceptron with one sigmoid hidden layer and a sigmoid output layer.

  Column-vector convention: a sample is an ``(input_dim, 1)`` column,
  ``w1`` has shape ``(hidden_dim, input_dim)`` and ``w2`` has shape
  ``(output_dim, hidden_dim)``. Relies on the module-level ``sigmoid`` and
  ``sigmoid_deriv`` helpers.
  """

  def __init__(self, input_dim, hidden_dim, output_dim):
    """Draw every weight and bias from a standard normal distribution."""
    self.w1 = np.random.randn(hidden_dim, input_dim)
    self.b1 = np.random.randn(hidden_dim, 1)

    self.w2 = np.random.randn(output_dim, hidden_dim)
    self.b2 = np.random.randn(output_dim, 1)

  def forward(self, x):
    """Compute the network output for one sample.

    Args:
      x: column vector of shape ``(input_dim, 1)``.

    Returns:
      The output activation ``a2``. The intermediate values ``z1``, ``a1``,
      ``z2`` and ``a2`` are stored on the instance because ``backward``
      reads them.
    """
    self.z1 = self.w1 @ x + self.b1
    self.a1 = sigmoid(self.z1)

    self.z2 = self.w2 @ self.a1 + self.b2
    self.a2 = sigmoid(self.z2)

    return self.a2

  def backward(self, x, y, lr):
    """Apply one gradient-descent step for a single sample.

    Must be called right after ``forward(x)`` for the same ``x``, since it
    uses the activations cached by that call.

    Args:
      x: the input column vector that was passed to ``forward``.
      y: target, broadcastable against the output ``a2``.
      lr: learning rate (step size).
    """
    # Output layer: (a2 - y) is the gradient of the squared error
    # 0.5 * (a2 - y)**2 with respect to a2; the chain rule adds sigmoid'(z2).
    dz2 = (self.a2 - y) * sigmoid_deriv(self.z2)
    dw2 = dz2 @ self.a1.T
    db2 = dz2

    # Hidden layer: propagate the error back through w2. This has to happen
    # before w2 is updated below.
    dz1 = (self.w2.T @ dz2) * sigmoid_deriv(self.z1)
    dw1 = dz1 @ x.T
    db1 = dz1

    self.w2 -= lr * dw2
    self.b2 -= lr * db2
    self.w1 -= lr * dw1
    self.b1 -= lr * db1

  def train(self, X, Y, lr = 0.0001, epochs = 1000):
    """Train with per-sample gradient descent.

    Args:
      X: iterable of samples; each sample is reshaped to a column vector.
      Y: iterable of targets, one per sample. Each target may be a scalar
        (1-D ``Y``) or a vector/row (2-D ``Y``); it is reshaped to a column
        vector in either case.
      lr: learning rate passed to ``backward``.
      epochs: number of full passes over ``zip(X, Y)``.
    """
    for _ in range(epochs):
      for x, y in zip(X, Y):
        x = np.asarray(x).reshape(-1, 1)
        # reshape(-1, 1) always yields a 2-D column. Wrapping the target as
        # np.array([[y]]) produced a 3-D array when y was itself an array
        # (2-D Y), which made the in-place weight updates fail.
        y = np.asarray(y).reshape(-1, 1)
        self.forward(x)
        self.backward(x, y, lr)


In [73]:
# Inputs are the integers 1..100 as a (100, 1) column; the label is 1 when
# the input is at least 50 and 0 otherwise.
X = np.arange(1, 101).reshape(-1, 1)
Y = (X.ravel() >= 50).astype(int)

In [78]:
# 1 input feature -> 3 hidden units -> 1 output.
# The initial weights are random and no seed is set in this notebook, so the
# trained model (and the accuracy reported below) varies between runs.
model = MLP(input_dim=1, hidden_dim=3, output_dim=1)

# 2000 passes over the 100 samples, updating the weights after every sample.
model.train(X, Y, lr=0.01, epochs=2000)

In [79]:
# One forward pass per sample; the single output value is rounded to a 0/1 label.
y_pred = [
    round(model.forward(sample.reshape(-1, 1)).item())
    for sample in X
]

In [80]:
def accuracy(y_pred, Y):
  """Percentage of predictions that equal the corresponding label.

  Args:
    y_pred: sequence of predicted labels.
    Y: sequence of true labels, same length as ``y_pred``.

  Returns:
    A float between 0 and 100.

  Raises:
    ValueError: if ``y_pred`` and ``Y`` differ in length. Counting over one
      length and dividing by the other would silently give a wrong figure.
    ZeroDivisionError: if both sequences are empty.
  """
  if len(y_pred) != len(Y):
    raise ValueError(
        f"y_pred and Y must have the same length, got {len(y_pred)} and {len(Y)}"
    )
  # Count with plain Python ints so the result is a built-in float.
  correct = sum(1 for predicted, actual in zip(y_pred, Y) if predicted == actual)
  return correct / len(Y) * 100

In [81]:
# Percentage of the rounded predictions that equal their label in Y.
accuracy(y_pred, Y)

94.0