In [2]:
# Rupesh Nitin Bharambe (AI3007)

In [7]:
import numpy as np

class MLP:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        # Initialize weights and biases
        self.W1 = np.random.rand(input_size, hidden_size) * 0.01 # Weights for input to hidden
        self.b1 = np.zeros((1, hidden_size)) # Biases for hidden layer
        self.W2 = np.random.rand(hidden_size, output_size) * 0.01 # Weights for hidden to output
        self.b2 = np.zeros((1, output_size)) # Biases for output layer
        self.learning_rate = learning_rate
    
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def sigmoid_derivative(self, z):
        return z * (1 - z)
        
    def forward(self, X):
        # Forward pass
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2
    
    def backward(self, X, y):
        # Backward pass
        m = X.shape[0]
        output_error = self.a2 - y
        output_delta = output_error * self.sigmoid_derivative(self.a2)
        
        hidden_error = output_delta.dot(self.W2.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.a1)
        
        # Update weights and biases
        self.W2 -= self.a1.T.dot(output_delta) * self.learning_rate / m
        self.b2 -= np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate / m
        self.W1 -= X.T.dot(hidden_delta) * self.learning_rate / m
        self.b1 -= np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate / m
    
    def train(self, X, y, epochs=1000):
        for epoch in range(epochs):
            self.forward(X)
            self.backward(X, y)
            if epoch % 100 == 0:
                loss = np.mean(np.square(y - self.a2))
                print(f'Epoch {epoch}, Loss: {loss}')
                
    def predict(self, X):
        return self.forward(X)

# Example Usage

if __name__ == '__main__':
    # Sample dataset: XOR problem
    X = np.array([[0, 0],
                  [0, 1],
                  [1, 0],
                  [1, 1]])
    y = np.array([[0], [1], [1], [0]])
    
    # Create MLP
    mlp = MLP(input_size=2, hidden_size=2, output_size=1, learning_rate=0.1)
    # Train MLP
    mlp.train(X, y, epochs=1000)
    # Predictions
    print('Predictions after training:')
    print(mlp.predict(X))

Epoch 0, Loss: 0.25000070118463286
Epoch 100, Loss: 0.25000010625756497
Epoch 200, Loss: 0.2500000160997592
Epoch 300, Loss: 0.2500000024369042
Epoch 400, Loss: 0.250000000366384
Epoch 500, Loss: 0.25000000005260953
Epoch 600, Loss: 0.25000000000505895
Epoch 700, Loss: 0.24999999999785294
Epoch 800, Loss: 0.24999999999676095
Epoch 900, Loss: 0.24999999999659545
Predictions after training:
[[0.49999856]
 [0.50000061]
 [0.49999952]
 [0.50000157]]
