Multilayer Perceptron using Step function

In [16]:
import numpy as np

class MLP:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1, epochs=1000):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        
        # Initialize weights for hidden and output layers
        self.weights_input_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.weights_hidden_output = np.random.randn(self.hidden_size, self.output_size)
        
        # Initialize biases for hidden and output layers
        self.bias_hidden = np.random.randn(self.hidden_size)
        self.bias_output = np.random.randn(self.output_size)

    def step_function(self, x):
        # Step activation function
        return np.where(x >= 0, 1, 0)

    def forward(self, X):
        # Forward pass: input -> hidden layer -> output layer
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = self.step_function(self.hidden_input)
        self.final_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.final_output = self.step_function(self.final_input)
        return self.final_output

    def backward(self, X, y):
        # Backward pass (Gradient Descent)
        output_error = y - self.final_output
        hidden_error = np.dot(output_error, self.weights_hidden_output.T) * self.hidden_output * (1 - self.hidden_output)
        
        # Update weights and biases
        self.weights_hidden_output += self.learning_rate * np.dot(self.hidden_output.T, output_error)
        self.weights_input_hidden += self.learning_rate * np.dot(X.T, hidden_error)
        self.bias_output += self.learning_rate * np.sum(output_error, axis=0)
        self.bias_hidden += self.learning_rate * np.sum(hidden_error, axis=0)

    def fit(self, X, y):
        # Train the network
        for epoch in range(self.epochs):
            self.forward(X)
            self.backward(X, y)
            if epoch % 100 == 0:
                loss = np.mean(np.square(y - self.final_output))  # Mean Squared Error (MSE)
                print(f"Epoch {epoch}, Loss: {loss}")

    def predict(self, X):
        # Predict the output for given input
        return self.forward(X)

# Example usage:
if __name__ == "__main__":
    # XOR problem (Non-linearly separable - MLP can solve this)
    # Training data (inputs)
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    # Labels (outputs)
    y = np.array([[0], [1], [1], [0]])

    # Create MLP instance
    mlp = MLP(input_size=2, hidden_size=4, output_size=1, learning_rate=0.1, epochs=1000)

    # Train the model
    mlp.fit(X, y)

    # Predictions
    predictions = mlp.predict(X)
    print("\nPredictions:")
    print(predictions)


Epoch 0, Loss: 0.5
Epoch 100, Loss: 0.25
Epoch 200, Loss: 0.25
Epoch 300, Loss: 0.5
Epoch 400, Loss: 0.25
Epoch 500, Loss: 0.25
Epoch 600, Loss: 0.5
Epoch 700, Loss: 0.25
Epoch 800, Loss: 0.25
Epoch 900, Loss: 0.5

Predictions:
[[0]
 [0]
 [1]
 [0]]
