In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [8]:
class MLP:
    """Fully connected feed-forward network with sigmoid activations.

    Every layer, including the output layer, applies a sigmoid. Training is
    full-batch gradient descent on the squared error between the network
    output and the targets.

    Parameters
    ----------
    layers : sequence of int
        Number of units per layer, input layer first and output layer last.
        At least two entries are required.
    learning_rate : float, default 0.01
        Step size used by ``fit`` for every weight and bias update.
    seed : int or None, default None
        When given, the initial weights are drawn from a private
        ``np.random.RandomState(seed)`` so construction is reproducible.
        When ``None``, NumPy's global random state is used, so a prior
        ``np.random.seed(...)`` call still controls the initialisation.

    Attributes
    ----------
    weights : list of ndarray
        ``weights[i]`` has shape ``(layers[i], layers[i + 1])``.
    bias : list of ndarray
        ``bias[i]`` has shape ``(1, layers[i + 1])`` and starts at zero.

    Raises
    ------
    ValueError
        If ``layers`` has fewer than two entries.
    """

    def __init__(self, layers, learning_rate=0.01, seed=None):
        if len(layers) < 2:
            raise ValueError("layers must list at least an input and an output size")

        self.layers = layers
        self.learning_rate = learning_rate
        self.weights = []
        self.bias = []

        # The np.random module and a RandomState instance both expose randn,
        # so either can serve as the source of the initial weights.
        rng = np.random if seed is None else np.random.RandomState(seed)
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            # Small random weights (standard normal scaled by 0.1), zero biases.
            self.weights.append(rng.randn(n_in, n_out) * 0.1)
            self.bias.append(np.zeros((1, n_out)))

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

        The input is clipped to ``[-250, 250]`` first, which bounds the
        argument passed to ``np.exp`` (this avoids overflow for float64).
        """
        return 1 / (1 + np.exp(-np.clip(x, -250, 250)))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output*.

        ``x`` must already be an activation ``a = sigmoid(z)``, not the
        pre-activation ``z``; the derivative is then ``a * (1 - a)``.
        """
        return x * (1 - x)

    def forward(self, x):
        """Run a forward pass and return the activations of every layer.

        Parameters
        ----------
        x : array_like
            Input samples, one per row. A single 1-D sample is treated as a
            batch of one.

        Returns
        -------
        list of ndarray
            ``[input, layer_1_output, ..., network_output]``; the list has
            ``len(self.weights) + 1`` entries.
        """
        activations = [np.atleast_2d(np.asarray(x, dtype=float))]
        for w, b in zip(self.weights, self.bias):
            z = np.dot(activations[-1], w) + b
            activations.append(self.sigmoid(z))
        return activations

    def backward(self, activations, y):
        """Back-propagate the squared error and return the parameter gradients.

        Parameters
        ----------
        activations : list of ndarray
            The list returned by ``forward``.
        y : array_like
            Targets with as many elements as the network output. They are
            reshaped to the output's shape, so a 1-D vector is accepted for a
            single-output network.

        Returns
        -------
        (list of ndarray, list of ndarray)
            Gradients for ``self.weights`` and ``self.bias``, in layer order
            and averaged over the batch.

        Raises
        ------
        ValueError
            If ``y`` cannot be reshaped to the shape of the network output.
        """
        output = activations[-1]
        # Align targets with the output explicitly: subtracting a 1-D vector
        # from an (m, 1) column would otherwise broadcast to an (m, m) matrix.
        y = np.asarray(y, dtype=float).reshape(output.shape)
        m = output.shape[0]

        gradients_w = []
        gradients_b = []

        # Output-layer delta: error times the sigmoid slope at the output.
        delta = (output - y) * self.sigmoid_derivative(output)

        for i in reversed(range(len(self.weights))):
            layer_input = np.atleast_2d(activations[i])
            gradients_w.append(np.dot(layer_input.T, delta) / m)
            gradients_b.append(np.sum(delta, axis=0, keepdims=True) / m)

            if i > 0:
                # Push the delta back through layer i's weights and through
                # the sigmoid of the layer that produced activations[i].
                delta = np.dot(delta, self.weights[i].T) * self.sigmoid_derivative(activations[i])

        # Gradients were collected from the last layer to the first.
        gradients_w.reverse()
        gradients_b.reverse()
        return gradients_w, gradients_b

    def fit(self, x, y, epochs=1000, log_every=100):
        """Train the network with full-batch gradient descent.

        Parameters
        ----------
        x : array_like
            Training inputs, one sample per row.
        y : array_like
            Training targets. A 1-D vector is treated as one target per
            sample; a 2-D array keeps one column per output unit.
        epochs : int, default 1000
            Number of passes over the full batch.
        log_every : int, default 100
            Print the mean squared error every ``log_every`` epochs. The loss
            shown is the one measured before that epoch's update. Pass ``0``
            or ``None`` to disable logging.

        Raises
        ------
        ValueError
            If ``x`` contains no samples, or ``y`` does not match the network
            output size.
        """
        x = np.atleast_2d(np.asarray(x, dtype=float))
        n_samples = x.shape[0]
        if n_samples == 0:
            raise ValueError("fit requires at least one training sample")

        # One row per sample; the column count follows the data rather than
        # being forced to 1, so multi-output networks can be trained.
        y = np.asarray(y, dtype=float).reshape(n_samples, -1)

        for epoch in range(epochs):
            activations = self.forward(x)
            grad_w, grad_b = self.backward(activations, y)

            for i in range(len(self.weights)):
                self.weights[i] -= self.learning_rate * grad_w[i]
                self.bias[i] -= self.learning_rate * grad_b[i]

            if log_every and epoch % log_every == 0:
                loss = np.mean((activations[-1] - y) ** 2)
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return the network output for ``X`` with shape ``(n_samples, layers[-1])``."""
        return self.forward(X)[-1]

In [None]:
# XOR truth table: four 2-bit inputs and their exclusive-or targets.
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y_xor = np.array([0, 1, 1, 0])

# MLP draws its initial weights from NumPy's global random state, so seed it
# to make this cell reproducible under Restart & Run All.
np.random.seed(42)

# NOTE(review): with these settings a previously recorded run plateaued at a
# loss of 0.25 (chance level on XOR). More epochs and/or a larger learning
# rate are probably needed for the network to leave that plateau; confirm by
# re-running before relying on the predictions below.
mlp = MLP(layers=[2, 8, 1], learning_rate=0.1)
mlp.fit(X_xor, Y_xor, epochs=1000)
mlp_xor = mlp.predict(X_xor)

# predict returns one column per output unit; flatten the single column so
# each label is a plain integer that compares and prints like Y_xor's entries.
pred_labels = (mlp_xor >= 0.5).astype(int).ravel()

for x, y_true, y_pred in zip(X_xor, Y_xor, pred_labels):
    print(f"  Input: {x}, Target: {y_true}, Prediction: {y_pred}, {'✓' if y_true == y_pred else '✗'}")

Epoch 0, Loss: 0.2607
Epoch 100, Loss: 0.2500
Epoch 200, Loss: 0.2500
Epoch 300, Loss: 0.2500
Epoch 400, Loss: 0.2500
Epoch 500, Loss: 0.2500
Epoch 600, Loss: 0.2500
Epoch 700, Loss: 0.2500
Epoch 800, Loss: 0.2500
Epoch 900, Loss: 0.2500
  Input: [0 0], Target: 0, Prediction: [1], ✗
  Input: [0 1], Target: 1, Prediction: [0], ✗
  Input: [1 0], Target: 1, Prediction: [1], ✓
  Input: [1 1], Target: 0, Prediction: [0], ✓
