In [1]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


def relu(x):
    return np.maximum(0, x)


def relu_derivative(x):
    return np.where(x > 0, 1, 0)


def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)


class MLP:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.001, epochs=10000):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.epochs = epochs

        # Initialize weights and biases with He initialization
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(2. / hidden_size)
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        self.hidden_input = np.dot(X, self.W1) + self.b1
        self.hidden_output = relu(self.hidden_input)
        self.output_input = np.dot(self.hidden_output, self.W2) + self.b2
        self.output = softmax(self.output_input)
        return self.output

    def backward(self, X, y):
        # Compute errors and deltas
        output_error = self.output - y
        output_delta = output_error
        hidden_error = output_delta.dot(self.W2.T)
        hidden_delta = hidden_error * relu_derivative(self.hidden_output)

        # Update weights and biases
        self.W2 -= self.learning_rate * self.hidden_output.T.dot(output_delta)
        self.b2 -= self.learning_rate * np.sum(output_delta, axis=0, keepdims=True)
        self.W1 -= self.learning_rate * X.T.dot(hidden_delta)
        self.b1 -= self.learning_rate * np.sum(hidden_delta, axis=0, keepdims=True)

    def train(self, X, y):
        for epoch in range(self.epochs):
            self.forward(X)
            self.backward(X, y)

            if epoch % 1000 == 0:
                # Compute and print loss every 1000 epochs
                loss = -np.mean(np.sum(y * np.log(self.output + 1e-8), axis=1))
                print(f'Epoch {epoch}/{self.epochs}, Loss: {loss}')

    def predict(self, X):
        output = self.forward(X)
        return np.argmax(output, axis=1)


# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# One-hot encode labels
encoder = OneHotEncoder(sparse_output=False)
y_onehot = encoder.fit_transform(y.reshape(-1, 1))

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.3, random_state=42)

# Initialize and train the MLP model
mlp = MLP(input_size=4, hidden_size=5, output_size=3, learning_rate=0.001, epochs=10000)
mlp.train(X_train, y_train)

# Predict and evaluate the model
y_pred = mlp.predict(X_test)
y_true = np.argmax(y_test, axis=1)
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

Epoch 0/10000, Loss: 1.182035254895409
Epoch 1000/10000, Loss: 0.0696812182487262
Epoch 2000/10000, Loss: 0.05597464075240986
Epoch 3000/10000, Loss: 0.05206132920787736
Epoch 4000/10000, Loss: 0.05044812012567026
Epoch 5000/10000, Loss: 0.049474717553755106
Epoch 6000/10000, Loss: 0.04874105959947561
Epoch 7000/10000, Loss: 0.04810424444815263
Epoch 8000/10000, Loss: 0.047491408593254396
Epoch 9000/10000, Loss: 0.04685860963515138
Accuracy: 100.00%
