In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

def relu(x):
    """Return the element-wise rectified linear unit of *x*.

    Negative entries are replaced by zero; non-negative entries pass
    through unchanged.
    """
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Return the element-wise ReLU gradient of *x* as floats.

    The result is 1.0 where ``x > 0`` and 0.0 elsewhere (including at 0).
    """
    positive_mask = x > 0
    return positive_mask.astype(float)

def softmax(x):
    """Return the row-wise softmax of the 2-D score array *x*.

    Each row's maximum is subtracted before exponentiating so that large
    scores do not overflow; the shift cancels out in the normalisation.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exponentials = np.exp(x - row_max)
    normaliser = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / normaliser

def cross_entropy(predictions, targets):
    """Return the mean cross-entropy between *predictions* and *targets*.

    The sum of ``targets * log(predictions)`` over all entries is negated
    and divided by the number of rows in *predictions*.
    """
    batch_size = predictions.shape[0]
    # The small offset keeps log() finite when a predicted probability is 0.
    log_probs = np.log(predictions + 1e-15)
    total = np.sum(targets * log_probs)
    return -total / batch_size

def cross_entropy_derivative(predictions, targets):
    """Return ``(predictions - targets)`` divided by the number of rows.

    MLP.backward uses this as the gradient with respect to the pre-softmax
    scores.
    """
    batch_size = predictions.shape[0]
    residual = predictions - targets
    return residual / batch_size


class MLP:
    """One-hidden-layer perceptron with a ReLU hidden layer and softmax output.

    Training is plain full-batch gradient descent: every epoch runs one
    forward pass and one parameter update over the whole data set.

    Attributes:
        W1, b1: hidden-layer weights ``(input_size, hidden_size)`` and
            bias ``(1, hidden_size)``.
        W2, b2: output-layer weights ``(hidden_size, output_size)`` and
            bias ``(1, output_size)``.
        Z1, A1, Z2, A2: activations cached by the most recent ``forward``
            call; ``backward`` reads ``Z1`` and ``A1``.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Initialise small random weights and zero biases.

        Args:
            input_size: number of input features.
            hidden_size: number of hidden units.
            output_size: number of output classes.
            seed: optional seed for reproducible weights. When ``None``
                (the default) weights are drawn from NumPy's global random
                state, exactly as before this parameter existed.
        """
        # A private RandomState keeps seeded construction from disturbing
        # (or being disturbed by) the global NumPy random state.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.W1 = rng.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = rng.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and return the softmax output for *X*.

        Intermediate results are stored on the instance (``Z1``, ``A1``,
        ``Z2``, ``A2``) so that ``backward`` can reuse them.
        """
        self.Z1 = X @ self.W1 + self.b1
        self.A1 = relu(self.Z1)
        self.Z2 = self.A1 @ self.W2 + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2

    def backward(self, X, y, output, learning_rate):
        """Back-propagate the loss and apply one gradient-descent step.

        Must be called after ``forward(X)`` on the same *X*, because it
        relies on the cached ``Z1`` and ``A1``.

        Args:
            X: the inputs that were passed to ``forward``.
            y: targets with the same shape as *output* (one-hot rows in
                this script).
            output: the array returned by ``forward(X)``.
            learning_rate: step size applied to every parameter.
        """
        # cross_entropy_derivative already divides by the batch size, so
        # the gradients below are batch means without further scaling.
        dZ2 = cross_entropy_derivative(output, y)
        dW2 = self.A1.T @ dZ2
        db2 = np.sum(dZ2, axis=0, keepdims=True)

        # Propagate through W2 *before* it is updated below.
        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * relu_derivative(self.Z1)
        dW1 = X.T @ dZ1
        db1 = np.sum(dZ1, axis=0, keepdims=True)

        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def train(self, X, y, epochs, learning_rate, log_every=100):
        """Train with full-batch gradient descent.

        Args:
            X: training inputs.
            y: training targets (one-hot rows in this script).
            epochs: number of passes over the data.
            learning_rate: gradient-descent step size.
            log_every: print the loss every this many epochs and on the
                final epoch. ``0`` or ``None`` disables printing.

        Returns:
            A list with the loss measured at the start of each epoch
            (before that epoch's update).
        """
        losses = []
        last_epoch = epochs - 1
        for epoch in range(epochs):
            output = self.forward(X)
            loss = cross_entropy(output, y)
            losses.append(loss)
            self.backward(X, y, output, learning_rate)

            # The truthiness check comes first so that log_every=0 never
            # reaches the modulo.
            if log_every and (epoch % log_every == 0 or epoch == last_epoch):
                print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")
        return losses

    def predict(self, X):
        """Return the index of the highest-probability class for each row."""
        output = self.forward(X)
        return np.argmax(output, axis=1)

def prepare_data():
    """Build a synthetic 3-class data set and split it into train and test.

    Generates 500 two-feature samples with ``make_classification``,
    one-hot encodes the labels, and holds out 20% of the rows for testing.
    Both the generator and the split use ``random_state=42``.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` where the ``y`` arrays are
        dense one-hot encoded labels.
    """
    X, y = make_classification(
        n_samples=500,
        n_features=2,
        n_classes=3,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=42,
    )
    # OneHotEncoder expects a 2-D column of labels.
    y = y.reshape(-1, 1)

    # scikit-learn 1.2 renamed ``sparse`` to ``sparse_output`` and 1.4
    # removed the old name, so try the current keyword first and fall back
    # for older installations.
    try:
        encoder = OneHotEncoder(sparse_output=False)
    except TypeError:
        encoder = OneHotEncoder(sparse=False)
    y_encoded = encoder.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42
    )
    return X_train, X_test, y_train, y_test

def main():
    """Train the MLP on the synthetic data set and print its test accuracy."""
    X_train, X_test, y_train, y_test = prepare_data()

    # Layer sizes follow the data: one input per feature column and one
    # output per one-hot column.
    n_features = X_train.shape[1]
    n_hidden = 10
    n_classes = y_train.shape[1]

    model = MLP(n_features, n_hidden, n_classes)
    model.train(X_train, y_train, epochs=1000, learning_rate=0.1)

    # Convert the one-hot test labels back to class indices for comparison.
    predicted_labels = model.predict(X_test)
    true_labels = np.argmax(y_test, axis=1)
    matches = predicted_labels == true_labels

    accuracy = np.mean(matches)
    print(f"\nTest Accuracy: {accuracy * 100:.2f}%")

# Run the training demo only when this file is executed as a script, so
# that importing it does not trigger training as a side effect.
if __name__ == "__main__":
    main()


Epoch 1/1000, Loss: 1.0982
Epoch 101/1000, Loss: 0.6814
Epoch 201/1000, Loss: 0.3183
Epoch 301/1000, Loss: 0.2614
Epoch 401/1000, Loss: 0.2368
Epoch 501/1000, Loss: 0.2260
Epoch 601/1000, Loss: 0.2194
Epoch 701/1000, Loss: 0.2141
Epoch 801/1000, Loss: 0.2096
Epoch 901/1000, Loss: 0.2049
Epoch 1000/1000, Loss: 0.1978

Test Accuracy: 93.00%


