In [1]:
import numpy as np

# Activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Binary Cross Entropy Loss
def binary_cross_entropy(y_true, y_pred):
    return -np.mean(y_true * np.log(y_pred + 1e-8) + (1 - y_true) * np.log(1 - y_pred + 1e-8))

class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        # Xavier initialization
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(1. / input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(1. / hidden_size)
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = sigmoid(self.z1)

        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = sigmoid(self.z2)

        return self.a2

    def backward(self, X, y, output, learning_rate):
        m = X.shape[0]

        # Output layer error
        d_a2 = output - y
        d_z2 = d_a2 * sigmoid_derivative(output)
        d_W2 = np.dot(self.a1.T, d_z2) / m
        d_b2 = np.sum(d_z2, axis=0, keepdims=True) / m

        # Hidden layer error
        d_a1 = np.dot(d_z2, self.W2.T)
        d_z1 = d_a1 * sigmoid_derivative(self.a1)
        d_W1 = np.dot(X.T, d_z1) / m
        d_b1 = np.sum(d_z1, axis=0, keepdims=True) / m

        # Update weights
        self.W2 -= learning_rate * d_W2
        self.b2 -= learning_rate * d_b2
        self.W1 -= learning_rate * d_W1
        self.b1 -= learning_rate * d_b1

    def train(self, X, y, epochs=1000, learning_rate=0.1):
        for epoch in range(epochs):
            output = self.forward(X)
            loss = binary_cross_entropy(y, output)
            self.backward(X, y, output, learning_rate)

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        output = self.forward(X)
        return (output > 0.5).astype(int)

# Example usage
if __name__ == "__main__":
    # XOR dataset
    X = np.array([[0,0],[0,1],[1,0],[1,1]])
    y = np.array([[0],[1],[1],[0]])

    mlp = MLP(input_size=2, hidden_size=4, output_size=1)
    mlp.train(X, y, epochs=10000, learning_rate=0.1)

    predictions = mlp.predict(X)
    print("Predictions:\n", predictions)


Epoch 0, Loss: 0.7593
Epoch 100, Loss: 0.6978
Epoch 200, Loss: 0.6933
Epoch 300, Loss: 0.6930
Epoch 400, Loss: 0.6929
Epoch 500, Loss: 0.6928
Epoch 600, Loss: 0.6927
Epoch 700, Loss: 0.6926
Epoch 800, Loss: 0.6925
Epoch 900, Loss: 0.6924
Epoch 1000, Loss: 0.6923
Epoch 1100, Loss: 0.6922
Epoch 1200, Loss: 0.6921
Epoch 1300, Loss: 0.6920
Epoch 1400, Loss: 0.6919
Epoch 1500, Loss: 0.6917
Epoch 1600, Loss: 0.6916
Epoch 1700, Loss: 0.6915
Epoch 1800, Loss: 0.6914
Epoch 1900, Loss: 0.6912
Epoch 2000, Loss: 0.6911
Epoch 2100, Loss: 0.6909
Epoch 2200, Loss: 0.6908
Epoch 2300, Loss: 0.6906
Epoch 2400, Loss: 0.6904
Epoch 2500, Loss: 0.6902
Epoch 2600, Loss: 0.6900
Epoch 2700, Loss: 0.6898
Epoch 2800, Loss: 0.6896
Epoch 2900, Loss: 0.6893
Epoch 3000, Loss: 0.6891
Epoch 3100, Loss: 0.6888
Epoch 3200, Loss: 0.6885
Epoch 3300, Loss: 0.6882
Epoch 3400, Loss: 0.6879
Epoch 3500, Loss: 0.6875
Epoch 3600, Loss: 0.6871
Epoch 3700, Loss: 0.6867
Epoch 3800, Loss: 0.6863
Epoch 3900, Loss: 0.6858
Epoch 4000, 

In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
data = load_iris()
X = data.data
y = data.target

# Use only two classes: 0 (setosa) and 1 (versicolor)
binary_filter = y < 2
X = X[binary_filter]
y = y[binary_filter].reshape(-1, 1)  # Make it a column vector

# Normalize features
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Train-test split (optional but good practice)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
mlp = MLP(input_size=4, hidden_size=6, output_size=1)
mlp.train(X_train, y_train, epochs=5000, learning_rate=0.1)
predictions = mlp.predict(X_test)
accuracy = np.mean(predictions == y_test)
print("Test Accuracy:", accuracy * 100, "%")
print("Predictions:\n", predictions)
print("True Labels:\n", y_test)

Epoch 0, Loss: 0.7341
Epoch 100, Loss: 0.6442
Epoch 200, Loss: 0.6300
Epoch 300, Loss: 0.6153
Epoch 400, Loss: 0.5982
Epoch 500, Loss: 0.5784
Epoch 600, Loss: 0.5559
Epoch 700, Loss: 0.5307
Epoch 800, Loss: 0.5031
Epoch 900, Loss: 0.4735
Epoch 1000, Loss: 0.4428
Epoch 1100, Loss: 0.4119
Epoch 1200, Loss: 0.3816
Epoch 1300, Loss: 0.3527
Epoch 1400, Loss: 0.3258
Epoch 1500, Loss: 0.3011
Epoch 1600, Loss: 0.2787
Epoch 1700, Loss: 0.2587
Epoch 1800, Loss: 0.2408
Epoch 1900, Loss: 0.2248
Epoch 2000, Loss: 0.2106
Epoch 2100, Loss: 0.1980
Epoch 2200, Loss: 0.1867
Epoch 2300, Loss: 0.1766
Epoch 2400, Loss: 0.1676
Epoch 2500, Loss: 0.1594
Epoch 2600, Loss: 0.1520
Epoch 2700, Loss: 0.1453
Epoch 2800, Loss: 0.1392
Epoch 2900, Loss: 0.1336
Epoch 3000, Loss: 0.1285
Epoch 3100, Loss: 0.1238
Epoch 3200, Loss: 0.1195
Epoch 3300, Loss: 0.1155
Epoch 3400, Loss: 0.1118
Epoch 3500, Loss: 0.1083
Epoch 3600, Loss: 0.1051
Epoch 3700, Loss: 0.1021
Epoch 3800, Loss: 0.0993
Epoch 3900, Loss: 0.0966
Epoch 4000, 