In [1]:
import numpy as np

# Activation functions
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : scalar or array_like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Element-wise sigmoid of ``x``: a NumPy scalar for scalar input,
        otherwise an array with the shape of ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
    # exp(-x) does for large negative x (which raised a RuntimeWarning).
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
    # Both are the same function; each branch only needs the safe exponential.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a NumPy scalar and leaves
    # arrays of higher rank as they are, so scalar callers still get a scalar.
    return result[()]

def sigmoid_derivative(x):
    """Derivative of the logistic sigmoid, evaluated at pre-activation ``x``.

    Uses the identity ``sigma'(x) = sigma(x) * (1 - sigma(x))``.
    """
    activated = sigmoid(x)
    complement = 1 - activated
    return activated * complement

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Slope of ReLU at ``x``: 1 where ``x`` is strictly positive, else 0."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def leaky_relu(x, alpha=0.01):
    """Leaky ReLU: ``x`` where ``x > 0``, otherwise ``alpha * x``.

    ``alpha`` is the slope applied to non-positive inputs.
    """
    leaked = alpha * x
    return np.where(x > 0, x, leaked)

def leaky_relu_derivative(x, alpha=0.01):
    """Slope of leaky ReLU at ``x``: 1 where ``x > 0``, otherwise ``alpha``."""
    on_positive_side = x > 0
    return np.where(on_positive_side, 1, alpha)

def tanh(x):
    """Hyperbolic tangent activation; delegates directly to ``np.tanh``."""
    activated = np.tanh(x)
    return activated

def tanh_derivative(x):
    """Derivative of tanh at ``x``, computed as ``1 - tanh(x)**2``."""
    squashed = np.tanh(x)
    return 1 - squashed ** 2


In [2]:
class SingleLayerPerceptron:
    """Single fully connected layer trained with full-batch gradient descent.

    The layer computes ``activation(X @ weights + bias)``. ``feedforward``
    caches the pre-activation in ``self.input`` and the activation in
    ``self.output``; ``train`` reads those cached values for its update.
    """

    def __init__(self, activation="sigmoid", input_size=2, output_size=2, seed=None):
        """Create the layer with weights and biases drawn from [-0.5, 0.5).

        Parameters
        ----------
        activation : str
            One of "sigmoid", "relu", "leaky_relu" or "tanh".
        input_size : int
            Number of input features (default 2, the previous fixed value).
        output_size : int
            Number of output units (default 2, the previous fixed value).
        seed : int or None
            ``None`` draws from NumPy's global RNG, so ``np.random.seed`` still
            controls the result. Any other value seeds a private generator,
            making this instance reproducible on its own.

        Raises
        ------
        ValueError
            If ``activation`` is not one of the supported names.
        """
        self.input_size = input_size
        self.output_size = output_size

        # np.random (module) and RandomState (object) both expose ``rand``,
        # so one code path serves the global and the privately seeded case.
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Draw order (weights, then bias) is kept so globally seeded runs
        # produce the same initial parameters as before.
        self.weights = rng.rand(self.input_size, self.output_size) - 0.5
        self.bias = rng.rand(self.output_size) - 0.5

        # Set activation functions
        activations = {
            "sigmoid": (sigmoid, sigmoid_derivative),
            "relu": (relu, relu_derivative),
            "leaky_relu": (leaky_relu, leaky_relu_derivative),
            "tanh": (tanh, tanh_derivative),
        }
        if activation not in activations:
            raise ValueError(f"Unsupported activation function: {activation}")
        self.activation, self.activation_derivative = activations[activation]

    def feedforward(self, X):
        """Run the layer on ``X`` and return the activations.

        Side effect: stores the pre-activation in ``self.input`` and the
        result in ``self.output``.
        """
        self.input = np.dot(X, self.weights) + self.bias
        self.output = self.activation(self.input)
        return self.output

    def train(self, X, y, epochs=1000, learning_rate=0.1):
        """Fit weights and bias in place with full-batch gradient descent.

        Each epoch applies ``delta = (output - y) * activation'(input)``, i.e.
        the gradient of ``0.5 * sum((output - y) ** 2)``, summed over the batch.
        Every 100th epoch the mean absolute error is printed.

        Parameters
        ----------
        X : array_like
            Inputs, one sample per row (a single 1-D sample is promoted to one row).
        y : array_like
            Targets, broadcastable against the layer output.
        epochs : int
            Number of passes over the full batch.
        learning_rate : float
            Step size applied to the summed gradient.
        """
        # Accept lists as well as arrays; ``X.T`` below needs a 2-D ndarray.
        X = np.atleast_2d(np.asarray(X, dtype=float))
        y = np.asarray(y, dtype=float)
        for epoch in range(epochs):
            self.feedforward(X)
            error = self.output - y
            output_delta = error * self.activation_derivative(self.input)
            self.weights -= learning_rate * np.dot(X.T, output_delta)
            self.bias -= learning_rate * np.sum(output_delta, axis=0)
            if epoch % 100 == 0:
                # Reported loss is measured before this epoch's update.
                loss = np.mean(np.abs(error))
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return the index of the largest output unit for each sample.

        A single 1-D sample is treated as a batch of one, so the result is
        always a 1-D array with one class index per row of ``X``.
        """
        # argmax(axis=1) needs a 2-D activation matrix.
        X = np.atleast_2d(np.asarray(X, dtype=float))
        return np.argmax(self.feedforward(X), axis=1)


In [11]:
# XOR Problem
# Seed NumPy's global RNG so the random weight initialisation, and therefore
# the training log, is the same on every Restart & Run All.
np.random.seed(42)

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])  # One-hot encoding

# Example: Training with Sigmoid activation
# A single layer can only draw a linear decision boundary and XOR is not
# linearly separable, so the loss is expected to stall (the recorded run
# plateaus at 0.5000) instead of converging.
slp = SingleLayerPerceptron(activation="sigmoid")
print("\nTraining Single-Layer Perceptron (Sigmoid):")
slp.train(X, y, epochs=5000, learning_rate=0.1)

# Example: Predictions
print("\nPredictions:")
for sample, target in zip(X, y):
    # predict expects a 2-D batch, so each sample is reshaped to one row.
    prediction = slp.predict(sample.reshape(1, -1))
    print(f"Input: {sample}, Prediction: {prediction}, Target: {np.argmax(target)}")


Training Single-Layer Perceptron (Sigmoid):
Epoch 0, Loss: 0.5014
Epoch 100, Loss: 0.5000
Epoch 200, Loss: 0.5000
Epoch 300, Loss: 0.5000
Epoch 400, Loss: 0.5000
Epoch 500, Loss: 0.5000
Epoch 600, Loss: 0.5000
Epoch 700, Loss: 0.5000
Epoch 800, Loss: 0.5000
Epoch 900, Loss: 0.5000
Epoch 1000, Loss: 0.5000
Epoch 1100, Loss: 0.5000
Epoch 1200, Loss: 0.5000
Epoch 1300, Loss: 0.5000
Epoch 1400, Loss: 0.5000
Epoch 1500, Loss: 0.5000
Epoch 1600, Loss: 0.5000
Epoch 1700, Loss: 0.5000
Epoch 1800, Loss: 0.5000
Epoch 1900, Loss: 0.5000
Epoch 2000, Loss: 0.5000
Epoch 2100, Loss: 0.5000
Epoch 2200, Loss: 0.5000
Epoch 2300, Loss: 0.5000
Epoch 2400, Loss: 0.5000
Epoch 2500, Loss: 0.5000
Epoch 2600, Loss: 0.5000
Epoch 2700, Loss: 0.5000
Epoch 2800, Loss: 0.5000
Epoch 2900, Loss: 0.5000
Epoch 3000, Loss: 0.5000
Epoch 3100, Loss: 0.5000
Epoch 3200, Loss: 0.5000
Epoch 3300, Loss: 0.5000
Epoch 3400, Loss: 0.5000
Epoch 3500, Loss: 0.5000
Epoch 3600, Loss: 0.5000
Epoch 3700, Loss: 0.5000
Epoch 3800, Loss:

In [5]:
# Multi-Layer Perceptron class
class MultiLayerPerceptron:
    """One-hidden-layer network with a softmax output, trained by backpropagation.

    ``feedforward`` caches the hidden pre-activation/activation and the output
    probabilities on the instance; ``backpropagate`` consumes those cached
    values, so it must follow a ``feedforward`` call on the same ``X``.
    """

    def __init__(self, hidden_size=4, activation="sigmoid", input_size=2, output_size=2, seed=None):
        """Create the network with parameters drawn from [-0.5, 0.5).

        Parameters
        ----------
        hidden_size : int
            Number of hidden units.
        activation : str
            Hidden-layer activation: "sigmoid", "relu", "leaky_relu" or "tanh".
        input_size : int
            Number of input features (default 2, the previous fixed value).
        output_size : int
            Number of output classes (default 2, the previous fixed value).
        seed : int or None
            ``None`` draws from NumPy's global RNG, so ``np.random.seed`` still
            controls the result. Any other value seeds a private generator,
            making this instance reproducible on its own.

        Raises
        ------
        ValueError
            If ``activation`` is not one of the supported names.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # np.random (module) and RandomState (object) both expose ``rand``,
        # so one code path serves the global and the privately seeded case.
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Draw order is kept so globally seeded runs initialise as before.
        self.weights_input_hidden = rng.rand(self.input_size, self.hidden_size) - 0.5
        self.weights_hidden_output = rng.rand(self.hidden_size, self.output_size) - 0.5
        self.bias_hidden = rng.rand(self.hidden_size) - 0.5
        self.bias_output = rng.rand(self.output_size) - 0.5

        # Set activation functions
        activations = {
            "sigmoid": (sigmoid, sigmoid_derivative),
            "relu": (relu, relu_derivative),
            "leaky_relu": (leaky_relu, leaky_relu_derivative),
            "tanh": (tanh, tanh_derivative),
        }
        if activation not in activations:
            raise ValueError(f"Unsupported activation function: {activation}")
        self.activation, self.activation_derivative = activations[activation]

    def feedforward(self, X):
        """Return softmax class probabilities for each row of ``X``.

        A single 1-D sample is treated as a batch of one. Side effect: caches
        ``hidden_input``, ``hidden_output``, ``output_input`` and ``output``.
        """
        # The row-wise softmax below reduces over axis=1, which needs 2-D input.
        X = np.atleast_2d(np.asarray(X, dtype=float))

        # Hidden layer
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = self.activation(self.hidden_input)

        # Output layer
        self.output_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        # Subtracting the row maximum leaves the softmax unchanged but avoids overflow.
        exp_output = np.exp(self.output_input - np.max(self.output_input, axis=1, keepdims=True))
        self.output = exp_output / np.sum(exp_output, axis=1, keepdims=True)
        return self.output

    def backpropagate(self, X, y, learning_rate=0.1):
        """Apply one gradient-descent step using the values cached by ``feedforward``.

        Parameters
        ----------
        X : array_like
            The same inputs that were last passed to ``feedforward``.
        y : array_like
            One-hot targets matching the shape of ``self.output``.
        learning_rate : float
            Step size applied to the batch-summed gradients.
        """
        # Accept lists as well as arrays; ``X.T`` below needs a 2-D ndarray.
        X = np.atleast_2d(np.asarray(X, dtype=float))
        y = np.asarray(y, dtype=float)

        output_error = self.output - y
        output_delta = output_error  # Softmax + cross-entropy

        # Hidden deltas must use the output weights from before their update.
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.activation_derivative(self.hidden_input)

        self.weights_hidden_output -= learning_rate * np.dot(self.hidden_output.T, output_delta)
        self.bias_output -= learning_rate * np.sum(output_delta, axis=0)

        self.weights_input_hidden -= learning_rate * np.dot(X.T, hidden_delta)
        self.bias_hidden -= learning_rate * np.sum(hidden_delta, axis=0)

    def train(self, X, y, epochs=1000, learning_rate=0.1):
        """Train in place with full-batch gradient descent.

        Every 100th epoch the mean cross-entropy over the batch is printed; it
        is computed from the outputs produced before that epoch's update.
        """
        X = np.atleast_2d(np.asarray(X, dtype=float))
        y = np.asarray(y, dtype=float)
        for epoch in range(epochs):
            self.feedforward(X)
            self.backpropagate(X, y, learning_rate)
            if epoch % 100 == 0:
                # 1e-9 keeps log() finite when a probability underflows to 0.
                loss = -np.sum(y * np.log(self.output + 1e-9)) / X.shape[0]
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return the most probable class index for each row of ``X``."""
        output = self.feedforward(X)
        return np.argmax(output, axis=1)

In [10]:
# Example: Training with Sigmoid activation for MLP
# Seed NumPy's global RNG so the random weight initialisation, and therefore
# the training log, is the same on every Restart & Run All.
# NOTE(review): convergence within 5000 epochs depends on the initial weights;
# re-check the log after seeding and pick another seed if this one stalls.
np.random.seed(42)

# X and y are the XOR inputs / one-hot targets defined in the single-layer cell.
mlp = MultiLayerPerceptron(hidden_size=4, activation="sigmoid")
print("\nTraining Multi-Layer Perceptron (Sigmoid):")
mlp.train(X, y, epochs=5000, learning_rate=0.1)

# Example: Predictions
print("\nPredictions:")
for sample, target in zip(X, y):
    # Each sample is reshaped to a one-row batch before prediction.
    prediction = mlp.predict(sample.reshape(1, -1))
    print(f"Input: {sample}, Prediction: {prediction}, Target: {np.argmax(target)}")


Training Multi-Layer Perceptron (Sigmoid):
Epoch 0, Loss: 0.7785
Epoch 100, Loss: 0.6935
Epoch 200, Loss: 0.6933
Epoch 300, Loss: 0.6932
Epoch 400, Loss: 0.6932
Epoch 500, Loss: 0.6931
Epoch 600, Loss: 0.6931
Epoch 700, Loss: 0.6931
Epoch 800, Loss: 0.6930
Epoch 900, Loss: 0.6929
Epoch 1000, Loss: 0.6928
Epoch 1100, Loss: 0.6925
Epoch 1200, Loss: 0.6918
Epoch 1300, Loss: 0.6898
Epoch 1400, Loss: 0.6836
Epoch 1500, Loss: 0.6641
Epoch 1600, Loss: 0.6133
Epoch 1700, Loss: 0.5227
Epoch 1800, Loss: 0.3954
Epoch 1900, Loss: 0.2370
Epoch 2000, Loss: 0.1288
Epoch 2100, Loss: 0.0777
Epoch 2200, Loss: 0.0527
Epoch 2300, Loss: 0.0389
Epoch 2400, Loss: 0.0303
Epoch 2500, Loss: 0.0247
Epoch 2600, Loss: 0.0207
Epoch 2700, Loss: 0.0177
Epoch 2800, Loss: 0.0154
Epoch 2900, Loss: 0.0137
Epoch 3000, Loss: 0.0122
Epoch 3100, Loss: 0.0111
Epoch 3200, Loss: 0.0101
Epoch 3300, Loss: 0.0093
Epoch 3400, Loss: 0.0086
Epoch 3500, Loss: 0.0079
Epoch 3600, Loss: 0.0074
Epoch 3700, Loss: 0.0069
Epoch 3800, Loss: 