In [1]:
import numpy as np

# Set a seed for reproducibility
np.random.seed(42)

class SimpleNeuralNetwork:
    """
    A simple two-layer Neural Network (Input -> Hidden -> Output) implemented from scratch.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """
        Initializes the weights and biases for the network layers.

        Args:
            input_size (int): Number of features in the input data.
            hidden_size (int): Number of neurons in the hidden layer.
            output_size (int): Number of neurons in the output layer (1 for binary classification).
        """
        print(f"Initializing Neural Network: Input={input_size}, Hidden={hidden_size}, Output={output_size}")
        
        # --- Weights and Biases Initialization ---
        # W1: Weights for Input -> Hidden layer (input_size x hidden_size)
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01 
        # b1: Biases for Hidden layer (1 x hidden_size)
        self.b1 = np.zeros((1, hidden_size))

        # W2: Weights for Hidden -> Output layer (hidden_size x output_size)
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        # b2: Biases for Output layer (1 x output_size)
        self.b2 = np.zeros((1, output_size))

        # Store layer sizes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Cache for intermediate values needed during backpropagation
        self.cache = {}

    # --- Activation Function: Sigmoid ---
    def sigmoid(self, z):
        """
        Sigmoid activation function: g(z) = 1 / (1 + e^(-z))
        """
        return 1 / (1 + np.exp(-z))

    def sigmoid_derivative(self, a):
        """
        Derivative of the sigmoid function: g'(a) = a * (1 - a)
        where 'a' is the output of the sigmoid function.
        """
        return a * (1 - a)

    # --- Forward Propagation ---
    def forward(self, X):
        """
        Performs the forward pass through the network.
        
        X: Input data (m x input_size)
        """
        
        # 1. Input Layer to Hidden Layer
        # Z1 = X * W1 + b1 (Linear combination)
        self.cache['Z1'] = np.dot(X, self.W1) + self.b1
        
        # A1 = sigmoid(Z1) (Activation)
        self.cache['A1'] = self.sigmoid(self.cache['Z1'])
        
        # 2. Hidden Layer to Output Layer
        # Z2 = A1 * W2 + b2 (Linear combination)
        self.cache['Z2'] = np.dot(self.cache['A1'], self.W2) + self.b2
        
        # A2 = sigmoid(Z2) (Output prediction)
        self.cache['A2'] = self.sigmoid(self.cache['Z2'])
        
        # A2 is the final predicted output
        return self.cache['A2']

    # --- Backward Propagation ---
    def backward(self, X, y, output, learning_rate):
        """
        Performs the backward pass to calculate gradients and update weights/biases.

        Args:
            X (np.array): Input data.
            y (np.array): True labels.
            output (np.array): Predicted output from forward pass (A2).
            learning_rate (float): The step size for gradient descent.
        """
        m = X.shape[0] # Number of samples

        # 1. Output Layer Gradient (dZ2)
        # Error (difference between actual and predicted)
        error = output - y.reshape(-1, 1)
        
        # Derivative of cost w.r.t Z2 (using derivative of sigmoid at A2)
        dZ2 = error * self.sigmoid_derivative(output)
        
        # 2. Calculate gradients for W2 and b2
        # dW2 = (1/m) * A1.T * dZ2
        dW2 = (1 / m) * np.dot(self.cache['A1'].T, dZ2)
        # db2 = (1/m) * sum(dZ2, axis=0)
        db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)

        # 3. Hidden Layer Gradient (dZ1)
        # Delta for hidden layer: propagate error backwards through W2
        dA1 = np.dot(dZ2, self.W2.T)
        
        # Derivative of cost w.r.t Z1 (using derivative of sigmoid at A1)
        dZ1 = dA1 * self.sigmoid_derivative(self.cache['A1'])

        # 4. Calculate gradients for W1 and b1
        # dW1 = (1/m) * X.T * dZ1
        dW1 = (1 / m) * np.dot(X.T, dZ1)
        # db1 = (1/m) * sum(dZ1, axis=0)
        db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)

        # --- Update Weights and Biases (Gradient Descent) ---
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def calculate_loss(self, y, output):
        """Calculates the Mean Squared Error (MSE) loss."""
        return np.mean((y.reshape(-1, 1) - output) ** 2)

    def train(self, X, y, epochs=1000, learning_rate=0.01):
        """Trains the neural network for a specified number of epochs."""
        print(f"\nStarting training for {epochs} epochs with LR={learning_rate}...")
        
        for epoch in range(epochs):
            # Forward Pass
            output = self.forward(X)
            
            # Calculate Loss (for monitoring)
            loss = self.calculate_loss(y, output)
            
            # Backward Pass (Updates weights and biases)
            self.backward(X, y, output, learning_rate)

            if epoch % (epochs // 10) == 0 or epoch == epochs - 1:
                print(f"Epoch {epoch}/{epochs} - Loss: {loss:.6f}")

        print("Training complete.")
        return output

    def predict(self, X):
        """Makes predictions using the trained network."""
        # Run forward pass and apply threshold
        output = self.forward(X)
        return (output >= 0.5).astype(int)

# --- Demonstration and Execution ---

# 1. Define Sample Data (XOR-like problem, 4 samples, 2 features)
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
])
# True labels (XOR result)
y = np.array([0, 1, 1, 0])

# 2. Initialize the Network
input_features = X.shape[1]  # 2 features
hidden_neurons = 4           # Choose a size for the hidden layer
output_classes = 1           # Binary classification (0 or 1)

nn = SimpleNeuralNetwork(input_features, hidden_neurons, output_classes)

# 3. Train the Network
epochs = 50000
learning_rate = 0.1
final_output = nn.train(X, y, epochs=epochs, learning_rate=learning_rate)

# 4. Evaluate and Print Results
predictions = nn.predict(X)

print("\n--- Final Results ---")
print("Input (X):\n", X)
print("True Labels (y):\n", y)
print("Predicted Probabilities (A2):\n", final_output.flatten())
print("Final Predictions:\n", predictions.flatten())

accuracy = np.mean(predictions.flatten() == y) * 100
print(f"\nFinal Accuracy: {accuracy:.2f}%")


Initializing Neural Network: Input=2, Hidden=4, Output=1

Starting training for 50000 epochs with LR=0.1...
Epoch 0/50000 - Loss: 0.250001
Epoch 5000/50000 - Loss: 0.250000
Epoch 10000/50000 - Loss: 0.250000
Epoch 15000/50000 - Loss: 0.250000
Epoch 20000/50000 - Loss: 0.250000
Epoch 25000/50000 - Loss: 0.250000
Epoch 30000/50000 - Loss: 0.250000
Epoch 35000/50000 - Loss: 0.250000
Epoch 40000/50000 - Loss: 0.250000
Epoch 45000/50000 - Loss: 0.250000
Epoch 49999/50000 - Loss: 0.250000
Training complete.

--- Final Results ---
Input (X):
 [[0 0]
 [0 1]
 [1 0]
 [1 1]]
True Labels (y):
 [0 1 1 0]
Predicted Probabilities (A2):
 [0.50000605 0.50000046 0.49999957 0.49999397]
Final Predictions:
 [1 1 0 0]

Final Accuracy: 50.00%
