# In [None]:
import numpy as np
from typing import List, Tuple, Optional
import logging

# Configure the root logger at import time: emit records at INFO level and
# above, formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

class NeuralNetwork:
    """
    A flexible implementation of a feedforward neural network using NumPy.

    Every layer (hidden and output) applies a sigmoid activation. Weights are
    drawn with Xavier/Glorot uniform initialization, biases start at zero, and
    training is mini-batch gradient descent on the squared error with
    element-wise gradient clipping to [-1, 1].
    """

    def __init__(self, input_size: int, hidden_layers_sizes: List[int], output_size: int):
        """
        Initialize the neural network with the specified architecture.

        Args:
            input_size: Number of input features
            hidden_layers_sizes: Sequence containing the size of each hidden
                layer (may be empty for a single-layer network)
            output_size: Number of output neurons

        Raises:
            ValueError: If any layer size is not strictly positive
        """
        self.input_size = input_size
        self.hidden_layers_sizes = hidden_layers_sizes
        self.output_size = output_size

        # Unpacking (rather than list concatenation) accepts tuples and other
        # sequences as well as lists.
        layer_sizes = [input_size, *hidden_layers_sizes, output_size]
        if any(size <= 0 for size in layer_sizes):
            raise ValueError(f"All layer sizes must be positive, got {layer_sizes}")

        # One weight matrix and one bias row per pair of consecutive layers
        self.weights = []
        self.biases = []
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            # Xavier/Glorot uniform initialization
            limit = np.sqrt(6 / (fan_in + fan_out))
            self.weights.append(np.random.uniform(-limit, limit, (fan_in, fan_out)))
            self.biases.append(np.zeros((1, fan_out)))

    @staticmethod
    def sigmoid(x: np.ndarray) -> np.ndarray:
        """
        Compute the sigmoid activation function with numerical stability.

        Args:
            x: Input array

        Returns:
            Sigmoid activation output
        """
        x_clipped = np.clip(x, -500, 500)  # Prevent overflow in np.exp
        return 1 / (1 + np.exp(-x_clipped))

    @staticmethod
    def sigmoid_derivative(x: np.ndarray) -> np.ndarray:
        """
        Compute the derivative of the sigmoid function.

        Args:
            x: Sigmoid activation *output* (not the pre-activation input)

        Returns:
            Derivative of sigmoid function
        """
        return x * (1 - x)

    @staticmethod
    def _align_targets(y, predictions: np.ndarray) -> np.ndarray:
        """
        Return targets as an array shaped to compare element-wise with predictions.

        A 1-D target array is reshaped to the predictions' shape when the
        element counts match (so ``(n,)`` lines up with ``(n, 1)`` instead of
        broadcasting to ``(n, n)``); otherwise it becomes a column vector.
        Arrays that are not 1-D are returned unchanged.
        """
        y = np.asarray(y)
        if y.ndim == 1:
            if y.size == predictions.size:
                return y.reshape(predictions.shape)
            return y.reshape(-1, 1)
        return y

    def _loss_and_accuracy(self, X: np.ndarray, y: np.ndarray) -> Tuple[float, float]:
        """Run a forward pass on X and return (mean squared error, accuracy at a 0.5 threshold)."""
        predictions = self.forward(X)
        targets = self._align_targets(y, predictions)
        loss = np.mean(np.square(targets - predictions))
        accuracy = np.mean((predictions > 0.5) == targets)
        return loss, accuracy

    def forward(self, X: np.ndarray) -> np.ndarray:
        """
        Perform forward propagation through the network.

        The activations of every layer (including the input) are stored in
        ``self.activations`` for use by ``backward``.

        Args:
            X: Input features; a 1-D array is treated as a single sample

        Returns:
            Network predictions
        """
        # Input validation and reshaping
        X = np.array(X)
        if X.ndim == 1:
            X = X.reshape(1, -1)

        self.activations = [X]

        # Forward propagation through each layer
        for weights, biases in zip(self.weights, self.biases):
            z = np.dot(self.activations[-1], weights) + biases
            self.activations.append(self.sigmoid(z))

        return self.activations[-1]

    def backward(self, X: np.ndarray, y: np.ndarray, learning_rate: float) -> None:
        """
        Perform backpropagation to update network weights.

        Uses the activations stored by the most recent ``forward`` call, so
        ``forward`` must have been run on the same batch first.

        Args:
            X: Input features (unused; the stored activations are used instead)
            y: Target values
            learning_rate: Learning rate for gradient descent
        """
        output = self.activations[-1]
        y = self._align_targets(y, output)

        # The error is (target - output), so the "gradients" below point in
        # the descent direction and are *added* to the parameters.
        delta = (y - output) * self.sigmoid_derivative(output)

        n_layers = len(self.weights)
        weight_gradients = [None] * n_layers
        bias_gradients = [None] * n_layers

        # Backpropagate through layers, last to first. All gradients are
        # computed against the pre-update weights before anything is changed.
        for i in reversed(range(n_layers)):
            weight_grad = np.dot(self.activations[i].T, delta)
            bias_grad = np.sum(delta, axis=0, keepdims=True)

            # Gradient clipping
            weight_gradients[i] = np.clip(weight_grad, -1, 1)
            bias_gradients[i] = np.clip(bias_grad, -1, 1)

            if i > 0:
                delta = np.dot(delta, self.weights[i].T) * self.sigmoid_derivative(self.activations[i])

        # Update weights and biases
        for i in range(n_layers):
            self.weights[i] += learning_rate * weight_gradients[i]
            self.biases[i] += learning_rate * bias_gradients[i]

    def train(self,
             X: np.ndarray,
             y: np.ndarray,
             epochs: int,
             learning_rate: float,
             batch_size: Optional[int] = None,
             verbose: bool = True) -> List[float]:
        """
        Train the neural network using mini-batch gradient descent.

        Args:
            X: Input features
            y: Target values
            epochs: Number of training iterations
            learning_rate: Learning rate for gradient descent
            batch_size: Size of mini-batches; defaults to the full dataset
            verbose: Whether to log training progress

        Returns:
            List of full-dataset loss values, sampled every 100 epochs
            (recorded whether or not ``verbose`` is set)

        Raises:
            ValueError: If ``batch_size`` is given and is not positive
        """
        X = np.array(X)
        y = np.array(y)

        if batch_size is None:
            batch_size = len(X)
        elif batch_size <= 0:
            # A negative step would make the batch loop silently do nothing.
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        losses = []

        try:
            for epoch in range(epochs):
                # Shuffle data
                indices = np.random.permutation(len(X))
                X_shuffled = X[indices]
                y_shuffled = y[indices]

                # Mini-batch training
                for start in range(0, len(X), batch_size):
                    batch_X = X_shuffled[start:start + batch_size]
                    batch_y = y_shuffled[start:start + batch_size]

                    self.forward(batch_X)
                    self.backward(batch_X, batch_y, learning_rate)

                # Record the loss every 100 epochs; only the logging depends
                # on `verbose`, so the returned history is never empty merely
                # because progress output was disabled.
                if epoch % 100 == 0:
                    loss, accuracy = self._loss_and_accuracy(X, y)
                    losses.append(loss)
                    if verbose:
                        logging.info(f"Epoch {epoch}/{epochs} - Loss: {loss:.6f} - Accuracy: {accuracy:.2%}")

        except Exception as e:
            logging.error(f"Training error: {str(e)}")
            raise

        return losses

    def evaluate(self, X: np.ndarray, y: np.ndarray) -> Tuple[float, float]:
        """
        Evaluate the network's performance on test data.

        Args:
            X: Test features
            y: Test targets (1-D targets are aligned with the prediction shape)

        Returns:
            Tuple of (loss, accuracy), where loss is the mean squared error
            and accuracy thresholds predictions at 0.5
        """
        return self._loss_and_accuracy(X, y)

def main():
    """Train a small network on the XOR truth table and print the outcome."""
    # The four XOR input pairs and their targets as a column vector
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])

    try:
        # Build a 2-4-1 network and fit it on the full truth table
        nn = NeuralNetwork(input_size=2, hidden_layers_sizes=[4], output_size=1)
        nn.train(X, y, epochs=5000, learning_rate=0.1, batch_size=4)

        # Report the final metrics on the training set
        loss, accuracy = nn.evaluate(X, y)
        print("\nFinal Results:")
        print(f"Loss: {loss:.6f}")
        print(f"Accuracy: {accuracy:.2%}")

        # List each input next to its prediction and expected value
        predictions = nn.forward(X)
        print("\nPredictions:")
        for sample, predicted, expected in zip(X, predictions, y):
            print(f"Input: {sample} -> Predicted: {predicted[0]:.4f} (Expected: {expected[0]})")

    except Exception as e:
        logging.error(f"Error in main: {str(e)}")
        raise

# Run the XOR demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()