### Feed-Forward Neural Network Implementation

In [4]:
import numpy as np


class FNN:
    """
    A simple feedforward neural network (one hidden layer) implemented with numpy.

    The same activation function is applied to both the hidden layer and the
    output layer. Training is full-batch: each epoch performs one forward pass
    and one weight update over all samples passed to ``fit``.

    Attributes:
        input_size (int): Number of input features
        hidden_size (int): Number of hidden units
        output_size (int): Number of output units
        learning_rate (float): Learning rate for the model
        epochs (int): Number of epochs for training

        weights_input_hidden (ndarray): Weights between input and hidden layer
        weights_hidden_output (ndarray): Weights between hidden and output layer
        bias_hidden (ndarray): Bias for the hidden layer
        bias_output (ndarray): Bias for the output layer
        activation (function): Activation function, selected by the
            ``activation`` string given to the constructor
        activation_derivative (function): Derivative of the activation function,
            expressed in terms of the activation's *output* (not its input)
    """

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        output_size: int,
        learning_rate: float = 0.01,
        epochs: int = 1000,
        activation: str = "sigmoid",
    ):
        """
        Initialise layer sizes, hyper-parameters, weights and the activation.

        Args:
            input_size: Number of input features
            hidden_size: Number of hidden units
            output_size: Number of output units
            learning_rate: Step size used for every weight/bias update
            epochs: Number of full-batch updates performed by ``fit``
            activation: One of "sigmoid", "relu" or "softmax"

        Raises:
            ValueError: If ``activation`` is not one of the supported names
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.learning_rate = learning_rate
        self.epochs = epochs

        # Weights are drawn from a standard normal via numpy's global RNG;
        # biases start at zero.
        self.weights_input_hidden = np.random.randn(input_size, hidden_size)
        self.weights_hidden_output = np.random.randn(hidden_size, output_size)
        self.bias_hidden = np.zeros((1, hidden_size))
        self.bias_output = np.zeros((1, output_size))

        if activation == "sigmoid":
            self.activation = self._sigmoid
            self.activation_derivative = self._output_times_complement
        elif activation == "relu":
            self.activation = self._relu
            self.activation_derivative = self._relu_derivative
        elif activation == "softmax":
            self.activation = self._softmax
            # Element-wise out * (1 - out), i.e. only the diagonal of the
            # softmax Jacobian is used.
            self.activation_derivative = self._output_times_complement
        else:
            raise ValueError(
                "Activation function not supported. Please use 'sigmoid', 'relu', or 'softmax'."
            )

    @staticmethod
    def _sigmoid(x):
        """Logistic sigmoid computed without overflowing for large |x|."""
        # exp(-|x|) always lies in (0, 1], so neither branch can overflow.
        z = np.exp(-np.abs(x))
        return np.where(np.asarray(x) >= 0, 1 / (1 + z), z / (1 + z))

    @staticmethod
    def _relu(x):
        """Rectified linear unit: max(0, x) element-wise."""
        return np.maximum(0, x)

    @staticmethod
    def _softmax(x):
        """Row-wise softmax (axis=1) of a 2-D array, shifted for stability."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # every exponent <= 0, preventing inf / inf = nan on large inputs.
        shifted = x - np.max(x, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

    @staticmethod
    def _output_times_complement(out):
        """Return out * (1 - out), the sigmoid derivative in terms of its output."""
        return out * (1 - out)

    @staticmethod
    def _relu_derivative(out):
        """ReLU derivative evaluated on the ReLU *output*."""
        # The backward pass feeds post-activation values, which are never
        # negative; a `>= 0` test would therefore mark every unit as active.
        # A unit is active only when its output is strictly positive.
        return np.where(out > 0, 1, 0)

    def _forward_pass(self, X: np.ndarray) -> tuple:
        """
        Perform the forward pass.

        Args:
            X: Input features numpy array

        Returns:
            Tuple containing hidden layer output and final predicted output
        """
        hidden_layer_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        hidden_layer_output = self.activation(hidden_layer_input)
        output_layer_input = (
            np.dot(hidden_layer_output, self.weights_hidden_output) + self.bias_output
        )
        # The output layer uses the same activation as the hidden layer.
        predicted_output = self.activation(output_layer_input)
        return hidden_layer_output, predicted_output

    def _backward_pass(
        self,
        X: np.ndarray,
        y: np.ndarray,
        hidden_layer_output: np.ndarray,
        predicted_output: np.ndarray,
    ) -> None:
        """
        Perform the backward pass and update weights and biases in place.

        Args:
            X: Input features numpy array
            y: Target numpy array
            hidden_layer_output: Output from the hidden layer
            predicted_output: Final predicted output
        """
        # Error is (target - prediction), so the updates below are additive.
        output_error = y - predicted_output
        output_delta = output_error * self.activation_derivative(predicted_output)

        # Propagate the output delta back through the hidden->output weights.
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.activation_derivative(hidden_layer_output)

        # Update weights and biases (contributions are summed over all samples)
        self.weights_hidden_output += self.learning_rate * np.dot(
            hidden_layer_output.T, output_delta
        )
        self.bias_output += self.learning_rate * np.sum(
            output_delta, axis=0, keepdims=True
        )
        self.weights_input_hidden += self.learning_rate * np.dot(X.T, hidden_delta)
        self.bias_hidden += self.learning_rate * np.sum(
            hidden_delta, axis=0, keepdims=True
        )

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """
        Train the model using the given numpy arrays X and y.

        Runs ``self.epochs`` iterations, each a forward pass followed by a
        backward pass over the whole of X.

        Args:
            X: numpy array of input features
            y: numpy array of target values
        """
        for _ in range(self.epochs):
            hidden_layer_output, predicted_output = self._forward_pass(X)
            self._backward_pass(X, y, hidden_layer_output, predicted_output)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict output for the given input features.

        Args:
            X: numpy array of input features

        Returns:
            numpy array of predicted values (the activated output layer)
        """
        _, predicted_output = self._forward_pass(X)
        return predicted_output

    def evaluate(self, X: np.ndarray, y: np.ndarray, metric: str = "mse") -> float:
        """
        Evaluate the model performance using the given metric.

        Args:
            X: numpy array of input features
            y: numpy array of target values
            metric: Evaluation metric ("mse", "mae", "r2_score", or "accuracy")

        Returns:
            Evaluation result as a float

        Raises:
            ValueError: If ``metric`` is not one of the supported names
        """
        predictions = self.predict(X)
        if metric == "mse":
            return np.mean((predictions - y) ** 2)
        elif metric == "mae":
            return np.mean(np.abs(predictions - y))
        elif metric == "r2_score":
            return 1 - (np.sum((y - predictions) ** 2) / np.sum((y - np.mean(y)) ** 2))
        elif metric == "accuracy":
            # Both predictions and targets are reduced with argmax over axis 1,
            # so y is indexed as a 2-D array here (e.g. one-hot rows).
            y_pred = np.argmax(predictions, axis=1)
            y_true = np.argmax(y, axis=1)
            return np.mean(y_pred == y_true)
        else:
            raise ValueError("Invalid metric")

    def __str__(self) -> str:
        """
        String representation of the FNN class.

        Returns:
            String description of the model
        """
        return f"FNN: input_size={self.input_size}, hidden_size={self.hidden_size}, output_size={self.output_size}, learning_rate={self.learning_rate}, epochs={self.epochs}"

### Training and Testing the Neural Network on the scikit-learn digits dataset

In [3]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Load the digits data set bundled with scikit-learn.
digits = load_digits()

# Divide the features by their largest value so the maximum becomes 1.
X = digits.data / digits.data.max()

# One-hot encode the class labels into a dense 2-D array
# (the "accuracy" metric takes an argmax over axis 1 of the targets).
y = OneHotEncoder().fit_transform(digits.target.reshape(-1, 1)).toarray()

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

fnn = FNN(
    input_size=64,
    hidden_size=128,
    output_size=10,
    learning_rate=0.01,
    epochs=1000,
    activation="softmax",
)
fnn.fit(X_train, y_train)

accuracy = fnn.evaluate(X_test, y_test, metric="accuracy")
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.72
