In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

class NeuralNetwork:
    """
    Fully connected feed-forward classifier trained with full-batch gradient descent.

    Every hidden layer has the same width and uses ReLU; the output layer uses
    softmax and the loss is categorical cross-entropy. ``forward`` caches the
    per-layer pre-activations and activations on the instance, and ``backward``
    consumes that cache, so ``backward`` must follow a ``forward`` call on the
    same batch.
    """

    def __init__(self, input_size, hidden_layers, hidden_neurons, output_size, learning_rate=0.01):
        """
        Initialize the neural network.
        :param input_size: Number of input features
        :param hidden_layers: Number of hidden layers (int, at least 1)
        :param hidden_neurons: Number of neurons in each hidden layer (int)
        :param output_size: Number of output neurons (number of classes)
        :param learning_rate: Learning rate for gradient descent
        :raises ValueError: If hidden_layers is smaller than 1
        """
        # With zero hidden layers the weight list would still hold an
        # input->hidden and a hidden->output matrix, but forward() would skip
        # the first one and multiply the raw input by the second, which is
        # either a shape error or a silently wrong model.
        if hidden_layers < 1:
            raise ValueError(f"hidden_layers must be at least 1, got {hidden_layers}")

        self.learning_rate = learning_rate
        self.hidden_layers = hidden_layers

        # Width of every layer from input to output; consecutive pairs give
        # the (fan_in, fan_out) shape of each weight matrix.
        layer_sizes = [input_size] + [hidden_neurons] * hidden_layers + [output_size]

        # Initialize weights and biases
        self.weights = []
        self.biases = []
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            # Standard-normal draws scaled by sqrt(2 / fan_in); biases start at zero.
            self.weights.append(np.random.randn(fan_in, fan_out) * np.sqrt(2. / fan_in))
            self.biases.append(np.zeros((1, fan_out)))

    def relu(self, x):
        """
        Apply the ReLU activation element-wise.
        :param x: Pre-activation values
        :return: Array with negative entries replaced by 0
        """
        return np.maximum(0, x)

    def relu_derivative(self, x):
        """
        Derivative of ReLU with respect to its input.
        :param x: Pre-activation values
        :return: Float array holding 1.0 where x > 0 and 0.0 elsewhere
        """
        return (x > 0).astype(float)

    def softmax(self, x):
        """
        Row-wise softmax.
        :param x: 2-D array of scores, one row per sample
        :return: Array of the same shape whose rows sum to 1
        """
        # Subtracting the row maximum keeps np.exp from overflowing and does
        # not change the result.
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X):
        """
        Forward pass through the network.

        Stores the pre-activations in ``self.z_values`` and the activations
        (starting with the input itself) in ``self.a_values`` for use by
        ``backward``.
        :param X: Input data
        :return: Output probabilities
        """
        self.z_values = []
        self.a_values = [X]

        # Hidden layers
        for i in range(self.hidden_layers):
            z = np.dot(self.a_values[-1], self.weights[i]) + self.biases[i]
            self.z_values.append(z)
            self.a_values.append(self.relu(z))

        # Output layer
        z = np.dot(self.a_values[-1], self.weights[-1]) + self.biases[-1]
        self.z_values.append(z)
        a = self.softmax(z)
        self.a_values.append(a)

        return a

    def compute_loss(self, y_true, y_pred):
        """
        Compute cross-entropy loss.
        :param y_true: True labels (one-hot encoded)
        :param y_pred: Predicted probabilities
        :return: Loss value
        """
        m = y_true.shape[0]
        # The 1e-9 offset avoids log(0) when a predicted probability is exactly 0.
        loss = -np.sum(y_true * np.log(y_pred + 1e-9)) / m
        return loss

    def backward(self, y_true):
        """
        Backward pass through the network to update weights and biases.

        Uses the activations cached by the most recent ``forward`` call, so
        that call must have been made on the batch matching ``y_true``.
        :param y_true: True labels (one-hot encoded)
        """
        m = y_true.shape[0]
        grads_w = [None] * len(self.weights)
        grads_b = [None] * len(self.biases)

        # Output layer error
        delta = self.a_values[-1] - y_true  # shape (m, output_size)

        # Gradient for last layer weights and biases
        grads_w[-1] = np.dot(self.a_values[-2].T, delta) / m
        grads_b[-1] = np.sum(delta, axis=0, keepdims=True) / m

        # Backpropagate through hidden layers
        for i in reversed(range(self.hidden_layers)):
            delta = np.dot(delta, self.weights[i+1].T) * self.relu_derivative(self.z_values[i])
            grads_w[i] = np.dot(self.a_values[i].T, delta) / m
            grads_b[i] = np.sum(delta, axis=0, keepdims=True) / m

        # Update weights and biases in place, only after every gradient has
        # been computed from the pre-update weights.
        for w, b, grad_w, grad_b in zip(self.weights, self.biases, grads_w, grads_b):
            w -= self.learning_rate * grad_w
            b -= self.learning_rate * grad_b

    def train(self, X_train, y_train, epochs=1000, verbose=True):
        """
        Train the neural network.

        Each epoch is one forward pass and one gradient step over the whole
        training set.
        :param X_train: Training data features
        :param y_train: Training data labels (one-hot encoded)
        :param epochs: Number of training epochs
        :param verbose: Whether to print loss during training
        """
        for epoch in range(epochs):
            y_pred = self.forward(X_train)
            loss = self.compute_loss(y_train, y_pred)
            self.backward(y_train)
            # Report every 100th epoch and always the final one.
            if verbose and (epoch % 100 == 0 or epoch == epochs - 1):
                print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        """
        Predict class labels for input data.
        :param X: Input data
        :return: Predicted class labels
        """
        y_pred = self.forward(X)
        return np.argmax(y_pred, axis=1)

# Example usage with Iris dataset
if __name__ == "__main__":
    # Iris features as-is; targets reshaped to a single column because the
    # encoder expects 2-D input.
    iris = load_iris()
    X, y = iris.data, iris.target.reshape(-1, 1)

    # Turn the integer class ids into dense one-hot rows.
    encoder = OneHotEncoder(sparse_output=False)
    y_onehot = encoder.fit_transform(y)

    # Hold out 20% of the samples for evaluation, with a fixed split seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y_onehot,
        test_size=0.2,
        random_state=42,
    )

    # Network hyperparameters; layer widths at the edges follow the data.
    hidden_layers, hidden_neurons, learning_rate = 1, 100, 0.01
    input_size, output_size = X_train.shape[1], y_onehot.shape[1]

    nn = NeuralNetwork(
        input_size=input_size,
        hidden_layers=hidden_layers,
        hidden_neurons=hidden_neurons,
        output_size=output_size,
        learning_rate=learning_rate,
    )

    # Fit on the training split, printing the loss periodically.
    nn.train(X_train, y_train, epochs=1000, verbose=True)

    # Compare predicted class ids with the ids recovered from the one-hot rows.
    y_test_labels = y_test.argmax(axis=1)
    y_pred = nn.predict(X_test)

    accuracy = accuracy_score(y_test_labels, y_pred)
    print(f"Test set accuracy: {accuracy:.4f}")


Epoch 1/1000, Loss: 4.0087
Epoch 101/1000, Loss: 0.6118
Epoch 201/1000, Loss: 0.1337
Epoch 301/1000, Loss: 0.1183
Epoch 401/1000, Loss: 0.1086
Epoch 501/1000, Loss: 0.1018
Epoch 601/1000, Loss: 0.0967
Epoch 701/1000, Loss: 0.0928
Epoch 801/1000, Loss: 0.0896
Epoch 901/1000, Loss: 0.0870
Epoch 1000/1000, Loss: 0.0849
Test set accuracy: 0.9667
