Object - Hyperparameter Tuning in BPNN(Backpropagation Neural Network

In [2]:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights
        self.weights_input_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.weights_hidden_output = np.random.randn(self.hidden_size, self.output_size)

        # Initialize the biases
        self.bias_hidden = np.zeros((1, self.hidden_size))
        self.bias_output = np.zeros((1, self.output_size))

    def relu(self, x):
        return np.maximum(0, x)

    def relu_derivative(self, x):
        return (x > 0).astype(float)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def cross_entropy_loss(self, y, y_hat):
        return -np.mean(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

    def feedforward(self, X):
        # Input to hidden
        self.hidden_activation = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = self.relu(self.hidden_activation)

        # Hidden to output
        self.output_activation = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.predicted_output = self.sigmoid(self.output_activation)

        return self.predicted_output

    def backward(self, X, y, learning_rate):
        # Compute the output layer error
        output_error = y - self.predicted_output
        output_delta = output_error * self.sigmoid_derivative(self.predicted_output)

        # Compute the hidden layer error
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.relu_derivative(self.hidden_output)

        # Update weights and biases
        self.weights_hidden_output += np.dot(self.hidden_output.T, output_delta) * learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * learning_rate
        self.weights_input_hidden += np.dot(X.T, hidden_delta) * learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            output = self.feedforward(X)
            self.backward(X, y, learning_rate)
            if epoch % 1000 == 0:
                loss = self.cross_entropy_loss(y, output)
                print(f"Epoch {epoch}, Loss: {loss}")

    def evaluate(self, X, y):
        output = self.feedforward(X)
        predictions = (output > 0.5).astype(int)
        accuracy = np.mean(predictions == y)
        return accuracy


# Function to tune hyperparameters
def hyperparameter_tuning(X, y):
    # Define the hyperparameter search space
    hidden_sizes = [2, 4, 6, 8]  # Number of hidden neurons
    learning_rates = [0.01, 0.05, 0.1, 0.2]  # Learning rates
    epochs = [5000, 10000]  # Number of training epochs

    best_accuracy = 0
    best_params = {}

    # Grid search over all combinations of hyperparameters
    for hidden_size in hidden_sizes:
        for learning_rate in learning_rates:
            for epoch in epochs:
                print(f"Training with hidden_size={hidden_size}, learning_rate={learning_rate}, epochs={epoch}")
                
                # Initialize the neural network with the current hyperparameters
                nn = NeuralNetwork(input_size=2, hidden_size=hidden_size, output_size=1)
                
                # Train the neural network
                nn.train(X, y, epochs=epoch, learning_rate=learning_rate)
                
                # Evaluate the model
                accuracy = nn.evaluate(X, y)
                print(f"Accuracy: {accuracy}")

                # Keep track of the best hyperparameters based on accuracy
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params = {
                        'hidden_size': hidden_size,
                        'learning_rate': learning_rate,
                        'epochs': epoch
                    }

    print("Best Hyperparameters:", best_params)
    print("Best Accuracy:", best_accuracy)

# Example XOR problem
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Perform hyperparameter tuning
hyperparameter_tuning(X, y)


Training with hidden_size=2, learning_rate=0.01, epochs=5000
Epoch 0, Loss: 0.7989019611082255
Epoch 1000, Loss: 0.6978367348315275
Epoch 2000, Loss: 0.6950709723626467
Epoch 3000, Loss: 0.6940659971732196
Epoch 4000, Loss: 0.6936200014770554
Accuracy: 0.5
Training with hidden_size=2, learning_rate=0.01, epochs=10000
Epoch 0, Loss: 0.6485456547489131
Epoch 1000, Loss: 0.5438858495476728
Epoch 2000, Loss: 0.5103580290931646
Epoch 3000, Loss: 0.4990531386902628
Epoch 4000, Loss: 0.4938486097970386
Epoch 5000, Loss: 0.49088241943392896
Epoch 6000, Loss: 0.48896214362620427
Epoch 7000, Loss: 0.4876039231113567
Epoch 8000, Loss: 0.48657627143035465
Epoch 9000, Loss: 0.4858046512229521
Accuracy: 0.75
Training with hidden_size=2, learning_rate=0.05, epochs=5000
Epoch 0, Loss: 0.645708320036906
Epoch 1000, Loss: 0.11267879345539125
Epoch 2000, Loss: 0.06452715789653268
Epoch 3000, Loss: 0.04824327515223874
Epoch 4000, Loss: 0.03968654890136725
Accuracy: 1.0
Training with hidden_size=2, learnin