<a href="https://colab.research.google.com/github/sushanttwayana/ML-DL-Strategies-Toolkit/blob/main/Optimizing_a_neural_network_with_backward_propagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

In [2]:
# Sigmoid activation function and its derivative
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The value is computed in a numerically stable way: ``exp`` is only ever
    applied to non-positive numbers, so large-magnitude inputs saturate
    cleanly to 0 or 1 instead of overflowing inside ``np.exp``.

    Args:
        x: Real-valued scalar or array-like.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as *x*. Integer and boolean inputs are promoted to float.
    """
    x = np.asarray(x)
    if x.dtype.kind != "f":
        # Integer/bool inputs need a floating dtype before exponentiation.
        x = x.astype(float)

    # exp(-|x|) lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(x))

    # For x >= 0:  1 / (1 + exp(-x))
    # For x <  0:  exp(x) / (1 + exp(x))   (algebraically the same value)
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of higher rank unchanged.
    return result[()]

def sigmoid_derivative(x):
    """Return the sigmoid's slope expressed in terms of the sigmoid's output.

    Note that *x* is expected to already be a sigmoid activation ``s`` (the
    callers in this file pass layer outputs, not pre-activations); the
    returned value is ``s * (1 - s)``.

    Args:
        x: Scalar or array of sigmoid outputs.

    Returns:
        ``x * (1 - x)``, with the same shape as *x*.
    """
    complement = 1 - x
    return x * complement

In [3]:
# Neural network class
class NeuralNetwork:
    """Feed-forward network with one hidden layer and sigmoid activations.

    Both layers are fully connected. Training uses full-batch gradient
    descent: the weight updates in ``backward`` are summed (not averaged)
    over all rows of the batch and scaled by the learning rate.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the network and draw its initial parameters.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            seed: Optional seed for a private random generator, making the
                initial weights reproducible. When ``None`` (the default)
                values are drawn from NumPy's global generator, so an
                external ``np.random.seed(...)`` call still controls them.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Both the np.random module and RandomState expose ``randn``.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Initialize weights and biases from a standard normal distribution.
        # The draw order is fixed so a given seed always yields the same model.
        self.weights_input_hidden = rng.randn(self.input_size, self.hidden_size)
        self.bias_input_hidden = rng.randn(1, self.hidden_size)

        self.weights_hidden_output = rng.randn(self.hidden_size, self.output_size)
        self.bias_hidden_output = rng.randn(1, self.output_size)

    def _match_targets(self, y):
        """Return *y* as an array, reshaping flat targets into one column.

        With a single output unit the prediction has shape ``(n, 1)``. A flat
        ``(n,)`` target would broadcast against it to ``(n, n)``, so it is
        reshaped to ``(n, 1)`` here. Other shapes are passed through.
        """
        y = np.asarray(y)
        if y.ndim == 1 and self.output_size == 1:
            y = y.reshape(-1, 1)
        return y

    def forward(self, X):
        """Run forward propagation and return the network's predictions.

        The hidden pre-activation, hidden activation and output
        pre-activation are stored on the instance (``hidden_input``,
        ``hidden_output``, ``output``); ``backward`` reads ``hidden_output``.

        Args:
            X: Input batch with one sample per row.

        Returns:
            The sigmoid of the output layer's pre-activation.
        """
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_input_hidden
        self.hidden_output = sigmoid(self.hidden_input)

        self.output = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_hidden_output
        output_predicted = sigmoid(self.output)
        return output_predicted

    def backward(self, X, y, output_predicted, learning_rate):
        """Back-propagate the prediction error and update all parameters.

        Must be called after ``forward`` on the same *X*, because it relies
        on the ``hidden_output`` cached there.

        Args:
            X: Input batch that produced *output_predicted*. Lists and a
                single 1-D sample are accepted and treated as a 2-D batch.
            y: Target values. A flat 1-D sequence is accepted when the
                network has a single output unit.
            output_predicted: Value returned by ``forward(X)``.
            learning_rate: Step size applied to every update.
        """
        X = np.atleast_2d(X)
        y = self._match_targets(y)

        # The error is (target - prediction), hence the ``+=`` updates below.
        error = y - output_predicted
        d_output = error * sigmoid_derivative(output_predicted)

        # Propagate through the output weights *before* they are modified.
        error_hidden = d_output.dot(self.weights_hidden_output.T)
        d_hidden = error_hidden * sigmoid_derivative(self.hidden_output)

        # Update weights and biases
        self.weights_hidden_output += self.hidden_output.T.dot(d_output) * learning_rate
        self.bias_hidden_output += np.sum(d_output, axis=0, keepdims=True) * learning_rate

        self.weights_input_hidden += X.T.dot(d_hidden) * learning_rate
        self.bias_input_hidden += np.sum(d_hidden, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate, log_every=1000):
        """Train with full-batch gradient descent for a number of epochs.

        Args:
            X: Input batch with one sample per row.
            y: Target values (see ``backward`` for accepted shapes).
            epochs: Number of forward/backward passes over the batch.
            learning_rate: Step size applied to every update.
            log_every: Print the mean squared error every this many epochs,
                starting with epoch 0. Pass ``0`` or ``None`` to disable
                printing.
        """
        # Normalize once so the loss below uses the same shape as backward().
        y = self._match_targets(y)

        for epoch in range(epochs):
            output_predicted = self.forward(X)
            self.backward(X, y, output_predicted, learning_rate)
            if log_every and epoch % log_every == 0:
                # The reported loss is for the prediction made before this
                # epoch's parameter update.
                loss = np.mean(np.square(y - output_predicted))
                print(f'Epoch {epoch}, Loss: {loss:.4f}')



In [4]:
# Sample data: the XOR truth table. Each row of X is one pair of binary
# inputs and the matching row of y is their exclusive-or.
X = np.array([[first, second] for first in (0, 1) for second in (0, 1)])
y = np.array([0, 1, 1, 0]).reshape(-1, 1)

# Layer sizes: one input unit per feature column, four hidden units and a
# single output unit.
input_size, hidden_size, output_size = X.shape[1], 4, 1

# Build the network, then fit it to the sample data.
nn = NeuralNetwork(input_size, hidden_size, output_size)
nn.train(X, y, epochs=10000, learning_rate=0.1)

Epoch 0, Loss: 0.2649
Epoch 1000, Loss: 0.2312
Epoch 2000, Loss: 0.1785
Epoch 3000, Loss: 0.1409
Epoch 4000, Loss: 0.0367
Epoch 5000, Loss: 0.0136
Epoch 6000, Loss: 0.0076
Epoch 7000, Loss: 0.0051
Epoch 8000, Loss: 0.0038
Epoch 9000, Loss: 0.0030
