Import the necessary libraries:

In [None]:
import numpy as np

Define the activation function and its derivative. Let's use the sigmoid function:

In [None]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-x)))`` using
    ``np.logaddexp``, so large-magnitude negative inputs do not overflow
    inside ``exp`` (the direct formula emits an overflow RuntimeWarning
    there).

    Args:
        x: Real-valued scalar or NumPy array.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)),
    # computed without forming exp(-x) directly.
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Return the derivative of the sigmoid evaluated at pre-activation ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.
    Note that ``x`` is the input to the sigmoid, not its output.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)


Define the forward pass function, which calculates the output of the neural network given the input and the current weights and biases:

In [None]:
def forward_pass(X, W1, b1, W2, b2):
    """Run the input through the hidden layer and then the output layer.

    Each layer computes ``sigmoid(inputs . weights + bias)``.

    Args:
        X: Input data fed to the hidden layer.
        W1, b1: Weights and biases of the hidden layer.
        W2, b2: Weights and biases of the output layer.

    Returns:
        A tuple ``(hidden_output, output)`` holding the activations of the
        hidden layer and of the output layer.
    """
    # Hidden layer: affine transform followed by the sigmoid activation
    hidden_output = sigmoid(np.dot(X, W1) + b1)

    # Output layer: same pattern, fed by the hidden activations
    output = sigmoid(np.dot(hidden_output, W2) + b2)

    return hidden_output, output


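Define the backward pass function, which uses the chain rule to calculate the error terms (deltas) of the output and hidden layers; the gradients of the loss with respect to the weights and biases are formed from these terms in the update step: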

In [None]:
def backward_pass(X, y, hidden_output, output, W2):
    """Back-propagate the output error through both sigmoid layers.

    ``hidden_output`` and ``output`` are sigmoid *activations*, so the
    sigmoid derivative at each layer is ``a * (1 - a)`` computed directly
    from the activation ``a``. Passing an activation through the sigmoid
    a second time would yield the wrong slope.

    Args:
        X: Input data. Unused here; kept so the signature stays stable.
        y: Target values, broadcastable against ``output``.
        hidden_output: Activations of the hidden layer.
        output: Activations of the output layer.
        W2: Weights of the output layer, used to route the output error
            back to the hidden units.

    Returns:
        A tuple ``(output_gradient, hidden_gradient)`` of per-layer error
        terms. The error is taken as ``y - output``, so these terms point
        in the direction that reduces the error when added to the
        parameters.
    """
    # Error at the output layer (target minus prediction)
    output_error = y - output

    # Scale the error by the sigmoid slope at the output activations
    output_gradient = output_error * (output * (1 - output))

    # Push the output error term back through W2, then scale by the
    # sigmoid slope at the hidden activations
    hidden_gradient = np.dot(output_gradient, W2.T) * (hidden_output * (1 - hidden_output))

    return output_gradient, hidden_gradient


Define the weight update function, which adjusts the weights and biases based on the calculated gradients and a learning rate:

In [None]:
def weight_update(X, hidden_output, output, output_gradient, hidden_gradient, W1, b1, W2, b2, learning_rate):
    """Apply one learning step to both layers, modifying the arrays in place.

    Each weight matrix is incremented by ``learning_rate`` times the product
    of the transposed layer input and that layer's error term; each bias is
    incremented by ``learning_rate`` times the error term summed over axis 0.

    Args:
        X: Input data (the input of the hidden layer).
        hidden_output: Hidden-layer activations (the input of the output layer).
        output: Output-layer activations. Not used by the update itself.
        output_gradient: Error term of the output layer.
        hidden_gradient: Error term of the hidden layer.
        W1, b1: Hidden-layer weights and biases, updated in place.
        W2, b2: Output-layer weights and biases, updated in place.
        learning_rate: Step size applied to every update.

    Returns:
        The tuple ``(W1, b1, W2, b2)`` of the updated (same) array objects.
    """
    # Output layer
    w2_step = learning_rate * np.dot(hidden_output.T, output_gradient)
    W2 += w2_step
    b2_step = learning_rate * np.sum(output_gradient, axis=0)
    b2 += b2_step

    # Hidden layer
    w1_step = learning_rate * np.dot(X.T, hidden_gradient)
    W1 += w1_step
    b1_step = learning_rate * np.sum(hidden_gradient, axis=0)
    b1 += b1_step

    return W1, b1, W2, b2


Now, let's test the implementation by training the neural network on a simple dataset. Initialize the weights and biases, and define the training loop:

In [None]:
# Training data: the four XOR input pairs and their target outputs
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([[0], [1], [1], [0]])

# Network parameters: 2 inputs -> 4 hidden units -> 1 output.
# Weights start from standard-normal draws, biases start at zero.
W1 = np.random.randn(2, 4)
b1 = np.zeros((1, 4))
W2 = np.random.randn(4, 1)
b2 = np.zeros((1, 1))

# Hyperparameters
epochs = 10000
learning_rate = 0.1

# Full-batch training: every epoch runs forward, backward, then one update
for epoch in range(epochs):
    hidden_output, output = forward_pass(X, W1, b1, W2, b2)
    output_gradient, hidden_gradient = backward_pass(X, y, hidden_output, output, W2)
    W1, b1, W2, b2 = weight_update(
        X, hidden_output, output, output_gradient, hidden_gradient,
        W1, b1, W2, b2, learning_rate,
    )

# Evaluate the trained network on the training inputs and show the result
_, output = forward_pass(X, W1, b1, W2, b2)
print("Final Output:", output, sep="\n")


Final Output:
[[0.24357279]
 [0.71233379]
 [0.72307707]
 [0.25187655]]
