# Task 33

In [3]:
import numpy as np
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed in a form that never exponentiates a positive
    number, so very negative inputs no longer trigger an overflow warning
    in ``np.exp``.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``, with every value in the closed interval [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1] (or underflows to 0), so it cannot overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x) directly.
    # x <  0: the same quantity rewritten as e^x / (1 + e^x).
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar and
    # leaves arrays of higher rank untouched.
    return result[()]

def sigmoid_derivative(x):
    """Return ``x * (1 - x)`` element-wise.

    This equals the derivative of the logistic sigmoid when ``x`` is
    already a sigmoid *output* s = sigmoid(z), because
    d/dz sigmoid(z) = s * (1 - s). The function does not apply the
    sigmoid itself.

    Args:
        x: A scalar or NumPy array, expected to hold sigmoid activations.

    Returns:
        The product ``x * (1 - x)``, with the same shape as ``x``.
    """
    complement = 1 - x
    return x * complement

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        ``x`` with every negative entry replaced by 0.
    """
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Return the ReLU slope for each entry of ``x``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        An integer array holding 1 where ``x`` is strictly positive and 0
        everywhere else (including at exactly 0).
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

# Initialize data: the four XOR input pairs and their target outputs.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Initialize weights and biases
input_layer_neurons = X.shape[1]
hidden_layer_neurons = 2
output_neurons = 1

# Fixed seed so the initial parameters are reproducible between runs.
np.random.seed(42)
# np.random.uniform defaults to the half-open interval [0, 1), so every
# initial weight and bias is non-negative.
weights_input_hidden = np.random.uniform(size=(input_layer_neurons, hidden_layer_neurons))
bias_hidden = np.random.uniform(size=(1, hidden_layer_neurons))
weights_hidden_output = np.random.uniform(size=(hidden_layer_neurons, output_neurons))
bias_output = np.random.uniform(size=(1, output_neurons))

# Hyperparameters
learning_rate = 0.1
epochs = 10000


def _forward(inputs):
    """Run the network on ``inputs`` with the current module-level parameters.

    Returns a tuple ``(hidden_pre, hidden_act, output)``: the hidden-layer
    pre-activations, their ReLU activations, and the linear output layer.
    """
    hidden_pre = np.dot(inputs, weights_input_hidden) + bias_hidden
    hidden_act = relu(hidden_pre)
    output = np.dot(hidden_act, weights_hidden_output) + bias_output
    return hidden_pre, hidden_act, output


# Training: full-batch gradient descent on all rows of X.
# NOTE(review): the output recorded with this cell plateaus at a loss of
# about 0.1667 from epoch 1000 onwards. This may be caused by ReLU units
# going inactive with only 2 hidden neurons and non-negative initial
# weights -- confirm before relying on this configuration.
for epoch in range(epochs):
    # Forward pass (the output layer is linear: no activation is applied).
    hidden_layer_input, hidden_layer_output, output_layer_input = _forward(X)
    predicted_output = output_layer_input

    # Residual, computed once and shared by the loss and the gradient.
    error = y - predicted_output

    # Computing loss (mean squared error, used for logging)
    loss = np.mean(error ** 2)

    # Backward pass
    # -2 * error is the gradient of the *summed* squared error, so the
    # updates below are X.shape[0] times the gradient of the logged mean
    # loss; that factor is effectively absorbed into learning_rate.
    d_predicted_output = -2 * error

    # Gradients for weights and biases
    weights_hidden_output_gradient = np.dot(hidden_layer_output.T, d_predicted_output)
    bias_output_gradient = np.sum(d_predicted_output, axis=0, keepdims=True)

    hidden_layer_error = np.dot(d_predicted_output, weights_hidden_output.T)
    # The ReLU output is positive exactly where its input is positive, so
    # taking the derivative mask from the activations is equivalent to
    # taking it from the pre-activations.
    hidden_layer_gradient = hidden_layer_error * relu_derivative(hidden_layer_output)

    weights_input_hidden_gradient = np.dot(X.T, hidden_layer_gradient)
    bias_hidden_gradient = np.sum(hidden_layer_gradient, axis=0, keepdims=True)

    # Update weights and biases (in place, so _forward sees the new values)
    weights_hidden_output -= learning_rate * weights_hidden_output_gradient
    bias_output -= learning_rate * bias_output_gradient

    weights_input_hidden -= learning_rate * weights_input_hidden_gradient
    bias_hidden -= learning_rate * bias_hidden_gradient

    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss}")

# Final prediction
# Re-run the forward pass so the reported prediction (and loss) reflect the
# parameters *after* the last update rather than the values computed before it.
hidden_layer_input, hidden_layer_output, output_layer_input = _forward(X)
predicted_output = output_layer_input
loss = np.mean((y - predicted_output) ** 2)
print("Final predicted output: ", predicted_output)


Epoch 0, Loss: 1.4068559462238825
Epoch 1000, Loss: 0.16666666666666669
Epoch 2000, Loss: 0.16666666666666669
Epoch 3000, Loss: 0.16666666666666669
Epoch 4000, Loss: 0.16666666666666669
Epoch 5000, Loss: 0.16666666666666669
Epoch 6000, Loss: 0.16666666666666669
Epoch 7000, Loss: 0.16666666666666669
Epoch 8000, Loss: 0.16666666666666669
Epoch 9000, Loss: 0.16666666666666669
Final predicted output:  [[6.66666667e-01]
 [6.66666667e-01]
 [6.66666667e-01]
 [2.22044605e-16]]
