## Python program using NumPy that implements three different neural networks:

- No Hidden Layer: Logistic regression-like model.
- One Hidden Layer: Single-layer neural network with ReLU activation.
- Two Hidden Layers: Two-layer neural network with ReLU activation for hidden layers.

Each implementation includes forward and backward propagation. The models are trained using the gradient descent algorithm. The code is vectorized using NumPy to improve performance.

This program demonstrates:

- Initialization of weights and biases for different network configurations.
- Forward propagation for computing activations layer-by-layer.
- Backward propagation for calculating gradients and updating weights.
- Training and prediction for evaluating the model.

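You can modify the number of layers and nodes in the network by adjusting the `layers` list (layer sizes, input layer first) and the `activations` list (one activation name, `"relu"` or `"sigmoid"`, per layer after the input). The training example uses a simple circle pattern for classification.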

In [None]:
from IPython.display import clear_output

In [None]:
# Install the necessary packages
%pip install numpy matplotlib

clear_output()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

## Methods Used:
Some of the methods used in this program are:
- `initialize_weights`: Initializes weights and biases for each layer.
- `sigmoid`: Computes the sigmoid activation function.
- `relu`: Computes the ReLU activation function.
- `forward_propagation`: Computes the forward propagation for the network.
- Cross-entropy loss: computed inline inside `train` every 100 epochs (there is no separate `compute_cost` function).
- `backward_propagation`: Computes the backward propagation for the network.
- `update_weights`: Updates the weights and biases using the gradients.
- `train`: Trains the neural network using gradient descent.
- `predict`: Predicts the output for a given input.

In [None]:
def sigmoid(x):
    """Compute the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The textbook formula evaluates ``exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``. This version only ever
    exponentiates non-positive values, so it cannot overflow:

    * ``x >= 0``: ``1 / (1 + exp(-x))``
    * ``x <  0``: ``exp(x) / (1 + exp(x))``

    Args:
        x: Scalar or NumPy array of real numbers.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    # exp(min(x, 0)) is 1 for x >= 0 and exp(x) otherwise; exp(-|x|) is the
    # matching term in the denominator. Both exponents are <= 0.
    return np.exp(np.minimum(x, 0)) / (1 + np.exp(-np.abs(x)))

def sigmoid_derivative(x):
    """Return the derivative of the sigmoid function evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.

    Args:
        x: Pre-activation value(s) passed straight to ``sigmoid``.

    Returns:
        The sigmoid slope at ``x``, same shape as ``sigmoid(x)``.
    """
    s = sigmoid(x)
    return s * (1 - s)

def relu(x):
    """Apply the rectified linear unit, ``max(0, x)``, element-wise.

    Args:
        x: Scalar or NumPy array.

    Returns:
        ``x`` with every negative entry replaced by zero.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x):
    """Return the ReLU slope: 1.0 where ``x`` is positive, 0.0 elsewhere.

    Args:
        x: NumPy array of pre-activation values (the comparison result must
            support ``.astype``).

    Returns:
        A float array shaped like ``x``.
    """
    is_active = x > 0
    return is_active.astype(float)

def initialize_weights(layers):
    """Create the initial parameters for every layer transition.

    For each consecutive pair of sizes ``(n_in, n_out)`` in ``layers`` this
    stores a weight matrix ``W{k}`` of shape ``(n_in, n_out)`` drawn from a
    standard normal and scaled by 0.01, and a zero bias row ``b{k}`` of shape
    ``(1, n_out)``. ``k`` starts at 1.

    Args:
        layers: Sequence of layer sizes, input layer first.

    Returns:
        Dict mapping ``"W1", "b1", "W2", "b2", ...`` to NumPy arrays.
    """
    weights = {}
    size_pairs = zip(layers[:-1], layers[1:])
    for index, (n_in, n_out) in enumerate(size_pairs, start=1):
        # Small random weights; biases start at zero.
        weights[f"W{index}"] = 0.01 * np.random.randn(n_in, n_out)
        weights[f"b{index}"] = np.zeros((1, n_out))
    return weights

def forward_propagation(X, weights, layers, activations):
    """Run a forward pass and cache every layer's pre- and post-activation.

    Layer ``i`` (1-based) computes ``Z_i = A_{i-1} . W_i + b_i`` and then
    applies ``activations[i - 1]`` to obtain ``A_i``.

    Args:
        X: Input array; it is stored as ``A0`` and multiplied by ``W1``.
        weights: Dict holding ``W{i}`` and ``b{i}`` for each layer.
        layers: Sequence of layer sizes; only its length is used here.
        activations: Sequence of activation names, one per layer after the
            input. Supported names are ``"relu"`` and ``"sigmoid"``.

    Returns:
        Dict with ``"A0"`` plus ``"Z{i}"`` and ``"A{i}"`` for every layer.

    Raises:
        ValueError: If an activation name is not supported. Previously an
            unknown name left the activation unset, which either failed with
            an unrelated error or silently reused the previous layer's output.
    """
    caches = {"A0": X}
    previous = X
    for i in range(1, len(layers)):
        activation = activations[i - 1]
        # Validate up front so a typo in the configuration fails loudly.
        if activation not in ("relu", "sigmoid"):
            raise ValueError(
                f"Unsupported activation {activation!r} for layer {i}; "
                "expected 'relu' or 'sigmoid'."
            )
        Z = previous.dot(weights[f"W{i}"]) + weights[f"b{i}"]
        if activation == "relu":
            A = relu(Z)
        else:
            A = sigmoid(Z)
        caches[f"Z{i}"] = Z
        caches[f"A{i}"] = A
        previous = A
    return caches

def backward_propagation(Y, weights, caches, layers, activations):
    """Compute cross-entropy gradients for every weight matrix and bias.

    Args:
        Y: Target array; ``Y.shape[0]`` is used as the number of examples.
        weights: Dict holding ``W{i}`` for each layer.
        caches: Dict produced by ``forward_propagation`` (``A{i}``/``Z{i}``).
        layers: Sequence of layer sizes; only its length is used here.
        activations: Sequence of activation names, one per layer after the
            input. Names other than ``"sigmoid"``/``"relu"`` are treated as
            having derivative 1, as before.

    Returns:
        Dict with ``"dW{i}"`` and ``"db{i}"`` for every layer.
    """
    gradients = {}
    m = Y.shape[0]
    last = len(layers) - 1
    output = caches[f"A{last}"]

    # With a sigmoid output the loss gradient w.r.t. Z simplifies to A - Y.
    # The unsimplified form divides by A and (1 - A), which is a division by
    # zero (and then inf * 0 = NaN) once the sigmoid saturates to 0 or 1.
    # This assumes caches["A{last}"] == sigmoid(caches["Z{last}"]), which is
    # what forward_propagation stores.
    sigmoid_output = last >= 1 and activations[last - 1] == "sigmoid"
    dA = None
    if not sigmoid_output:
        # Other output activations keep the original, unsimplified gradient.
        dA = -(Y / output - (1 - Y) / (1 - output))

    for i in range(last, 0, -1):
        activation = activations[i - 1]
        if i == last and sigmoid_output:
            dZ = output - Y
        elif activation == "sigmoid":
            dZ = dA * sigmoid_derivative(caches[f"Z{i}"])
        elif activation == "relu":
            dZ = dA * relu_derivative(caches[f"Z{i}"])
        else:
            dZ = dA
        gradients[f"dW{i}"] = caches[f"A{i-1}"].T.dot(dZ) / m
        gradients[f"db{i}"] = np.sum(dZ, axis=0, keepdims=True) / m
        if i > 1:
            # Propagate to the previous layer; nothing precedes layer 1.
            dA = dZ.dot(weights[f"W{i}"].T)
    return gradients

def update_weights(weights, gradients, learning_rate):
    """Apply one gradient-descent step to every parameter.

    Each entry ``weights[name]`` is decreased by
    ``learning_rate * gradients["d" + name]``. The dict passed in is modified
    and is also the object returned.

    Args:
        weights: Dict of parameters (``W{i}``/``b{i}``).
        gradients: Dict with a ``d``-prefixed entry for every key in
            ``weights``.
        learning_rate: Step size.

    Returns:
        The same ``weights`` dict, after the update.
    """
    for name in weights:
        step = learning_rate * gradients[f"d{name}"]
        weights[name] -= step
    return weights

def train(X, Y, layers, activations, epochs=1000, learning_rate=0.01):
    """Train the network with full-batch gradient descent.

    Every epoch runs a forward pass, a backward pass and one parameter
    update. Every 100th epoch (starting at epoch 0) the cross-entropy loss is
    recorded and printed; it is computed from that epoch's forward pass,
    i.e. from the activations obtained before the update was applied.

    Args:
        X: Training inputs, passed to ``forward_propagation``.
        Y: Training targets, passed to ``backward_propagation``.
        layers: Sequence of layer sizes, input layer first.
        activations: Sequence of activation names, one per layer after the
            input.
        epochs: Number of gradient-descent iterations.
        learning_rate: Step size for ``update_weights``.

    Returns:
        Tuple ``(weights, losses)``: the trained parameter dict and the list
        of recorded loss values.
    """
    weights = initialize_weights(layers)
    losses = []
    output_key = f"A{len(layers) - 1}"
    # Keeps log() finite when the output saturates to exactly 0 or 1; values
    # already inside [eps, 1 - eps] are left untouched.
    eps = 1e-15
    for epoch in range(epochs):
        caches = forward_propagation(X, weights, layers, activations)
        gradients = backward_propagation(Y, weights, caches, layers, activations)
        weights = update_weights(weights, gradients, learning_rate)
        if epoch % 100 == 0:
            probs = np.clip(caches[output_key], eps, 1 - eps)
            loss = -np.mean(Y * np.log(probs) + (1 - Y) * np.log(1 - probs))
            losses.append(loss)
            print(f"Epoch {epoch}, Loss: {loss}")
    return weights, losses

def predict(X, weights, layers, activations):
    """Classify inputs by thresholding the network output at 0.5.

    Args:
        X: Inputs, passed to ``forward_propagation``.
        weights: Parameter dict.
        layers: Sequence of layer sizes; its length selects the output layer.
        activations: Sequence of activation names, one per layer after the
            input.

    Returns:
        Boolean array that is True where the final activation exceeds 0.5.
    """
    output_key = f"A{len(layers) - 1}"
    final_activation = forward_propagation(X, weights, layers, activations)[output_key]
    return final_activation > 0.5

def plot_decision_boundary(X, Y, weights, layers, activations):
    """Plot the model's predicted regions together with the data points.

    Predictions are evaluated on a 200 x 200 grid spanning the first two
    columns of ``X`` padded by 1 on every side, drawn as filled contours, and
    overlaid with a scatter of the samples coloured by ``Y``.

    Args:
        X: 2-D array; columns 0 and 1 are used as plot coordinates.
        Y: Labels; flattened to colour the scatter points.
        weights: Parameter dict passed to ``predict``.
        layers: Sequence of layer sizes passed to ``predict``.
        activations: Sequence of activation names passed to ``predict``.
    """
    first_feature = X[:, 0]
    second_feature = X[:, 1]

    # Plot window: data range padded by one unit on every side.
    x_min = first_feature.min() - 1
    x_max = first_feature.max() + 1
    y_min = second_feature.min() - 1
    y_max = second_feature.max() + 1

    x_axis = np.linspace(x_min, x_max, 200)
    y_axis = np.linspace(y_min, y_max, 200)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # One row per grid point, so the grid can be classified in a single pass.
    grid = np.c_[xx.ravel(), yy.ravel()]
    grid_labels = predict(grid, weights, layers, activations)
    predictions = grid_labels.reshape(xx.shape)

    plt.contourf(xx, yy, predictions, alpha=0.8, cmap=plt.cm.Spectral)
    plt.scatter(first_feature, second_feature, c=Y.flatten(), edgecolor='k', cmap=plt.cm.Spectral)
    plt.title("Decision Boundary")
    plt.show()