In [1]:
import numpy as np

In [2]:
# ---------- Activation Functions ----------
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), applied element-wise.

    The input is first limited to the range [-50, 50] so that the
    exponential stays well inside floating-point range; inputs beyond
    those bounds therefore produce the same value as the bound itself.

    Args:
        x: Scalar or array-like of pre-activation values.

    Returns:
        Sigmoid of the (clipped) input, with the same shape as ``x``.
    """
    bounded = np.clip(x, -50, 50)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def sigmoid_derivative(y):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    Args:
        y: Value(s) already produced by ``sigmoid`` (i.e. the activation,
            not the pre-activation input).

    Returns:
        ``y * (1 - y)``, element-wise.
    """
    complement = 1 - y
    return y * complement

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Args:
        x: Scalar or array-like of pre-activation values.

    Returns:
        ``x`` with every negative entry replaced by zero.
    """
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(y):
    """Derivative of ReLU: 1.0 where the value is positive, 0.0 elsewhere.

    The training loop in this notebook passes the ReLU *output*; because
    the output is positive exactly where the input was positive, the
    result is the same as evaluating the derivative on the input.

    The argument is coerced with ``np.asarray`` so that Python scalars and
    plain lists are accepted, like the other activation helpers, instead
    of failing on ``.astype`` / the list comparison.

    Args:
        y: Scalar or array-like of ReLU activations.

    Returns:
        Float mask with the same shape as ``y`` (a NumPy float scalar for
        scalar input).
    """
    return (np.asarray(y) > 0).astype(float)


In [3]:
# ---------- Activation Dictionary ----------
# Registry keyed by activation name; each value is a
# (forward function, derivative function) pair.
activations = {
    name: (forward, derivative)
    for name, forward, derivative in (
        ("sigmoid", sigmoid, sigmoid_derivative),
        ("relu", relu, relu_derivative),
    )
}

# ---------- Input & Target ----------
# A single training example: one row of four input features and the
# one-element target it should map to (both float64, both 2-D).
X = np.array([[1.0, 1.0, 0.0, 1.0]], dtype=np.float64)   # Sample input
y = np.array([[1.0]], dtype=np.float64)                  # Desired output


In [4]:
# ---------- Hyperparameters ----------
# learning_rate scales every gradient step in the training loop;
# tolerance is the loss value below which training stops.
learning_rate, tolerance = 0.74, 1e-3

# ---------- Weight Initialization ----------
# Seeded generator so the initial weights are the same on every run.
rng = np.random.default_rng(seed=42)

# Layer sizes are 4 -> 3 -> 2 -> 1. Weights are drawn from a standard
# normal distribution, biases start at zero. The draw order
# (W1, then W2, then W3) determines which random numbers each matrix gets.
W1, b1 = rng.normal(loc=0, scale=1, size=(4, 3)), np.zeros(shape=(1, 3))

W2, b2 = rng.normal(loc=0, scale=1, size=(3, 2)), np.zeros(shape=(1, 2))

W3, b3 = rng.normal(loc=0, scale=1, size=(2, 1)), np.zeros(shape=(1, 1))

# ---------- Activation Selection ----------
# One (function, derivative) pair per layer, in layer order:
# first hidden layer, second hidden layer, output layer.
(act1, d_act1), (act2, d_act2), (act3, d_act3) = (
    activations[name] for name in ("relu", "relu", "sigmoid")
)

In [5]:
# ---------- Training Loop ----------
# Repeats forward pass -> loss -> backpropagation -> gradient step on the
# single sample (X, y) until the loss drops below `tolerance`.
#
# The loop is bounded: if the loss plateaus or turns into NaN/inf, the
# comparison `mse < tolerance` never becomes true, and an unbounded
# `while True` would hang the kernel. Raise the cap if a slower
# configuration legitimately needs more iterations.
max_epochs = 100000

for epoch in range(max_epochs):
    # ----- Forward Pass -----
    z1 = X.dot(W1) + b1
    h1 = act1(z1)

    z2 = h1.dot(W2) + b2
    h2 = act2(z2)

    z3 = h2.dot(W3) + b3
    o = act3(z3)

    # ----- Error Calculation -----
    # Half of the mean squared error; the 0.5 factor makes the gradient
    # with respect to the output simply (o - y).
    error = y - o
    mse = 0.5 * np.mean(error ** 2)

    # Fail fast on a diverged run instead of spinning until the cap.
    if not np.isfinite(mse):
        raise FloatingPointError(
            f"Loss became non-finite at epoch {epoch}; "
            "try a smaller learning_rate."
        )

    # ----- Backpropagation -----
    # The derivative helpers take the layer *outputs* (o, h2, h1),
    # not the pre-activations.
    delta_out = (o - y) * d_act3(o)
    delta_h2 = delta_out.dot(W3.T) * d_act2(h2)
    delta_h1 = delta_h2.dot(W2.T) * d_act1(h1)

    # ----- Weight Updates -----
    # All deltas were computed above from the pre-update weights, so the
    # order of these in-place updates does not matter.
    W3 -= learning_rate * h2.T.dot(delta_out)
    b3 -= learning_rate * delta_out

    W2 -= learning_rate * h1.T.dot(delta_h2)
    b2 -= learning_rate * delta_h2

    W1 -= learning_rate * X.T.dot(delta_h1)
    b1 -= learning_rate * delta_h1

    # ----- Stopping Condition -----
    # NOTE: `o` and `mse` printed here come from the forward pass at the
    # start of this epoch, while the parameters printed below already
    # include this epoch's update (they are one step further along).
    if mse < tolerance:
        print("Converged at epoch:", epoch)
        print("Final Output (O):", o)
        print("Final Error (MSE):", mse)
        print("\nUpdated W1:\n", W1)
        print("Updated b1:\n", b1)
        print("Updated W2:\n", W2)
        print("Updated b2:\n", b2)
        print("Updated W3:\n", W3)
        print("Updated b3:\n", b3)
        break
else:
    # Reached only when the loop ran out of epochs without `break`.
    raise RuntimeError(
        f"Training did not reach tolerance {tolerance} within "
        f"{max_epochs} epochs (last loss: {mse})."
    )




Converged at epoch: 38
Final Output (O): [[0.95530503]]
Final Error (MSE): 0.0009988201092671605

Updated W1:
 [[ 0.40040796 -1.03998411  1.04724543]
 [ 1.0362556  -1.95103519 -1.00538527]
 [ 0.1278404  -0.31624259 -0.01680116]
 [-0.75735305  0.87939797  1.07458617]]
Updated b1:
 [[0.09569088 0.         0.29679424]]
Updated W2:
 [[ 0.35528704  1.12400792]
 [ 0.46750934 -0.85929246]
 [ 0.77805364 -0.96139341]]
Updated b2:
 [[ 0.57116743 -0.00817777]]
Updated W3:
 [[ 1.28902952]
 [-0.02276028]]
Updated b3:
 [[0.57550188]]
