In [24]:
import numpy as np
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def sigmoid_derivative(y):
    """Return y * (1 - y).

    This is the derivative of the logistic function expressed in terms of
    its output, so *y* is expected to be a sigmoid output rather than the
    pre-activation value.
    """
    complement = 1 - y
    return y * complement
def tanh(x):
    """Return the hyperbolic tangent of *x*, applied element-wise."""
    activated = np.tanh(x)
    return activated
def tanh_derivative(y):
    """Return 1 - y**2.

    This is the derivative of tanh expressed in terms of its output, so *y*
    is expected to be a tanh output rather than the pre-activation value.
    """
    squared = y ** 2
    return 1 - squared
def relu(x):
    """Return max(0, x) element-wise (rectified linear unit)."""
    rectified = np.maximum(0, x)
    return rectified
def relu_derivative(x):
    """Return the ReLU derivative: 1.0 where x > 0, otherwise 0.0.

    Args:
        x: A NumPy array, a Python scalar, or a (nested) sequence of numbers.

    Returns:
        A float NumPy array with the same shape as *x* (0-dimensional for a
        scalar input).
    """
    # Coerce first: a plain Python number or list compared with 0 does not
    # produce an ndarray, so calling `.astype` on the result would fail.
    positive = np.asarray(x) > 0
    return positive.astype(float)
def leaky_relu(x, alpha=0.01):
    """Return x where x > 0 and alpha * x elsewhere, element-wise.

    Args:
        x: Input value(s).
        alpha: Slope applied to the non-positive part (default 0.01).
    """
    is_positive = x > 0
    scaled = alpha * x
    return np.where(is_positive, x, scaled)
def leaky_relu_derivative(x, alpha=0.01):
    """Return 1 where x > 0 and alpha elsewhere, element-wise.

    Args:
        x: Input value(s).
        alpha: Slope of the non-positive part (default 0.01).
    """
    is_positive = x > 0
    return np.where(is_positive, 1, alpha)
def linear(x):
    """Identity activation: return *x* unchanged (the same object)."""
    passthrough = x
    return passthrough
def linear_derivative(x):
    """Return an array of ones with the same shape and dtype as *x*.

    The identity activation has a constant slope of 1 everywhere.
    """
    slope = np.ones_like(x)
    return slope
# Train a 4-2-1 feed-forward network on a single sample with plain
# backpropagation, reporting the state after the first epoch and after the
# epoch in which the output error drops below `tolerance`.

# Hidden-layer activation and its derivative. The derivative is called with
# the hidden-layer OUTPUT `h` (not the pre-activation `z1`), so the pair must
# be chosen accordingly. The output layer always uses the sigmoid.
activation = sigmoid
activation_derivative = sigmoid_derivative

# One training sample (1 x 4) and its target (1 x 1).
X = np.array([[1, 1, 0, 1]])
y = np.array([[1]])

lr = 0.8            # learning rate
tolerance = 0.001   # stop once |target - output| falls below this
# Safety cap so the loop cannot spin forever when the error never reaches
# `tolerance` (for example with a different activation or learning rate).
max_epochs = 1000000

# Initial parameters: input -> hidden (4 x 2) and hidden -> output (2 x 1).
W1 = np.array([
    [0.3, 0.1],
    [-0.2, 0.4],
    [0.2, -0.3],
    [0.1, 0.4]
])
b1 = np.array([[0.2, 0.1]])
W2 = np.array([
    [0.3],
    [0.2]
])
b2 = np.array([[0.3]])


def _print_epoch_report(title, epoch, o, error, W1, b1, W2, b2):
    """Print the epoch number, its output/error and the updated parameters."""
    print("\n--- " + title + " ---")
    print("Epoch:", epoch)
    print("Output (o):", o)
    print("Error (D-o):", error)
    print("Updated W1:\n", W1)
    print("Updated b1:\n", b1)
    print("Updated W2:\n", W2)
    print("Updated b2:\n", b2)


epoch = 0
first_epoch_printed = False
while epoch < max_epochs:
    epoch += 1

    # Forward pass.
    z1 = np.dot(X, W1) + b1
    h = activation(z1)
    z2 = np.dot(h, W2) + b2
    o = sigmoid(z2)

    # Backward pass. `delta_hidden` must be computed before W2 is updated so
    # that it uses the weights that produced this epoch's output.
    error = y - o
    delta_out = error * sigmoid_derivative(o)
    delta_hidden = delta_out.dot(W2.T) * activation_derivative(h)

    # Parameter updates (the weights are modified in place).
    W2 += lr * h.T.dot(delta_out)
    b2 += lr * delta_out
    W1 += lr * X.T.dot(delta_hidden)
    b1 += lr * delta_hidden

    # The reports show the output/error computed BEFORE this epoch's update
    # together with the parameters AFTER it.
    if not first_epoch_printed:
        _print_epoch_report("First Epoch", epoch, o, error, W1, b1, W2, b2)
        first_epoch_printed = True
    if abs(error[0][0]) < tolerance:
        _print_epoch_report(
            "Last Epoch (converged)", epoch, o, error, W1, b1, W2, b2
        )
        break
else:
    # Reached only when the loop ends without `break`, i.e. the error never
    # dropped below `tolerance` within `max_epochs` epochs.
    _print_epoch_report(
        "Stopped (max_epochs reached, not converged)",
        epoch, o, error, W1, b1, W2, b2
    )



--- First Epoch ---
Epoch: 1
Output (o): [[0.65154061]]
Error (D-o): [[0.34845939]]
Updated W1:
 [[ 0.30456184  0.10248872]
 [-0.19543816  0.40248872]
 [ 0.2        -0.3       ]
 [ 0.10456184  0.40248872]]
Updated b1:
 [[0.20456184 0.10248872]]
Updated W2:
 [[0.33789101]
 [0.24626878]]
Updated b2:
 [[0.36329011]]

--- Last Epoch (converged) ---
Epoch: 216600
Output (o): [[0.999]]
Error (D-o): [[0.001]]
Updated W1:
 [[ 0.7541691   0.46217126]
 [ 0.2541691   0.76217126]
 [ 0.2        -0.3       ]
 [ 0.5541691   0.76217126]]
Updated b1:
 [[0.6541691  0.46217126]]
Updated W2:
 [[2.20185207]
 [2.28110474]]
Updated b2:
 [[2.82158791]]
