In [2]:
import numpy as np

# Activation functions
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which always lies in (0, 1],
    so large-magnitude negative inputs cannot overflow inside ``np.exp``
    (the naive form evaluates ``exp(1000)`` for ``x = -1000``).  For
    ``x >= 0`` the expression is exactly the naive one.

    Args:
        x: Real scalar or array-like.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # never larger than 1, so no overflow
    # x >= 0: 1 / (1 + e^-x)      x < 0: e^x / (1 + e^x)  (same value, safe form)
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays as arrays.
    return result[()]

def sigmoid_derivative(y):
    """Return the sigmoid's slope given its *output* ``y = sigmoid(x)``.

    Uses the identity sigmoid'(x) = y * (1 - y), so the argument must be the
    already-activated value, not the pre-activation.
    """
    complement = 1 - y
    return y * complement

def relu(x):
    """Rectified linear unit: element-wise maximum of zero and ``x``."""
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Return the ReLU slope at ``x``: 1.0 where ``x > 0``, otherwise 0.0.

    The input is coerced with ``np.asarray`` so plain Python numbers are
    accepted as well as arrays; a bare ``x > 0`` on a Python scalar yields a
    built-in ``bool``, which has no ``astype`` method.

    Args:
        x: Real scalar or array-like of pre-activation values.

    Returns:
        Float NumPy scalar for scalar input, otherwise a float array shaped
        like ``x``.
    """
    return (np.asarray(x) > 0).astype(float)

# Single training sample: one row of four input features and its target.
X = np.array([[1, 1, 0, 1]])
Y = np.array([[1]])

# Hyper-parameters: l is the step size applied to every update, t is the
# absolute-error threshold below which training stops.
l, t = 0.69, 0.001

# First hidden layer: 4 inputs -> 3 units (one weight row per input feature).
W1 = np.array(
    [
        [0.3, 0.1, -0.2],
        [-0.2, 0.4, 0.3],
        [0.2, -0.3, 0.1],
        [0.1, 0.4, -0.1],
    ]
)
B1 = np.array([[0.2, 0.1, 0.05]])

# Second hidden layer: 3 units -> 2 units.
W2 = np.array(
    [
        [0.3, -0.2],
        [0.1, 0.4],
        [-0.3, 0.2],
    ]
)
B2 = np.array([[0.1, -0.2]])

# Output layer: 2 units -> 1 output, with its bias C.
V = np.array([[0.2], [-0.3]])
C = np.array([[0.1]])

# Train the 4-3-2-1 network on the single sample until |Y - o| drops below t.
#
# Upper bound on the number of epochs.  Without it, a run that never reaches
# the tolerance (e.g. if both second-layer ReLU units become inactive and the
# hidden-layer gradients vanish) would loop forever.
max_epochs = 1000000

e = 0      # epoch counter
p = False  # set to True once the first-epoch summary has been printed

while e < max_epochs:
    e += 1

    # Forward pass: ReLU -> ReLU -> sigmoid.
    z1 = np.dot(X, W1) + B1
    h1 = relu(z1)

    z2 = np.dot(h1, W2) + B2
    h2 = relu(z2)

    u = np.dot(h2, V) + C
    o = sigmoid(u)

    # Error (target minus output, so the updates below are additive).
    d = Y - o

    # Backpropagation: each delta is the error signal from the layer above
    # multiplied by the local activation slope.  sigmoid_derivative takes the
    # activated output o; relu_derivative takes the pre-activations z.
    do = d * sigmoid_derivative(o)
    dh2 = np.dot(do, V.T) * relu_derivative(z2)
    dh1 = np.dot(dh2, W2.T) * relu_derivative(z1)

    # Update weights.  All deltas were computed above from the pre-update
    # parameters, so the in-place updates cannot contaminate each other.
    V += l * np.dot(h2.T, do)
    C += l * do

    W2 += l * np.dot(h1.T, dh2)
    B2 += l * dh2

    W1 += l * np.dot(X.T, dh1)
    B1 += l * dh1

    # First epoch print
    if not p:
        print("\n--- First Epoch ---")
        print("Output:", o.item())
        print("Error:", d.item())
        p = True

    # Stop condition.  NOTE: this runs after the update, so the stored
    # parameters are one step past the output reported here.
    if abs(d.item()) < t:
        print("\n--- Last Epoch ---")
        print("Epoch:", e)
        print("Output:", o.item())
        print("Error:", d.item())
        break
else:
    # Reached only when the loop ends without hitting the break above.
    print("\n--- Stopped without converging ---")
    print("Epoch:", e)
    print("Output:", o.item())
    print("Error:", d.item())



--- First Epoch ---
Output: 0.5304622260550769
Error: 0.46953777394492313

--- Last Epoch ---
Epoch: 18192
Output: 0.9990000306062666
Error: 0.0009999693937333687
