#### Forward pass

In [4]:
import numpy as np

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise.

    The exponential is only ever evaluated at ``-|x|``, so it cannot
    overflow. The naive form overflows inside ``exp(-x)`` for very negative
    inputs and emits a RuntimeWarning.

    Args:
        x: Real scalar or array-like.

    Returns:
        Sigmoid of ``x`` with the same shape as ``x`` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer input (e.g. 0/1 features) is promoted to float.
        x = x.astype(float)
    decay = np.exp(-np.abs(x))  # always in [0, 1]
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same value.
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return out[()]  # unwrap a 0-d result so scalar input gives a scalar

def tanh(x):
    """Hyperbolic tangent activation, computed element-wise by ``np.tanh``."""
    activation = np.tanh(x)
    return activation

def forward(X, W1, B1, W2, B2, verbose=True):
    """Run one forward pass through the two-layer network.

    Computes ``A2 = sigmoid(dot(tanh(dot(X, W1) + B1), W2) + B2)``.

    Args:
        X: Input matrix, multiplied on the right by ``W1``.
        W1: Hidden-layer weight matrix.
        B1: Hidden-layer bias, added to ``dot(X, W1)``.
        W2: Output-layer weight matrix.
        B2: Output-layer bias, added to ``dot(A1, W2)``.
        verbose: When True (the default, matching the original behaviour)
            print the shape of every intermediate array. Pass False to run
            silently, e.g. when calling this inside a training loop.

    Returns:
        The output-layer activations ``A2``.
    """
    Z1 = np.dot(X, W1) + B1   # hidden pre-activation
    A1 = tanh(Z1)             # hidden activation
    Z2 = np.dot(A1, W2) + B2  # output pre-activation
    A2 = sigmoid(Z2)          # output activation

    if verbose:
        for label, value in (("Z1", Z1), ("A1", A1), ("Z2", Z2), ("A2", A2)):
            print(label, value.shape)

    return A2

# Seed NumPy's global RNG so the random initial weights are reproducible.
np.random.seed(39)

# XOR truth table: four samples with two input features each.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([[0], [1], [1], [0]])  # target output for each row of X

# Random weights are drawn W1 first, then W2, so the seeded stream is
# consumed in a fixed order; biases start at zero.
W1, W2 = np.random.randn(2, 2), np.random.randn(2, 1)
B1, B2 = np.zeros(shape=(1, 2)), np.zeros(shape=(1, 1))

# Forward pass with the untrained weights, printed above the targets.
# The variable name `pridict` (sic) is kept unchanged.
pridict = forward(X, W1, B1, W2, B2)
print(*pridict)
print("\n", *y)

Z1 (4, 2)
A1 (4, 2)
Z2 (4, 1)
A2 (4, 1)
[0.5] [0.55219041] [0.74351901] [0.78891869]

 [0] [1] [1] [0]


In [26]:
import numpy as np

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise.

    The exponential is only ever evaluated at ``-|x|``, so it cannot
    overflow. The naive form overflows inside ``exp(-x)`` for very negative
    inputs and emits a RuntimeWarning.

    Args:
        x: Real scalar or array-like.

    Returns:
        Sigmoid of ``x`` with the same shape as ``x`` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer input (e.g. 0/1 features) is promoted to float.
        x = x.astype(float)
    decay = np.exp(-np.abs(x))  # always in [0, 1]
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same value.
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return out[()]  # unwrap a 0-d result so scalar input gives a scalar

def sigmoid_derivative(x):
    """Derivative of the logistic sigmoid at ``x``: ``s * (1 - s)``.

    ``s = sigmoid(x)`` is evaluated once and reused instead of being
    computed twice.

    Args:
        x: Pre-activation value(s); passed straight to ``sigmoid``.

    Returns:
        ``sigmoid(x) * (1 - sigmoid(x))``, element-wise.
    """
    s = sigmoid(x)
    return s * (1 - s)

def tanh(x):
    """Element-wise hyperbolic tangent; a thin wrapper around ``np.tanh``."""
    result = np.tanh(x)
    return result

def tanh_derivative(x):
    """Derivative of tanh at ``x``, i.e. ``1 - tanh(x)**2``, element-wise."""
    t = np.tanh(x)
    return 1 - t ** 2

def binary_cross_entropy(y_true, y_pred, epsilon=1e-8):
    """Mean binary cross-entropy between targets and predicted probabilities.

    Predictions are clipped to ``[epsilon, 1 - epsilon]`` before the logs are
    taken, so ``log(0)`` never occurs and the loss is never negative. Adding
    ``epsilon`` inside the logs instead evaluates ``log(1 + epsilon) > 0``
    for a saturated prediction, which makes a perfect prediction produce a
    slightly negative loss.

    Args:
        y_true: Target labels (0 or 1), array-like.
        y_pred: Predicted probabilities, broadcastable against ``y_true``.
        epsilon: Clipping margin that keeps the logs finite.

    Returns:
        The mean of the element-wise cross-entropy as a scalar.
    """
    y_true = np.asarray(y_true)
    p = np.clip(np.asarray(y_pred, dtype=float), epsilon, 1 - epsilon)
    return -np.mean(y_true * np.log(p) + (1 - y_true) * np.log(1 - p))

# Data: XOR truth table (four samples with two input features each)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])
n_samples = X.shape[0]  # gradients below are averaged over the whole batch

# Initialize weights (seeded so every run starts from the same values)
np.random.seed(39)
W1 = np.random.randn(2, 2)
B1 = np.zeros((1, 2))
W2 = np.random.randn(2, 1)
B2 = np.zeros((1, 1))

# Training parameters
lr = 0.1
epochs = 10000

# Training loop: full-batch gradient descent
for epoch in range(epochs):
    # Forward pass
    Z1 = np.dot(X, W1) + B1
    A1 = tanh(Z1)
    Z2 = np.dot(A1, W2) + B2
    A2 = sigmoid(Z2)

    # Compute loss
    loss = binary_cross_entropy(y, A2)

    # Backpropagation
    # With a sigmoid output and binary cross-entropy, the gradient of the
    # loss w.r.t. Z2 reduces to (A2 - y); the 1/n of the mean is applied
    # when the parameter gradients are formed.
    dZ2 = A2 - y
    dW2 = np.dot(A1.T, dZ2) / n_samples
    dB2 = np.sum(dZ2, axis=0, keepdims=True) / n_samples

    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * tanh_derivative(Z1)
    dW1 = np.dot(X.T, dZ1) / n_samples
    dB1 = np.sum(dZ1, axis=0, keepdims=True) / n_samples

    # Update weights and biases
    W1 -= lr * dW1
    B1 -= lr * dB1
    W2 -= lr * dW2
    B2 -= lr * dB2

    # Print loss every 1000 epochs
    if epoch % 1000 == 0:
        print(f"Epoch {epoch} Loss: {loss:.4f}")

# Final prediction: run the forward pass again with the trained parameters.
# The A2 left over from the loop was computed before the last weight update
# (and does not exist at all when epochs == 0).
Z1 = np.dot(X, W1) + B1
A1 = tanh(Z1)
Z2 = np.dot(A1, W2) + B2
A2 = sigmoid(Z2)

print("\nFinal Predictions:")
print(*A2.round())
print(*y)

Epoch 0 Loss: 0.7847
Epoch 1000 Loss: 0.0815
Epoch 2000 Loss: 0.0228
Epoch 3000 Loss: 0.0131
Epoch 4000 Loss: 0.0092
Epoch 5000 Loss: 0.0071
Epoch 6000 Loss: 0.0057
Epoch 7000 Loss: 0.0048
Epoch 8000 Loss: 0.0042
Epoch 9000 Loss: 0.0037

Final Predictions:
[0.] [1.] [1.] [0.]
[0] [1] [1] [0]
