In [4]:
import numpy as np

In [6]:
W1 = np.random.randn(2, 2) * 0.01
print(W1)

[[ 0.00494835 -0.00898249]
 [ 0.00832973  0.00873026]]


In [7]:
import numpy as np

# XOR data
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0],[1],[1],[0]])

# Activation functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    s = sigmoid(x)
    return s * (1 - s)

# Initialize weights and biases
np.random.seed(42)
W1 = np.random.randn(2, 2) * 0.01
b1 = np.zeros((1, 2))
W2 = np.random.randn(2, 1) * 0.01
b2 = np.zeros((1, 1))

# Training parameters
learning_rate = 0.1
max_epochs = 10000
loss_threshold = 0.01

# Training loop
for epoch in range(max_epochs):
    # ----- FORWARD PASS -----
    Z1 = np.dot(X, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)  # Output prediction (y_hat)

    # ----- LOSS (Binary Cross-Entropy) -----
    m = y.shape[0]
    loss = -np.mean(y * np.log(A2 + 1e-8) + (1 - y) * np.log(1 - A2 + 1e-8))

    # Optional: print loss every 1000 epochs
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.5f}")

    # ----- TERMINATION -----
    if loss < loss_threshold:
        print(f"Training stopped at epoch {epoch}, Loss: {loss:.5f}")
        break

    # ----- BACKPROPAGATION -----
    dZ2 = A2 - y
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * sigmoid_derivative(Z1)
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    # ----- UPDATE WEIGHTS -----
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

# Final predictions
Z1 = np.dot(X, W1) + b1
A1 = sigmoid(Z1)
Z2 = np.dot(A1, W2) + b2
y_pred = sigmoid(Z2)

print("\nFinal Predictions (after training):")
print(np.round(y_pred))


Epoch 0, Loss: 0.69315
Epoch 1000, Loss: 0.69315
Epoch 2000, Loss: 0.69315
Epoch 3000, Loss: 0.69315
Epoch 4000, Loss: 0.69315
Epoch 5000, Loss: 0.69315
Epoch 6000, Loss: 0.69315
Epoch 7000, Loss: 0.69315
Epoch 8000, Loss: 0.69315
Epoch 9000, Loss: 0.69315

Final Predictions (after training):
[[1.]
 [0.]
 [1.]
 [0.]]


In [8]:
def predict(x):
    z1 = np.dot(x, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)
    return np.round(a2)

# Try new input
print("Predict [1, 1]:", predict(np.array([[1, 1]])))
print("Predict [0, 1]:", predict(np.array([[0, 1]])))


Predict [1, 1]: [[0.]]
Predict [0, 1]: [[0.]]
