In [10]:
# Section 1: Gradient Checking
import numpy as np
from network import Network
from layers import Dense
from activations import Sigmoid, Tanh
from losses import MSE

# XOR dataset: the four 2-bit inputs and their XOR targets
X = np.array([[0,0],
              [0,1],
              [1,0],
              [1,1]])
y = np.array([[0],
              [1],
              [1],
              [0]])

# Small network for XOR: 2 inputs -> 4 hidden (tanh) -> 1 output (sigmoid)
model = Network([
    Dense(2, 4),
    Tanh(),
    Dense(4, 1),
    Sigmoid()
])

loss_fn = MSE()
epsilon = 1e-5          # finite-difference step
GRAD_CHECK_TOL = 1e-7   # maximum relative difference accepted as a pass

# Forward pass
pred = model.forward(X)
loss = loss_fn.forward(y, pred)

# Backward pass to compute analytical gradients
grad_loss = loss_fn.backward(y, pred)
model.backward(grad_loss)

# Pick first layer weights for demonstration.
# W is deliberately the live array: the loop below perturbs it in place so
# that model.forward(X) sees the changed weight.
W = model.layers[0].W
# Snapshot the analytical gradient so the comparison uses the values produced
# by the backward pass above, independent of anything the layer does to dW
# afterwards. (Fails loudly here if backward() left dW as None.)
grad_analytic = model.layers[0].dW.copy()

# Compute numerical gradient with central differences:
#   dL/dW[i,j] ~= (L(W[i,j] + eps) - L(W[i,j] - eps)) / (2 * eps)
numerical_grad = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        w_orig = W[i, j]  # remember the exact value so it can be restored bit-for-bit

        W[i, j] = w_orig + epsilon
        loss_plus = loss_fn.forward(y, model.forward(X))

        W[i, j] = w_orig - epsilon
        loss_minus = loss_fn.forward(y, model.forward(X))

        # Restore by assignment rather than by "+= epsilon": adding and
        # subtracting epsilon does not round-trip exactly in floating point
        # and would leave the weights slightly altered after the check.
        W[i, j] = w_orig

        numerical_grad[i, j] = (loss_plus - loss_minus) / (2 * epsilon)

# Compare with analytical gradient
print("Analytical Gradient:\n", grad_analytic)
print("Numerical Gradient:\n", numerical_grad)
# Relative difference: ||a - n|| / (||a|| + ||n||).
# If both gradients are exactly zero the denominator is zero; they agree, so
# report 0.0 instead of dividing by zero and getting NaN.
denom = np.linalg.norm(grad_analytic) + np.linalg.norm(numerical_grad)
diff = np.linalg.norm(grad_analytic - numerical_grad) / denom if denom > 0 else 0.0
print("Relative Difference:", diff)
if diff < GRAD_CHECK_TOL:
    print("Gradient check passed!")
else:
    print("Gradient check failed.")


Analytical Gradient:
 [[-0.07902713  0.03102881 -0.03066677  0.01235341]
 [-0.02321936  0.02022031 -0.0303712   0.01507246]]
Numerical Gradient:
 [[-0.07902713  0.03102881 -0.03066677  0.01235341]
 [-0.02321936  0.02022031 -0.0303712   0.01507246]]
Relative Difference: 3.8799112938026197e-11
Gradient check passed!


In [11]:
#Section 2: The XOR problem (training and results) using my libraries
import numpy as np
from network import Network
from layers import Dense
from activations import Sigmoid, Tanh
from losses import MSE
from optimizer import SGD



# XOR dataset: the four 2-bit inputs and their XOR targets
X = np.array([[0,0],
              [0,1],
              [1,0],
              [1,1]])

y = np.array([[0],
              [1],
              [1],
              [0]])

# Training hyperparameters, kept in one place so the loop and its comments
# cannot drift apart.
LEARNING_RATE = 0.1
N_EPOCHS = 5000
LOG_EVERY = 500   # print the loss once every LOG_EVERY epochs

# Build network: 2 → 4 → 1
# 2 input neurons (x1, x2), 4 hidden neurons, 1 output neuron
model = Network([
    Dense(2, 4),
    # tanh provides the non-linearity for the 4 hidden neurons
    Tanh(),
    # takes the 4 hidden activations and produces a single output value
    Dense(4, 1),
    # sigmoid squashes that value to between 0 and 1
    Sigmoid()
])

loss_fn = MSE()
optimizer = SGD(lr=LEARNING_RATE)

# NOTE(review): no RNG seed is set in this cell, so if Dense initialises its
# weights randomly the printed losses/predictions will vary between runs.

# Training loop: full-batch updates on the four XOR samples
for epoch in range(N_EPOCHS):
    pred = model.forward(X)           # y^ (network prediction)
    loss = loss_fn.forward(y, pred)   # loss before this epoch's update
    grad = loss_fn.backward(y, pred)  # dL/dy^
    model.backward(grad)              # backpropagate through the network
    model.update(optimizer)           # apply the optimizer step
    # Report the loss every LOG_EVERY epochs (epoch 0, 500, 1000, ...)
    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch}, Loss = {loss:.5f}")

# Final predictions
print("\nFinal Predictions:")
print(model.forward(X))

Epoch 0, Loss = 0.31052
Epoch 500, Loss = 0.09946
Epoch 1000, Loss = 0.02994
Epoch 1500, Loss = 0.01284
Epoch 2000, Loss = 0.00742
Epoch 2500, Loss = 0.00503
Epoch 3000, Loss = 0.00374
Epoch 3500, Loss = 0.00295
Epoch 4000, Loss = 0.00242
Epoch 4500, Loss = 0.00205

Final Predictions:
[[0.02308154]
 [0.95173663]
 [0.95966229]
 [0.05075619]]
