# Section 1: Gradient Checking

Step 0: Importing libraries

In [7]:
import numpy as np
from lib import Network
from lib.layers import Dense
from lib.activations import Tanh
from lib.losses import Loss_MSE

Step 1: Checking the gradient

In [None]:
def check_gradients(epsilon=1e-4, tolerance=1e-5):
    """Check the Dense layer's backprop weight gradient against finite differences.

    Builds a one-sample ``Dense(2, 1) -> Tanh -> Loss_MSE`` pipeline, obtains the
    analytical gradient ``dL/dW`` from the backward pass, estimates the same
    gradient numerically with central differences, and prints both gradients,
    their relative error and a success/warning verdict.

    Args:
        epsilon: Perturbation applied to each weight for the central difference
            ``(L(w + epsilon) - L(w - epsilon)) / (2 * epsilon)``.
        tolerance: The gradients are reported as matching when the relative
            error is strictly below this value.

    Returns:
        None. All results are printed.
    """
    print("--- Starting Gradient Checking ---")

    # 1. Setup a small dummy network: Input -> Dense -> Tanh -> Output
    input_size = 2
    output_size = 1
    layer = Dense(input_size, output_size)
    activation = Tanh()
    loss_fn = Loss_MSE()

    # Dummy data
    x = np.array([[0.5, -0.2]])  # One sample
    y = np.array([[0.1]])        # One target

    def loss_at_current_weights():
        """Run a full forward pass and return the loss for the layer's current weights."""
        z_out = layer.forward(x)
        a_out = activation.forward(z_out)
        return loss_fn.forward(y, a_out)

    # 2. Forward & Backward Pass (Analytical Gradient)
    z = layer.forward(x)
    a = activation.forward(z)
    # Return value is not needed here; the call is kept in case the loss object
    # caches state during forward (not visible from this notebook).
    loss_fn.forward(y, a)

    grad_loss = loss_fn.backward(y, a)
    grad_activation = activation.backward(grad_loss)
    # Assumes Dense.backward stores dL/dW in `weights_gradient` (that attribute is
    # what this check reads); the input gradient it returns is not used.
    layer.backward(grad_activation)

    # Snapshot the gradient so the forward passes below cannot alias or alter it.
    analytical_gradient = np.array(layer.weights_gradient, copy=True)

    # 3. Numerical Gradient Calculation
    # Formula: (Loss(W+epsilon) - Loss(W-epsilon)) / (2*epsilon)
    numerical_gradient = np.zeros_like(layer.weights)

    # Perturb one weight at a time; np.ndindex walks every index in row-major
    # order regardless of the weight array's number of dimensions.
    for index in np.ndindex(*layer.weights.shape):
        original_weight = layer.weights[index]

        layer.weights[index] = original_weight + epsilon
        loss_plus = loss_at_current_weights()

        layer.weights[index] = original_weight - epsilon
        loss_minus = loss_at_current_weights()

        # Restore the weight before moving on so perturbations never accumulate.
        layer.weights[index] = original_weight

        numerical_gradient[index] = (loss_plus - loss_minus) / (2 * epsilon)

    # 4. Compare
    print("Analytical Gradient:\n", analytical_gradient)
    print("Numerical Gradient:\n", numerical_gradient)

    # Relative error: ||g_a - g_n|| / (||g_a|| + ||g_n||)
    numerator = np.linalg.norm(analytical_gradient - numerical_gradient)
    denominator = np.linalg.norm(analytical_gradient) + np.linalg.norm(numerical_gradient)
    if denominator == 0:
        # Both gradients are exactly zero, so they agree; avoid 0/0 -> NaN, which
        # would otherwise be reported as a mismatch. (A NaN denominator is not
        # equal to 0 and still falls through to the warning branch.)
        relative_error = 0.0
    else:
        relative_error = numerator / denominator

    print(f"\nRelative Error: {relative_error}")

    if relative_error < tolerance:
        print("[SUCCESS] Gradients match! Backpropagation is correct.")
    else:
        print("[WARNING] Gradients do not match. Check backward formulas.")

# Run the gradient check; it prints both gradients, their relative error and a verdict.
check_gradients()

--- Starting Gradient Checking ---
Analytical Gradient:
 [[-0.28111934]
 [ 0.11244773]]
Numerical Gradient:
 [[-0.28111934]
 [ 0.11244773]]

Relative Error: 1.081162651403961e-09
[SUCCESS] Gradients match! Backpropagation is correct.


# Section 2: Solving XOR Problem

Step 0: Importing third-party libraries and the local `lib` package

In [1]:
import sys
import os

# Add the parent directory (project root) to sys.path
sys.path.append(os.path.abspath('..'))

import numpy as np
import matplotlib.pyplot as plt
from lib.network import Network
from lib.layers import Dense
from lib.activations import Tanh, Sigmoid
from lib.losses import Loss_MSE
from lib.optimizer import SGD

Step 1: Data

In [2]:
# XOR truth table: every row of X is one (a, b) input pair.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
# Targets as a (4, 1) column: a XOR b, computed from the two input columns.
y = X[:, [0]] ^ X[:, [1]]

Step 2: Model

In [3]:
# Build the 2-4-1 network: components are added in the order data flows through them.
model = Network()
for component in (
    Dense(2, 4),  # Layer 1: Dense (Input 2 -> Hidden 4)
    Tanh(),       # Activation 1: Tanh
    Dense(4, 1),  # Layer 2: Dense (Hidden 4 -> Output 1)
    Sigmoid(),    # Activation 2: Sigmoid (Output between 0 and 1)
):
    model.add(component)

Step 3: Training

In [4]:
# Training hyperparameters
LEARNING_RATE = 0.1
EPOCHS = 10000  # 10,000 epochs is usually enough for XOR to converge

# Mean-squared-error loss, optimised with the SGD optimizer
loss_function = Loss_MSE()
optimizer = SGD(learning_rate=LEARNING_RATE)

# Attach the loss and the optimizer to the model before training
model.use(loss_function, optimizer)

# Fit the network on the four XOR samples
print("Starting Training...")
model.train(X, y, epochs=EPOCHS)
print("Training Complete.")

Starting Training...
Epoch 100/10000 error=0.24070816695057243
Epoch 200/10000 error=0.21216375263817083
Epoch 300/10000 error=0.17758403387204919
Epoch 400/10000 error=0.13878605450350057
Epoch 500/10000 error=0.1015137696656771
Epoch 600/10000 error=0.07202243450940929
Epoch 700/10000 error=0.05160046712753476
Epoch 800/10000 error=0.038150160156227776
Epoch 900/10000 error=0.02925258092186219
Epoch 1000/10000 error=0.0231990456170884
Epoch 1100/10000 error=0.018935696981204148
Epoch 1200/10000 error=0.015830221637104226
Epoch 1300/10000 error=0.013498420769598295
Epoch 1400/10000 error=0.011700472376631906
Epoch 1500/10000 error=0.010281986450781537
Epoch 1600/10000 error=0.009140496153269239
Epoch 1700/10000 error=0.008206032269150579
Epoch 1800/10000 error=0.007429561097536048
Epoch 1900/10000 error=0.006775907229115115
Epoch 2000/10000 error=0.006219301144691454
Epoch 2100/10000 error=0.005740504666799353
Epoch 2200/10000 error=0.0053249099620346855
Epoch 2300/10000 error=0.00496

Step 4: Demonstrate Predictions

In [5]:
print("\n--- Final XOR Predictions ---")
output = model.predict(X)

# Decision boundary used to turn a network output into a 0/1 class label.
CLASS_THRESHOLD = 0.5
# Largest allowed |prediction - target| for the run to count as converged.
CONVERGENCE_TOLERANCE = 0.1

for input_val, true_val, pred_val in zip(X, y, output):
    # Round prediction to 0 or 1 for clarity; index the single output element
    # explicitly instead of relying on the truth value of a 1-element array.
    class_pred = int(pred_val[0] > CLASS_THRESHOLD)

    print(f"Input: {input_val} | True: {true_val} | Pred: {pred_val[0]:.4f} | Class: {class_pred}")

# Verification: compare every prediction with its own target from `y` rather
# than hard-coding sample positions and expected classes, so the check stays
# correct if the data is reordered or extended.
# Assumes `model.predict` yields one single-element prediction per sample
# (the loop above already indexes `pred_val[0]`).
predictions = np.asarray(output, dtype=float).reshape(y.shape)
if np.all(np.abs(predictions - y) < CONVERGENCE_TOLERANCE):
    print("\n[SUCCESS] The network solved XOR!")
else:
    print("\n[FAILURE] The network did not converge. Try adjusting the learning rate or weights.")


--- Final XOR Predictions ---
Input: [0 0] | True: [0] | Pred: 0.0135 | Class: 0
Input: [0 1] | True: [1] | Pred: 0.9698 | Class: 1
Input: [1 0] | True: [1] | Pred: 0.9732 | Class: 1
Input: [1 1] | True: [0] | Pred: 0.0310 | Class: 0

[SUCCESS] The network solved XOR!
