# Computational Intelligence Project: Custom NN Library
## Part 1 Submission: Core Library Validation

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
from lib.network import Sequential
from lib.layers import Dense, BaseLayer
from lib.activations import ReLU, Tanh, Sigmoid
from lib.losses import MeanSquaredError
from lib.optimizer import SGD




In [None]:

# --- Gradient Check Function ---
def _numerical_gradient(model, param, X, Y, epsilon):
    """Estimate d(loss)/d(param) element by element with central differences.

    `param` is perturbed in place, one entry at a time, and every entry is put
    back to its original value even if the forward pass or the loss raises.
    """
    grad = np.zeros(param.shape, dtype=np.float64)
    for index in np.ndindex(param.shape):
        original = param[index]
        try:
            # L(theta + epsilon)
            param[index] = original + epsilon
            loss_plus = model.loss_function.loss(model.forward(X), Y)

            # L(theta - epsilon)
            param[index] = original - epsilon
            loss_minus = model.loss_function.loss(model.forward(X), Y)
        finally:
            # Never leave the model in a perturbed state.
            param[index] = original

        grad[index] = (loss_plus - loss_minus) / (2 * epsilon)
    return grad


def _relative_error(numerical, analytical):
    """Return ||num - ana|| / (||num|| + ||ana||), guarded against 0/0."""
    # The 1e-12 keeps the ratio finite when both gradients are exactly zero.
    scale = np.linalg.norm(numerical) + np.linalg.norm(analytical) + 1e-12
    return np.linalg.norm(numerical - analytical) / scale


def check_gradients(model, X, Y, epsilon=1e-7, tolerance=1e-7):
    """
    Compare analytical gradients (from backprop) with numerical gradients
    (central finite differences) for every parameterised layer of `model`.

    Args:
        model: Object exposing `forward(X)`, `backward(Y_pred, Y)`, a `layers`
            sequence and `loss_function.loss(Y_pred, Y)`. Layers that have an
            `A` attribute are checked and are expected to also expose `b`,
            `dA` and `db`; all other layers are skipped.
        X: Input passed unchanged to `model.forward`.
        Y: Target passed unchanged to the loss and to `model.backward`.
        epsilon: Step used for the central difference. Must be positive.
        tolerance: A layer passes when the relative error of both its weight
            and bias gradients is strictly below this value.

    Returns:
        True if every checked layer passed (also True when no layer has
        parameters), False otherwise. A per-layer report is printed as well.

    Raises:
        ValueError: If `epsilon` is not strictly positive.
    """
    if epsilon <= 0:
        raise ValueError("epsilon must be a positive number")

    # One forward/backward pass fills in the analytical gradients (dA, db).
    # backward() is not called again below, so they stay valid for comparison.
    Y_pred = model.forward(X)
    model.backward(Y_pred, Y)

    all_passed = True
    for idx, layer in enumerate(model.layers):
        # Only check layers with parameters (Dense)
        if not hasattr(layer, 'A'):
            continue

        print(f"\nChecking Layer {idx+1} ({layer.__class__.__name__})...")

        num_grad_A = _numerical_gradient(model, layer.A, X, Y, epsilon)
        num_grad_b = _numerical_gradient(model, layer.b, X, Y, epsilon)

        # Compare analytical vs numerical (relative error)
        diff_A = _relative_error(num_grad_A, layer.dA)
        diff_b = _relative_error(num_grad_b, layer.db)

        print(f"  Weight Gradient Diff (A): {diff_A:.10f}")
        print(f"  Bias Gradient Diff (b):   {diff_b:.10f}")

        if diff_A < tolerance and diff_b < tolerance:
            print("  Status: Backprop Correct ✅")
        else:
            all_passed = False
            print("  Status: Check Gradients ⚠️ (Error too high)")

    return all_passed

# --- Tiny 2-2-1 Tanh network, built only to validate backprop ---
grad_check_model = Sequential()
for _grad_check_layer in (
    Dense(input_size=2, output_size=2, init_scale=0.1),
    Tanh(),
    Dense(input_size=2, output_size=1, init_scale=0.1),
    Tanh(),
):
    grad_check_model.add(_grad_check_layer)

grad_check_model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss_function=MeanSquaredError(),
)

# --- Gradient check on one sample: a (2, 1) input column and a (1, 1) target ---
X_check = np.array([-1.0, 0.5]).reshape(2, 1)
Y_check = np.full((1, 1), -1.0)

print("Starting Gradient Check on Test Network...")
check_gradients(grad_check_model, X_check, Y_check)

In [None]:
# --- XOR data in bipolar (-1 / +1) encoding ---
# Rows are the two input features, columns are the four samples.
X_xor = np.array([
    [-1, -1,  1,  1],
    [-1,  1, -1,  1],
])
# One bipolar target per sample column.
Y_xor = np.array([[-1,  1,  1, -1]])

# --- Training hyperparameters ---
LR = 0.5        # learning rate handed to SGD
EPOCHS = 10000  # number of epochs passed to train()

# --- 2-4-1 network: Tanh on the hidden layer and on the output ---
# A Tanh output suits the -1/1 targets.
xor_model = Sequential()
for _xor_layer in (
    Dense(input_size=2, output_size=4, init_scale=0.1),
    Tanh(),
    Dense(input_size=4, output_size=1, init_scale=0.1),
    Tanh(),
):
    xor_model.add(_xor_layer)

xor_model.compile(
    optimizer=SGD(learning_rate=LR),
    loss_function=MeanSquaredError(),
)

# --- Announce the run, then train ---
print(
    "\n==============================",
    "Starting XOR Training (2-4-1)",
    "==============================",
    f"Hyperparameters: LR={LR}, Epochs={EPOCHS}",
    sep="\n",
)

loss_history = xor_model.train(X_xor, Y_xor, epochs=EPOCHS)


# --- Run the trained network on the XOR inputs ---
Y_pred_raw = xor_model.predict(X_xor)
# Threshold at zero for the -1/1 labels: outputs >= 0 become 1, the rest -1.
_is_positive = Y_pred_raw >= 0
Y_pred_class = np.where(_is_positive, 1, -1)

# --- Report inputs, targets and predictions (transposed: one row per sample) ---
print("\nFinal XOR Predictions:")
print("------------------------")
for _heading, _values in (
    ("Input (X):\n", X_xor.T),
    ("Target (Y):\n", Y_xor.T),
    ("\nRaw Prediction (Tanh Output):\n", Y_pred_raw.T),
    ("Classified Prediction:\n", Y_pred_class.T),
):
    print(_heading, _values)

_final_loss = loss_history[-1]
print(f"\nFinal Loss: {_final_loss:.6f}")


# --- Draw the recorded training loss on an 8x4 inch figure ---
_loss_fig, _loss_ax = plt.subplots(figsize=(8, 4))
_loss_ax.plot(loss_history)
_loss_ax.set_title("XOR Training Loss Curve (MSE)")
_loss_ax.set_xlabel("Epochs")
_loss_ax.set_ylabel("Loss")
_loss_ax.grid(True)
plt.show()

# Static reminder of how to read the results printed above.
print(
    "\nConclusion: If the final loss is near zero and classified predictions "
    "match the target, the network successfully solved XOR."
)