In [206]:
import sys
import os

sys.path.append(os.path.abspath(".."))

from src.activations import ActivationType, get_activation
from src.initialiaztion import get_initialization
from src.losses import get_loss
from src.losses import LossType
import numpy as np

In [207]:
# Weight-initialisation callables, looked up by scheme name (Xavier first, then He).
xavier_initialization, he_initialization = (
    get_initialization(scheme) for scheme in ("xavier", "he")
)

# For relu and sigmoid only element 0 of the lookup result is kept; the softmax
# lookup result is kept exactly as returned.
relu, sigmoid = (get_activation(kind)[0] for kind in ("relu", "sigmoid"))
softmax = get_activation("softmax")

In [208]:
class NeuralNetwork:
    """Fully connected feed-forward network with hand-written forward/backward passes.

    Data is laid out column-wise: an input batch has shape
    ``(n_features, n_examples)``. Layers are numbered from 1; ``params`` holds
    ``W{i}`` with shape ``(layer_dims[i], layer_dims[i - 1])`` and ``b{i}`` with
    shape ``(layer_dims[i], 1)``.
    """

    def __init__(self,
                 layer_dims: list[int],
                 activations: list[ActivationType],
                 loss_type: LossType = "mse",
                 optimizer_type: str = "gd",
                 seed: int = 42):
        """Validate the configuration, store it and initialise the parameters.

        Args:
            layer_dims: Layer widths, input layer first (at least two entries).
            activations: One activation name per non-input layer.
            loss_type: One of ``"mse"``, ``"bce"``, ``"cce"``.
            optimizer_type: Stored on the instance; it is neither validated nor
                consumed by the methods of this class.
            seed: Passed to ``np.random.seed`` before the weights are drawn.

        Raises:
            TypeError: If ``layer_dims`` / ``activations`` have the wrong type.
            ValueError: If the architecture, an activation or the loss is invalid.
        """
        self._validate_inputs(layer_dims, activations, loss_type, optimizer_type)

        self.layer_dims = layer_dims
        self.activations = activations
        self.loss_type = loss_type
        self.optimizer_type = optimizer_type
        self.seed = seed
        self.params = {}

        self._initialize_parameters()

    @staticmethod
    def _resolve_activation(name):
        """Return ``(function, derivative)`` for an activation name.

        When ``get_activation`` returns a tuple, element 0 is used as the
        forward function and element 1 as its derivative. A non-tuple result
        is used as the forward function and the derivative is ``None``.
        """
        act_obj = get_activation(name)
        if isinstance(act_obj, tuple):
            return act_obj[0], act_obj[1]
        return act_obj, None

    def forward_pass(self, X: np.ndarray, verbose: bool = True):
        """Propagate a batch through every layer.

        Args:
            X: Input batch of shape ``(layer_dims[0], n_examples)``.
            verbose: When True (the default), print a per-layer shape trace.

        Returns:
            ``(A, caches)`` where ``A`` is the last layer's activation and
            ``caches[i - 1]`` is the tuple ``(A_prev, Z)`` recorded for layer ``i``.

        Raises:
            ValueError: If ``X`` is not 2-D or its feature count does not match
                ``layer_dims[0]``.
        """
        # A 1-D input would otherwise fail later with a bare IndexError on
        # X.shape[1]; fail early with an explicit message instead.
        if X.ndim != 2:
            raise ValueError(
                f"X must be a 2-D array of shape (n_features, n_examples), got shape {X.shape}."
            )

        if self.layer_dims[0] != X.shape[0]:
            raise ValueError(
                f"Input dimension mismatch. Expected {self.layer_dims[0]} features, "
                f"but got {X.shape[0]}."
            )

        L = len(self.layer_dims)
        A = X
        caches = []

        if verbose:
            print(f"\n{'='*15} STARTING FORWARD PASS {'='*15}")
            print(f"Input Batch Shape: {X.shape} ({X.shape[1]} examples)")

        for i in range(1, L):
            act_name = self.activations[i - 1]
            act_fnc, _ = self._resolve_activation(act_name)

            W = self.params[f"W{i}"]
            b = self.params[f"b{i}"]
            A_prev = A

            # b has shape (layer_dims[i], 1) and broadcasts across the batch columns.
            Z = np.dot(W, A_prev) + b
            A = act_fnc(Z)

            if verbose:
                print(f"\n--- Layer {i} ({act_name.upper()}) ---")
                print(f"{'Input Matrix (A_prev)':<25} : {A_prev.shape}")
                print(f"{'Weight Matrix (W)':<25} : {W.shape}")
                print(f"{'Bias Vector (b)':<25} : {b.shape} (Broadcasts automatically)")
                print(f"{'-'*45}")
                print(f"{'Linear Step (Z = WA+b)':<25} : {Z.shape}")
                print(f"{'Activation (A = f(Z))':<25} : {A.shape}")

            # backward_pass needs the layer input and the pre-activation.
            caches.append((A_prev, Z))

        if verbose:
            print(f"\n{'='*15} FORWARD PASS COMPLETE {'='*14}\n")
        return A, caches

    def _output_layer_dZ(self, y_true: np.ndarray, y_hat: np.ndarray, Z: np.ndarray) -> np.ndarray:
        """Compute the gradient of the loss w.r.t. the output layer's pre-activation."""
        if self.loss_type in ("bce", "cce"):
            # Shortcut form (y_hat - y_true).
            # NOTE(review): this is the usual sigmoid+BCE / softmax+CCE
            # simplification; nothing here checks that the output activation
            # actually matches the loss.
            return y_hat - y_true

        loss_derivative = get_loss(self.loss_type)[1]
        dA = loss_derivative(y_hat, y_true)

        _, act_derivative = self._resolve_activation(self.activations[-1])
        if act_derivative is None:
            # An output activation without a derivative is treated as identity.
            # NOTE(review): that is only exact for an identity activation - confirm
            # which activations get_activation returns without a derivative.
            return dA
        return dA * act_derivative(Z)

    def backward_pass(self, y_true: np.ndarray, y_hat: np.ndarray, caches: list) -> dict:
        """Back-propagate through the network and collect parameter gradients.

        Args:
            y_true: Targets, same shape as ``y_hat``.
            y_hat: Network output returned by ``forward_pass``.
            caches: The per-layer ``(A_prev, Z)`` list returned by ``forward_pass``.

        Returns:
            Dict with keys ``dL_dW{l}`` and ``dL_db{l}`` for every layer ``l``,
            each scaled by ``1 / m`` where ``m = y_true.shape[1]``.

        Raises:
            ValueError: If the shapes of ``y_true`` and ``y_hat`` differ, if
                ``caches`` does not hold one entry per layer, or if a hidden
                layer's activation has no derivative.
        """
        # Compare full shapes: comparing len() only looks at the first axis and
        # lets a mismatched batch dimension through, where it silently broadcasts.
        if y_true.shape != y_hat.shape:
            raise ValueError(
                "y_true and y_hat must have the same shapes. "
                f"Got: y_true{y_true.shape}, y_hat{y_hat.shape}"
            )

        num_layers = len(self.layer_dims) - 1
        if len(caches) != num_layers:
            raise ValueError(
                f"Expected {num_layers} cache entries (one per layer), got {len(caches)}."
            )

        m = y_true.shape[1]
        grads = {}
        dA = None

        # Walk from the output layer back to layer 1.
        for l in range(num_layers, 0, -1):
            A_prev, Z = caches[l - 1]

            if l == num_layers:
                dZ = self._output_layer_dZ(y_true, y_hat, Z)
            else:
                # Use this layer's own activation name so the error below
                # reports the layer that actually lacks a derivative.
                act_name = self.activations[l - 1]
                _, act_derivative = self._resolve_activation(act_name)
                if act_derivative is None:
                    raise ValueError(f"Activation {act_name} has no derivative defined.")
                dZ = act_derivative(Z) * dA

            grads[f"dL_dW{l}"] = (1/m) * np.dot(dZ, A_prev.T)
            grads[f"dL_db{l}"] = (1/m) * np.sum(dZ, axis=1, keepdims=True)

            # The gradient w.r.t. the network input is never needed.
            if l > 1:
                dA = np.dot(self.params[f"W{l}"].T, dZ)

        return grads

    def _initialize_parameters(self):
        """Fill ``self.params`` with ``W{i}`` / ``b{i}`` for every layer.

        ReLU layers use ``he_initialization``; sigmoid, softmax and linear
        layers use ``xavier_initialization``. Biases start at zero.
        """
        # Seeds NumPy's global RNG so repeated construction with the same seed
        # is reproducible (presumably the initialisers draw from it - confirm).
        np.random.seed(self.seed)

        for i in range(1, len(self.layer_dims)):
            D_o = self.layer_dims[i]
            D_i = self.layer_dims[i - 1]
            act_fnc = self.activations[i - 1]

            if act_fnc == "relu":
                init = he_initialization
            elif act_fnc in ("sigmoid", "softmax", "linear"):
                init = xavier_initialization
            else:
                # Unreachable after _validate_inputs; raise rather than leave
                # W{i} silently missing from self.params.
                raise ValueError(f"No weight initialization defined for activation '{act_fnc}'.")

            self.params[f"W{i}"] = init((D_o, D_i))
            self.params[f"b{i}"] = np.zeros((D_o, 1))

    def _validate_inputs(self, layer_dims, activations, loss_type, optimizer_type):
        """
        Private helper to validate all inputs before initialization.

        Checks, in order: container types, minimum depth, positive layer
        widths, one activation per non-input layer, supported activation
        names and supported loss names. ``optimizer_type`` is accepted but
        currently not checked.

        Raises:
            TypeError: If ``layer_dims`` is not a list of ints or ``activations``
                is not a list.
            ValueError: For any structural or naming violation.
        """
        if not isinstance(layer_dims, list):
            raise TypeError(f"layer_dims must be a list, got {type(layer_dims)}")

        if not all(isinstance(x, int) for x in layer_dims):
            raise TypeError("All elements in layer_dims must be integers!")

        if not isinstance(activations, list):
            raise TypeError(f"activations must be a list, got {type(activations)}")

        if len(layer_dims) < 2:
            raise ValueError("The length of layers must be at least 2 (Input -> Output)")

        if min(layer_dims) < 1:
            raise ValueError("The number of neurons in every layer must be at least 1")

        if len(layer_dims) != len(activations) + 1:
            raise ValueError(
                f"Structure Error: You provided {len(layer_dims)} layers but {len(activations)} activations. "
                f"Expected {len(layer_dims) - 1} activations."
            )

        valid_activations = {"relu", "sigmoid", "softmax", "linear"}
        for act in activations:
            if act not in valid_activations:
                raise ValueError(f"Invalid activation '{act}'. Supported: {valid_activations}")

        valid_losses = {"mse", "bce", "cce"}
        if loss_type not in valid_losses:
            raise ValueError(f"Invalid loss_type '{loss_type}'. Supported: {valid_losses}")

In [209]:
# Demo network: 3 inputs -> hidden widths 4, 2, 3 (ReLU) -> 2 linear outputs.
# Keyword arguments are listed in the same order as the constructor signature.
nn = NeuralNetwork(
    layer_dims=[3, 4, 2, 3, 2],
    activations=["relu", "relu", "relu", "linear"],
    loss_type="mse",
    optimizer_type="adam",
    seed=4,
)

In [210]:
# Show the freshly initialised parameters: weight matrices W1..W4 and the
# all-zero bias column vectors b1..b4.
nn.params

{'W1': array([[ 0.04128346,  0.40820855, -0.81315624],
        [ 0.56632081, -0.34154176, -1.29380189],
        [-0.52885036,  0.48873458,  0.27128102],
        [-0.93691075,  0.50514169, -0.07184103]]),
 'b1': array([[0.],
        [0.],
        [0.],
        [0.]]),
 'W2': array([[ 0.30057157,  0.23493845, -0.81799262,  0.24819247],
        [-0.42913411,  1.09387957,  0.51147976,  0.03262277]]),
 'b2': array([[0.],
        [0.]]),
 'W3': array([[-0.98299165,  0.05443274],
        [ 0.15989294, -1.20894816],
        [ 2.22336022,  0.39429521]]),
 'b3': array([[0.],
        [0.],
        [0.]]),
 'W4': array([[ 0.97708318, -0.6424824 ,  0.94439928],
        [-0.78575385, -0.37598541,  0.31318441]]),
 'b4': array([[0.],
        [0.]])}

In [211]:
# Synthetic batch laid out as (features, examples): 3 features x 20 examples,
# standard-normal values. Drawn from NumPy's global RNG, which the
# NeuralNetwork constructor seeded via np.random.seed(seed).
X = np.random.normal(loc=0.0, scale=1.0, size=(3, 20))

# y_pred is the output-layer activation; caches holds one (A_prev, Z) per layer.
y_pred, caches = nn.forward_pass(X)


Input Batch Shape: (3, 20) (20 examples)

--- Layer 1 (RELU) ---
Input Matrix (A_prev)     : (3, 20)
Weight Matrix (W)         : (4, 3)
Bias Vector (b)           : (4, 1) (Broadcasts automatically)
---------------------------------------------
Linear Step (Z = WA+b)    : (4, 20)
Activation (A = f(Z))     : (4, 20)

--- Layer 2 (RELU) ---
Input Matrix (A_prev)     : (4, 20)
Weight Matrix (W)         : (2, 4)
Bias Vector (b)           : (2, 1) (Broadcasts automatically)
---------------------------------------------
Linear Step (Z = WA+b)    : (2, 20)
Activation (A = f(Z))     : (2, 20)

--- Layer 3 (RELU) ---
Input Matrix (A_prev)     : (2, 20)
Weight Matrix (W)         : (3, 2)
Bias Vector (b)           : (3, 1) (Broadcasts automatically)
---------------------------------------------
Linear Step (Z = WA+b)    : (3, 20)
Activation (A = f(Z))     : (3, 20)

--- Layer 4 (LINEAR) ---
Input Matrix (A_prev)     : (3, 20)
Weight Matrix (W)         : (2, 3)
Bias Vector (b)           : (2, 1) (

In [212]:
# caches[0] is the (A_prev, Z) pair recorded for layer 1, so element 0 is the
# input that was fed into layer 1 (the batch X itself).
caches[0][0]

array([[ 0.04800625, -2.35807363, -1.10558404,  0.83783635,  2.08787087,
         0.91484096, -0.27620335,  0.7965119 , -1.14379857,  0.50991978,
        -1.3474603 , -0.0093601 , -0.13070464,  0.80208661, -0.30296397,
         1.20200259, -0.19674528,  0.8365287 ,  0.78660228, -1.84087587],
       [ 0.03754749,  0.03592805, -0.77873992,  0.17941071, -1.45553433,
         0.55618522,  0.50977885,  0.30044554,  2.47658416,  0.3523434 ,
         0.067471  , -0.7322647 ,  0.29714121, -0.9617768 ,  1.27181862,
        -0.64764453,  0.15846954,  1.99008302,  1.16418756,  0.24266016],
       [ 1.3799201 , -0.05455871,  0.79523395,  0.01908996, -0.90543814,
         0.43027133,  0.93465006, -0.34610187, -1.09712188, -0.52819607,
        -2.37977527, -0.60768369, -1.07529009,  2.02240507, -0.5648753 ,
        -1.54292905,  0.87084178, -0.17521053,  0.04860301,  0.1886462 ]])

In [219]:
# Fabricated targets: the predictions shifted by a constant 0.2. This cell only
# checks gradient shapes, not gradient values.
y_true = y_pred - 0.2

grads = nn.backward_pass(y_true, y_pred, caches)

# One weight matrix per layer transition; derive the count from the model
# instead of hard-coding it so the check follows the architecture.
for i in range(1, len(nn.layer_dims)):
  grad_shape = grads[f"dL_dW{i}"].shape
  weight_shape = nn.params[f"W{i}"].shape
  # Each weight gradient must have the same shape as the weight it updates.
  assert grad_shape == weight_shape, f"dL_dW{i} {grad_shape} != W{i} {weight_shape}"
  print(f"dL_dW{i}: ", grad_shape)
  print(f"W{i}: ", weight_shape)
  print(" ")

dL_dW1:  (4, 3)
W1:  (4, 3)
 
dL_dW2:  (2, 4)
W2:  (2, 4)
 
dL_dW3:  (3, 2)
W3:  (3, 2)
 
dL_dW4:  (2, 3)
W4:  (2, 3)
 
