In [55]:
import sys
import os

sys.path.append(os.path.abspath(".."))

from src.activations import ActivationType, get_activation
from src.initialiaztion import get_initialization
from src.optimizers import OptimizerType, get_optimizer
from src.losses import get_loss
from src.losses import LossType
import numpy as np
import math

In [56]:
# Weight-initialisation callables looked up by name. NeuralNetwork._initialize_parameters
# calls them with a (layer size, previous layer size) shape tuple.
xavier_initialization = get_initialization("xavier")
he_initialization = get_initialization("he")

# For "relu" and "sigmoid" the lookup result is indexed and element 0 is kept;
# NeuralNetwork.forward_pass likewise uses element 0 of a tuple result as the
# forward function.
relu = get_activation("relu")[0]
sigmoid = get_activation("sigmoid")[0]
# "softmax" is stored un-indexed — presumably get_activation returns a bare
# callable for it rather than a tuple; TODO confirm against src.activations.
softmax = get_activation("softmax")

In [57]:
class NeuralNetwork:
    """Fully connected feed-forward network with a hand-written forward/backward pass.

    Observations are stored as columns: inputs have ``layer_dims[0]`` rows and
    ``m`` columns, and targets/predictions have ``m`` columns as well.

    Learnable parameters live in ``self.params`` under the keys ``W1, b1, ...``;
    ``W{i}`` has shape ``(layer_dims[i], layer_dims[i - 1])`` and ``b{i}`` has
    shape ``(layer_dims[i], 1)``.
    """

    def __init__(self,
                 layer_dims: list[int],
                 activations: list[ActivationType],
                 loss_type: LossType,
                 optimizer_type: OptimizerType = "gd",
                 optimizer_params: dict = None,
                 seed: int = 42):
        """Validate the configuration, initialise the weights and build the optimizer.

        Args:
            layer_dims: Layer sizes from input to output, e.g. ``[2, 3, 3, 1]``.
            activations: One activation name per non-input layer
                (``"relu"``, ``"sigmoid"``, ``"softmax"`` or ``"linear"``).
            loss_type: ``"mse"``, ``"bce"`` or ``"cce"``. This is a required
                argument; the previous signature used the ``LossType`` alias
                itself as a default value, which could never pass validation.
            optimizer_type: Name forwarded to ``get_optimizer``.
            optimizer_params: Extra keyword arguments forwarded to
                ``get_optimizer``; ``None`` means no extra arguments.
            seed: Value passed to ``np.random.seed`` before weights are drawn.

        Raises:
            TypeError: If ``layer_dims`` / ``activations`` have the wrong type.
            ValueError: If the architecture, an activation name or the loss
                name is invalid.
        """
        self._validate_inputs(layer_dims, activations, loss_type, optimizer_type)

        self.layer_dims = layer_dims
        self.activations = activations
        self.loss_type = loss_type
        self.optimizer_type = optimizer_type
        self.optimizer_params = optimizer_params if optimizer_params else {}
        self.seed = seed
        self.params = {}

        self._initialize_parameters()
        self.optimizer = get_optimizer(self.optimizer_type, **self.optimizer_params)

    @staticmethod
    def _resolve_activation(act_name):
        """Return ``(forward, derivative)`` for an activation name.

        ``get_activation`` may return either a tuple (element 0 is used as the
        forward function, element 1 as its derivative) or a bare callable.
        In the latter case ``derivative`` is ``None``.
        """
        act_obj = get_activation(act_name)
        if isinstance(act_obj, tuple):
            return act_obj[0], act_obj[1]
        return act_obj, None

    def forward_pass(self, X: np.ndarray):
        """Propagate ``X`` through every layer.

        Args:
            X: Input array whose first axis must equal ``layer_dims[0]``.

        Returns:
            ``(A, caches)`` where ``A`` is the output of the last layer and
            ``caches[i - 1]`` is ``(A_prev, Z)`` for layer ``i``: the layer's
            input and its pre-activation value, as needed by ``backward_pass``.

        Raises:
            ValueError: If the number of rows in ``X`` does not match the
                input layer size.
        """
        if self.layer_dims[0] != X.shape[0]:
            raise ValueError(
                f"Input dimension mismatch. Expected {self.layer_dims[0]} features, "
                f"but got {X.shape[0]}."
            )

        L = len(self.layer_dims)
        A = X
        caches = []

        for i in range(1, L):
            act_fnc, _ = self._resolve_activation(self.activations[i - 1])

            W = self.params[f"W{i}"]
            b = self.params[f"b{i}"]
            A_prev = A

            # b has a single column and is broadcast across all observations.
            Z = np.dot(W, A_prev) + b
            A = act_fnc(Z)

            caches.append((A_prev, Z))

        return A, caches

    def backward_pass(self, y_true: np.ndarray, y_hat: np.ndarray, caches: list) -> tuple:
        """Compute the loss and the gradients of every weight and bias.

        Args:
            y_true: Targets, one column per observation.
            y_hat: Network output from ``forward_pass``; same shape as ``y_true``.
            caches: The ``caches`` list returned by ``forward_pass``.

        Returns:
            ``(grads, loss)``. ``grads`` maps ``"dL_dW{i}"`` / ``"dL_db{i}"`` to
            arrays shaped like ``W{i}`` / ``b{i}``; ``loss`` is the value
            returned by the configured loss function.

        Raises:
            ValueError: If the shapes of ``y_true`` and ``y_hat`` differ, or if
                a hidden layer uses an activation without a derivative.
        """
        # Compare full shapes: len() only looks at the first axis and would let
        # a mismatch in the number of observations through.
        if y_true.shape != y_hat.shape:
            raise ValueError(f"y_true and y_hat must have the same shapes. "
                             f"Got: y_true{y_true.shape}, y_hat{y_hat.shape}")

        L = len(self.layer_dims)
        m = y_true.shape[1]

        grads = {}
        loss_obj = get_loss(self.loss_type)
        loss = loss_obj[0](y_hat, y_true)

        # ---- output layer -------------------------------------------------
        A_prev, Z = caches[L - 2]

        if self.loss_type in ("bce", "cce"):
            # NOTE(review): y_hat - y_true is the combined loss+activation
            # gradient only for sigmoid+bce and softmax+cce. That pairing is
            # not enforced anywhere in this class — confirm callers respect it.
            dZ = y_hat - y_true
        else:
            dA = loss_obj[1](y_hat, y_true)
            _, act_derivative = self._resolve_activation(self.activations[-1])
            # An activation without a derivative is treated as identity here.
            dZ = dA if act_derivative is None else dA * act_derivative(Z)

        grads[f"dL_dW{L - 1}"] = (1 / m) * np.dot(dZ, A_prev.T)
        grads[f"dL_db{L - 1}"] = (1 / m) * np.sum(dZ, axis=1, keepdims=True)

        # ---- hidden layers, last to first -----------------------------------
        if L > 2:
            dA = np.dot(self.params[f"W{L - 1}"].T, dZ)

        for layer in range(L - 2, 0, -1):
            A_prev, Z = caches[layer - 1]

            act_name = self.activations[layer - 1]
            _, act_derivative = self._resolve_activation(act_name)
            if act_derivative is None:
                raise ValueError(f"Activation {act_name} has no derivative defined.")

            dZ = act_derivative(Z) * dA

            grads[f"dL_dW{layer}"] = (1 / m) * np.dot(dZ, A_prev.T)
            grads[f"dL_db{layer}"] = (1 / m) * np.sum(dZ, axis=1, keepdims=True)

            # Nothing lies below layer 1, so no further dA is needed there.
            if layer > 1:
                dA = np.dot(self.params[f"W{layer}"].T, dZ)

        return grads, loss

    def train(self,
              X_train: np.ndarray,
              y_train: np.ndarray,
              epochs: int = 1000,
              learning_rate: float = 0.01,
              batch_size: int = None,):
        """Fit the network with shuffled mini-batches.

        Args:
            X_train: Inputs, one column per observation.
            y_train: Targets, one column per observation.
            epochs: Number of passes over the data.
            learning_rate: Step size handed to the optimizer's ``update``.
            batch_size: Observations per update. ``None`` means one batch
                containing the whole dataset. The last batch of an epoch may
                be smaller when the dataset size is not a multiple of it.

        Returns:
            A list with one loss value per processed mini-batch, in order.

        Raises:
            ValueError: If the column counts of ``X_train`` and ``y_train``
                differ, the dataset is empty, or ``batch_size`` is not in
                ``1..n``.
        """
        if X_train.shape[1] != y_train.shape[1]:
            raise ValueError(f"X_train and y_train must have the same size of observations. "
                             f"Got: "
                             f"X_train: {X_train.shape[1]}, "
                             f"y_train: {y_train.shape[1]}")

        n = X_train.shape[1]

        if n == 0:
            raise ValueError("X_train must contain at least one observation.")

        if batch_size is None:
            # Default: full-batch training. Previously None reached range() as
            # the step argument and raised a TypeError.
            batch_size = n
        elif batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer. Got: {batch_size}")
        elif batch_size > n:
            raise ValueError(f"batch_size must not be bigger than the number of observations in dataset. "
                             f"Got: "
                             f"X_train: {X_train.shape[1]}, "
                             f"batch_size: {batch_size}")

        losses = []
        idx = np.arange(0, n)

        for _ in range(epochs):
            # Reshuffle the columns every epoch, keeping X and y aligned.
            permutation = np.random.permutation(idx)
            X_shuffled = X_train[:, permutation]
            y_shuffled = y_train[:, permutation]

            for start in range(0, n, batch_size):
                stop = start + batch_size

                y_hat, caches = self.forward_pass(X_shuffled[:, start:stop])
                grads, batch_loss = self.backward_pass(y_shuffled[:, start:stop], y_hat, caches)
                self.optimizer.update(
                    self.params,
                    grads,
                    learning_rate
                )

                losses.append(batch_loss)

        return losses

    def _initialize_parameters(self):
        """Fill ``self.params`` with initial weights and zero biases.

        Seeds NumPy's global RNG with ``self.seed`` first. Layers followed by
        ``"relu"`` use the "he" initializer; ``"sigmoid"``, ``"softmax"`` and
        ``"linear"`` use the "xavier" initializer.

        Raises:
            ValueError: If an activation has no initializer assigned; without
                this the weight matrix would silently be left undefined.
        """
        np.random.seed(self.seed)

        L = len(self.layer_dims)

        for i in range(1, L):
            D_o = self.layer_dims[i]
            D_i = self.layer_dims[i - 1]
            act_fnc = self.activations[i - 1]

            if act_fnc == "relu":
                self.params[f"W{i}"] = he_initialization((D_o, D_i))
            elif act_fnc in ("sigmoid", "softmax", "linear"):
                self.params[f"W{i}"] = xavier_initialization((D_o, D_i))
            else:
                raise ValueError(f"No weight initialization defined for activation '{act_fnc}'.")

            self.params[f"b{i}"] = np.zeros((D_o, 1))

    def _validate_inputs(self, layer_dims, activations, loss_type, optimizer_type):
        """
        Private helper to validate all inputs before initialization.

        ``optimizer_type`` is accepted for symmetry but is not checked here;
        it is passed to ``get_optimizer`` unchanged by ``__init__``.

        Raises:
            TypeError: If ``layer_dims`` or ``activations`` is not a list, or
                ``layer_dims`` contains a non-integer.
            ValueError: If there are fewer than two layers, a layer has fewer
                than one neuron, the number of activations is not
                ``len(layer_dims) - 1``, or an activation / loss name is
                unsupported.
        """
        if not isinstance(layer_dims, list):
            raise TypeError(f"layer_dims must be a list, got {type(layer_dims)}")

        if not all(isinstance(x, int) for x in layer_dims):
            raise TypeError("All elements in layer_dims must be integers!")

        if not isinstance(activations, list):
            raise TypeError(f"activations must be a list, got {type(activations)}")

        # Checked before min() so an empty list yields this message rather
        # than min()'s own error.
        if len(layer_dims) < 2:
            raise ValueError("The length of layers must be at least 2 (Input -> Output)")

        if min(layer_dims) < 1:
            raise ValueError("The number of neurons in every layer must be at least 1")

        if len(layer_dims) != len(activations) + 1:
            raise ValueError(
                f"Structure Error: You provided {len(layer_dims)} layers but {len(activations)} activations. "
                f"Expected {len(layer_dims) - 1} activations."
            )

        valid_activations = {"relu", "sigmoid", "softmax", "linear"}
        for act in activations:
            if act not in valid_activations:
                raise ValueError(f"Invalid activation '{act}'. Supported: {valid_activations}")

        valid_losses = {"mse", "bce", "cce"}
        if loss_type not in valid_losses:
            raise ValueError(f"Invalid loss_type '{loss_type}'. Supported: {valid_losses}")

In [58]:
from sklearn.datasets import make_moons

def get_classification_data(n_samples=1000, noise=0.1, seed=42):
    """
    Build the two-moons binary classification set with samples as columns.

    Returns a pair (X, y): X has shape (2, m) and y has shape (1, m),
    where m is the number of generated samples.
    """
    # Side effect: NumPy's global RNG is seeded here as well.
    np.random.seed(seed)

    # sklearn yields one sample per row: points is (m, 2), labels is (m,).
    points, labels = make_moons(n_samples=n_samples, noise=noise, random_state=seed)

    # Flip the points to (features, samples) and lift the labels into a single row.
    return points.T, labels.reshape(1, -1)

def get_regression_data(n_samples=1000, seed=42, noise=0.1):
    """
    Generates a non-linear regression dataset (Noisy Sine Wave).

    Args:
        n_samples: Number of observations m to draw.
        seed: Passed to np.random.seed, so NumPy's global RNG is reseeded as
            a side effect.
        noise: Standard deviation of the zero-mean Gaussian noise added to
            sin(x). The default 0.1 reproduces the previously hard-coded
            value; 0.0 yields the clean sine curve.

    Returns shapes: X=(1, m), y=(1, m)
    """
    np.random.seed(seed)

    # Inputs drawn uniformly from [-5, 5); shape (1, m).
    X = np.random.uniform(-5, 5, (1, n_samples))

    # Targets: sin(x) plus Gaussian noise of the requested scale; shape (1, m).
    noise_values = np.random.normal(0, noise, (1, n_samples))
    y = np.sin(X) + noise_values

    # X is deliberately returned unscaled; scaling it before training is
    # recommended and left to the caller.
    return X, y

In [59]:
# Default two-moons data; per the generator's docstring X is (2, m) and y is (1, m).
X, y = get_classification_data()

In [60]:
# 2 inputs -> two hidden layers of 3 ReLU units each -> 1 sigmoid output,
# using the "bce" loss and the "adam" optimizer. seed is left at its default (42).
classification_nn = NeuralNetwork(
  layer_dims=[2, 3, 3, 1],
  activations=["relu", "relu", "sigmoid"],
  loss_type="bce",
  optimizer_type="adam"
)

In [61]:
# Inspect the freshly initialised parameters (every bias starts as zeros).
classification_nn.params


{'W1': array([[ 0.49671415, -0.1382643 ],
        [ 0.64768854,  1.52302986],
        [-0.23415337, -0.23413696]]),
 'b1': array([[0.],
        [0.],
        [0.]]),
 'W2': array([[ 1.28942186,  0.62660783, -0.38332423],
        [ 0.44299842, -0.37837896, -0.38026675],
        [ 0.19756137, -1.56218678, -1.40838951]]),
 'b2': array([[0.],
        [0.],
        [0.]]),
 'W3': array([[-0.32463686, -0.58475832,  0.18143078]]),
 'b3': array([[0.]])}

In [62]:
from sklearn.model_selection import train_test_split

# train_test_split works on one sample per row, so hand it the transposed arrays.
X_sklearn, y_sklearn = X.T, y.T

# Hold out 33% with a fixed seed, then flip every piece back to the
# (features, samples) layout that NeuralNetwork consumes.
X_train, X_test, y_train, y_test = (
    part.T
    for part in train_test_split(X_sklearn, y_sklearn, test_size=0.33, random_state=42)
)

In [63]:
# Mini-batch training: 100 epochs, batches of 34 columns, learning rate 0.01.
# `losses` receives one value per mini-batch, not one per epoch.
losses = classification_nn.train(X_train,
                                 y_train,
                                 epochs=100,
                                 learning_rate=0.01,
                                 batch_size=34)

In [65]:
# Lowest single mini-batch loss seen during training (losses holds per-batch
# values, so this is not an epoch-level or final loss).
min(losses)

np.float64(0.03226919520137765)

In [66]:
# Forward pass on the held-out split; y_pred is the output of the last layer.
# The returned caches are only needed for a backward pass.
y_pred, caches = classification_nn.forward_pass(X_test)

In [67]:
# Element 0 of get_loss's result is called as (prediction, target), the same
# argument order NeuralNetwork.backward_pass uses for the loss value.
bce = get_loss("bce")[0]

# Loss on the test split, evaluated on the raw network outputs.
bce(y_pred, y_test)

np.float64(0.20688741071401784)

In [68]:
# Binarise the predictions in place at 0.5: strictly above becomes 1,
# everything at or below becomes 0. The raw outputs are overwritten.
np.copyto(y_pred, 1, where=y_pred > 0.5)
np.copyto(y_pred, 0, where=y_pred <= 0.5)

In [73]:
# True test labels as a column, for eyeballing against the predictions.
# NOTE(review): this displays the whole array; a slice such as y_test.T[:10]
# would keep the cell output short.
y_test.T

array([[1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
    

In [72]:
# Thresholded predictions as a column, to compare with y_test.T.
# NOTE(review): this displays the whole array; a slice such as y_pred.T[:10]
# would keep the cell output short.
y_pred.T

array([[1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],

In [75]:
from sklearn.metrics import accuracy_score, f1_score

# Both arrays are passed transposed, i.e. with one row per test observation.
# y_pred must already hold the thresholded 0/1 labels at this point.
print("accuracy: ", accuracy_score(y_test.T, y_pred.T))
print("f1-score: ", f1_score(y_test.T, y_pred.T))

accuracy:  0.8939393939393939
f1-score:  0.890282131661442
