In [1]:
import numpy as np
from sklearn.datasets import load_diabetes

# Load the scikit-learn diabetes regression dataset as a (features, targets) pair.
# In this notebook it is only referenced by the commented-out TensorFlow cells at the end.
X_diab, y_diab = load_diabetes(return_X_y=True) # shapes: X_diab (442, 10), y_diab (442,)

In [2]:
from sklearn.datasets import make_regression

# Synthetic regression problem used to train the hand-written network below:
# 60 samples, 10 features, noise=0.5. The fixed random_state pins the generated data.
X_reg, y_reg = make_regression(n_samples=60, n_features=10, noise=0.5, random_state=42)

In [3]:
class Layer:
    """
    A fully connected neural-network layer computing ``activation(X @ w + bias)``.

    The layer caches its latest input and output in ``call`` so that
    ``_setGrad`` can compute the parameter gradients, which ``_updateGrad``
    then applies with either plain gradient descent or Adam.
    """

    # Activation names understood by activationFunction / _activationDerivative.
    _ACTIVATIONS = ("linear", "relu", "sigmoid")

    def __init__(
            self,
            units,
            *,
            input_layer: bool = False,
            activation: str = "linear",
            use_bias: bool = True,
            optimizer: str = "sgd"):
        """
        Initialize a neural network layer.

        Args:
            units (int): Count of neurons in the layer.
            input_layer (bool, optional): Whether the layer is an input layer. Defaults to False.
            activation (str, optional): Activation function for the layer. Can be "linear", "relu", or "sigmoid". Defaults to "linear".
            use_bias (bool, optional): Whether to train a bias term. Defaults to True.
            optimizer (str, optional): "adam" selects the Adam update rule; any other
                value uses plain gradient descent. Defaults to "sgd".

        Raises:
            ValueError: If ``activation`` is not one of the supported names.
        """
        if activation not in self._ACTIVATIONS:
            raise ValueError(
                f"Unsupported activation {activation!r}; expected one of {self._ACTIVATIONS}")

        self.units = units
        self.input_layer = input_layer
        self.activation = activation
        self.use_bias = use_bias
        self.optimizer = optimizer

        self._input = None   # input cached by the last call()
        self._output = None  # activation output cached by the last call()

        self.w = None     # Weights matrix, shape (input_size, units)
        self.bias = None  # Bias row vector, shape (1, units)
        self._weight_gradient = None  # Weight derivative matrix
        self._bias_gradient = None    # Bias derivative vector

        # Adam state: first/second moment estimates for the weights and the bias,
        # their exponential decay rates, and the number of Adam steps taken so far.
        self.m = 0
        self.v = 0
        self.m_bias = 0
        self.v_bias = 0
        self.b1 = 0.9
        self.b2 = 0.99
        self._step = 0

    def activationFunction(self, z):
        """
        Apply the activation function to the given input.

        Args:
            z (numpy.ndarray): Input to the activation function.

        Returns:
            numpy.ndarray: Output after applying the activation function.

        Raises:
            ValueError: If ``self.activation`` is not a supported name.
        """

        if self.activation == "linear":
            return z

        if self.activation == "relu":
            return np.maximum(z, 0.0)

        if self.activation == "sigmoid":
            # sigmoid(z) = exp(-log(1 + exp(-z))). logaddexp evaluates the inner
            # logarithm without overflowing for large negative z.
            return np.exp(-np.logaddexp(0.0, -z))

        raise ValueError(f"Unsupported activation {self.activation!r}")

    def _weightInit(self, input_size):
        """
        Initialize the weights and bias based on the input size.

        Weights are drawn from a normal distribution with mean 0 and standard
        deviation ``1 / input_size``; the bias starts at zero. Any optimizer
        state left over from previous training is reset.

        Args:
            input_size (int): Size of the input (units of the previous layer).

        Notes:
            Only executed for layers other than the input layer.
        """

        if self.input_layer:
            return # input_layer doesn't need weights

        # loc -> mean, scale -> standard deviation (not the variance)
        self.w = np.random.normal(loc=0, scale=1 / input_size, size=(input_size, self.units))
        self.bias = np.zeros((1, self.units))

        # Fresh parameters must not inherit moments from an earlier training run.
        self.m = 0
        self.v = 0
        self.m_bias = 0
        self.v_bias = 0
        self._step = 0

    def _activationDerivative(self):
        """
        Compute the derivative of the activation function at the cached output.

        Returns:
            numpy.ndarray | int: Derivative of the activation function
            (the scalar 1 for the linear activation).

        Raises:
            ValueError: If ``self.activation`` is not a supported name.

        Notes:
            Only supports the "linear", "relu", and "sigmoid" activation functions.
            Both non-linear derivatives are expressed through the output stored
            by the last ``call``.
        """

        if self.activation == "linear":
            return 1

        if self.activation == "relu":
            return (self._output > 0) * 1

        if self.activation == "sigmoid":
            return self._output * (1 - self._output)

        raise ValueError(f"Unsupported activation {self.activation!r}")

    def _setGrad(self, grad):
        """
        Calculate the gradients of weights and bias for backpropagation.

        Args:
            grad (numpy.ndarray): Gradient of the loss with respect to this
                layer's output (coming from the following layer or the loss).

        Returns:
            numpy.ndarray: Gradient with respect to this layer's input, to be
            passed to the previous layer. ``None`` for an input layer.

        Notes:
            Only executed for layers other than the input layer.
        """

        if self.input_layer:
            return

        grad = grad * self._activationDerivative()
        self._weight_gradient = self._input.T @ grad

        if self.use_bias:
            self._bias_gradient = grad.sum(axis=0, keepdims=True)

        return grad @ self.w.T

    def _updateGrad(self, learning_rate, iteration=None, eps = 0.00001):
        """
        Update the weights and bias based on the computed gradients.

        Args:
            learning_rate (float): Learning rate for the update.
            iteration (int, optional): Zero-based index of the current training
                step, used for Adam's bias correction. When omitted, the layer
                counts its own Adam steps.
            eps (float, optional): Small constant added to the denominator of
                the Adam update to avoid division by zero.

        Notes:
            Only executed for layers other than the input layer.
        """
        if self.input_layer:
            return

        if self.optimizer == 'adam':
            if iteration is None:
                self._step += 1
            else:
                self._step = iteration + 1

            # Bias-correction factors compensating for the zero-initialised moments.
            correction1 = 1 - self.b1 ** self._step
            correction2 = 1 - self.b2 ** self._step

            self.m = self.b1 * self.m + (1 - self.b1) * self._weight_gradient
            self.v = self.b2 * self.v + (1 - self.b2) * np.square(self._weight_gradient)
            self.w -= learning_rate * (self.m / correction1) / (np.sqrt(self.v / correction2) + eps)

            if self.use_bias:
                self.m_bias = self.b1 * self.m_bias + (1 - self.b1) * self._bias_gradient
                self.v_bias = self.b2 * self.v_bias + (1 - self.b2) * np.square(self._bias_gradient)
                self.bias -= (learning_rate * (self.m_bias / correction1)
                              / (np.sqrt(self.v_bias / correction2) + eps))

        else:
            self.w -= learning_rate * self._weight_gradient
            if self.use_bias:
                self.bias -= learning_rate * self._bias_gradient

    def call(self, X):
        """
        Perform a forward pass through the layer.

        Args:
            X (numpy.ndarray): Input to the layer.

        Returns:
            numpy.ndarray: Output of the layer after applying the activation
            function. An input layer returns ``X`` unchanged.
        """
        if self.input_layer:
            return X

        # Cache input and output: _setGrad needs both during backpropagation.
        self._input = X
        self._output = self.activationFunction(X @ self.w + self.bias)

        return self._output

In [28]:
class NeauralNetwork:
    """
    A feed-forward neural network trained with full-batch backpropagation.

    The network owns an ordered list of layers. The first layer is treated as
    the input layer (it is never given weights and is skipped during
    backpropagation); every later layer is sized from the ``units`` of the
    layer before it.
    """

    def __init__(
            self,
            layers: list,
            loss_function: str = "mse",
            learning_rate = 0.01,
            max_iter=1000,
            verbose: bool = False):
        """
        Initialize a neural network.

        Args:
            layers (list): List of Layer objects defining the network architecture.
            loss_function (str, optional): Loss function to use. Defaults to "mse".
            learning_rate (float, optional): Learning rate for gradient descent. Defaults to 0.01.
            max_iter (int, optional): Maximum number of iterations for training. Defaults to 1000.
            verbose (bool, optional): Whether to display training progress. Defaults to False.
        """

        self.layers = layers
        self.loss_function = loss_function
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.verbose = verbose

        # Number of backward passes performed so far; handed to the layers as
        # the zero-based step index of each parameter update.
        self._iteration = 0

        # Weights initializing: each layer's input size is the previous layer's unit count.
        for previous, current in zip(self.layers, self.layers[1:]):
            current._weightInit(previous.units)

    def lossFunction(self, y_true, y_pred):
        """
        Compute the loss between the true values and predicted values.

        For "mse" this is half the mean (over samples) of the squared Euclidean
        error of each row.

        Args:
            y_true (numpy.ndarray): True values, one row per sample.
            y_pred (numpy.ndarray): Predicted values, same shape as ``y_true``.

        Returns:
            float: Loss value.

        Raises:
            ValueError: If ``self.loss_function`` is not supported.
        """

        if self.loss_function == "mse":
            return 0.5 * np.mean(np.sum(np.square(y_pred - y_true), axis=1))

        # Further loss functions can be added here.
        raise ValueError(f"Unsupported loss function {self.loss_function!r}")

    def _lossFunctionDerivative(self, y_pred, y_true):
        """
        Compute the derivative of the loss function with respect to the predictions.

        Args:
            y_pred (numpy.ndarray): Predicted values.
            y_true (numpy.ndarray): True values.

        Returns:
            numpy.ndarray: Derivative of the loss function, same shape as ``y_pred``.

        Raises:
            ValueError: If ``self.loss_function`` is not supported.
        """

        if self.loss_function == "mse":
            return 1 / len(y_pred) * (y_pred - y_true)

        # Further loss functions can be added here.
        raise ValueError(f"Unsupported loss function {self.loss_function!r}")

    def fit(self, X, y):
        """
        Train the neural network on the given input-output pairs.

        Runs ``max_iter`` full-batch iterations, each consisting of one forward
        pass followed by one backward pass.

        Args:
            X (numpy.ndarray): Input data, one row per sample.
            y (numpy.ndarray): Output data. A 1-D array is reshaped to a column
                vector so that it lines up with the network output instead of
                broadcasting against it.
        """

        X = np.asarray(X)
        y = np.asarray(y)
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        for epoch in range(self.max_iter):
            pred = self.forward(X)

            if self.verbose:
                filled = int(epoch / self.max_iter * 20)
                bar = filled * '=' + '>' + (20 - filled) * '-'
                print(f"\r {epoch}/{self.max_iter}: [{bar}] - loss: {self.lossFunction(y, pred)}", end="")
            self.backward(pred, y)

        if self.verbose:
            # Re-evaluate after the last update so the reported loss matches the
            # final weights (and is defined even when max_iter is 0).
            final_loss = self.lossFunction(y, self.forward(X))
            print(f"\r {self.max_iter}/{self.max_iter}: [{21 * '='}] - loss: {final_loss}")

    def predict(self, X):
        """
        Perform predictions using the trained neural network.

        Args:
            X (numpy.ndarray): Input data.

        Returns:
            numpy.ndarray: Predicted output data.
        """

        return self.forward(X)

    def forward(self, X):
        """
        Perform a forward pass through the network.

        Args:
            X (numpy.ndarray): Input data.

        Returns:
            numpy.ndarray: Output of the last layer.
        """

        # Work on a copy so the caller's array is never aliased by a layer cache.
        activations = np.copy(X)

        for layer in self.layers:
            activations = layer.call(activations)
        return activations

    def backward(self, y_pred, y_true):
        """
        Perform backpropagation to update the weights of the network.

        Walks the layers from last to first (excluding the input layer). Each
        layer first computes its gradients and the gradient for the layer
        before it, and only then updates its own parameters.

        Args:
            y_pred (numpy.ndarray): Predicted values.
            y_true (numpy.ndarray): True values.
        """

        gradient = self._lossFunctionDerivative(y_pred, y_true)

        for layer in reversed(self.layers[1:]):
            gradient = layer._setGrad(gradient)
            # The layer's update signature takes the step index; pass it explicitly.
            layer._updateGrad(self.learning_rate, self._iteration)

        self._iteration += 1

In [29]:
# Seed NumPy's global RNG before the network is built: the layer weights are
# drawn from np.random at construction time, so without a seed every kernel
# restart would train a different model.
np.random.seed(42)

nn = NeauralNetwork(layers=[
        Layer(units=10, input_layer=True),
        Layer(units=40, activation="sigmoid"),
        Layer(units=40, activation="relu"),
        Layer(units=1),
    ],
    loss_function = "mse",
    learning_rate=0.001, 
    max_iter=100000,
    verbose=True,
)
# The network outputs one column per sample, so the targets must be a column
# vector too. reshape(-1, 1) is idempotent, so re-running this cell is safe.
y_reg = y_reg.reshape(-1, 1)
nn.fit(X_reg, y_reg)



In [30]:
# Predictions of the trained network on the training inputs, for comparison with y_reg.
nn.predict(X_reg)

array([[ -81.64367582],
       [ 437.86437198],
       [ 104.15502486],
       [ 168.70187103],
       [ 293.72877761],
       [  34.34481735],
       [  82.08138321],
       [ 154.43813612],
       [  91.2402544 ],
       [ 167.28073431],
       [ -23.0387585 ],
       [ -76.52328597],
       [ -99.68779939],
       [-184.07195072],
       [-187.26427736],
       [  80.30344265],
       [  41.03782639],
       [  53.67150682],
       [  89.53823638],
       [ 210.3238773 ],
       [ -40.06460916],
       [ 197.24561364],
       [ -56.88269642],
       [-268.83324876],
       [  16.78730897],
       [  -1.80806096],
       [ 196.39720247],
       [-153.89219543],
       [  28.97055044],
       [-414.81335343],
       [ 207.68697853],
       [ 165.21614808],
       [ -27.8684952 ],
       [-278.11827871],
       [  33.62812399],
       [ 107.13976681],
       [ -68.69481842],
       [-208.85049207],
       [ 178.23657958],
       [ -97.17164961],
       [ 109.5142967 ],
       [-118.130

In [31]:
# Training targets (column vector after the earlier reshape), shown to compare with the predictions.
y_reg

array([[ -81.64367582],
       [ 437.86437198],
       [ 104.15502486],
       [ 168.70187103],
       [ 293.72877761],
       [  34.34481735],
       [  82.08138321],
       [ 154.43813612],
       [  91.2402544 ],
       [ 167.28073431],
       [ -23.0387585 ],
       [ -76.52328597],
       [ -99.68779939],
       [-184.07195072],
       [-187.26427736],
       [  80.30344265],
       [  41.03782639],
       [  53.67150682],
       [  89.53823638],
       [ 210.3238773 ],
       [ -40.06460916],
       [ 197.24561364],
       [ -56.88269642],
       [-268.83324876],
       [  16.78730897],
       [  -1.80806096],
       [ 196.39720247],
       [-153.89219543],
       [  28.97055044],
       [-414.81335343],
       [ 207.68697853],
       [ 165.21614808],
       [ -27.8684952 ],
       [-278.11827871],
       [  33.62812399],
       [ 107.13976681],
       [ -68.69481842],
       [-208.85049207],
       [ 178.23657958],
       [ -97.17164961],
       [ 109.5142967 ],
       [-118.130

In [None]:
# import tensorflow as tf

# alg = tf.keras.Sequential([
#     tf.keras.layers.Input(10),
#     tf.keras.layers.Dense(20, activation='relu'),
#     tf.keras.layers.Dense(1, activation='relu'),
# ])

# alg.compile(optimizer='adam', loss='mse', )

# alg.fit(X_diab, y_diab, epochs=1000, batch_size=64)

In [None]:
# alg.predict(X_diab)