In [78]:
import numpy as np 
from typing import Literal

In [79]:
class NN:
    """Minimal fully connected feed-forward network trained by gradient descent.

    The network is an ordered list of ``NN.Layer`` objects. ``forward_prop``
    feeds the input through every layer in order; ``train`` back-propagates
    the error ``y_hat - Y`` through the layers in reverse order, letting each
    layer update its own parameters.
    """

    class Math:
        """Activation functions and their derivatives, selectable by name."""

        @classmethod
        def _sigmoid(cls, Z: np.ndarray):
            """Element-wise logistic function ``1 / (1 + exp(-Z))``."""
            return 1/(1+np.exp(-Z))

        @classmethod
        def _sigmoid_derivative(cls, Z: np.ndarray):
            """Element-wise derivative of the sigmoid, evaluated at ``Z``."""
            s = cls._sigmoid(Z)
            return s * (1-s)

        @classmethod
        def _reLU_derivative(cls, Z: np.ndarray):
            """Element-wise ReLU derivative: 1.0 where ``Z > 0``, otherwise 0.0."""
            return (Z > 0).astype(float)

        @classmethod
        def _reLU(cls, Z: np.ndarray):
            """Element-wise ReLU: keeps positive entries of ``Z``, zeroes the rest."""
            return Z * (Z > 0)

        # The lambdas defer the ``NN.Math`` lookup until call time, because the
        # class is not yet bound to its name while this body is being executed.
        activation_derivative_fun_by_str = {
            "sigmoid": lambda Z: NN.Math._sigmoid_derivative(Z),
            "reLU": lambda Z: NN.Math._reLU_derivative(Z)
        }

        activation_fun_by_str = {
            "sigmoid": lambda Z: NN.Math._sigmoid(Z),
            "reLU": lambda Z: NN.Math._reLU(Z)
        }

    class Layer:
        """A dense layer: ``activations = f(X @ W + B)``.

        ``input_amount`` is not a constructor argument; it is assigned by the
        owning ``NN`` (in ``NN.__init__`` or ``NN.add_layer``) before
        ``init_params`` is called.
        """

        def __init__(self, neurons_amount: int, activation: Literal["sigmoid", "reLU"], is_first: bool=False):
            """Store the layer configuration.

            Args:
                neurons_amount: Number of neurons (output width) of the layer.
                activation: Name of the activation function; must be a key of
                    ``NN.Math.activation_fun_by_str`` (``KeyError`` otherwise).
                is_first: Whether this is the first layer of the network. The
                    owning ``NN`` overwrites this value when the layer is attached.
            """
            self.activation_fun = NN.Math.activation_fun_by_str[activation]
            self.activation_derivative_fun = NN.Math.activation_derivative_fun_by_str[activation]
            self.neurons_amount = neurons_amount
            self.activation = activation
            self.is_first = is_first

        def init_params(self):
            """Create the parameters: ``W`` from a standard normal, ``B`` as zeros.

            Requires ``self.input_amount`` to have been set beforehand.
            """
            self.W = np.random.randn(self.input_amount, self.neurons_amount, )
            self.B = np.zeros((1, self.neurons_amount))

        def activate(self, X: np.ndarray):
            """Run the forward pass and cache ``X``, ``Z`` and the activations.

            Args:
                X: Input whose last axis has length ``input_amount``.

            Returns:
                ``activation_fun(X @ W + B)``; the last axis has length
                ``neurons_amount``.
            """
            # Cached because backward() needs both the input and the pre-activation.
            self.X = X
            self.Z = self.X @ self.W  + self.B
            self.activations = self.activation_fun(self.Z)

            return self.activations

        def backward(self, DJ_DA: np.ndarray, lr: float):
            """Back-propagate through this layer and update ``W`` and ``B`` in place.

            Must be called after ``activate``, which caches ``X`` and ``Z``.
            The cached input has to be 3-D (the transpose below swaps its last
            two axes) -- presumably (samples, 1, input_amount), as in the
            notebook's XOR data.

            Args:
                DJ_DA: How much the activation of this layer affects the
                    error. For a non-final layer this is the ``DJ_DX``
                    returned by the layer ahead, since this layer's
                    activations are that layer's inputs.
                lr: Learning rate, the fraction of the gradient (averaged
                    over axis 0) that is subtracted from the parameters.
                    E.g. ``0.1`` moves the parameters by 10% of the gradient,
                    in the opposite direction.

            Returns:
                The gradient with respect to this layer's input, or ``0`` for
                the first layer (nothing precedes it).
            """
            DJ_DZ = DJ_DA * self.activation_derivative_fun(self.Z)

            # Per-sample weight gradient: X^T @ dJ/dZ.
            DJ_DW = (self.X.transpose(0, 2, 1) @ DJ_DZ )

            # Computed before the update so it uses the weights of the forward pass.
            DJ_DX = DJ_DZ @ self.W.transpose(1, 0) if self.is_first == False else 0

            self.W -=  lr *  np.mean(DJ_DW, axis=0)
            self.B -=  lr *  np.mean(DJ_DZ, axis=0)

            return DJ_DX


    def __init__(self, layers: list[Layer], input_amount: int):
        """Wire the given layers together and initialise their parameters.

        Args:
            layers: Layers in forward order. Each one receives its
                ``input_amount`` (the previous layer's ``neurons_amount``, or
                the network's ``input_amount`` for the first layer), has
                ``init_params`` called and its ``is_first`` flag set.
            input_amount: Number of input features of the network.
        """
        self.input_amount = input_amount

        for i, layer in enumerate(layers):

            layer_input_amount = layers[i-1].neurons_amount if i!=0 else self.input_amount

            layer.input_amount = layer_input_amount
            layer.init_params()
            layer.is_first = i == 0

        self.layers = layers

    def add_layer(self, layer: Layer):
        """Append ``layer`` to the network and initialise its parameters.

        The layer's input width is the previous layer's ``neurons_amount``,
        or the network's ``input_amount`` when the network has no layers yet.
        """
        if len(self.layers) != 0:
            layer_input_amount = self.layers[-1].neurons_amount
            is_first = False
        else:
            layer_input_amount = self.input_amount
            is_first = True

        # Must run for BOTH branches: previously these two statements lived
        # inside the ``else`` and layers added to a non-empty network were
        # left without ``input_amount``, ``W`` and ``B``.
        layer.input_amount = layer_input_amount
        layer.init_params()
        layer.is_first = is_first

        self.layers.append(layer)

    def forward_prop(self, input: np.ndarray):
        """Feed ``input`` through every layer in order; return the last activations."""
        activations: np.ndarray = input

        for layer in self.layers:

            activations = layer.activate(activations)

        return activations

    def train(self, X, Y, epochs: int = 100, optimizer = '', batch_percentage = 100, glr = 1, ):
        """Train on the whole of ``X``/``Y`` for ``epochs`` gradient-descent steps.

        Args:
            X: Training inputs, passed unchanged to ``forward_prop``.
            Y: Targets, broadcast-compatible with the network output.
            epochs: Number of update steps.
            optimizer: Accepted but currently unused.
            batch_percentage: Accepted but currently unused; every step uses
                all of ``X``.
            glr: Learning rate handed to every layer's ``backward``.

        Prints the cost every 100 epochs, measured after that epoch's update.
        """
        y_hat = self.forward_prop(X)

        L = len(self.layers)

        for epoch in range(epochs):

            # Derivative of the squared-error term (1/2)(y_hat - Y)^2 w.r.t. y_hat.
            DJ_DA = y_hat - Y

            for i in reversed(range(L)):

                layer = self.layers[i]
                DJ_DA = layer.backward(DJ_DA, glr)

            # Refresh the predictions (and the layers' caches) with the new parameters.
            y_hat = self.forward_prop(X)

            if epoch % 100 == 0:
                print(f"Cost for epoch {epoch+1}: {self.cost(y_hat, Y)}")

    def cost(self, y_hat, Y):
        """Half of the mean squared error between ``y_hat`` and ``Y``."""
        return (1/2) * np.mean((y_hat - Y)**2)

    def bin_predict(self, X):
        """Return the network output at index ``[0, 0]`` rounded with ``np.round``.

        Only that single entry of the output is used, so this is meant for
        one sample at a time with a single output neuron.
        """
        return np.round(self.forward_prop(X)[0, 0])




In [80]:
# 2-3-1 network for XOR: one hidden sigmoid layer of 3 neurons feeding a
# single sigmoid output neuron.
hidden_layer = NN.Layer(neurons_amount=3, activation='sigmoid')
output_layer = NN.Layer(neurons_amount=1, activation='sigmoid')

nn = NN(layers=[hidden_layer, output_layer], input_amount=2)

In [81]:
# The four XOR inputs, each a 1 x 2 row vector, in the order 00, 01, 10, 11.
X_train_1, X_train_2, X_train_3, X_train_4 = (
    np.array([[first_bit, second_bit]])
    for first_bit in (0, 1)
    for second_bit in (0, 1)
)

# Stack into a (4, 1, 2) batch: one 1 x 2 sample per entry along axis 0.
X_train = np.stack([X_train_1, X_train_2, X_train_3, X_train_4])

# XOR targets in the same order as the inputs, shaped (4, 1, 1).
Y = np.array([[[target]] for target in (0, 1, 1, 0)])

nn.train(X_train, Y, epochs=10000, glr=1)



Cost for epoch 1: 0.1659004836138328
Cost for epoch 101: 0.12416569181717774
Cost for epoch 201: 0.12292477734638053
Cost for epoch 301: 0.12092037083412813
Cost for epoch 401: 0.11737565785637757
Cost for epoch 501: 0.11152585005756052
Cost for epoch 601: 0.10306294371626813
Cost for epoch 701: 0.09119904346181146
Cost for epoch 801: 0.07319818461920005


Cost for epoch 901: 0.050465515106219494
Cost for epoch 1001: 0.03211677691825069
Cost for epoch 1101: 0.021102827637749486
Cost for epoch 1201: 0.014832872834363034
Cost for epoch 1301: 0.01108352702267753
Cost for epoch 1401: 0.008687531054968542
Cost for epoch 1501: 0.007062371050368834
Cost for epoch 1601: 0.005904577222270539
Cost for epoch 1701: 0.005046258155491403
Cost for epoch 1801: 0.004389013777959933
Cost for epoch 1901: 0.00387216510180388
Cost for epoch 2001: 0.003456621927618884
Cost for epoch 2101: 0.0031162412929185753
Cost for epoch 2201: 0.002832967948142407
Cost for epoch 2301: 0.0025939811730818917
Cost for epoch 2401: 0.002389954658887714
Cost for epoch 2501: 0.0022139590651598516
Cost for epoch 2601: 0.002060749084052529
Cost for epoch 2701: 0.0019262879526781077
Cost for epoch 2801: 0.0018074228050990902
Cost for epoch 2901: 0.0017016583055282706
Cost for epoch 3001: 0.0016069957954575188
Cost for epoch 3101: 0.0015218170218619215
Cost for epoch 3201: 0.0014447

In [83]:
# Show the rounded network output for each of the four XOR inputs.
for label, sample in (
    ("XOR 00: ", X_train_1),
    ("XOR 01: ", X_train_2),
    ("XOR 10: ", X_train_3),
    ("XOR 11: ", X_train_4),
):
    print(label, nn.bin_predict(sample))



XOR 00:  0.0
XOR 01:  1.0
XOR 10:  1.0
XOR 11:  0.0
