In [1]:
import numpy as np
# Seed NumPy's global RNG so the np.random.uniform weight/bias initialisation
# performed by the layer constructors below is reproducible from run to run.
np.random.seed(0)


def square_loss(yhat, y):
    """Return the element-wise halved squared error ``(y - yhat)**2 / 2``.

    Args:
        yhat: Predicted value(s).
        y: Target value(s); must support ``y - yhat``.

    Returns:
        The per-element loss (no reduction over elements is performed).
    """
    residual = y - yhat
    squared = np.square(residual)
    return squared / 2

def grad_square_loss(yhat, y):
    """Return the derivative of ``square_loss`` with respect to ``yhat``.

    For the loss ``(y - yhat)**2 / 2`` that derivative is simply
    ``yhat - y``, evaluated element-wise.

    Args:
        yhat: Predicted value(s).
        y: Target value(s).
    """
    gradient = yhat - y
    return gradient

def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or array of pre-activation values.

    Returns:
        Value(s) in the interval (0, 1) with the same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def grad_sigmoid(z):
    """Return ``z * (1 - z)`` element-wise.

    This equals the sigmoid derivative only when ``z`` is already the
    sigmoid *output*; the layer classes in this file call it with the
    activation ``a``, not with the pre-activation value.

    Args:
        z: Sigmoid activation value(s).
    """
    complement = 1 - z
    return np.multiply(z, complement)

class InputLayer:
    """Pass-through layer that simply holds the network's input data."""

    def __init__(self, data):
        """Store ``data`` so it can be read back as this layer's activation."""
        self._a = data

    @property
    def a(self):
        """Read-only activation of the layer: the data given at construction."""
        stored = self._a
        return stored

class HiddenLayer:
    """Fully connected hidden layer updated by plain gradient descent.

    Weights ``w`` have shape ``(prev_nodes, num_nodes)`` and biases ``b``
    have shape ``(1, num_nodes)``; both are initialised with
    ``np.random.uniform``. Only the ``"SIGMOID"`` activation is implemented.
    """

    def __init__(self, num_nodes, prev_nodes, activation="SIGMOID", lr=0.1):
        """Create the layer.

        Args:
            num_nodes: Number of units in this layer.
            prev_nodes: Number of units in the layer feeding this one.
            activation: Activation name; only ``"SIGMOID"`` is supported.
            lr: Learning rate applied to the weight and bias updates.
        """
        self.w = np.random.uniform(size=(prev_nodes, num_nodes))
        self.b = np.random.uniform(size=(1, num_nodes))
        self.activation = activation
        self.lr = lr

    def forward(self, i):
        """Compute the layer's activation for input ``i``.

        The input and the local activation derivative are cached on the
        instance (``self.i`` and ``self.dz``) for use by ``backprop``.

        Args:
            i: Input array; expected to be 2-D, ``(samples, prev_nodes)``.

        Returns:
            The activation ``sigmoid(i . w + b)``.

        Raises:
            ValueError: If ``self.activation`` is not ``"SIGMOID"``.
        """
        # Fail with a clear message instead of hitting an unbound local
        # further down when an unimplemented activation was requested.
        if self.activation != "SIGMOID":
            raise ValueError(
                f"unsupported activation {self.activation!r}; "
                "only 'SIGMOID' is implemented"
            )

        self.i = i
        z = np.dot(self.i, self.w) + self.b
        a = sigmoid(z)
        # grad_sigmoid expects the activation itself, not z.
        self.dz = grad_sigmoid(a)
        return a

    def backprop(self, error, w1):
        """Propagate the error through this layer and update its parameters.

        Must be called after ``forward``, which caches ``self.i`` and
        ``self.dz``.

        Args:
            error: Error term (delta) of the following layer.
            w1: Weights of the following layer as they were during the
                forward pass, shape ``(num_nodes, next_nodes)``.

        Returns:
            A tuple ``(w, error)``: a copy of this layer's weights taken
            *before* the update, and this layer's own error term.
        """
        # Pull the downstream delta back through the downstream weights and
        # scale by this layer's activation derivative.
        self.error = np.dot(error, w1.T)*self.dz
        self.dw = np.dot(self.i.T, self.error)
        # Bias gradient is the delta summed over the sample axis.
        self.db = np.sum(self.error, axis=0, keepdims=True)
        # Snapshot the weights used in the forward pass; the caller hands
        # them to the preceding layer's backprop.
        w = self.w.copy()

        self.w = self.w - self.lr*self.dw
        self.b = self.b - self.lr*self.db
        return w, self.error

class OutputLayer:
    """Fully connected output layer updated by plain gradient descent.

    Weights ``w`` have shape ``(prev_nodes, num_nodes)`` and biases ``b``
    have shape ``(1, num_nodes)``; both are initialised with
    ``np.random.uniform``. Only the ``"SIGMOID"`` activation is implemented.
    """

    def __init__(self, num_nodes, prev_nodes, activation="SIGMOID", lr=0.1):
        """Create the layer.

        Args:
            num_nodes: Number of output units.
            prev_nodes: Number of units in the layer feeding this one.
            activation: Activation name; only ``"SIGMOID"`` is supported.
            lr: Learning rate applied to the weight and bias updates.
        """
        self.w = np.random.uniform(size=(prev_nodes, num_nodes))
        self.b = np.random.uniform(size=(1, num_nodes))
        self.activation = activation
        self.lr = lr

    def forward(self, i):
        """Compute the network output for input ``i``.

        The input and the local activation derivative are cached on the
        instance (``self.i`` and ``self.dz``) for use by ``backprop``.

        Args:
            i: Input array; expected to be 2-D, ``(samples, prev_nodes)``.

        Returns:
            The activation ``sigmoid(i . w + b)``.

        Raises:
            ValueError: If ``self.activation`` is not ``"SIGMOID"``.
        """
        # Fail with a clear message instead of hitting an unbound local
        # further down when an unimplemented activation was requested.
        if self.activation != "SIGMOID":
            raise ValueError(
                f"unsupported activation {self.activation!r}; "
                "only 'SIGMOID' is implemented"
            )

        self.i = i
        z = np.dot(self.i, self.w) + self.b
        a = sigmoid(z)
        # grad_sigmoid expects the activation itself, not z.
        self.dz = grad_sigmoid(a)
        return a

    def backprop(self, error, w1):
        """Turn the loss gradient into a delta and update the parameters.

        Must be called after ``forward``, which caches ``self.i`` and
        ``self.dz``.

        Args:
            error: Gradient of the loss with respect to this layer's output.
            w1: Unused; accepted only so the signature matches
                ``HiddenLayer.backprop``.

        Returns:
            A tuple ``(w, error)``: a copy of this layer's weights taken
            *before* the update, and this layer's error term.
        """
        # No downstream layer: the delta is the loss gradient scaled by the
        # activation derivative.
        self.error = np.multiply(error, self.dz)
        self.dw = np.dot(self.i.T, self.error)
        # Bias gradient is the delta summed over the sample axis.
        self.db = np.sum(self.error, axis=0, keepdims=True)
        # Snapshot the weights used in the forward pass; the caller hands
        # them to the preceding layer's backprop.
        w = self.w.copy()

        self.w = self.w - self.lr*self.dw
        self.b = self.b - self.lr*self.db
        return w, self.error


class Network:
    """Feed-forward network assembled from the layer classes in this file.

    ``layers[0]`` is an ``InputLayer`` holding the training data; every
    following entry is a trainable layer exposing ``forward`` and
    ``backprop``.
    """

    def __init__(self, loss="SQUARE", lr=0.1):
        """Create an empty network.

        Args:
            loss: Loss name, stored as ``loss_type``. Training currently
                always uses the squared-error gradient regardless of it.
            lr: Learning rate handed to every layer created by ``build``.
        """
        self.loss_type = loss
        self.layers = []
        self.lr = lr

    def build(self, X, hidden_layers, output_nodes, y):
        """Create the layer stack for the given data.

        Calling ``build`` again replaces any previously built layers.

        Args:
            X: Training inputs; ``X.shape[1]`` is taken as the feature count.
            hidden_layers: Iterable with the unit count of each hidden layer.
            output_nodes: Number of units in the output layer.
            y: Training targets, stored for use by ``train``.
        """
        self.y = y
        # Start from a fresh list so a second build() does not stack a new
        # network behind the layers left over from the previous call.
        layers = [InputLayer(X)]

        prev_nodes = X.shape[1]
        for nodes in hidden_layers:
            layers.append(HiddenLayer(nodes, prev_nodes, lr=self.lr))
            prev_nodes = nodes

        layers.append(OutputLayer(output_nodes, prev_nodes, lr=self.lr))
        self.layers = layers

    def forward(self):
        """Run the stored input through every layer and return the output."""
        a = self.layers[0].a
        for layer in self.layers[1:]:
            a = layer.forward(a)
        return a

    def backprop(self, error):
        """Push ``error`` backwards through all trainable layers.

        Each layer's ``backprop`` updates that layer's parameters and
        returns its pre-update weights and its error term, which are then
        passed to the layer before it.

        Args:
            error: Gradient of the loss with respect to the network output.
        """
        w = None  # the last layer has no downstream weights
        for layer in reversed(self.layers[1:]):
            w, error = layer.backprop(error, w)

    def train(self, epochs):
        """Run ``epochs`` forward/backward passes and print the predictions.

        The printed output comes from the last forward pass, i.e. it is
        computed before that epoch's weight update. When no epoch runs,
        the current (untrained) predictions are printed instead.

        Args:
            epochs: Number of full-batch training iterations.
        """
        output = None
        for _ in range(epochs):
            output = self.forward()
            error = grad_square_loss(output, self.y)
            self.backprop(error)
        if output is None:
            # No epoch ran: still report something meaningful rather than
            # failing on an unassigned variable.
            output = self.forward()
        print(f"output: {output}")

In [2]:
# XOR truth table: one row per input pair, one target value per row.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([[0], [1], [1], [0]])

# One hidden layer with two units feeding a single output unit.
network = Network()
network.build(X, hidden_layers=[2], output_nodes=1, y=y)
network.train(epochs=20000)

output: [[0.03357583]
 [0.97037109]
 [0.9703608 ]
 [0.03115384]]
