In [120]:
import numpy as np

In [121]:
x = np.random.rand(2,1)
y = np.ones((1,1))

In [122]:
x, y

(array([[0.34743453],
        [0.5971142 ]]), array([[1.]]))

In [123]:
# Activation Entity
class ReLU: 
    
    def __init__(self):
        pass
    
    def forward(self, Z):
        return np.where(Z>0, Z, np.zeros_like(Z))
        
    def __call__(self, Z):
        return self.forward(Z)
    
    def grad(self, Z):
        return {"i": self.grad_i(Z)}
        
    def grad_i(self, Z):
        return np.diag(np.where(Z>0, np.ones_like(Z), np.zeros_like(Z)).reshape(-1))
        
    def update(self, *args, **kwargs):
        pass
    
class Sigmoid:
    def  __init__(self):
        pass
    
    def forward(self, Z):
        return 1/(1+np.exp(-Z))
    
    def __call__(self, Z):
        return self.forward(Z)
    
    def grad(self, Z):
        return {"i": self.grad_i(Z)}
    
    def grad_i(self, Z):
        y = self(Z)
        return np.diag((y*(1-y)).reshape(-1))
    
    def update(self, *args, **kwargs):
        pass

In [182]:
# Layer entity
class Dense: 
    
    def __init__(self, no_of_neurons, input_size):
        self.no_of_neurons = no_of_neurons
        
        self.W = np.random.randn(input_size, no_of_neurons)
        self.b = np.random.randn(no_of_neurons, 1)
    
    def __call__(self, X):
        return self.forward(X)
    
    def grad(self, X):
        return {"i": self.grad_i(X), "b": self.grad_b(X), "w": self.grad_w(X)}
    
    def grad_i(self, X):
        return self.W.T
    
    def grad_b(self, X):
        return np.identity(self.no_of_neurons)
    
    def grad_w(self, X):
        m, n = self.W.shape
        return (np.repeat(np.eye(n), repeats=m, axis=0)*np.repeat(np.expand_dims(X, 0), repeats=n, axis=0).reshape(m*n, 1)).reshape(n,m,n)
        
    def forward(self, X):
        return np.matmul(self.W.T, X) + self.b        
    
    def update(self, grad, func):
        
        self.W = func(self.W, grad["w"])
        self.b = func(self.b, grad["b"])

In [285]:
# Model entity 
class Sequential:
    
    def __init__(self, layers=[]):
        self.layers = layers
        
    def __call__(self, X):
        return self.forward(X)
    
    def forward(self, X, with_grad=False):
        grads = []
        for layer in self.layers:
            if with_grad:
                grads.append(layer.grad(X))
            X = layer.forward(X)
        return (X, grads) if with_grad else X
    
    def backward(self, grads, loss_grad):
        loss_grads = []
        grads.reverse()
        
        for grad in grads:
            g = {}
            if grad.get("w", None) is not None: 
                g["w"] = np.einsum("ij,jkl->ikl", loss_grad, grad["w"])[0]
            if grad.get("b", None) is not None:
                g["b"] = np.einsum("ij,jk->ik", loss_grad, grad["b"]).T
                
            g["i"] = np.matmul(loss_grad, grad["i"])
            loss_grads.append(g)
            loss_grad = g["i"]
        
        loss_grads.reverse()
        
        return loss_grads        
    
    def step(self, X, y_true, loss, optim):
        y_pred, grads = self.forward(X, with_grad=True)
        l = loss(y_pred=y_pred, y_true=y_true)
        loss_grads = self.backward(grads, loss.grad_i(y_pred=y_pred, y_true=y_true))
        for (layer, grad) in zip(self.layers, loss_grads):
            optim.step(layer, grad)
        
    def fit(self, X, y_true, loss, optim, n_epochs):
        for i in range(n_epochs):
            self.step(X, y_true, loss, optim)
        
    def add(self, layer):
        self.layers.append(layer)
        return self

In [286]:
class Optimizer:
    
    def __init__(self):
        self.optim = self.optimizer()
   
    def optimizer(self):
        raise NotImplementedError("optimizer method not defined")
        
    def step(self, layer, grad):
        layer.update(grad, self.optim)
    

class SGD(Optimizer):
    
    def __init__(self, lr):
        self.lr = lr
        super().__init__()
    
    def optimizer(self):
        def func(w, dw):
            assert w.shape == dw.shape, f"shape mismatch {w.shape} and {dw.shape}"
            return w - self.lr*dw
        return func

In [287]:
# Loss entity
class BinaryCrossEntropy:
    
    def __init__(self):
        pass
    
    def __call__(self, y_pred, y_true):
        return self.forward(y_pred, y_true)
    
    def forward(self, y_pred, y_true):
        return np.where(y_true == 0, -np.log(1-y_pred), -np.log(y_pred))
    
    def grad_i(self, y_pred, y_true):
        return np.where(y_true == 0, -1/(1-y_pred), -1/y_pred)
        

In [316]:
layer1 = Dense(2, 2)
act1 = ReLU()
layer2 = Dense(2, 2)
act2 = ReLU()
layer3 = Dense(1, 2)
act3 = Sigmoid()

In [317]:
model = Sequential(layers=[layer1, act1, layer2, act2, layer3, act3])

In [323]:
loss = BinaryCrossEntropy()
optim = SGD(0.01)

In [324]:
y_pred, grads = model.forward(x, with_grad=True)

In [327]:
for i in range(500):
    model.step(x, y, loss, optim=optim)
    y_pred = model(x)
    print("\r"+f"predicted: {y_pred[0][0]}, true: {y[0][0]}, loss: {loss(y_pred=y_pred, y_true=y)[0][0]}", end="")

predicted: 0.8481892913872623, true: 1.0, loss: 0.16465144712478388predicted: 0.8483846659477873, true: 1.0, loss: 0.16442113053817833predicted: 0.8485795829151099, true: 1.0, loss: 0.16419140623100986predicted: 0.8487740437752757, true: 1.0, loss: 0.1639622720565412predicted: 0.848968050008384, true: 1.0, loss: 0.16373372587786872predicted: 0.8491616030886169, true: 1.0, loss: 0.16350576556786764predicted: 0.8493547044842653, true: 1.0, loss: 0.16327838900913877predicted: 0.8495473556577557, true: 1.0, loss: 0.1630515940939568predicted: 0.8497395580656771, true: 1.0, loss: 0.16282537872421693predicted: 0.8499313131588079, true: 1.0, loss: 0.16259974081138248predicted: 0.8501226223821413, true: 1.0, loss: 0.16237467827643404predicted: 0.8503134871749132, true: 1.0, loss: 0.1621501890498166predicted: 0.8505039089706267, true: 1.0, loss: 0.16192627107138965predicted: 0.8506938891970794, true: 1.0, loss: 0.1617029222903753predicted: 0.8508834292763894, true: 1.0, loss: 0.16

predicted: 0.8996611179250436, true: 1.0, loss: 0.10573712220401045predicted: 0.8997516585122943, true: 1.0, loss: 0.10563648872122897predicted: 0.8998420448949777, true: 1.0, loss: 0.10553603673299607predicted: 0.8999322774499648, true: 1.0, loss: 0.10543576576686157predicted: 0.9000223565529463, true: 1.0, loss: 0.10533567535196452predicted: 0.9001122825784361, true: 1.0, loss: 0.10523576501902625predicted: 0.9002020558997771, true: 1.0, loss: 0.1051360343003438predicted: 0.900291676889144, true: 1.0, loss: 0.10503648272978418predicted: 0.9003811459175491, true: 1.0, loss: 0.1049371098427771predicted: 0.9004704633548456, true: 1.0, loss: 0.10483791517630951predicted: 0.900559629569733, true: 1.0, loss: 0.10473889826891795predicted: 0.9006486449297605, true: 1.0, loss: 0.10464005866068365predicted: 0.9007375098013316, true: 1.0, loss: 0.1045413958932256predicted: 0.9008262245497086, true: 1.0, loss: 0.10444290950969426predicted: 0.9009147895390169, true: 1.0, loss: 0.10