In [1]:
import numpy as np

In [2]:
x = np.random.rand(2,1)
y = np.ones((1,1))

In [3]:
x, y

(array([[0.25332887],
        [0.29207688]]),
 array([[1.]]))

In [61]:
# Activation Entity
class ReLU: 
    
    def __init__(self):
        pass
    
    def forward(self, Z):
        return np.where(Z>0, Z, np.zeros_like(Z))
        
    def __call__(self, Z):
        return self.forward(Z)
    
    def grad(self, Z):
        return {"i": self.grad_i(Z)}
        
    def grad_i(self, Z):
        return np.diag(np.where(Z>0, np.ones_like(Z), np.zeros_like(Z)).reshape(-1))
        
    def update(self, *args, **kwargs):
        pass
    
class Sigmoid:
    def  __init__(self):
        pass
    
    def forward(self, Z):
        return 1/(1+np.exp(-Z))
    
    def __call__(self, Z):
        return self.forward(Z)
    
    def grad(self, Z):
        return {"i": self.grad_i(Z)}
    
    def grad_i(self, Z):
        y = self(Z)
        return np.diag((y*(1-y)).reshape(-1))
    
    def update(self, *args, **kwargs):
        pass

In [234]:
# Layer entity
class Dense: 
    
    def __init__(self, no_of_neurons, input_size):
        
        self.W = np.random.randn(input_size, no_of_neurons)
        self.b = np.random.randn(no_of_neurons, 1)
    
    def __call__(self, X):
        return self.forward(X)
    
    def grad(self, X):
        return {"i": self.grad_i(X), "b": self.grad_b(X), "w": self.grad_w(X)}
    
    def grad_i(self, X):
        return self.W.T
    
    def grad_b(self, X):
        return np.identity(X.shape[0])
    
    def grad_w(self, X):
        m, n = self.W.shape
        return (np.repeat(np.eye(n), repeats=m, axis=0)*np.repeat(np.expand_dims(X, 0), repeats=n, axis=0).reshape(m*n, 1)).reshape(n,m,n)
        
    def forward(self, X):
        return np.matmul(self.W.T, X) + self.b        
    
    def update(self, grad, func):
        
        self.W = func(self.W, grad["w"])
        self.b = func(self.b, grad["b"])

In [235]:
# Model entity 
class Sequential:
    
    def __init__(self, layers=[]):
        self.layers = layers
        
    def __call__(self, X):
        return self.forward(X)
    
    def forward(self, X, with_grad=False):
        grads = []
        for layer in self.layers:
            if with_grad:
                grads.append(layer.grad(X))
            X = layer.forward(X)
        return (X, grads) if with_grad else X
    
    def backward(self, grads, loss_grad):
        loss_grads = []
        grads.reverse()
        
        for grad in grads:
            g = {}
            if grad.get("w", None) is not None: 
                g["w"] = np.matmul(loss_grad, grad["w"])
            if grad.get("b", None) is not None:
                g["b"] = np.matmul(loss_grad, grad["b"])
                
            g["i"] = np.matmul(loss_grad, grad["i"])
            loss_grads.append(g)
            loss_grad = g["i"]
        
        loss_grads.reverse()
        
        return loss_grads        
    
    def fit(self, X, y_true, loss, optim):
        y_pred, grads = self.forward(X, with_grad=True)
        l = loss(y_pred=y_pred, y_true=y_true)
        loss_grads = self.backward(grads, loss.grad_i(y_pred=y_pred, y_true=y_true))
        
        for (layer, grad) in zip(self.layers, loss_grads):
            optim.step(layer, grad)
        
    def add(self, layer):
        self.layers.append(layer)
        return self

In [236]:
class Optimizer:
    
    def __init__(self):
        self.optim = self.optimizer()
   
    def optimizer(self):
        raise NotImplementedError("optimizer method not defined")
        
    def step(self, layer, grad):
        layer.update(grad, self.optim)
    

class SGD(Optimizer):
    
    def __init__(self, lr):
        self.lr = lr
        super().__init__()
    
    def optimizer(self):
        def func(w, dw):
            return w - self.lr*dw
        return func

In [237]:
# Loss entity
class BinaryCrossEntropy:
    
    def __init__(self):
        pass
    
    def __call__(self, y_pred, y_true):
        return self.forward(y_pred, y_true)
    
    def forward(self, y_pred, y_true):
        return np.where(y_true == 0, -np.log(1-y_pred), -np.log(y_pred))
    
    def grad_i(self, y_pred, y_true):
        return np.where(y_true == 0, -1/(1-y_pred), -1/y_pred)
        

In [238]:
layer1 = Dense(2, 2)
act1 = ReLU()
layer2 = Dense(2, 2)
act2 = ReLU()
layer3 = Dense(1, 2)
act3 = Sigmoid()

In [239]:
model = Sequential(layers=[layer1, act1, layer2, act2, layer3, act3])

In [240]:
loss = BinaryCrossEntropy()
optim = SGD(0.001)

In [241]:
y_pred, grads = model.forward(x, with_grad=True)

In [242]:
g1 = np.matmul(loss.grad_i(y_pred, y), grads[-1]["i"])

In [243]:
g2 = np.matmul(g1, grads[-2]["i"])

In [244]:
g3 = np.matmul(g2, grads[-3]["i"])

In [245]:
g4 = np.matmul(g3, grads[-4]["i"])
g5 = np.matmul(g4, grads[-5]["i"])

In [246]:
g1, g3, g5

(array([[-0.68422635]]), array([[0., 0.]]), array([[0., 0.]]))

In [159]:
np.matmul(grads[-1]["i"], grads[-2]["i"])

array([[0.02495379, 0.06187208]])

In [257]:
g1.shape

(1, 1)

In [259]:
grads[-2]["w"].shape

(1, 2, 1)

In [279]:
np.einsum?

In [274]:
a = np.random.randint(1, 3, size=(1,2))
b = np.random.randint(1, 3, size=(3, 2, 1))

In [275]:
a

array([[2, 2]])

In [276]:
b

array([[[1],
        [2]],

       [[2],
        [2]],

       [[1],
        [1]]])

In [277]:
np.matmul(a, b)

array([[[6]],

       [[8]],

       [[4]]])

In [255]:
np.matmul(g1, grads[-2]["w"])

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 1)

In [249]:
grads[-2]["w"].shape

(1, 2, 1)

In [104]:
model.fit(x, y, loss, optim)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 10 is different from 1)

In [19]:
y_pred

array([[8.36289363e-05]])

In [20]:
bce(y_pred, y)

array([[9.38912097]])

In [21]:
bce.grad_i(y_pred, y)

array([[-11957.58362862]])

In [23]:
y

array([[1.]])