## Session 36 
Aug 20, 2021

In [1]:
import numpy as np

In [2]:
class BinaryCrossEntropy:

    def __call__(self, ytrue, ypred):
        return -np.sum( ytrue*np.log(ypred + 1e-10) + (1-ytrue)*np.log(1-ypred + 1e-10) )

    def grad_input(self, y, ypred):
        if y == 0:
            return 1/1-ypred
        else:
            return -1/ypred


In [3]:
class Sigmoid:

    def __call__(self, X):
        return self.eval(X)

    def eval(self, X):
        return 1/((np.e**-X) + 1)

    def grad_input(self, X):
        return np.identity(X.shape[1])*self.eval(X) * (1-self.eval(X))

In [4]:
class Dot:

    def __init__(self, input_size, units):
        self.W = np.random.randn(input_size, units)
        self.b = np.random.randn(1, units)

    def __call__(self, X):
        return self.eval(X)

    def eval(self, X):
        return X.dot(self.W) + self.b

    def grad_input(self):
        return self.W.T

    def grad_w(self, X):
        I = np.identity(self.b.shape[1])
        g = np.stack([I]*self.W.shape[0])
        for i in range(g.shape[0]):
            g[i] *= X[0][i]
        return g

    def grad_b(self):
        return np.identity(self.b.shape[1])


In [35]:
class Dense:

    def __init__(self, input_size, activation, units):
        """
        input_size: no. of neurons in previous layer
        activation: some activation funtion
        units: no. of neurons in current layer 
        """
        self.activation = activation
        self.units = units
        self.dot = Dot(input_size, units)

    def eval(self, X):
        return self.activation( self.dot(X))

    def grad_input(self, X):
        g1 = self.activation.grad_input( self.dot(X) )
        g2 = self.dot.grad_input()
#         print("g1",g1.shape, "g2", g2.shape)
        return g1.dot(g2)

    def grad_parameters(self, X):
        da_dI = self.activation.grad_input(self.dot(X))
        dI_dw = self.dot.grad_w(X)
        # da_dw = da_dI.dot(dI_dw)
#         print("da_dI\n:", da_dI)
#         print("dI_dw\n:", da_dI)
#         print(f"da_dI: {da_dI.shape}, dI_dw {dI_dw.shape}")

        da_dw = da_dI * dI_dw
#         print("da_dI\n:", da_dI)
#         print(f"da_dI: {da_dI.shape}, dI_dw {dI_dw.shape}, da_dw{da_dw.shape}")

        dI_db = self.dot.grad_b()
        da_db = da_dI * dI_db
#         print(f"da_dI: {da_dI.shape}, dI_db {dI_db.shape}, da_db{da_db.shape}")

        return da_dw, da_db

    def update(self, grad, optimizer):
        """ grad: (dL_dwi, dL_dbi)"""
        self.dot.W = optimizer.minimize(self.dot.W, grad[0])
        self.dot.b = optimizer.minimize(self.dot.b, grad[1])

In [36]:
class GradientDescentOptimizer:

    def __init__(self, lr):
        self.lr = lr

    def minimize(self, w, grad):
        assert w.shape == grad.shape, f"Shape mismatch w shape {w.shape} != grad shape {grad.shape}"
        w = w-self.lr*grad
        return w

In [43]:
class Sequential:

    def __init__(self, loss):
        self.layers = []
        self.loss = loss

    def add(self, layer):
        self.layers.append(layer)

    def forward_propagation(self, X):
        output = X
        outputs = []
        grads = []
        for layer in self.layers:
            g = {}
            g['input'] = layer.grad_input(output)
            g['w'], g['b'] = layer.grad_parameters(output)
            grads.append(g)
            output = layer.eval(output)
            outputs.append(output)
#             print("ForProp:", output.shape)
        return outputs, grads

    def back_propagate(self, grads, outputs, y):
        grad_loss = self.loss.grad_input(y, outputs[-1]) # dL/dlast_layer_output
        for layer, grad in list(zip(self.layers, grads))[::-1]:
            dL_dwi, dL_dbi = grad_loss.dot(grad['w']), grad_loss.dot(grad['b'])
            layer.update((dL_dwi[0], dL_dbi), self.optimizer)
            grad_loss = grad_loss.dot(grad['input']) # update grad loss for prev layer
            

    def fit(self, X, y, epochs, optimizer, learning_rate, verbose=1):
        self.optimizer = optimizer(learning_rate)
        for i in range(epochs):
            outputs, grads = self.forward_propagation(X)
            self.back_propagate(grads, outputs, y)
            if verbose==1:
                print(f"\rEpoch: {i+1} Loss: {self.loss(y, outputs[-1])}", end="")
        if verbose==0:
            print(f"Epoch: {i} Loss: {self.loss(y, outputs[-1])}")

    def eval(self, X):
        return self.forward_propagation(X)[0][-1]


In [44]:
model = Sequential(BinaryCrossEntropy())
model.add(Dense(input_size = 2, activation=Sigmoid(), units=2))
model.add(Dense(input_size = 2, activation=Sigmoid(), units=3))
model.add(Dense(input_size = 3, activation=Sigmoid(), units=1))

In [45]:
X = np.random.randn(1,2)
y = np.array([0])
print(X)
ypred = model.eval(X)
print(ypred)

# before fitting/ training
print(model.loss(y, ypred))
ypred

[[ 0.39710514 -1.06285403]]
[[0.26351748]]
0.30586977626079315


array([[0.26351748]])

In [51]:
model.fit(X, y, epochs=1000, optimizer=GradientDescentOptimizer, learning_rate=0.008, verbose=1)

Epoch: 1000 Loss: 0.01849475873446689

In [52]:
ypred = model.eval(X) # after training
print(ypred)

[[0.01831834]]
