In [1]:
import numpy as np

In [2]:
class BinaryCrossEntropy:
    """Summed (not averaged) binary cross-entropy for hard 0/1 labels.

    A small constant ``_EPS`` is added inside every logarithm so that a
    prediction of exactly 0 or 1 never evaluates ``log(0)``. The gradient
    uses the same constant, which makes it the exact derivative of the loss
    as computed here and keeps it finite for saturated predictions.
    """

    # Stabiliser shared by the loss and its gradient.
    _EPS = 1e-10

    def __init__(self):
        pass

    @staticmethod
    def _split_indices(y_true):
        """Return ``(ix_zeros, ix_ones)``: sample indices labelled 0 and 1."""
        labels = y_true.reshape(-1)
        return np.flatnonzero(labels == 0), np.flatnonzero(labels == 1)

    def __call__(self, y_pred, y_true):
        """Return the total loss over all samples.

        Parameters
        ----------
        y_pred : ndarray
            Predicted probabilities, indexed by sample along axis 0.
        y_true : ndarray
            Labels; flattened, one entry per sample. Samples whose label is
            neither 0 nor 1 do not contribute to the loss.

        Returns
        -------
        float
            ``-(sum(log(1 - p)) over label 0 + sum(log(p)) over label 1)``.
        """
        ix_zeros, ix_ones = self._split_indices(y_true)

        y_zero = np.log(1 - y_pred[ix_zeros] + self._EPS).sum()
        y_one = np.log(y_pred[ix_ones] + self._EPS).sum()

        return -1 * (y_zero + y_one)

    def grad_input(self, X, y_true):
        """Return the per-sample derivative of the loss w.r.t. the prediction.

        Parameters
        ----------
        X : ndarray
            Predictions indexed by sample along axis 1 (the selection is
            ``X[:, ix]``). Assumes a single output row, i.e. shape
            ``(1, n_samples)``.
        y_true : ndarray
            Labels with one row per sample.

        Returns
        -------
        ndarray of shape ``(n_samples, 1, 1)``
            ``1 / (1 - p + eps)`` where the label is 0 and ``-1 / (p + eps)``
            where it is 1. Samples with any other label get a gradient of 0,
            matching the loss, which ignores them.
        """
        ix_zeros, ix_ones = self._split_indices(y_true)

        # Zero-initialised so unlabelled samples never expose stale memory.
        grad = np.zeros((y_true.shape[0], 1, 1), dtype="float")
        grad[ix_zeros] = 1 / (1 - X[:, ix_zeros].reshape(-1, 1, 1) + self._EPS)
        grad[ix_ones] = -1 / (X[:, ix_ones].reshape(-1, 1, 1) + self._EPS)

        return grad


In [3]:
class Sigmoid:
    """Element-wise logistic activation ``1 / (1 + e**-x)``."""

    def __call__(self, X):
        """Alias for :meth:`eval`."""
        return self.eval(X)

    def eval(self, X):
        """Return the logistic function of ``X``, element-wise.

        Computed as ``exp(-log(1 + e**-x))``. ``np.logaddexp`` evaluates the
        inner logarithm without ever forming ``e**-x`` directly, so large
        negative inputs yield 0 instead of triggering an overflow warning.
        """
        return np.exp(-np.logaddexp(0, -1 * X))

    def grad_input(self, X):
        """Return the Jacobian of the activation for every sample.

        Parameters
        ----------
        X : ndarray of shape ``(units, n_samples)``
            Pre-activation values (axis 0 sizes the identity matrix, axis 1
            becomes the leading axis of the result).

        Returns
        -------
        ndarray of shape ``(n_samples, units, units)``
            For each sample a diagonal matrix holding ``s * (1 - s)``.
        """
        # Evaluate the activation once and reuse it for both factors.
        activated = self.eval(X)
        return np.einsum('ij,im->mij', np.identity(X.shape[0]), activated * (1 - activated))

In [4]:
class Dot:
    """Affine transform ``W.T @ X + b`` with randomly initialised parameters.

    ``W`` has shape ``(input_size, units)`` and ``b`` has shape
    ``(units, 1)``. Inputs are laid out as ``(input_size, n_samples)``.
    """

    def __init__(self, input_size, units):
        self.W = np.random.randn(input_size, units)
        self.b = np.random.randn(units, 1)

    def __call__(self, X):
        """Return ``W.T @ X + b``; the bias broadcasts across samples."""
        return self.W.T.dot(X) + self.b

    def grad_w(self, X):
        """Return the Jacobian of the output w.r.t. ``W`` for every sample.

        Returns
        -------
        ndarray of shape ``(n_samples, units, input_size, units)``
            Entry ``[m, i, j, k]`` is ``X[j, m]`` when ``i == k`` and 0
            otherwise.
        """
        I = np.identity(self.b.shape[0])
        # Outer product of the identity with X; no index is summed over.
        return np.einsum('ik,jm->mijk', I, X)

    def grad_b(self, X):
        """Return one ``(units, units)`` identity matrix per sample."""
        return np.stack([np.identity(self.b.shape[0])]*X.shape[1], axis=0)

    def grad_input(self, X):
        """Return ``W.T`` repeated once per sample, shape ``(n_samples, units, input_size)``."""
        return np.stack([self.W.T]*X.shape[1], axis=0)

    def get_output_size(self):
        """Return the shape of the bias, ``(units, 1)``."""
        return self.b.shape

    def get_no_of_params(self):
        """Return the total number of entries in ``W`` and ``b``."""
        return np.prod(self.W.shape) + np.prod(self.b.shape)

    def update(self, gradW, gradb, optimizer, method):
        """Replace ``W`` and ``b`` with the optimizer's updated values.

        Parameters
        ----------
        gradW, gradb : ndarray
            Gradients passed to the optimizer alongside ``W`` and ``b``.
        optimizer : object
            Must provide ``minimize(param, grad)`` and
            ``maximize(param, grad)``.
        method : str
            ``"minimize"`` or ``"maximize"``.

        Raises
        ------
        ValueError
            If ``method`` is neither of the two supported names. Without
            this check a typo would silently skip the parameter update.
        """
        if method == "minimize":
            step = optimizer.minimize
        elif method == "maximize":
            step = optimizer.maximize
        else:
            raise ValueError(f"Unknown update method {method!r}, expected 'minimize' or 'maximize'")

        self.W = step(self.W, gradW)
        self.b = step(self.b, gradb)

In [5]:
class Dense:
    """Fully connected layer: a ``Dot`` transform followed by an activation.

    The activation object must be callable and expose ``grad_input``.
    """

    def __init__(self, units, activation, input_size):
        self.input_size = input_size
        self.units = units
        self.activation = activation
        self.dot = Dot(input_size, units)

    def get_output_size(self):
        """Return the output size reported by the underlying ``Dot``."""
        return self.dot.get_output_size()

    def get_no_of_params(self):
        """Return the parameter count reported by the underlying ``Dot``."""
        return self.dot.get_no_of_params()

    def eval(self, X):
        """Return the activation applied to the ``Dot`` output for ``X``."""
        pre_activation = self.dot(X)
        return self.activation(pre_activation)

    def grad_parameters(self, X):
        """Return ``(d_out/d_W, d_out/d_b)`` for every sample.

        Both are obtained by contracting the activation Jacobian with the
        corresponding ``Dot`` Jacobian, sample by sample (leading axis
        ``m`` of the einsum).
        """
        activation_jac = self.activation.grad_input(self.dot(X))

        weight_jac = np.einsum('mij,mjkl->mikl', activation_jac, self.dot.grad_w(X))
        bias_jac = np.einsum('mij,mjk->mik', activation_jac, self.dot.grad_b(X))

        return (weight_jac, bias_jac)

    def grad_input(self, X):
        """Return the per-sample Jacobian of the layer output w.r.t. ``X``."""
        activation_jac = self.activation.grad_input(self.dot(X))
        input_jac = self.dot.grad_input(X)
        return np.einsum('mij,mjk->mik', activation_jac, input_jac)

    def update(self, grad_w, grad_b, optimizer, method="minimize"):
        """Forward the parameter update to the underlying ``Dot``."""
        self.dot.update(grad_w, grad_b, optimizer, method)
        

In [6]:
class GradientDescentOptimizer:
    """Fixed-step gradient update rule.

    The step size is not set at construction; call :meth:`set_lr` before
    using :meth:`minimize` or :meth:`maximize`.
    """

    def __init__(self):
        pass

    def set_lr(self, learning_rate):
        """Store the step size and return ``self`` so calls can be chained."""
        self.learning_rate = learning_rate
        return self

    def _scaled_step(self, X, grad_X):
        """Check that the shapes agree and return ``learning_rate * grad_X``."""
        assert X.shape == grad_X.shape, f"Shape mismatch, Input shape {X.shape} != Gradient shape {grad_X.shape}"
        return self.learning_rate*grad_X

    def minimize(self, X, grad_X):
        """Return ``X`` moved one step against the gradient."""
        step = self._scaled_step(X, grad_X)
        return X - step

    def maximize(self, X, grad_X):
        """Return ``X`` moved one step along the gradient."""
        step = self._scaled_step(X, grad_X)
        return X + step

        

In [168]:
class Sequential:
    """Linear stack of layers trained by mini-batch gradient descent.

    Each layer must provide ``eval``, ``grad_input``, ``grad_parameters``,
    ``update``, ``get_no_of_params`` and ``get_output_size``. The loss
    passed to :meth:`compile` must be callable as ``loss(y_pred, y_true)``
    and provide ``grad_input``.
    """

    def __init__(self):
        self.layers = []
        self.loss = None
        self.outputs = []

    def add(self, layer):
        """Append ``layer`` to the stack and return ``self`` for chaining."""
        self.layers.append(layer)
        return self

    def summary(self):
        """Print a per-layer table of output shapes and parameter counts."""
        from tabulate import tabulate

        headers = ["Layer Type", "Output Shape", "No. of parameters"]
        rows = []
        total_params = 0
        for layer in self.layers:
            n_params = layer.get_no_of_params()
            total_params += n_params
            rows.append([layer.__class__.__name__, layer.get_output_size(), n_params])

        print(tabulate(rows, headers=headers))
        print("Total No. of parameters:", total_params)

    def get_batch(self, X, y, batch_size):
        """Yield ``(x_batch, y_batch)`` pairs covering a random permutation of the rows."""
        ixs = np.arange(X.shape[0])
        np.random.shuffle(ixs)
        for start in range(0, X.shape[0], batch_size):
            chosen = ixs[start:start+batch_size]
            x_batch, y_batch = X[chosen], y[chosen]
            if len(x_batch):
                yield (x_batch, y_batch)

    def fit(self, X, y, n_epochs, learning_rate, optimizer, batch_size=1, verbose=1):
        """Train the model in place.

        Parameters
        ----------
        X : ndarray
            Training inputs, one sample per row.
        y : ndarray
            Targets with at least two dimensions, one row per sample.
        n_epochs : int
            Number of passes over the data.
        learning_rate : float
            Step size handed to ``optimizer.set_lr``.
        optimizer : object
            Must provide ``set_lr`` (returning the optimizer) and whatever
            the layers' ``update`` methods call on it.
        batch_size : int
            Number of samples per parameter update.
        verbose : int
            ``1`` prints a progress bar with the batch loss and the
            full-data loss after every epoch; ``0`` prints one
            self-overwriting line with the full-data loss per epoch; any
            other value prints nothing but the final newline.

        Raises
        ------
        ValueError
            If ``y`` has fewer than two dimensions.
        RuntimeError
            If :meth:`compile` has not been called.
        """
        if len(y.shape) < 2:
            raise ValueError(f"Incompatible shape of y {y.shape}, try reshaping y using y.reshape(-1,1)")

        # Fail early with the same error _eval_loss uses, rather than with an
        # AttributeError from inside back-propagation.
        if self.loss is None:
            raise RuntimeError("Model not compiled")

        bar_size = 50
        full_bar = "|" + "-"*bar_size + ">" + "|"

        self.optimizer = optimizer.set_lr(learning_rate)
        for epoch in range(n_epochs):
            if verbose == 1:
                print(f"Epoch: {epoch+1}/{n_epochs}")
                progress_bar = self.__progress_bar(bar_size, int(bar_size*batch_size/X.shape[0]))

            for (X_batch, y_batch) in self.get_batch(X, y, batch_size):
                _, outputs, layer_gradients = self.forward_propagation(X_batch)
                grads = self.backward_propagation(outputs, layer_gradients, y_batch.reshape(-1,1))
                self._update_params(grads)
                if verbose == 1:
                    batch_loss = self._eval_loss(X_batch, y_batch)
                    # The bar generator can run out before the batches do;
                    # keep reporting the loss against a full bar when it does.
                    bar = next(progress_bar, full_bar)
                    print("\r" + bar, f"Loss: {np.round(batch_loss, 4)}", end="")

            if verbose == 1:
                epoch_loss = self._eval_loss(X, y)
                print("\r" + full_bar, f"Loss: {np.round(epoch_loss, 4)}")
            elif verbose == 0:
                # Reported once per epoch; "\r" makes each line replace the last.
                print(f"\rEpoch: {epoch+1} Loss:{self._eval_loss(X, y)}", end="")

        print("")

    def forward_propagation(self, X, eval=False):
        """Run ``X`` through every layer.

        Parameters
        ----------
        X : ndarray
            Inputs with one sample per row; transposed before the first layer.
        eval : bool
            When true, skip computing the per-layer Jacobians.

        Returns
        -------
        tuple
            ``(prediction, outputs, gradients)``: the transposed final
            output, the list of every layer input plus the final output,
            and one ``{"input", "w", "b"}`` dict of Jacobians per layer
            (empty when ``eval`` is true).
        """
        output = X.T
        outputs = [output]
        gradients = []
        for layer in self.layers:
            if not eval:
                grad_ = {}
                grad_["input"] = layer.grad_input(output)
                grad_["w"], grad_["b"] = layer.grad_parameters(output)
                gradients.append(grad_)
            output = layer.eval(output)
            outputs.append(output)

        return output.T, outputs, gradients

    def backward_propagation(self, outputs, gradients, y):
        """Chain the loss gradient backwards through the layer Jacobians.

        Returns
        -------
        list of ``(grad_w, grad_b)``
            Gradients summed over the batch, ordered from the last layer to
            the first.
        """
        grad_loss = self.loss.grad_input(outputs[-1], y)
        grads = []
        for grad in reversed(gradients):
            # Sum over the sample axis, then drop the singleton output axis
            # (weights) or transpose to a column (bias).
            grad_w = np.einsum('mij,mjkl->mikl', grad_loss, grad["w"]).sum(axis=0)[0]
            grad_b = np.einsum('mij,mjk->mik', grad_loss, grad["b"]).sum(axis=0).T
            grads.append((grad_w, grad_b))

            # Propagate to the previous layer's output.
            grad_loss = np.einsum('mij,mjk->mik', grad_loss, grad["input"])

        return grads

    def _update_params(self, grads):
        """Apply ``grads`` (last layer first) to the matching layers."""
        for ((grad_w, grad_b), layer) in zip(grads, self.layers[::-1]):
            layer.update(grad_w, grad_b, self.optimizer)

    def predict(self, X):
        """Return the model output for ``X``, one row per sample."""
        return self._eval(X)

    def predict_classes(self, X, threshold=0.5):
        """Return 1 where the prediction exceeds ``threshold``, else 0."""
        return (self.predict(X) > threshold).astype("int")

    def evaluate(self, X, y):
        """Return ``(loss, hits)``; ``hits`` is 1 where the predicted class equals ``y``.

        Raises
        ------
        ValueError
            If ``y`` has fewer than two dimensions.
        """
        if len(y.shape) < 2:
            raise ValueError(f"Incompatible shape of y {y.shape}, try reshaping y using y.reshape(-1,1)")
        return self._eval_loss(X, y), (y == self.predict_classes(X)).astype('int')

    def _eval(self, X):
        """Forward pass without Jacobians; returns only the prediction."""
        return self.forward_propagation(X, eval=True)[0]

    def compile(self, loss):
        """Attach the loss object used for training and evaluation."""
        self.loss = loss

    def __progress_bar(self, size, inc):
        """Yield text bars of width ``size``, advancing ``inc + 1`` cells per step."""
        step = 0
        inc += 1
        while step <= size:
            yield "|" + "-"*step + ">" + " "*(size-step) + "|"
            step += inc

    def _eval_loss(self, X, y_true):
        """Return the loss of the current model on ``(X, y_true)``.

        Raises
        ------
        ValueError
            If ``y_true`` has fewer than two dimensions.
        RuntimeError
            If no loss has been attached with :meth:`compile`.
        """
        if len(y_true.shape) < 2:
            raise ValueError(f"Incompatible shape of y {y_true.shape}, try reshaping y using y.reshape(-1,1)")

        if self.loss is None:
            raise RuntimeError("Model not compiled")

        return self.loss(self._eval(X), y_true)

In [169]:
# 2 -> 3 -> 2 -> 1 sigmoid network; `add` returns the model, so the layers
# can be chained in a single expression.
model = (
    Sequential()
    .add(Dense(units=3, activation=Sigmoid(), input_size=2))
    .add(Dense(units=2, activation=Sigmoid(), input_size=3))
    .add(Dense(units=1, activation=Sigmoid(), input_size=2))
)
model.compile(BinaryCrossEntropy())

In [170]:
# Print each layer's type, output shape and parameter count, plus the total.
model.summary()

Layer Type    Output Shape      No. of parameters
------------  --------------  -------------------
Dense         (3, 1)                            9
Dense         (2, 1)                            8
Dense         (1, 1)                            3
Total No. of parameters: 20


In [171]:
from sklearn.datasets import make_gaussian_quantiles

In [172]:
# Two-class toy dataset; labels are reshaped to a column because
# `evaluate` rejects 1-D targets.
X, y = make_gaussian_quantiles(n_classes=2, n_samples=200)
y = y.reshape(-1, 1)

# Loss of the freshly initialised (untrained) model.
untrained_loss = model.evaluate(X, y)[0]
print("Loss", untrained_loss)

Loss 158.0060210424048


In [173]:
import time

In [178]:
# Train for ten epochs in mini-batches of ten, printing a progress bar.
model.fit(
    X,
    y,
    n_epochs=10,
    learning_rate=0.001,
    optimizer=GradientDescentOptimizer(),
    batch_size=10,
    verbose=1,
)

Epoch: 1/10
|-------------------------------------------------->| Loss: 138.6248
Epoch: 2/10
|-------------------------------------------------->| Loss: 138.612
Epoch: 3/10
|-------------------------------------------------->| Loss: 138.68
Epoch: 4/10
|-------------------------------------------------->| Loss: 138.5882
Epoch: 5/10
|-------------------------------------------------->| Loss: 138.5774
Epoch: 6/10
|-------------------------------------------------->| Loss: 138.567Loss: 6.7688
Epoch: 7/10
|-------------------------------------------------->| Loss: 138.557
Epoch: 8/10
|-------------------------------------------------->| Loss: 138.5474
Epoch: 9/10
|-------------------------------------------------->| Loss: 138.5382
Epoch: 10/10
|-------------------------------------------------->| Loss: 138.529oss: 7.0069

