In [26]:
from copy import deepcopy
import numpy as np 
from sklearn.datasets import make_classification

from templates import AutoDiffFunction, Layer, Loss, Optimizer

Defining the activation functions

In [27]:
class Sigmoid(AutoDiffFunction):
    """Element-wise logistic sigmoid activation, ``1 / (1 + exp(-x))``.

    The forward output is cached in ``self.saved_for_backward`` because the
    derivative of the sigmoid can be expressed purely in terms of that output.
    """

    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        """Apply the sigmoid to ``x`` and cache the result for the backward pass."""
        activation = 1 / (1 + np.exp(-x))
        self.saved_for_backward = activation
        return activation

    def compute_grad(self, x):
        """Return the local derivative ``s * (1 - s)`` under the key ``"x"``.

        ``s`` is the output cached by ``forward``; the ``x`` argument itself is
        not read.
        """
        cached = self.saved_for_backward
        local_derivative = cached * (1 - cached)
        return {"x": local_derivative}

    def backward(self, dy):
        """Chain rule: scale the upstream gradient ``dy`` by the local derivative.

        NOTE(review): ``self.grad`` is never assigned in this class; presumably
        the ``AutoDiffFunction`` base fills it from ``compute_grad`` -- confirm
        in ``templates``.
        """
        return dy * self.grad["x"]


class RelU(AutoDiffFunction):
    """Rectified linear unit: passes positive inputs through and zeroes the rest.

    ``forward`` caches a 0/1 mask of the positive entries; that mask is also the
    local derivative returned by ``compute_grad``.
    """

    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        """Return ``x`` where ``x > 0`` and zero elsewhere, caching the mask."""
        positive_mask = np.where(x > 0.0, 1.0, 0.0)
        self.saved_for_backward = positive_mask
        return x * positive_mask

    def compute_grad(self, x):
        """Return the cached 0/1 mask under the key ``"x"``; ``x`` is not read."""
        return {"x": self.saved_for_backward}

    def backward(self, dy):
        """Chain rule: let the upstream gradient ``dy`` through where the mask is 1.

        NOTE(review): ``self.grad`` is never assigned in this class; presumably
        the ``AutoDiffFunction`` base fills it from ``compute_grad`` -- confirm
        in ``templates``.
        """
        return dy * self.grad["x"]
     

Defining the layers

In [37]:
class FC(Layer):
    """Fully connected (dense) layer computing ``y = x @ w + b``.

    NOTE(review): relies on the ``Layer`` base class to create ``self.weights``
    and ``self.saved_for_backward`` as dict-like containers and to populate
    ``self.grad`` from ``compute_grad`` -- confirm in ``templates``.
    """

    def __init__(self, in_dim, out_dim) -> None:
        super().__init__()
        self.initialize_weights(in_dim, out_dim)

    def initialize_weights(self, in_dim, out_dim):
        """Draw ``w`` (in_dim x out_dim) and ``b`` (1 x out_dim) from a standard normal."""
        # "w" is drawn before "b"; the order matters for reproducibility under a fixed seed.
        self.weights["w"] = np.random.randn(in_dim, out_dim)
        self.weights["b"] = np.random.randn(1, out_dim)

    def forward(self, x):
        """Return ``x @ w + b`` and remember ``x`` for the weight gradient."""
        weight, bias = self.weights["w"], self.weights["b"]
        output = x @ weight + bias
        self.saved_for_backward["x"] = x
        return output

    def compute_grad(self, x):
        """Return the local Jacobian factors of ``y = x @ w + b``.

        ``"w"`` maps to the transposed cached input and ``"x"`` to the
        transposed weight matrix. No entry is stored for ``b``: its local
        derivative is 1, so ``backward`` sums the upstream gradient directly.
        The ``x`` argument is not read; the input cached by ``forward`` is used.
        """
        cached_input = self.saved_for_backward["x"]
        return {
            "w": cached_input.T,
            "x": self.weights["w"].T,
        }

    def backward(self, dy):
        """Propagate ``dy`` to the input and store the parameter gradients.

        Stores ``{"w": dw, "b": db}`` in ``self.absolute_gradients`` and returns
        the gradient with respect to the layer input.
        """
        # dL/dx = dy @ w.T
        dx = dy @ self.grad["x"]

        # dL/dw = x.T @ dy; dL/db sums dy over axis 0 (the sample axis when x is
        # (batch, in_dim)), keeping the (1, out_dim) shape of the bias.
        dw = self.grad["w"] @ dy
        db = dy.sum(axis=0, keepdims=True)

        self.absolute_gradients = {"w": dw, "b": db}
        return dx

    def update_weights(self):
        """Delegate the parameter update to the optimizer attached to this layer."""
        self.optimizer.step(self)

Defining the loss function

For this particular problem, we require CrossEntropy Loss for classification

In [38]:
# Worked example of the cross-entropy formula on two hand-made samples:
# each row of x is a predicted class distribution, each row of y a one-hot target.
x = np.array([
    [0.94, 0.04, 0.02],
    [0.3, 0.64, 0.06],
])
y = np.array([
    [1, 0, 0],
    [0, 1, 0],
])

# Element-wise -y * log(x): only the true-class entry of each row is non-zero.
z = -y * np.log(x)
print(z)

# Collapse the class axis to obtain one loss value per sample.
z = z.sum(axis=1)
print(z)

# The batch loss is the mean of the per-sample losses.
print(z.mean())

[[ 0.0618754 -0.        -0.       ]
 [-0.         0.4462871 -0.       ]]
[0.0618754 0.4462871]
0.2540812531732535


In [39]:
class CrossEntropyLossFromLogits(Loss):
    """Softmax + cross-entropy loss evaluated directly on raw logits.

    ``forward`` returns the batch mean of ``-sum(y * log(softmax(logits)))`` and
    caches the probabilities and one-hot targets; ``compute_grad`` uses that
    cache to return ``probabilities - targets``.

    NOTE(review): assumes the ``Loss`` base class provides
    ``self.saved_for_backward`` as a dict and routes calls to ``forward`` /
    ``compute_grad`` -- confirm in ``templates``.
    """

    @staticmethod
    def _log_softmax(x):
        """Row-wise log-softmax computed with the max-shift (log-sum-exp) trick."""
        # Subtracting the row maximum leaves softmax unchanged but keeps every
        # exponent <= 0, so exp() cannot overflow and the log stays finite.
        shifted = x - np.max(x, axis=1, keepdims=True)
        return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    @staticmethod
    def softmax(x):
        """Row-wise softmax of a 2-D array of logits (numerically stable)."""
        # Same max-shift as in _log_softmax: avoids inf / inf = nan on large logits.
        shifted = x - np.max(x, axis=1, keepdims=True)
        v = np.exp(shifted)

        return v / np.sum(v, axis=1, keepdims=True)

    @staticmethod
    def encode(y, num_classes=None):
        """One-hot encode integer class labels.

        Args:
            y: integer labels, shape ``(n,)`` or ``(n, 1)``.
            num_classes: width of the encoding. When omitted it falls back to
                ``max(y) + 1``, which equals the previous ``len(np.unique(y))``
                whenever the labels are exactly ``0..k-1`` and additionally
                works when some class is absent from ``y``.

        Returns:
            Float array of shape ``(n, num_classes)`` with a single 1 per row.
        """
        labels = np.asarray(y).reshape(-1)
        if num_classes is None:
            num_classes = int(labels.max()) + 1 if labels.size else 0

        encoded_y = np.zeros(shape=(labels.size, num_classes))
        # One vectorised assignment instead of a Python loop over the samples.
        encoded_y[np.arange(labels.size), labels] = 1

        return encoded_y

    def forward(self, y_pred, y_true):
        """Return the mean cross-entropy between logits ``y_pred`` and labels ``y_true``.

        Args:
            y_pred: logits, one row per sample and one column per class.
            y_true: integer class labels, one per sample.
        """
        probabilities = self.softmax(y_pred)
        # Take the class count from the logits so a mini-batch that happens to
        # miss a class still produces targets with the same width as y_pred.
        y_true_encoded = self.encode(y_true, num_classes=np.shape(y_pred)[1])

        # Use log-softmax rather than log(softmax): a probability that
        # underflows to 0 would otherwise give 0 * -inf = nan.
        log_probabilities = self._log_softmax(y_pred)
        loss_value = np.mean(np.sum(- y_true_encoded * log_probabilities, axis=1))

        self.saved_for_backward["probabilities"] = probabilities
        self.saved_for_backward["y_true"] = y_true_encoded

        return loss_value

    def compute_grad(self, y_pred, y_true):
        """Return ``probabilities - one_hot_targets`` under the key ``"x"``.

        The arguments are not read; the values cached by ``forward`` are used.

        NOTE(review): this is the gradient of the *summed* per-sample loss,
        while ``forward`` reports the batch *mean*. Unless ``Loss.backward``
        rescales by the batch size, gradients are larger than the reported
        loss implies by that factor -- confirm against ``templates``.
        """
        return {"x": self.saved_for_backward["probabilities"] - self.saved_for_backward["y_true"]}

Creating an optimizer to minimize the loss

In [40]:
class SGD(Optimizer):
    """Plain gradient descent: ``weight <- weight - lr * gradient``."""

    def __init__(self, lr):
        # Step size applied to every gradient.
        self.lr = lr

    def step(self, layer):
        """Update every entry of ``layer.weights`` from ``layer.absolute_gradients``.

        Each weight is rebound to a new value rather than modified in place.
        """
        for weight_name in list(layer.weights.keys()):
            current_value = layer.weights[weight_name]
            gradient = layer.absolute_gradients[weight_name]
            layer.weights[weight_name] = current_value - self.lr * gradient

Creating the structure for an actual neural network

In [41]:
class NeuralNet():
    """Sequential container that chains callables for forward and backward passes.

    ``layers`` is an ordered collection of callables that also expose
    ``backward``; entries that are ``Layer`` instances additionally receive an
    optimizer in ``compile`` and are updated by ``update_weights``.
    """

    def __init__(self, layers) -> None:
        self.layers = layers

    def __call__(self, *args, **kwds):
        """Calling the network is shorthand for ``forward``."""
        return self.forward(*args, **kwds)

    def compile(self, loss, optimizer):
        """Attach the loss and give every ``Layer`` its own deep copy of ``optimizer``."""
        self.loss = loss

        for layer in self.layers:
            if not isinstance(layer, Layer):
                # Entries that are not Layer instances get no optimizer.
                continue
            layer.optimizer = deepcopy(optimizer)

    def calculate_loss(self, y_pred, y_true):
        """Evaluate the attached loss on predictions and targets."""
        return self.loss(y_pred, y_true)

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        activation = x
        for layer in self.layers:
            activation = layer(activation)

        return activation

    def backward(self):
        """Back-propagate from the loss through the layers in reverse order.

        Returns the gradient produced by the first layer's ``backward``.
        """
        upstream = self.loss.backward()
        for layer in reversed(self.layers):
            upstream = layer.backward(upstream)

        return upstream

    def update_weights(self):
        """Ask every ``Layer`` (visited last to first) to apply its optimizer step."""
        for layer in reversed(self.layers):
            if not isinstance(layer, Layer):
                continue
            layer.update_weights()

Create a custom classification dataset to test the network

In [42]:
## creating a dummy dataset to test out stuff ##

# 32*6 = 192 samples, 20 features (15 informative), 3 classes.
# random_state pins the generated data so that Restart & Run All reproduces
# the same dataset instead of drawing a new one on every run.
X, y = make_classification(
    n_samples=32*6,
    n_features=20,
    n_informative=15,
    n_classes=3,
    random_state=0,
)

def create_batches(X, y, batch_size=32, drop_last=True):
    """Split ``X`` and ``y`` into consecutive mini-batches.

    Args:
        X: sliceable collection of samples.
        y: sliceable collection of targets, same length as ``X``.
        batch_size: number of samples per batch; must be positive.
        drop_last: when True (the default, matching the previous behaviour) a
            trailing partial batch is discarded so that all batches have equal
            size; when False the remainder is returned as a final, smaller
            batch so that no sample is silently lost.

    Returns:
        List of ``[X_batch, y_batch]`` pairs in the original sample order.

    Raises:
        ValueError: if ``batch_size`` is not positive or the lengths differ.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if len(X) != len(y):
        raise ValueError("X and y must contain the same number of samples")

    num_samples = len(y)
    # With drop_last the range stops at the last full multiple of batch_size.
    stop = (num_samples // batch_size) * batch_size if drop_last else num_samples

    return [
        [X[start: start + batch_size], y[start: start + batch_size]]
        for start in range(0, stop, batch_size)
    ]

In [43]:
## Utility functions ##
def probs_to_labels(y):
    """Return, for each row of ``y``, the column index of its largest entry."""
    labels = np.argmax(y, axis=1)
    return labels


def encoded_to_labels(y):
    """Return the column indices of the entries of ``y`` that equal 1.

    For a one-hot matrix with exactly one 1 per row this recovers the class
    label of every row, in row order.
    """
    hit_indices = np.where(y == 1)
    return hit_indices[1]

def accuracy_score(y_pred, y_true):
    """Return the fraction of rows of ``y_pred`` whose arg-max equals ``y_true``.

    ``y_pred`` holds one row of class scores per sample; ``y_true`` holds the
    corresponding integer labels.
    """
    predicted_labels = probs_to_labels(y_pred)
    num_correct = np.sum(predicted_labels == y_true)

    return num_correct / len(y_true)

# Split the toy dataset into consecutive mini-batches of 32 samples each.
batches = create_batches(X, y, batch_size=32)
# Bare last expression so the notebook displays the number of batches.
len(batches)

6

In [45]:
# Two fully connected layers with a ReLU in between:
# 20 input features -> 32 hidden units -> 3 class logits.
model = NeuralNet([FC(20, 32), RelU(), FC(32, 3)])
# NOTE(review): this optimizer instance is not referenced again in the visible
# cells; the training call further down constructs its own SGD(lr=1e-3).
optimizer = SGD(lr = 0.001)

def fit_model(model, batches, optimizer, epochs=10):
    """Train ``model`` on ``batches`` and collect per-epoch training metrics.

    Args:
        model: network exposing ``compile``, ``__call__``, ``loss``,
            ``backward`` and ``update_weights``.
        batches: sequence of ``(X, y)`` pairs; every epoch iterates over all of
            them in order.
        optimizer: optimizer handed to ``model.compile``.
        epochs: number of passes over ``batches``.

    Returns:
        List with one dict per epoch holding the keys ``"Epoch"``,
        ``"Train Loss"`` and ``"Train Accuracy"``; loss and accuracy are the
        averages of the per-batch values. A summary line is also printed after
        every epoch.
    """
    num_batches = len(batches)

    # A fresh cross-entropy loss is attached on every call to fit_model.
    model.compile(loss=CrossEntropyLossFromLogits(), optimizer=optimizer)

    training_stats = []
    for epoch in range(1, epochs + 1):
        running_loss = 0
        running_accuracy = 0

        for features, labels in batches:
            logits = model(features)

            # Metrics are measured on the weights *before* this batch's update.
            running_loss += model.loss(logits, labels)
            running_accuracy += accuracy_score(logits, labels)

            model.backward()
            model.update_weights()

        loss_per_epoch = running_loss / num_batches
        accuracy = running_accuracy / num_batches

        print(f"Epoch: {epoch} Train Loss: {loss_per_epoch} Train Accuracy: {accuracy}")

        epoch_record = {
            "Epoch": epoch,
            "Train Loss": loss_per_epoch,
            "Train Accuracy": accuracy,
        }
        training_stats.append(epoch_record)

    return training_stats

In [47]:
# Train with the default number of epochs (10) and plain SGD.
# NOTE(review): the output recorded below lists more than 10 epochs and starts
# at ~0.99 accuracy, so it was presumably produced by an earlier run with a
# different epochs value on an already-trained model -- re-run from a fresh
# kernel to refresh it.
training_stats = fit_model(model, batches, SGD(lr=1e-3))

Epoch: 1 Train Loss: 0.030437021236744535 Train Accuracy: 0.9947916666666666
Epoch: 2 Train Loss: 0.02824352679668765 Train Accuracy: 1.0
Epoch: 3 Train Loss: 0.026414579685934764 Train Accuracy: 1.0
Epoch: 4 Train Loss: 0.024833449023869206 Train Accuracy: 1.0
Epoch: 5 Train Loss: 0.023397073284273224 Train Accuracy: 1.0
Epoch: 6 Train Loss: 0.02217035885632808 Train Accuracy: 1.0
Epoch: 7 Train Loss: 0.021088792700828075 Train Accuracy: 1.0
Epoch: 8 Train Loss: 0.020129594902831185 Train Accuracy: 1.0
Epoch: 9 Train Loss: 0.019233363186824337 Train Accuracy: 1.0
Epoch: 10 Train Loss: 0.018433657410856025 Train Accuracy: 1.0
Epoch: 11 Train Loss: 0.01770867252126077 Train Accuracy: 1.0
Epoch: 12 Train Loss: 0.01704596157986977 Train Accuracy: 1.0
Epoch: 13 Train Loss: 0.01643315172463195 Train Accuracy: 1.0
Epoch: 14 Train Loss: 0.015897271966827633 Train Accuracy: 1.0
Epoch: 15 Train Loss: 0.015357063895110983 Train Accuracy: 1.0
Epoch: 16 Train Loss: 0.014869907805205014 Train Accur

Load MNIST Dataset