In [1]:
import numpy as np 
import math

from templates import AutoDiffFunction, Layer, Loss, Optimizer

Defining the activation functions

In [2]:
class Sigmoid(AutoDiffFunction):
    """Element-wise logistic sigmoid activation: ``1 / (1 + exp(-x))``."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        """Return ``sigmoid(x)`` and cache the result for ``compute_grad``."""
        activation = 1 / (1 + np.exp(-x))
        self.saved_for_backward = activation
        return activation

    def compute_grad(self, x):
        """Return the local derivative of the sigmoid under the key ``"x"``.

        The derivative is expressed through the cached output ``s`` of
        ``forward`` as ``s * (1 - s)``; the ``x`` argument itself is not read.
        """
        s = self.saved_for_backward
        return {"x": s * (1 - s)}

    def backward(self, dy):
        """Multiply the incoming gradient ``dy`` by the stored local derivative.

        NOTE(review): ``self.grad`` is not assigned in this class; presumably
        the ``AutoDiffFunction`` base fills it from ``compute_grad`` - confirm.
        """
        local_grad = self.grad["x"]
        return dy * local_grad


class RelU(AutoDiffFunction):
    """Rectified linear unit: passes positive entries, zeroes the rest."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, x):
        """Return ``x`` multiplied by a 0/1 mask of its strictly positive entries.

        The mask is cached because it is also the local derivative.
        """
        positive_mask = np.where(x > 0.0, 1.0, 0.0)
        self.saved_for_backward = positive_mask
        return x * positive_mask

    def compute_grad(self, x):
        """Return the cached 0/1 mask as the derivative, keyed by ``"x"``."""
        mask = self.saved_for_backward
        return {"x": mask}

    def backward(self, dy):
        """Multiply the incoming gradient ``dy`` by the stored mask.

        NOTE(review): ``self.grad`` is not assigned in this class; presumably
        the ``AutoDiffFunction`` base fills it from ``compute_grad`` - confirm.
        """
        local_grad = self.grad["x"]
        return dy * local_grad
     

Defining the layers

In [3]:
class Linear(Layer):
    """Fully connected layer computing ``y = x @ w + b``.

    ``w`` has shape ``(in_dim, out_dim)`` and ``b`` has shape ``(1, out_dim)``.
    The input is assumed to be a 2-D array of shape ``(batch, in_dim)`` -
    TODO confirm against callers.

    NOTE(review): ``self.weights``, ``self.saved_for_backward`` and
    ``self.grad`` are not created in this class; presumably the ``Layer``
    base provides them - confirm.
    """

    def __init__(self, in_dim, out_dim) -> None:
        super().__init__()
        self.initialize_weights(in_dim, out_dim)

    def initialize_weights(self, in_dim, out_dim):
        """Draw ``w`` and ``b`` from a standard normal distribution."""
        self.weights["w"] = np.random.randn(in_dim, out_dim)
        self.weights["b"] = np.random.randn(1, out_dim)

    def compute_grad(self, x):
        """Return the local Jacobian factors used by ``backward``.

        For ``y = x @ w + b``:

        * ``"w"`` holds ``x.T`` so that ``dL/dw = x.T @ dy`` has the same
          shape as ``w``, i.e. ``(in_dim, out_dim)``.
        * ``"x"`` holds ``w.T`` so that ``dL/dx = dy @ w.T`` has the same
          shape as ``x``.

        The gradient with respect to ``b`` is not stored explicitly since
        its value is 1; ``backward`` just sums ``dy`` over the batch.
        """
        gradients = {}

        # The transpose is required: without it ``backward`` would compute
        # (batch, in_dim) @ (batch, out_dim), which is a shape mismatch.
        gradients["w"] = self.saved_for_backward["x"].T
        gradients["x"] = self.weights["w"].T

        return gradients

    def forward(self, x):
        """Return ``x @ w + b`` and remember ``x`` for the backward pass."""
        output = x @ self.weights["w"] + self.weights["b"]

        self.saved_for_backward["x"] = x

        return output

    def backward(self, dy):
        """Propagate ``dy`` to the input and record the parameter gradients.

        Stores ``{"w": dw, "b": db}`` (keyed like ``self.weights``) on
        ``self.update_weights`` and returns the gradient with respect to ``x``.
        """
        # dL/dx = dy @ w.T
        dx = dy @ self.grad["x"]

        # dL/dw = x.T @ dy; dL/db sums dy over the batch axis.
        dw = self.grad["w"] @ dy
        db = np.sum(dy, axis=0, keepdims=True)

        self.update_weights = {"w": dw, "b": db}

        return dx

Defining the loss function

This is a classification problem, so we use the cross-entropy loss. The next cell works through the computation by hand on two example rows before it is wrapped in a class.

In [4]:
# Hand-worked cross-entropy on two samples with three classes each.
# x holds predicted class probabilities (one row per sample),
# y holds the matching one-hot targets.
x = np.array([
    [0.94, 0.04, 0.02],
    [0.3, 0.64, 0.06],
])
y = np.array([
    [1, 0, 0],
    [0, 1, 0],
])

# Element-wise term -y * log(x): only the true-class entry of each row is non-zero.
z = -y * np.log(x)
print(z)

# Collapse the class axis to get one loss value per sample.
z = z.sum(axis=1)
print(z)

# The batch loss is the mean of the per-sample losses.
print(z.mean())

[[ 0.0618754 -0.        -0.       ]
 [-0.         0.4462871 -0.       ]]
[0.0618754 0.4462871]
0.2540812531732535


In [5]:
class CrossEntropyLossFromLogits(Loss):
    """Softmax followed by cross-entropy, averaged over the batch.

    ``y_pred`` holds raw logits and ``y_true`` the target distribution, both
    indexed as ``(sample, class)`` (the reductions below run over axis 1 and
    then over the remaining axis).

    NOTE(review): ``self.saved_for_backward`` is used as a dict but is not
    created here; presumably the ``Loss`` base provides it - confirm.
    """

    @staticmethod
    def _log_softmax(x):
        """Row-wise log-softmax computed without overflow or ``log(0)``."""
        # Shifting by the row max does not change the softmax but bounds
        # exp() by 1; the normaliser is then >= 1, so its log is finite.
        shifted = x - np.max(x, axis=1, keepdims=True)
        normaliser = np.sum(np.exp(shifted), axis=1, keepdims=True)
        return shifted - np.log(normaliser)

    @staticmethod
    def softmax(x):
        """Return the row-wise softmax of ``x``.

        Declared without a ``self`` parameter so that ``self.softmax(x)``
        works; a static method receives no implicit instance argument.
        """
        return np.exp(CrossEntropyLossFromLogits._log_softmax(x))

    def forward(self, y_pred, y_true):
        """Return the mean cross-entropy between ``softmax(y_pred)`` and ``y_true``.

        Caches the probabilities and targets for ``compute_grad``.
        """
        # Work in log space so a vanishing probability gives a large finite
        # log-probability instead of -inf (and 0 * -inf = nan in the sum).
        log_probabilities = self._log_softmax(y_pred)
        probabilities = np.exp(log_probabilities)

        per_sample_loss = np.sum(- y_true * log_probabilities, axis=1)
        loss_value = np.mean(per_sample_loss)

        self.saved_for_backward["probabilities"] = probabilities
        self.saved_for_backward["y_true"] = y_true

        return loss_value

    def compute_grad(self, y_pred, y_true):
        """Return the gradient of the loss with respect to the logits, keyed ``"x"``.

        Reads the values cached by ``forward``; the arguments are not used.
        The ``probabilities - y_true`` form assumes each row of ``y_true``
        sums to 1 (e.g. one-hot targets) - TODO confirm against callers.
        """
        probabilities = self.saved_for_backward["probabilities"]
        targets = self.saved_for_backward["y_true"]

        # forward() averages over the batch, so each sample contributes 1/N.
        batch_size = probabilities.shape[0]

        return {"x": (probabilities - targets) / batch_size}

Creating an optimizer for the loss

In [6]:
class SGD(Optimizer):
    """Gradient descent with a fixed learning rate ``lr``."""

    def __init__(self, lr):
        self.lr = lr

    def step(self, weights, grads):
        """Update every entry of ``weights`` in place of its old value.

        ``weights`` and ``grads`` are mappings; ``grads`` must contain every
        key of ``weights``. Each entry is rebound to
        ``weights[key] - lr * grads[key]`` (a new object, not an in-place
        array update).
        """
        for key in weights:
            updated = weights[key] - self.lr * grads[key]
            weights[key] = updated

6


Creating the structure for an actual neural network

In [None]:
class Net():
    """Sequential container that chains layers and drives training steps.

    ``layers`` is an ordered sequence of callables; each must also expose
    ``backward(gradient)``. A loss and an optimizer are attached with
    ``compile`` before ``calculate_loss``, ``backward`` or ``update_weights``
    are used.
    """

    def __init__(self, layers) -> None:
        self.layers = layers

    def __call__(self, *args, **kwds):
        """Alias for ``forward`` so the network can be called like a function."""
        return self.forward(*args, **kwds)

    def compile(self, loss, optimizer):
        """Attach the loss object and the optimizer used for training."""
        self.loss = loss
        self.optimizer = optimizer

    def calculate_loss(self, y_pred, y_true):
        """Return ``self.loss(y_pred, y_true)``."""
        return self.loss(y_pred, y_true)

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for layer in self.layers:
            x = layer(x)

        return x

    def backward(self):
        """Back-propagate from the loss through the layers in reverse order.

        Returns the gradient produced by the first layer's ``backward``.
        """
        gradient = self.loss.backward()
        for layer in reversed(self.layers):
            gradient = layer.backward(gradient)

        return gradient

    def update_weights(self):
        """Apply one optimizer step to every ``Layer`` instance.

        After ``backward``, a layer's ``update_weights`` attribute may hold a
        dict of gradients keyed like ``layer.weights`` rather than a method.
        Such a dict is not callable, so it is handed to the optimizer
        together with the layer's weights. If the attribute is callable it
        is invoked as before.
        """
        for layer in reversed(self.layers):
            # Activations and other non-Layer members carry no parameters.
            if not isinstance(layer, Layer):
                continue

            pending = layer.update_weights
            if callable(pending):
                pending()
            else:
                self.optimizer.step(layer.weights, pending)