# Numerical Gradient Checking 
Please refer to `grad_check.py` to see how numerical gradient checking is implemented.

In [1]:
import numpy as np
from NeuralNetworks.layers import FullyConnected
from NeuralNetworks.activations import Linear, Sigmoid, TanH, ReLU, SoftMax
from NeuralNetworks.losses import CrossEntropy
from utils import check_gradients

## Activation Function Gradient Check

In [6]:
# Activation classes under test, keyed by the label used in the printed report.
check = {
    'Linear': Linear,
    'Sigmoid': Sigmoid,
    'TanH': TanH,
    'ReLU': ReLU,
    'Softmax': SoftMax,
}

for label, activation_cls in check.items():
    # Draw a fresh random input and a matching upstream gradient for each activation.
    X = np.random.randn(2, 3)
    dLdY = np.random.randn(2, 3)

    # Instantiate the activation, then run one forward and one backward pass;
    # the backward pass yields the analytic gradient w.r.t. the input X.
    activation = activation_cls()
    _ = activation.forward(X)
    grad = activation.backward(X, dLdY)

    # Compare the analytic gradient against the numerical estimate.
    rel_error = check_gradients(
        fn=activation.forward,  # function whose gradient is being verified
        grad=grad,              # analytic gradient from the backward pass
        x=X,                    # variable the gradient is taken with respect to
        dLdf=dLdY,              # upstream gradient fed into the backward pass
    )
    print(f"Relative error for {label} activation:", rel_error)

Relative error for Linear activation: 4.407971522515095e-12
Relative error for Sigmoid activation: 6.41001440675322e-11
Relative error for TanH activation: 2.1801962031463618e-11
Relative error for ReLU activation: 1.4377884969231344e-11
Relative error for Softmax activation: 3.948456953048531e-11


## Fully Connected Layer Gradient Check

In [5]:
# Random input batch (2 x 3) and an upstream gradient matching the
# layer's output shape (2 x 4, since n_out=4).
X = np.random.randn(2, 3)
dLdY = np.random.randn(2, 4)

# Build a fully connected layer with a linear activation and run one
# forward and one backward pass so that its gradients are populated.
fc_layer = FullyConnected(n_out=4, activation="linear")
_ = fc_layer.forward(X)
_ = fc_layer.backward(dLdY)

# Verify the analytic gradient of every parameter in turn.
for param in fc_layer.parameters:
    # Function handed to the checker for this parameter
    # (presumably the forward pass as a function of `param` alone -- confirm
    # against FullyConnected.forward_with_param).
    forward_wrt_param = fc_layer.forward_with_param(param, X)
    analytic_grad = fc_layer.gradients[param]
    param_value = fc_layer.parameters[param]

    rel_error = check_gradients(
        fn=forward_wrt_param,  # function whose gradient is being verified
        grad=analytic_grad,    # analytic gradient from the backward pass
        x=param_value,         # variable the gradient is taken with respect to
        dLdf=dLdY,             # upstream gradient fed into the backward pass
    )
    print(f"Relative error for {param}:", rel_error)

Relative error for W: 4.0907738088780064e-11
Relative error for b: 3.3474831817927246e-11


## Cross Entropy Loss Gradient Check

In [4]:
num_pts = 5
num_classes = 6

# One-hot encoded targets: one random class index per data point.
y_idxs = np.random.randint(0, num_classes, (num_pts,))
y = np.zeros((num_pts, num_classes))
y[np.arange(num_pts), y_idxs] = 1

# Random scores in [0, 1), normalized so that every row sums to 1.
scores = np.random.uniform(0, 1, size=(num_pts, num_classes))
y_hat = scores / scores.sum(axis=1, keepdims=True)

cross_entropy_loss = CrossEntropy("cross_entropy")


def forward_fn(Y, Y_hat=None):
    """Return the cross entropy loss as a function of the predictions only.

    The gradient checker varies a single argument, so the targets are
    frozen here and only the predictions remain free.

    Parameters
    ----------
    Y : array
        Targets held fixed inside the returned function.
    Y_hat : optional
        Unused. Accepted only so existing two-argument calls keep working;
        the returned function always evaluates the predictions it is given.

    Returns
    -------
    callable
        Maps a predictions array to ``cross_entropy_loss.forward(Y, predictions)``.
    """
    def inner_forward(perturbed_Y_hat):
        # Named differently from the outer parameter to avoid shadowing it.
        return cross_entropy_loss.forward(Y, perturbed_Y_hat)
    return inner_forward


# Forward pass first, then the analytic gradient w.r.t. the predictions.
loss = cross_entropy_loss.forward(y, y_hat)
grad = cross_entropy_loss.backward(y, y_hat)

rel_error = check_gradients(
    fn=forward_fn(y),  # loss as a function of the predictions
    grad=grad,         # analytic gradient from the backward pass
    x=y_hat,           # variable the gradient is taken with respect to
    dLdf=1,            # fn returns the loss itself, so the upstream gradient is 1
)
# Plain string: the label has no placeholders, so no f-prefix is needed.
print("Relative error for cross entropy loss:", rel_error)

Relative error for cross entropy loss: 3.058921437577199e-09
