In [1]:
import numpy as np
from NeuralNetworks.layers import FullyConnected
from NeuralNetworks.activations import Linear, Sigmoid, TanH, ReLU, SoftMax
from grad_check import check_gradients

## Activation Function Gradient Check

In [21]:
# Map a display label to each activation class under test.
check = {
    'Linear': Linear,
    'Sigmoid': Sigmoid,
    'TanH': TanH,
    'ReLU': ReLU,
    'Softmax': SoftMax,
}

for label, activation_cls in check.items():
    # Draw a fresh random input and a same-shaped gradient of the loss
    # w.r.t. the activation output for every activation.
    X = np.random.randn(2, 3)
    dLdY = np.random.randn(2, 3)

    # Instantiate the activation, then run a forward and a backward pass
    # to obtain the analytically computed gradient w.r.t. the input X.
    activation = activation_cls()
    _ = activation.forward(X)
    grad = activation.backward(X, dLdY)

    # Compare the analytic gradient against check_gradients' own estimate;
    # here the gradient is taken w.r.t. the input X (no parameters involved).
    rel_err = check_gradients(
        fn=activation.forward,  # function being checked
        grad=grad,              # analytic gradient from backward()
        x=X,                    # variable the gradient is taken w.r.t.
        dLdf=dLdY,              # gradient of the loss w.r.t. fn's output
    )
    print(f"Relative error for {label} activation:", rel_err)

Relative error for Linear activation: 2.8434717441750492e-11
Relative error for Sigmoid activation: 1.1071217121012581e-10
Relative error for TanH activation: 2.2266722710042676e-11
Relative error for ReLU activation: 1.362101106828521e-11
Relative error for Softmax activation: 1.1238822943861714e-10


## Fully Connected Layer Gradient Check

In [2]:
# Random input batch and a gradient of the loss w.r.t. the layer output
# (second dimension matches n_out=4 below).
X = np.random.randn(2, 3)
dLdY = np.random.randn(2, 4)

# Build a fully connected layer with a linear activation, then run one
# forward and one backward pass so that fc_layer.gradients is available.
fc_layer = FullyConnected(n_out=4, activation="linear")
_ = fc_layer.forward(X)
_ = fc_layer.backward(dLdY)

# Gradient-check every entry of fc_layer.parameters in turn.
for param in fc_layer.parameters:
    forward_fn = fc_layer.forward_with_param(param, X)  # function being checked
    analytic_grad = fc_layer.gradients[param]           # analytic gradient from backward()
    param_value = fc_layer.parameters[param]            # variable the gradient is taken w.r.t.

    rel_err = check_gradients(
        fn=forward_fn,
        grad=analytic_grad,
        x=param_value,
        dLdf=dLdY,  # gradient of the loss w.r.t. the layer output
    )
    print(f"Relative error for {param}:", rel_err)

Relative error for W: 1.4501579951504764e-11
Relative error for b: 5.740241368282514e-11
