***Gradient check***

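Approximate the gradient numerically with central finite differences and compare it, weight by weight, with the analytical gradient computed by the network. Small differences indicate that the analytical gradient is implemented correctly.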

In [37]:
import numpy as np

def batch_loss_BCE(NN, X, y):
    """Return the mean binary cross-entropy of ``NN`` over a batch.

    Each sample ``X[i]`` is passed through ``NN.forward_prop`` and the
    element at ``[0, 0]`` of the result is used as the predicted probability.

    Parameters
    ----------
    NN : object exposing ``forward_prop(x)``; the returned value must support
        ``[0, 0]`` indexing.
    X : batch of inputs; ``X.shape[0]`` is taken as the number of samples.
    y : targets, one per sample. Each ``y[i]`` may be a scalar or a
        sequence, in which case only its first element is used.

    Returns
    -------
    The summed per-sample loss divided by the number of samples.
    """
    n_samples = X.shape[0]
    total = 0.0
    for idx in range(n_samples):
        # The prediction is indexed at [0, 0]; unwrap it to a plain float.
        prob = float(NN.forward_prop(X[idx])[0, 0])

        # Accept both scalar targets and one-element containers.
        label = y[idx]
        if np.asarray(label).ndim == 0:
            target = float(label)
        else:
            target = float(label[0])

        total += -(target * np.log(prob) + (1.0 - target) * np.log(1.0 - prob))
    return total / n_samples

def analytical_grad(NN, X, y):
    """Return the batch-averaged gradient accumulated by the network itself.

    ``NN.Delta`` is reset with ``NN.init_D()``; then, for every sample, the
    network runs ``forward_prop``, ``calculate_small_d`` and
    ``calculate_big_Delta`` in that order. The result is a new list holding
    each entry of ``NN.Delta`` divided by the number of samples.

    Side effect: ``NN.Delta`` is overwritten and left holding the
    un-averaged accumulators.
    """
    n_samples = X.shape[0]
    NN.Delta = NN.init_D()

    for idx in range(n_samples):
        NN.forward_prop(X[idx])
        # Error terms for this sample, fed straight into the accumulators.
        NN.calculate_big_Delta(NN.calculate_small_d(y[idx]))

    scale = float(n_samples)
    averaged = []
    for accumulated in NN.Delta:
        averaged.append(accumulated / scale)
    return averaged



def grad_check(NN, X, y, epsilon=1e-5):
    """Compare the analytical gradient with a central-difference estimate.

    Every entry of every array in ``NN.W`` is perturbed by ``+epsilon`` and
    ``-epsilon``; the batch loss is evaluated at both points and the slope
    ``(J+ - J-) / (2 * epsilon)`` is taken as the numerical gradient. The
    result is compared element-wise with ``analytical_grad(NN, X, y)``.

    Parameters
    ----------
    NN : network object exposing a list of weight arrays ``W`` plus whatever
        ``batch_loss_BCE`` and ``analytical_grad`` require of it.
    X, y : batch passed unchanged to the loss and gradient functions.
    epsilon : half-width of the finite-difference step.

    Returns
    -------
    list of ``(max_abs, max_rel)`` tuples, one per entry of ``NN.W``.
    ``max_abs`` is the largest ``|a - n|`` and ``max_rel`` the largest
    ``|a - n| / (|a| + |n|)`` over the layer, where ``a`` is the analytical
    and ``n`` the numerical gradient. Entries where both gradients are
    exactly zero contribute a relative error of 0. NaN differences
    propagate into the reported maxima instead of being skipped.

    Notes
    -----
    * The weights are restored to their original values even if the loss
      function raises part-way through the sweep.
    * Weights are perturbed in place, so the check presumably needs
      floating-point weight arrays (an integer array would truncate the
      ``epsilon`` step) -- confirm against the model's initialisation.
    * The analytical gradient for each layer is expected to have the same
      shape as the corresponding weight array.
    """
    loss_fn = batch_loss_BCE
    analytical = analytical_grad(NN, X, y)
    numerical = [np.zeros(Wl.shape, dtype=float) for Wl in NN.W]
    original = [Wl.copy() for Wl in NN.W]

    try:
        for l in range(len(NN.W)):
            # np.ndindex walks the indices in row-major order and does not
            # assume the weight array is two-dimensional.
            for idx in np.ndindex(*NN.W[l].shape):
                w0 = NN.W[l][idx]

                NN.W[l][idx] = w0 + epsilon
                Jp = loss_fn(NN, X, y)

                NN.W[l][idx] = w0 - epsilon
                Jm = loss_fn(NN, X, y)

                numerical[l][idx] = (Jp - Jm) / (2.0 * epsilon)
                NN.W[l][idx] = w0
    finally:
        # Always undo the perturbations, including after an exception, so the
        # network is never left with a shifted weight.
        for l in range(len(NN.W)):
            NN.W[l][...] = original[l]

    # simple per-layer report
    tiny = np.finfo(float).tiny  # keeps the 0/0 case at a relative error of 0
    diffs = []
    for l in range(len(NN.W)):
        A = np.asarray(analytical[l], dtype=float)
        N = numerical[l]

        abs_err = np.abs(A - N)
        rel_err = abs_err / np.maximum(np.abs(A) + np.abs(N), tiny)

        max_abs = float(abs_err.max()) if abs_err.size else 0.0
        max_rel = float(rel_err.max()) if rel_err.size else 0.0

        print(f"Layer {l}: max |E|={max_abs:.3e}")
        diffs.append((max_abs, max_rel))
    return diffs


In [38]:
from Model import NeuralNet

# Tiny hand-written batch: three samples with three features each and one
# target value per sample.
X_batch = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
y_batch = np.array([[1], [0], [1]])

# Network dimensions are read off the batch; [2, 2, 3] is passed through as
# the middle constructor argument (presumably the hidden-layer sizes -- see
# Model.NeuralNet to confirm).
n_inputs = X_batch.shape[1]
n_outputs = y_batch.shape[1]
NN_GradCheck = NeuralNet(n_inputs, [2, 2, 3], n_outputs)

diffs = grad_check(NN_GradCheck, X_batch, y_batch, epsilon=1e-5)


Layer 0: max |E|=1.649e-11
Layer 1: max |E|=1.347e-11
Layer 2: max |E|=1.314e-11
Layer 3: max |E|=1.514e-11
