## MSE Loss, Linear

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
from nets.functional import (
    linear_forward,
    linear_backward,
    mse_loss_forward,
    mse_loss_backward,
    softmax_loss_forward,
    softmax_loss_backward
)

from tests.check import check_equals
# Gradient check for the linear layer followed by the MSE loss: the analytic
# gradients from the backward functions are compared with central-difference
# estimates for every parameter (x, w and b).
epsilon = 1e-6  # perturbation applied to one entry at a time
x = np.random.randn(1, 3)
w = np.random.randn(8, 3)
b = np.random.randn(8)
y = np.random.randn(1, 8)


def numerical_gradient(loss_fn, param, eps):
    """Estimate the gradient of ``loss_fn`` at ``param`` by central differences.

    Args:
        loss_fn: callable taking an array shaped like ``param`` and returning
            a scalar loss.
        param: array at which the gradient is estimated; it is not modified.
        eps: perturbation added to / subtracted from one entry at a time.

    Returns:
        An array shaped like ``param`` whose entries are
        ``(loss_fn(param + h) - loss_fn(param - h)) / (2 * eps)``, where ``h``
        is zero everywhere except for ``eps`` at that entry.
    """
    grad = np.zeros_like(param)
    for idx in np.ndindex(param.shape):
        # Perturb a single entry so the difference isolates its contribution.
        h = np.zeros_like(param)
        h[idx] = eps

        loss_minus = loss_fn(param - h)
        loss_plus = loss_fn(param + h)

        grad[idx] = (loss_plus - loss_minus) / (2 * eps)
    return grad


def linear_mse_loss(x_in, w_in, b_in):
    """Return the MSE loss of ``linear_forward(x_in, w_in, b_in)`` against ``y``."""
    out, _ = linear_forward(x_in, w_in, b_in)
    loss, _ = mse_loss_forward(out, y)
    return loss


# Numerical derivatives with respect to each input of the linear layer
numerical_dx = numerical_gradient(lambda v: linear_mse_loss(v, w, b), x, epsilon)
numerical_dw = numerical_gradient(lambda v: linear_mse_loss(x, v, b), w, epsilon)
numerical_db = numerical_gradient(lambda v: linear_mse_loss(x, w, v), b, epsilon)


# Testing our derivative
out1, cache1 = linear_forward(x, w, b)
out2, cache2 = mse_loss_forward(out1, y)

dL_dout = mse_loss_backward(cache2)
dx, dw, db = linear_backward(dL_dout, cache1)

check_equals(dx, numerical_dx)
check_equals(dw, numerical_dw)
# The bias gradient is returned by linear_backward as well, so verify it too.
# NOTE(review): assumes db has the same shape as b -- confirm.
check_equals(db, numerical_db)

1.390860759897805e-10
2.439638213669326e-10


## Softmax Loss

In [6]:
# Gradient check for the softmax loss: compare the analytic gradient with a
# central-difference estimate computed one input entry at a time.
N = 25
K = 10
eps = 1e-6
x = np.random.randn(N, K)
y = np.random.randint(0, K, (N,))

# Analytic gradient from the backward pass
loss, cache = softmax_loss_forward(x, y)
dL_dx = softmax_loss_backward(cache)

# Numerical gradient, visiting the (n, k) entries in row-major order
dx_numeric = np.zeros_like(x)
for n, k in np.ndindex(N, K):
    # Perturbation that touches only entry (n, k)
    h = np.zeros_like(x)
    h[n, k] = eps

    loss_plus = softmax_loss_forward(x + h, y)[0]
    loss_minus = softmax_loss_forward(x - h, y)[0]
    dx_numeric[n, k] = (loss_plus - loss_minus) / (2 * eps)

check_equals(dx_numeric, dL_dx)

3.8829753388341937e-10
