# Numerical Gradient checking of Layers

Verify the correctness of the layer implementations using the gradient checks provided in the 2nd assignment of CS231n.

1. **Probably Wrong**: relative error > 1e-2
2. **Something not right**: 1e-2 > relative error > 1e-4
3. **Okay for objectives with kinks**: 1e-4 > relative error (if the objective has no kinks, 1e-4 is still too high)
4. **Probably Right**: relative error < 1e-7

In [1]:
import numpy as np
from layers import *
from nonlinearity import ReLU
from utils import numerical_gradient_array,eval_numerical_gradient,rel_error,load_mnist,load_cifar10
from time import time
from loss import SoftmaxLoss,regularization,delta_regularization
from nnet import NeuralNet
from solver import sgd,sgd_momentum,adam
import sys

## Convolution Layer

Perform numerical gradient checking to verify the implementation of the convolution layer.

### Forward Pass

The difference of correct_out and out should be around 1e-8

In [2]:
# Conv forward-pass check: deterministic, evenly spaced inputs are pushed
# through the layer and compared against a hard-coded reference output.
x_shape, w_shape = (2, 3, 4, 4), (3, 3, 4, 4)
x = np.linspace(-0.1, 0.5, num=np.prod(x_shape)).reshape(x_shape)
w = np.linspace(-0.2, 0.3, num=np.prod(w_shape)).reshape(w_shape)
b = np.linspace(-0.1, 0.2, num=w_shape[0])

# Layer geometry is read off the shape tuples: per-sample input dims
# (3, 4, 4), 3 filters of size 4x4.
c_layer = Conv(
    x_shape[1:],
    n_filter=w_shape[0],
    h_filter=w_shape[2],
    w_filter=w_shape[3],
    stride=2,
    padding=1,
)
# Overwrite the layer's parameters with the known values; the bias is
# installed as a column.
c_layer.W, c_layer.b = w, b.reshape(-1, 1)

# Reference output for the inputs above.
correct_out = np.array([
    [[[-0.08759809, -0.10987781], [-0.18387192, -0.2109216]],
     [[0.21027089, 0.21661097], [0.22847626, 0.23004637]],
     [[0.50813986, 0.54309974], [0.64082444, 0.67101435]]],
    [[[-0.98053589, -1.03143541], [-1.19128892, -1.24695841]],
     [[0.69108355, 0.66880383], [0.59480972, 0.56776003]],
     [[2.36270298, 2.36904306], [2.38090835, 2.38247847]]],
])

out = c_layer.forward(x)
error = rel_error(out, correct_out)

print("Testing forward pass of Conv Layer")
print("Difference: ", error)

Testing forward pass of Conv Layer
Difference:  2.21214764967e-08


### Backward pass

The errors for gradients should be around 1e-9

In [3]:
# Conv backward-pass check: analytic gradients from backward() are compared
# with numerical gradients for the input, the filters and the bias.
x = np.random.randn(4, 3, 5, 5)
w = np.random.randn(2, 3, 3, 3)
b = np.random.randn(2)[:, np.newaxis]
dout = np.random.randn(4, 2, 5, 5)

c_layer = Conv(
    x.shape[1:],
    n_filter=w.shape[0],
    h_filter=w.shape[2],
    w_filter=w.shape[3],
    stride=1,
    padding=1,
)
c_layer.W, c_layer.b = w, b


def forward_at_x(_perturbed):
    """Evaluate the layer at x, ignoring the array handed in by the checker.

    Used for the w and b checks: those arrays are the layer's own parameter
    objects, so the argument itself is not needed here.
    """
    return c_layer.forward(x)


dx_num = numerical_gradient_array(c_layer.forward, x, dout)
dw_num = numerical_gradient_array(forward_at_x, w, dout)
db_num = numerical_gradient_array(forward_at_x, b, dout)

# Fresh forward pass on the unperturbed input before backpropagating.
out = c_layer.forward(x)
dx, grads = c_layer.backward(dout)
dw, db = grads

print("Testing backward pass of Conv Layer")
for label, analytic, numeric in (
    ("dX error: ", dx, dx_num),
    ("dW error: ", dw, dw_num),
    ("db error: ", db, db_num),
):
    print(label, rel_error(analytic, numeric))

Testing backward pass of Conv Layer
dX error:  5.04631083003e-09
dW error:  1.21968306724e-08
db error:  1.82009326877e-11


## Maxpool Layer

Perform gradient check for maxpool layer and verify correctness of its implementation

### Forward Pass

Difference should be around 1e-8

In [4]:
# Maxpool forward-pass check against a hard-coded reference output.
x_shape = (2, 3, 4, 4)
x = np.linspace(-0.3, 0.4, num=np.prod(x_shape)).reshape(x_shape)

# Reference output for the evenly spaced input above.
correct_out = np.array([
    [[[-0.26315789, -0.24842105], [-0.20421053, -0.18947368]],
     [[-0.14526316, -0.13052632], [-0.08631579, -0.07157895]],
     [[-0.02736842, -0.01263158], [0.03157895, 0.04631579]]],
    [[[0.09052632, 0.10526316], [0.14947368, 0.16421053]],
     [[0.20842105, 0.22315789], [0.26736842, 0.28210526]],
     [[0.32631579, 0.34105263], [0.38526316, 0.4]]],
])

# Per-sample input dims (3, 4, 4) are taken from the shape tuple.
pool = Maxpool(x_shape[1:], size=2, stride=2)
out = pool.forward(x)

difference = rel_error(out, correct_out)
print('Testing max_pool_forward_naive function:')
print('difference: ', difference)

Testing max_pool_forward_naive function:
difference:  4.16666651573e-08


### Backward Pass

Error should be around 1e-12

In [5]:
# Maxpool backward-pass check: analytic input gradient vs. numerical gradient.
x = np.random.randn(3, 2, 8, 8)
dout = np.random.randn(3, 2, 4, 4)

pool = Maxpool(x.shape[1:], size=2, stride=2)

dx_num = numerical_gradient_array(pool.forward, x, dout)

# Fresh forward pass before backpropagating; only the first element of the
# backward() result (the input gradient) is used.
out = pool.forward(x)
dx = pool.backward(dout)[0]

dx_error = rel_error(dx, dx_num)
print('Testing bacward pass of Maxpool layer')
print('dX error: ', dx_error)

Testing bacward pass of Maxpool layer
dX error:  3.27564681622e-12


## ReLU Layer
Error should be around 1e-12

In [6]:
# ReLU backward-pass check: analytic input gradient vs. numerical gradient.
x = np.random.randn(3, 2, 8, 8)
dout = np.random.randn(3, 2, 8, 8)

r = ReLU()

dx_num = numerical_gradient_array(r.forward, x, dout)

# Fresh forward pass before backpropagating; only the first element of the
# backward() result (the input gradient) is used.
out = r.forward(x)
dx = r.backward(dout)[0]

dx_error = rel_error(dx, dx_num)
print('Testing backward pass of ReLU layer')
print('dX error: ', dx_error)

Testing backward pass of ReLU layer
dX error:  3.27561559317e-12


## Conv-ReLU-MaxPool

In [7]:
# Random input batch, filter bank, bias and upstream gradient for the
# Conv -> ReLU -> Maxpool gradient check.
x = np.random.randn(2, 3, 16, 16)
w = np.random.randn(3, 3, 3, 3)
b = np.random.randn(3,).reshape(-1,1)  # bias as a (3, 1) column
dout = np.random.randn(2, 3, 8, 8)

c = Conv((3,16,16),n_filter=3,h_filter=3,w_filter=3,stride=1,padding=1)
# Install the known parameters. They are assigned without copying, so the
# layer holds the very same array objects as w and b.
c.W, c.b = w, b
r = ReLU()
# The pooling layer is sized from the conv layer's reported output dimensions.
m = Maxpool(c.out_dim,size=2,stride=2)

def conv_relu_pool_forward(c,r,m,x):
    """Feed x through c, then r, then m, and return the final output.

    Each of c, r and m only needs a ``forward(value)`` method; the result of
    one stage is the input of the next.
    """
    activation = x
    for stage in (c, r, m):
        activation = stage.forward(activation)
    return activation

# Numerical gradients of the conv -> relu -> pool stack w.r.t. x, w and b.
# The w and b lambdas ignore their argument: w and b are the same array
# objects as c.W and c.b, so these checks rely on the helper perturbing the
# array it is given in place.
# NOTE(review): confirm that behaviour in utils.numerical_gradient_array.
dx_num = numerical_gradient_array(lambda x: conv_relu_pool_forward(c,r,m,x), x, dout)
dw_num = numerical_gradient_array(lambda w: conv_relu_pool_forward(c,r,m,x), w, dout)
db_num = numerical_gradient_array(lambda b: conv_relu_pool_forward(c,r,m,x), b, dout)

# backward() is given only the upstream gradient, so each layer has to work
# from whatever it stored during its most recent forward() call. The most
# recent calls were made by the numerical-gradient helper, so run one clean
# forward pass on the unperturbed x, w and b before backpropagating (the
# Conv, Maxpool and ReLU checks earlier in this notebook do the same).
out = conv_relu_pool_forward(c,r,m,x)

# Backpropagate through the stack in reverse order: pool, relu, conv.
m_dx,_ = m.backward(dout)
r_dx,_ = r.backward(m_dx)
dx,grads = c.backward(r_dx)
dw,db = grads


print('Testing conv_relu_pool')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

Testing conv_relu_pool
dx error:  6.88787711827e-09
dw error:  9.29133425491e-10
db error:  6.67766526233e-12


## Fully Connected Layer

In [8]:
# Fully connected forward-pass check against a hard-coded reference output.
num_inputs, input_shape, output_dim = 2, (4, 5, 6), 3

# Number of features per sample once flattened (4 * 5 * 6 = 120).
features = int(np.prod(input_shape))
input_size = num_inputs * features
weight_size = output_dim * features

# Deterministic, evenly spaced input, weights and bias.
x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(features, output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim).reshape(1, -1)

# Flatten the input before handing it to the fully connected layer.
flat = Flatten()
x = flat.forward(x)

f = FullyConnected(features, output_dim)
f.W, f.b = w, b
out = f.forward(x)

correct_out = np.array([
    [1.49834967, 1.70660132, 1.91485297],
    [3.25553199, 3.5141327, 3.77273342],
])

print(out)
# Compare the layer output with the reference. The error should be around 1e-9.
difference = rel_error(out, correct_out)
print('Testing fully connected forward pass:')
print('difference: ', difference)


[[ 1.49834967  1.70660132  1.91485297]
 [ 3.25553199  3.5141327   3.77273342]]
Testing fully connected forward pass:
difference:  9.76985004799e-10


In [9]:
# Fully connected backward-pass check: analytic gradients from backward()
# are compared with numerical gradients for the input, weights and bias.
x = np.random.randn(10, 2, 3)
w = np.random.randn(6, 5)
b = np.random.randn(5)
dout = np.random.randn(10, 5)

# Flatten the input before handing it to the fully connected layer.
flat = Flatten()
x = flat.forward(x)

# Size the layer from the weight matrix (6 flattened features -> 5 outputs).
# The previous hard-coded 60 was the total element count of x (10 * 2 * 3),
# not the per-sample feature count, and did not match w.
f = FullyConnected(*w.shape)
f.W,f.b= w,b

# The w and b lambdas ignore their argument: w and b are the same array
# objects as f.W and f.b, so these checks rely on the helper perturbing the
# array it is given in place.
# NOTE(review): confirm that behaviour in utils.numerical_gradient_array.
dx_num = numerical_gradient_array(lambda x: f.forward(x), x, dout)
dw_num = numerical_gradient_array(lambda w: f.forward(x), w, dout)
db_num = numerical_gradient_array(lambda b: f.forward(x), b, dout)

# backward() is given only the upstream gradient, so it has to work from
# what the layer stored during its most recent forward() call. Run one clean
# forward pass on the unperturbed inputs first so that state does not come
# from the numerical-gradient helper's last perturbed evaluation.
out = f.forward(x)
dx,grads= f.backward(dout)
dw, db = grads
# The error should be around 1e-10
print('Testing fully connected backward pass:')
print('dx error: ', rel_error(dx_num, dx))
print('dw error: ', rel_error(dw_num, dw))
print('db error: ', rel_error(db_num, db))

Testing fully connected backward pass:
dx error:  1.00870024555e-09
dw error:  1.46833423537e-10
db error:  1.1317301e-10


## Softmax Loss



In [10]:
# Softmax loss check on small random scores with random integer labels.
num_classes = 10
num_inputs = 50
x = np.random.randn(num_inputs, num_classes) * 0.001
y = np.random.randint(num_classes, size=num_inputs)


def loss_only(scores):
    """Return just the loss (first element) of SoftmaxLoss for the labels y."""
    return SoftmaxLoss(scores, y)[0]


dx_num = eval_numerical_gradient(loss_only, x, verbose=False)
loss, dx = SoftmaxLoss(x, y)

# Test softmax_loss function. Loss should be 2.3 and dx error should be 1e-8
dx_error = rel_error(dx_num, dx)
print('Testing SoftmaxLoss:')
print('loss: ', loss)
print('dx error: ', dx_error)

Testing SoftmaxLoss:
loss:  2.30269056333
dx error:  8.37370677824e-09


## Testing a simple CNN

When we use the softmax loss, we expect the loss for random weights (and no regularization) to be about log(C) for C classes. When we add regularization this should go up.

In [11]:
# Tiny random problem for the end-to-end CNN check: N random inputs, random
# integer labels and a random upstream gradient for the network output.
N = 2
num_class = 10
X = np.random.randn(N, 3, 10, 10)
y = np.random.randint(num_class, size=N)  # integer labels in [0, num_class)
dout = np.random.randn(N, num_class)  # one upstream-gradient value per sample and class

def make_cnn(X_dim,num_class):
    """Build the layer list for a small CNN.

    The stack is Conv -> ReLU -> Maxpool -> Flatten -> FullyConnected -> ReLU.
    The pooling layer is sized from the conv layer's ``out_dim`` and the
    fully connected layer from the product of the pooling layer's ``out_dim``.

    Returns the six layer objects as a list, in forward order.
    """
    # Layers are created one after another in forward order.
    stack = [
        Conv(X_dim, n_filter=3, h_filter=3, w_filter=3, stride=1, padding=1),
        ReLU(),
    ]
    stack.append(Maxpool(stack[0].out_dim, size=2, stride=1))
    stack.append(Flatten())
    stack.append(FullyConnected(np.prod(stack[2].out_dim), num_class))
    stack.append(ReLU())
    return stack

# End-to-end check: build the network for the per-sample input dims of X
# ((3, 10, 10)) and compare its analytic input gradient with the numerical one.
cnn = NeuralNet(make_cnn(X.shape[1:], num_class))

dx_num = numerical_gradient_array(cnn.forward, X, dout)

# Fresh forward pass on the unperturbed input before backpropagating.
out = cnn.forward(X)
dx, grads = cnn.backward(dout)

dx_error = rel_error(dx_num, dx)
print('dx error: ', dx_error)

# Softmax loss of the network output; note that dout is rebound here to the
# second value returned by SoftmaxLoss.
loss, dout = SoftmaxLoss(out, y)

print('Initial loss (no regularization): ', loss)

# TODO: also report the loss with regularization enabled. An earlier draft
# called cnn.forward(X, y, reg=True) here and printed
# 'Initial loss (with regularization): ', but it was left disabled.


dx error:  3.50764487292e-08
Initial loss (no regularization):  6.03857135481
