In [1]:
from util.layer import *
from util.util import *
from util.activations import *
from util.normalization import *
from util.convolution import *
from util.loss import *
import numpy as np
%load_ext autoreload
%autoreload 2
import time

# Activation Layers 

## Relu Gradient Checking

In [2]:
# Gradient check for the ReLU layer.
# x is drawn from a standard normal instead of uniform [0, 1): uniform samples
# are never negative, so the zeroed-out side of ReLU would go untested.
x = np.random.normal(0, 1, (3, 5, 5))
dOut = np.random.rand(3, 5, 5)

# Analytic gradient from the layer's own backward pass.
f, cache = relu_forward(x)
dx = relu_backward(dOut, cache)
# Reference gradient from num_gradient_array (presumably a numerical estimate
# of d(forward)/dx weighted by dOut -- confirm against util).
dx_num = num_gradient_array(lambda inp: relu_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 5.0006234809e-13


## Sigmoid Gradient Checking

In [3]:
# Gradient check for the sigmoid layer on a 5x8 input.
x = np.random.rand(5, 8)
dOut = np.random.rand(5, 8)

# Analytic gradient from the layer's own backward pass.
f, cache = sigmoid_forward(x)
dx = sigmoid_backward(dOut, cache)
# Reference gradient from num_gradient_array (presumably a numerical estimate
# -- confirm against util), then report how far the two are apart.
dx_num = num_gradient_array(lambda inp: sigmoid_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 1.88052735943e-11


## Tanh Gradient Checking

In [4]:
# Gradient check for the tanh layer on a 12x89 input.
x = np.random.rand(12, 89)
dOut = np.random.rand(12, 89)

# Analytic gradient from the layer's own backward pass.
f, cache = tanh_forward(x)
dx = tanh_backward(dOut, cache)
# Reference gradient from num_gradient_array (presumably a numerical estimate
# -- confirm against util), then report how far the two are apart.
dx_num = num_gradient_array(lambda inp: tanh_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 2.46170388848e-11


## Leaky Relu Gradient Checking

In [5]:
# Gradient check for the leaky ReLU layer.
# Normally distributed inputs give a mix of negative and positive values.
x = np.random.normal(0, 1, (5, 16))
dOut = np.random.normal(0, 2, (5, 16))

# Analytic gradient from the layer's own backward pass.
f, cache = leaky_relu_forward(x)
dx = leaky_relu_backward(dOut, cache)
# Reference gradient from num_gradient_array (presumably a numerical estimate
# -- confirm against util), then report how far the two are apart.
dx_num = num_gradient_array(lambda inp: leaky_relu_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 6.43891385601e-12


# Normalization Layers 

## Batch Normalization 

In [6]:
# Inputs for the batch normalization check: x and dOut are 25x42, while
# gamma and beta each hold 42 entries.
x = np.random.rand(25, 42)
gamma = np.random.rand(42)
beta = np.random.rand(42)
dOut = np.random.rand(25, 42)

# Settings handed to the forward pass; running_mean / running_var are given
# here as plain scalars.
params = {
    "mode": "train",
    "running_mean": 0.5,
    "running_var": 0.04,
    "momentum": 0.9,
    "eps": 1e-8,
}

# Analytic gradients; the next cell compares them to reference values.
out, cache = batch_normalization_forward(x, gamma, beta, params)
dx, dgamma, dbeta = batch_normalization_backward(dOut, cache)

In [7]:
# Relies on x, gamma, beta, params, dOut and the analytic dx / dgamma / dbeta
# defined by the preceding cell.
# Each lambda varies exactly one argument of the forward pass and holds the
# other two at their current values.
dx_num = num_gradient_array(
    lambda inp: batch_normalization_forward(inp, gamma, beta, params)[0], x, dOut)
dgamma_num = num_gradient_array(
    lambda g: batch_normalization_forward(x, g, beta, params)[0], gamma, dOut)
dbeta_num = num_gradient_array(
    lambda b: batch_normalization_forward(x, gamma, b, params)[0], beta, dOut)

print("dx : %s" % (rel_error(dx, dx_num),))
print("dgamma : %s" % (rel_error(dgamma, dgamma_num),))
print("dbeta : %s" % (rel_error(dbeta, dbeta_num),))

dx : 4.01617890857e-08
dgamma : 2.58860948323e-10
dbeta : 1.30173303136e-12


## Spatial Batch Normalization

In [8]:
# Gradient check for spatial batch normalization: x and dOut are 20x3x10x10,
# while gamma and beta each hold 3 entries.
x = np.random.rand(20, 3, 10, 10)
dOut = np.random.rand(20, 3, 10, 10)
gamma = np.random.rand(3)
beta = np.random.rand(3)

# Settings handed to the forward pass; running_mean / running_var are given
# here as plain scalars.
params = {
    "mode": "train",
    "running_mean": 0.5,
    "running_var": 0.04,
    "momentum": 0.9,
    "eps": 1e-8,
}

# Analytic gradients from the layer's own backward pass.
f, cache = spatial_batch_forward(x, gamma, beta, params)
dx, dgamma, dbeta = spatial_batch_backward(dOut, cache)

# Reference gradients: each lambda varies exactly one argument of the forward
# pass and holds the other two at their current values.
dx_num = num_gradient_array(
    lambda inp: spatial_batch_forward(inp, gamma, beta, params)[0], x, dOut)
dgamma_num = num_gradient_array(
    lambda g: spatial_batch_forward(x, g, beta, params)[0], gamma, dOut)
dbeta_num = num_gradient_array(
    lambda b: spatial_batch_forward(x, gamma, b, params)[0], beta, dOut)

print("dx : %s" % (rel_error(dx, dx_num),))
print("dgamma : %s" % (rel_error(dgamma, dgamma_num),))
print("dbeta : %s" % (rel_error(dbeta, dbeta_num),))

dx : 8.55472644418e-05
dgamma : 1.5767112162e-12
dbeta : 1.43200075285e-12


# Convolution Layers

## Pooling Gradient Checking 

In [9]:
# Gradient check for max pooling: an 8x8 spatial input with a 2x2 window and
# stride 2, paired with a 4x4 upstream gradient.
x = np.random.rand(5, 6, 8, 8)
dOut = np.random.rand(5, 6, 4, 4)
pooling_params = {
    'pooling_height': 2,
    'pooling_width': 2,
    'pooling_stride_height': 2,
    'pooling_stride_width': 2,
}

# Analytic gradient from the layer's own backward pass.
f, cache = max_pooling_forward(x, pooling_params)
dx = max_pooling_backward(dOut, cache)
# Reference gradient from num_gradient_array (presumably a numerical estimate
# -- confirm against util), then report how far the two are apart.
dx_num = num_gradient_array(
    lambda inp: max_pooling_forward(inp, pooling_params)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 5.0008601242e-13


## Fast Convolution Forward Checking

In [10]:
# Compare the fast convolution forward pass against the naive one on the same
# input, weights, bias and stride.
x = np.random.rand(5, 3, 20, 20)
w = np.random.rand(10, 3, 3, 3)
b = np.random.rand(10)
dOut = np.random.rand(5, 10, 18, 18)  # not used by this forward-only cell
S = 1
params = {'stride': S}

f, cache = convolve_forward_naive(x, w, b, params)
f2, cache2 = convolve_forward_fast(x, w, b, params)
# A small relative error means the two implementations agree.
print("f : %s" % (rel_error(f, f2),))

f : 3.05930980827e-15


## Fast Convolution Gradient Checking

In [11]:
# Gradient check for the fast convolution layer with stride 1.
x = np.random.rand(5, 3, 20, 20)
w = np.random.rand(10, 3, 3, 3)
b = np.random.rand(10)
dOut = np.random.rand(5, 10, 18, 18)
S = 1
params = {'stride': S}

# Analytic gradients from the layer's own backward pass.
f, cache = convolve_forward_fast(x, w, b, params)
dx, dw, db = convolve_backward_fast(dOut, cache)

# Reference gradients: each lambda varies exactly one argument of the forward
# pass and holds the other two at their current values.
dx_num = num_gradient_array(
    lambda inp: convolve_forward_fast(inp, w, b, params)[0], x, dOut)
dw_num = num_gradient_array(
    lambda weights: convolve_forward_fast(x, weights, b, params)[0], w, dOut)
db_num = num_gradient_array(
    lambda bias: convolve_forward_fast(x, w, bias, params)[0], b, dOut)

print("dx : %s" % (rel_error(dx, dx_num),))
print("dw : %s" % (rel_error(dw, dw_num),))
print("db : %s" % (rel_error(db, db_num),))

dx : 8.47344760684e-09
dw : 4.20574816475e-11
db : 1.64471230066e-11


# Other Layers

## Affine Gradient Checking

In [12]:
# Inputs for the affine layer check: x is 2x4, w is 4x8, b has 8 entries and
# the upstream gradient dOut is 2x8.
x = np.random.rand(2, 4)
w = np.random.rand(4, 8)
b = np.random.rand(8)
dOut = np.random.rand(2, 8)

# Analytic gradients; the next cell compares them to reference values.
out, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dOut, cache)

In [13]:
# Relies on x, w, b, dOut and the analytic dx / dw / db defined by the
# preceding cell.
# Each lambda varies exactly one argument of the forward pass and holds the
# other two at their current values.
dx_num = num_gradient_array(lambda inp: affine_forward(inp, w, b)[0], x, dOut)
dw_num = num_gradient_array(lambda weights: affine_forward(x, weights, b)[0], w, dOut)
db_num = num_gradient_array(lambda bias: affine_forward(x, w, bias)[0], b, dOut)

print("dx : %s" % (rel_error(dx, dx_num),))
print("dw : %s" % (rel_error(dw, dw_num),))
print("db : %s" % (rel_error(db, db_num),))

dx : 7.31601974619e-12
dw : 2.96556886977e-11
db : 7.82673450721e-12


## Flatten Gradient Checking 

In [14]:
# Gradient check for the flatten layer: a 5x2x6x7x8 input paired with an
# upstream gradient of shape (5, 2*6*7*8).
x = np.random.rand(5, 2, 6, 7, 8)
dOut = np.random.rand(5, 2 * 6 * 7 * 8)

# Analytic gradient from the layer's own backward pass.
f, cache = flatten_forward(x)
dx = flatten_backward(dOut, cache)
# Reference gradient from num_gradient_array (presumably a numerical estimate
# -- confirm against util), then report how far the two are apart.
dx_num = num_gradient_array(lambda inp: flatten_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 5.0008243458e-13


# Loss Layers 

## Softmax Loss

In [15]:
# Run softmax_loss on a 20x10 array of random values with 20 integer labels
# drawn from 0..9.
x = np.random.rand(20, 10)
y = np.random.randint(0, 10, 20)
loss, dx = softmax_loss(x, y)

# Only the loss value and the gradient's shape are reported; no gradient
# comparison is performed in this cell.
print("Loss: %s" % (loss,))
print("dx: %s" % (dx.shape,))

Loss: 2.35281598595
dx: (20, 10)


## SVM Loss

In [27]:
# Run svm_loss on a 20x10 array drawn from a narrow normal distribution
# (mean 0.1, std 0.08) with 20 integer labels drawn from 0..9.
x = np.random.normal(0.1, 0.08, (20, 10))
y = np.random.randint(0, 10, 20)
loss, dx = svm_loss(x, y)

# Report the loss and print the full gradient array for inspection.
print("Loss: %s" % (loss,))
print(dx)

Loss: 0.929020463126
[[ 0.05  0.05  0.05 -0.4   0.    0.05  0.05  0.05  0.05  0.05]
 [ 0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05 -0.45  0.05]
 [ 0.05 -0.45  0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05]
 [ 0.    0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05 -0.4 ]
 [ 0.    0.05  0.05  0.05 -0.4   0.05  0.05  0.05  0.05  0.05]
 [ 0.05  0.05  0.    0.05  0.05  0.05 -0.4   0.05  0.05  0.05]
 [ 0.05 -0.45  0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05]
 [-0.45  0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05]
 [ 0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.   -0.4   0.05]
 [-0.45  0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05]
 [-0.4   0.05  0.05  0.05  0.05  0.05  0.    0.05  0.05  0.05]
 [ 0.05  0.05  0.05  0.05 -0.4   0.05  0.05  0.    0.05  0.05]
 [ 0.05  0.05  0.05  0.05 -0.45  0.05  0.05  0.05  0.05  0.05]
 [-0.45  0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05  0.05]
 [ 0.05  0.05  0.05  0.05  0.05  0.05  0.05 -0.45  0.05  0.05]
 [ 0.05  0.05  0.05  0.05 -0.45  0