In [2]:
from layers.util.layer import *
from layers.util.util import *
from layers.util.activations import *
from layers.util.normalization import *
from layers.util.convolution import *
from layers.util.loss import *
import numpy as np
%load_ext autoreload
%autoreload 2
import time

# Activation Layers 

## ReLU Gradient Checking

In [3]:
# Gradient check for the ReLU layer: the analytic backward pass is compared
# against a numerical gradient of the forward pass.
#
# The input is drawn from a zero-centred normal (as the Leaky ReLU check in
# this notebook does) so that both negative and positive entries occur.
# np.random.rand only produces values in [0, 1); for a standard ReLU that
# never reaches the negative branch, so a wrong gradient there would go
# undetected.
x = np.random.normal(0, 1, (3, 5, 5))
dOut = np.random.rand(3, 5, 5)

f, cache = relu_forward(x)
dx = relu_backward(dOut, cache)
dx_num = num_gradient_array(lambda v: relu_forward(v)[0], x, dOut)
print("dx : " + str(rel_error(dx, dx_num)))

dx : 5.00080447224e-13


## Sigmoid Gradient Checking

In [4]:
# Sigmoid layer: compare the analytic backward pass with a numerical
# gradient of the forward output, using dOut as the upstream gradient.
x = np.random.rand(5, 8)
dOut = np.random.rand(5, 8)

f, cache = sigmoid_forward(x)
dx = sigmoid_backward(dOut, cache)
dx_num = num_gradient_array(
    lambda inp: sigmoid_forward(inp)[0],
    x,
    dOut,
)
print("dx : " + str(rel_error(dx, dx_num)))

dx : 1.92181772144e-11


## Tanh Gradient Checking

In [5]:
# Tanh layer: compare the analytic backward pass with a numerical
# gradient of the forward output, using dOut as the upstream gradient.
x = np.random.rand(12, 89)
dOut = np.random.rand(12, 89)

f, cache = tanh_forward(x)
dx = tanh_backward(dOut, cache)
dx_num = num_gradient_array(
    lambda inp: tanh_forward(inp)[0],
    x,
    dOut,
)
print("dx : " + str(rel_error(dx, dx_num)))

dx : 2.91515393342e-11


## Leaky ReLU Gradient Checking

In [6]:
# Leaky ReLU layer: analytic vs. numerical gradient.
# Inputs come from a zero-centred normal, so both signs of x are present.
x = np.random.normal(loc=0, scale=1, size=(5, 16))
dOut = np.random.normal(loc=0, scale=2, size=(5, 16))

f, cache = leaky_relu_forward(x)
dx = leaky_relu_backward(dOut, cache)
dx_num = num_gradient_array(
    lambda inp: leaky_relu_forward(inp)[0],
    x,
    dOut,
)
print("dx : " + str(rel_error(dx, dx_num)))

dx : 6.43887848045e-12


# Normalization Layers 

## Batch Normalization 

In [8]:
# Batch normalization: build random inputs and parameters, then run one
# forward and one backward pass. x, gamma, beta, dOut, params and the
# analytic gradients stay in the namespace for the numerical check in the
# next cell.
x = np.random.rand(25, 42)
gamma = np.random.rand(42)
beta = np.random.rand(42)
dOut = np.random.rand(25, 42)

# Options passed to the layer for this check.
params = {
    "mode": "train",
    "running_mean": 0.5,
    "running_var": 0.04,
    "momentum": 0.9,
    "eps": 1e-8,
}

out, cache = batch_normalization_forward(x, gamma, beta, params)
dx, dgamma, dbeta = batch_normalization_backward(dOut, cache)

In [9]:
# Numerical gradients of the batch-norm forward output with respect to x,
# gamma and beta, each compared with the analytic gradient computed in the
# previous cell. Each lambda varies one argument and reads the others from
# the notebook namespace.
dx_num = num_gradient_array(
    lambda inp: batch_normalization_forward(inp, gamma, beta, params)[0], x, dOut)
dgamma_num = num_gradient_array(
    lambda scale: batch_normalization_forward(x, scale, beta, params)[0], gamma, dOut)
dbeta_num = num_gradient_array(
    lambda shift: batch_normalization_forward(x, gamma, shift, params)[0], beta, dOut)

for label, analytic, numeric in (
    ("dx", dx, dx_num),
    ("dgamma", dgamma, dgamma_num),
    ("dbeta", dbeta, dbeta_num),
):
    print(label + " : " + str(rel_error(analytic, numeric)))

dx : 1.27363701366e-07
dgamma : 5.96461613783e-11
dbeta : 1.93695176604e-12


## Spatial Batch Normalization

In [11]:
# Spatial batch normalization: analytic vs. numerical gradients for a 4-D
# input, with one gamma/beta entry per index of the second axis.
x = np.random.rand(20, 3, 10, 10)
dOut = np.random.rand(20, 3, 10, 10)
gamma = np.random.rand(3)
beta = np.random.rand(3)

# Options passed to the layer for this check.
params = {
    "mode": "train",
    "running_mean": 0.5,
    "running_var": 0.04,
    "momentum": 0.9,
    "eps": 1e-8,
}

f, cache = spatial_batch_forward(x, gamma, beta, params)
dx, dgamma, dbeta = spatial_batch_backward(dOut, cache)

# Each lambda varies one argument and reads the others from the namespace.
dx_num = num_gradient_array(
    lambda inp: spatial_batch_forward(inp, gamma, beta, params)[0], x, dOut)
dgamma_num = num_gradient_array(
    lambda scale: spatial_batch_forward(x, scale, beta, params)[0], gamma, dOut)
dbeta_num = num_gradient_array(
    lambda shift: spatial_batch_forward(x, gamma, shift, params)[0], beta, dOut)

for label, analytic, numeric in (
    ("dx", dx, dx_num),
    ("dgamma", dgamma, dgamma_num),
    ("dbeta", dbeta, dbeta_num),
):
    print(label + " : " + str(rel_error(analytic, numeric)))

dx : 2.20348552171e-05
dgamma : 8.46746396696e-12
dbeta : 7.77020220547e-13


# Convolution Layers

## Pooling Gradient Checking 

In [13]:
# Max pooling: analytic vs. numerical gradient.
# Pooling height/width and both strides are all set to 2.
x = np.random.rand(5, 6, 8, 8)
dOut = np.random.rand(5, 6, 4, 4)
pooling_params = {
    'pooling_height': 2,
    'pooling_width': 2,
    'pooling_stride_height': 2,
    'pooling_stride_width': 2,
}

f, cache = max_pooling_forward(x, pooling_params)
dx = max_pooling_backward(dOut, cache)
dx_num = num_gradient_array(
    lambda inp: max_pooling_forward(inp, pooling_params)[0],
    x,
    dOut,
)
print("dx : " + str(rel_error(dx, dx_num)))

dx : 5.0007987267e-13


## Fast Convolution Forward Checking

In [14]:
# Forward-pass agreement: the fast convolution is compared against the
# naive implementation on the same random input, weights and bias.
x = np.random.rand(5, 3, 20, 20)
w = np.random.rand(10, 3, 3, 3)
b = np.random.rand(10)
# dOut is not used by this forward-only comparison.
dOut = np.random.rand(5, 10, 18, 18)
S = 1
params = {'stride': S}

f, cache = convolve_forward_naive(x, w, b, params)
f2, cache2 = convolve_forward_fast(x, w, b, params)
print("f : " + str(rel_error(f, f2)))

f : 2.83041586081e-15


## Fast Convolution Gradient Checking

In [15]:
# Fast convolution: analytic gradients w.r.t. input, weights and bias are
# compared against numerical gradients of the fast forward pass.
x = np.random.rand(5, 3, 20, 20)
w = np.random.rand(10, 3, 3, 3)
b = np.random.rand(10)
dOut = np.random.rand(5, 10, 18, 18)
S = 1
params = {'stride': S}

f, cache = convolve_forward_fast(x, w, b, params)
dx, dw, db = convolve_backward_fast(dOut, cache)

# Each lambda varies one argument and reads the others from the namespace.
dx_num = num_gradient_array(
    lambda inp: convolve_forward_fast(inp, w, b, params)[0], x, dOut)
dw_num = num_gradient_array(
    lambda weights: convolve_forward_fast(x, weights, b, params)[0], w, dOut)
db_num = num_gradient_array(
    lambda bias: convolve_forward_fast(x, w, bias, params)[0], b, dOut)

for label, analytic, numeric in (
    ("dx", dx, dx_num),
    ("dw", dw, dw_num),
    ("db", db, db_num),
):
    print(label + " : " + str(rel_error(analytic, numeric)))

dx : 2.30624570369e-08
dw : 4.71954228593e-11
db : 5.73950193626e-12


# Other Layers

## Affine Gradient Checking

In [16]:
# Affine layer: random input, weights and bias, followed by one forward
# and one backward pass. The arrays and analytic gradients stay in the
# namespace for the numerical check in the next cell.
x = np.random.rand(2, 4)
w = np.random.rand(4, 8)
b = np.random.rand(8)
dOut = np.random.rand(2, 8)

out, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dOut, cache)

In [17]:
# Numerical gradients of the affine forward output w.r.t. x, w and b,
# compared with the analytic gradients from the previous cell.
# Each lambda varies one argument and reads the others from the namespace.
dx_num = num_gradient_array(
    lambda inp: affine_forward(inp, w, b)[0], x, dOut)
dw_num = num_gradient_array(
    lambda weights: affine_forward(x, weights, b)[0], w, dOut)
db_num = num_gradient_array(
    lambda bias: affine_forward(x, w, bias)[0], b, dOut)

for label, analytic, numeric in (
    ("dx", dx, dx_num),
    ("dw", dw, dw_num),
    ("db", db, db_num),
):
    print(label + " : " + str(rel_error(analytic, numeric)))

dx : 5.87193401642e-12
dw : 1.08361476362e-11
db : 7.82655511997e-12


## Flatten Gradient Checking 

In [18]:
# Flatten layer: analytic vs. numerical gradient for a 5-D input.
# The upstream gradient has one row per leading index and
# 2*6*7*8 columns.
x = np.random.rand(5, 2, 6, 7, 8)
dOut = np.random.rand(5, 2 * 6 * 7 * 8)

f, cache = flatten_forward(x)
dx = flatten_backward(dOut, cache)
dx_num = num_gradient_array(
    lambda inp: flatten_forward(inp)[0],
    x,
    dOut,
)
print("dx : " + str(rel_error(dx, dx_num)))

dx : 5.00084761089e-13


# Loss Layers 

## Softmax Loss

In [20]:
# Softmax loss on random scores: 20 rows of 10 scores, with one integer
# label in [0, 10) per row. Only the loss value is printed; the returned
# dx is not gradient-checked here.
# NOTE(review): if softmax_loss is the usual mean cross-entropy, a value
# near ln(10) ~ 2.3 would be expected for these inputs - confirm.
x = np.random.rand(20, 10)
y = np.random.randint(low=0, high=10, size=20)

score, loss, dx = softmax_loss(x, y)
print("Loss: " + str(loss))

Loss: 2.35266623821


## SVM Loss

In [29]:
# SVM loss on random scores. The scores are drawn from a normal
# distribution with mean 0.1 and standard deviation 0.08 (an earlier
# variant of this cell used uniform np.random.rand(20,10) instead).
# Only the loss value is printed; the returned dx is not gradient-checked.
x = np.random.normal(loc=0.1, scale=0.08, size=(20, 10))
y = np.random.randint(low=0, high=10, size=20)

score, loss, dx = svm_loss(x, y)
print("Loss: " + str(loss))

Loss: 0.892562854184


## MSE Loss

In [8]:
# MSE loss on random predictions and targets, both drawn from a normal
# distribution with mean 0.1 and standard deviation 0.5.
# NOTE(review): x has shape (20, 1) while y has shape (20,) - confirm that
# mse_loss aligns these shapes rather than broadcasting them against each
# other.
x = np.random.normal(loc=0.1, scale=0.5, size=(20, 1))
y = np.random.normal(loc=0.1, scale=0.5, size=(20,))

scores, loss, dx = mse_loss(x, y)
print("Loss: " + str(loss))

Loss: 0.295085120032


## Cross Entropy Loss 

In [23]:
# Cross-entropy loss on random inputs: x is a 5x8 array of values in
# [0, 1) and y is a length-5 vector of integers in [0, 8).
# NOTE(review): the output recorded for this cell prints y as a 5x8 array
# of 0/1 values, which this code cannot produce. The output appears to come
# from an earlier version of the cell - re-run it and confirm which target
# format cross_entropy_loss expects.
x = np.random.rand(5, 8)
y = np.random.randint(low=0, high=8, size=(5,))

print(x)
print(y)

scores, loss, dx = cross_entropy_loss(x, y)
print("Loss: " + str(loss))

[[ 0.88704817  0.19643111  0.20485684  0.2904094   0.3297383   0.04283371
   0.94033273  0.59073047]
 [ 0.40004043  0.10574845  0.19676384  0.00832277  0.46893858  0.13855871
   0.99356407  0.25707469]
 [ 0.56536097  0.55071463  0.32031901  0.4224408   0.36162933  0.77817025
   0.37222159  0.96715205]
 [ 0.10968707  0.60995422  0.34979785  0.12296851  0.38727541  0.78988073
   0.2532519   0.02238632]
 [ 0.34855846  0.61668811  0.98082837  0.73424887  0.86398428  0.29175576
   0.13983054  0.08472162]]
[[0 0 1 0 0 1 0 1]
 [1 0 0 0 0 1 1 0]
 [1 1 0 0 0 1 1 1]
 [0 0 0 0 1 0 0 1]
 [1 1 0 0 0 1 1 0]]
Loss: 0.494179989197
