In [1]:
from util.layer import *
from util.util import *
from util.activations import *
from util.normalization import *
from util.convolution import *
from util.loss import *
import numpy as np
%load_ext autoreload
%autoreload 2
import time

# Activation Layers 

## ReLU Gradient Checking

In [2]:
# ReLU gradient check: compare the analytic backward pass with the numerical
# estimate returned by num_gradient_array.
# The input is drawn from a zero-mean Gaussian so that about half of the
# entries are negative. np.random.rand only produces values in [0, 1); for a
# standard ReLU every such input lies on the pass-through side, so the
# zero-gradient branch of the backward pass would never be tested.
x = np.random.normal(0, 1, (3, 5, 5))
dOut = np.random.rand(3, 5, 5)

# The forward call returns (output, cache); the cache feeds the backward pass.
f, cache = relu_forward(x)
dx = relu_backward(dOut, cache)
dx_num = num_gradient_array(lambda x: relu_forward(x)[0], x, dOut)
print("dx : " + str(rel_error(dx, dx_num)))

dx : 5.00076869768e-13


## Sigmoid Gradient Checking

In [3]:
# Sigmoid gradient check on a (5, 8) input: analytic backward pass versus the
# numerical estimate from num_gradient_array.
_sigmoid_shape = (5, 8)
x = np.random.rand(*_sigmoid_shape)
dOut = np.random.rand(*_sigmoid_shape)


def _sigmoid_out(inp):
    """Return only the forward output (first element of the returned pair)."""
    return sigmoid_forward(inp)[0]


f, cache = sigmoid_forward(x)
dx = sigmoid_backward(dOut, cache)
dx_num = num_gradient_array(_sigmoid_out, x, dOut)
_sigmoid_err = rel_error(dx, dx_num)
print("dx : " + str(_sigmoid_err))

dx : 1.57243025039e-11


## Tanh Gradient Checking

In [4]:
# Tanh gradient check on a (12, 89) input: analytic backward pass versus the
# numerical estimate from num_gradient_array.
_tanh_shape = (12, 89)
x = np.random.rand(*_tanh_shape)
dOut = np.random.rand(*_tanh_shape)


def _tanh_out(inp):
    """Return only the forward output (first element of the returned pair)."""
    return tanh_forward(inp)[0]


f, cache = tanh_forward(x)
dx = tanh_backward(dOut, cache)
dx_num = num_gradient_array(_tanh_out, x, dOut)
_tanh_err = rel_error(dx, dx_num)
print("dx : " + str(_tanh_err))

dx : 2.50063428171e-11


## Leaky ReLU Gradient Checking

In [5]:
# Leaky ReLU gradient check. Both the input and the upstream gradient are
# Gaussian (std 1 and std 2 respectively), so negative values are present.
_leaky_shape = (5, 16)
x = np.random.normal(0, 1, _leaky_shape)
dOut = np.random.normal(0, 2, _leaky_shape)


def _leaky_relu_out(inp):
    """Return only the forward output (first element of the returned pair)."""
    return leaky_relu_forward(inp)[0]


f, cache = leaky_relu_forward(x)
dx = leaky_relu_backward(dOut, cache)
dx_num = num_gradient_array(_leaky_relu_out, x, dOut)
_leaky_err = rel_error(dx, dx_num)
print("dx : " + str(_leaky_err))

dx : 6.43898211058e-12


# Normalization Layers 

## Batch Normalization Gradient Checking

In [6]:
# Batch-normalization set-up: a (25, 42) input with one gamma/beta entry per
# column, run through the forward and backward passes in "train" mode.
# The names defined here (x, gamma, beta, dOut, params, dx, dgamma, dbeta) are
# consumed by the numerical check in the following cell.
_bn_rows, _bn_cols = 25, 42
x = np.random.rand(_bn_rows, _bn_cols)
gamma = np.random.rand(_bn_cols)
beta = np.random.rand(_bn_cols)
dOut = np.random.rand(_bn_rows, _bn_cols)
params = {
    "mode": "train",
    "running_mean": 0.5,
    "running_var": 0.04,
    "momentum": 0.9,
    "eps": 1e-8,
}
out, cache = batch_normalization_forward(x, gamma, beta, params)
dx, dgamma, dbeta = batch_normalization_backward(dOut, cache)

In [7]:
# Numerical check of the batch-normalization gradients computed in the
# previous cell. Each helper varies exactly one argument of the forward pass
# and reads the others from the notebook namespace.
# NOTE(review): every evaluation shares the same `params` dict -- confirm the
# train-mode output does not depend on anything the forward pass stores in it.
def _bn_wrt_x(x_in):
    """Forward output as a function of the input only."""
    return batch_normalization_forward(x_in, gamma, beta, params)[0]


def _bn_wrt_gamma(gamma_in):
    """Forward output as a function of gamma only."""
    return batch_normalization_forward(x, gamma_in, beta, params)[0]


def _bn_wrt_beta(beta_in):
    """Forward output as a function of beta only."""
    return batch_normalization_forward(x, gamma, beta_in, params)[0]


dx_num = num_gradient_array(_bn_wrt_x, x, dOut)
dgamma_num = num_gradient_array(_bn_wrt_gamma, gamma, dOut)
dbeta_num = num_gradient_array(_bn_wrt_beta, beta, dOut)

_bn_dx_err = rel_error(dx, dx_num)
_bn_dgamma_err = rel_error(dgamma, dgamma_num)
_bn_dbeta_err = rel_error(dbeta, dbeta_num)
print("dx : " + str(_bn_dx_err))
print("dgamma : " + str(_bn_dgamma_err))
print("dbeta : " + str(_bn_dbeta_err))

dx : 2.55636467322e-07
dgamma : 5.42108549502e-10
dbeta : 1.0982416371e-12


## Spatial Batch Normalization

In [18]:
# Spatial batch-normalization gradient check on a (20, 3, 10, 10) input with
# one gamma/beta entry per index of the second axis.
# NOTE(review): the saved output of this cell is a raw array dump instead of
# the three error lines printed below, which suggests a leftover debug print in
# one of the called helpers -- confirm and remove it there.
_sbn_shape = (20, 3, 10, 10)
x = np.random.rand(*_sbn_shape)
dOut = np.random.rand(*_sbn_shape)
gamma = np.random.rand(_sbn_shape[1])
beta = np.random.rand(_sbn_shape[1])
params = {
    "mode": "train",
    "running_mean": 0.5,
    "running_var": 0.04,
    "momentum": 0.9,
    "eps": 1e-8,
}
f, cache = spatial_batch_forward(x, gamma, beta, params)
dx, dgamma, dbeta = spatial_batch_backward(dOut, cache)


def _sbn_wrt_x(x_in):
    """Forward output as a function of the input only."""
    return spatial_batch_forward(x_in, gamma, beta, params)[0]


def _sbn_wrt_gamma(gamma_in):
    """Forward output as a function of gamma only."""
    return spatial_batch_forward(x, gamma_in, beta, params)[0]


def _sbn_wrt_beta(beta_in):
    """Forward output as a function of beta only."""
    return spatial_batch_forward(x, gamma, beta_in, params)[0]


dx_num = num_gradient_array(_sbn_wrt_x, x, dOut)
dgamma_num = num_gradient_array(_sbn_wrt_gamma, gamma, dOut)
dbeta_num = num_gradient_array(_sbn_wrt_beta, beta, dOut)

_sbn_dx_err = rel_error(dx, dx_num)
_sbn_dgamma_err = rel_error(dgamma, dgamma_num)
_sbn_dbeta_err = rel_error(dbeta, dbeta_num)
print("dx : " + str(_sbn_dx_err))
print("dgamma : " + str(_sbn_dgamma_err))
print("dbeta : " + str(_sbn_dbeta_err))

[[[[  7.91005927e-09   7.83100973e-09   7.86430338e-09 ...,
      7.94196248e-09   7.88505572e-09   8.06063882e-09]
   [  7.71641595e-09   7.93008942e-09   7.80468845e-09 ...,
      7.84494025e-09   8.01557382e-09   7.93915955e-09]
   [  8.03635658e-09   7.94840194e-09   7.86079535e-09 ...,
      7.63774977e-09   7.78419373e-09   7.69238206e-09]
   ..., 
   [  7.99668809e-09   7.70355268e-09   7.63714952e-09 ...,
      7.86677767e-09   7.65371566e-09   7.78259923e-09]
   [  7.95700283e-09   8.03459144e-09   7.75932318e-09 ...,
      7.98840949e-09   7.69028574e-09   7.89525223e-09]
   [  8.03464362e-09   7.89497524e-09   7.82400211e-09 ...,
      8.12419887e-09   8.01575262e-09   7.92366683e-09]]

  [[ -8.44378079e-09  -8.27424929e-09  -8.43932346e-09 ...,
     -8.32425573e-09  -8.32605507e-09  -8.34896174e-09]
   [ -8.29961433e-09  -8.08983930e-09  -8.58183613e-09 ...,
     -8.41482328e-09  -8.38864737e-09  -8.32470781e-09]
   [ -8.19554957e-09  -8.31311109e-09  -8.21948065e-09 ...,
 

# Convolution Layers

## Pooling Gradient Checking 

In [9]:
# Max-pooling gradient check: (5, 6, 8, 8) input, 2x2 window, stride 2 in both
# directions. The upstream gradient is (5, 6, 4, 4), i.e. each spatial
# dimension is halved.
x = np.random.rand(5, 6, 8, 8)
dOut = np.random.rand(5, 6, 4, 4)
pooling_params = {
    'pooling_height': 2,
    'pooling_width': 2,
    'pooling_stride_height': 2,
    'pooling_stride_width': 2,
}


def _max_pool_out(inp):
    """Return only the pooled output (first element of the returned pair)."""
    return max_pooling_forward(inp, pooling_params)[0]


f, cache = max_pooling_forward(x, pooling_params)
dx = max_pooling_backward(dOut, cache)
dx_num = num_gradient_array(_max_pool_out, x, dOut)
_pool_err = rel_error(dx, dx_num)
print("dx : " + str(_pool_err))

dx : 5.00078748014e-13


## Fast Convolution Forward Checking

In [10]:
# Forward-pass agreement check: the fast convolution is run on the same
# input, filters, bias and stride as the naive implementation and the two
# outputs are compared with rel_error.
# Only forward outputs are compared here, so no upstream gradient (dOut) is
# allocated in this cell; the gradient-checking cell defines its own.
x = np.random.rand(5, 3, 20, 20)
w = np.random.rand(10, 3, 3, 3)
b = np.random.rand(10)
S = 1
params = {'stride': S}

# Each forward call returns (output, cache); the caches are not needed here.
f, cache = convolve_forward_naive(x, w, b, params)
f2, cache2 = convolve_forward_fast(x, w, b, params)
print("f : " + str(rel_error(f, f2)))

f : 2.31086479282e-15


## Fast Convolution Gradient Checking

In [11]:
# Fast-convolution gradient check: analytic dx/dw/db from the backward pass
# versus numerical estimates. The input is (5, 3, 20, 20), the filters are
# (10, 3, 3, 3) with stride 1, and the upstream gradient is (5, 10, 18, 18).
x = np.random.rand(5, 3, 20, 20)
w = np.random.rand(10, 3, 3, 3)
b = np.random.rand(10)
dOut = np.random.rand(5, 10, 18, 18)
S = 1
params = {'stride': S}

f, cache = convolve_forward_fast(x, w, b, params)
dx, dw, db = convolve_backward_fast(dOut, cache)


def _conv_wrt_x(x_in):
    """Forward output as a function of the input only."""
    return convolve_forward_fast(x_in, w, b, params)[0]


def _conv_wrt_w(w_in):
    """Forward output as a function of the filters only."""
    return convolve_forward_fast(x, w_in, b, params)[0]


def _conv_wrt_b(b_in):
    """Forward output as a function of the bias only."""
    return convolve_forward_fast(x, w, b_in, params)[0]


dx_num = num_gradient_array(_conv_wrt_x, x, dOut)
dw_num = num_gradient_array(_conv_wrt_w, w, dOut)
db_num = num_gradient_array(_conv_wrt_b, b, dOut)

_conv_dx_err = rel_error(dx, dx_num)
_conv_dw_err = rel_error(dw, dw_num)
_conv_db_err = rel_error(db, db_num)
print("dx : " + str(_conv_dx_err))
print("dw : " + str(_conv_dw_err))
print("db : " + str(_conv_db_err))

dx : 1.07977530657e-08
dw : 4.2551371637e-11
db : 8.81689025131e-12


# Other Layers

## Affine Gradient Checking

In [12]:
# Affine-layer set-up: (2, 4) input, (4, 8) weights, length-8 bias and a
# (2, 8) upstream gradient. The names defined here (x, w, b, dOut, dx, dw, db)
# are consumed by the numerical check in the following cell.
_aff_rows, _aff_in, _aff_out = 2, 4, 8
x = np.random.rand(_aff_rows, _aff_in)
w = np.random.rand(_aff_in, _aff_out)
b = np.random.rand(_aff_out)
dOut = np.random.rand(_aff_rows, _aff_out)

out, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dOut, cache)

In [13]:
# Numerical check of the affine gradients computed in the previous cell.
# Each helper varies exactly one argument of the forward pass and reads the
# others from the notebook namespace.
def _affine_wrt_x(x_in):
    """Forward output as a function of the input only."""
    return affine_forward(x_in, w, b)[0]


def _affine_wrt_w(w_in):
    """Forward output as a function of the weights only."""
    return affine_forward(x, w_in, b)[0]


def _affine_wrt_b(b_in):
    """Forward output as a function of the bias only."""
    return affine_forward(x, w, b_in)[0]


dx_num = num_gradient_array(_affine_wrt_x, x, dOut)
dw_num = num_gradient_array(_affine_wrt_w, w, dOut)
db_num = num_gradient_array(_affine_wrt_b, b, dOut)

_aff_dx_err = rel_error(dx, dx_num)
_aff_dw_err = rel_error(dw, dw_num)
_aff_db_err = rel_error(db, db_num)
print("dx : " + str(_aff_dx_err))
print("dw : " + str(_aff_dw_err))
print("db : " + str(_aff_db_err))

dx : 5.17014073357e-12
dw : 2.12536707039e-11
db : 6.92126751647e-12


## Flatten Gradient Checking 

In [14]:
# Flatten gradient check: a (5, 2, 6, 7, 8) input paired with a
# (5, 2*6*7*8) upstream gradient, i.e. every axis after the first is collapsed
# into one.
_flat_width = 2 * 6 * 7 * 8
x = np.random.rand(5, 2, 6, 7, 8)
dOut = np.random.rand(5, _flat_width)


def _flatten_out(inp):
    """Return only the flattened output (first element of the returned pair)."""
    return flatten_forward(inp)[0]


f, cache = flatten_forward(x)
dx = flatten_backward(dOut, cache)
dx_num = num_gradient_array(_flatten_out, x, dOut)
_flatten_err = rel_error(dx, dx_num)
print("dx : " + str(_flatten_err))

dx : 5.00091295022e-13


# Loss Layers 

## Softmax Loss

In [15]:
# Softmax loss on random scores: 20 rows of 10 scores in [0, 1) with integer
# labels drawn from 0..9. Only the loss value is printed; dx is returned by
# the call but not inspected here.
# NOTE(review): if softmax_loss is the mean cross-entropy, near-uniform scores
# should give a value in the vicinity of ln(10) ~= 2.30 -- confirm.
_sm_rows, _sm_classes = 20, 10
x = np.random.rand(_sm_rows, _sm_classes)
y = np.random.randint(0, _sm_classes, _sm_rows)
loss, dx = softmax_loss(x, y)
print("Loss: " + str(loss))

Loss: 2.39773624259


## SVM Loss

In [16]:
# SVM loss on random scores: 20 rows of 10 scores drawn from a narrow Gaussian
# (mean 0.1, std 0.08) with integer labels drawn from 0..9. A uniform
# np.random.rand(20, 10) draw was the earlier alternative. Only the loss value
# is printed; dx is returned by the call but not inspected here.
_svm_rows, _svm_classes = 20, 10
x = np.random.normal(0.1, 0.08, (_svm_rows, _svm_classes))
y = np.random.randint(0, _svm_classes, _svm_rows)
loss, dx = svm_loss(x, y)
print("Loss: " + str(loss))

Loss: 0.953957544702
