In [1]:
from layers.util.layer import *
from layers.util.util import *
from layers.util.activations import *
from layers.util.normalization import *
from layers.util.convolution import *
from layers.util.sequential import *
from layers.util.loss import *
import numpy as np
%load_ext autoreload
%autoreload 2
import time

# Activation Layers 

## ReLU Gradient Checking

In [2]:
# ReLU: report rel_error between relu_backward and the num_gradient_array result.
x, dOut = np.random.rand(3, 5, 5), np.random.rand(3, 5, 5)

f, cache = relu_forward(x)
dx = relu_backward(dOut, cache)
dx_num = num_gradient_array(lambda inp: relu_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 5.00075339763e-13


## Sigmoid Gradient Checking

In [4]:
# Sigmoid: report rel_error between sigmoid_backward and the num_gradient_array result.
x, dOut = np.random.rand(5, 8), np.random.rand(5, 8)

f, cache = sigmoid_forward(x)
dx = sigmoid_backward(dOut, cache)
dx_num = num_gradient_array(lambda inp: sigmoid_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 1.92181772144e-11


## Tanh Gradient Checking

In [5]:
# Tanh: report rel_error between tanh_backward and the num_gradient_array result.
x, dOut = np.random.rand(12, 89), np.random.rand(12, 89)

f, cache = tanh_forward(x)
dx = tanh_backward(dOut, cache)
dx_num = num_gradient_array(lambda inp: tanh_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 2.91515393342e-11


## Leaky ReLU Gradient Checking

In [6]:
# Leaky ReLU: inputs are drawn from a normal distribution so that both
# negative and positive values are present.
x = np.random.normal(loc=0, scale=1, size=(5, 16))
dOut = np.random.normal(loc=0, scale=2, size=(5, 16))

f, cache = leaky_relu_forward(x)
dx = leaky_relu_backward(dOut, cache)
dx_num = num_gradient_array(lambda inp: leaky_relu_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 6.43887848045e-12


# Normalization Layers 

## Batch Normalization 

In [8]:
# Batch normalization: random inputs and parameters, then one forward and
# one backward pass. The results are compared numerically in the next cell.
x = np.random.rand(25, 42)
gamma = np.random.rand(42)
beta = np.random.rand(42)
dOut = np.random.rand(25, 42)

# Layer configuration passed to batch_normalization_forward.
params = {
    "mode": "train",
    "running_mean": 0.5,
    "running_var": 0.04,
    "momentum": 0.9,
    "eps": 1e-8,
}

out, cache = batch_normalization_forward(x, gamma, beta, params)
dx, dgamma, dbeta = batch_normalization_backward(dOut, cache)

In [9]:
# Numerical counterparts of dx, dgamma and dbeta from the previous cell.
# Each closure varies exactly one argument of batch_normalization_forward.
dx_num = num_gradient_array(
    lambda x_in: batch_normalization_forward(x_in, gamma, beta, params)[0], x, dOut)
dgamma_num = num_gradient_array(
    lambda g_in: batch_normalization_forward(x, g_in, beta, params)[0], gamma, dOut)
dbeta_num = num_gradient_array(
    lambda b_in: batch_normalization_forward(x, gamma, b_in, params)[0], beta, dOut)

print("dx : %s" % (rel_error(dx, dx_num),))
print("dgamma : %s" % (rel_error(dgamma, dgamma_num),))
print("dbeta : %s" % (rel_error(dbeta, dbeta_num),))

dx : 1.27363701366e-07
dgamma : 5.96461613783e-11
dbeta : 1.93695176604e-12


## Spatial Batch Normalization

In [11]:
# Spatial batch normalization on a 4-D input; gamma and beta have one entry
# per index of the second axis (size 3).
x = np.random.rand(20, 3, 10, 10)
dOut = np.random.rand(20, 3, 10, 10)
gamma = np.random.rand(3)
beta = np.random.rand(3)

# Layer configuration passed to spatial_batch_forward.
params = {
    "mode": "train",
    "running_mean": 0.5,
    "running_var": 0.04,
    "momentum": 0.9,
    "eps": 1e-8,
}

f, cache = spatial_batch_forward(x, gamma, beta, params)
dx, dgamma, dbeta = spatial_batch_backward(dOut, cache)

# Each closure varies exactly one argument of spatial_batch_forward.
dx_num = num_gradient_array(
    lambda x_in: spatial_batch_forward(x_in, gamma, beta, params)[0], x, dOut)
dgamma_num = num_gradient_array(
    lambda g_in: spatial_batch_forward(x, g_in, beta, params)[0], gamma, dOut)
dbeta_num = num_gradient_array(
    lambda b_in: spatial_batch_forward(x, gamma, b_in, params)[0], beta, dOut)

print("dx : %s" % (rel_error(dx, dx_num),))
print("dgamma : %s" % (rel_error(dgamma, dgamma_num),))
print("dbeta : %s" % (rel_error(dbeta, dbeta_num),))

dx : 2.20348552171e-05
dgamma : 8.46746396696e-12
dbeta : 7.77020220547e-13


# Convolution Layers

## Pooling Gradient Checking 

In [13]:
# Max pooling with a 2x2 window and stride 2; dOut is 4x4 for the 8x8 input.
x = np.random.rand(5, 6, 8, 8)
dOut = np.random.rand(5, 6, 4, 4)
pooling_params = {
    'pooling_height': 2,
    'pooling_width': 2,
    'pooling_stride_height': 2,
    'pooling_stride_width': 2,
}

f, cache = max_pooling_forward(x, pooling_params)
dx = max_pooling_backward(dOut, cache)
dx_num = num_gradient_array(
    lambda inp: max_pooling_forward(inp, pooling_params)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 5.0007987267e-13


## Fast Convolution Forward Checking

In [14]:
# Run the naive and the fast convolution forward passes on the same inputs
# and report the rel_error between their outputs.
x = np.random.rand(5, 3, 20, 20)
w = np.random.rand(10, 3, 3, 3)
b = np.random.rand(10)
dOut = np.random.rand(5, 10, 18, 18)  # not used in this cell
S = 1
params = {'stride': S}

f, cache = convolve_forward_naive(x, w, b, params)
f2, cache2 = convolve_forward_fast(x, w, b, params)
print("f : %s" % (rel_error(f, f2),))

f : 2.83041586081e-15


## Fast Convolution Gradient Checking

In [15]:
# Fast convolution: compare convolve_backward_fast with num_gradient_array
# results for the input, the filters and the bias.
x = np.random.rand(5, 3, 20, 20)
w = np.random.rand(10, 3, 3, 3)
b = np.random.rand(10)
dOut = np.random.rand(5, 10, 18, 18)
S = 1
params = {'stride': S}

f, cache = convolve_forward_fast(x, w, b, params)
dx, dw, db = convolve_backward_fast(dOut, cache)

# Each closure varies exactly one argument of convolve_forward_fast.
dx_num = num_gradient_array(
    lambda x_in: convolve_forward_fast(x_in, w, b, params)[0], x, dOut)
dw_num = num_gradient_array(
    lambda w_in: convolve_forward_fast(x, w_in, b, params)[0], w, dOut)
db_num = num_gradient_array(
    lambda b_in: convolve_forward_fast(x, w, b_in, params)[0], b, dOut)

print("dx : %s" % (rel_error(dx, dx_num),))
print("dw : %s" % (rel_error(dw, dw_num),))
print("db : %s" % (rel_error(db, db_num),))

dx : 2.30624570369e-08
dw : 4.71954228593e-11
db : 5.73950193626e-12


# Recurrent Neural Network Layers

## Vanilla RNN Gradient Checking 

In [2]:
# Single RNN step on deterministic linspace inputs, compared against
# hard-coded reference values.
N, D, H = 3, 10, 4

x = np.linspace(-0.4, 0.7, num=N * D).reshape((N, D))
prev_h = np.linspace(-0.2, 0.5, num=N * H).reshape((N, H))
Wx = np.linspace(-0.1, 0.9, num=D * H).reshape((D, H))
Wh = np.linspace(-0.3, 0.7, num=H * H).reshape((H, H))
b = np.linspace(-0.2, 0.4, num=H)

next_h, cache = rnn_step(x, prev_h, Wx, Wh, b)

# Reference output, one row per sample.
expected_next_h = np.asarray([
    [-0.58172089, -0.50182032, -0.41232771, -0.31410098],
    [ 0.66854692,  0.79562378,  0.87755553,  0.92795967],
    [ 0.97934501,  0.99144213,  0.99646691,  0.99854353],
])

print('next_h error: %s' % (rel_error(expected_next_h, next_h),))

next_h error: 6.29242142647e-09


In [79]:
# Gradient check for a single RNN step: compare every gradient returned by
# rnn_step_backward with the corresponding num_gradient_array result.
np.random.seed(231)
N, D, H = 4, 5, 8
x = np.random.randn(N, D)
h = np.random.randn(N, H)
Wx = np.random.randn(D, H)
Wh = np.random.randn(H, H)
b = np.random.randn(H)

out, cache = rnn_step(x, h, Wx, Wh, b)

# Upstream gradient with the same shape as the step output.
dnext_h = np.random.rand(*out.shape)

# Each closure must pass its own argument into the slot being checked.
# `fh` previously declared `prev_h` but evaluated the global `h`, so the
# hidden-state check relied on num_gradient_array mutating `h` in place
# (its implementation is not visible here). Binding the argument removes
# that dependency and matches the other closures.
fx = lambda x: rnn_step(x, h, Wx, Wh, b)[0]
fh = lambda h: rnn_step(x, h, Wx, Wh, b)[0]
fWx = lambda Wx: rnn_step(x, h, Wx, Wh, b)[0]
fWh = lambda Wh: rnn_step(x, h, Wx, Wh, b)[0]
fb = lambda b: rnn_step(x, h, Wx, Wh, b)[0]

dx_num = num_gradient_array(fx, x, dnext_h)
dprev_h_num = num_gradient_array(fh, h, dnext_h)
dWx_num = num_gradient_array(fWx, Wx, dnext_h)
dWh_num = num_gradient_array(fWh, Wh, dnext_h)
db_num = num_gradient_array(fb, b, dnext_h)

dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache)

print('dx error: ' +str(rel_error(dx_num, dx)))
print('dprev_h error: '+str(rel_error(dprev_h_num, dprev_h)))
print('dWx error: '+str(rel_error(dWx_num, dWx)))
print('dWh error: '+ str(rel_error(dWh_num, dWh)))
print('db error: '+str(rel_error(db_num, db)))

dx error: 1.71420924391e-10
dprev_h error: 1.91864481282e-10
dWx error: 2.27069552351e-09
dWh error: 4.19612725856e-10
db error: 3.45571597805e-11


In [3]:
# Full-sequence RNN forward pass on deterministic linspace inputs, compared
# against hard-coded reference values.
N, T, D, H = 2, 3, 4, 5

x = np.linspace(-0.1, 0.3, num=N * T * D).reshape((N, T, D))
h0 = np.linspace(-0.3, 0.1, num=N * H).reshape((N, H))
Wx = np.linspace(-0.2, 0.4, num=D * H).reshape((D, H))
Wh = np.linspace(-0.4, 0.1, num=H * H).reshape((H, H))
b = np.linspace(-0.7, 0.1, num=H)

h, _ = rnn_forward(x, h0, Wx, Wh, b)

# Reference hidden states, indexed as [sample][time step][hidden unit].
expected_h = np.asarray([
    [
        [-0.42070749, -0.27279261, -0.11074945,  0.05740409,  0.22236251],
        [-0.39525808, -0.22554661, -0.0409454,   0.14649412,  0.32397316],
        [-0.42305111, -0.24223728, -0.04287027,  0.15997045,  0.35014525],
    ],
    [
        [-0.55857474, -0.39065825, -0.19198182,  0.02378408,  0.23735671],
        [-0.27150199, -0.07088804,  0.13562939,  0.33099728,  0.50158768],
        [-0.51014825, -0.30524429, -0.06755202,  0.17806392,  0.40333043],
    ],
])
print('h error: %s' % (rel_error(expected_h, h),))

h error: 7.72846615831e-08


In [4]:
# Gradient check for the full-sequence RNN: compare rnn_backward with
# num_gradient_array results for every input of rnn_forward.
np.random.seed(231)

N, D, T, H = 2, 3, 10, 5

x = np.random.randn(N, T, D)
h0 = np.random.randn(N, H)
Wx = np.random.randn(D, H)
Wh = np.random.randn(H, H)
b = np.random.randn(H)

out, cache = rnn_forward(x, h0, Wx, Wh, b)

# Upstream gradient with the same shape as the forward output.
dout = np.random.randn(*out.shape)

dx, dh0, dWx, dWh, db = rnn_backward(dout, cache)

# Each closure varies exactly one argument of rnn_forward.
fx = lambda x_in: rnn_forward(x_in, h0, Wx, Wh, b)[0]
fh0 = lambda h0_in: rnn_forward(x, h0_in, Wx, Wh, b)[0]
fWx = lambda Wx_in: rnn_forward(x, h0, Wx_in, Wh, b)[0]
fWh = lambda Wh_in: rnn_forward(x, h0, Wx, Wh_in, b)[0]
fb = lambda b_in: rnn_forward(x, h0, Wx, Wh, b_in)[0]

dx_num = num_gradient_array(fx, x, dout)
dh0_num = num_gradient_array(fh0, h0, dout)
dWx_num = num_gradient_array(fWx, Wx, dout)
dWh_num = num_gradient_array(fWh, Wh, dout)
db_num = num_gradient_array(fb, b, dout)

print('dx error: %s' % (rel_error(dx_num, dx),))
print('dh0 error: %s' % (rel_error(dh0_num, dh0),))
print('dWx error: %s' % (rel_error(dWx_num, dWx),))
print('dWh error: %s' % (rel_error(dWh_num, dWh),))
print('db error: %s' % (rel_error(db_num, db),))

dx error: 1.54084107655e-09
dh0 error: 3.38254104079e-09
dWx error: 7.0801428247e-09
dWh error: 1.30334519893e-07
db error: 1.79334265024e-10


# Other Layers

## Affine Gradient Checking

In [16]:
# Affine layer: random inputs, then one forward and one backward pass.
# The results are compared numerically in the next cell.
x = np.random.rand(2, 4)
w = np.random.rand(4, 8)
b = np.random.rand(8)
dOut = np.random.rand(2, 8)

out, cache = affine_forward(x, w, b)
dx, dw, db = affine_backward(dOut, cache)

In [17]:
# Numerical counterparts of dx, dw and db from the previous cell.
# Each closure varies exactly one argument of affine_forward.
dx_num = num_gradient_array(lambda x_in: affine_forward(x_in, w, b)[0], x, dOut)
dw_num = num_gradient_array(lambda w_in: affine_forward(x, w_in, b)[0], w, dOut)
db_num = num_gradient_array(lambda b_in: affine_forward(x, w, b_in)[0], b, dOut)

print("dx : %s" % (rel_error(dx, dx_num),))
print("dw : %s" % (rel_error(dw, dw_num),))
print("db : %s" % (rel_error(db, db_num),))

dx : 5.87193401642e-12
dw : 1.08361476362e-11
db : 7.82655511997e-12


## Flatten Gradient Checking 

In [18]:
# Flatten: a 5-D input whose trailing axes (2*6*7*8) match the second axis
# of dOut.
x = np.random.rand(5, 2, 6, 7, 8)
dOut = np.random.rand(5, 2 * 6 * 7 * 8)

f, cache = flatten_forward(x)
dx = flatten_backward(dOut, cache)
dx_num = num_gradient_array(lambda inp: flatten_forward(inp)[0], x, dOut)
print("dx : %s" % (rel_error(dx, dx_num),))

dx : 5.00084761089e-13


# Loss Layers 

## Softmax Loss

In [20]:
# Softmax loss on random scores (20 rows, 10 columns) with integer labels
# drawn from [0, 10). Only the loss value is printed.
x = np.random.rand(20, 10)
y = np.random.randint(0, 10, size=20)
score, loss, dx = softmax_loss(x, y)
print("Loss: %s" % (loss,))

Loss: 2.35266623821


## SVM Loss

In [29]:
# SVM loss on normally distributed scores (mean 0.1, std 0.08) with integer
# labels drawn from [0, 10). Only the loss value is printed.
# Alternative input that was tried: x = np.random.rand(20,10)
x = np.random.normal(loc=0.1, scale=0.08, size=(20, 10))
y = np.random.randint(0, 10, size=20)
score, loss, dx = svm_loss(x, y)
print("Loss: %s" % (loss,))

Loss: 0.892562854184


## MSE Loss

In [8]:
# MSE loss: x has shape (20, 1) while y has shape (20,).
# Only the loss value is printed.
x = np.random.normal(loc=0.1, scale=0.5, size=(20, 1))
y = np.random.normal(loc=0.1, scale=0.5, size=(20,))
scores, loss, dx = mse_loss(x, y)
print("Loss: %s" % (loss,))

Loss: 0.295085120032


## Cross Entropy Loss 

In [104]:
# Cross entropy loss on uniform random inputs in [0, 1) with labels in {0, 1}.
# Only the loss value is printed.
x = np.random.rand(5, 1)
y = np.random.randint(0, 2, size=(5, 1))
scores, loss, dx = cross_entropy_loss(x, y)
print("Loss: %s" % (loss,))

Loss: 0.416874593639


In [138]:
import network

In [149]:
# Build a network object for (64, 50) inputs with the update settings
# alpha=1e-3 and epoch=1, using the "xavier2" initialization option.
a = network.network(
    input_shape=(64, 50),
    update_params={'alpha': 1e-3, 'epoch': 1},
    initialization="xavier2"
)

In [150]:
# Register the layers in order. A batch normalization layer is currently disabled:
#   a.add("batch_normalization",batch_params={'mode':'train','momentum':0.9,'eps':1e-7})
for layer_name in ("sigmoid", "cross_entropy"):
    a.add(layer_name)

In [151]:
# Train on random data: inputs in [0, 1) and labels in {0, 1}, both (64, 50).
x = np.random.rand(64, 50)
y = np.random.randint(0, 2, size=(64, 50))
a.train(x, y)

Initial Cost :18.4026225398
Initial Accuracy :0.0
Cost at Iteration 0 : 18.604074086
Accuracy at Iteration 0 : 0.0
