# FSMN Gradient Check

In [1]:
# Please use python 2.7
# This notebook is based on cs231n/2016winter/assignment3
# A bit of setup

import time
import numpy as np

from gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from rnn_layers import *

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  For every element the error is |x - y| / max(1e-8, |x| + |y|); the 1e-8
  floor keeps the division defined where both values are zero. The maximum
  of these per-element errors is returned.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

# Compact vFSMN

In [2]:
from fsmn import *

T, D, N = 15, 5, 4
# fake inputs
hidden = np.linspace(1, 75, num=T*D).reshape(T, D)
filter = np.linspace(0.1, 2.0, num=N*D).reshape(N, D)
position = np.array([0,1,2,3,4,5,6,7,0,1,2,3,4,0,1])
print "hidden\n", hidden
print "filter\n", filter
print "position\n", position

# forward
# also need to check forward implementation
memory = compact_vfsmn_memory_forward(hidden, filter, position)
print "memory\n", memory

# backward
# fake output diff
dmemory = np.linspace(75, 1, num=T*D).reshape(T, D)
print "dmemory\n", dmemory

dhidden, dfilter = compact_vfsmn_memory_backward(dmemory, hidden, filter, position)
print "dhidden\n", dhidden
print "dfilter\n", dfilter

# gradient check
fhidden = lambda h: compact_vfsmn_memory_forward(h, filter, position)
ffilter = lambda f: compact_vfsmn_memory_forward(hidden, f, position)

dhidden_num = eval_numerical_gradient_array(fhidden, hidden, dmemory)
dfilter_num = eval_numerical_gradient_array(ffilter, filter, dmemory)

print 'dhidden error: ', rel_error(dhidden_num, dhidden)
print 'dfilter error: ', rel_error(dfilter_num, dfilter)

# To see what rel_error(x, y) does, print its intermediate terms with the lines below.
# rel_error(x, y): np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
# You can also comment out line 46 in fsmn.py (dhidden[r, c] += dmemory[r, c])
# to see the effect of a wrong backward implementation.
#print "dhidden_num\n", dhidden_num
#print "dhidden\n", dhidden
#print np.abs(dhidden_num - dhidden)
#print np.maximum(1e-8, np.abs(dhidden_num) + np.abs(dhidden))
#print np.abs(dhidden_num - dhidden) / np.maximum(1e-8, np.abs(dhidden_num) + np.abs(dhidden))
#print np.max(np.abs(dhidden_num - dhidden) / np.maximum(1e-8, np.abs(dhidden_num) + np.abs(dhidden)))

hidden
[[  1.   2.   3.   4.   5.]
 [  6.   7.   8.   9.  10.]
 [ 11.  12.  13.  14.  15.]
 [ 16.  17.  18.  19.  20.]
 [ 21.  22.  23.  24.  25.]
 [ 26.  27.  28.  29.  30.]
 [ 31.  32.  33.  34.  35.]
 [ 36.  37.  38.  39.  40.]
 [ 41.  42.  43.  44.  45.]
 [ 46.  47.  48.  49.  50.]
 [ 51.  52.  53.  54.  55.]
 [ 56.  57.  58.  59.  60.]
 [ 61.  62.  63.  64.  65.]
 [ 66.  67.  68.  69.  70.]
 [ 71.  72.  73.  74.  75.]]
filter
[[ 0.1  0.2  0.3  0.4  0.5]
 [ 0.6  0.7  0.8  0.9  1. ]
 [ 1.1  1.2  1.3  1.4  1.5]
 [ 1.6  1.7  1.8  1.9  2. ]]
position
[0 1 2 3 4 5 6 7 0 1 2 3 4 0 1]
memory
[[   1.     2.     3.     4.     5. ]
 [   6.1    7.4    8.9   10.6   12.5]
 [  12.2   14.8   17.8   21.2   25. ]
 [  21.8   26.7   32.2   38.3   45. ]
 [  37.4   45.6   54.6   64.4   75. ]
 [  59.4   69.6   80.6   92.4  105. ]
 [  81.4   93.6  106.6  120.4  135. ]
 [ 103.4  117.6  132.6  148.4  165. ]
 [  41.    42.    43.    44.    45. ]
 [  50.1   55.4   60.9   66.6   72.5]
 [  80.2   90.8  101.8  

# Bidirectional Compact vFSMN

In [3]:
from fsmn import *

T, D, N1, N2 = 15, 5, 4, 3
# fake inputs
hidden = np.linspace(1, 75, num=T*D).reshape(T, D)
bfilter = np.linspace(0.1, 2.5, num=(N1+1)*D).reshape(N1+1, D)
ffilter = np.linspace(3.0, 4.5, num=N2*D).reshape(N2, D)
bposition = np.array([0,1,2,3,4,5,6,7,0,1,2,3,4,0,1])
fposition = np.array([7,6,5,4,3,2,1,0,4,3,2,1,0,1,0])
print "hidden\n", hidden
print "bfilter\n", bfilter
print "bposition\n", bposition
print "ffilter\n", ffilter
print "fposition\n", fposition

# forward
# also need to check forward implementation
memory = bi_compact_vfsmn_memory_forward(hidden, bfilter, ffilter, bposition, fposition)
print "memory\n", memory

# backward
# fake output diff
dmemory = np.linspace(75, 1, num=T*D).reshape(T, D)
print "dmemory\n", dmemory

dhidden, dbfilter, dffilter = bi_compact_vfsmn_memory_backward(dmemory, hidden, bfilter, ffilter, bposition, fposition)
print "dhidden\n", dhidden
print "dbfilter\n", dbfilter
print "dffilter\n", dffilter

# gradient check
fhidden = lambda h: bi_compact_vfsmn_memory_forward(h, bfilter, ffilter, bposition, fposition)
fbfilter = lambda bf: bi_compact_vfsmn_memory_forward(hidden, bf, ffilter, bposition, fposition)
fffilter = lambda ff: bi_compact_vfsmn_memory_forward(hidden, bfilter, ff, bposition, fposition)

dhidden_num = eval_numerical_gradient_array(fhidden, hidden, dmemory)
dbfilter_num = eval_numerical_gradient_array(fbfilter, bfilter, dmemory)
dffilter_num = eval_numerical_gradient_array(fffilter, ffilter, dmemory)

print 'dhidden error: ', rel_error(dhidden_num, dhidden)
print 'dbfilter error: ', rel_error(dbfilter_num, dbfilter)
print 'dffilter error: ', rel_error(dffilter_num, dffilter)

hidden
[[  1.   2.   3.   4.   5.]
 [  6.   7.   8.   9.  10.]
 [ 11.  12.  13.  14.  15.]
 [ 16.  17.  18.  19.  20.]
 [ 21.  22.  23.  24.  25.]
 [ 26.  27.  28.  29.  30.]
 [ 31.  32.  33.  34.  35.]
 [ 36.  37.  38.  39.  40.]
 [ 41.  42.  43.  44.  45.]
 [ 46.  47.  48.  49.  50.]
 [ 51.  52.  53.  54.  55.]
 [ 56.  57.  58.  59.  60.]
 [ 61.  62.  63.  64.  65.]
 [ 66.  67.  68.  69.  70.]
 [ 71.  72.  73.  74.  75.]]
bfilter
[[ 0.1  0.2  0.3  0.4  0.5]
 [ 0.6  0.7  0.8  0.9  1. ]
 [ 1.1  1.2  1.3  1.4  1.5]
 [ 1.6  1.7  1.8  1.9  2. ]
 [ 2.1  2.2  2.3  2.4  2.5]]
bposition
[0 1 2 3 4 5 6 7 0 1 2 3 4 0 1]
ffilter
[[ 3.          3.10714286  3.21428571  3.32142857  3.42857143]
 [ 3.53571429  3.64285714  3.75        3.85714286  3.96428571]
 [ 4.07142857  4.17857143  4.28571429  4.39285714  4.5       ]]
fposition
[7 6 5 4 3 2 1 0 4 3 2 1 0 1 0]
memory
[[ 123.13571429  138.9         155.50714286  172.95714286  191.25      ]
 [ 182.27142857  200.94285714  220.65714286  241.41428571  26