In [32]:
import numpy as np
from numpy import random
from time import perf_counter

In [33]:
# sigmoid definition

In [47]:
def sigmoid(x):
    """Numerically stable logistic sigmoid, ``1 / (1 + exp(-x))``, element-wise.

    The textbook expression ``1.0 / (1.0 + np.exp(-x))`` overflows inside
    ``np.exp`` once ``x`` is strongly negative (below roughly -709 for
    float64) and emits a RuntimeWarning.  The same quantity can be written as
    ``exp(-log(1 + exp(-x)))``; ``np.logaddexp`` evaluates the logarithm
    without ever forming the huge intermediate value.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        Sigmoid of ``x``, with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0.0, -x))

In [36]:
# Seed NumPy's global random generator so the random draws are repeatable.
random.seed(10)

# Problem dimensions.
emb = 128    # size of each input embedding vector
T = 256      # number of elements in the input sequence (time steps)
h_dim = 16   # size of the hidden state

# Initial hidden state: an all-zero column vector of shape (h_dim, 1).
h_0 = np.zeros(shape=(h_dim, 1))

In [37]:
# random initialization of weights and biases

In [38]:
# Three weight matrices, each mapping the stacked vector [h, x] (length
# emb + h_dim) to a hidden-sized vector.  They are drawn in the order
# w1, w2, w3 so the random stream is consumed exactly as before.
w1, w2, w3 = (random.standard_normal((h_dim, emb + h_dim)) for _ in range(3))

# Three matching bias column vectors, drawn in the order b1, b2, b3.
b1, b2, b3 = (random.standard_normal((h_dim, 1)) for _ in range(3))

# Input sequence: T column vectors of size emb.
X = random.standard_normal((T, emb, 1))

# Parameter list shared by both cell implementations: matrices first, then biases.
weights = [w1, w2, w3, b1, b2, b3]

In [41]:
# implementation of forward computation for Vanilla-RNN

<img src='RNN.PNG' width="700"/>

 \begin{equation}
h^{<t>}=g(W_{h}[h^{<t-1>},x^{<t>}] + b_h)
\label{eq: htRNN}
\end{equation}
    
\begin{equation}
\hat{y}^{<t>}=g(W_{yh}h^{<t>} + b_y)
\label{eq: ytRNN}
\end{equation}

In [39]:
def forward_V_RNN(inputs,weights):
    """Advance a vanilla RNN cell by one time step.

    Parameters
    ----------
    inputs : sequence of two arrays
        ``[x, h_prev]``: the current input and the previous hidden state.
        They are concatenated (default axis 0) in the order ``[h_prev, x]``.
    weights : sequence of six arrays
        Only the first entry (weight matrix) and the fourth entry (bias) are
        used; the remaining four are ignored.

    Returns
    -------
    tuple
        ``(h_next, h_next)``: the new hidden state, returned twice so the
        same array serves as both the step output and the carried state.
    """
    x_t, h_prev = inputs
    w_h, _, _, b_h, _, _ = weights

    stacked = np.concatenate([h_prev, x_t])
    h_next = sigmoid(np.dot(w_h, stacked) + b_h)
    return h_next, h_next

In [43]:
# implementation of forward computation for GRU cell

<img src="GRU.PNG" width="700"/>

\begin{equation}
\Gamma_r=\sigma{(W_r[h^{<t-1>}, x^{<t>}]+b_r)}
\end{equation}

\begin{equation}
\Gamma_u=\sigma{(W_u[h^{<t-1>}, x^{<t>}]+b_u)}
\end{equation}

\begin{equation}
c^{<t>}=\tanh{(W_h[\Gamma_r*h^{<t-1>},x^{<t>}]+b_h)}
\end{equation}

\begin{equation}
h^{<t>}=\Gamma_u*c^{<t>}+(1-\Gamma_u)*h^{<t-1>}
\end{equation}

In [44]:
def forward_GRU(inputs,weights):
    """Advance a GRU cell by one time step.

    Parameters
    ----------
    inputs : sequence of two arrays
        ``[x, h_prev]``: the current input and the previous hidden state.
    weights : sequence of six arrays
        ``[wu, wr, wc, bu, br, bc]``: weight matrices for the update gate,
        reset gate and candidate state, followed by their biases in the same
        order.

    Returns
    -------
    tuple
        ``(h_next, h_next)``: the new hidden state, returned twice so the
        same array serves as both the step output and the carried state.
    """
    x_t, h_prev = inputs
    w_u, w_r, w_c, b_u, b_r, b_c = weights

    # Both gates read the same stacked vector [h_prev, x_t].
    gate_input = np.concatenate([h_prev, x_t])
    update_gate = sigmoid(np.dot(w_u, gate_input) + b_u)
    reset_gate = sigmoid(np.dot(w_r, gate_input) + b_r)

    # Candidate state: the previous state is scaled by the reset gate first.
    candidate_input = np.concatenate([reset_gate * h_prev, x_t])
    candidate = np.tanh(np.dot(w_c, candidate_input) + b_c)

    # The update gate blends the candidate with the previous state.
    h_next = update_gate * candidate + (1 - update_gate) * h_prev
    return h_next, h_next

In [45]:
# running forward

In [48]:
# One GRU step on the sequence element at index 1, starting from h_0;
# [0] selects the first item of the returned pair for display.
forward_GRU(inputs=[X[1], h_0], weights=weights)[0]

array([[ 9.77779014e-01],
       [-9.97986240e-01],
       [-5.19958083e-01],
       [-9.99999886e-01],
       [-9.99707004e-01],
       [-3.02197037e-04],
       [-9.58733503e-01],
       [ 2.10804828e-02],
       [ 9.77365398e-05],
       [ 9.99833090e-01],
       [ 1.63200940e-08],
       [ 8.51874303e-01],
       [ 5.21399924e-02],
       [ 2.15495959e-02],
       [ 9.99878828e-01],
       [ 9.77165472e-01]])

In [49]:
# implementation of scanning function

In [50]:
def scan(fn, elems, weights, h_0=None):
    """Run a step function over a sequence while threading a state through it.

    Parameters
    ----------
    fn : callable
        Called as ``fn([x, state], weights)`` for each element ``x``; it must
        return a pair ``(y, new_state)``.
    elems : iterable
        Sequence elements, visited in iteration order.
    weights : object
        Passed unchanged to every call of ``fn``.
    h_0 : object, optional
        State handed to ``fn`` on the first step (``None`` if omitted).

    Returns
    -------
    tuple
        ``(ys, state)``: the list of per-step outputs and the state after the
        last step (``h_0`` itself when ``elems`` is empty).
    """
    outputs = []
    state = h_0
    for element in elems:
        step_result = fn([element, state], weights)
        step_output, state = step_result
        outputs.append(step_output)
    return outputs, state

# Comparison between Vanilla-RNN and GRU

In [52]:
# Wall-clock one full forward pass of the vanilla RNN cell over the sequence X.
tic = perf_counter()
ys, h_T = scan(forward_V_RNN, X, weights, h_0=h_0)
toc = perf_counter()

# perf_counter() reports seconds; convert the elapsed time to milliseconds.
RNN_time = 1000 * (toc - tic)
print(f"It took {RNN_time:.2f}ms to run the forward method for the vanilla RNN.")

It took 11.01ms to run the forward method for the vanilla RNN.


In [53]:
# Wall-clock one full forward pass of the GRU cell over the sequence X.
tic = perf_counter()
ys, h_T = scan(forward_GRU, X, weights, h_0=h_0)
toc = perf_counter()

# perf_counter() reports seconds; convert the elapsed time to milliseconds.
GRU_time = 1000 * (toc - tic)
print(f"It took {GRU_time:.2f}ms to run the forward method for the GRU.")

It took 16.69ms to run the forward method for the GRU.
