In [1]:
import theano
import theano.tensor as T
import numpy as np
import sys
from itertools import izip
import time

# Min/max sequence length
MIN_LENGTH = 50
MAX_LENGTH = 55
# Number of units in the hidden (recurrent) layer
N_HIDDEN = 100
# input
N_INPUT = 2
# output
N_OUTPUT = 1

In [2]:
length = 55
x_seq = np.concatenate([np.random.uniform(size=(length, 1)),
                        np.zeros((length, 1))],
                       axis=-1)
x_seq.shape

(55, 2)

In [3]:
# from https://github.com/Lasagne/Lasagne/blob/master/examples/recurrent.py
def gen_data(min_length=MIN_LENGTH, max_length=MAX_LENGTH):
    '''
    Generate a sequences for the "add" task, e.g. the target for the
    following
    ``| 0.5 | 0.7 | 0.3 | 0.1 | 0.2 | ... | 0.5 | 0.9 | ... | 0.8 | 0.2 |
      |  0  |  0  |  1  |  0  |  0  |     |  0  |  1  |     |  0  |  0  |``
    would be 0.3 + .9 = 1.2.  This task was proposed in [1]_ and explored in
    e.g. [2]_.
    Parameters
    ----------
    min_length : int
        Minimum sequence length.
    max_length : int
        Maximum sequence length.
    Returns
    -------
    X : np.ndarray
        Input to the network, of shape (length, 2), where the last
        dimension corresponds to the two sequences shown above.
    y : np.ndarray
        Correct output for each sample (a scalar).
    '''
    # Generate x_seq
    length = np.random.randint(min_length, max_length)
    x_seq = np.concatenate([np.random.uniform(size=(length, 1)),
                        np.zeros((length, 1))],
                       axis=-1)
    # Set the second dimension to 1 at the indices to add
    x_seq[np.random.randint(length/10), 1] = 1
    x_seq[np.random.randint(length/2, length), 1] = 1
    # Multiply and sum the dimensions of x_seq to get the target value
    y_hat = np.sum(x_seq[:, 0]*x_seq[:, 1])
    return x_seq,y_hat

In [4]:
    

# what can we get from gen_data()
x_seq ,y_hat = gen_data()

<img width='400px' src='./rnn.png' />
<img width='400px' src='./rnn-step.png' />


In [5]:
x_seq = T.matrix('input')
y_hat = T.scalar('target')

Wi = theano.shared( np.random.randn(N_INPUT,N_HIDDEN) )
bh = theano.shared( np.zeros(N_HIDDEN) )
Wo = theano.shared( np.random.randn(N_HIDDEN,N_OUTPUT) )
bo = theano.shared( np.zeros(N_OUTPUT) )
Wh = theano.shared( np.random.randn(N_HIDDEN,N_HIDDEN) )
parameters = [Wi,bh,Wo,bo,Wh]

def sigmoid(z):
        return 1/(1+T.exp(-z))

def step(x_t,a_tm1,y_tm1):
        a_t = sigmoid( T.dot(x_t,Wi) \
                + T.dot(a_tm1,Wh) + bh )
        y_t = T.dot(a_t,Wo) + bo
        return a_t, y_t

a_0 = theano.shared(np.zeros(N_HIDDEN))
y_0 = theano.shared(np.zeros(N_OUTPUT))

[a_seq,y_seq],_ = theano.scan(
                        step,
                        sequences = x_seq,
                        outputs_info = [ a_0, y_0 ],
			truncate_gradient=-1
                )

y_seq_last = y_seq[-1][0] # we only care about the last output 
cost = T.sum( ( y_seq_last - y_hat )**2 ) 

gradients = T.grad(cost,parameters)

In [6]:
def MyUpdate(parameters,gradients):
	mu =  np.float32(0.001)
	parameters_updates = [(p,p - mu * g) for p,g in izip(parameters,gradients) ] 
	return parameters_updates

rnn_test = theano.function(
        inputs= [x_seq],
        outputs=y_seq_last
)

rnn_train = theano.function(
        inputs=[x_seq,y_hat],
        outputs=cost,
	updates=MyUpdate(parameters,gradients)
)

  from scan_perform.scan_perform import *


In [7]:
for i in range(10000000):
    x_seq, y_hat = gen_data()
    if i % 100000 == 0:
        print "iteration:", i, "cost:",  rnn_train(x_seq,y_hat)

for i in range(10):
	x_seq, y_hat = gen_data()
	print "reference", y_hat, "RNN output:", rnn_test(x_seq)

iteration: 0 cost: 1.87286049796
iteration: 100000 cost: 0.0389489752921
iteration: 200000 cost: 0.767482507614
iteration: 300000 cost: 0.656512680759
iteration: 400000 cost: 1.03579989708
iteration: 500000 cost: 0.00297750277748
iteration: 600000 cost: 0.445865545077
iteration: 700000 cost: 2.08940234897
iteration: 800000 cost: 1.66911791592
iteration: 900000 cost: 0.126490677392
iteration: 1000000 cost: 1.66585286845
iteration: 1100000 cost: 0.625987674831
iteration: 1200000 cost: 0.221164682048
iteration: 1300000 cost: 0.00148957367261
iteration: 1400000 cost: 0.433307887547
iteration: 1500000 cost: 0.149722065831
iteration: 1600000 cost: 0.73590279033
iteration: 1700000 cost: 0.00307003183073
iteration: 1800000 cost: 0.981170515097
iteration: 1900000 cost: 0.147502216936
iteration: 2000000 cost: 0.00174814090118
iteration: 2100000 cost: 0.376269839506
iteration: 2200000 cost: 1.73209036336
iteration: 2300000 cost: 0.698531307932
iteration: 2400000 cost: 1.14444918992
iteration: 250