In [1]:
import numpy as np
np.set_printoptions(precision=3)

In [2]:
def oneHot(size, item):
    o = np.zeros((size, 1))
    o[item] = 1
    return o

def softmax(y):
    return np.exp(y) / np.sum(np.exp(y))

def zeros_like(*args):
    return (np.zeros_like(a) for a in args)

def clip(*args):
    for a in args:
        np.clip(a, -5, 5, out=a) # clip to mitigate exploding gradients

In [13]:
class RNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.wx = np.random.randn(hidden_size, input_size) * .01
        self.wh = np.random.randn(hidden_size, hidden_size)* .01
        self.wy = np.random.randn(output_size, hidden_size)* .01
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))
        self.h = np.zeros((hidden_size, 1))
        # adagrad params
        self.mbh, self.mby, self.mwx, self.mwh, self.mwy = zeros_like(self.bh, self.by, self.wx, self.wh, self.wy)
        
    def forward(self, x, y):
        x_, y_, h_, logit_, p_, loss = {}, {}, {}, {}, {}, 0
        h_[-1] = np.copy(self.h)
        for i in range(len(x)):
            x_[i] = oneHot(self.input_size, x[i])
            y_[i] = oneHot(self.output_size, y[i])
            h_[i] = np.tanh(np.dot(self.wx, x_[i]) + np.dot(self.wh, h_[i - 1]) + self.bh)
            logit_[i] = np.dot(self.wy, h_[i]) + self.by
            p_[i] = softmax(logit_[i])
            loss += np.sum(-1 * y_[i] * np.log(p_[i]) - (1 - y_[i]) * np.log(1 - p_[i]))
            #loss += np.sum(-1 * y_[i] * np.log(p_[i]))
        self.forward_stash = x_, y_, h_, logit_, p_, loss
        
    def backward(self):
        x_, y_, h_, logit_, p_, loss = self.forward_stash
        dbh, dby, dwx, dwh, dwy, dhnext = zeros_like(self.bh, self.by, self.wx, self.wh, self.wy, self.h)
        for i in reversed(range(len(x_))):
            dy = p_[i] - y_[i]
            dby += dy            
            dwy += np.dot(dy, h_[i].T)
            dh = np.dot(self.wy.T, dy) + dhnext
            dtanh = (1 - h_[i]**2) * dh
            dbh += dtanh
            dwx += np.dot(dtanh, x_[i].T)
            dwh += np.dot(dtanh, h_[i-1].T)
            dhnext = np.dot(self.wh.T, dtanh)
        clip(dbh, dby, dwx, dwh, dwy)    
        self.backward_stash = dwx, dwh, dwy, dbh, dby
        self.h = h_[len(x_) - 1]
        return loss
    
    def grad_update(self, learning_rate):        
        dwx, dwh, dwy, dbh, dby = self.backward_stash
        for param, dparam, mem in zip([self.wx, self.wh, self.wy, self.bh, self.by], 
                                      [dwx, dwh, dwy, dbh, dby], 
                                      [self.mwx, self.mwh, self.mwy, self.mbh, self.mby]):
            mem += dparam ** 2
            param += -learning_rate * dparam / np.sqrt(mem + 1e-8)
    
    def sample(self, seed, N):
        x = oneHot(self.input_size, seed)
        res, h = [], np.copy(self.h)
        for i in range(N):
            h = np.tanh(np.dot(self.wx, x) + np.dot(self.wh, h) + self.bh)
            y = np.dot(self.wy, h) + self.by
            logit = softmax(y)
            pick = np.random.choice(range(self.input_size), p=logit.ravel())
            x = oneHot(self.input_size, pick)
            res.append(pick)
        return res  
    
    def reset_memory(self):
        self.h = np.zeros((self.hidden_size, 1))

In [18]:
def run(iterations = 2000, filename = 'input.txt', seq_length = 25, hidden_size = 100, learning_rate = 1e-1):
    with open(filename, 'r') as f:
        data = f.read()
    chars = list(set(data))
    data_size, vocab_size = len(data), len(chars)
    char_to_ix = { ch:i for i,ch in enumerate(chars) }
    ix_to_char = { i:ch for i,ch in enumerate(chars) }
    print 'data has %d characters, %d unique.' % (data_size, vocab_size)
    
    np.random.seed(42)
    rnn = RNN(vocab_size, hidden_size, vocab_size)

    n, p = 0, 0
    smooth_loss = -np.log(1./ vocab_size) * seq_length # loss at iteration 0
    for n in range(iterations):
        # prepare inputs (we're sweeping from left to right in steps seq_length long)
        if  p + seq_length + 1 >= len(data) or n == 0: 
            rnn.reset_memory()
            p = 0 # go from start of data
        inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
        targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

        if n % 1000 == 0:
            sample_ix = rnn.sample(inputs[0], 200)
            txt = ''.join(ix_to_char[ix] for ix in sample_ix)
            print '----\n %s \n----' % (txt, )

        rnn.forward(inputs, targets)
        loss = rnn.backward()
        rnn.grad_update(learning_rate)
        
        smooth_loss = smooth_loss * 0.999 + loss * 0.001
        if n % 1000 == 0: 
            print 'iter %d, loss: %f' % (n, smooth_loss) # print progress
            
        p += seq_length # move data pointer

In [17]:
run('input.txt')

data has 50 characters, 21 unique.
----
 odoejTjeueuljwr!ih!ousuurwskjuhfH.nestwudhohi jfutrhjTwrlsrfT.jrusohj wrw
!.ewwr
rhkkj hhtnudrshw.hh wnH rijr.wjkfH
tn
Hoo!.f! n
!Hs
ikel !lf
k.uwhtTnHertjuljklHuhhdhHiui Huissuhjeijr!iTruwTrjtodir.e  
----
iter 0, loss: 76.137460
----
 Hello world for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thoslo world for rnn! Thisoworld for rnn! Thisow rld for rn 
----
iter 1000, loss: 30.071132
----
 Hello world for rnn! Thisiwowld for rnn! Thisow rld for rnn! Thisowlwld for rnn! Thisor rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisowornd fThisld world fo 
----
iter 2000, loss: 11.105116


In [21]:
run()

data has 50 characters, 21 unique.
----
 odoejTjeueuljwr!ih!ousuurwskjuhfH.nestwudhohi jfutrhjTwrlsrfT.jrusohj wrw
!.ewwr
rhkkj hhtnudrshw.hh wnH rijr.wjkfH
tn
Hoo!.f! n
!Hs
ikel !lf
k.uwhtTnHertjuljklHuhhdhHiui Huissuhjeijr!iTruwTrjtodir.e  
----
iter 0, loss: 76.137460
----
 Hello world for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thisoworld for rnn! Thoslo world for rnn! Thisoworld for rnn! Thisow rld for rn 
----
iter 1000, loss: 30.071132


In [4]:
rnn = RNN(5, 10, 5)
x = np.array([1,1,1])
y = np.array([1,1,1])
rnn.reset_memory()
rnn.forward(x, y)
print rnn.backward()
rnn.grad_update(1e-1)
rnn.sample(1, 5)

4.8280576821


[4, 4, 4, 1, 3]