In [5]:
import theano
import theano.tensor as T
import numpy as np
import string
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
%matplotlib inline 

In [2]:
def remove_punctuation(s):
    """Return `s` with every character in `string.punctuation` removed.

    The two-argument call ``s.translate(None, string.punctuation)`` only
    exists for Python 2 byte strings. On Python 3, ``str.translate`` accepts
    a single mapping argument, so that call raises ``TypeError``. This
    version works on both interpreters.

    Parameters
    ----------
    s : str
        Text to strip.

    Returns
    -------
    str
        `s` with all ASCII punctuation characters deleted; every other
        character (including whitespace) is left untouched.
    """
    if hasattr(str, 'maketrans'):
        # Python 3: the third argument of maketrans lists characters to delete.
        return s.translate(str.maketrans('', '', string.punctuation))
    # Python 2 byte strings: a table of None means "delete only".
    return s.translate(None, string.punctuation)

In [13]:
def init_weight(mi,mo):
    """Draw an (mi, mo) weight matrix from a scaled standard normal.

    Samples come from ``np.random.randn`` (the global NumPy RNG) and are
    divided by ``sqrt(mi + mo)``.

    Parameters
    ----------
    mi : int
        Number of rows (input size).
    mo : int
        Number of columns (output size).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(mi, mo)``.
    """
    fan_total = mi + mo
    samples = np.random.randn(mi, mo)
    return samples / np.sqrt(fan_total)

In [3]:
def get_robert_frost(path='../data/robert_frost.txt'):
    """Read a text corpus and encode each non-blank line as word indices.

    Every non-empty line is lower-cased, stripped of punctuation with
    `remove_punctuation`, and split on whitespace. Each distinct token is
    assigned the next free integer id the first time it is seen. Ids 0 and 1
    are reserved for the 'START' and 'END' markers.

    Parameters
    ----------
    path : str, optional
        Location of the corpus file. Defaults to the path the notebook
        originally hard-coded, so existing zero-argument calls are unchanged.

    Returns
    -------
    sentences : list of list of int
        One list of word ids per non-blank line, in file order.
    word2idx : dict
        Mapping from token to integer id, including 'START' and 'END'.
    """
    word2idx = {'START': 0, 'END': 1}
    sentences = []
    # The context manager closes the file even if tokenising raises.
    with open(path) as corpus:
        for line in corpus:
            line = line.strip()
            if not line:
                continue
            tokens = remove_punctuation(line.lower()).split()
            sentence = []
            for t in tokens:
                if t not in word2idx:
                    # Ids are handed out densely, so the next free id is
                    # always the current vocabulary size.
                    word2idx[t] = len(word2idx)
                sentence.append(word2idx[t])
            sentences.append(sentence)
    return sentences,word2idx

In [6]:
class SimpleRNN(object):
    def __init__(self,D,M,V):
        self.D = D # dimensionality of word embedding 
        self.M = M # hidden layer size 
        self.V = V # vocabulary size
    def train(self, X, learning_rate=10e-1, mu=0.99, reg=1.0, epochs=500, show_fig=False):
        N = len(X)
        D = self.D
        M = self.M 
        V = self.V
        #init weights 
        We = init_weight(V,D)
        Wx = init_weight(D,M)
        Wh = init_weight(M,M)
        bh = np.zeros(M)
        h0 = np.zeros(M)
        Wo = init_weight(M,V)
        bo = np.zeros(V)
        #make them theano shared 
        self.We = theano.shared(We)
        self.Wx = theano.shared(Wx)
        self.Wh = theano.shared(Wh)
        self.bh = theano.shared(bh)
        self.h0 = theano.shared(h0)
        self.Wo = theano.shared(Wo)
        self.bo = theano.shared(bo)
        self.params = [self.We,self.Wx,self.Wh,self.bh,self.h0,self.Wo,self.bo]
        thX = T.ivector('X')
        Ei = self.We[thX]
        thY = T.ivector('Y')
        
        def recurrence(x_t,h_t1):
            """Single scan step: compute the new hidden state and output.

            x_t  -- current element of the scanned sequence (a row of the
                    embedded input ``Ei``)
            h_t1 -- hidden state from the previous step (``self.h0`` on the
                    first step)

            Returns the new hidden state and the softmax output for this step.
            """
            # The bias must be the shared variable self.bh, not the local
            # numpy array `bh`. Otherwise self.bh is absent from the graph and
            # T.grad(cost, self.params) has no path to it, so it never trains.
            h_t = T.nnet.relu(x_t.dot(self.Wx) + h_t1.dot(self.Wh) + self.bh)
            y_t = T.nnet.softmax(h_t.dot(self.Wo) + self.bo)
            return h_t,y_t
        [h,y],_ = theano.scan(
            fn = recurrence,
            outputs_info = [self.h0,None],
            sequences = Ei,
            n_steps = Ei.shape[0],
        )
        py_x = y[:, 0, :]
        prediction = T.argmax(py_x, axis=1)
        cost = -T.mean(T.log(py_x[T.arange(thY.shape[0]), thY]))
        grads = T.grad(cost, self.params)
        dparams = [theano.shared(p.get_value()*0) for p in self.params]
        updates = [
            (p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads)
        ] + [
            (dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads)
        ]
        self.predict_op = theano.function(inputs=[thX], outputs=prediction)
        self.train_op = theano.function(
            inputs=[thX, thY],
            outputs=[cost, prediction],
            updates=updates
        )
        costs = []
        n_total = sum((len(sentence)+1) for sentence in X)