# PTB LSTM model

This tutorial is a port of [TensorFlow's Penn Treebank (PTB) LSTM language model](https://github.com/tensorflow/models/blob/master/tutorials/rnn/ptb/ptb_word_lm.py) to Keras.

In [2]:
!curl http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz -o simple-examples.tgz
!tar xf simple-examples.tgz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 33.2M  100 33.2M    0     0  1960k      0  0:00:17  0:00:17 --:--:-- 2180k


In [2]:
import collections

def read_words(fname):
    """Read a corpus file and return its whitespace-separated tokens.

    Every newline becomes a separate ``<eos>`` token, so sentence boundaries
    survive as ordinary vocabulary entries.

    Args:
        fname: Path of the text file to read.

    Returns:
        list of str: The tokens in file order.
    """
    # Decode explicitly so the result does not depend on the platform's
    # default locale encoding.
    with open(fname, encoding="utf-8") as f:
        # Pad the marker with spaces: without them "<eos>" fuses with the
        # neighbouring words whenever a line is not itself space-padded.
        return f.read().replace("\n", " <eos> ").split()
    
def build_vocab(filename):
    """Build a word -> integer id mapping from a corpus file.

    Ids are assigned in order of descending frequency, with ties broken
    alphabetically, so the most frequent token receives id 0.

    Args:
        filename: Path of the corpus file, read with ``read_words``.

    Returns:
        dict: Maps each distinct token to its id. Empty when the file
        contains no tokens.
    """
    counter = collections.Counter(read_words(filename))
    count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))

    # Enumerate the sorted pairs directly: unpacking ``zip(*count_pairs)``
    # raises ValueError when the corpus is empty.
    return {word: idx for idx, (word, _) in enumerate(count_pairs)}

def file_to_word_ids(filename, word_to_id):
    """Encode a corpus file as a list of word ids.

    Tokens that are missing from ``word_to_id`` are skipped rather than
    mapped to a placeholder.

    Args:
        filename: Path of the corpus file, read with ``read_words``.
        word_to_id: Mapping from token to integer id.

    Returns:
        list: Ids of the known tokens, in file order.
    """
    ids = []
    for token in read_words(filename):
        if token in word_to_id:
            ids.append(word_to_id[token])
    return ids

def flip(word_to_id):
    """Return the inverse of a mapping: each value becomes a key.

    When several keys share a value, the one iterated last wins.
    """
    inverse = {}
    for word, idx in word_to_id.items():
        inverse[idx] = word
    return inverse

def to_sentence(word_ids, vocab=None):
    """Turn a sequence of word ids into a space-separated string.

    Args:
        word_ids: Iterable of ids to decode.
        vocab: Optional id -> word mapping. When omitted, the notebook-level
            ``id_to_word`` mapping is used, which keeps existing one-argument
            calls working.

    Returns:
        str: The decoded words joined by single spaces ('' for no ids).

    Raises:
        KeyError: If an id is missing from the mapping.
    """
    lookup = id_to_word if vocab is None else vocab
    # ``word_id`` rather than ``id`` so the builtin is not shadowed.
    return ' '.join(lookup[word_id] for word_id in word_ids)

# Build the vocabulary from the training split, keep the reverse mapping for
# decoding, and encode that same split as a flat list of ids.
train_path = 'simple-examples/data/ptb.train.txt'
word_to_id = build_vocab(train_path)
id_to_word = flip(word_to_id)
seq = file_to_word_ids(train_path, word_to_id)

# Show the same ten-token window as raw words and as ids.
print(read_words(train_path)[50:60])
print(seq[50:60])

['publishing', 'group', '<eos>', 'rudolph', '<unk>', 'N', 'years', 'old', 'and', 'former']
[1596, 96, 2, 7682, 1, 3, 72, 393, 8, 337]


In [3]:
import numpy as np

batch_size = 20
num_steps = 15

# Lay the corpus out as `batch_size` rows of `batch_len` tokens each,
# dropping the trailing tokens that do not fill a complete row.
raw_data = np.array(seq)
batch_len = len(seq) // batch_size
data = raw_data[:batch_size * batch_len].reshape(batch_size, batch_len)

# From the first row, take the second window of `num_steps` tokens and the
# same window shifted one token to the right.
window_start, window_end = num_steps, 2 * num_steps
x = data[0, window_start:window_end]
y = data[0, window_start + 1:window_end + 1]

print('Input Sentence:')
print(to_sentence(x))
print()
print('Expected Output Sentence:')
print(to_sentence(y))

Input Sentence:
punts rake regatta rubens sim snack-food ssangyong swapo wachter <eos> pierre <unk> N years old

Expected Output Sentence:
rake regatta rubens sim snack-food ssangyong swapo wachter <eos> pierre <unk> N years old will


In [11]:
def get_epoch_size(raw_data, batch_size, num_steps):
    """Return how many windows of ``num_steps`` tokens fit in one batch row.

    The data is split into ``batch_size`` rows of equal length; one token per
    row is held back before the row is divided into windows.

    Args:
        raw_data: Sized sequence of token ids.
        batch_size: Number of rows the data is split into.
        num_steps: Window length in tokens.

    Returns:
        int: Number of windows per row.
    """
    tokens_per_row = len(raw_data) // batch_size
    usable_tokens = tokens_per_row - 1
    return usable_tokens // num_steps

def batch_iter(raw_data, batch_size, num_steps):
    """Yield ``(x, y)`` training pairs for one pass over the data.

    The ids are arranged as ``batch_size`` rows (leftover tail tokens are
    dropped). Each yielded ``x`` is a ``(batch_size, num_steps)`` window of
    consecutive columns and ``y`` is the single column that immediately
    follows that window, with shape ``(batch_size, 1)``.

    Args:
        raw_data: Sized sequence of token ids.
        batch_size: Number of rows per batch.
        num_steps: Number of tokens per input window.

    Yields:
        tuple: ``(x, y)`` numpy arrays as described above.

    Raises:
        AssertionError: On first iteration, if the data yields one window or
            fewer per row.
    """
    tokens = np.array(raw_data)
    row_len = len(raw_data) // batch_size
    rows = tokens[:batch_size * row_len].reshape(batch_size, row_len)

    n_batches = get_epoch_size(tokens, batch_size, num_steps)
    assert n_batches > 1

    for i in range(n_batches):
        start = i * num_steps
        stop = start + num_steps
        yield (rows[:, start:stop], rows[:, stop:stop + 1])

In [6]:
from keras import backend as K
from keras.optimizers import Optimizer
import numpy as np

class PtbSGD(Optimizer):
    """Plain SGD with a step-wise, epoch-based learning-rate schedule.

    The learning rate stays at ``lr`` for the first ``max_epoch`` epochs and
    is then multiplied by ``decay`` once for every further epoch, where an
    epoch is ``epoch_size`` executions of the update op:

        lr_t = lr * decay ** max(iterations // epoch_size - max_epoch, 0)

    Args:
        lr: Initial learning rate.
        decay: Multiplicative factor applied per epoch beyond ``max_epoch``.
        epoch_size: Number of update steps that make up one epoch.
        max_epoch: Number of epochs trained at the initial learning rate.
        **kwargs: Forwarded unchanged to ``Optimizer.__init__``.
    """

    def __init__(self, lr=1.0, decay=.5, epoch_size=1000,
                 max_epoch=4, **kwargs):
        super(PtbSGD, self).__init__(**kwargs)
        # Hyper-parameters are stored as backend variables so they can take
        # part in the symbolic schedule built in get_updates. (The previous
        # ``self.__dict__.update(locals())`` additionally stored ``self`` and
        # ``kwargs`` on the instance; every useful attribute is set below.)
        self.iterations = K.variable(0.)
        self.base_lr = K.variable(lr)
        self.lr = K.variable(lr)
        self.decay = K.variable(decay)
        self.epoch_size = K.variable(epoch_size)
        self.max_epoch = K.variable(max_epoch)

    def get_updates(self, params, constraints, loss):
        """Build the list of ``(variable, new_value)`` updates for one step.

        Args:
            params: Trainable variables to update.
            constraints: Accepted for signature compatibility; this optimizer
                does not apply them.
            loss: Symbolic loss to differentiate.

        Returns:
            list: Update pairs; the first one increments ``iterations``.
        """
        grads = self.get_gradients(loss, params)
        epoch = self.iterations // self.epoch_size
        # Number of whole epochs completed beyond max_epoch, floored at zero.
        extra_epochs = K.maximum(epoch - self.max_epoch, 0.)
        # From here on ``self.lr`` is a symbolic expression, not a variable.
        self.lr = self.base_lr * K.pow(self.decay, extra_epochs)

        self.updates = [(self.iterations, self.iterations + 1.)]
        for p, g in zip(params, grads):
            self.updates.append((p, p - self.lr * g))
        return self.updates

    def get_config(self):
        """Return the constructor arguments as a plain dict.

        The initial learning rate is reported under ``'lr'`` (the name
        ``__init__`` accepts) so that ``PtbSGD(**config)`` can rebuild the
        optimizer.
        """
        config = {'lr': float(K.get_value(self.base_lr)),
                  'decay': float(K.get_value(self.decay)),
                  'epoch_size': float(K.get_value(self.epoch_size)),
                  'max_epoch': float(K.get_value(self.max_epoch))}
        base_config = super(PtbSGD, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def get_lr(self):
        """Evaluate and return the current learning rate.

        Goes through the backend's ``eval`` so the value is computed in the
        backend's own session rather than relying on an ambient default.
        """
        return K.eval(self.lr)

In [9]:
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense
from keras import optimizers 

vocab_size = 10000
batch_size = 20
num_steps = 20
embedding_size = 100
layer_size = 20
learning_rate = 1.0
lr_decay = 0.5
epoch_size = get_epoch_size(raw_data, batch_size, num_steps)

# Two stacked LSTMs over an embedded window of `num_steps` tokens; only the
# final LSTM state feeds the softmax, so the model predicts a single token
# per input window.
model = Sequential()
# input_length is tied to num_steps so the expected window length cannot
# drift away from the batches that are fed in.
model.add(Embedding(vocab_size, embedding_size, input_length=num_steps))
model.add(LSTM(layer_size, return_sequences=True))
model.add(LSTM(layer_size, return_sequences=False))
model.add(Dense(vocab_size, activation='softmax'))

# NOTE(review): the recorded output of the training cell is
# "TypeError: unsupported operand type(s) for *: 'IndexedSlices' and 'int'".
# Presumably this is norm clipping (clipnorm is the only int passed here)
# meeting the Embedding layer's sparse gradient - confirm, and if so either
# drop clipnorm or densify the gradient before clipping.
optimizer = PtbSGD(lr=learning_rate,
                   decay=lr_decay,
                   clipnorm=5,
                   epoch_size=epoch_size,
                   max_epoch=4)

# optimizer = optimizers.SGD(lr=learning_rate, decay=lr_decay)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [13]:
from keras.utils.np_utils import to_categorical
import time

batch_size = 20
num_steps = 20

start_time = time.time()
losses = 0.0   # running sum of the per-batch losses
iters = 0      # tokens consumed per batch row so far (throughput only)

model.reset_states()

for step, (x, y) in enumerate(batch_iter(raw_data, batch_size, num_steps)):
    # y holds one target id per sequence; one-hot encode it for the
    # categorical cross-entropy loss.
    y = to_categorical(y, num_classes=vocab_size)
    loss = model.train_on_batch(x, y)
    losses += loss
    iters += num_steps

    if step % (epoch_size // 10) == 10:
        # Each batch contributes one loss value for a single predicted token
        # per sequence, so the mean is taken over batches seen (step + 1).
        # Dividing by `iters` would shrink the exponent by a factor of
        # num_steps and understate the perplexity.
        print('{:.3f} perplexity: {:.3f} speed: {:.0f} wps'.format(
            step * 1.0 / epoch_size, np.exp(losses / (step + 1)),
            iters * batch_size / (time.time() - start_time)
        ))


TypeError: unsupported operand type(s) for *: 'IndexedSlices' and 'int'

In [None]:
# Run the model over every batch of the data; the predictions themselves are
# not stored.
for step, batch in enumerate(batch_iter(raw_data, batch_size, num_steps)):
    x, y = batch
    model.predict_on_batch(x)