## Text Generation using LSTM

In [18]:
from neon import logger as neon_logger
from neon.callbacks.callbacks import Callbacks
from neon.data import Text
from neon.initializers import GlorotUniform, Uniform, Orthonormal
from neon.layers import GeneralizedCost, LSTM, Affine
from neon.models import Model
from neon.optimizers import RMSProp
from neon.transforms import Logistic, Tanh, Softmax, CrossEntropyMulti
from neon.util.argparser import NeonArgparser

In [19]:
import numpy as np
import os
import datetime
import cPickle
import sklearn
from sklearn.cross_validation import train_test_split

In [20]:
# Backend setup: generate the CPU backend with batch_size=20.
from neon.backends import gen_backend

# Hidden size handed to the LSTM layer as its output_size.
rnn_size = 256
be = gen_backend(backend='cpu', batch_size=20)

Use the `neon.data.Text` data iterator. It builds the vocabulary and a one-hot representation of the input.
Document link: https://neon.nervanasys.com/index.html/generated/neon.data.text.Text.html#neon.data.text.Text

In [21]:
# Text iterator over the raw corpus; the first argument (20) is the number of
# time steps.
from neon.data import Text

# NOTE(review): absolute, machine-specific location; adjust before running
# on another machine.
path = '/Users/tgn/Desktop/pilot/Benchmarks/P3B2/raw_text_upper.txt'
num_steps = 20
data = Text(num_steps, path)

In [23]:
# Create layers: an LSTM of rnn_size units followed by an Affine layer with a
# Softmax activation whose size is the vocabulary length.
# init_glorot was referenced here without ever being defined in the notebook
# (NameError on a fresh kernel); GlorotUniform is imported for this purpose.
init_glorot = GlorotUniform()
init_orthonormal = Orthonormal()
# NOTE(review): gate_activation is Tanh() while Logistic is imported but never
# used -- confirm this choice is intentional before changing it, since the
# saved checkpoint was trained with this configuration.
layers = [LSTM(output_size=rnn_size, init=init_glorot, activation=Tanh(), gate_activation=Tanh(),
               init_inner=init_orthonormal),
          Affine(len(data.vocab), init=init_glorot, activation=Softmax())]

In [24]:
# Training cost: CrossEntropyMulti (usebits=True) wrapped in a GeneralizedCost.
cost = GeneralizedCost(
    costfunc=CrossEntropyMulti(usebits=True),
)
# Optimizer: RMSProp constructed with its default arguments.
optimizer = RMSProp()

In [25]:
# Build the model and its callbacks; the model is saved to
# "neon_lstm_model_upper.pickle".
# Model and Callbacks are already imported in the notebook's first cell, and
# the model is now constructed exactly once (it used to be built twice).
# num_epochs is only used as the evaluation frequency below; the fit call
# passes its own epoch count.
num_epochs = 2
fname = "neon_lstm_model_upper"
model = Model(layers=layers)
# serialize=1 -- presumably "checkpoint every epoch"; confirm against the neon
# Callbacks documentation.
callbacks = Callbacks(model, eval_set=data, eval_freq=num_epochs,
                      serialize=1, save_path=fname + '.pickle')

In [26]:
# Train for 20 epochs on the Text iterator with the cost, optimizer and
# callbacks configured in the earlier cells.
model.fit(
    data,
    optimizer=optimizer,
    num_epochs=20,
    cost=cost,
    callbacks=callbacks,
)

Epoch 0   [Train |████████████████████| 7177/7177 batches, 1.53 cost, 409.97s]
Epoch 1   [Train |████████████████████| 7177/7177 batches, 1.41 cost, 413.77s] [CrossEntropyMulti Loss 1.27, 174.36s]
Epoch 2   [Train |████████████████████| 7177/7177 batches, 1.32 cost, 416.47s]
Epoch 3   [Train |████████████████████| 7177/7177 batches, 1.25 cost, 414.77s] [CrossEntropyMulti Loss 1.16, 174.35s]
Epoch 4   [Train |████████████████████| 7177/7177 batches, 1.21 cost, 413.07s]


In [35]:
def sample(prob):
    """Draw one index at random, weighted by the entries of ``prob``.

    ``prob`` is a 1-D numpy array of non-negative weights. It is rescaled by
    its sum plus 1e-6, so the rescaled weights add up to slightly less than
    one; a single multinomial trial is then drawn and the index of the
    selected entry is returned.
    """
    weights = prob / (prob.sum() + 1e-6)
    # One experiment of one trial: a (1, len(prob)) array with a single 1.
    draw = np.random.multinomial(1, weights, 1)
    return np.argmax(draw)

In [41]:
# Build a second model from the same layers, load the saved parameters into it
# and initialize it for generation.
num_predict = 1000  # how many characters the generation loop will emit
time_steps = 1
model.be.bsz = 20

# NOTE(review): absolute, machine-specific path; presumably the file written by
# the training callbacks -- confirm it matches their save_path.
saved_params = '/Users/tgn/Desktop/pilot/Benchmarks/P3B2/neon_lstm_model_upper.pickle'

model_new = Model(layers=layers)
model_new.load_params(saved_params)
model_new.initialize(dataset=(data.shape[0], time_steps))

In [None]:
# Generate text: prime the network with a seed string, then repeatedly sample
# the next character from the prediction and feed it back as the next input.
text = []
seed_tokens = list('ROMEO:')

# Column of the input/output buffers used for generation. The one-hot input is
# written to this column, so the prediction must be read from the same column.
# The previous code read column -1, which with a batch size of 20 is a
# different batch item that only ever receives all-zero input.
# Assumes time_steps == 1, so each column is one batch item -- TODO confirm
# the layout if time_steps is ever raised.
seq_col = 0

# Buffer width follows the backend batch size rather than a hard-coded 20.
x = model_new.be.zeros((len(data.vocab), model_new.be.bsz * time_steps))

# Feed the seed characters one at a time; only the output after the last seed
# character is used below.
for s in seed_tokens:
    x.fill(0)
    x[data.token_to_index[s], seq_col] = 1
    y = model_new.fprop(x)

for _ in range(num_predict):
    # Sample from the latest prediction and feed it into the next fprop
    pred = int(sample(y.get()[:, seq_col]))
    text.append(data.index_to_token[pred])

    x.fill(0)
    x[pred, seq_col] = 1
    y = model_new.fprop(x)

neon_logger.display(''.join(seed_tokens + text))