# Char LSTM Example.
This example shows how to use an LSTM to build a character-level language model and how to generate text from it. 
We use the Tiny Shakespeare text for demonstration purposes. 

Data can be found at [https://github.com/dmlc/web-data/tree/master/mxnet/tinyshakespeare](https://github.com/dmlc/web-data/tree/master/mxnet/tinyshakespeare). 

In [1]:
from collections import Counter
from collections import defaultdict
import mxnet as mx
import numpy as np
import sys
import lstm

Set basic network parameters. 

In [2]:
# --- Data layout (used by make_batch, drop_tail and the model setup) ---
batch_size = 32
seq_len = 32

# --- Network size (forwarded to the lstm model builders) ---
num_lstm_layer = 2
num_hidden = 256
num_embed = 256

# --- Optimisation settings (forwarded to lstm.train_lstm) ---
num_round = 21
update_period = 1
learning_rate = 1
momentum = 0.0
wd = 1e-5
clip_gradient = 1


Build a character-to-index dictionary from the text

In [3]:
def make_dict(text, max_vocab=10000):
    """Build a character -> integer index mapping, most frequent first.

    Prints the number of distinct characters found in ``text``.

    Args:
        text: Iterable of characters (typically a string).
        max_vocab: Upper bound on the vocabulary size. At most
            ``max_vocab - 1`` real characters are kept.

    Returns:
        A ``defaultdict(int)`` mapping each kept character to its frequency
        rank (0 = most common). The extra key ``"_UNKNOWN_"`` is appended
        with the next free index only when exactly ``max_vocab - 1``
        characters were kept. Because it is a ``defaultdict(int)``,
        indexing a missing key yields 0 and inserts that key.
    """
    counts = Counter(list(text))
    print("Total unique char: %d" % len(counts))
    keep = max_vocab - 1
    vocab = defaultdict(int)
    for rank, (char, _) in enumerate(counts.most_common(keep)):
        vocab[char] = rank
    if len(vocab) == keep:
        # Vocabulary is saturated: reserve the next index for unseen chars.
        vocab["_UNKNOWN_"] = len(vocab)
    return vocab


Convert the text into data batches

In [4]:
def make_batch(file_path, batch_size=32, seq_lenth=32, max_vocab=10000, dic=None):
    """Read a text file and encode it as a (num_batch, batch_size) index array.

    The text is truncated to a multiple of ``batch_size`` and laid out
    column by column: column ``j`` holds the ``j``-th contiguous chunk of
    ``num_batch`` characters, so reading down a column follows the text.

    Args:
        file_path: Path of the text file to read.
        batch_size: Number of columns in the returned array.
        seq_lenth: Accepted for interface compatibility; not used here.
        max_vocab: Vocabulary cap forwarded to ``make_dict`` when ``dic``
            is not supplied.
        dic: Optional existing char -> index mapping. When ``None`` a new
            one is built from the file contents.

    Returns:
        Tuple ``(data, dic, lookup_table)`` where ``data`` is a float32
        numpy array of character indices, ``dic`` is the char -> index
        mapping and ``lookup_table`` is the inverse index -> char mapping.
    """
    # Context manager guarantees the file is closed even if read() raises.
    with open(file_path) as fi:
        text = fi.read()
    if dic is None:
        dic = make_dict(text, max_vocab)
    lookup_table = dict((idx, c) for c, idx in dic.items())
    num_batch = len(text) // batch_size
    # Drop trailing characters that do not fill a complete row.
    char_lst = list(text)[:num_batch * batch_size]
    data = np.zeros((num_batch, batch_size), dtype="float32")
    idx = 0
    for j in range(batch_size):
        for i in range(num_batch):
            char = char_lst[idx]
            if char in dic:
                data[i][j] = dic[char]
            else:
                # Out-of-vocabulary character: store the unknown index in
                # the output array. The previous code wrote it back into
                # the character list, leaving the cell at 0.
                data[i][j] = dic["_UNKNOWN_"]
            idx += 1
    return data, dic, lookup_table


In [5]:
# Encode the corpus; the dictionary and its inverse are reused when sampling.
X, dic, lookup_table = make_batch(
    "./input.txt",
    batch_size=batch_size,
    seq_lenth=seq_len,
)
# Vocabulary size is the number of entries in the char -> index dictionary.
vocab = len(dic)

Total unique char: 65


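Split the data into training and validation sets, and drop the tail rows so that each set's length is a multiple of `seq_len`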

In [6]:
def drop_tail(X, seq_len):
    """Trim trailing rows so the row count is a multiple of ``seq_len``.

    Args:
        X: 2-D array-like supporting ``.shape`` and ``X[a:b, :]`` slicing.
        seq_len: Number of rows per full sequence.

    Returns:
        The leading rows of ``X`` covering the whole sequences that fit;
        the incomplete remainder is discarded.
    """
    total_rows = X.shape[0]
    usable_rows = int(total_rows / seq_len) * seq_len
    return X[:usable_rows, :]

# The first 90% of the rows are used for training, the rest for validation.
train_val_fraction = 0.9
size = X.shape[0]
split_row = int(size * train_val_fraction)

# Trim each split to a whole number of seq_len-row sequences.
X_train = drop_tail(X[:split_row, :], seq_len)
X_val = drop_tail(X[split_row:, :], seq_len)

Set up LSTM model on GPU

In [7]:
# Training runs on the GPU; weights start from Uniform(0.1).
train_ctx = mx.gpu()
weight_init = mx.initializer.Uniform(0.1)

# The vocabulary size is passed as both input_size and num_label.
model = lstm.setup_rnn_model(
    train_ctx,
    num_lstm_layer=num_lstm_layer,
    seq_len=seq_len,
    num_hidden=num_hidden,
    num_embed=num_embed,
    num_label=vocab,
    batch_size=batch_size,
    input_size=vocab,
    initializer=weight_init,
    dropout=0.,
)


Train LSTM model

In [8]:
# Optimisation settings come from the configuration cell, except half_life,
# which is fixed here.
train_options = {
    "num_round": num_round,
    "half_life": 3,
    "update_period": update_period,
    "learning_rate": learning_rate,
    "wd": wd,
    "momentum": momentum,
    "clip_gradient": clip_gradient,
}
lstm.train_lstm(model, X_train, X_val, **train_options)

Training swith train.shape=(31360, 32)
Training swith val.shape=(3456, 32)
batch_size=32
seq_len=32
Epoch [125] Train: NLL=3.368, Prep=29.019
Epoch [250] Train: NLL=3.289, Prep=26.811
Epoch [375] Train: NLL=3.180, Prep=24.044
Epoch [500] Train: NLL=3.070, Prep=21.534
Epoch [625] Train: NLL=2.971, Prep=19.503
Epoch [750] Train: NLL=2.891, Prep=18.011
Epoch [875] Train: NLL=2.824, Prep=16.846
Iter [0] Train: Time: 40.182 sec, NLL=2.775, Prep=16.041
Iter [0] Val: NLL=2.288, Prep=9.857
Epoch [1000] Train: NLL=2.347, Prep=10.451
Epoch [1125] Train: NLL=2.321, Prep=10.188
Epoch [1250] Train: NLL=2.298, Prep=9.951
Epoch [1375] Train: NLL=2.276, Prep=9.741
Epoch [1500] Train: NLL=2.256, Prep=9.541
Epoch [1625] Train: NLL=2.234, Prep=9.338
Epoch [1750] Train: NLL=2.215, Prep=9.160
Epoch [1875] Train: NLL=2.196, Prep=8.987
Iter [1] Train: Time: 40.342 sec, NLL=2.184, Prep=8.885
Iter [1] Val: NLL=1.988, Prep=7.298
Epoch [2000] Train: NLL=2.050, Prep=7.766
Epoch [2125] Train: NLL=2.032, Prep=7.631

Get the trained parameters from the model

In [9]:
# Each entry of model.param_blocks is a 4-tuple; keep only the parameter
# array (second field), keyed by its name (fourth field).
args = {name: arr for _, arr, _, name in model.param_blocks}

Build a sampler that uses the parameters we trained

In [10]:
# NOTE: this rebinds the notebook-level batch_size (32 during training) to 1;
# re-running earlier cells afterwards will pick up the new value.
batch_size = 1
sample_ctx = mx.cpu()
sampler = lstm.setup_rnn_sample_model(
    sample_ctx,
    args,
    num_lstm_layer,
    num_hidden,
    num_embed,
    vocab,
    batch_size,
    vocab,
)



In [11]:
start = 'a'
# NOTE: rebinds seq_len (32 during training) to the number of sampling steps.
seq_len = 75
# 1x1 float32 input holding the index of the seed character.
X_input_batch = np.full((1, 1), dic[start], dtype="float32")
out = lstm.sample_lstm(sampler, X_input_batch, seq_len)

Look up the predicted characters

In [12]:
# Map the first value of each output row back to its character.
chars = []
for step in range(seq_len):
    chars.append(lookup_table[int(out[step][0])])
print(start + "".join(chars))

an'd and dear victories at sound before.
Sir! palient, made me; let it kiss 
