Notebook written by [Zhedong Zheng](https://github.com/zhedongzheng)

![title](img/dilated_cnn.jpg)

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Hyper-parameters shared by the data pipeline, the model and the training loop.
params = dict(
    batch_size=128,        # sequences per batch
    text_iter_step=25,     # stride, in characters, between consecutive batches
    seq_len=200,           # characters per sequence
    kernel_sz=5,           # width of each 1-D convolution kernel
    hidden_dim=128,        # embedding size and number of convolution filters
    n_hidden_layer=4,      # number of stacked dilated convolution blocks
    dropout_rate=0.1,      # dropout rate applied inside each convolution block
    display_step=10,       # print the loss every this many steps
    generate_step=100,     # print generated text every this many steps
)

In [3]:
def parse_text(file_path):
    """Read a text file and encode it as an array of character ids.

    Ids 0, 1 and 2 are reserved for the special tokens '<pad>', '<start>'
    and '<end>'; every distinct character of the file gets an id from 3 up.

    Args:
        file_path: path of the text file to read.

    Returns:
        A tuple (ints, char2idx). `ints` is a NumPy array with the id of each
        character of the file, in order. `char2idx` maps every character and
        the three special tokens to its id.
    """
    with open(file_path) as f:
        text = f.read()

    # Sort the character set before numbering it: the iteration order of a
    # set of strings varies between interpreter runs (hash randomisation),
    # which would give a different id assignment on every run.
    char2idx = {c: i + 3 for i, c in enumerate(sorted(set(text)))}
    char2idx['<pad>'] = 0
    char2idx['<start>'] = 1
    char2idx['<end>'] = 2

    ints = np.array([char2idx[char] for char in text])
    return ints, char2idx

def next_batch(ints, batch_size=None, seq_len=None, step=None):
    """Yield sliding-window batches cut from the encoded text.

    Each batch is a contiguous window of `batch_size * seq_len` ids reshaped
    to `[batch_size, seq_len]`; consecutive windows start `step` ids apart.

    Args:
        ints: 1-D NumPy array of ids.
        batch_size: rows per batch; defaults to `params['batch_size']`.
        seq_len: ids per row; defaults to `params['seq_len']`.
        step: stride between window starts; defaults to
            `params['text_iter_step']`.

    Yields:
        Arrays of shape `[batch_size, seq_len]`. Nothing is yielded when
        `ints` is shorter than one window.
    """
    if batch_size is None:
        batch_size = params['batch_size']
    if seq_len is None:
        seq_len = params['seq_len']
    if step is None:
        step = params['text_iter_step']

    len_win = seq_len * batch_size
    # The "+ 1" keeps the window that ends exactly at the end of `ints`;
    # without it an input of exactly one window would yield no batch at all.
    for i in range(0, len(ints) - len_win + 1, step):
        clip = ints[i: i + len_win]
        yield clip.reshape([batch_size, seq_len])
        
def input_fn(ints):
    """Return a tensor that yields successive batches from `next_batch(ints)`.

    The batches are wrapped in a `tf.data.Dataset` of int32 tensors whose
    second dimension is `params['seq_len']`, and read through a one-shot
    iterator.
    """
    def batch_generator():
        return next_batch(ints)

    batch_shape = tf.TensorShape([None, params['seq_len']])
    dataset = tf.data.Dataset.from_generator(batch_generator, tf.int32, batch_shape)
    return dataset.make_one_shot_iterator().get_next()

In [4]:
def start_sent(x):
    """Prepend a column filled with the '<start>' id to the 2-D tensor `x`."""
    n_rows = tf.shape(x)[0]
    start_col = tf.fill([n_rows, 1], params['char2idx']['<start>'])
    return tf.concat([start_col, x], axis=1)

def end_sent(x):
    """Append a column filled with the '<end>' id to the 2-D tensor `x`."""
    n_rows = tf.shape(x)[0]
    end_col = tf.fill([n_rows, 1], params['char2idx']['<end>'])
    return tf.concat([x, end_col], axis=1)

def embed_seq(x, vocab_sz, embed_dim, name, zero_pad=True):
    """Look up an embedding vector for every id in `x`.

    Args:
        x: integer tensor of ids.
        vocab_sz: number of rows of the embedding table.
        embed_dim: size of each embedding vector.
        name: name passed to `tf.get_variable` for the table.
        zero_pad: when true, row 0 of the table is replaced by zeros before
            the lookup, so id 0 always maps to a zero vector.

    Returns:
        The tensor produced by `tf.nn.embedding_lookup` on the table.
    """
    table = tf.get_variable(name, [vocab_sz, embed_dim])
    if zero_pad:
        zero_row = tf.zeros([1, embed_dim])
        table = tf.concat([zero_row, table[1:, :]], 0)
    return tf.nn.embedding_lookup(table, x)


def position_embedding(inputs):
    """Return a learned embedding for every time position of `inputs`.

    Args:
        inputs: tensor whose second dimension (time) is statically known;
            its first dimension is used as the batch size.

    Returns:
        Tensor of embeddings of size `params['hidden_dim']`, one per
        (batch row, position) pair, looked up from the
        'position_embedding' variable.
    """
    T = inputs.get_shape().as_list()[1]
    x = tf.range(T)                            # (T)
    x = tf.expand_dims(x, 0)                   # (1, T)
    x = tf.tile(x, [tf.shape(inputs)[0], 1])   # (N, T)
    # Index 0 here is the first time step, not a padding id, so its row must
    # not be zeroed out the way the '<pad>' row of a token embedding is.
    return embed_seq(x, T, params['hidden_dim'], 'position_embedding', zero_pad=False)

In [5]:
def cnn_block(x, dilation_rate, pad_sz, is_training):
    """Apply one left-padded dilated 1-D convolution, then ReLU and dropout.

    Args:
        x: input tensor with time on axis 1 and channels on axis 2.
        dilation_rate: dilation rate of the convolution.
        pad_sz: number of zero time steps inserted before the sequence.
        is_training: forwarded to dropout; dropout is only active when true.

    Returns:
        The activated tensor. When `pad_sz` equals
        `(params['kernel_sz'] - 1) * dilation_rate` (as `forward` passes it)
        the time length of `x` is preserved and each output step depends
        only on the current and earlier input steps.
    """
    # Pad only on the left of the time axis. The previous version also padded
    # on the right and then sliced those extra outputs off with
    # `x[:, :-pad_sz, :]`, which wasted computation and returned an empty
    # tensor when pad_sz == 0. The retained outputs are the same.
    x = tf.pad(x, [[0, 0], [pad_sz, 0], [0, 0]])
    x = tf.layers.conv1d(inputs = x,
                         filters = params['hidden_dim'],
                         kernel_size = params['kernel_sz'],
                         dilation_rate = dilation_rate)
    x = tf.nn.relu(x)
    x = tf.layers.dropout(x, params['dropout_rate'], training=is_training)
    return x


def forward(inputs, reuse, is_training):
    """Build the model and return per-position logits over the vocabulary.

    A '<start>' id is prepended to `inputs`, the ids are embedded and summed
    with position embeddings, and the result goes through
    `params['n_hidden_layer']` residual `cnn_block`s whose dilation rate
    doubles at each layer (1, 2, 4, ...). A dense layer then projects to
    `params['vocab_size']` logits.

    Args:
        inputs: integer tensor of ids, one row per sequence.
        reuse: passed to the 'model' variable scope.
        is_training: forwarded to every `cnn_block`.

    Returns:
        Logits tensor; its time axis is one longer than that of `inputs`
        because of the prepended '<start>' id.
    """
    tokens = start_sent(inputs)
    with tf.variable_scope('model', reuse=reuse):
        hidden = embed_seq(tokens, params['vocab_size'], params['hidden_dim'], 'word_embedding')
        hidden = hidden + position_embedding(hidden)

        kernel_span = params['kernel_sz'] - 1
        for layer_idx in range(params['n_hidden_layer']):
            rate = 2 ** layer_idx
            # Residual connection around each dilated convolution block.
            hidden = hidden + cnn_block(hidden, rate, kernel_span * rate, is_training)

        logits = tf.layers.dense(hidden, params['vocab_size'])
    return logits

In [6]:
def autoregressive():
    """Build the op that greedily generates `params['seq_len']` ids.

    Starting from an all-zero input of shape [1, seq_len], a `tf.while_loop`
    runs the model (reusing the trained variables, dropout off) once per
    position. At step i the arg-max id at logits position i is taken as the
    next character and written into the model input for the following step.

    Returns:
        1-D int32 tensor with the `seq_len` generated ids in order.
    """
    seq_len = params['seq_len']

    def keep_going(step, model_in, generated):
        return step < seq_len

    def generate_one(step, model_in, generated):
        logits = forward(model_in, reuse=True, is_training=False)
        best_ids = tf.argmax(logits, -1, output_type=tf.int32)
        next_id = tf.expand_dims(best_ids[:, step], -1)

        # `generated` is a fixed-size buffer: drop the oldest slot and append
        # the new id, so the ids produced so far sit at its right end.
        generated = tf.concat([generated[:, 1:], next_id], -1)

        # Rotate the buffer so the ids produced so far come first, followed
        # by the still-unused zeros; this is the next model input.
        n_filled = step + 1
        produced = generated[:, -n_filled:]
        unused = generated[:, :-n_filled]
        model_in = tf.reshape(tf.concat([produced, unused], -1), [1, seq_len])
        return step + 1, model_in, generated

    blank = tf.zeros([1, seq_len], tf.int32)
    _, result, _ = tf.while_loop(keep_going, generate_one, [tf.constant(0), blank, blank])

    return result[0]

In [None]:
# Encode the corpus and store the vocabulary lookups in `params`.
ints, params['char2idx'] = parse_text('../temp/anna.txt')
params['vocab_size'] = len(params['char2idx'])
params['idx2char'] = dict((idx, ch) for ch, idx in params['char2idx'].items())
print('Vocabulary size:', params['vocab_size'])

# Training graph: input batches -> logits.
X = input_fn(ints)
logits = forward(X, reuse=False, is_training=True)

ops = {'global_step': tf.Variable(0, trainable=False)}

# The targets are the inputs with '<end>' appended; every position has weight 1.
targets = end_sent(X)
loss_weights = tf.to_float(tf.ones_like(targets))
ops['loss'] = tf.reduce_mean(
    tf.contrib.seq2seq.sequence_loss(
        logits=logits,
        targets=targets,
        weights=loss_weights))

optimizer = tf.train.AdamOptimizer()
ops['train'] = optimizer.minimize(ops['loss'], global_step=ops['global_step'])

# Text-generation op; it reuses the variables created by `forward` above.
ops['generate'] = autoregressive()

Vocabulary size: 86


In [None]:
# Start a session and initialise every variable of the graph.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Run training steps until the one-shot input iterator is exhausted, which
# TensorFlow signals with OutOfRangeError.
while True:
    try:
        _, step, loss = sess.run([ops['train'], ops['global_step'], ops['loss']])
    except tf.errors.OutOfRangeError:
        break
    else:
        if step % params['display_step'] == 0 or step == 1:
            print("Step %d | Loss %.3f" % (step, loss))
        if step % params['generate_step'] == 0 and step > 1:
            # Keep the sample under its own name so the encoded corpus held
            # in `ints` is not overwritten by generated ids.
            generated_ids = sess.run(ops['generate'])
            print('\n'+''.join([params['idx2char'][i] for i in generated_ids])+'\n')

Step 1 | Loss 4.490
Step 10 | Loss 3.249
Step 20 | Loss 2.921
Step 30 | Loss 2.688
Step 40 | Loss 2.487
Step 50 | Loss 2.336
Step 60 | Loss 2.201
Step 70 | Loss 2.087
Step 80 | Loss 1.985
Step 90 | Loss 1.892
Step 100 | Loss 1.807

 her his fand and her his fous the doond of the she sas and the stofe to the was the doof the shis, and the drout was the said the gould sting to his fare the child the was to the bures and that the s

Step 110 | Loss 1.711
Step 120 | Loss 1.634
Step 130 | Loss 1.550
Step 140 | Loss 1.476
Step 150 | Loss 1.403
Step 160 | Loss 1.336
Step 170 | Loss 1.273
Step 180 | Loss 1.211
Step 190 | Loss 1.153
Step 200 | Loss 1.096

 he was in ous for the pare, and the chald not be on in the room, be was un the looking-glass. It and that her mather the conding her hears her
coned of this himself, and he rapert one and what she wo

Step 210 | Loss 1.044
Step 220 | Loss 0.997
Step 230 | Loss 0.952
Step 240 | Loss 0.912
Step 250 | Loss 0.877
Step 260 | Loss 0.841
Step 270 | 

Step 1940 | Loss 0.299
Step 1950 | Loss 0.299
Step 1960 | Loss 0.293
Step 1970 | Loss 0.291
Step 1980 | Loss 0.286
Step 1990 | Loss 0.279
Step 2000 | Loss 0.287

 that he had come of Moscow for. From his brother's Levin went to Oblonsky's office, and
on getting news of the Shtcherbatskys from him, he drove to the place
where he had been told he might find Kitt

Step 2010 | Loss 0.282
Step 2020 | Loss 0.296
Step 2030 | Loss 0.288
Step 2040 | Loss 0.297
Step 2050 | Loss 0.285
Step 2060 | Loss 0.291
Step 2070 | Loss 0.295
Step 2080 | Loss 0.294
Step 2090 | Loss 0.282
Step 2100 | Loss 0.280

 the
skaters, Levin sake a deard Levin, which had ant reserctented and bas mind, and that you grack lore is she little boned all that it were to make on the first the
fort al int going by steprely wen

Step 2110 | Loss 0.291
Step 2120 | Loss 0.293
Step 2130 | Loss 0.285
Step 2140 | Loss 0.284
Step 2150 | Loss 0.284
Step 2160 | Loss 0.284
Step 2170 | Loss 0.278
Step 2180 | Loss 0.285
Step 2190 | Loss 0.