In [1]:
import tensorflow as tf
import numpy as np

In [2]:
def load_text_to_id(filename):
    """Read a UTF-8 text file and map every whitespace-separated token to an id.

    Newlines are replaced by the sentence marker ``<eos>`` before tokenizing.
    Id 0 is always reserved for ``<unk>``; the remaining ids follow the sorted
    order of the distinct tokens.

    Args:
        filename: path of the UTF-8 encoded corpus file.

    Returns:
        A tuple ``(word_ids, vocab, inv_vocab)``:
        word_ids  -- list of int ids, one per token in file order.
        vocab     -- dict mapping token -> id.
        inv_vocab -- numpy array such that ``inv_vocab[vocab[w]] == w``.
    """
    unk = '<unk>'

    # Read raw bytes and decode explicitly: this behaves the same on Python 2
    # and Python 3 (a text-mode handle returns `str` on Python 3, which has no
    # `.decode`).
    with open(filename, 'rb') as f:
        text = f.read().decode('utf8')
    words = text.replace("\n", " <eos> ").split()

    # Exclude a literal '<unk>' from the corpus types so it is not listed twice;
    # a duplicate would leave id 0 unassigned and shift inv_vocab by one.
    id_to_word = [unk] + sorted(set(words) - {unk})
    vocab = dict(zip(id_to_word, range(len(id_to_word))))

    # Every token is in the vocabulary by construction, so a plain lookup suffices.
    word_ids = [vocab[w] for w in words]
    inv_vocab = np.array(id_to_word)
    return word_ids, vocab, inv_vocab

# Load the corpus once: token ids, the token -> id dict, and the id -> token array.
# `vocab` and `inv_vocab` are used by the later model-building and generation cells.
word_ids, vocab, inv_vocab = load_text_to_id('raw_sentences.txt')

In [3]:
def get_model(n_steps, dim_input, dim_hidden, batch_size, vocab_size):
    """Build the TensorFlow graph for a single-layer LSTM word-level language model.

    Args:
        n_steps: number of time steps per sequence (second placeholder dimension).
        dim_input: accepted but never read in this function; the embedding
            width is taken from ``dim_hidden`` instead.
        dim_hidden: size passed to the LSTM cell, also used as the embedding width.
        batch_size: number of sequences per batch (first placeholder dimension).
        vocab_size: number of rows of the embedding table and width of the logits.

    Returns:
        dict of graph handles with keys 'train' (the RMSProp minimize op),
        'final_state', 'cost', 'logits', 'input', 'target', 'init_state',
        'cell' and 'probs'.

    NOTE(review): the tf.Variable calls below carry no explicit names, so their
    names presumably depend on creation order; reordering them may break
    restoring an existing checkpoint -- confirm before restructuring.
    """
    # Integer word ids for the inputs and the prediction targets, both [batch_size, n_steps].
    input_data = tf.placeholder('int32', [batch_size, n_steps])
    targets = tf.placeholder('int32', [batch_size, n_steps])

    with tf.device('/gpu:0'):
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(dim_hidden)
        # lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=0.5)
        # lstm_cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * 3)
        # All-zero starting state; callers can feed a different state through 'init_state'.
        initial_state = lstm_cell.zero_state(batch_size, 'float32')

    with tf.device("/cpu:0"):
        # Randomly initialised embedding table, one row of width dim_hidden per vocabulary entry.
        embedding = tf.Variable(tf.random_normal([vocab_size, dim_hidden]))
    inputs = tf.nn.embedding_lookup(embedding, input_data)
    # inputs = tf.nn.dropout(inputs, 0.5)
    # Split along axis 1 into n_steps slices and squeeze that axis away, giving a
    # Python list with one tensor per time step.
    # NOTE(review): the axis-first argument order of tf.split / tf.concat looks
    # like the pre-1.0 TensorFlow API -- confirm against the installed version.
    inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, n_steps, inputs)]

    # Run the cell over the per-step inputs, starting from initial_state.
    outputs, state = tf.nn.rnn(lstm_cell, inputs, initial_state=initial_state)
    # Concatenate the per-step outputs and flatten to rows of width dim_hidden.
    output = tf.reshape(tf.concat(1, outputs), [-1, dim_hidden])
    with tf.device('/gpu:0'):
        # Output projection from hidden width to vocabulary width.
        Wy = tf.Variable(tf.random_normal([dim_hidden, vocab_size]))
        by = tf.Variable(tf.random_normal([vocab_size]))
        logits = tf.matmul(output, Wy) + by
        probs = tf.nn.softmax(logits)
        # Per-position loss against the flattened targets; the weights are all
        # ones, so every position counts equally.
        loss = tf.nn.seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(targets, [-1])],
            [tf.ones([batch_size * n_steps], dtype='float32')])
        # Sum over all positions, divided by the batch size.
        cost = tf.reduce_sum(loss) / batch_size

    final_state = state
    # NOTE(review): 0.001 and 0.9 are presumably the learning rate and decay -- confirm.
    train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)
    return {'train': train_op, 'final_state': final_state, 'cost': cost,
            'logits': logits, 'input': input_data, 'target': targets,
            'init_state': initial_state, 'cell': lstm_cell, 'probs': probs,}

In [4]:
# Build the graph with n_steps=1 and batch_size=1 so the generation cell can
# feed one word id at a time.
# NOTE(review): dim_input=25 is not read by get_model; the embedding width
# comes from dim_hidden.
model = get_model(n_steps=1, dim_input=25, dim_hidden=50, batch_size=1, vocab_size=len(vocab))

In [5]:
# Seed text for generation; it is split on whitespace and fed word by word
# to the model before new words are produced.
prime = 'I like to'

In [7]:
# Generate text: feed the prime words through the restored LSTM to build up its
# state, pick the most likely next word, then sample 30 further words.
sent = prime.split()
if not sent:
    # Without at least one prime word there is no distribution to choose the
    # first generated word from.
    raise ValueError("prime must contain at least one word")

# Id 0 is the '<unk>' slot reserved by load_text_to_id; words missing from the
# vocabulary map to it instead of raising KeyError.
unk_id = vocab.get('<unk>', 0)

# allow_soft_placement lets ops pinned to '/gpu:0' fall back to the CPU on
# machines without a GPU.
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    # Initialise everything, then overwrite with the trained values from the checkpoint.
    tf.global_variables_initializer().run()
    saver = tf.train.Saver(tf.global_variables())
    saver.restore(sess, './lstm_en.checkpoint')

    init_state = model['init_state']
    final_state = model['final_state']
    probs = model['probs']
    X = model['input']

    # Evaluate the model's own zero state rather than creating new zero_state
    # ops in the graph with a hard-coded batch size.
    state = sess.run(init_state)

    # Warm-up: run each prime word through the network, carrying the state along.
    for w in sent[:]:
        x = np.array([[vocab.get(w, unk_id)]])
        p, state = sess.run([probs, final_state], feed_dict={X: x, init_state: state})
    # First generated word is the most probable continuation of the prime.
    word = inv_vocab[np.argmax(p[0])]
    sent.append(word)

    for _ in range(30):
        x = np.array([[vocab.get(word, unk_id)]])
        p, state = sess.run([probs, final_state], feed_dict={X: x, init_state: state})
        # np.random.choice requires p to sum to 1 within a tight tolerance; a
        # float32 softmax row can miss it, so renormalise in float64 first.
        # (Use inv_vocab[np.argmax(dist)] here for greedy decoding instead.)
        dist = np.asarray(p[0], dtype=np.float64)
        dist /= dist.sum()
        word = np.random.choice(inv_vocab, p=dist)
        sent.append(word)
    # Parenthesised form prints identically on Python 2 and also runs on Python 3.
    print(' '.join(sent))

I like to do it ? <eos> But we should have a team . <eos> It 's a play . <eos> You were nt any of us , he said . <eos> But it
