In [1]:
# Use only the first GPU: expose just CUDA device 0 to this kernel process.
# This cell runs before the cell that imports TensorFlow.
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [2]:
import os
import warnings
from distutils.version import LooseVersion

import numpy as np
import tensorflow as tf

# Refuse to continue on a TensorFlow release older than 1.0.
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Use TensorFlow 1.0 or newer'

# Report the default GPU device, or warn when TensorFlow does not see one.
if tf.test.gpu_device_name():
    print('Default GPU Device:', tf.test.gpu_device_name())
else:
    warnings.warn('No GPU found! To train this neural network could take awhile.')

('Default GPU Device:', u'/gpu:0')


In [3]:
import pickle

# Load the preprocessed corpus together with its two vocabulary lookup tables.
# The file is opened in a `with` block so the handle is closed right after loading.
# NOTE(security): pickle.load can execute arbitrary code from the file; only
# load a preprocess.p that you generated yourself.
with open('data/preprocess.p', mode='rb') as preprocess_file:
    text, words_to_ids, ids_to_words = pickle.load(preprocess_file)
print(len(text), len(ids_to_words))

(5378116, 2824)


In [4]:
def get_batches(int_text, batch_size, seq_length):
    """Cut a sequence of ids into (input, target) training batches.

    Targets are the inputs shifted one position to the right, so every input
    id is paired with the id that follows it in ``int_text``. Trailing ids
    that do not fill a complete batch are dropped.

    Row ``b`` of batch ``k + 1`` continues exactly where row ``b`` of batch
    ``k`` stopped, because the data is first laid out as ``batch_size`` long
    rows and then sliced along the time axis.

    Args:
        int_text: sequence of integer ids.
        batch_size: number of sequences per batch.
        seq_length: number of ids per sequence.

    Returns:
        An int32 array of shape ``(n_batches, 2, batch_size, seq_length)``;
        ``result[k][0]`` is the input batch and ``result[k][1]`` the targets.

    Raises:
        ValueError: if ``int_text`` is too short to fill a single batch.
    """
    x = np.array(int_text[:-1], dtype=np.int32)
    y = np.array(int_text[1:], dtype=np.int32)

    n_batches = len(x) // (batch_size * seq_length)
    if n_batches == 0:
        raise ValueError(
            'int_text is too short for one batch of %d x %d ids'
            % (batch_size, seq_length))

    # Slice by the number of ids to keep. Slicing with a negative trim length
    # breaks when nothing has to be trimmed, because x[:-0] is an empty array.
    n_keep = n_batches * batch_size * seq_length
    x = x[:n_keep]
    y = y[:n_keep]

    # (batch_size, n_batches, seq_length) -> (n_batches, batch_size, seq_length):
    # the same result as splitting the (batch_size, -1) layout along axis 1.
    x_batches = x.reshape(batch_size, n_batches, seq_length).transpose(1, 0, 2)
    y_batches = y.reshape(batch_size, n_batches, seq_length).transpose(1, 0, 2)

    # Pair inputs with targets on a new axis right after the batch index.
    return np.stack((x_batches, y_batches), axis=1)

## Params

In [5]:
# Training hyperparameters.
num_epochs = 100      # passes over the list of batches
batch_size = 500      # sequences per batch
rnn_size = 500        # LSTM units; also used as the embedding width
n_layers = 1          # number of stacked LSTM layers
seq_length = 60       # ids per training sequence
learning_rate = .001  # fed to the optimizer's learning-rate placeholder

# Directory that receives the model checkpoints.
checkout_dir = 'checkpoints/'
# Create the directory only when it is missing. Any real failure (permissions,
# a file in the way, ...) is raised instead of being silently swallowed, so a
# missing checkpoint directory is noticed here rather than at save time.
if not os.path.isdir(checkout_dir):
    os.makedirs(checkout_dir)

## Network

In [6]:
from tensorflow.contrib import seq2seq

vocab_size = len(ids_to_words)

# inputs
# Both placeholder dimensions are left open, so the same graph accepts
# training batches and the single growing sequence used for generation.
inputs = tf.placeholder(tf.int32, [None, None], name='input')
inputs_shape = tf.shape(inputs)
targets = tf.placeholder(tf.int32, [None, None], name='targets')
lr = tf.placeholder(tf.float32, [], name='learning')

# recurrent nn
# Build a separate LSTM cell object for every layer. Repeating one instance
# ([lstm] * n_layers) hands the same cell to every layer, which fails for
# n_layers > 1 on TensorFlow releases after 1.0.
lstm_layers = [tf.contrib.rnn.BasicLSTMCell(rnn_size) for _ in range(n_layers)]
cell = tf.contrib.rnn.MultiRNNCell(lstm_layers)
# The zero state is sized from the dynamic batch dimension of `inputs`.
initial_state = cell.zero_state(inputs_shape[0], tf.float32)
initial_state = tf.identity(initial_state, 'initial_state') # just to name it

# embeddings
# One trainable vector of width rnn_size per vocabulary id.
params = tf.Variable(tf.random_uniform([vocab_size, rnn_size], -1., 1.))
embeddings = tf.nn.embedding_lookup(params, inputs)

# output and state
# NOTE(review): `initial_state` is not passed to dynamic_rnn, so a value fed
# for it does not appear to reach the RNN, which starts from its own zero
# state on every run. Confirm whether that is intended before wiring it in;
# the training and generation cells are written around the current behaviour.
outputs, final_state = tf.nn.dynamic_rnn(cell, embeddings, dtype=tf.float32)
final_state = tf.identity(final_state, 'final_state')

# Linear projection from the RNN outputs to one score per vocabulary id.
logits = tf.contrib.layers.fully_connected(outputs, vocab_size, activation_fn=None)

# Probabilities for generating words
probs = tf.nn.softmax(logits, name='probs')

# Loss function
# Every position gets weight 1, i.e. no padding is masked out.
cost = seq2seq.sequence_loss(
    logits,
    targets,
    tf.ones([inputs_shape[0], inputs_shape[1]]))

# Optimizer
optimizer = tf.train.AdamOptimizer(lr)

# Gradient Clipping
# compute_gradients yields (None, var) for variables the cost does not depend
# on; those pairs are skipped because clip_by_value cannot take None.
gradients = optimizer.compute_gradients(cost)
capped_gradients = [
    (tf.clip_by_value(grad, -1., 1.), var)
    for grad, var in gradients
    if grad is not None
]
train_op = optimizer.apply_gradients(capped_gradients)

## Create the session

In [7]:
# Open the session and build the checkpoint saver, then initialise every
# variable defined in the graph so far.
sess = tf.Session()
saver = tf.train.Saver()

init = tf.global_variables_initializer()
sess.run(init)

## Function used to generate some output

In [None]:
from tokenizer import *

def pick_word(probabilities, int_to_vocab):
    """Return the vocabulary entry at the position of the highest probability.

    The choice is greedy: the index of the largest value in `probabilities`
    is looked up in `int_to_vocab`.
    """
    best_id = np.argmax(probabilities)
    return int_to_vocab[best_id]

def generate(starting_text='Homer Simpson:', generate_length=300):
    """Extend `starting_text` by `generate_length` predicted tokens.

    The prompt is tokenised with `text_to_tokens`. On every step the whole
    token list so far is mapped to ids through `words_to_ids` and run through
    the network, the probabilities at the last position go to `pick_word`,
    and the chosen token is appended. The result is joined back into a string
    with `tokens_to_text`.

    Raises KeyError when a prompt token is missing from `words_to_ids`.
    """
    tokens = text_to_tokens(starting_text)
    # Evaluate the state tensor once up front; the dummy 1x1 input only
    # provides a batch dimension of one.
    state = sess.run(initial_state, {inputs: np.array([[1]])})

    for _ in range(generate_length):
        token_ids = [[words_to_ids[token] for token in tokens]]
        last_position = len(token_ids[0]) - 1

        fetches = [probs, final_state]
        feed = {inputs: token_ids, initial_state: state}
        step_probs, state = sess.run(fetches, feed)

        next_token = pick_word(step_probs[0][last_position], ids_to_words)
        tokens.append(next_token)
    return tokens_to_text(tokens)

## Train

In [None]:
import time

batches = get_batches(text, batch_size, seq_length)

# How often (in training steps) to log progress and to checkpoint and sample.
log_every = 500
checkpoint_every = 5000

# Prompts sampled from the model at every checkpoint.
sample_prompts = [
    '\n\nHomer Simpson: Where did the dog',
    'Bart Simpson:',
    'Moe:',
]

step = 0
# Time spent inside sess.run since the last progress line. This is measured,
# not extrapolated from a single step, and excludes checkpointing and sampling.
window_secs = 0.
for epoch in range(num_epochs):
    state = sess.run(initial_state, {inputs: batches[0][0]})
    # Shuffle the order of the batches in place (along the first axis only).
    np.random.shuffle(batches)
    for x, y in batches:
        step += 1
        feed = {inputs: x, targets: y, initial_state: state, lr: learning_rate}
        start = time.time()
        train_loss, state, _ = sess.run([cost, final_state, train_op], feed)
        window_secs += time.time() - start
        if step % log_every == 0:
            print('Step:', step, 'Sequence Loss:', train_loss, "%.2f secs" % window_secs)
            window_secs = 0.

        if step % checkpoint_every == 0:
            saver.save(sess, checkout_dir + 'model')
            # Print one sample per prompt, separated by rows of '*'.
            print('#' * 50)
            for prompt_index, prompt in enumerate(sample_prompts):
                if prompt_index:
                    print('*' * 50)
                print(generate(starting_text=prompt, generate_length=50))
            print('#' * 50)

('Step:', 500, 'Sequence Loss:', 1.8951381, 'took time:', '64.62 secs')
('Step:', 1000, 'Sequence Loss:', 1.7698169, 'took time:', '65.05 secs')
('Step:', 1500, 'Sequence Loss:', 1.7025232, 'took time:', '67.06 secs')
('Step:', 2000, 'Sequence Loss:', 1.6722658, 'took time:', '64.07 secs')
('Step:', 2500, 'Sequence Loss:', 1.6514809, 'took time:', '65.54 secs')
('Step:', 3000, 'Sequence Loss:', 1.6166742, 'took time:', '64.91 secs')
('Step:', 3500, 'Sequence Loss:', 1.5666766, 'took time:', '62.53 secs')
('Step:', 4000, 'Sequence Loss:', 1.5578958, 'took time:', '62.64 secs')
('Step:', 4500, 'Sequence Loss:', 1.5433419, 'took time:', '67.61 secs')
('Step:', 5000, 'Sequence Loss:', 1.5414732, 'took time:', '67.31 secs')
##################################################


Homer Simpson: Where did the dog  you to the ?

Homer Simpson: I don't know. I don't know what I did.

Homer 
**************************************************
Bart Simpson: I don't know what I did.

Bart Simpson: I d

In [None]:
('Step:', 500, 'Sequence Loss:', 1.9290897, 'took time:', '29.09 secs')
('Step:', 1000, 'Sequence Loss:', 1.7947156, 'took time:', '28.46 secs')
('Step:', 1500, 'Sequence Loss:', 1.7302173, 'took time:', '28.19 secs')
('Step:', 2000, 'Sequence Loss:', 1.6997606, 'took time:', '31.83 secs')
('Step:', 2500, 'Sequence Loss:', 1.708122, 'took time:', '28.33 secs')
('Step:', 3000, 'Sequence Loss:', 1.6659172, 'took time:', '27.52 secs')
('Step:', 3500, 'Sequence Loss:', 1.6495925, 'took time:', '27.50 secs')
('Step:', 4000, 'Sequence Loss:', 1.5773289, 'took time:', '27.56 secs')
('Step:', 4500, 'Sequence Loss:', 1.5948731, 'took time:', '27.45 secs')
('Step:', 5000, 'Sequence Loss:', 1.6080065, 'took time:', '26.70 secs')

In [None]:
# Close the session created earlier to release the resources it holds.
sess.close()