# Hyperparameters for the model

In [1]:
import os

# Index of the GPU to expose to this kernel (exported as CUDA_VISIBLE_DEVICES below).
use_gpu_number = 0
# Upper bound on passes over the training batches.
num_epochs = 100
# Training stops once this many optimisation steps have run, even mid-epoch.
num_steps = 30000
# Number of sequences per mini-batch.
batch_size = 64
# Number of units in each LSTM cell.
rnn_size = 512 * 2
# Width of a word-embedding vector (second dimension of the embedding matrix W).
embed_dim = 300
# Number of stacked LSTM layers.
n_layers = 1
# Number of tokens per training sequence.
seq_length = 50
# Learning rate fed to the Adam optimizer.
learning_rate = .001
# Keep probability fed to the LSTM output dropout while training.
lstm_keep_prob = .9
# Keep probability fed to the `embeddings_keep_prob` placeholder while training.
embedd_keep_prob = 1.
# Whether the embedding matrix is a trainable variable.
embedd_trainable = True
# Checkpoints are written here; one directory per GPU index.
checkout_dir = 'checkpoints' + str(use_gpu_number) + '/'

os.makedirs(checkout_dir, exist_ok=True)
# IPython magic: sets the environment variable from the Python variable above.
%env CUDA_VISIBLE_DEVICES=$use_gpu_number

env: CUDA_VISIBLE_DEVICES=0


# Imports and checks

In [2]:
from distutils.version import LooseVersion
import warnings
import numpy as np
import tensorflow as tf
import pickle
import pandas as pd

# Refuse to run on a TensorFlow release older than 1.0.
tf_version = LooseVersion(tf.__version__)
assert tf_version >= LooseVersion('1.0'), 'Use TensorFlow 1.0 or newer'

# Report the default GPU if there is one, otherwise warn about CPU-only training.
gpu_device = tf.test.gpu_device_name()
if gpu_device:
    print('Default GPU Device:', gpu_device)
else:
    warnings.warn('No GPU found! To train this neural network could take days on CPU.')

Default GPU Device: /gpu:0


# Import preprocessed data

In [3]:
# Load the preprocessed corpus: `text` is the token sequence later passed to
# get_batches, `words_to_ids` / `ids_to_words` are the two vocabulary lookups.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# The context manager closes the file handle, which the bare open() call leaked.
with open('data/preprocess.p', mode='rb') as preprocess_file:
    text, words_to_ids, ids_to_words = pickle.load(preprocess_file)

# Pretrained word vectors, later fed into the [vocab_size, embed_dim] embedding placeholder.
word_vectors = pd.read_hdf(key='data', path_or_buf='data/vectors.h5')

def get_batches(int_text, batch_size, seq_length):
    """Split a sequence of token ids into (input, target) mini-batches.

    Targets are the inputs shifted one position ahead, so the target of every
    token is the token that follows it. Trailing tokens that do not fill a
    complete batch are dropped.

    Args:
        int_text: sequence of integer token ids.
        batch_size: number of sequences per batch.
        seq_length: number of tokens per sequence.

    Returns:
        int32 array of shape (n_batches, 2, batch_size, seq_length) where
        result[i][0] holds the inputs and result[i][1] the targets of batch i.

    Raises:
        ValueError: if `int_text` is too short to fill a single batch.
    """
    x = np.array(int_text[:-1], dtype=np.int32)
    y = np.array(int_text[1:], dtype=np.int32)

    n_batches = len(x) // (batch_size * seq_length)
    if n_batches == 0:
        raise ValueError(
            'int_text is too short for a single batch of %d x %d tokens'
            % (batch_size, seq_length))

    # Slice by the number of tokens to keep. The previous `x[:-trim_len]`
    # returned an empty array whenever nothing had to be trimmed (trim_len == 0).
    n_keep = n_batches * batch_size * seq_length
    x = x[:n_keep]
    y = y[:n_keep]

    # One row per batch slot, then cut the rows into n_batches column chunks.
    x_batches = np.split(x.reshape(batch_size, -1), n_batches, 1)
    y_batches = np.split(y.reshape(batch_size, -1), n_batches, 1)

    return np.array(list(zip(x_batches, y_batches)))

# Show the length of the token sequence and the vocabulary size.
print(len(text), len(ids_to_words))

5595231 12665


# Sequence to sequence network

In [4]:
from tensorflow.contrib import seq2seq

vocab_size = len(ids_to_words)

# Inputs: token ids of shape [batch, time], the matching targets, and the learning rate.
inputs = tf.placeholder(tf.int32, [None, None], name='input')
inputs_shape = tf.shape(inputs)

targets = tf.placeholder(tf.int32, [None, None], name='targets')
lr = tf.placeholder(tf.float32, [], name='learning')

# Embeddings: W starts at zero and is overwritten by running `embedding_init`
# with the pretrained vectors fed through `embedding_placeholder`.
W = tf.Variable(tf.constant(0.0, shape=[vocab_size, embed_dim]), trainable=embedd_trainable, name="W")
embedding_placeholder = tf.placeholder(tf.float32, [vocab_size, embed_dim])
embedding_init = W.assign(embedding_placeholder)

# Keep probabilities default to 1.0 so evaluation and generation run without dropout.
output_keep_prob = tf.placeholder_with_default(1., shape=[])
embeddings_keep_prob = tf.placeholder_with_default(1., shape=[])

embeddings = tf.nn.embedding_lookup(W, inputs)

# Fix: the dropout result used to be discarded, so embedding dropout was never
# applied. Rebind `embeddings` so the RNN consumes the dropped-out tensor.
embeddings = tf.contrib.layers.dropout(embeddings, keep_prob=embeddings_keep_prob)

# Recurrent network: n_layers LSTM cells, each with dropout on its output.
layers = []
for _ in range(n_layers):
    cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=output_keep_prob)
    layers.append(cell)

cell = tf.contrib.rnn.MultiRNNCell(layers)
initial_state = cell.zero_state(inputs_shape[0], tf.float32)
initial_state = tf.identity(initial_state, 'initial_state') # just to name it

# Output and state.
# NOTE(review): `initial_state` is not passed to dynamic_rnn, so values fed for
# it elsewhere do not influence the recurrence -- confirm whether carrying the
# state between batches was intended before wiring it in.
outputs, final_state = tf.nn.dynamic_rnn(cell, embeddings, dtype=tf.float32)
final_state = tf.identity(final_state, 'final_state')

# Linear projection of every time step onto the vocabulary.
logits = tf.contrib.layers.fully_connected(outputs, vocab_size, activation_fn=None)

# Probabilities for generating words
probs = tf.nn.softmax(logits, name='probs')

# Loss function: every position in the batch is weighted equally.
cost = seq2seq.sequence_loss(
    logits,
    targets,
    tf.ones([inputs_shape[0], inputs_shape[1]]))

# Optimizer
optimizer = tf.train.AdamOptimizer(lr)

# Gradient clipping: clip each gradient element to [-1, 1]. Variables without a
# gradient (grad is None) are skipped instead of crashing tf.clip_by_value.
gradients = optimizer.compute_gradients(cost)
capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                    for grad, var in gradients if grad is not None]
train_op = optimizer.apply_gradients(capped_gradients)

# Create the session & init

I initialize the word embeddings with pretrained embeddings from word2vec.

In [5]:
# Open the session and give every graph variable its initial value.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Saver used for the periodic checkpoints written during training.
saver = tf.train.Saver(max_to_keep=1000)

# Overwrite the zero-initialised embedding matrix with the pretrained vectors.
sess.run(embedding_init, {embedding_placeholder: word_vectors});

# Function used to generate some output

In [6]:
from tokenizer import *

def pick_word(probabilities, int_to_vocab):
    """Sample the next word from a probability distribution over the vocabulary.

    The 'not_in_vocab' token (looked up in the module-level `words_to_ids`) is
    never sampled: its probability is zeroed and the rest renormalised.

    Args:
        probabilities: 1-D array-like with one probability per word id.
        int_to_vocab: mapping from word id to word.

    Returns:
        The sampled word.

    Raises:
        ValueError: if no probability mass is left after masking.
    """
    # Work on a float64 copy: the caller's array is left untouched, and the
    # renormalised vector reliably passes np.random.choice's sum-to-1 check,
    # which float32 rounding can fail.
    weights = np.array(probabilities, dtype=np.float64)
    weights[words_to_ids['not_in_vocab']] = 0.

    total = weights.sum()
    if not total > 0:
        raise ValueError('no probability mass left after masking not_in_vocab')
    weights /= total

    word_id = np.random.choice(len(weights), p=weights)
    return int_to_vocab[word_id]

def generate(starting_text='Homer Simpson:', generate_length=300):
    """Generate text by repeatedly sampling the next token from the network.

    Args:
        starting_text: prompt that seeds the generation.
        generate_length: number of tokens to append to the prompt.

    Returns:
        The prompt followed by the generated tokens, joined by `tokens_to_text`.
    """
    sentence_tokens = text_to_tokens(starting_text)

    # Prompt tokens missing from the vocabulary map to the 'not_in_vocab' id
    # instead of raising KeyError. Sampled tokens always come from the vocabulary.
    unknown_id = words_to_ids['not_in_vocab']
    sentence_ids = [words_to_ids.get(word, unknown_id) for word in sentence_tokens]

    prev_state = sess.run(initial_state, {inputs: np.array([[1]])})

    for _ in range(generate_length):
        # The whole sentence so far is fed on every step.
        # NOTE(review): the graph's dynamic_rnn call takes no initial_state, so
        # feeding `prev_state` here looks like it has no effect -- confirm.
        probabilities, prev_state = sess.run(
            [probs, final_state],
            {inputs: [sentence_ids], initial_state: prev_state})

        # Only the distribution after the last fed token is needed.
        predicted_token = pick_word(probabilities[0][-1], ids_to_words)
        sentence_tokens.append(predicted_token)
        sentence_ids.append(words_to_ids.get(predicted_token, unknown_id))

    return tokens_to_text(sentence_tokens)

# Train the network

The network will output some generated text every 1000 steps. At first it won't make much sense, but it gets better as training progresses.

In [7]:
import time

batches = get_batches(text, batch_size, seq_length)

# Hold out the trailing ~10% of the batches for validation. Unary minus binds
# tighter than //, so this is (-len(batches)) // 10, i.e. minus the rounded-up tenth.
split_at = -len(batches) // 10
train_batches = batches[:split_at]
test_batches = batches[split_at:]


def _mean_loss(eval_batches):
    """Return the mean loss over `eval_batches` without updating any weights."""
    eval_state = sess.run(initial_state, {inputs: eval_batches[0][0]})
    losses = []
    for eval_x, eval_y in eval_batches:
        # No train_op and no keep-prob feeds: dropout stays at its default of 1.0.
        loss, eval_state = sess.run(
            [cost, final_state],
            {inputs: eval_x, targets: eval_y, initial_state: eval_state})
        losses.append(loss)
    return np.mean(losses)


step = 0
# Seconds spent in training steps since the last report. This is measured
# time; it used to be the last step's duration multiplied by 500.
train_took = 0.
for epoch in range(num_epochs):
    state = sess.run(initial_state, {inputs: train_batches[0][0]})
    # Losses of the current epoch; the reported value is their running mean.
    train_loss = []
    for x, y in train_batches:
        step += 1
        start = time.time()
        feed = {
            inputs: x,
            targets: y,
            initial_state: state,
            lr: learning_rate,
            output_keep_prob: lstm_keep_prob,
            embeddings_keep_prob: embedd_keep_prob
        }
        train_loss_, state, _ = sess.run([cost, final_state, train_op], feed)
        train_loss.append(train_loss_)
        train_took += time.time() - start

        # Every 500 steps: validate, report and checkpoint.
        if step % 500 == 0:
            print('Step:', step,
                  'Train Loss:', np.mean(train_loss),
                  'Valid Loss:', _mean_loss(test_batches),
                  "in %.2f secs" % train_took)
            train_took = 0.

            saver.save(sess, checkout_dir + 'model'+str(step))

        # Every 1000 steps: print a few samples to eyeball progress.
        if step % 1000 == 0:
            print('#' * 50)
            print(generate(starting_text='Homer Simpson: Where did the dog', generate_length=50))
            print('*' * 50)
            print(generate(starting_text='Bart Simpson:', generate_length=50))
            print('*' * 50)
            print(generate(starting_text='Moe Szyslak:', generate_length=50))
            print('#' * 50)
        if step >= num_steps:
            break
    # Leave the epoch loop too once the step budget is used up.
    if step >= num_steps:
        break

Step: 500 Train Loss: 2.58822 Valid Loss: 2.17844 in 44.54 secs
Step: 1000 Train Loss: 2.33351 Valid Loss: 2.03975 in 44.01 secs
##################################################
Homer Simpson: Where did the dog don't have more a way. He's sent you a lot lettuce of that book time in.

Homer Simpson: (
**************************************************
Bart Simpson: (Reading, Guard, ", Crossing Crazed For To Kent The Money, Weak Donuts Cause I I'
**************************************************
Moe Szyslak: Just all why the Live.

Homer Simpson: Right of Mommy!

Lionel Leonard: Done off.

Lisa 
##################################################
Step: 1500 Train Loss: 2.22151 Valid Loss: 1.9756 in 44.95 secs
Step: 2000 Train Loss: 1.937 Valid Loss: 1.93837 in 44.22 secs
##################################################
Homer Simpson: Where did the dog think by what are you gonna go laughing?

Homer Simpson: Bring yourself in.

Carl Carlson: Lenny State Love
**************************

Step: 13000 Train Loss: 1.51109 Valid Loss: 1.89545 in 43.55 secs
##################################################
Homer Simpson: Where did the dog beat?

(Night Gallery-Type Set: Int. heaven's clothing store - day)

Selma Bouvier: (Nostalgic) 
**************************************************
Bart Simpson: Listen, Mom, this is the Springfield Habitat. The top channel's almost too long.

Bart Simpson: D'oh
**************************************************
Moe Szyslak: And I've never realized this to that girl before Carl like the music store.

Homer Simpson: Quimby.

Homer
##################################################
Step: 13500 Train Loss: 1.49673 Valid Loss: 1.91323 in 44.01 secs
Step: 14000 Train Loss: 1.48899 Valid Loss: 1.91255 in 44.19 secs
##################################################
Homer Simpson: Where did the dog please leave, what else did you got?

Homer Simpson: (Meekly) This knocks just thinking this is you do.


*******************************************

Homer Simpson: Where did the dog in a bag of garbage?

Seymour Skinner: Yes.

Ralph Wiggum: I don't have private manager Bart Simpson
**************************************************
Bart Simpson: I don't want to be here for the English close Saturday. For once, you found a business report to the school
**************************************************
Moe Szyslak: Oh, you're hot, too. But at least I'm going to build this conversation.

(Barn: Int. barn 
##################################################
Step: 25500 Train Loss: 1.24863 Valid Loss: 2.08712 in 45.47 secs


KeyboardInterrupt: 

In [8]:
# Close the TensorFlow session now that training is finished.
sess.close()