In [1]:
# use only the first GPU
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [2]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Use TensorFlow 1.0 or newer'

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found! To train this neural network could take awhile.')
else:
    print('Default GPU Device:', tf.test.gpu_device_name())

Default GPU Device: /gpu:0


In [3]:
import pickle

text, words_to_ids, ids_to_words, token_dict = pickle.load(open('data/preprocess.p', mode='rb'))
print(len(text), len(ids_to_words))

4704276 8493


In [4]:
def get_batches(int_text, batch_size, seq_length):
    x = np.array(int_text[:-1], dtype=np.int32)
    y = np.array(int_text[1:], dtype=np.int32)

    dim1 = len(x) // (batch_size * seq_length)

    trim_len = len(x) - batch_size * dim1 * seq_length

    x = x[:-trim_len]
    y = y[:-trim_len]

    x = np.split(x.reshape(batch_size, -1), dim1, 1)
    y = np.split(y.reshape(batch_size, -1), dim1, 1)

    result = np.array(list(zip(x, y)))
    return result

## Params

In [5]:
num_epochs = 400
batch_size = 100
rnn_size = 2000
seq_length = 500
learning_rate = .001
show_every_n_epochs = 1

checkout_dir = 'checkpoints/'
os.makedirs(checkout_dir, exist_ok=True)

## Network

In [None]:
from tensorflow.contrib import seq2seq

vocab_size = len(ids_to_words)
tf.reset_default_graph()
train_graph = tf.Graph()
with train_graph.as_default():
    
    # inputs
    inputs = tf.placeholder(tf.int32, [None, None], name='input')
    inputs_shape = tf.shape(inputs)
    targets = tf.placeholder(tf.int32, [None, None], name='targets')
    lr = tf.placeholder(tf.float32, [], name='learning')

    # recurent nn
    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    cell = tf.contrib.rnn.MultiRNNCell([lstm])
    initial_state = cell.zero_state(inputs_shape[0], tf.float32)
    initial_state = tf.identity(initial_state, 'initial_state') # just to name it
    
    # embeddings
    params = tf.Variable(tf.random_uniform([vocab_size, rnn_size], -1., 1.))
    embeddings = tf.nn.embedding_lookup(params, inputs)
    
    # output and state
    outputs, final_state = tf.nn.dynamic_rnn(cell, embeddings, dtype=tf.float32)
    final_state = tf.identity(final_state, 'final_state')
    
    logits = tf.contrib.layers.fully_connected(outputs, vocab_size, activation_fn=None)
    
    # Probabilities for generating words
    probs = tf.nn.softmax(logits, name='probs')

    # Loss function
    cost = seq2seq.sequence_loss(
        logits,
        targets,
        tf.ones([inputs_shape[0], inputs_shape[1]]))

    # Optimizer
    optimizer = tf.train.AdamOptimizer(lr)

    # Gradient Clipping
    gradients = optimizer.compute_gradients(cost)
    capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients]
    train_op = optimizer.apply_gradients(capped_gradients)
    init = tf.global_variables_initializer()

## Train

In [None]:
import time

batches = get_batches(text, batch_size, seq_length)
sess = tf.Session(graph=train_graph)

sess.run(init)

for epoch in range(num_epochs):
    state = sess.run(initial_state, {inputs: batches[0][0]})
    start = time.time()

    for _, (x, y) in enumerate(batches):
        feed = {inputs: x, targets: y, initial_state: state, lr: learning_rate}
        train_loss, state, _ = sess.run([cost, final_state, train_op], feed)
    
    if epoch % show_every_n_epochs == 0:
        print('Epoch:', epoch, 'train_loss:', train_loss, 'time:', "%.2f" %  (time.time() - start))
        with train_graph.as_default():
            saver = tf.train.Saver()
            saver.save(sess, checkout_dir + 'model')

Epoch: 0 train_loss: 2.52678 time: 365.33
Epoch: 1 train_loss: 2.25463 time: 370.72
Epoch: 2 train_loss: 2.14637 time: 370.05
Epoch: 3 train_loss: 2.07989 time: 368.42
Epoch: 4 train_loss: 2.03941 time: 372.07
Epoch: 5 train_loss: 1.99261 time: 370.46
Epoch: 6 train_loss: 1.95724 time: 369.89
Epoch: 7 train_loss: 1.92521 time: 372.46
Epoch: 8 train_loss: 1.89262 time: 372.74
Epoch: 9 train_loss: 1.8653 time: 373.93
Epoch: 10 train_loss: 1.83624 time: 371.53
Epoch: 11 train_loss: 1.80836 time: 369.72
Epoch: 12 train_loss: 1.78733 time: 368.64
Epoch: 13 train_loss: 1.7635 time: 373.40
Epoch: 14 train_loss: 1.73545 time: 371.39
Epoch: 15 train_loss: 1.7118 time: 373.41
Epoch: 16 train_loss: 1.68674 time: 372.78
Epoch: 17 train_loss: 1.66006 time: 376.00
Epoch: 18 train_loss: 1.63692 time: 369.14
Epoch: 19 train_loss: 1.61482 time: 371.29
Epoch: 20 train_loss: 1.59468 time: 373.58
Epoch: 21 train_loss: 1.56556 time: 372.23
Epoch: 22 train_loss: 1.54162 time: 373.24
Epoch: 23 train_loss: 1.

In [None]:
sess.close()