In [1]:
from __future__ import absolute_import, division, print_function

!pip install tensorflow-gpu==2.0.0-alpha0
import tensorflow as tf
device_name = tf.test.gpu_device_name()
print (device_name)

/device:GPU:0


In [0]:
import matplotlib.pyplot as plt
% matplotlib inline
import numpy as np
import pandas as pd
import time
import random

In [3]:
from google.colab import drive

drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [0]:
init_batch_size = 32
emb_size = 200
hidden_size = 512
lr = 1e-3

In [0]:
train_path = '/content/drive/My Drive/Colab Notebooks/cs11747/data/lm/train.txt' # train set
dev_path = '/content/drive/My Drive/Colab Notebooks/cs11747/data/lm/valid.txt' # dev set

In [0]:
def read_dataset(filename):
    with open(filename, "r") as f:
        data = []
        for line in f:
            words = line.lower().strip()
            words = '<start> ' + words + ' <end>'
            data.append(words)
        return data

In [0]:
train_set = read_dataset(train_path)
dev_set = read_dataset(dev_path)

In [0]:
tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='!"#$%&()*+,-./:;=?@[\]^_`{|}~ ') 
tokenizer.fit_on_texts(train_set)

In [9]:
word_index = tokenizer.word_index
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
nwords = len(word_index)
print (nwords)

9651


In [10]:
word_index['<start>']

3

In [0]:
train_seq = tokenizer.texts_to_sequences(train_set)
train_seq.sort(key=lambda x: -len(x))

In [0]:
dev_seq = tokenizer.texts_to_sequences(dev_set)
dev_seq.sort(key=lambda x: -len(x))

In [0]:
def train_gen(init_batch_size=init_batch_size):
    i = 0
    init_len = len(train_seq[0])
    while i<len(train_seq):
        batch_size = init_batch_size * init_len // len(train_seq[i])
        if i+batch_size < len(train_seq):
            batch = tf.keras.preprocessing.sequence.pad_sequences(train_seq[i:i+batch_size], padding='post', maxlen=len(train_seq[i]))
        else:
            batch = tf.keras.preprocessing.sequence.pad_sequences(train_seq[i:], padding='post', maxlen=len(train_seq[i]))
        i += batch_size
        yield batch

In [0]:
def dev_gen(init_batch_size=init_batch_size):
    i = 0
    init_len = len(dev_seq[0])
    while i<len(dev_seq):
        batch_size = init_batch_size * init_len // len(dev_seq[i])
        if i+batch_size < len(dev_seq):
            batch = tf.keras.preprocessing.sequence.pad_sequences(dev_seq[i:i+batch_size], padding='post', maxlen=len(dev_seq[i]))
        else:
            batch = tf.keras.preprocessing.sequence.pad_sequences(dev_seq[i:], padding='post', maxlen=len(dev_seq[i]))
        i += batch_size
        yield batch

In [0]:
class LM(tf.keras.Model):
    def __init__(self, embed_size, hidden_size):
        super(LM, self).__init__()
        self.embedding = tf.keras.layers.Embedding(nwords+1, embed_size, trainable=True)
        self.gru = tf.keras.layers.GRU(hidden_size, 
                                       return_sequences=True, 
                                       return_state=True, 
                                       recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(nwords+1)

    def call(self, x, hidden):
        x = self.embedding(x)
        # passing one time step to the GRU
        output, state = self.gru(x, initial_state=hidden)
        output = tf.reshape(output, (-1, output.shape[2]))
        x = self.fc(output)

        return x, state

In [0]:
model = LM(emb_size, hidden_size)

In [0]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)

In [0]:
def train_step(sents):
    loss = 0  
    with tf.GradientTape() as tape:
        hidden = None   
        # Teacher forcing - feeding the target as the next input
        for t in range(sents.shape[1]-1):
            step_input = tf.expand_dims(sents[:, t], 1)   
            predictions, hidden = model(step_input, hidden)
            loss += loss_function(sents[:, t+1], predictions)

    batch_loss = loss / int(sents.shape[1]-1)
    variables = model.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
  
    return batch_loss

In [0]:
def eval_step(sents):
    loss = 0  
    hidden = None   
    # Teacher forcing - feeding the target as the next input
    for t in range(sents.shape[1]-1):
        step_input = tf.expand_dims(sents[:, t], 1)   
        predictions, hidden = model(step_input, hidden)
        loss += loss_function(sents[:, t+1], predictions)

    batch_loss = loss / int(sents.shape[1]-1)
  
    return batch_loss

In [20]:
EPOCHS = 20

for epoch in range(EPOCHS):
    start = time.time()
    print ('Epoch {} start ...... '.format(epoch + 1))
    total_loss = 0
    batch = 0
    for sents in train_gen():
        batch_loss = train_step(sents)
        total_loss += batch_loss
        batch += 1

        if batch % 100 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, batch, batch_loss))
    
    # Evaluate on dev set
    dev_loss = 0
    batch_eval = 0
    for sents in dev_gen():
        batch_loss = eval_step(sents)
        dev_loss += batch_loss
        batch_eval += 1

    print('Epoch {} finished in {} seconds. Training loss {:.4f}. Evaluation loss {:.4f}.'.format(epoch + 1, time.time() - start, total_loss / batch, dev_loss / batch_eval))

Epoch 1 start ...... 
Epoch 1 Batch 100 Loss 6.7293
Epoch 1 Batch 200 Loss 6.5328
Epoch 1 Batch 300 Loss 5.3926
Epoch 1 finished in 207.0101182460785 seconds. Training loss 6.5375. Evaluation loss 7.6251.
Epoch 2 start ...... 
Epoch 2 Batch 100 Loss 6.1411
Epoch 2 Batch 200 Loss 5.9658
Epoch 2 Batch 300 Loss 5.0788
Epoch 2 finished in 195.47878861427307 seconds. Training loss 5.9646. Evaluation loss 6.4226.
Epoch 3 start ...... 
Epoch 3 Batch 100 Loss 5.8104
Epoch 3 Batch 200 Loss 5.6663
Epoch 3 Batch 300 Loss 4.8821
Epoch 3 finished in 196.83963751792908 seconds. Training loss 5.6274. Evaluation loss 6.4168.
Epoch 4 start ...... 
Epoch 4 Batch 100 Loss 5.5786
Epoch 4 Batch 200 Loss 5.4648
Epoch 4 Batch 300 Loss 4.6743
Epoch 4 finished in 205.0242691040039 seconds. Training loss 5.4069. Evaluation loss 6.3162.
Epoch 5 start ...... 
Epoch 5 Batch 100 Loss 5.3691
Epoch 5 Batch 200 Loss 5.2574
Epoch 5 Batch 300 Loss 4.4994
Epoch 5 finished in 199.41108393669128 seconds. Training loss 5.19

In [0]:
def generate_step(batch_size):
    hidden = tf.Variable(tf.zeros([batch_size, hidden_size]))
    #hidden = tf.random.normal(shape=(batch_size, hidden_size), stddev=10)
    step_input = tf.random.uniform(shape=(batch_size, 1), maxval=nwords)
    #step_input = tf.fill([batch_size, 1], word_index['<start>'])
    results = [''] * batch_size
    for t in range(100): 
        predictions, hidden = model(step_input, hidden)
        predicted_id = tf.argmax(predictions, axis=1).numpy()

        results = [r + reverse_word_index.get(i, '<end>') + ' ' for (r, i) in zip(results, predicted_id)]
        
        # the predicted ID is fed back into the model
        step_input = tf.expand_dims(predicted_id, 1)
    return results

In [0]:
def print_sents(batch_size):
    for i, sent in enumerate(generate_step(batch_size)):
        print ('Sentence {} generated: '.format(i))
        words = sent.split()
        s = ''
        for w in words:
            if w != '<end>':
                s = s + w + ' '
            else:
                break
        print (s)

In [26]:
print_sents(20)

Sentence 0 generated: 
by comparison the commerce department 's <unk> 
Sentence 1 generated: 
henderson deb lynch liberation mather mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather analytical mather 
Sentence 2 gen