In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
import inspect
import pickle
import warnings
warnings.filterwarnings('ignore')

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as stream:
        return pickle.load(stream)


# NOTE: unpickling can execute arbitrary code; only load files you trust.
# The two mappings translate between words and the integer ids fed to and
# sampled from the model.
word_to_id = _load_pickle('word_to_id.pickle')
id_to_word = _load_pickle('id_to_word.pickle')

In [3]:
# Directory prefix for the data files.
# NOTE(review): no cell visible here reads this variable -- the test-set
# cell spells out "data/ptb.test.txt" directly; confirm whether it is still needed.
data_path = 'data/'

In [4]:
class config(object):
    """Hyperparameters for the PTB LSTM language model and its training."""

    # Model / batch geometry
    vocab_size = 9999      # number of word ids (embedding rows, softmax width)
    batch_size = 20        # sequences per batch
    num_steps = 20         # time steps the LSTM is unrolled for (sequence length)
    hidden_size = 200      # units per LSTM layer; also the embedding width
    keep_prob = 0.5        # dropout keep probability (1 - dropout rate)
    num_layers = 2         # how many LSTM layers are stacked

    # Optimisation
    max_grad_norm = 5      # global-norm threshold for gradient clipping
    init_scale = 0.1       # weights start uniformly in [-init_scale, init_scale]
    max_epoch = 4          # epochs trained at the initial learning rate
    max_max_epoch = 13     # total number of training epochs
    learning_rate = 1.0    # initial learning rate
    lr_decay = 0.5         # per-epoch learning-rate decay applied after `max_epoch`


# Generation feeds one word at a time, so the evaluation copy overrides the
# batch geometry on the instance (the class-level defaults stay untouched).
eval_config = config()
eval_config.batch_size = eval_config.num_steps = 1

In [5]:
class PTBModel(object):
    """LSTM language model for PTB, built as a TensorFlow 1.x graph.

    Constructing an instance adds to the current default graph: an embedding
    lookup, a stack of `config.num_layers` BasicLSTMCells unrolled for
    `config.num_steps` steps, a softmax projection, a sampling op and the
    sequence loss. When `is_training` is true, the gradient-clipped SGD
    update and the learning-rate assignment ops are added as well.

    The training-only attributes (`_lr`, `_train_op`, `_new_lr`,
    `_lr_update`) exist only when `is_training` is true; `lr`, `train_op`
    and `assign_lr` raise AttributeError on a non-training model.
    """

    def __init__(self, is_training, config, input_=None):
        """Build the model graph.

        Args:
            is_training: when true, dropout is applied (if
                `config.keep_prob < 1`) and the optimizer ops are created.
            config: object exposing `batch_size`, `num_steps`,
                `hidden_size`, `vocab_size`, `keep_prob`, `num_layers` and
                `max_grad_norm`.
            input_: optional object exposing `input_data` and `targets`
                tensors (training / validation). When None, int32
                placeholders of shape [batch_size, num_steps] are created
                instead (text generation).
        """
        batch_size = config.batch_size
        num_steps = config.num_steps
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        # Always define `_input` so the `input` property returns None for a
        # generation model instead of raising AttributeError.
        self._input = input_

        if input_ is not None:
            # For normal training and validation
            self._input_data = input_.input_data
            self._targets = input_.targets
        else:
            # For text generation
            self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
            self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        def lstm_cell():
            # Pass `reuse` only when this TensorFlow version's BasicLSTMCell
            # accepts it. getfullargspec replaces the deprecated getargspec
            # (removed in Python 3.11); its `.args` field is the same list.
            init_args = inspect.getfullargspec(
                tf.contrib.rnn.BasicLSTMCell.__init__).args
            if 'reuse' in init_args:
                return tf.contrib.rnn.BasicLSTMCell(
                    hidden_size,
                    forget_bias=0.0,
                    state_is_tuple=True,
                    reuse=tf.get_variable_scope().reuse)
            return tf.contrib.rnn.BasicLSTMCell(
                hidden_size,
                forget_bias=0.0,
                state_is_tuple=True)

        # Dropout is used for training only.
        use_dropout = is_training and config.keep_prob < 1

        def attn_cell():
            # One layer of the stack, wrapped with output dropout if enabled.
            cell = lstm_cell()
            if use_dropout:
                cell = tf.contrib.rnn.DropoutWrapper(
                    cell, output_keep_prob=config.keep_prob)
            return cell

        # Stacking multiple LSTMs
        attn_cells = [attn_cell() for _ in range(config.num_layers)]
        stacked_lstm = tf.contrib.rnn.MultiRNNCell(attn_cells, state_is_tuple=True)

        self._initial_state = stacked_lstm.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [vocab_size, hidden_size], dtype=tf.float32)
            input_embeddings = tf.nn.embedding_lookup(embedding, self._input_data)
            # The shape of `input_embeddings` is [batch_size, num_steps, hidden_size]

        # Dropout on the embedded inputs (training only)
        if use_dropout:
            input_embeddings = tf.nn.dropout(input_embeddings, config.keep_prob)

        # Unroll the stacked LSTM manually, one time step at a time.
        outputs = []
        state = self._initial_state

        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0:
                    # Share the LSTM weights created at step 0 with every later step.
                    tf.get_variable_scope().reuse_variables()

                (cell_output, state) = stacked_lstm(input_embeddings[:, time_step, :], state)
                outputs.append(cell_output)

        # Flatten to [batch_size * num_steps, hidden_size] for the projection.
        output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, hidden_size])

        # Compute logits
        softmax_w = tf.get_variable(
            "softmax_w", [hidden_size, vocab_size], dtype=tf.float32)
        softmax_b = tf.get_variable(
            "softmax_b", [vocab_size], dtype=tf.float32)

        self._logits = logits = tf.matmul(output, softmax_w) + softmax_b

        # Draw one word id per row of the logits (used for text generation)
        self._logits_sample = tf.multinomial(logits, 1)

        # Reshape logits to be 3-D tensor for sequence loss
        logits = tf.reshape(logits, [batch_size, num_steps, vocab_size])

        # Uniform weights; the loss is averaged over the batch but kept
        # separate per time step.
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,  # shape: [batch_size, num_steps, vocab_size]
            self._targets,  # shape: [batch_size, num_steps]
            tf.ones([batch_size, num_steps], dtype=tf.float32),
            average_across_timesteps=False,
            average_across_batch=True)

        # Total cost is the sum of the per-time-step losses.
        self._cost = cost = tf.reduce_sum(loss)
        self._final_state = state

        if not is_training:
            return

        # Optimizer: plain SGD on gradients clipped by global norm. The
        # learning rate lives in a non-trainable variable set via assign_lr.
        self._lr = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(cost, tvars), config.max_grad_norm)

        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        self._new_lr = tf.placeholder(
            tf.float32, shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)

    def assign_lr(self, session, lr_value):
        """Set the learning-rate variable to `lr_value` (training models only)."""
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

    @property
    def input(self):
        """The `input_` object given to the constructor, or None."""
        return self._input

    @property
    def input_data(self):
        """Input word ids: `input_.input_data` or the generation placeholder."""
        return self._input_data

    @property
    def targets(self):
        """Target word ids: `input_.targets` or the generation placeholder."""
        return self._targets

    @property
    def initial_state(self):
        """Zero state of the stacked LSTM for `config.batch_size` sequences."""
        return self._initial_state

    @property
    def cost(self):
        """Scalar loss: sum over time steps of the batch-averaged sequence loss."""
        return self._cost

    @property
    def final_state(self):
        """LSTM state after the last unrolled time step."""
        return self._final_state

    @property
    def lr(self):
        """Learning-rate variable (defined for training models only)."""
        return self._lr

    @property
    def train_op(self):
        """Op applying the clipped gradients (defined for training models only)."""
        return self._train_op

    @property
    def logits_sample(self):
        """Op drawing one word id per logits row via `tf.multinomial`."""
        return self._logits_sample

In [42]:
import reader 
import random
# Pick five distinct positions in the test split (position 0 is never drawn)
# and show the word found at each one.
# NOTE(review): presumably `reader._read_words` returns the file's words as a
# list -- confirm against reader.py.
k = reader._read_words("data/ptb.test.txt")
x = random.sample(range(1, len(k)), 5)
for i, position in enumerate(x):
    print('{})Word to be predicted:{}'.format(i, k[position]))

0)Word to be predicted:greater
1)Word to be predicted:economy
2)Word to be predicted:to
3)Word to be predicted:he
4)Word to be predicted:a


In [55]:
# Starting word for generation: its id, shaped (1, 1) to match the
# batch_size=1 / num_steps=1 input of the generation model.
feed = np.reshape(np.array(word_to_id['a']), (1, 1))
# Number of words to sample after the starting word.
text_length = 20

def generate_text(session, model, feed, text_length):
    """Sample `text_length` successive outputs from `model`, starting at `feed`.

    Each step feeds the current input together with the LSTM state returned
    by the previous step, then uses the value fetched from
    `model.logits_sample` as the next input.

    Args:
        session: object whose `run(fetches, feed_dict)` evaluates the model.
        model: exposes `initial_state` (iterable of `(c, h)` pairs usable as
            feed keys), `final_state`, `logits_sample` and `input_data`.
        feed: first input value; also the first element of the result.
        text_length: number of sampling steps to run.

    Returns:
        A list of `text_length + 1` items: `feed` followed by each sample.
    """
    lstm_state = session.run(model.initial_state)
    requested = {"final_state": model.final_state, "logits": model.logits_sample}

    sampled = [feed]
    current_input = feed

    for _ in range(text_length):
        step_feed = {model.input_data: current_input}

        # Carry the recurrent state over from the previous step, layer by layer.
        for layer, (c_key, h_key) in enumerate(model.initial_state):
            step_feed[c_key] = lstm_state[layer].c
            step_feed[h_key] = lstm_state[layer].h

        step_result = session.run(requested, step_feed)

        lstm_state = step_result["final_state"]
        current_input = step_result["logits"]
        sampled.append(current_input)

    return sampled

In [56]:
# Directory used as the tf.train.Supervisor logdir and searched for the
# checkpoint that is restored before generating text.
model_path = 'assignment2'

In [57]:
with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

    # Build the generation model (batch_size = num_steps = 1, no dropout,
    # no optimizer) under the "Model" variable scope.
    with tf.name_scope("Feed"):
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            mfeed = PTBModel(is_training=False, config=eval_config)

    # Fail early with a readable message instead of an AttributeError on
    # None when the directory holds no checkpoint.
    ckpt = tf.train.get_checkpoint_state(model_path)
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise FileNotFoundError("No checkpoint found in directory: %s" % model_path)

    # save_model_secs=0 disables the Supervisor's checkpoint thread. Left on,
    # it saves this inference-only graph into `model_path` and overwrites the
    # trained checkpoint with one lacking the training-only variables.
    sv = tf.train.Supervisor(logdir=model_path, save_model_secs=0)
    with sv.managed_session() as session:
        sv.saver.restore(session, ckpt.model_checkpoint_path)
        print("Model restored from file: %s\n" % ckpt.model_checkpoint_path)

        # The seed must match the model's [batch_size, num_steps] = [1, 1] input.
        seed_input = np.array(feed).reshape(1, 1)
        generated_text = generate_text(session, mfeed, seed_input, text_length)
        generated_text = ' '.join([id_to_word[text[0, 0]] for text in generated_text])
        # Report the actual seed word rather than a hard-coded literal.
        print('Word to be predicted : ' + str(id_to_word[seed_input[0, 0]]))
        print("Generated text:", generated_text)



INFO:tensorflow:Restoring parameters from assignment2\model.ckpt
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path assignment2\model.ckpt
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:Restoring parameters from assignment2\model.ckpt
Model restored from file: assignment2\model.ckpt

Word to be predicted : a
Generated text: a new orleans ship at the lead of it to help the <unk> but in the business <unk> disrupted <unk> <unk>
