In [1]:
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.contrib import legacy_seq2seq
import random
import numpy as np
import time
import os
from six.moves import cPickle

import codecs
import collections

import argparse


class Model():
    """Word-level multi-layer LSTM language model (TensorFlow 1.x graph mode).

    Constructing an instance adds the placeholders, the stacked LSTM cell,
    the softmax projection, the loss and an Adam training op to the default
    graph. `sample` generates text from a session holding trained weights.
    """

    def __init__(self, data_dir,rnn_size,num_layers,model,batch_size,seq_length,num_epochs,save_every,grad_clip,learning_rate,decay_rate,gpu_mem,init_from, vocab_size, infer=False):
        """Build the graph.

        Only `rnn_size`, `num_layers`, `batch_size`, `seq_length`,
        `grad_clip`, `vocab_size` and `infer` are read here. The remaining
        parameters are accepted so existing call sites keep working but are
        not used by this constructor.

        Args:
            rnn_size: number of units per LSTM layer (also the embedding width).
            num_layers: number of stacked LSTM layers.
            batch_size: sequences per batch (forced to 1 when `infer`).
            seq_length: time steps per sequence (forced to 1 when `infer`).
            grad_clip: global-norm threshold used to clip gradients.
            vocab_size: number of distinct words.
            infer: when True, build the graph for one-word-at-a-time sampling.
        """
        if infer:
            # Sampling feeds a single word per session call.
            batch_size = 1
            seq_length = 1

        cell_fn = rnn.BasicLSTMCell

        cells = [cell_fn(rnn_size) for _ in range(num_layers)]
        self.cell = cell = rnn.MultiRNNCell(cells)
        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = cell.zero_state(batch_size, tf.float32)
        # Bookkeeping counters are not model parameters, so none of them is
        # trainable (an int32 counter must never reach the optimizer).
        self.batch_pointer = tf.Variable(0, name="batch_pointer", trainable=False, dtype=tf.int32)
        self.inc_batch_pointer_op = tf.assign(self.batch_pointer, self.batch_pointer + 1)
        self.epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False)
        self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)
        tf.summary.scalar("time_batch", self.batch_time)

        def variable_summaries(var):
            """Attach mean/max/min scalar summaries for `var`."""
            with tf.name_scope('summaries'):
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean', mean)
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
            variable_summaries(softmax_w)
            softmax_b = tf.get_variable("softmax_b", [vocab_size])
            variable_summaries(softmax_b)
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
                # One [batch_size, rnn_size] tensor per time step.
                inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), seq_length, 1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            """Turn the previous step's output into the next step's input embedding."""
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        # Feed the model's own predictions back in only when sampling. During
        # training every step must see the real input word; passing the loop
        # function unconditionally would ignore all inputs after the first.
        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs, self.initial_state, cell,
            loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([batch_size * seq_length])],
                vocab_size)
        self.cost = tf.reduce_sum(loss) / batch_size / seq_length
        tf.summary.scalar("cost", self.cost)
        self.final_state = last_state
        # Created at 0.0; presumably the training loop assigns the real
        # learning rate before running train_op -- TODO confirm with caller.
        self.lr = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                grad_clip)

        optimizer = tf.train.AdamOptimizer(self.lr)

        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    def sample(self, sess, words, vocab, num, prime='first', sampling_type=1):
        '''
        Generate text from a model whose weights are loaded in `sess`.

        Args:
            sess: session holding the model variables.
            words: sequence mapping word id -> word.
            vocab: dict mapping word -> word id (unknown words map to id 0).
            num: number of words to generate.
            prime: text that starts the output. All but its last word are fed
                through the network to set the recurrent state; the last word
                is the first generation input. If it contains no words, a
                random vocabulary word is used instead.
            sampling_type: 0 = always take the most likely word,
                2 = sample only right after a newline token (argmax otherwise),
                any other value = sample from the predicted distribution.

        Returns:
            The prime followed by the generated words, separated by spaces.
        '''
        def weighted_pick(weights):
            """Draw an index with probability proportional to `weights`."""
            cumulative = np.cumsum(weights)
            total = np.sum(weights)
            index = int(np.searchsorted(cumulative, np.random.rand() * total))
            # sum() and cumsum()[-1] can differ by rounding, which would put
            # the draw past the last bucket; clamp to a valid index.
            return min(index, len(weights) - 1)

        def encode(token):
            """Wrap the id of `token` in the [1, 1] batch the graph expects."""
            x = np.zeros((1, 1), dtype=np.int32)
            x[0, 0] = vocab.get(token, 0)
            return x

        prime_words = prime.split()
        if not prime_words:
            # Nothing to start from: fall back to a random known word.
            prime = random.choice(list(vocab.keys()))
            prime_words = [prime]

        state = sess.run(self.cell.zero_state(1, tf.float32))
        # Warm up the recurrent state on the prime so that the whole prime,
        # not just its last word, conditions the generated text.
        for token in prime_words[:-1]:
            feed = {self.input_data: encode(token), self.initial_state: state}
            state = sess.run(self.final_state, feed)

        ret = prime
        word = prime_words[-1]
        for _ in range(num):
            feed = {self.input_data: encode(word), self.initial_state: state}
            [probs, state] = sess.run([self.probs, self.final_state], feed)
            p = probs[0]
            if sampling_type == 0:
                sample = np.argmax(p)
            elif sampling_type == 2:
                if word == '\n':
                    sample = weighted_pick(p)
                else:
                    sample = np.argmax(p)
            else:
                sample = weighted_pick(p)
            pred = words[sample]
            ret += ' ' + pred
            word = pred
        return ret


In [2]:
# Inference cell: rebuild the vocabulary, restore the trained weights and
# generate `n` words starting from `prime`.

# --- Hyper-parameters (must match the values used to train the checkpoint) ---
#data_dir = "C:\\Users\\SAIKUMAR\\Desktop\\vikram vqa"
data_dir= "/home/vibertron/Desktop"
rnn_size = 256
num_layers = 2
model = 'lstm'
batch_size = 5
seq_length = 25
num_epochs = 50
save_every = 100
grad_clip = 5.
learning_rate= 0.002
decay_rate = 0.97
gpu_mem = 0.666
init_from = None
#input_file = os.path.join(data_dir, "corp.txt")
vocab_file = os.path.join(data_dir, "vocab1.pkl")
#with codecs.open(input_file, "r", encoding='utf8') as f:

# --- Corpus ------------------------------------------------------------------
f="""YOU don't know about me without you have read a book by the name of The
Adventures of Tom Sawyer; but that ain't no matter.  That book was made
by Mr. Mark Twain, and he told the truth, mainly.  There was things
which he stretched, but mainly he told the truth.  That is nothing.  I
never seen anybody but lied one time or another, without it was Aunt
Polly, or the widow, or maybe Mary.  Aunt Polly--Tom's Aunt Polly, she
is--and Mary, and the Widow Douglas is all told about in that book, which
is mostly a true book, with some stretchers, as I said before."""

# --- Vocabulary --------------------------------------------------------------
data = f
data = data.replace(","," ")
data = data.replace("."," ")
data = data.replace("''"," ")
x_text = data.split()
word_counts = collections.Counter(x_text)
vocabulary_inv = [x[0] for x in word_counts.most_common()]
vocabulary_inv = list(sorted(vocabulary_inv))
# word -> id, using positions in the alphabetically sorted list.
vocab = {x: i for i, x in enumerate(vocabulary_inv)}
# id -> word MUST be the same sorted list that `vocab` was built from.
# Using the frequency-ordered list here made ids decode to the wrong words.
words = vocabulary_inv

vocab_size = len(words)
n=200
prime = 'Il '
sample = 1

#with open("C:\\Users\\SAIKUMAR\\Desktop\\vikram vqa\\__MACOSX\\caption_sneak\\data\\vocab1.pkl", 'rb') as f1:
        #words, vocab = cPickle.load(f1)

# --- Restore and sample ------------------------------------------------------
model = Model(data_dir,rnn_size,num_layers,model,batch_size,seq_length,num_epochs,save_every,grad_clip,learning_rate,decay_rate,gpu_mem,init_from, vocab_size,True)
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    saver = tf.train.Saver(tf.global_variables())
    model_checkpoint_path="/home/vibertron/Desktop/model_test.ckpt-99"
    # get_checkpoint_state() expects the DIRECTORY that holds the
    # "checkpoint" index file, not a checkpoint prefix; given a prefix it
    # returns None. It still returns None if that index file is missing.
    ckpt = tf.train.get_checkpoint_state(os.path.dirname(model_checkpoint_path))
    print(ckpt)
    # Restore from the explicit prefix so this works with or without the
    # "checkpoint" index file.
    saver.restore(sess,model_checkpoint_path)
    results = model.sample(sess, words, vocab, n, prime, sample)


Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
None
INFO:tensorflow:Restoring parameters from /home/vibertron/Desktop/model_test.ckpt-99


In [3]:
# Show the text generated by the sampling cell above (prime followed by the sampled words).
print(results)

Il  don't Aunt mainly know me me you no you you about or but Polly Mr it me about lied That ain't read is--and but said maybe is--and Twain Twain said said YOU which is--and it Twain about of in a another name Twain as time anybody true but don't it it with There nothing name matter mainly she matter matter she Polly--Tom's all made all stretchers you Widow seen you seen the seen seen truth that a which Mr seen have Mr but before have That before me have before ain't in read in before in but YOU in That maybe is--and before is--and have That no truth Aunt all Widow true I truth matter name matter stretched stretched matter things things things mainly mainly Polly--Tom's Polly--Tom's made stretchers by Adventures it it seen seen Widow the truth seen which have which the have Polly have no maybe which mostly which me maybe maybe have have That in YOU he in That Twain That ain't Twain ain't the lied in me said YOU anybody of true matter Aunt true as name she mainly matter she Polly--Tom's