In [1]:
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.contrib import legacy_seq2seq
import random
import numpy as np
import time
import os
from six.moves import cPickle

import codecs
import collections

import argparse


class Model():
    def __init__(self, data_dir,rnn_size,num_layers,model,batch_size,seq_length,num_epochs,save_every,grad_clip,learning_rate,decay_rate,gpu_mem,init_from, vocab_size, infer=False):#

        
        if infer:
            batch_size = 1
            seq_length = 1

        
        cell_fn = rnn.BasicLSTMCell

        cells = []
        for _ in range(num_layers):
            cell = cell_fn(rnn_size)
            cells.append(cell)
        self.cell = cell = rnn.MultiRNNCell(cells)
        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = cell.zero_state(batch_size, tf.float32)
        self.batch_pointer = tf.Variable(0, name="batch_pointer", trainable=True, dtype=tf.int32)
        self.inc_batch_pointer_op = tf.assign(self.batch_pointer, self.batch_pointer + 1)
        self.epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False)
        self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)
        tf.summary.scalar("time_batch", self.batch_time)
        def variable_summaries(var):
            with tf.name_scope('summaries'):
                mean = tf.reduce_mean(var)
                tf.summary.scalar('mean', mean)
                tf.summary.scalar('max', tf.reduce_max(var))
                tf.summary.scalar('min', tf.reduce_min(var))

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
            variable_summaries(softmax_w)
            softmax_b = tf.get_variable("softmax_b", [vocab_size])
            variable_summaries(softmax_b)
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
                inputs = tf.split(tf.nn.embedding_lookup(embedding, self.input_data), seq_length, 1)
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)
        outputs, last_state = legacy_seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = legacy_seq2seq.sequence_loss_by_example([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([batch_size * seq_length])],
                vocab_size)
        self.cost = tf.reduce_sum(loss) / batch_size / seq_length
        tf.summary.scalar("cost", self.cost)
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                grad_clip)

        optimizer = tf.train.AdamOptimizer(self.lr)

        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    def sample(self, sess, words, vocab, num, prime='first', sampling_type=1):
        '''
        This function is used to generate text, based on a saved model, with
        a text as input.
        It returns a string, composed of words chosen one by one by the model.
        '''
        def weighted_pick(weights):
            t = np.cumsum(weights)
            s = np.sum(weights)
            return(int(np.searchsorted(t, np.random.rand(1)*s)))


        
        ret = ''
        state = sess.run(self.cell.zero_state(1, tf.float32))
        ret = prime
        word = prime.split()[-1]
        for n in range(num):
            x = np.zeros((1, 1))
            x[0, 0] = vocab.get(word, 0)
            feed = {self.input_data: x, self.initial_state:state}
            [probs, state] = sess.run([self.probs, self.final_state], feed)
            p = probs[0]
            if sampling_type == 0:
                sample = np.argmax(p)
            elif sampling_type == 2:
                if word == '\n':
                    sample = weighted_pick(p)
                else:
                    sample = np.argmax(p)
            else:
                sample = weighted_pick(p)
            pred = words[sample]
            ret += ' ' + pred
            word = pred
        return ret


In [None]:
data_dir= "/home/vibertron/Desktop"
rnn_size = 256 
num_layers = 2 
model = 'lstm' 
batch_size = 2 
seq_length = 5 
num_epochs = 50
save_every = 100000
grad_clip = 5. 
learning_rate= 0.002 
decay_rate = 0.97 
gpu_mem = 0.666 
init_from = None

input_file = os.path.join(data_dir, "corpx.txt")
vocab_file = os.path.join(data_dir, "vocab1.pkl")

with codecs.open(input_file, "r", encoding=None) as f:
    data = f.read()
data = data.replace(","," ")
data = data.replace("."," ")
data = data.replace("''"," ")
x_text = data.split()

word_counts = collections.Counter(x_text)
vocabulary_inv = [x[0] for x in word_counts.most_common()]
vocabulary_inv = list(sorted(vocabulary_inv))

vocab = {x: i for i, x in enumerate(vocabulary_inv)}
words = [x[0] for x in word_counts.most_common()]

vocab_size = len(words)
with open(vocab_file, 'wb') as f:
    cPickle.dump((words), f)

    tensor = np.array(list(map(vocab.get, x_text)))
np.save("C:\\Users\\SAIKUMAR\\Desktop\\vikram vqa\\__MACOSX\\caption_sneak\\data\\tensorfile.npy",tensor)

print('tensor is:' + str(tensor))
print("It's shape: " + str(np.shape(tensor)))

num_batches = int((tensor.size) / (batch_size * seq_length))
print('number of batches is: ' + str(num_batches))

tensor = tensor[:num_batches * batch_size * seq_length]
print('The shape of the new tensor is: '+ str(np.shape(tensor)))
xdata = tensor
ydata = np.copy(tensor)

ydata[:-1] = xdata[1:]
print(xdata," and ",ydata)
ydata[-1] = xdata[0]
x_batches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
y_batches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)

pointer = 0
#with open(os.path.join(save_dir, 'words_vocab.pkl'), 'wb') as f:
    #cPickle.dump((words, vocab), f)

model = Model(data_dir,rnn_size,num_layers,model,batch_size,seq_length,num_epochs,save_every,grad_clip,learning_rate,decay_rate,gpu_mem,init_from, vocab_size)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_mem)
merged = tf.summary.merge_all()

with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        
        tf.global_variables_initializer().run()

        saver = tf.train.Saver(tf.global_variables())

        for e in range(model.epoch_pointer.eval(), num_epochs):
            sess.run(tf.assign(model.lr, learning_rate * (decay_rate ** e)))
            
            state = sess.run(model.initial_state)
            speed = 0
            pointer = 0
            
            if init_from is None:
                assign_op = model.epoch_pointer.assign(e)
                sess.run(assign_op)

            if init_from is not None:
                pointer = model.batch_pointer.eval()
                init_from = None

            for b in range(pointer, num_batches):
                start = time.time()
                x, y = x_batches[pointer], y_batches[pointer]
                pointer += 1
                feed = {model.input_data: x, model.targets: y, model.initial_state: state,
                        model.batch_time: speed}
                summary, train_loss, state, _, _ = sess.run([merged, model.cost, model.final_state,
                                                             model.train_op, model.inc_batch_pointer_op], feed)
            
                speed = time.time() - start

                if (e * num_batches + b) % batch_size == 0:
                    print("{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}" \
                        .format(e * num_batches + b,
                                num_epochs * num_batches,
                                e, train_loss, speed))
                
                if (e * num_batches + b) % save_every == 0 \
                        or (e==num_epochs-1 and b == num_batches-1): # save for the last result
                    checkpoint_path = os.path.join("C:\\Users\SAIKUMAR\\Desktop\\vikram vqa\\__MACOSX\\caption_sneak\\data", "model_test.ckpt")
                    saver.save(sess, checkpoint_path, global_step = e * num_batches + b)
                    print("model saved to {}".format(checkpoint_path))  

tensor is:[ 1656  4283  6392 ...  7382 11376 10369]
It's shape: (111811,)
number of batches is: 11181
The shape of the new tensor is: (111810,)
[ 1656  4283  6392 ...  5111  7382 11376]  and  [ 4283  6392  2016 ...  7382 11376 11376]
Instructions for updating:
This class is deprecated, please use tf.nn.rnn_cell.LSTMCell, which supports all the feature this cell currently has. Please replace the existing code with tf.nn.rnn_cell.LSTMCell(name='basic_lstm_cell').
0/559050 (epoch 0), train_loss = 9.385, time/batch = 0.111
model saved to C:\Users\SAIKUMAR\Desktop\vikram vqa\__MACOSX\caption_sneak\data/model_test.ckpt
2/559050 (epoch 0), train_loss = 9.386, time/batch = 0.092
4/559050 (epoch 0), train_loss = 9.380, time/batch = 0.095
6/559050 (epoch 0), train_loss = 9.370, time/batch = 0.083
8/559050 (epoch 0), train_loss = 9.293, time/batch = 0.084
10/559050 (epoch 0), train_loss = 9.192, time/batch = 0.089
12/559050 (epoch 0), train_loss = 8.426, time/batch = 0.080
14/559050 (epoch 0), tr

250/559050 (epoch 0), train_loss = 6.594, time/batch = 0.086
252/559050 (epoch 0), train_loss = 5.838, time/batch = 0.085
254/559050 (epoch 0), train_loss = 6.914, time/batch = 0.085
256/559050 (epoch 0), train_loss = 5.952, time/batch = 0.092
258/559050 (epoch 0), train_loss = 7.545, time/batch = 0.095
260/559050 (epoch 0), train_loss = 5.606, time/batch = 0.089
262/559050 (epoch 0), train_loss = 7.003, time/batch = 0.089
264/559050 (epoch 0), train_loss = 7.193, time/batch = 0.084
266/559050 (epoch 0), train_loss = 6.747, time/batch = 0.087
268/559050 (epoch 0), train_loss = 7.482, time/batch = 0.090
270/559050 (epoch 0), train_loss = 7.293, time/batch = 0.087
272/559050 (epoch 0), train_loss = 6.615, time/batch = 0.090
274/559050 (epoch 0), train_loss = 5.137, time/batch = 0.089
276/559050 (epoch 0), train_loss = 6.637, time/batch = 0.089
278/559050 (epoch 0), train_loss = 7.439, time/batch = 0.087
280/559050 (epoch 0), train_loss = 6.583, time/batch = 0.084
282/559050 (epoch 0), tr

522/559050 (epoch 0), train_loss = 6.195, time/batch = 0.085
524/559050 (epoch 0), train_loss = 6.861, time/batch = 0.085
526/559050 (epoch 0), train_loss = 7.467, time/batch = 0.084
528/559050 (epoch 0), train_loss = 7.558, time/batch = 0.087
530/559050 (epoch 0), train_loss = 6.592, time/batch = 0.086
532/559050 (epoch 0), train_loss = 6.372, time/batch = 0.083
534/559050 (epoch 0), train_loss = 7.322, time/batch = 0.086
536/559050 (epoch 0), train_loss = 7.028, time/batch = 0.085
538/559050 (epoch 0), train_loss = 7.962, time/batch = 0.083
540/559050 (epoch 0), train_loss = 7.936, time/batch = 0.085
542/559050 (epoch 0), train_loss = 6.607, time/batch = 0.087
544/559050 (epoch 0), train_loss = 8.291, time/batch = 0.088
546/559050 (epoch 0), train_loss = 8.516, time/batch = 0.088
548/559050 (epoch 0), train_loss = 7.276, time/batch = 0.085
550/559050 (epoch 0), train_loss = 6.977, time/batch = 0.086
552/559050 (epoch 0), train_loss = 7.917, time/batch = 0.084
554/559050 (epoch 0), tr

794/559050 (epoch 0), train_loss = 5.766, time/batch = 0.083
796/559050 (epoch 0), train_loss = 6.477, time/batch = 0.082
798/559050 (epoch 0), train_loss = 6.809, time/batch = 0.082
800/559050 (epoch 0), train_loss = 6.330, time/batch = 0.079
802/559050 (epoch 0), train_loss = 6.629, time/batch = 0.082
804/559050 (epoch 0), train_loss = 6.622, time/batch = 0.082
806/559050 (epoch 0), train_loss = 5.787, time/batch = 0.081
808/559050 (epoch 0), train_loss = 6.050, time/batch = 0.082
810/559050 (epoch 0), train_loss = 7.565, time/batch = 0.082
812/559050 (epoch 0), train_loss = 5.625, time/batch = 0.078
814/559050 (epoch 0), train_loss = 5.828, time/batch = 0.080
816/559050 (epoch 0), train_loss = 4.837, time/batch = 0.080
818/559050 (epoch 0), train_loss = 6.746, time/batch = 0.088
820/559050 (epoch 0), train_loss = 6.625, time/batch = 0.084
822/559050 (epoch 0), train_loss = 7.664, time/batch = 0.077
824/559050 (epoch 0), train_loss = 8.133, time/batch = 0.079
826/559050 (epoch 0), tr

1062/559050 (epoch 0), train_loss = 6.703, time/batch = 0.083
1064/559050 (epoch 0), train_loss = 7.770, time/batch = 0.080
1066/559050 (epoch 0), train_loss = 6.899, time/batch = 0.083
1068/559050 (epoch 0), train_loss = 7.084, time/batch = 0.081
1070/559050 (epoch 0), train_loss = 5.838, time/batch = 0.082
1072/559050 (epoch 0), train_loss = 8.040, time/batch = 0.082
1074/559050 (epoch 0), train_loss = 8.320, time/batch = 0.082
1076/559050 (epoch 0), train_loss = 6.019, time/batch = 0.082
1078/559050 (epoch 0), train_loss = 6.339, time/batch = 0.081
1080/559050 (epoch 0), train_loss = 6.132, time/batch = 0.082
1082/559050 (epoch 0), train_loss = 6.522, time/batch = 0.081
1084/559050 (epoch 0), train_loss = 6.171, time/batch = 0.081
1086/559050 (epoch 0), train_loss = 6.774, time/batch = 0.081
1088/559050 (epoch 0), train_loss = 7.286, time/batch = 0.081
1090/559050 (epoch 0), train_loss = 6.594, time/batch = 0.080
1092/559050 (epoch 0), train_loss = 7.906, time/batch = 0.081
1094/559

1330/559050 (epoch 0), train_loss = 4.873, time/batch = 0.082
1332/559050 (epoch 0), train_loss = 6.083, time/batch = 0.082
1334/559050 (epoch 0), train_loss = 7.442, time/batch = 0.082
1336/559050 (epoch 0), train_loss = 5.981, time/batch = 0.082
1338/559050 (epoch 0), train_loss = 7.396, time/batch = 0.083
1340/559050 (epoch 0), train_loss = 7.263, time/batch = 0.081
1342/559050 (epoch 0), train_loss = 8.792, time/batch = 0.082
1344/559050 (epoch 0), train_loss = 7.134, time/batch = 0.081
1346/559050 (epoch 0), train_loss = 7.447, time/batch = 0.081
1348/559050 (epoch 0), train_loss = 7.427, time/batch = 0.081
1350/559050 (epoch 0), train_loss = 6.103, time/batch = 0.082
1352/559050 (epoch 0), train_loss = 5.992, time/batch = 0.083
1354/559050 (epoch 0), train_loss = 6.817, time/batch = 0.081
1356/559050 (epoch 0), train_loss = 8.152, time/batch = 0.079
1358/559050 (epoch 0), train_loss = 6.455, time/batch = 0.081
1360/559050 (epoch 0), train_loss = 7.579, time/batch = 0.079
1362/559

1598/559050 (epoch 0), train_loss = 7.941, time/batch = 0.081
1600/559050 (epoch 0), train_loss = 8.145, time/batch = 0.080
1602/559050 (epoch 0), train_loss = 7.345, time/batch = 0.082
1604/559050 (epoch 0), train_loss = 5.996, time/batch = 0.081
1606/559050 (epoch 0), train_loss = 7.966, time/batch = 0.083
1608/559050 (epoch 0), train_loss = 8.464, time/batch = 0.080
1610/559050 (epoch 0), train_loss = 7.199, time/batch = 0.080
1612/559050 (epoch 0), train_loss = 7.696, time/batch = 0.082
1614/559050 (epoch 0), train_loss = 6.680, time/batch = 0.081
1616/559050 (epoch 0), train_loss = 5.391, time/batch = 0.081
1618/559050 (epoch 0), train_loss = 6.856, time/batch = 0.080
1620/559050 (epoch 0), train_loss = 5.933, time/batch = 0.082
1622/559050 (epoch 0), train_loss = 6.872, time/batch = 0.082
1624/559050 (epoch 0), train_loss = 6.270, time/batch = 0.082
1626/559050 (epoch 0), train_loss = 5.013, time/batch = 0.084
1628/559050 (epoch 0), train_loss = 8.595, time/batch = 0.081
1630/559

1866/559050 (epoch 0), train_loss = 6.963, time/batch = 0.083
1868/559050 (epoch 0), train_loss = 6.590, time/batch = 0.082
1870/559050 (epoch 0), train_loss = 5.342, time/batch = 0.082
1872/559050 (epoch 0), train_loss = 6.441, time/batch = 0.081
1874/559050 (epoch 0), train_loss = 7.824, time/batch = 0.080
1876/559050 (epoch 0), train_loss = 5.044, time/batch = 0.079
1878/559050 (epoch 0), train_loss = 5.841, time/batch = 0.081
1880/559050 (epoch 0), train_loss = 5.731, time/batch = 0.083
1882/559050 (epoch 0), train_loss = 7.119, time/batch = 0.081
1884/559050 (epoch 0), train_loss = 6.660, time/batch = 0.083
1886/559050 (epoch 0), train_loss = 6.526, time/batch = 0.083
1888/559050 (epoch 0), train_loss = 6.291, time/batch = 0.079
1890/559050 (epoch 0), train_loss = 7.522, time/batch = 0.080
1892/559050 (epoch 0), train_loss = 5.856, time/batch = 0.081
1894/559050 (epoch 0), train_loss = 6.559, time/batch = 0.081
1896/559050 (epoch 0), train_loss = 4.902, time/batch = 0.081
1898/559

2134/559050 (epoch 0), train_loss = 6.345, time/batch = 0.082
2136/559050 (epoch 0), train_loss = 5.977, time/batch = 0.079
2138/559050 (epoch 0), train_loss = 6.419, time/batch = 0.081
2140/559050 (epoch 0), train_loss = 5.382, time/batch = 0.082
2142/559050 (epoch 0), train_loss = 6.424, time/batch = 0.081
2144/559050 (epoch 0), train_loss = 6.188, time/batch = 0.079
2146/559050 (epoch 0), train_loss = 7.251, time/batch = 0.081
2148/559050 (epoch 0), train_loss = 5.914, time/batch = 0.080
2150/559050 (epoch 0), train_loss = 6.243, time/batch = 0.082
2152/559050 (epoch 0), train_loss = 7.039, time/batch = 0.080
2154/559050 (epoch 0), train_loss = 7.156, time/batch = 0.081
2156/559050 (epoch 0), train_loss = 5.934, time/batch = 0.081
2158/559050 (epoch 0), train_loss = 6.014, time/batch = 0.082
2160/559050 (epoch 0), train_loss = 5.980, time/batch = 0.081
2162/559050 (epoch 0), train_loss = 8.722, time/batch = 0.079
2164/559050 (epoch 0), train_loss = 4.951, time/batch = 0.080
2166/559

2402/559050 (epoch 0), train_loss = 6.726, time/batch = 0.079
2404/559050 (epoch 0), train_loss = 6.442, time/batch = 0.083
2406/559050 (epoch 0), train_loss = 5.348, time/batch = 0.081
2408/559050 (epoch 0), train_loss = 6.671, time/batch = 0.082
2410/559050 (epoch 0), train_loss = 5.742, time/batch = 0.083
2412/559050 (epoch 0), train_loss = 7.169, time/batch = 0.080
2414/559050 (epoch 0), train_loss = 6.419, time/batch = 0.082
2416/559050 (epoch 0), train_loss = 6.344, time/batch = 0.081
2418/559050 (epoch 0), train_loss = 6.356, time/batch = 0.081
2420/559050 (epoch 0), train_loss = 7.709, time/batch = 0.082
2422/559050 (epoch 0), train_loss = 7.112, time/batch = 0.080
2424/559050 (epoch 0), train_loss = 5.722, time/batch = 0.081
2426/559050 (epoch 0), train_loss = 7.112, time/batch = 0.083
2428/559050 (epoch 0), train_loss = 5.429, time/batch = 0.083
2430/559050 (epoch 0), train_loss = 6.990, time/batch = 0.084
2432/559050 (epoch 0), train_loss = 6.582, time/batch = 0.082
2434/559