Process:
1-Load the data
2-Preprocess the data(tokenizing punctualtion, lower case except for names, split)
3-Create dictionary from the words
4-Build and train the model
5-Generate the new text

In [1]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from nltk.corpus import stopwords
import nltk
import string
import warnings
import re
from tensorflow.contrib import legacy_seq2seq


In [2]:
#Load data
f = open("HarryPotterCh1_SorcererStone.txt","r") 
textbook = f.read()
#print(textbook)
f.close()

Here, we create our own function for preprocessing, tokenization as well as creating index of the words.

In [3]:
def preprocess_text(string):
    #Tokenize the punctuations in order to consider them as words
    string = string.replace("\n", " nextline ")
    string = string.replace(".", " periodmark")
    string = string.replace(":", " colonmark")
    string = string.replace(";", " semicolonmark")
    string = string.replace(",", " commamark")
    string = string.replace("?", " questionmark")
    string = string.replace("!", " exclamationmark")
    string = string.replace("...", " 3dots")
    string = string.replace("--", " 2dashes")
    string = string.replace('"', '')
    
    #Names to remain capitalized
    Names = ['Harry', 'Potter', 'James', 'Abbott' ,'Hannah', 'Susan', 'Bones', 'Minerva','McGonagall','Professor',
             'Sprout','Malfoy','Draco','Voldemort','Rubeus','Percy','Weasley','Hagrid','Fred','Scabbers','Hedwig',
            'Sirius','Hermione','Granger','Ronald','Mrs.','Mr.','Norris','Argus','Filch','Nick']

    toLower = lambda x: " ".join( a if a in Names else a.lower()
            for a in x.split() )

    string= toLower(string)
    return string

In [4]:
text=preprocess_text(textbook)
print(text)
text_words=text.split()
text_len=len(text_words)



['Harry',
 'Potter',
 'and',
 'the',
 "sorcerer's",
 'stone',
 'nextline',
 'nextline',
 'chapter',
 'one',
 'nextline',
 'nextline',
 'the',
 'boy',
 'who',
 'lived',
 'nextline',
 'nextline',
 'mr',
 'periodmark',
 'and',
 'mrs',
 'periodmark',
 'dursley',
 'commamark',
 'of',
 'number',
 'four',
 'commamark',
 'privet',
 'drive',
 'commamark',
 'were',
 'proud',
 'to',
 'say',
 'that',
 'they',
 'were',
 'perfectly',
 'normal',
 'commamark',
 'thank',
 'you',
 'very',
 'much',
 'periodmark',
 'they',
 'were',
 'the',
 'last',
 'people',
 "you'd",
 'expect',
 'to',
 'be',
 'involved',
 'in',
 'anything',
 'strange',
 'or',
 'mysterious',
 'commamark',
 'because',
 'they',
 'just',
 "didn't",
 'hold',
 'with',
 'such',
 'nonsense',
 'periodmark',
 'nextline',
 'nextline',
 'mr',
 'periodmark',
 'dursley',
 'was',
 'the',
 'director',
 'of',
 'a',
 'firm',
 'called',
 'grunnings',
 'commamark',
 'which',
 'made',
 'drills',
 'periodmark',
 'he',
 'was',
 'a',
 'big',
 'commamark',
 'be

In [5]:
#Create dictionary from list of words in text
def dictionary(words):
    #create list of words without their dupications 
    words=set(words)
    #map word to index
    indx = {key: i for i, key in enumerate(words)}
    return indx


In [6]:
#Convert from index to words
def get_by_key_dict(indx_word,words_dict):
    for word, indx in words_dict.iteritems():    
        if indx == indx_word:
            return word

In [7]:
words_index=dictionary(text_words)
words_index

{"periodmark'\xe2\x80\x9c": 0,
 'wrought-iron': 1,
 'both': 2554,
 'foul': 15,
 'four': 3,
 'woods': 4,
 'spiders': 5,
 'ornate': 24,
 'wizardry': 7,
 'Ronald': 8,
 "fluffy's": 9,
 'lord': 10,
 'flicking': 11,
 'three-thirty': 12,
 'sinking': 13,
 'figg': 14,
 'yellow': 2,
 'bringing': 104,
 'disturb': 17,
 'basics': 18,
 'wooden': 19,
 'wednesday': 20,
 '(except': 21,
 'specially': 22,
 'tired': 23,
 'hanging': 6,
 'bacon': 25,
 'second': 26,
 'crisply': 27,
 'sailed': 28,
 'scraped': 29,
 'iron-gray': 30,
 'thunder': 31,
 'fingers': 32,
 '(how': 33,
 "'smatter": 34,
 'pawed': 35,
 'galleons': 36,
 'hero': 37,
 '-then': 38,
 "norris's": 39,
 'here': 40,
 'reported': 41,
 'ashen-faced': 42,
 'shriek': 43,
 'substance': 265,
 'climbed': 45,
 'reports': 46,
 "i'd": 47,
 'transfixed': 48,
 "i'm": 49,
 'golden': 50,
 'explained': 51,
 'brought': 52,
 'stern': 53,
 'cheating': 54,
 'spoke': 55,
 'music': 56,
 'therefore': 57,
 "wine's": 58,
 'until': 59,
 'relax': 60,
 'hurt': 61,
 'glass':

Create sequences of 10 length (given 10 words as inputs, predict 1 word for output added to the previos words)

In [8]:
seq_len=10

In [9]:
def  create_model_inputs(batch_size):
    '''Define model inputs'''
    
    #Model's placeholders for inputs
    inputs = tf.placeholder(tf.int32, [None, None], name='inputs')
    targets = tf.placeholder(tf.int32, [None, None], name='targets')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    return inputs,targets,keep_prob

In [10]:
def  build_RNN(vocabulary_size,embedding_size,inputs,seq_len,num_hidden,lstm_layer_numbers,keep_prob,batch_size):
    '''Build RNN'''
    #Embedding Layer
    '''Intialize embeddings for the words. Embedding layer connects the words to the LSTM layers (words are embedded to the embedding_size vectors instead of vocabulary size vectors or one hot vectors). Here, provided by tensorflow, we used random_uniform distribution to create embeddings'''
    #tf.AUTO_REUSE for reuisng the same scope for generating as for traning
    with tf.variable_scope('rnn1', reuse=tf.AUTO_REUSE):
        embedding = tf.Variable(tf.random_uniform((vocabulary_size, embedding_size), -1, 1))
        embed = tf.nn.embedding_lookup(embedding, inputs)
        #Define LSTM layers
        lstms=[]
        for i in range(lstm_layer_numbers):
            lstms.append(tf.contrib.rnn.BasicLSTMCell(num_hidden))
        # Add regularization dropout to the LSTM cells
        drops = [tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob) for lstm in lstms]
        # Stack up multiple LSTM layers
        stacked_lstm = tf.contrib.rnn.MultiRNNCell(drops)
        # Getting the initial state
        initial_state = stacked_lstm.zero_state(batch_size, tf.float32)

        #outputs, final_state = tf.nn.dynamic_rnn(stacked_lstm, embed, initial_state=initial_state)
        #need to unstack the sequence of input into a list of tensors
        seq_input = [tf.squeeze(i,[1]) for i in tf.split(embed,seq_len,1)] 

        outputs, final_state = legacy_seq2seq.rnn_decoder(seq_input, initial_state, stacked_lstm, loop_function=None)

    return initial_state, outputs, final_state

In [11]:
def get_batches(text_words,text_len,seq_len, batch_size,number_of_words_in_one_batch,n_batches):
    '''Using generator to return batches'''
    
    #This makes the input data to be compatible with seq_len
    text_all_batches = text_words[:n_batches*number_of_words_in_one_batch]
    index_text_all_batches=[]
    for i in text_all_batches:
        if i in words_index:
            index_text_all_batches.append(words_index[i])
        
    #index_text_all_batches={v for k,v in words_index.items() if k in text_all_batches}
    #get word index for words for batches
    input_seq=list(index_text_all_batches)
    output_seq=input_seq
    output_seq.append(output_seq.pop(output_seq[0]))
    for ii in range(0, len(text_all_batches), number_of_words_in_one_batch):
        yield input_seq[ii:ii+number_of_words_in_one_batch], output_seq[ii:ii+number_of_words_in_one_batch]

In [12]:
#Define Parameters
# number of units
n_input= len(words_index)
num_hidden = 256
lstm_layer_numbers=3
embed_size=256
batch_size= 256
learning_rate=0.01

Create a graph for training

In [13]:
graph0 = tf.Graph()
#There exits a global default graph created by tenserflow, for new graphs we need to set them as a default graph
with graph0.as_default():
    inputs,targets,keep_prob=create_model_inputs(batch_size)
    initial_state, outputs, final_state = build_RNN(n_input,embed_size,inputs,seq_len,num_hidden,lstm_layer_numbers,keep_prob,batch_size)
    # Loss and optimizer
    logits = tf.contrib.layers.fully_connected(outputs, n_input, activation_fn=None)
    
    probs = tf.nn.softmax(logits, name='probs')
    print(probs.shape)
    cost =  tf.contrib.seq2seq.sequence_loss(
            logits,
            targets,
            tf.ones([batch_size, (seq_len)])    
        )                                   

    optimizer = tf.train.AdamOptimizer(learning_rate)
    
    # Using gradian clipping for exploding gradients
    gradients = optimizer.compute_gradients(cost)
    capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients]
    train_op = optimizer.apply_gradients(capped_gradients) 
    
    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver()

#Execute the graph for training
with tf.Session(graph=graph0) as sess:
    sess = tf.Session(graph=graph0)
    sess.run(init_op)
    number_of_words_in_one_batch= seq_len*batch_size
    n_batches = text_len//number_of_words_in_one_batch
    epochs = 2000
    for epoch in range(epochs):
        state = sess.run(initial_state)
        avg_cost_train = 0 
        avg_acc_train= 0
        for ii, (x, y) in enumerate(get_batches(text_words,text_len,seq_len,batch_size,number_of_words_in_one_batch,n_batches), 1):
            #need to reshape y to feed it to targets
            y = np.array(y).reshape(batch_size,(seq_len))
            x = np.array(x).reshape(batch_size,(seq_len))

            state, loss, _= sess.run([final_state, cost,train_op], feed_dict={inputs: x,
                                                            targets: y,keep_prob: 0.8,initial_state: state})

            avg_cost_train += loss / n_batches
        if(epoch%100==0):
            print(epoch)
            print("cost_train=", avg_cost_train) 
    #Save the model into a file 
    checkpoint="./model/savedmodel.ckpt"
    save_path = saver.save(sess, checkpoint)


turns::::
check var
rnn1
_ReuseMode.AUTO_REUSE
(10, 256, 6270)
0
('cost_train=', 6.9062103095807519)
1
('cost_train=', 6.5776993977396128)
2
('cost_train=', 6.6002634073558619)
3
('cost_train=', 6.605960582431992)
4
('cost_train=', 6.5898668264087892)
5
('cost_train=', 6.5946887794293865)
6
('cost_train=', 6.6064140043760595)
7
('cost_train=', 6.5451811991239843)
8
('cost_train=', 6.4487959711175211)
9
('cost_train=', 6.410576343536377)
10
('cost_train=', 6.3703755955947079)
11
('cost_train=', 6.3164631943953671)
12
('cost_train=', 6.2750856625406373)
13
('cost_train=', 6.2722261328446232)
14
('cost_train=', 6.3119700456920436)
15
('cost_train=', 6.2551944632279239)
16
('cost_train=', 6.2839138131392636)
17
('cost_train=', 6.2328876696134872)
18
('cost_train=', 6.2091769795668759)
19
('cost_train=', 6.1794095290334612)
20
('cost_train=', 6.1827650572124275)
21
('cost_train=', 6.1552371853276311)
22
('cost_train=', 6.170054523568405)
23
('cost_train=', 6.1436559275576954)
24
('cost_trai

('cost_train=', 3.8051329851150522)
208
('cost_train=', 3.8246228443948853)
209
('cost_train=', 3.8248584082252104)
210
('cost_train=', 3.8049986299715544)
211
('cost_train=', 3.8165845870971689)
212
('cost_train=', 3.7777221893009383)
213
('cost_train=', 3.7950869798660274)
214
('cost_train=', 3.7989784039949117)
215
('cost_train=', 3.8015537889380195)
216
('cost_train=', 3.8010091154198897)
217
('cost_train=', 3.8043290125696285)
218
('cost_train=', 3.812056403411062)
219
('cost_train=', 3.8014590551978658)
220
('cost_train=', 3.757863094932155)
221
('cost_train=', 3.7805245801022167)
222
('cost_train=', 3.7512250887720207)
223
('cost_train=', 3.7590644171363436)
224
('cost_train=', 3.745925972336217)
225
('cost_train=', 3.7583323654375573)
226
('cost_train=', 3.7676995616210132)
227
('cost_train=', 3.7437282737932707)
228
('cost_train=', 3.7282196283340459)
229
('cost_train=', 3.731359199473733)
230
('cost_train=', 3.71727430820465)
231
('cost_train=', 3.7324447820061128)
232
('cost

('cost_train=', 3.4598185514148918)
414
('cost_train=', 3.473378162635)
415
('cost_train=', 3.4437709419350875)
416
('cost_train=', 3.4646804709183554)
417
('cost_train=', 3.4753090205945472)
418
('cost_train=', 3.4850028188605067)
419
('cost_train=', 3.4859708359366972)
420
('cost_train=', 3.459133305047688)
421
('cost_train=', 3.4727305926774688)
422
('cost_train=', 3.4961011911693372)
423
('cost_train=', 3.4545850816525907)
424
('cost_train=', 3.4796698595348157)
425
('cost_train=', 3.4763703534477646)
426
('cost_train=', 3.4828710116838155)
427
('cost_train=', 3.4571425852022681)
428
('cost_train=', 3.4609213816492175)
429
('cost_train=', 3.4421424489272265)
430
('cost_train=', 3.4521585263703995)
431
('cost_train=', 3.4494235452852751)
432
('cost_train=', 3.4280263185501099)
433
('cost_train=', 3.4383595178001807)
434
('cost_train=', 3.439398351468538)
435
('cost_train=', 3.4320145782671476)
436
('cost_train=', 3.4485154026433036)
437
('cost_train=', 3.4228352371015047)
438
('cost

('cost_train=', 3.2402551487872473)
620
('cost_train=', 3.2483449358689156)
621
('cost_train=', 3.2547227520691715)
622
('cost_train=', 3.2555388149462248)
623
('cost_train=', 3.2516654541617949)
624
('cost_train=', 3.2455030240510636)
625
('cost_train=', 3.289547418293199)
626
('cost_train=', 3.2431329676979468)
627
('cost_train=', 3.269793240647568)
628
('cost_train=', 3.2564090302115991)
629
('cost_train=', 3.2533976968966032)
630
('cost_train=', 3.2718209655661328)
631
('cost_train=', 3.2610049561450354)
632
('cost_train=', 3.2647775411605839)
633
('cost_train=', 3.2477400428370422)
634
('cost_train=', 3.2522061812250236)
635
('cost_train=', 3.246664887980411)
636
('cost_train=', 3.2527393290871065)
637
('cost_train=', 3.2467851764277413)
638
('cost_train=', 3.251650182824386)
639
('cost_train=', 3.2606588037390454)
640
('cost_train=', 3.2535390477431445)
641
('cost_train=', 3.2664076403567672)
642
('cost_train=', 3.2421405754591297)
643
('cost_train=', 3.2629662752151498)
644
('co

('cost_train=', 3.1447572833613342)
826
('cost_train=', 3.1500424083910481)
827
('cost_train=', 3.1519544626537117)
828
('cost_train=', 3.1464977327146033)
829
('cost_train=', 3.1597322288312411)
830
('cost_train=', 3.1269829712415995)
831
('cost_train=', 3.1713750613363163)
832
('cost_train=', 3.1330305275164152)
833
('cost_train=', 3.1584582579763305)
834
('cost_train=', 3.1443508486998719)
835
('cost_train=', 3.1495097373661229)
836
('cost_train=', 3.1573038163938016)
837
('cost_train=', 3.1413695874967074)
838
('cost_train=', 3.1769152816973243)
839
('cost_train=', 3.1491696018921682)
840
('cost_train=', 3.123594547572889)
841
('cost_train=', 3.1413227319717416)
842
('cost_train=', 3.1457125073985055)
843
('cost_train=', 3.1291480942776326)
844
('cost_train=', 3.1440040061348373)
845
('cost_train=', 3.121578837695874)
846
('cost_train=', 3.1479961809359107)
847
('cost_train=', 3.1343005958356347)
848
('cost_train=', 3.1543406875509965)
849
('cost_train=', 3.1261477846848336)
850
('

('cost_train=', 3.0538752392718669)
1031
('cost_train=', 3.0805501310448893)
1032
('cost_train=', 3.0684804916381836)
1033
('cost_train=', 3.0829649661716672)
1034
('cost_train=', 3.0764102245631975)
1035
('cost_train=', 3.0662572823072738)
1036
('cost_train=', 3.0628477021267528)
1037
('cost_train=', 3.06918350018953)
1038
('cost_train=', 3.0646901820835328)
1039
('cost_train=', 3.0610099842673857)
1040
('cost_train=', 3.074184273418628)
1041
('cost_train=', 3.0948436636673771)
1042
('cost_train=', 3.0666444364346952)
1043
('cost_train=', 3.0515254924171851)
1044
('cost_train=', 3.0392366584978601)
1045
('cost_train=', 3.0592459565714778)
1046
('cost_train=', 3.0487764320875477)
1047
('cost_train=', 3.0527339985496118)
1048
('cost_train=', 3.0471441055599016)
1049
('cost_train=', 3.063056901881569)
1050
('cost_train=', 3.0529283598849659)
1051
('cost_train=', 3.0541347139760062)
1052
('cost_train=', 3.0798886888905588)
1053
('cost_train=', 3.0616646377663868)
1054
('cost_train=', 3.06

('cost_train=', 3.0126653219524182)
1232
('cost_train=', 3.0036866664886466)
1233
('cost_train=', 3.018117321164985)
1234
('cost_train=', 3.0177027865460047)
1235
('cost_train=', 3.0215618673123812)
1236
('cost_train=', 3.0191100271124589)
1237
('cost_train=', 3.0137587472012166)
1238
('cost_train=', 3.0268923232429907)
1239
('cost_train=', 3.0108816498204285)
1240
('cost_train=', 2.9955439379340727)
1241
('cost_train=', 3.0021946806656694)
1242
('cost_train=', 3.0083809463601363)
1243
('cost_train=', 3.0175494583029501)
1244
('cost_train=', 3.0136173900805021)
1245
('cost_train=', 3.0094256463803748)
1246
('cost_train=', 3.0074061469027868)
1247
('cost_train=', 2.9980082009968001)
1248
('cost_train=', 3.0134155248340804)
1249
('cost_train=', 3.0050425153029594)
1250
('cost_train=', 3.0022665136738822)
1251
('cost_train=', 3.0005490528909791)
1252
('cost_train=', 3.0167182558461247)
1253
('cost_train=', 3.0105602992208373)
1254
('cost_train=', 3.0165770932247762)
1255
('cost_train=', 2

('cost_train=', 2.9906539226833138)
1433
('cost_train=', 2.9757688861144218)
1434
('cost_train=', 2.9755851908733968)
1435
('cost_train=', 2.9556188646115751)
1436
('cost_train=', 2.9508291482925411)
1437
('cost_train=', 2.9560097142269721)
1438
('cost_train=', 2.9459919741279204)
1439
('cost_train=', 2.9615156838768413)
1440
('cost_train=', 2.9441201184925276)
1441
('cost_train=', 2.9370828992442086)
1442
('cost_train=', 2.9469847428171256)
1443
('cost_train=', 2.9515801919134046)
1444
('cost_train=', 2.9446808664422282)
1445
('cost_train=', 2.9647162588019111)
1446
('cost_train=', 2.9472099856326452)
1447
('cost_train=', 2.9603587075283646)
1448
('cost_train=', 2.9581530721564047)
1449
('cost_train=', 2.9786205981907088)
1450
('cost_train=', 2.9671557137840674)
1451
('cost_train=', 2.9774452573374699)
1452
('cost_train=', 2.968587737334401)
1453
('cost_train=', 2.9544504755421692)
1454
('cost_train=', 2.9568451580248385)
1455
('cost_train=', 2.9484998991614888)
1456
('cost_train=', 2

('cost_train=', 2.9067921826713961)
1634
('cost_train=', 2.9079988441969222)
1635
('cost_train=', 2.9127724233426551)
1636
('cost_train=', 2.9077586688493429)
1637
('cost_train=', 2.8909707822297754)
1638
('cost_train=', 2.9048823494660234)
1639
('cost_train=', 2.9221311054731678)
1640
('cost_train=', 2.9295101730447062)
1641
('cost_train=', 2.9247851873698987)
1642
('cost_train=', 2.9021582854421522)
1643
('cost_train=', 2.916767973648875)
1644
('cost_train=', 2.9051834595830819)
1645
('cost_train=', 2.9453886998327152)
1646
('cost_train=', 2.9060424629010653)
1647
('cost_train=', 2.9426693351645219)
1648
('cost_train=', 2.9274336727041952)
1649
('cost_train=', 2.9161328140058012)
1650
('cost_train=', 2.9186965038901875)
1651
('cost_train=', 2.9149161200774349)
1652
('cost_train=', 2.9189994084207629)
1653
('cost_train=', 2.9165839082316354)
1654
('cost_train=', 2.9110216968937914)
1655
('cost_train=', 2.9195073592035401)
1656
('cost_train=', 2.9231090859362947)
1657
('cost_train=', 2

('cost_train=', 2.8845798090884567)
1835
('cost_train=', 2.8881237318641255)
1836
('cost_train=', 2.9001004821375798)
1837
('cost_train=', 2.8993925546344959)
1838
('cost_train=', 2.8847259722257923)
1839
('cost_train=', 2.8751220703125009)
1840
('cost_train=', 2.8789728221140409)
1841
('cost_train=', 2.8896308698152242)
1842
('cost_train=', 2.8810039570457064)
1843
('cost_train=', 2.8860058282551004)
1844
('cost_train=', 2.906618864912736)
1845
('cost_train=', 2.8840484054465034)
1846
('cost_train=', 2.8908598548487618)
1847
('cost_train=', 2.8845183033692208)
1848
('cost_train=', 2.9103238143418957)
1849
('cost_train=', 2.8827286958694458)
1850
('cost_train=', 2.8845707617307967)
1851
('cost_train=', 2.8851926954169023)
1852
('cost_train=', 2.880777057848479)
1853
('cost_train=', 2.8837678997140186)
1854
('cost_train=', 2.8715304575468372)
1855
('cost_train=', 2.8785874655372221)
1856
('cost_train=', 2.8964175174110811)
1857
('cost_train=', 2.9020494222640991)
1858
('cost_train=', 2.

In [14]:
batch_size=1
#use the same sequence length as for trained model to generate the new words
seq_len=10

Craete a graph for generating text
Based on the train model, each time it uses 10 previous words to generate the next word (therfore, first define 10 prime words to begin generating 11th word and then consider 2th to 11th words for generating the 12th word and so on)

In [15]:
tf.reset_default_graph()
graph1 = tf.Graph()

with graph1.as_default():
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    inputs = tf.placeholder(tf.int32, [batch_size, None], name='inputs')
    initial_state, outputs, final_state = build_RNN(n_input,embed_size,inputs,seq_len,num_hidden,lstm_layer_numbers,keep_prob,batch_size)
    logits = tf.contrib.layers.fully_connected(outputs, n_input, activation_fn=None)
    probs = tf.nn.softmax(logits, name='probs')
    init_op1 = tf.global_variables_initializer()
    saver = tf.train.Saver()
    
#Execute the graph1 for generating text
with tf.Session(graph=graph1) as sess2:
    #This part to compare varibles in checkpoints with what we have
    var_name_list = [v.name for v in tf.trainable_variables()]
    #print(var_name_list)
    from tensorflow.python import pywrap_tensorflow

    reader = pywrap_tensorflow.NewCheckpointReader(checkpoint)
    var_to_shape_map = reader.get_variable_to_shape_map()
    #print(var_to_shape_map)
    
    #Execute the graph to generate the text
    sess2.run(init_op1)
    
    #Number of words to generate 
    num_gen=1000
    
    # Load the model
    saved.restore(sess2, checkpoint)
    
    #Just for the record 
    saved_dict = {}
    for x in tf.trainable_variables():
          saved_dict[x.name] = x
    
    #10 first words to begin with
    start_word="Harry Potter went to see the street even it was"
    start_words=start_word.split(" ")
    print(start_words)
    
    #The sentence of text we have so far as a list of words' indexes
    genertaed_sentence=[words_index[w] for w in start_words]
    genertaed_sentence=[genertaed_sentence]
    
    state = sess2.run(initial_state)
    #Choose the last 10 words we have from text to predict the next word
    last_words=[(genertaed_sentence[0])[-10:]]
    
    
    for i in range(0,num_gen):
        #seq_len is 10
        seq_len=len(last_words[0])
        
        print("last_words",last_words)
        
        next_word = np.zeros((1,seq_len))
        next_word = [w for w in last_words]
        #print(np.array(next_word).shape,inputs.shape,type(next_word),next_word)
        next_word = np.asarray(next_word) 

        #next_word = next_word.reshape(batch_size,seq_len)
            
        print("next_word",next_word)
         #next_word = np.array(next_word).reshape(batch_size,(seq_len))
        prediction,state= sess2.run([probs,final_state], feed_dict={inputs: next_word,
                                                                    keep_prob: 0.8,initial_state: state})
        #print("Prediction's shape",prediction.shape," Prediction:",prediction)
        #print("Element we choose for prediction: ",prediction[len(last_words[0])-1,0])
        #Based on prediction's shape still not sure about part len(last_words[0])-1, which element to choose
        
        #Next predicted word by choosing the word with max probability
        next_predicted_word = np.argmax(prediction[len(last_words[0])-1,0])
        
        #append the new word to the previous sentences
        genertaed_sentence[0].append(next_predicted_word)
        #save in last_word to use it in for loop
        last_words=[(genertaed_sentence[0])[-10:]]
        print("last_words",last_words)
        print ("genertaed_sentence",genertaed_sentence)

#Conver index to words
list_gen=[get_by_key_dict(word_int,words_index) for word_int in genertaed_sentence[0]]
sen=' '.join(list_gen)
#Convert back the tokens for punctuations
sen=sen.replace("nextline", "\n")
sen=sen.replace("periodmark", ".")
sen=sen.replace("colonmark", ":")
sen=sen.replace("commamark", ",")
sen=sen.replace("semicommamark", ";")
sen=sen.replace("questionmark", "?")
sen=sen.replace("exclamationmark", "!")
sen=sen.replace("3dots", "...")
sen=sen.replace("2dashes", "--")
#Print the whole text
print(sen)

turns::::
check var
rnn1
_ReuseMode.AUTO_REUSE
INFO:tensorflow:Restoring parameters from ./model/savedmodel.ckpt
['Harry', 'Potter', 'went', 'to', 'see', 'the', 'street', 'even', 'it', 'was']
('last_words', [[745, 5425, 3193, 789, 990, 1841, 4722, 3110, 3356, 1885]])
('next_word', array([[ 745, 5425, 3193,  789,  990, 1841, 4722, 3110, 3356, 1885]]))
((10, 1, 6270), array([[[  7.19286592e-23,   6.10792232e-18,   1.41872994e-20, ...,
           2.49921253e-16,   2.02738071e-21,   3.38773362e-13]],

       [[  0.00000000e+00,   0.00000000e+00,   4.47065433e-20, ...,
           3.28781500e-34,   0.00000000e+00,   5.38304346e-15]],

       [[  9.89209717e-31,   1.46191742e-31,   9.07462906e-30, ...,
           1.27813019e-25,   0.00000000e+00,   1.18297191e-30]],

       ..., 
       [[  1.32767135e-32,   8.77711216e-33,   9.49186147e-29, ...,
           1.31403456e-28,   3.85898403e-34,   5.26979136e-07]],

       [[  6.70003371e-27,   4.48882951e-35,   1.07219992e-32, ...,
           1.2

((10, 1, 6270), array([[[  0.00000000e+00,   9.72358874e-38,   3.23019270e-16, ...,
           0.00000000e+00,   0.00000000e+00,   6.14422665e-16]],

       [[  0.00000000e+00,   0.00000000e+00,   2.95059827e-34, ...,
           0.00000000e+00,   0.00000000e+00,   2.43458862e-34]],

       [[  2.78950379e-22,   1.46194307e-26,   1.21535378e-20, ...,
           0.00000000e+00,   0.00000000e+00,   1.16841609e-26]],

       ..., 
       [[  1.62923266e-37,   5.82830281e-36,   1.19827676e-15, ...,
           4.85494894e-35,   1.90828549e-30,   8.77549126e-21]],

       [[  1.42695156e-23,   0.00000000e+00,   3.13576286e-20, ...,
           0.00000000e+00,   0.00000000e+00,   4.46327624e-23]],

       [[  0.00000000e+00,   3.03302598e-28,   1.38104445e-14, ...,
           0.00000000e+00,   0.00000000e+00,   1.61484962e-38]]], dtype=float32))
[  0.00000000e+00   3.03302598e-28   1.38104445e-14 ...,   0.00000000e+00
   0.00000000e+00   1.61484962e-38]
rid
('last_words', [[1885, 2903, 1877, 15

('next_word', array([[4382,  257,  292, 1524, 1877, 3895,  276,  976, 1877, 3895]]))
((10, 1, 6270), array([[[  0.00000000e+00,   0.00000000e+00,   1.56623015e-28, ...,
           0.00000000e+00,   0.00000000e+00,   2.02242429e-38]],

       [[  8.91555229e-37,   0.00000000e+00,   7.50848240e-21, ...,
           0.00000000e+00,   1.63910312e-27,   4.24810096e-14]],

       [[  9.72742884e-36,   0.00000000e+00,   1.67576388e-08, ...,
           1.22785670e-29,   2.37492368e-28,   1.98943035e-14]],

       ..., 
       [[  9.78042744e-32,   0.00000000e+00,   9.15853686e-32, ...,
           0.00000000e+00,   6.15088892e-31,   1.20559765e-19]],

       [[  3.51239209e-20,   0.00000000e+00,   7.87267405e-31, ...,
           5.15239091e-35,   5.38202534e-31,   5.00146712e-23]],

       [[  0.00000000e+00,   8.40042072e-29,   0.00000000e+00, ...,
           1.38622121e-36,   8.34670858e-33,   1.49191263e-31]]], dtype=float32))
[  0.00000000e+00   8.40042072e-29   0.00000000e+00 ...,   1.38622

   6.96990694e-22   1.02103269e-21]
lot
('last_words', [[4003, 2587, 4467, 976, 976, 632, 1877, 1841, 3895, 1345]])
('genertaed_sentence', [[745, 5425, 3193, 789, 990, 1841, 4722, 3110, 3356, 1885, 2903, 1877, 1524, 4891, 3350, 1744, 887, 1247, 5773, 5066, 5066, 5066, 5066, 5443, 5432, 976, 4382, 257, 292, 1524, 1877, 3895, 276, 976, 1877, 3895, 976, 6037, 2246, 1038, 3895, 976, 49, 3895, 4003, 2587, 4467, 976, 976, 632, 1877, 1841, 3895, 1345]])
('last_words', [[4003, 2587, 4467, 976, 976, 632, 1877, 1841, 3895, 1345]])
('next_word', array([[4003, 2587, 4467,  976,  976,  632, 1877, 1841, 3895, 1345]]))
((10, 1, 6270), array([[[  0.00000000e+00,   5.79906569e-32,   1.45813191e-28, ...,
           0.00000000e+00,   0.00000000e+00,   0.00000000e+00]],

       [[  2.01908898e-29,   1.50864512e-20,   2.55946514e-22, ...,
           0.00000000e+00,   1.81280027e-38,   0.00000000e+00]],

       [[  7.40518935e-31,   6.84660366e-20,   6.95834637e-28, ...,
           0.00000000e+00,   1.38471

nextline
('last_words', [[976, 5168, 3895, 689, 976, 4467, 4767, 4797, 3400, 3895]])
('genertaed_sentence', [[745, 5425, 3193, 789, 990, 1841, 4722, 3110, 3356, 1885, 2903, 1877, 1524, 4891, 3350, 1744, 887, 1247, 5773, 5066, 5066, 5066, 5066, 5443, 5432, 976, 4382, 257, 292, 1524, 1877, 3895, 276, 976, 1877, 3895, 976, 6037, 2246, 1038, 3895, 976, 49, 3895, 4003, 2587, 4467, 976, 976, 632, 1877, 1841, 3895, 1345, 789, 976, 976, 976, 976, 3499, 1841, 976, 5168, 3895, 689, 976, 4467, 4767, 4797, 3400, 3895]])
('last_words', [[976, 5168, 3895, 689, 976, 4467, 4767, 4797, 3400, 3895]])
('next_word', array([[ 976, 5168, 3895,  689,  976, 4467, 4767, 4797, 3400, 3895]]))
((10, 1, 6270), array([[[  1.20749033e-22,   1.21572770e-20,   7.13622611e-18, ...,
           1.18121755e-20,   1.63447893e-15,   1.09828236e-17]],

       [[  3.47893846e-28,   1.64661486e-22,   1.68664259e-20, ...,
           1.45007141e-28,   8.56660431e-09,   3.07942746e-15]],

       [[  0.00000000e+00,   6.84838886e-

((10, 1, 6270), array([[[  6.14360069e-35,   2.52526699e-34,   2.57108215e-31, ...,
           0.00000000e+00,   1.85156229e-32,   3.70311177e-36]],

       [[  0.00000000e+00,   1.86271766e-36,   2.38199154e-15, ...,
           0.00000000e+00,   6.00155469e-23,   2.48905531e-30]],

       [[  8.64388636e-35,   4.84023170e-32,   1.45822232e-12, ...,
           1.27518844e-37,   5.40878567e-23,   3.59341816e-16]],

       ..., 
       [[  0.00000000e+00,   1.23380157e-35,   0.00000000e+00, ...,
           0.00000000e+00,   2.01804521e-31,   0.00000000e+00]],

       [[  0.00000000e+00,   7.38679522e-30,   1.69256579e-35, ...,
           0.00000000e+00,   1.41476824e-36,   8.46836943e-27]],

       [[  1.71150646e-34,   1.70446806e-36,   1.53941062e-23, ...,
           0.00000000e+00,   3.39581385e-26,   7.07914295e-30]]], dtype=float32))
[  1.71150646e-34   1.70446806e-36   1.53941062e-23 ...,   0.00000000e+00
   3.39581385e-26   7.07914295e-30]
periodmark
('last_words', [[618, 976, 199

((10, 1, 6270), array([[[  0.00000000e+00,   0.00000000e+00,   1.94009140e-29, ...,
           0.00000000e+00,   2.07113633e-34,   0.00000000e+00]],

       [[  1.86532945e-24,   1.53776830e-27,   3.46030136e-20, ...,
           1.74532497e-22,   9.89712218e-12,   1.46903243e-15]],

       [[  4.95701175e-36,   7.78868714e-30,   0.00000000e+00, ...,
           2.06296869e-24,   0.00000000e+00,   0.00000000e+00]],

       ..., 
       [[  1.67634649e-36,   1.33633814e-34,   4.61695211e-30, ...,
           2.38016240e-35,   5.53396475e-33,   0.00000000e+00]],

       [[  2.95365566e-33,   1.65390921e-24,   2.86854073e-34, ...,
           4.67703534e-34,   3.95816044e-26,   0.00000000e+00]],

       [[  0.00000000e+00,   1.12315371e-35,   1.04494018e-26, ...,
           0.00000000e+00,   2.74972202e-27,   7.18296212e-36]]], dtype=float32))
[  0.00000000e+00   1.12315371e-35   1.04494018e-26 ...,   0.00000000e+00
   2.74972202e-27   7.18296212e-36]
nextline
('last_words', [[2508, 3961, 187

((10, 1, 6270), array([[[  0.00000000e+00,   5.68167148e-28,   1.78667729e-37, ...,
           0.00000000e+00,   0.00000000e+00,   0.00000000e+00]],

       [[  2.20893526e-35,   1.56484957e-24,   1.08663479e-24, ...,
           0.00000000e+00,   3.47128946e-34,   0.00000000e+00]],

       [[  5.97620108e-38,   1.61950001e-30,   5.26866959e-31, ...,
           2.23778178e-31,   2.97080776e-22,   0.00000000e+00]],

       ..., 
       [[  5.15615616e-35,   3.27705890e-31,   3.97571064e-37, ...,
           1.00803872e-31,   9.28045070e-28,   1.64816330e-28]],

       [[  0.00000000e+00,   2.33714836e-38,   0.00000000e+00, ...,
           0.00000000e+00,   3.06858636e-38,   0.00000000e+00]],

       [[  1.55133098e-35,   2.20802451e-36,   2.11307944e-34, ...,
           0.00000000e+00,   1.05183818e-30,   0.00000000e+00]]], dtype=float32))
[  1.55133098e-35   2.20802451e-36   2.11307944e-34 ...,   0.00000000e+00
   1.05183818e-30   0.00000000e+00]
cold
('last_words', [[1877, 3492, 2786, 3

((10, 1, 6270), array([[[  1.20824960e-29,   1.96568913e-24,   3.15720106e-12, ...,
           0.00000000e+00,   2.66603055e-33,   2.12779657e-14]],

       [[  0.00000000e+00,   4.16925578e-30,   3.93074086e-21, ...,
           7.04112621e-34,   0.00000000e+00,   7.28052387e-21]],

       [[  1.79760621e-30,   2.02056495e-21,   8.25509294e-18, ...,
           2.52667429e-27,   7.37812868e-29,   3.61245783e-13]],

       ..., 
       [[  0.00000000e+00,   2.28706679e-29,   4.03062595e-34, ...,
           5.19097945e-37,   0.00000000e+00,   0.00000000e+00]],

       [[  4.86046225e-32,   8.68278507e-30,   2.58452114e-20, ...,
           2.62105451e-38,   2.40839532e-20,   1.31077713e-26]],

       [[  5.97125311e-29,   2.65355079e-29,   4.26688868e-23, ...,
           0.00000000e+00,   2.28476656e-28,   1.38946471e-21]]], dtype=float32))
[  5.97125311e-29   2.65355079e-29   4.26688868e-23 ...,   0.00000000e+00
   2.28476656e-28   1.38946471e-21]
did
('last_words', [[3891, 3051, 1877, 38

brooms
('last_words', [[3895, 4273, 1877, 976, 3895, 3175, 976, 1877, 789, 4311]])
('genertaed_sentence', [[745, 5425, 3193, 789, 990, 1841, 4722, 3110, 3356, 1885, 2903, 1877, 1524, 4891, 3350, 1744, 887, 1247, 5773, 5066, 5066, 5066, 5066, 5443, 5432, 976, 4382, 257, 292, 1524, 1877, 3895, 276, 976, 1877, 3895, 976, 6037, 2246, 1038, 3895, 976, 49, 3895, 4003, 2587, 4467, 976, 976, 632, 1877, 1841, 3895, 1345, 789, 976, 976, 976, 976, 3499, 1841, 976, 5168, 3895, 689, 976, 4467, 4767, 4797, 3400, 3895, 3037, 3895, 3498, 1370, 976, 5329, 976, 618, 976, 1999, 3895, 5507, 26, 1877, 3895, 976, 976, 49, 3476, 976, 3961, 3895, 976, 2508, 3961, 1877, 976, 3478, 4738, 3895, 1006, 5212, 3895, 6117, 4572, 618, 6003, 1877, 976, 3895, 1877, 3492, 2786, 3895, 3895, 1877, 976, 976, 1595, 2031, 6175, 976, 1877, 5213, 1322, 2801, 3891, 3051, 1877, 3895, 976, 1877, 4093, 5956, 3651, 3507, 4008, 758, 976, 5109, 3933, 976, 3895, 4273, 1877, 976, 3895, 3175, 976, 1877, 789, 4311]])
('last_words', [[3895

('genertaed_sentence', [[745, 5425, 3193, 789, 990, 1841, 4722, 3110, 3356, 1885, 2903, 1877, 1524, 4891, 3350, 1744, 887, 1247, 5773, 5066, 5066, 5066, 5066, 5443, 5432, 976, 4382, 257, 292, 1524, 1877, 3895, 276, 976, 1877, 3895, 976, 6037, 2246, 1038, 3895, 976, 49, 3895, 4003, 2587, 4467, 976, 976, 632, 1877, 1841, 3895, 1345, 789, 976, 976, 976, 976, 3499, 1841, 976, 5168, 3895, 689, 976, 4467, 4767, 4797, 3400, 3895, 3037, 3895, 3498, 1370, 976, 5329, 976, 618, 976, 1999, 3895, 5507, 26, 1877, 3895, 976, 976, 49, 3476, 976, 3961, 3895, 976, 2508, 3961, 1877, 976, 3478, 4738, 3895, 1006, 5212, 3895, 6117, 4572, 618, 6003, 1877, 976, 3895, 1877, 3492, 2786, 3895, 3895, 1877, 976, 976, 1595, 2031, 6175, 976, 1877, 5213, 1322, 2801, 3891, 3051, 1877, 3895, 976, 1877, 4093, 5956, 3651, 3507, 4008, 758, 976, 5109, 3933, 976, 3895, 4273, 1877, 976, 3895, 3175, 976, 1877, 789, 4311, 976, 6102, 976, 976, 789, 4235, 4855, 4625, 2210, 574, 976, 976, 976, 1370, 4970, 2899, 976]])
('last_word

((10, 1, 6270), array([[[  3.43843663e-35,   1.57517804e-30,   1.41885471e-29, ...,
           2.22287864e-34,   0.00000000e+00,   1.51948526e-25]],

       [[  7.45913614e-34,   0.00000000e+00,   1.90759630e-22, ...,
           5.01180089e-27,   9.05668255e-30,   6.48979301e-25]],

       [[  8.66769394e-28,   2.60615289e-33,   2.58287179e-20, ...,
           3.85648817e-37,   3.51093236e-36,   1.64737412e-29]],

       ..., 
       [[  2.76764746e-20,   1.67351005e-27,   1.87242038e-19, ...,
           4.08927168e-28,   3.48450035e-21,   2.07994424e-21]],

       [[  1.85772968e-26,   3.44423467e-21,   1.07700637e-26, ...,
           8.54370209e-29,   6.95971574e-30,   1.18251512e-22]],

       [[  7.65214129e-33,   9.35088271e-33,   9.67833241e-20, ...,
           2.46504494e-35,   5.63590748e-24,   2.49955163e-19]]], dtype=float32))
[  7.65214129e-33   9.35088271e-33   9.67833241e-20 ...,   2.46504494e-35
   5.63590748e-24   2.49955163e-19]
have
('last_words', [[976, 4874, 936, 976

((10, 1, 6270), array([[[  1.79476822e-38,   0.00000000e+00,   6.60783498e-21, ...,
           0.00000000e+00,   1.45879824e-21,   9.14976948e-12]],

       [[  4.22573643e-34,   7.88037624e-38,   5.79070845e-16, ...,
           0.00000000e+00,   9.35922862e-35,   1.73916560e-29]],

       [[  2.37991022e-34,   1.06391659e-24,   3.71699001e-17, ...,
           2.41966049e-32,   8.79040645e-35,   6.70993884e-24]],

       ..., 
       [[  7.81423209e-24,   6.07029586e-29,   5.90962557e-10, ...,
           4.18776680e-31,   1.79753790e-25,   8.05739951e-30]],

       [[  4.90547531e-33,   0.00000000e+00,   2.13321272e-09, ...,
           0.00000000e+00,   4.13277182e-29,   1.83520956e-20]],

       [[  3.72520168e-18,   1.04275365e-21,   7.52968434e-13, ...,
           1.06572308e-23,   2.18699523e-17,   1.38915386e-08]]], dtype=float32))
[  3.72520168e-18   1.04275365e-21   7.52968434e-13 ...,   1.06572308e-23
   2.18699523e-17   1.38915386e-08]
nextline
('last_words', [[2533, 3895, 976

((10, 1, 6270), array([[[  7.87877639e-25,   2.27001820e-10,   6.39584208e-21, ...,
           1.24973810e-30,   1.57712453e-16,   5.14419920e-23]],

       [[  3.04189639e-38,   4.35055113e-25,   8.83605134e-26, ...,
           3.89607830e-24,   2.87438513e-15,   1.07216722e-27]],

       [[  9.60673768e-34,   2.22032489e-30,   4.89652474e-27, ...,
           0.00000000e+00,   3.83973225e-29,   2.48201871e-32]],

       ..., 
       [[  3.23951799e-30,   4.15835671e-10,   1.08227013e-31, ...,
           3.00047298e-34,   2.53832158e-19,   3.61694262e-34]],

       [[  5.87083520e-37,   1.03354889e-35,   9.93794952e-36, ...,
           0.00000000e+00,   6.70518996e-22,   0.00000000e+00]],

       [[  0.00000000e+00,   1.29954023e-34,   1.56060463e-28, ...,
           1.70573560e-38,   1.02167117e-28,   1.03677970e-34]]], dtype=float32))
[  0.00000000e+00   1.29954023e-34   1.56060463e-28 ...,   1.70573560e-38
   1.02167117e-28   1.03677970e-34]
commamark
('last_words', [[3270, 316, 400

((10, 1, 6270), array([[[  0.00000000e+00,   4.21972238e-31,   0.00000000e+00, ...,
           0.00000000e+00,   1.82360672e-35,   0.00000000e+00]],

       [[  1.61479904e-38,   7.49259585e-33,   1.98866131e-17, ...,
           2.00184502e-31,   2.86683224e-16,   7.55710509e-23]],

       [[  2.44824675e-32,   5.47189950e-30,   3.16885577e-19, ...,
           5.42795709e-35,   4.72796981e-38,   6.06833995e-21]],

       ..., 
       [[  1.10040303e-25,   1.16932612e-21,   2.12844154e-33, ...,
           3.15930198e-28,   1.16882039e-24,   5.96661944e-29]],

       [[  0.00000000e+00,   0.00000000e+00,   0.00000000e+00, ...,
           0.00000000e+00,   7.10137671e-27,   4.88987864e-38]],

       [[  0.00000000e+00,   2.99530714e-38,   5.75580533e-34, ...,
           0.00000000e+00,   1.13475612e-35,   0.00000000e+00]]], dtype=float32))
[  0.00000000e+00   2.99530714e-38   5.75580533e-34 ...,   0.00000000e+00
   1.13475612e-35   0.00000000e+00]
ron
('last_words', [[3361, 3895, 2182, 97

owl
('last_words', [[1877, 976, 1877, 789, 4749, 3895, 3391, 1877, 3895, 6218]])
('genertaed_sentence', [[745, 5425, 3193, 789, 990, 1841, 4722, 3110, 3356, 1885, 2903, 1877, 1524, 4891, 3350, 1744, 887, 1247, 5773, 5066, 5066, 5066, 5066, 5443, 5432, 976, 4382, 257, 292, 1524, 1877, 3895, 276, 976, 1877, 3895, 976, 6037, 2246, 1038, 3895, 976, 49, 3895, 4003, 2587, 4467, 976, 976, 632, 1877, 1841, 3895, 1345, 789, 976, 976, 976, 976, 3499, 1841, 976, 5168, 3895, 689, 976, 4467, 4767, 4797, 3400, 3895, 3037, 3895, 3498, 1370, 976, 5329, 976, 618, 976, 1999, 3895, 5507, 26, 1877, 3895, 976, 976, 49, 3476, 976, 3961, 3895, 976, 2508, 3961, 1877, 976, 3478, 4738, 3895, 1006, 5212, 3895, 6117, 4572, 618, 6003, 1877, 976, 3895, 1877, 3492, 2786, 3895, 3895, 1877, 976, 976, 1595, 2031, 6175, 976, 1877, 5213, 1322, 2801, 3891, 3051, 1877, 3895, 976, 1877, 4093, 5956, 3651, 3507, 4008, 758, 976, 5109, 3933, 976, 3895, 4273, 1877, 976, 3895, 3175, 976, 1877, 789, 4311, 976, 6102, 976, 976, 789,

((10, 1, 6270), array([[[  3.01809271e-22,   2.13888580e-27,   6.42953276e-15, ...,
           1.34700449e-26,   2.55886282e-15,   9.22496593e-11]],

       [[  2.77557902e-20,   1.96394404e-19,   1.52419169e-19, ...,
           2.51379444e-17,   7.72458299e-27,   1.34838740e-06]],

       [[  0.00000000e+00,   6.34170982e-27,   7.49427816e-21, ...,
           4.57373605e-30,   0.00000000e+00,   4.17903125e-13]],

       ..., 
       [[  1.21587902e-26,   4.85412751e-32,   2.07404030e-14, ...,
           1.36358566e-19,   4.82616622e-20,   6.02033060e-06]],

       [[  9.10305351e-25,   2.69051883e-25,   8.59519073e-17, ...,
           1.78438300e-28,   2.01975277e-11,   9.23743368e-14]],

       [[  0.00000000e+00,   0.00000000e+00,   4.48706132e-15, ...,
           1.93399801e-35,   8.09711168e-22,   2.29697846e-19]]], dtype=float32))
[  0.00000000e+00   0.00000000e+00   4.48706132e-15 ...,   1.93399801e-35
   8.09711168e-22   2.29697846e-19]
nextline
('last_words', [[4382, 976, 4648

('genertaed_sentence', [[745, 5425, 3193, 789, 990, 1841, 4722, 3110, 3356, 1885, 2903, 1877, 1524, 4891, 3350, 1744, 887, 1247, 5773, 5066, 5066, 5066, 5066, 5443, 5432, 976, 4382, 257, 292, 1524, 1877, 3895, 276, 976, 1877, 3895, 976, 6037, 2246, 1038, 3895, 976, 49, 3895, 4003, 2587, 4467, 976, 976, 632, 1877, 1841, 3895, 1345, 789, 976, 976, 976, 976, 3499, 1841, 976, 5168, 3895, 689, 976, 4467, 4767, 4797, 3400, 3895, 3037, 3895, 3498, 1370, 976, 5329, 976, 618, 976, 1999, 3895, 5507, 26, 1877, 3895, 976, 976, 49, 3476, 976, 3961, 3895, 976, 2508, 3961, 1877, 976, 3478, 4738, 3895, 1006, 5212, 3895, 6117, 4572, 618, 6003, 1877, 976, 3895, 1877, 3492, 2786, 3895, 3895, 1877, 976, 976, 1595, 2031, 6175, 976, 1877, 5213, 1322, 2801, 3891, 3051, 1877, 3895, 976, 1877, 4093, 5956, 3651, 3507, 4008, 758, 976, 5109, 3933, 976, 3895, 4273, 1877, 976, 3895, 3175, 976, 1877, 789, 4311, 976, 6102, 976, 976, 789, 4235, 4855, 4625, 2210, 574, 976, 976, 976, 1370, 4970, 2899, 976, 1190, 976, 38

((10, 1, 6270), array([[[  6.88448441e-23,   5.23998661e-27,   1.19741713e-30, ...,
           2.47635899e-32,   5.20452535e-35,   2.21042900e-19]],

       [[  0.00000000e+00,   1.35315093e-32,   0.00000000e+00, ...,
           8.75008484e-38,   0.00000000e+00,   7.47476342e-29]],

       [[  1.82123622e-37,   7.32655125e-32,   9.95847002e-33, ...,
           1.57398041e-36,   7.70516721e-21,   6.98724591e-28]],

       ..., 
       [[  1.28208693e-36,   1.53359063e-29,   0.00000000e+00, ...,
           0.00000000e+00,   1.20034502e-29,   4.54076202e-30]],

       [[  3.10632125e-25,   1.37431404e-35,   2.56035478e-30, ...,
           0.00000000e+00,   2.92970192e-36,   6.35484589e-32]],

       [[  2.03331458e-18,   1.29583150e-16,   2.34904899e-20, ...,
           8.66697219e-25,   1.07974810e-20,   2.14578688e-21]]], dtype=float32))
[  2.03331458e-18   1.29583150e-16   2.34904899e-20 ...,   8.66697219e-25
   1.07974810e-20   2.14578688e-21]
who
('last_words', [[3365, 689, 976, 4033

((10, 1, 6270), array([[[  0.00000000e+00,   6.19994035e-37,   5.58051442e-29, ...,
           0.00000000e+00,   0.00000000e+00,   2.98450466e-22]],

       [[  3.02456691e-36,   1.98983167e-16,   4.04969277e-30, ...,
           0.00000000e+00,   0.00000000e+00,   4.90713453e-26]],

       [[  5.65375726e-38,   2.86967173e-29,   6.97348753e-34, ...,
           0.00000000e+00,   0.00000000e+00,   0.00000000e+00]],

       ..., 
       [[  0.00000000e+00,   1.14753743e-27,   1.62284964e-12, ...,
           7.48748002e-35,   1.68680923e-33,   5.59200001e-32]],

       [[  2.52186699e-29,   2.62576624e-17,   1.19370269e-28, ...,
           0.00000000e+00,   1.02564293e-34,   4.59349033e-20]],

       [[  0.00000000e+00,   2.93765222e-35,   2.81308076e-26, ...,
           0.00000000e+00,   3.97514574e-30,   4.54313641e-23]]], dtype=float32))
[  0.00000000e+00   2.93765222e-35   2.81308076e-26 ...,   0.00000000e+00
   3.97514574e-30   4.54313641e-23]
periodmark
('last_words', [[976, 976, 976

((10, 1, 6270), array([[[  3.46728072e-22,   8.75692403e-25,   9.21044038e-29, ...,
           8.88994077e-25,   1.01638238e-25,   3.27186788e-21]],

       [[  1.05619428e-30,   3.88292435e-18,   5.47979825e-31, ...,
           9.19006773e-32,   1.72853584e-24,   1.96695600e-22]],

       [[  0.00000000e+00,   1.90003164e-30,   0.00000000e+00, ...,
           3.92237798e-36,   6.59743220e-34,   6.80855355e-24]],

       ..., 
       [[  1.91431314e-23,   6.84481698e-21,   3.38823343e-15, ...,
           1.01332935e-26,   6.60143760e-34,   2.08671133e-17]],

       [[  1.99239443e-22,   2.02504948e-17,   2.12193043e-13, ...,
           7.14780871e-24,   3.92068691e-29,   4.51902030e-15]],

       [[  8.25140799e-31,   4.23343546e-24,   2.28869502e-31, ...,
           5.19305846e-21,   3.86441757e-26,   1.01631999e-22]]], dtype=float32))
[  8.25140799e-31   4.23343546e-24   2.28869502e-31 ...,   5.19305846e-21
   3.86441757e-26   1.01631999e-22]
through
('last_words', [[3895, 3895, 976,

((10, 1, 6270), array([[[  2.87505512e-31,   5.48421071e-24,   5.94683237e-25, ...,
           0.00000000e+00,   2.72914759e-32,   1.22125513e-22]],

       [[  7.61585251e-24,   3.86650037e-22,   1.13081610e-11, ...,
           4.35858669e-27,   3.19419894e-15,   9.35591682e-15]],

       [[  0.00000000e+00,   6.13913955e-28,   1.85255553e-25, ...,
           0.00000000e+00,   1.34892052e-34,   2.04248738e-26]],

       ..., 
       [[  2.47167772e-37,   2.34317714e-20,   1.15425854e-27, ...,
           3.08481889e-37,   1.96400724e-19,   3.03690042e-22]],

       [[  2.75926004e-37,   2.89646382e-27,   4.34545166e-28, ...,
           2.49954428e-37,   6.18101447e-27,   6.60652143e-33]],

       [[  3.25462686e-25,   2.83763102e-14,   6.10109061e-35, ...,
           7.04936100e-29,   7.70252811e-29,   1.23308716e-22]]], dtype=float32))
[  3.25462686e-25   2.83763102e-14   6.10109061e-35 ...,   7.04936100e-29
   7.70252811e-29   1.23308716e-22]
dairy
('last_words', [[3895, 750, 976, 11

('next_word', array([[3895, 3895,  292, 5981, 5921, 1841,   40, 1229, 3895, 3895]]))
((10, 1, 6270), array([[[  5.08104187e-37,   2.09551876e-29,   1.56540818e-25, ...,
           6.81460453e-31,   0.00000000e+00,   2.07419615e-22]],

       [[  5.45212832e-29,   4.05457076e-28,   2.92729145e-35, ...,
           2.24366654e-31,   0.00000000e+00,   3.83364819e-31]],

       [[  7.81900161e-28,   0.00000000e+00,   5.07596026e-28, ...,
           2.98947942e-38,   6.47946537e-34,   2.68638916e-27]],

       ..., 
       [[  1.04522805e-07,   4.39147908e-36,   1.01709367e-17, ...,
           4.01083494e-34,   4.90448668e-30,   1.69244330e-17]],

       [[  1.02506920e-18,   1.26580786e-30,   1.55301495e-06, ...,
           1.88698820e-33,   2.20626253e-27,   1.67538493e-19]],

       [[  4.89495390e-33,   5.70570348e-29,   4.39577429e-22, ...,
           2.86223676e-32,   0.00000000e+00,   9.68606742e-18]]], dtype=float32))
[  4.89495390e-33   5.70570348e-29   4.39577429e-22 ...,   2.86223

((10, 1, 6270), array([[[  0.00000000e+00,   9.08410808e-32,   2.26544096e-27, ...,
           0.00000000e+00,   1.50551563e-19,   2.67183307e-23]],

       [[  1.28436982e-28,   5.32373132e-19,   3.07138924e-18, ...,
           1.56845516e-25,   0.00000000e+00,   5.48516977e-25]],

       [[  1.77551391e-29,   2.54881077e-31,   3.74589774e-20, ...,
           0.00000000e+00,   0.00000000e+00,   2.37494015e-21]],

       ..., 
       [[  0.00000000e+00,   5.57353285e-36,   1.29369604e-29, ...,
           0.00000000e+00,   1.62646930e-37,   0.00000000e+00]],

       [[  3.55283666e-37,   6.21336922e-28,   2.03630275e-23, ...,
           0.00000000e+00,   9.51441772e-29,   3.84409207e-25]],

       [[  7.13504913e-34,   1.28691814e-27,   3.20048090e-16, ...,
           0.00000000e+00,   3.19155422e-25,   1.72161655e-24]]], dtype=float32))
[  7.13504913e-34   1.28691814e-27   3.20048090e-16 ...,   0.00000000e+00
   3.19155422e-25   1.72161655e-24]
minutes
('last_words', [[1213, 976, 2587,

((10, 1, 6270), array([[[  1.48952272e-25,   0.00000000e+00,   5.36428125e-15, ...,
           8.42554809e-29,   9.80860287e-32,   4.89294414e-18]],

       [[  3.24500221e-34,   3.82349723e-22,   1.46896158e-11, ...,
           4.09763069e-24,   1.48256398e-24,   6.35691451e-21]],

       [[  3.08620743e-21,   0.00000000e+00,   4.82313118e-20, ...,
           2.29121164e-27,   6.51725292e-34,   5.60524523e-21]],

       ..., 
       [[  3.59951551e-23,   3.49556138e-32,   1.53764534e-14, ...,
           5.93330002e-33,   1.79752217e-32,   3.96586389e-37]],

       [[  1.57562009e-19,   2.18264081e-25,   2.05719514e-16, ...,
           6.07595691e-32,   7.08602204e-19,   1.00045946e-27]],

       [[  9.64284612e-33,   0.00000000e+00,   1.29222826e-12, ...,
           1.96404244e-37,   1.31750539e-36,   1.94114713e-21]]], dtype=float32))
[  9.64284612e-33   0.00000000e+00   1.29222826e-12 ...,   1.96404244e-37
   1.31750539e-36   1.94114713e-21]
to
('last_words', [[6034, 976, 566, 976, 

Harry Potter went to see the street even it was she , letters Hedwig suppose aunt always this rid uncle uncle uncle uncle of dudley . -- much had letters , 
 things . , 
 . which real leaning 
 . i'm 
 that's look have . . Malfoy , the 
 lot to . . . . me the . chocolate 
 around . have shook over nicolas 
 noticed 
 marks face . corridors . where . didn't 
 far second , 
 . . i'm felt . nimbus 
 . usual nimbus , . fell leaves 
 last he's 
 will thought where Filch , . 
 , father Hermione 
 
 , . . Weasley cold know . , he'd hogwarts hands again he , 
 . , mountain struggled first did through gone . ron stood . 
 forward , . 
 quirrell's . , to brooms . wall . . to jerked top about these note . . . face expected gray . try . 
 . , this why though . ran guarding . . think father 
 against have dark , . given new 
 . harry's 
 . , 
 
 had for from 
 
 their , 
 green 
 nighttime sure through this . to 
 the grounds , . it's , . the around . in 
 between . smiling 
 
 cry quick ron . , 
 