In [10]:
import tensorflow as tf
import numpy as np
import os
import time
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense,Embedding,GRU
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import RNN, LSTM, RepeatVector

In [11]:
# Vocabulary of the character-level model. The position of a character in
# `idx2char` is its integer id; the ids must stay in this exact order because
# the embedding and output layers are indexed by them.
idx2char = list(
    "\n\r !$%()*,-./"                 # ids 0-12: line breaks, space, punctuation
    "0123456789"                      # ids 13-22
    ":;?@"                            # ids 23-26
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"      # ids 27-52
    "[]`"                             # ids 53-55
    "abcdefghijklmnopqrstuvwxyz"      # ids 56-81
    "‘’“”\ufeff"                      # ids 82-86: curly quotes and the BOM character
)
# Reverse lookup (character -> id), derived from the list so the two tables
# cannot drift apart.
char2idx = {symbol: position for position, symbol in enumerate(idx2char)}
print(len(char2idx))
print(len(idx2char))


87
87


In [12]:
def to_text(sample):
    """Decode an iterable of character ids into a string.

    Each element of `sample` is cast to ``int`` and looked up in the
    module-level ``idx2char`` table; the resulting characters are
    concatenated in order.
    """
    decoded = []
    for token_id in sample:
        decoded.append(idx2char[int(token_id)])
    return ''.join(decoded)

In [13]:
def build_model(batch_size=64, embedding_dim=256, rnn_units=1024):
    """Build the character-level language model: Embedding -> LSTM -> Dense.

    Args:
        batch_size: Fixed batch dimension of the model input. It has to be
            fixed because the LSTM is stateful.
        embedding_dim: Size of the character embedding vectors (default 256).
        rnn_units: Number of LSTM units (default 1024).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model that maps integer
        character ids of shape ``(batch_size, seq_len)`` to per-step
        scores of shape ``(batch_size, seq_len, vocab_size)``.

    Note:
        The defaults reproduce the original hard-coded architecture; saved
        weights can only be loaded into a model built with the same sizes.
    """
    vocab_size = len(char2idx)
    model = tf.keras.Sequential()
    model.add(Embedding(vocab_size, embedding_dim,
                        batch_input_shape=[batch_size, None]))
    model.add(LSTM(rnn_units, return_sequences=True,
                   # Stateful: the recurrent state is kept between calls until
                   # model.reset_states() is invoked, which is what allows
                   # feeding the model one character at a time.
                   stateful=True,
                   recurrent_initializer='glorot_uniform'))
    # No activation: the layer outputs raw logits over the vocabulary.
    model.add(Dense(vocab_size))
    return model
        
# Build the network with a batch dimension of 1 and show its layers.
model = build_model(batch_size=1)
model.summary()


def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits."""
    return tf.keras.losses.sparse_categorical_crossentropy(
        labels,
        logits,
        from_logits=True,
    )


# A custom loss function is passed (rather than the string
# 'sparse_categorical_crossentropy') so that from_logits=True can be set.
model.compile(optimizer='adam', loss=loss)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 256)            22272     
_________________________________________________________________
lstm_1 (LSTM)                (1, None, 1024)           5246976   
_________________________________________________________________
dense_1 (Dense)              (1, None, 87)             89175     
Total params: 5,358,423
Trainable params: 5,358,423
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Load previously trained parameters into `model` from the file
# 'weights_4000.h5' (path is relative to the current working directory).
# NOTE(review): presumably a checkpoint saved after 4000 training steps/epochs
# of a model with the same layer sizes -- confirm against the training notebook.
model.load_weights('weights_4000.h5')

In [15]:
def generate_text(model, start_string, size=1000, temperature=1., verbose=0):
    """Generate text character by character, seeded with `start_string`.

    The seed is fed to the model in one call; afterwards only the most
    recently sampled character is fed back, and the model's recurrent state
    (reset at the start of this function) carries the context forward.
    The generated characters are printed as they are produced.

    Args:
        model: Model returning per-step logits over the vocabulary; it must
            support ``reset_states()`` and accept a batch of size 1.
        start_string: Non-empty seed text. Every character must be a key of
            ``char2idx``.
        size: Number of characters to generate.
        temperature: Positive sampling temperature. The logits are divided by
            it, so values below 1 make the output more predictable and values
            above 1 make it more random. 1 leaves the distribution unchanged.
        verbose: If truthy, print the input, the arg-max prediction, the
            candidate samples and the text so far at every step.

    Returns:
        `start_string` followed by the `size` generated characters.

    Raises:
        ValueError: If `start_string` is empty or `temperature` is not positive.
        KeyError: If `start_string` contains a character missing from ``char2idx``.
    """
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    print('Text:',start_string,end='')

    # Convert the start_string to numbers (vectorizing) and add a batch axis.
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Characters generated so far.
    text_generated = []

    # Only the first draw is ever used; the extra draws exist purely so that
    # verbose mode can display alternative candidates.
    num_samples = 10 if verbose else 1

    model.reset_states()
    for _ in range(size):
        if verbose:
            print("====================================================")
            print('input:',to_text(input_eval.numpy()[0]))
        predictions = model(input_eval)

        if verbose:
            print('"Best" prediction:',to_text(predictions.numpy().argmax(axis=2)[0]))
        # Remove the batch dimension: one row of logits per input time step.
        predictions = tf.squeeze(predictions, 0)

        # Always taking the best (arg-max) prediction is NOT a good idea: the
        # text easily falls into a loop. Instead, sample the next character
        # from the categorical distribution defined by the logits.
        predictions = predictions / temperature
        samples = tf.random.categorical(predictions, num_samples=num_samples)
        if verbose:
            print('sampled predictions:')
            for j in range(samples.shape[1]):
                print(to_text(samples[:,j].numpy()), end=', ')
            print()

        # Last time step, first draw. Cast to a plain int for list indexing.
        predicted_id = int(samples[-1,0].numpy())

        if verbose:
            print('chosen_id',predicted_id,'letter:',idx2char[predicted_id])

        # Pass the predicted character as the next input to the model; the
        # previous hidden state is kept inside the (stateful) model.
        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx2char[predicted_id])
        if verbose:
            print(start_string + ''.join(text_generated))
        else:
            print(idx2char[predicted_id],end='')

    print()
    return (start_string + ''.join(text_generated))

# Generate 500 characters seeded with "Mowgli ". generate_text prints the
# characters as they are sampled and also returns the complete string.
txt = generate_text(model, start_string="Mowgli ",size=500, temperature=1, verbose=0)
print()
print('='*50)
# `txt` already holds the full text shown above; uncomment to print it again:
##print('The final generated text:\n',txt)

Text: Mowgli laire know you don’t. I’m not talking to you. You don’t know what blood
is.”

“When Bamagain.

Mowgli laid his hands on Baloo and Bagheera to get them away, and the
two great beasts stathe blish in a beat fuss about it.

“I’ve no nursery to fight for,” said Kotick. “I only want to show you
all a plowg four-year-old holluschickie
romped down from Hutchinson’s Hill crying: “Out of the way, youngsteremember that I huntid Mowgli, and he bounded away.

“That is a man. That is all a man,” 



In [18]:
# Generate 500 characters seeded with "Fight "; the returned string replaces
# any earlier value of `txt`.
txt = generate_text(model, start_string="Fight ",size=500, temperature=1, verbose=0)


Text: Fight just
as well have petted Teddy for playing in the dust. Rikki was not fight for,” said Kotick. “I only want to show you
all a placiuni, who had been trained under the Law of the Jungle, did not like
or understand this kind of life. Toomai the Jungle-People to cross
each other’s path. But whenever they found a s reg/fundraising.  Contributions to the Project Gutenberg
Literary Archive Foundation are tax deductibli, who had been trained under the Law of their master. Aaa-ssp! We must remind t
