# 03: LSTM Text Generation

In [1]:
import numpy as np
from pickle import load
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# no random seed is set on purpose: a fixed seed would make the sampled text identical on every run

In [2]:
# function to load the selected sequence data generated during our data preparation
def load_sequences(path_and_filename, encoding=None):
    """Load newline-separated token sequences from a text file.

    Parameters
    ----------
    path_and_filename : str or path-like
        Location of the text file. Each line is treated as one sequence of
        whitespace-separated tokens.
    encoding : str, optional
        Text encoding handed to ``open``. The default (``None``) keeps the
        platform default, which is what this function always used.

    Returns
    -------
    sequences : list of str
        The file contents split on newline characters. A file that ends with
        a newline therefore yields a final empty string.
    words_in_seq : int
        Number of whitespace-separated tokens in the first sequence minus one
        (the remaining token is reported as the output token).
    """
    # the context manager guarantees the file handle is closed, even if reading fails
    with open(path_and_filename, encoding=encoding) as sequence_file:
        sequence_data = sequence_file.read()
    sequences = sequence_data.split('\n')

    # measured on the first line only; one token is set aside as the output token
    words_in_seq = len(sequences[0].split()) - 1

    print(f'{len(sequences)} sequences have been loaded.')
    print(f'Each sequence has {words_in_seq} word token(s) plus an output token.')
    return sequences, words_in_seq

We'll use the above function to reload our sequences into this kernel.

In [3]:
# reload the prepared sequences and the number of input tokens per sequence
sequence_list, seq_length = load_sequences(
    '../../data/Poe_NLG/03_Text_files_for_models/cleaned_poe_tot_seq_len_26.txt'
)

480044 sequences have been loaded.
Each sequence has 25 word token(s) plus an output token.


Next, we load in our saved model that we downloaded from AWS and placed in our repo.

In [4]:
# restore the trained word-level LSTM chosen during modeling (file was downloaded from AWS)
model = load_model(
    './Models/Models_25_seqlen_LSTM_model/25_seqlen_LSTM_model_word_model.h5'
)

We'll reload our saved tokenizer to reuse for language generation.

In [5]:
# loading the tokenizer we created in the previous notebook
# NOTE: unpickling can execute arbitrary code -- only load pickle files you created or trust
# the context manager closes the file handle once the tokenizer has been read
with open('./Models/Models_25_seqlen_LSTM_model/25_seqlen_LSTM_model_tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)

In [6]:
# generating text from our model based on the provided seed text
def generate_text(model, tokenizer, seq_length, seed_text, num_new_words, temperature=1.0):
    """Generate ``num_new_words`` words by repeatedly sampling the model's next-word prediction.

    Parameters
    ----------
    model : object with a ``predict(encoded, verbose=0)`` method
        Called once per generated word; the first row of its result is used
        as the sampling weights over word indices.
    tokenizer : object with ``texts_to_sequences`` and ``word_index``
        Converts text to integer sequences; ``word_index`` maps word -> index.
    seq_length : int
        Length the encoded input is padded / truncated to (oldest tokens are
        dropped first) before it is given to the model.
    seed_text : str
        Text used to start generation.
    num_new_words : int
        How many words to generate.
    temperature : float, optional
        Sampling temperature. ``1.0`` (default) samples from the model's
        probabilities unchanged; values below 1 sharpen the distribution and
        values above 1 flatten it.

    Returns
    -------
    str
        The generated words joined by single spaces (the seed text is not
        included). A sampled index with no entry in ``word_index`` contributes
        an empty string.

    Raises
    ------
    ValueError
        If ``temperature`` is not greater than zero.
    """
    if temperature <= 0:
        raise ValueError('temperature must be greater than zero')

    # build the index -> word lookup once instead of scanning word_index for every generated word;
    # setdefault keeps the first word seen for an index, matching a first-match linear search
    index_to_word = {}
    for word, index in tokenizer.word_index.items():
        index_to_word.setdefault(index, word)

    # create a variable to hold our generated words
    result = []

    # set the input text as the seed text to start -- it grows as new words are added
    input_text = seed_text

    # we'll loop through next word predictions based on the number of words requested in the function call
    for _ in range(num_new_words):

        # convert input text to integers
        encoded = tokenizer.texts_to_sequences([input_text])[0]

        # pad / truncate so the input has the length the model expects (oldest tokens are dropped first)
        encoded = pad_sequences([encoded], maxlen=seq_length, truncating='pre')

        # the model predicts a probability for each word index; verbose=0 avoids a progress bar per word
        predictions = model.predict(encoded, verbose=0)

        # work in float64 and renormalise so the weights sum to 1 after any temperature scaling
        probabilities = np.asarray(predictions[0], dtype=np.float64)
        if temperature != 1.0:
            # p ** (1 / T), renormalised, is the usual temperature adjustment applied to probabilities
            probabilities = probabilities ** (1.0 / temperature)
        probabilities = probabilities / probabilities.sum()

        # we'll use the probabilities as weights and make a random choice based on those weights
        pred = int(np.random.choice(len(probabilities), p=probabilities))

        # look up the sampled word; an index with no word falls back to an empty string
        output_word = index_to_word.get(pred, '')

        # we'll append the new word to our input string for the next iteration
        input_text += ' ' + output_word

        # we'll add the output word to the result word list
        result.append(output_word)

    # finally, when we finish generating the requested number of words, we send back the results
    return ' '.join(result)

In [7]:
# sample a 50-word continuation for each seed phrase and print it under its seed
seed_text = [
    'A woman stands by a large ',
    'A family looks over the side of a ',
    'The wind howls across the ',
    'The sky bleeds ',
    'The meaning of life is',
]
for seed in seed_text:
    generated = generate_text(model, tokenizer, seq_length, seed, 50)
    print(f'Seed text:\n{seed}\n')
    print(f'\n{generated}...\n')
    print(f'END OF GENERATED TEXT\n')
    print(f'------------\n')


Seed text:
A woman stands by a large 


bird - eyed Frinchman called horrors hold gently with a funnel vault and then faintly steadily with an instant . They never succeed in getting out of blood , extending against the southern gate of her chair , and from one astronomers which ever struck the first chasm . They...

END OF GENERATED TEXT

------------

Seed text:
A family looks over the side of a 


seat , forming his associates picked up with the bees and No - sounding scarf . We therefore gave a bill of events to dancing , bearing a hideous many bottles of thoughts . " But yet go -- from comparison with the wings of Mr . Seabright Ellison ....

END OF GENERATED TEXT

------------

Seed text:
The wind howls across the 


apartment . And the shadow around , although its color sprang away together , almost intoxicated as the eye did been drowned . A great rush took fat , and they lay prostrate in as operated to assure him as yet to follow . The case of her sail seems...

END OF GENERAT