In [23]:
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
from tensorflow import keras
import tensorflow as tf
import numpy as np
import random
import sys
import io
import requests
import re

## This tutorial trains a character-level LSTM on the text of *Treasure Island* and uses it to generate new text

In [2]:
# Download the plain-text corpus that the model will be trained on.
# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get("https://data.heatonresearch.com/data/t81-558/text/"\
                 "treasure_island.txt", timeout=30)
# Fail fast on an HTTP error instead of silently training on an error page.
r.raise_for_status()
raw_text = r.text

In [3]:
# Normalize the corpus: lowercase it first, then drop every character
# that falls outside the 7-bit ASCII range.
processed_text = re.sub(r'[^\x00-\x7f]', r'', raw_text.lower())

In [4]:
print('corpus length:', len(processed_text))  # number of characters in the cleaned corpus

# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(processed_text))
print('total chars:', len(chars))  # vocabulary size

# Two lookup tables so text can be encoded as integer IDs and decoded back.
char_indices = {c: i for i, c in enumerate(chars)}  # character -> ID
indices_char = dict(enumerate(chars))  # ID -> character

corpus length: 397400
total chars: 60


In [5]:
# Slice the corpus into overlapping windows of maxlen characters; each window
# is paired with the single character that follows it (the prediction target).
maxlen = 40  # window length: the model sees 40 characters and predicts the next one
step = 3  # stride between window starts; a stride above 1 cuts down on near-duplicate windows
window_starts = range(0, len(processed_text) - maxlen, step)
sentences = [processed_text[start: start + maxlen] for start in window_starts]
next_chars = [processed_text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 132454


## Vectorization Process
The following code vectorizes the dataset, converting the text sequences into the actual x and y arrays that will go into the model.
Each array is organized by its dimensions (axes).
Notice how x is organized.
##### X Values
    x.shape[0] is the total number of sequences (len(sentences))
    x.shape[1] is the length of each input sequence (maxlen)
    x.shape[2] is the dummy variables (one per character in the vocabulary).
##### Y Values
    y.shape[0] is the total number of sequences
    y.shape[1] is the dummy variables (the expected next character), like x.shape[2].

In [6]:
print('Vectorization...')
# One-hot encode the training data:
#   x[i, t, k] is True when character t of sequence i has vocabulary ID k
#   y[i, k]    is True when the character following sequence i has ID k
# The builtin `bool` is used as the dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = True
    y[i, char_indices[next_chars[i]]] = True

Vectorization...


In [7]:
# build the model: a single LSTM
print('Build model...')
model = Sequential()
# One LSTM layer with 128 units reads a window of maxlen one-hot encoded characters.
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
# One softmax output per vocabulary character: the predicted next-character distribution.
model.add(Dense(len(chars), activation='softmax'))

# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and is rejected by newer Keras releases.
optimizer = RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Build model...


In [8]:
# Print the layer-by-layer architecture and parameter counts of the model built above.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               96768     
_________________________________________________________________
dense (Dense)                (None, 60)                7740      
Total params: 104,508
Trainable params: 104,508
Non-trainable params: 0
_________________________________________________________________


In [9]:
# This is the function that picks the next character of the output text.
# preds are the output neurons: the 60 output probabilities, one per character (the dummy variables we saw before).
# The probabilities are rescaled by the temperature and re-normalized (softmax) so that they sum to 1.
# Temperature => values close to 0 favor the character with the highest probability (conservative text); 1.0 and above is more random.

def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reweighted by temperature.

    The probabilities are raised to the power 1/temperature and re-normalized
    before one index is drawn at random. Low temperatures concentrate the draw
    on the most probable index; high temperatures flatten the distribution.

    Args:
        preds: 1-D array-like of probabilities.
        temperature: Sampling temperature. A value <= 0 is treated as the
            deterministic limit and returns the most probable index.

    Returns:
        The sampled index (a NumPy integer).
    """
    preds = np.asarray(preds).astype('float64')

    if temperature <= 0:
        # Dividing by zero would produce NaN probabilities; the limit of
        # temperature -> 0 is simply a greedy pick of the most likely index.
        return np.argmax(preds)

    # log(0) is -inf, which correctly maps back to probability 0 below, so
    # the divide-by-zero warning it would raise is just noise.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift so the largest logit is 0. This leaves the softmax unchanged but
    # prevents every exp() from underflowing to 0 at very low temperatures.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)

    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [10]:
#This is the text generator

def on_epoch_end(epoch, _):
    """Epoch-end callback: print text generated by the current model.

    A single random seed window of maxlen characters is taken from
    processed_text. For each of four temperatures the model then extends
    that seed by 400 characters, streaming them to stdout as they are drawn.

    Args:
        epoch: Epoch number passed in by the callback; used only in the banner.
        _: Second callback argument; unused.
    """
    def encode(window):
        # One-hot encode a window of characters as a batch holding one sequence.
        encoded = np.zeros((1, maxlen, len(chars)))
        for pos, ch in enumerate(window):
            encoded[0, pos, char_indices[ch]] = 1.
        return encoded

    print("****************************************************************************")
    print('----- Generating text after Epoch: %d' % epoch)

    # randint is inclusive at both ends; this bound keeps a full window inside the corpus.
    seed_start = random.randint(0, len(processed_text) - maxlen - 1)
    seed = processed_text[seed_start: seed_start + maxlen]

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('----- temperature:', temperature)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        # The window starts as the seed and slides forward one character per
        # prediction, so it eventually consists entirely of generated text.
        window = seed
        for _step in range(400):
            probabilities = model.predict(encode(window), verbose=0)[0]
            next_char = indices_char[sample(probabilities, temperature)]
            window = window[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [27]:
# TensorFlow 2.0 emits useless W0819 warnings; silence them for now.
# Hopefully this can be removed in the future.
# See https://github.com/tensorflow/tensorflow/issues/31308
import logging
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
logging.disable(logging.WARNING)

# Train the model; the callback prints generated text at the end of every epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(
    x,
    y,
    batch_size=128,
    epochs=60,
    callbacks=[print_callback],
)

Epoch 1/60
----- Generating text after Epoch: 0
----- temperature: 0.2
----- Generating with seed: "y--fiction.] i. winter, milo, 1888-1956,"
y--fiction.] i. winter, milo, 1888-1956,' says have the stockade, and the stockade, and the boatswain soon and the stockade, and the first the sea the worst and a strong of the stockade, and the boats, and the sight the stranger to and the stockade, and the boats and me the woods and the shore of the stockade, and the shores of the first and the shore, and the boats, and the sure is a pipe of the stockade, and the first the man the stoc
----- temperature: 0.5
----- Generating with seed: "y--fiction.] i. winter, milo, 1888-1956,"
y--fiction.] i. winter, milo, 1888-1956, gentlemeng and the sit in the good on them with the stockade,
under the brace.

"and i've sting the captain, "you are i say say your eyen in a horsed and the captain and i had been my father the bottle and for a man of the stockade. if it was the printing that

                    

you daren't show face in bristol after of the stockade, and the for and stand of the houst of the stockade, we away, slack dog, the stranger to see the color, in the man the booty dirk, of the starr, and a companions of the thing to his for the treasure of a shipmate off his hearte a copy this work and been swall off the statul
and a south. i say, my heart very face, but the coracle and companions and help in the captain.

"i saw 
----- temperature: 1.0
----- Generating with seed: "?
you daren't show face in bristol afte"
?
you daren't show face in bristol after's foot of his stratch, gave, took the starring to
wordoug stateers song, out as while dogngiags outisage, you cholked me out of each; and i kind of
noo a thiction--shelve enough coon back and deatively agrecertain.

high, i could you, think, you're my mots no shoking
interriss. the
most the project gutenberg-tm proted as the perlibly dobow the bens har
sont that the word ripatenround i lo
----- temperature: 1.2
----- Generating

packing the most very fancabter 
 the trick like, opd thinged in the
down the cry wasy threen me usore. inderilance in an
ben indotation uselassed all. and seemed in turned your had
election i am borth, and alacadeasarmom forth or men rensurned, in tiotles dead
spemre.

indeed, you isal would, soies--vaytery, misside. but?"

"murge runt, eh?"

"and we had tell, it is and, doctor before
mes
i tabe
Epoch 8/60
----- Generating text after Epoch: 7
----- temperature: 0.2
----- Generating with seed: "that i had found the boat, you would hav"
that i had found the boat, you would have been the stranger in the starring to the stockade, and a stern of the stranger the stranger the blind
the stockade, and the sure is and standing the stockade, and the boy a
companions of the stockade, and while i had been
spoke the starring his shores and the captain stand of the stockade, and the boy and been say in the starring and the books of the captain beside the starring and beside a s
----- temperature: 0

"doctor livesey," said he, "well, i said, and the silver of the ship here and the signan of the same bloods and the ship was the same time i was the shore.

"doctor's dead," said the captain, "don't her some to the 
----- temperature: 0.5
----- Generating with seed: "ong with him, was admitted at a word int"
ong with him, was admitted at a word into the
sea of the same isle of old best i was market,
no looking for the silver, as much and dessed his shortle
dessed to do to be sure it i done in the most hear the parron would our pring of the party of the treasure.

"and without here in it, and the silver was like a place strange things and before our own the captain smolledt we was pipe
to be sure the captain on the captain. "well, i st
----- temperature: 1.0
----- Generating with seed: "ong with him, was admitted at a word int"
ong with him, was admitted at a word into rose, and
i same laid our were in which holding, and without
this albook into the captain, anders end ofden, inselvess,

KeyboardInterrupt: 