In [None]:
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import get_file
import numpy as np
import random
import sys
import io
import requests
import re

In [None]:
# Download the raw training text.
# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() aborts here on an HTTP error instead of letting an
# error page be used as the training corpus further down.
r = requests.get(
    "https://data.heatonresearch.com/data/t81-558/text/treasure_island.txt",
    timeout=30,
)
r.raise_for_status()
raw_text = r.text
# print(raw_text[0:1000])

In [None]:
# Normalise the corpus in one pass: lower-case the text, then drop every
# character outside the 7-bit ASCII range.
processed_text = re.sub(r'[^\x00-\x7f]', r'', raw_text.lower())


In [None]:
# Build the character vocabulary and the two lookup tables used to encode
# (character -> index) and decode (index -> character).
print('corpus length', len(processed_text))
chars = sorted(set(processed_text))
print('length of chars', len(chars))
char_indices = {symbol: position for position, symbol in enumerate(chars)}
indices_chars = dict(enumerate(chars))


corpus length 397400
length of chars 60


In [None]:
# Cut the corpus into overlapping windows of `maxlen` characters, advancing
# `step` characters each time. `sentences` holds the windows and `next_char`
# holds the single character that follows each window (the training target).
maxlen = 40
step = 3
sentences = [
    processed_text[start:start + maxlen]
    for start in range(0, len(processed_text) - maxlen, step)
]
next_char = [
    processed_text[start + maxlen]
    for start in range(0, len(processed_text) - maxlen, step)
]
print('The Sequence is', len(sentences))

The Sequence is 132454


In [None]:
# sentences

In [None]:
# onehot encoding
print('Vectorization...')
x=np.zeros((len(sentences),maxlen,len(chars)),dtype=np.bool)
y=np.zeros((len(sentences),len(chars)),dtype=np.bool)
for i,sentence in enumerate(sentences):
  for t, char in enumerate(sentence):
    x[i,t,char_indices[char]]=1
  y[i,char_indices[next_char[i]]]=1

Vectorization...


In [None]:
# Shape check for the one-hot input array: (number of windows, maxlen, vocabulary size).
x.shape

(132454, 40, 60)

In [None]:
# Shape check for the one-hot target array: (number of windows, vocabulary size).
y.shape

(132454, 60)

In [None]:
# Peek at the first ten one-hot target rows; each row should contain a single True.
y[0:10]

array([[False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False,  True, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False],
       [False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False,  True, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False],
       [False, False, False, False, False, Fal

In [None]:
# Single-layer LSTM that reads a (maxlen, vocabulary size) one-hot window and
# outputs a softmax distribution over the next character.
print('Build Model...')
model=Sequential()
model.add(LSTM(128,input_shape=(maxlen,len(chars))))
model.add(Dense(len(chars),activation='softmax'))
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
optimizer=RMSprop(learning_rate=0.01)
# Targets are one-hot vectors, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy',optimizer=optimizer)

Build Model...


In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               96768     
_________________________________________________________________
dense (Dense)                (None, 60)                7740      
Total params: 104,508
Trainable params: 104,508
Non-trainable params: 0
_________________________________________________________________


In [None]:
def sample(preds,temperature=0.1):
  """Draw one class index from a probability vector after temperature scaling.

  The probabilities are re-weighted as p**(1/temperature) and renormalised,
  then a single index is drawn from the resulting distribution. Temperatures
  below 1 sharpen the distribution (more conservative choices); temperatures
  above 1 flatten it (more surprising choices).

  Args:
    preds: 1-D array-like of non-negative class probabilities.
    temperature: positive scaling factor.

  Returns:
    The sampled class index (NumPy integer).

  Raises:
    ValueError: if `temperature` is not strictly positive.
  """
  if temperature <= 0:
    raise ValueError('temperature must be strictly positive')
  preds=np.asarray(preds).astype('float64')
  # log(0) == -inf is exactly what is wanted for zero-probability classes
  # (they stay at probability 0), so suppress the divide-by-zero warning.
  with np.errstate(divide='ignore'):
    scaled=np.log(preds)/temperature
  # Shift so the largest logit is 0 before exponentiating. Without this, small
  # temperatures make every exp() underflow to 0, the normalisation becomes
  # 0/0 = NaN, and np.random.multinomial raises ValueError.
  scaled=scaled-np.max(scaled)
  exp_preds=np.exp(scaled)
  probs=exp_preds/np.sum(exp_preds)
  probas=np.random.multinomial(1,probs,1)
  return np.argmax(probas)
  

In [None]:
import random
import sys
def on_epoch_end(epoch, _):
  """Keras epoch-end hook: print 400 generated characters per temperature.

  One random seed window of `maxlen` characters is taken from
  `processed_text` and reused for every temperature in
  (0.2, 0.5, 1.0, 1.2). For each temperature the model repeatedly predicts
  the next character from the current window, and the window then slides
  forward by one character. Both arguments are part of the callback
  signature and are not used.
  """
  seed_start = random.randint(0, len(processed_text) - maxlen - 1)
  seed_text = processed_text[seed_start:seed_start + maxlen]

  for temperature in (0.2, 0.5, 1.0, 1.2):
    print('----- temperature:', temperature)
    print('----- Generating with seed: "' + seed_text + '"')
    sys.stdout.write(seed_text)

    window = seed_text
    for _step in range(400):
      # One-hot encode the current window as a batch of size one.
      encoded = np.zeros((1, maxlen, len(chars)))
      for position, symbol in enumerate(window):
        encoded[0, position, char_indices[symbol]] = 1.

      distribution = model.predict(encoded, verbose=0)[0]
      emitted = indices_chars[sample(distribution, temperature)]

      # Slide the window: drop the oldest character, append the new one.
      window = window[1:] + emitted

      # Stream each character as soon as it is produced.
      sys.stdout.write(emitted)
      sys.stdout.flush()
    print()

In [None]:
# Ignore useless W0819 warnings generated by TensorFlow 2.0.  Hopefully can remove this ignore in the future.
# See https://github.com/tensorflow/tensorflow/issues/31308
import logging
import os
logging.disable(logging.WARNING)
# NOTE(review): TensorFlow has already been imported by the first cell, so this
# variable may be set too late to affect its native log output - confirm, and
# move the assignment ahead of the TensorFlow import if it is still needed.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Fit the model
# The callback prints sample text generated by the current weights after every epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Keep the returned History object so the per-epoch loss can be inspected
# afterwards (history.history), instead of leaving only an opaque repr as the
# cell output.
history = model.fit(x, y,
                    batch_size=128,
                    epochs=60,
                    callbacks=[print_callback])

Epoch 1/60
----- temperature: 0.2
----- Generating with seed: " is on a wrong tack, i
do believe. and "
 is on a wrong tack, i
do believe. and the stare of the man stare of the story of the stare of the captain, and and the stare of the stare of the boat was a share of the boat was and the stare of the black of the boat was the stare and the strong of the stare of the boat was a ship of the stare of the stare of the stare of the stare of the stare of the stand of the stare of the stare of the stare of the stare of the stare of the stare 
----- temperature: 0.5
----- Generating with seed: " is on a wrong tack, i
do believe. and "
 is on a wrong tack, i
do believe. and a go the been stand of the out in the strong on the see stare, and the should on his hand, the strange but on to the shores. and his poss at the pire of a sterned of the first of the barrer work in the broke of the strough of the state, fell began to the stife, and the sure one of the bland of the stares of or father and t

<tensorflow.python.keras.callbacks.History at 0x7f39529c7e80>