<a href="https://colab.research.google.com/github/russellheines/ml-experiments/blob/main/Chapter8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

8.2 Downloading and parsing the initial text file

In [2]:
import keras
import numpy as np

path = keras.utils.get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length:', len(text))

Corpus length: 600893


8.3 Vectorizing sequences of characters

In [3]:
maxlen = 60
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
  sentences.append(text[i: i + maxlen])
  next_chars.append(text[i + maxlen])

print('Number of sequences:', len(sentences))

chars = sorted(list(set(text)))
print('Unique characteres:', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)

print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)

for i, sentence in enumerate(sentences):
  for t, char in enumerate(sentence):
    x[i, t, char_indices[char]] = 1
  y[i, char_indices[next_chars[i]]] = 1

Number of sequences: 200278
Unique characteres: 57
Vectorization...


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y = np.zeros((len(sentences), len(chars)), dtype=np.bool)


8.4 Single-later LSTM model for next-character prediction

In [4]:
from keras import layers

model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

8.5 Model compilation configuration

In [7]:
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)



8.6 Function to sample the next character given the model's predictions

In [8]:
def sample(preds, temperature=1.0):
  preds = np.asarray(preds).astype('float64')
  preds = np.log(preds) / temperature
  exp_preds = np.exp(preds)
  preds = exp_preds / np.sum(exp_preds)
  probas = np.random.multinomial(1, preds, 1)
  return np.argmax(probas)

8.7 Text generation loop

In [12]:
model.fit(x, y, batch_size=128, epochs=1)



<keras.src.callbacks.History at 0x7fab6dd5b850>

In [15]:
import random
import sys

start_index = random.randint(0, len(text) - maxlen - 1)
generated_text = text[start_index: start_index + maxlen]
print('--- Generating with seed: "' + generated_text + '"')

for temperature in [0.2, 0.5, 1.0, 1.2]:
  print('------ temperature:', temperature)
  sys.stdout.write(generated_text)

  for i in range(400):
    sampled = np.zeros((1, maxlen, len(chars)))
    for t, char in enumerate(generated_text):
      sampled[0, t, char_indices[char]] = 1

    preds = model.predict(sampled, verbose=0)[0]
    next_index = sample(preds, temperature)
    next_char = chars[next_index]

    generated_text += next_char
    generated_text = generated_text[1:]

    sys.stdout.write(next_char)

  print()

--- Generating with seed: "aordinary furtherers of humanity whom one
calls philosophers"
------ temperature: 0.2
aordinary furtherers of humanity whom one
calls philosophers and the sence to the fore to the fore the senter and the sond as in the sond the reat the sented and it the man the reall the sond the mange and and and and the fore the one the sond the mant and the recente songer soment of the more the songer the senter and condente the of the resting the mere the senter and the of and the expreation and and conders and and and the fored and and mong the sented
------ temperature: 0.5
nd and conders and and and the fored and and mong the sented the wist be and the ghe preceland, in the sences in the soull gould of in the in the goress in the meall at the mations be us of the bead for to thes meding and best of one of the fors in the have fore tree wern cous the resen at mere has in the menting, and mencing the man the mane and the gorde systong and cevion of the concous and compo