In [2]:
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import Adam
from tensorflow.keras.utils import get_file
import numpy as np
import random
import sys
import io

In [3]:
path = get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')


Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt


In [4]:
with io.open(path, encoding='utf-8') as f:
    text = f.read().lower()


In [12]:
print('\n'.join(text.split('\n')[0:10]))

preface


supposing that truth is a woman--what then? is there not ground
for suspecting that all philosophers, in so far as they have been
dogmatists, have failed to understand women--that the terrible
seriousness and clumsy importunity with which they have usually paid
their addresses to truth, have been unskilled and unseemly methods for
winning a woman? certainly she has never allowed herself to be won; and
at present every kind of dogma stands with sad and discouraged mien--if,


In [5]:
print('corpus length:', len(text))


corpus length: 600893


In [6]:
chars = sorted(list(set(text)))
print('total chars:', len(chars))


total chars: 57


In [7]:
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))


In [8]:
print(char_indices)

{'\n': 0, ' ': 1, '!': 2, '"': 3, "'": 4, '(': 5, ')': 6, ',': 7, '-': 8, '.': 9, '0': 10, '1': 11, '2': 12, '3': 13, '4': 14, '5': 15, '6': 16, '7': 17, '8': 18, '9': 19, ':': 20, ';': 21, '=': 22, '?': 23, '[': 24, ']': 25, '_': 26, 'a': 27, 'b': 28, 'c': 29, 'd': 30, 'e': 31, 'f': 32, 'g': 33, 'h': 34, 'i': 35, 'j': 36, 'k': 37, 'l': 38, 'm': 39, 'n': 40, 'o': 41, 'p': 42, 'q': 43, 'r': 44, 's': 45, 't': 46, 'u': 47, 'v': 48, 'w': 49, 'x': 50, 'y': 51, 'z': 52, 'ä': 53, 'æ': 54, 'é': 55, 'ë': 56}


In [10]:
maxlen =40
step=3
sentences=[]
nextsentences = []
for i in range(0, len(text)-maxlen, step):
     sentences.append(text[i:i+maxlen])
     nextsentences.append(text[i+maxlen])
print("overlapping sequences", len(sentences))

overlapping sequences 200285


In [11]:
print("lengthofnextsentences", len(nextsentences))

lengthofnextsentences 200285


In [16]:
import numpy as np
print('Vectorization...')
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[nextsentences[i]]] = 1


Vectorization...


In [18]:
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars), activation='softmax'))
optimizer = Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [19]:
print(x[0:5])

[[[False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]
  ...
  [False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]]

 [[False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]
  ...
  [False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]]

 [[False False False ... False False False]
  [ True False False ... False False False]
  [ True False False ... False False False]
  ...
  [False False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]]

 [[ True False False ... False False False]
  [False False False ... False False False]
  [False False False ... False False False]
  ...
  [False False False ... False False False]
  

In [23]:
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, _):
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

In [24]:
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

model.fit(x, y,
          batch_size=128,
          epochs=60,
          callbacks=[print_callback])

Epoch 1/60
----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "ty or equally intolerable fanaticism cou"
ty or equally intolerable fanaticism could and so the religion of the will the ever the fore the experious of the contiously and the contious of the consistion, and the server and a present the sometion of the consequented the serfarification of the fare the forter the extinct of the fore and a the contiction of the so the fortious of the so the so the fortious of the so the so the fortious of the farter the sometical the ever the farte
----- diversity: 0.5
----- Generating with seed: "ty or equally intolerable fanaticism cou"
ty or equally intolerable fanaticism courde, a preself of the sare to the porthen the ever and in them and all has the way the self--the farter the fore a courter the exain everyther of this the religiour the religion and an ertions and sistioticition of the conces and acture as all constiost to the contince to from the farin

  preds = np.log(preds) / temperature


 a sure is a philosophers of the same as the same present and ancient and superstition and free spirituality of the present and the
----- diversity: 0.5
----- Generating with seed: "t
the mastery over the inadequate precau"
t
the mastery over the inadequate precautions which must be does not the asceness and where the same than the same
self contemnicous and satisfactable about a seems that as though an one who have been conception, in the fact the so forms of the remain and nothing the deception in an are was not the stronger everything and power and actions and familished. the way the highest end with morality of the time a distrust of granifining especi
----- diversity: 1.0
----- Generating with seed: "t
the mastery over the inadequate precau"
t
the mastery over the inadequate precaution--a rumi addledy espurictly people who cannot basis of one
men and most made abovounent itself. man of
depth sittle them and loves bedoicl-"and commopine, encooldne, and cupacrinity equally
hencefort

<keras.src.callbacks.History at 0x79d9728a5060>

In [21]:
print(y[0:5])

[[False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False  True False False False False False False False
  False False False False False False False False False]
 [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False  True False False False False False False False]
 [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False  True Fals

In [22]:
len(y)

200285