In [None]:
import numpy as np
from tensorflow import keras
# Download (or reuse the cached copy of) the Nietzsche corpus.
# The host must be s3.amazonaws.com and the bucket path text-datasets; the
# previous misspelled URL fetched an unrelated ~1 KB page instead of the book.
# NOTE: get_file reuses an existing local 'nietzsche.txt'. If a bad copy was
# cached by an earlier run with the wrong URL, delete it before re-running.
path = keras.utils.get_file(
    'nietzsche.txt',
    origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# Read with an explicit encoding and close the handle deterministically.
with open(path, encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('Corpus length:', len(text))


Downloading data from https://s3.amozonaws.com/text.dataset/nietzsche.txt
Corpus length: 1054


In [None]:
# Sliding-window settings: each training sample is `maxlen` characters long
# and consecutive samples start `step` characters apart.
maxlen = 60
step = 3

# Cut the corpus into overlapping windows and remember, for each window,
# the single character that immediately follows it (the prediction target).
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('Number of sequence:', len(sentences))

# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(text))

print('Unique characters:', len(chars))

# Map each character to its position in `chars`.
char_indices = {char: position for position, char in enumerate(chars)}

print('Vectorization...')
# x[sample, timestep, char_id] and y[sample, char_id] are one-hot boolean arrays.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool_)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool_)
for row, (sentence, target_char) in enumerate(zip(sentences, next_chars)):
  for timestep, char in enumerate(sentence):
    x[row, timestep, char_indices[char]] = 1
  y[row, char_indices[target_char]] = 1
print('...done')

Number of sequence: 332
Unique characters: 52
Vectorization...
...done


In [None]:
from tensorflow.keras import layers

# Single LSTM layer over one-hot character windows of shape
# (maxlen, vocabulary size), followed by a softmax over the vocabulary.
vocab_size = len(chars)
model = keras.models.Sequential([
  layers.LSTM(128, input_shape=(maxlen, vocab_size)),
  layers.Dense(vocab_size, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot targets built in `y`.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 128)               92672     
                                                                 
 dense_2 (Dense)             (None, 52)                6708      
                                                                 
Total params: 99380 (388.20 KB)
Trainable params: 99380 (388.20 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
def sample(preds, temperature=1.0):
  """Draw one class index from `preds` after temperature reweighting.

  The distribution is reweighted as p_i ** (1 / temperature) and
  renormalized, then a single index is drawn from it with
  `np.random.multinomial` (global NumPy random state).

  Args:
    preds: 1-D array-like of class probabilities. At least one entry must
      be positive; zero entries are allowed and are never selected when
      temperature is positive.
    temperature: non-zero scaling factor. Values below 1 sharpen the
      distribution, values above 1 flatten it.

  Returns:
    The sampled index (NumPy integer, as returned by `np.argmax`).

  Raises:
    ValueError: if `temperature` is 0, for which the reweighting is undefined.
  """
  if temperature == 0:
    raise ValueError('temperature must be non-zero')

  probs = np.asarray(preds).astype('float64')
  # log(0) = -inf is the intended value for a zero-probability class, so
  # suppress only that warning, and only for this expression.
  with np.errstate(divide='ignore'):
    logits = np.log(probs) / temperature
  # Shift so the largest logit is 0 before exponentiating. Without the shift
  # a small temperature makes every exp() underflow to 0 and the
  # normalization becomes 0/0 = NaN.
  logits = logits - np.max(logits)
  weights = np.exp(logits)
  probs = weights / np.sum(weights)
  # One multinomial trial yields a one-hot row; argmax recovers its index.
  draw = np.random.multinomial(1, probs, 1)

  return np.argmax(draw)

In [None]:
import random
import sys

NUM_EPOCHS = 3
CHAR_GENERATED_TEXT = 400
TEMPERATURES = [0.2, 0.5, 1.0, 1.2]

# range() excludes its stop value, so stop at NUM_EPOCHS + 1 to train exactly
# NUM_EPOCHS times (range(1, NUM_EPOCHS) dropped the final epoch).
for epoch in range(1, NUM_EPOCHS + 1):
  print('epoch',epoch)

  # Train for a single pass so text can be sampled after every epoch.
  model.fit(x,y,batch_size=128,epochs=1)

  # Pick a random maxlen-character window of the corpus as the seed.
  start_index = random.randint(0, len(text) - maxlen - 1)
  seed_text = text[start_index: start_index + maxlen]
  print(f"---Generating with seed: \"{seed_text}\"")

  for temperature in TEMPERATURES:
    print(f"------temperature: {temperature}")
    # Restart from the printed seed for every temperature so the outputs are
    # comparable; previously each run continued from the prior run's tail.
    generated_text = seed_text
    sys.stdout.write(generated_text)

    for _ in range(CHAR_GENERATED_TEXT):
      # One-hot encode the current window as a batch of one.
      sampled = np.zeros((1, maxlen, len(chars)))
      for t, char in enumerate(generated_text):
        sampled[0,t,char_indices[char]] = 1.

      preds = model.predict(sampled, verbose=0)[0]
      next_index = sample(preds, temperature)
      next_char = chars[next_index]

      # Slide the window: append the new character, drop the oldest one.
      generated_text += next_char
      generated_text = generated_text[1:]

      sys.stdout.write(next_char)
      sys.stdout.flush()

    # Terminate the generated line so the next header starts on its own line.
    print()

epoch 1
---Generating with seed: ""data:image/png;base64,ivborw0kggoaaaansuheugaaaaeaaaabcaiaa"
------temperature: 0.2
"data:image/png;base64,ivborw0kggoaaaansuheugaaaaeaaaabcaiaaigamiia iiiiiiiisigiiiiiiiiiainiiiiidaiiggiiiaaiiiiiiisi>iiiiii=iiiiiaiiiiauiiiiiiiiiiaaiiaiiiiiiiiiciisiiliiiaiiiiigsiiiiiiiiijniiiiiiiiiiiieiuiiiiiiiigsiiiiiigiigiiiicii isiiiiiiiiiuiiiiiiiiiaiiiiceeieiniiisigiiaa>miiiiiidiiiiiaaa iiiiiiiiiiliiiswiiiiiiaiieiiaii lfiiiiiiiiaaiiiiiiiiiiiaiiidisiiiiiiiiiiiiiiiiiiiiiiiiiiiiigni"iniiiiiciaieaaiwginiiiiliiiigicwiiiiiiiaiiiiiiiiiiiidii liiiiiiaaaiiiiii------temperature: 0.5
aaiwginiiiiliiiigicwiiiiiiiaiiiiiiiiiiiidii liiiiiiaaaiiiiiigim3iif ciighig=iraljeyjjnl>jiiwi8ifi>ninie ljni=iigigagj"jmes>isiefc;gaf ijinin6ihnigaisiliijisgifgg>i<edzasggld  hpiginvclflfilniiumiidgsieii>aes1agi  1i=gisipzcaiiiicgsli8s=ija tqmtw/iariiiwbg ibw< cecdi is<seaizardlvgifdbesiaraag>kgwsuaaijn"iddiwnsmalw l1>lez<il"ngiiy0gaisnm"ij ii sg"iigir wiacausiaaiifjwiijsx itaiw1aaascadiiniiwiai