[View in Colaboratory](https://colab.research.google.com/github/migueltol22/deep-learning-with-python/blob/master/text_generation.ipynb)

In [0]:
# reweighting a probability distribution to a different temperature
import numpy as np

def reweight_distribution(original_distribution, temperature=0.5):
  distribution = np.log(original_distribution) / temperature
  distribution = np.exp(distribution)
  return distribution / np.sum(distribution)

In [3]:
import keras

# nietzche data
path = keras.utils.get_file(
  'nietszche.txt',
  origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Corpus length', len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length 600893


In [7]:
# vectorize sequences of characters

maxlen = 60
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
  sentences.append(text[i: i + maxlen])
  next_chars.append(text[i + maxlen])
  
print('Number of sequences', len(sentences))

chars = sorted(list(set(text)))
print('Unique characters', len(chars))
char_indices = dict((char, chars.index(char)) for char in chars)

x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
  for t, char in enumerate(sentence):
    x[i, t, char_indices[char]] = 1
  y[i, char_indices[next_chars[i]]] = 1

Number of sequences 200278
Unique characters 57


In [0]:
# lstm model for next char prediction

from keras.layers import Dense, LSTM
from keras.models import Sequential
from keras.optimizers import RMSprop

model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [0]:
def sample(preds, temperature=1.0):
  preds = np.asarray(preds).astype('float64')
  preds = np.log(preds) / temperature
  exp_preds = np.exp(preds)
  preds = exp_preds / np.sum(exp_preds)
  probas = np.random.multinomial(1, preds, 1)
  return np.argmax(probas)

In [0]:
import random
import sys

for epoch in range(1, 60):
  print('epoch', epoch)
  model.fit(x, y, batch_size=128, epochs=1)
  start_index = random.randint(0, len(text) - maxlen - 1)
  generated_text = text[start_index: start_index + maxlen]
  print('--- Generating with seed: "' + generated_text + '"' )
  
  for temperature in [0.2, 0.5, 1.0, 1.2]:
    print('----- temperature:', temperature)
    sys.stdout.write(generated_text)
    for i in range(400):
      sampled = np.zeros((1, maxlen, len(chars)))
      for t, char in enumerate(generated_text):
        sampled[0, t, char_indices[char]] = 1
        
      preds = model.predict(sampled, verbose=0)[0]
      next_index = sample(preds, temperature)
      next_char = chars[next_index]
      
      generated_text += next_char
      generated_text = generated_text[1:]
      
      sys.stdout.write(next_char)

epoch 1
Epoch 1/1
--- Generating with seed: "f thought blended with it and all that remains is
_strength_"
----- temperature: 0.2
f thought blended with it and all that remains is
_strength_ as a sure and the sense of the constance and the sense of the still the states of the sense of the sense of the still the still the constance of the sense of the supposer of the althing the good of the consequent of the constrance of the soul as the still as it is a soul in the constance of the hast to the constance of the consequent of the constance of the still as the consequent and the constan----- temperature: 0.5
the constance of the still as the consequent and the constance in the perhaps a culture to the sense upon the consequence and sentiment that the not also the standers the significated and the destrise for the constrance of the fics in instance as only senses and fact with the self to much as sure moral guishter of the charage and in the sune such a
good
of the constrangering to the st