# **LSTM for Text Generation**

In [0]:
import keras
from keras import layers
import numpy as np
import random
import sys
from keras.callbacks import ModelCheckpoint


In [3]:
path = keras.utils.get_file(
    'sample.txt',
    origin = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt')
text = open(path).read().lower()
print('Number of Words in corpus:',len(text))

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Number of Words in corpus: 600893


# **Data Preprocessing**

In [4]:
maxlen = 100
step = 5
sentences = []
next_chars = []
for i in range(0,len(text) - maxlen,step):
  sentences.append(text[i: i + maxlen])
  next_chars.append(text[i + maxlen])

print('Number of Sentences : ',len(sentences))

Number of Sentences :  120159


In [5]:
#Extracting Unique characters from the Corpus
chars = sorted(list(set(text)))
print('Number of unique characters: ',len(chars))

Number of unique characters:  57


In [0]:
char_indices = dict((char, chars.index(char)) for char in chars)
#Converting characters into one-hot encoding
X = np.zeros((len(sentences),maxlen,len(chars)),dtype = np.bool)
y = np.zeros((len(sentences),len(chars)),dtype = np.bool)
for i, sentence in enumerate(sentences):
  for t,char in enumerate(sentence):
    X[i,t,char_indices[char]]=1
  y[i, char_indices[next_chars[i]]] =1

# **Defining Model**

In [0]:
model = keras.models.Sequential()
model.add(layers.LSTM(128,input_shape=(maxlen,len(chars))))
model.add(layers.Dense(len(chars),activation='softmax'))
optimizer = keras.optimizers.RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy',optimizer=optimizer)

# **Training Model**

In [0]:
def sample(preds,temprature=1.0):
  preds = np.asarray(preds).astype('float64')
  preds = np.log(preds)/temprature
  exp_preds = np.exp(preds)
  preds = exp_preds /np.sum(exp_preds)
  probas = np.random.multinomial(1,preds,1)
  return np.argmax(probas)

filepath = "weights-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss',verbose=1,save_best_only=True,mode='min')
callbacks_list = [checkpoint]
for epoch in range(1,60):
  print('epoch',epoch)
  model.fit(X,y,batch_size=128,
            epochs=1,
            callbacks = callbacks_list)
  
  start_index = random.randint(0,len(text)- maxlen -1)
  generated_text = text[start_index : start_index + maxlen]
  print('---Sedded text: "' + generated_text + '"')

  for temprature in [0.2,0.5,1.0,1.2]:
    print('---Selected temprature :', temprature)
    sys.stdout.write(generated_text)

    for i in range(400):
      sampled = np.zeros((1,maxlen, len(chars)))
      for t, char in enumerate(generated_text):
        sampled[0,t, char_indices[char]] = 1.
      preds = model.predict(sampled, verbose =0)[0]
      next_index = sample(preds, temprature)
      next_char = chars[next_index]

      generated_text +=next_char
      generated_text = generated_text[1:]
      sys.stdout.write(next_char)
      sys.stdout.flush()
    print()

epoch 1
Epoch 1/1

Epoch 00001: loss improved from inf to 2.11464, saving model to weights-01-2.1146.hdf5
---Sedded text: " part of himself? that he thus
analyses his being and sacrifices one part of it to another part? is
"
---Selected temprature : 0.2
 part of himself? that he thus
analyses his being and sacrifices one part of it to another part? is
and and the so all a man and the extind and the to the so the to a self to the so so a most in the former, and become in the so the to the to and the porsess so a man and some and the so the to the so all the extent of the to the excersion of the sount of the seat of a dere of the to the so the so all the some and and some of a deciess and self and sore and the to the the so the becoust of a man a
---Selected temprature : 0.5
ll the some and and some of a deciess and self and sore and the to the the so the becoust of a man and been for which to and been and manker of and man extinds of inself-the men of a men and now and danger in the to 