[View in Colaboratory](https://colab.research.google.com/github/silentvoice/dl-with-keras/blob/master/character-level-text-generation-using-lstm.ipynb)

## Check TensorFlow Version

In [0]:
import tensorflow as tf

In [36]:
# Ask TensorFlow whether a GPU is available to this runtime.
tf.test.is_gpu_available()

True

In [37]:
# Version of the Keras API bundled with this TensorFlow build.
tf.keras.__version__

'2.1.6-tf'

In [38]:
# Version of TensorFlow itself.
tf.__version__

'1.9.0-rc2'

## Imports

In [0]:
from tensorflow import keras
from tensorflow.python.keras import layers, models
from tensorflow.python.keras import optimizers
import numpy as np
import random, sys

## Data Preparation

In [0]:
# Fetch the Nietzsche corpus; get_file hands back the path of the local copy.
path = keras.utils.get_file('nietzsche.txt',origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# Read through a context manager so the file handle is closed deterministically,
# and pin the encoding so decoding does not depend on the platform's locale.
with open(path, encoding='utf-8') as corpus_file:
  txt = corpus_file.read().lower()

In [41]:
# Total number of characters in the lowercased corpus.
len(txt)

600893

In [0]:
maxlen = 60  # number of characters in each input window
step = 3     # distance between the starts of consecutive windows

# Slice the corpus into overlapping windows; the target for each window is the
# single character that immediately follows it.
window_starts = range(0, len(txt) - maxlen, step)
sentences = [txt[start: start + maxlen] for start in window_starts]
nxt_chars = [txt[start + maxlen] for start in window_starts]


In [43]:
# Number of training windows produced by the slicing above.
len(sentences)

200278

In [0]:
# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(txt))

In [0]:
# Map each character to its position in `chars`. enumerate yields the index
# directly, avoiding a linear `chars.index` scan per character.
char_indices = {c: i for i, c in enumerate(chars)}

## One-Hot Encode Characters

In [0]:
# Boolean one-hot tensors: x is (windows, maxlen, vocab), y is (windows, vocab).
# Use the builtin `bool` as the dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, while `bool` yields the same array dtype.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

In [0]:
# Fill the one-hot tensors: one row per window, with the matching next
# character as that row's target.
for row, (window, target) in enumerate(zip(sentences, nxt_chars)):
  for pos, ch in enumerate(window):
    x[row, pos, char_indices[ch]] = 1
  y[row, char_indices[target]] = 1

## Build Model

In [0]:
# Single LSTM layer over the one-hot windows, followed by a softmax over the
# vocabulary that scores the next character.
model = models.Sequential([
  layers.LSTM(128, input_shape=(maxlen, len(chars))),
  layers.Dense(len(chars), activation='softmax'),
])

In [0]:
# Targets are one-hot rows, hence categorical cross-entropy.
rmsprop = optimizers.RMSprop(lr=0.01)
model.compile(optimizer=rmsprop, loss='categorical_crossentropy')

In [49]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               95232     
_________________________________________________________________
dense_1 (Dense)              (None, 57)                7353      
Total params: 102,585
Trainable params: 102,585
Non-trainable params: 0
_________________________________________________________________


## Train the Model

In [50]:
# Train for 60 epochs in mini-batches of 128 windows. fit returns a History
# object, which is what shows up as this cell's output.
model.fit(x, y, batch_size=128, epochs=60)

Epoch 1/60
Epoch 2/60


Epoch 3/60


Epoch 4/60


Epoch 5/60


Epoch 6/60


Epoch 7/60


Epoch 8/60


Epoch 9/60


Epoch 10/60


Epoch 11/60


Epoch 12/60


Epoch 13/60


Epoch 14/60


Epoch 15/60


Epoch 16/60


Epoch 17/60


Epoch 18/60


Epoch 19/60


Epoch 20/60


Epoch 21/60


Epoch 22/60


Epoch 23/60


Epoch 24/60


Epoch 25/60


Epoch 26/60


Epoch 27/60


Epoch 28/60


Epoch 29/60


Epoch 30/60


Epoch 31/60


Epoch 32/60


Epoch 33/60


Epoch 34/60


Epoch 35/60


Epoch 36/60


Epoch 37/60


Epoch 38/60


Epoch 39/60


Epoch 40/60


Epoch 41/60


Epoch 42/60


Epoch 43/60


Epoch 44/60


Epoch 45/60


Epoch 46/60


Epoch 47/60


Epoch 48/60


Epoch 49/60


Epoch 50/60


Epoch 51/60


Epoch 52/60


Epoch 53/60


Epoch 54/60


Epoch 55/60


Epoch 56/60


Epoch 57/60


Epoch 58/60


Epoch 59/60


Epoch 60/60




<tensorflow.python.keras.callbacks.History at 0x7f3ce98e9a58>

## Generate Text From Model

In [0]:
def sample(preds, t=1.0):
  """Draw one class index from `preds` after temperature rescaling.

  Args:
    preds: Sequence of class probabilities (the caller passes one softmax
      output row; expected to be 1-D).
    t: Temperature. Values below 1 sharpen the distribution, values above 1
      flatten it, and exactly 0 picks the most probable index.

  Returns:
    The sampled index, as returned by `np.argmax`.
  """
  preds = np.asarray(preds).astype('float64')
  if t == 0:
    # Limit of the rescaling as t -> 0+; also avoids dividing by zero below.
    return np.argmax(preds)
  # log(0) is -inf, which maps back to probability 0 after exp, so silence
  # the divide-by-zero warning rather than clipping the input.
  with np.errstate(divide='ignore'):
    logits = np.log(preds) / t
  # Shift by the maximum before exponentiating. The normalised result is
  # unchanged, but the largest term becomes exp(0) = 1, so a small t can no
  # longer underflow every term to 0 and yield 0/0 = NaN.
  exp_preds = np.exp(logits - np.max(logits))
  preds = exp_preds / np.sum(exp_preds)
  probs = np.random.multinomial(1, preds, 1)
  return np.argmax(probs)

In [0]:
# Pick a random seed position. randint includes both endpoints, so the upper
# bound keeps a full maxlen-character window inside txt.
strt_idx = random.randint(0, len(txt) - maxlen - 1)

In [67]:
# Generate 400 characters at several temperatures, always from the same seed.
for t in (0.2, 0.5, 1.0, 1.2):
  seed_txt = txt[strt_idx: strt_idx + maxlen]
  pieces = [seed_txt]
  for _ in range(400):
    # One-hot encode the current seed window as a batch of one.
    sampled = np.zeros((1, maxlen, len(chars)))
    for pos, ch in enumerate(seed_txt):
      sampled[0, pos, char_indices[ch]] = 1

    preds = model.predict(sampled)[0]
    nxt_char = chars[sample(preds, t)]
    pieces.append(nxt_char)
    # Slide the window: drop the oldest character, append the new one.
    seed_txt = seed_txt[1:] + nxt_char
  gen_txt = ''.join(pieces)
  print(gen_txt)

  This is separate from the ipykernel package so we can avoid doing imports until


 being badly endowed by nature,
and finally, it is an opportuneation of the spirit the greater of the sense of the spirit of the present the present the present the fact the present the moral and say the sense of the sense of the strongt and development of "man's science of the sense of the best and stronger and the intellectual interpretation of the soul, the problem of the fact the truth and conception of the moral and comparation of the conduct of the s
 being badly endowed by nature,
and finally, it is an opportune fact the conscience and rational increased the present and morality of the world, but really the for the entire mankind, be the spirit the truth," in the way, the deception of the profound and have "the far as the german intellect to the entingly mediocve the science," and called the belief, and strive and defenilation of the man is the world of values," and in the sense and case of the presen
 being badly endowed by nature,
and finally, it is an opportuneable rif the la

In [69]:
# Generate 400 characters at one temperature from a fresh random seed.
t = 0.5
strt_idx = random.randint(0, len(txt) - maxlen - 1)
seed_txt = txt[strt_idx: strt_idx + maxlen]
gen_txt = seed_txt
for _ in range(400):
  # One-hot encode the seed window in a single indexed assignment:
  # position k gets a 1 in the column of the k-th seed character.
  sampled = np.zeros((1, maxlen, len(chars)))
  columns = [char_indices[ch] for ch in seed_txt]
  sampled[0, np.arange(len(columns)), columns] = 1
  preds = model.predict(sampled)[0]
  nxt_idx = sample(preds, t)
  nxt_char = chars[nxt_idx]
  gen_txt = gen_txt + nxt_char
  # Slide the window forward by one character.
  seed_txt = seed_txt[1:] + nxt_char

  This is separate from the ipykernel package so we can avoid doing imports until


In [70]:
gen_txt

'manly, conquering, and imperious--all instincts which are nature in the "struggle and do not in order to him the freedom and problem the fact in german call the greater come and "good very fings the strength to the german things in all the end of the moral outsima than the more such a regard that it is be promise as a powerfnh, he less down are the science of simple and the fact in the mind the tragedy, with the enting from the promises of the future, and '