In this notebook I show how to generate text with a Recurrent Neural Network. I use Shakespeare's sonnets for this exercise.

### Import

In [34]:

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.optimizers import Adam

!pip install tqdm
from tqdm import tqdm
import urllib

from urllib.request import urlretrieve

import tensorflow as tf

import matplotlib.pyplot as plt
%matplotlib inline

import os
import re
import random as rn
import numpy as np

# Randomness control
# Seeds the stdlib `random` module, NumPy and TensorFlow with the same value and
# installs a single-threaded TensorFlow session as the Keras backend session.
# NOTE(review): PYTHONHASHSEED is set after the interpreter has already started;
# presumably it only influences processes launched from here — confirm.
os.environ['PYTHONHASHSEED'] = '0'
RANDOM_SEED = 3939
np.random.seed(RANDOM_SEED)
rn.seed(RANDOM_SEED)
# One thread for intra-op and one for inter-op parallelism.
# NOTE(review): tf.ConfigProto / tf.Session / tf.set_random_seed look like the
# TensorFlow 1.x API — confirm the installed TensorFlow version provides them.
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, 
                              inter_op_parallelism_threads=1)

tf.set_random_seed(RANDOM_SEED)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
keras.backend.set_session(sess)



In [0]:
# Where the text is downloaded from and the local file it is saved to.
DATA_URL = "http://www.gutenberg.org/cache/epub/1041/pg1041.txt"
DATA_FILENAME = "sonnets.txt"

# SEQ: number of characters in each input window; the character that follows
# the window is used as its label.
SEQ = 100
# feature: size of the last axis when the windows are reshaped to
# (samples, SEQ, feature).
feature = 1

### Fetch Data

In [36]:
class DLProgress(tqdm):
  """tqdm progress bar whose `hook` method can be passed to `urlretrieve` as a report hook."""

  # Block number seen on the previous `hook` call.
  last_block = 0

  def hook(self, block_num=1, block_size=1, total_size=None):
    """Advance the bar by the amount transferred since the previous call.

    block_num:  number of blocks transferred so far.
    block_size: size of one block.
    total_size: total size of the download; stored as the bar's total.
    """
    new_blocks = block_num - self.last_block
    self.total = total_size
    self.update(new_blocks * block_size)
    self.last_block = block_num

# Download DATA_URL into DATA_FILENAME, reporting progress through pbar.hook.
# The download runs every time this cell is executed; there is no check for an
# already existing local file.
with DLProgress(unit="B", unit_scale=True, miniters=1, desc="Shakespeare's Sonnets") as pbar:
  urlretrieve(DATA_URL, DATA_FILENAME, pbar.hook)

Shakespeare's Sonnets: 123kB [00:00, 440kB/s]                             


### Load Data

In [0]:
# Read the whole downloaded text into memory.
# The encoding is stated explicitly so decoding does not depend on the
# platform's default locale encoding.
with open(DATA_FILENAME, "r", encoding="utf-8") as file:
  data = file.read()

In [0]:
# Character offset at which the kept text starts.
# NOTE(review): presumably 740 skips the Project Gutenberg preamble — confirm
# against the downloaded file.
start_index = 740

# The kept text ends right after this line.
LAST_LINE = "Love's fire heats water, water cools not love."

# re.escape makes the search literal (the trailing "." would otherwise match
# any character).
last_line_match = re.search(re.escape(LAST_LINE), data)
if last_line_match is None:
  # Fail with a clear message instead of an AttributeError on None.
  raise ValueError("Closing line %r not found in %s" % (LAST_LINE, DATA_FILENAME))
end_index = last_line_match.end()

In [0]:
# Keep only the text between start_index and end_index.
data_cleaned = data[start_index:end_index]
# print(data_cleaned)

Convert the text to lower case.

In [0]:
# Lower-case the text so upper- and lower-case forms of a letter count as the
# same character when the set of unique characters is built later.
data_cleaned = data_cleaned.lower()

Number of characters

In [42]:
# Length of the cleaned text, in characters.
print(len(data_cleaned))

100228


Reduce the size of the dataset to save computation time.

In [0]:
# Keep only the first half of the text.
# NOTE: data_cleaned is overwritten with a slice of itself, so re-running this
# cell halves the text again.
split_index = int(0.5 * len(data_cleaned))
data_cleaned = data_cleaned[:split_index]

### Preprocessing the Dataset

Map every unique character to an integer id.

In [0]:
# Sorted list of the distinct characters in the text; a character's position
# in this list is its integer id.
characters = sorted(set(data_cleaned))
# Lookup tables in both directions: id -> character and character -> id.
id_to_character = dict(enumerate(characters))
character_to_id = {char: i for i, char in id_to_character.items()}

In [29]:
# Number of characters remaining in data_cleaned at this point.
totalchar = len(data_cleaned)
print(totalchar)

50114


Create the input and output sequences.

In [48]:
def data_to_sequence(data, data_to_id_dict, seq_length=None):
  """Cut `data` into overlapping fixed-length windows with next-item labels.

  A window of `seq_length` items starts at every position i; the item that
  immediately follows the window is its label. Both are translated through
  `data_to_id_dict`.

  Args:
    data: indexable sequence (e.g. a string) to cut into windows.
    data_to_id_dict: mapping from each item of `data` to its integer id.
    seq_length: window length. Defaults to the module-level SEQ when None.

  Returns:
    (seq_Xs, seq_Ys): a list of id lists (one per window) and the list of
    label ids. Both are empty when `data` is not longer than `seq_length`.

  Raises:
    KeyError: if an item of `data` is missing from `data_to_id_dict`.
  """
  if seq_length is None:
    # Preserve the original behaviour for existing two-argument callers.
    seq_length = SEQ

  seq_Xs, seq_Ys = [], []

  for start in range(len(data) - seq_length):
    window = data[start:start + seq_length]
    label = data[start + seq_length]

    seq_Xs.append([data_to_id_dict[item] for item in window])
    seq_Ys.append(data_to_id_dict[label])

  return seq_Xs, seq_Ys

# Encode the whole text as (window of character ids, id of the next character) pairs.
seq_Xs, seq_ys = data_to_sequence(data_cleaned, character_to_id)

# Preview the first two windows together with their labels.
for x, y in zip(seq_Xs[0:2], seq_ys[0:2]):
  print(x, y)

[1, 18, 0, 0, 1, 1, 15, 27, 24, 22, 1, 15, 10, 18, 27, 14, 28, 29, 1, 12, 27, 14, 10, 29, 30, 27, 14, 28, 1, 32, 14, 1, 13, 14, 28, 18, 27, 14, 1, 18, 23, 12, 27, 14, 10, 28, 14, 4, 0, 1, 1, 29, 17, 10, 29, 1, 29, 17, 14, 27, 14, 11, 34, 1, 11, 14, 10, 30, 29, 34, 3, 28, 1, 27, 24, 28, 14, 1, 22, 18, 16, 17, 29, 1, 23, 14, 31, 14, 27, 1, 13, 18, 14, 4, 0, 1, 1, 11, 30, 29] 1
[18, 0, 0, 1, 1, 15, 27, 24, 22, 1, 15, 10, 18, 27, 14, 28, 29, 1, 12, 27, 14, 10, 29, 30, 27, 14, 28, 1, 32, 14, 1, 13, 14, 28, 18, 27, 14, 1, 18, 23, 12, 27, 14, 10, 28, 14, 4, 0, 1, 1, 29, 17, 10, 29, 1, 29, 17, 14, 27, 14, 11, 34, 1, 11, 14, 10, 30, 29, 34, 3, 28, 1, 27, 24, 28, 14, 1, 22, 18, 16, 17, 29, 1, 23, 14, 31, 14, 27, 1, 13, 18, 14, 4, 0, 1, 1, 11, 30, 29, 1] 10


Assemble `train_X` and `train_y`.

In [0]:
# Shape the id windows as (samples, SEQ, feature).
train_X = np.reshape(seq_Xs, (len(seq_Xs), SEQ, feature))
# One-hot encode the labels. num_classes is pinned to the vocabulary size so
# the width of train_y always equals len(characters), instead of depending on
# the largest id that happens to occur among the labels.
train_y = keras.utils.to_categorical(seq_ys, num_classes=len(characters))

Normalize

In [0]:
# Scale the character ids by the vocabulary size.
# NOTE: train_X is overwritten with a scaled copy of itself, so re-running this
# cell divides the values again.
train_X = train_X / float(len(characters))

### Model

Structure

In [0]:
# Input windows are (timesteps, features); the output has one unit per column
# of the one-hot label matrix.
n_timesteps, n_features = train_X.shape[1], train_X.shape[2]
n_classes = train_y.shape[1]

# Two stacked 800-unit LSTM layers, each followed by 20% dropout, then a
# softmax layer over the classes. The first LSTM returns the full sequence so
# the second LSTM receives one vector per timestep.
model = Sequential([
  LSTM(800, input_shape=(n_timesteps, n_features), return_sequences=True),
  Dropout(0.2),
  LSTM(800),
  Dropout(0.2),
  Dense(n_classes, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer=Adam())

Training

In [54]:
# Train for 20 epochs in batches of 128 windows. shuffle=False disables
# shuffling of the training samples. The object returned by fit is kept in
# `history`.
history = model.fit(train_X, train_y, 
                    epochs = 20, 
                    batch_size = 128, 
                    verbose = 1, 
                    shuffle = False)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Text generation

In [0]:
SEED_SEQUENCE_INDEX = 100  # which training window primes the generator
GENERATED_LENGTH = 1000    # number of characters to generate after the seed

# Work on a copy of the seed window: appending to seq_Xs[...] directly would
# grow the stored training window and make this cell fail or drift on re-run.
string_mapped = list(seq_Xs[SEED_SEQUENCE_INDEX])
full_string = [id_to_character[value] for value in string_mapped]

for _ in range(GENERATED_LENGTH):
    # Same shape and scaling as the training inputs: (1, window length, 1),
    # ids divided by the vocabulary size.
    x = np.reshape(string_mapped, (1, len(string_mapped), 1))
    x = x / float(len(characters))

    # Greedy decoding: take the most probable next character.
    pred_index = int(np.argmax(model.predict(x, verbose=0)))
    full_string.append(id_to_character[pred_index])

    # Slide the window one step: drop the oldest id, add the predicted one.
    string_mapped = string_mapped[1:] + [pred_index]

In [0]:
# Join the seed and generated characters into one string in a single pass
# (repeated += on a string copies the accumulated text on every step).
generated_text = "".join(full_string)

In [28]:
# Show the seed text followed by the characters generated by the model.
print(generated_text)

 as the riper should by time decease,
  his tender heir might bear his memory:
  but thou, contracted att thine ewas,
  and oe thin werd toills sorte     that in your sweet thoughts would be toowe,
    the soilk wo diaue, io shose what isow yot
    so long as mee shal thou sead str she geart;
    ior the deauh, there all thei my love sheer,
    and thes shall leve, to thow what whou wealt sealed
isew,
  why should pe beauty what thou sele'stm fraen,
  so should me live, now nose to bre io teed
  that you to love that weil th maye ir blow

   this thou de thy self wo bre, tr all away.

  lxxvi

  why is my love still me ne the day,
  the oavt tooe pidat see mame defence
  and art move dongonss mo a toifk seee;
  to, live you thale io my love and treek,
  when i peave and the mank the sures wo dod,
  sr fir thou dester the world say see my pleasure,
  sowerit tf thee to be remembered.
    t! nest touerift in the world saee broteer;
  when i perhees the world sast to mes iidht,
  so shoul