In [14]:
# STEP 1: IMPORTING DEPENDENCIES
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense,Dropout,LSTM
from keras.utils import np_utils

In [15]:
# STEP 2: IMPORTING THE DATA
# Read the whole corpus inside a context manager so the file handle is closed
# as soon as the read finishes instead of lingering for the kernel's lifetime.
with open("./sonnets.txt") as corpus_file:
    text = corpus_file.read()

# Lower-case the corpus so the character vocabulary built from `text`
# does not distinguish letter case.
text = text.lower()

In [16]:
# STEP 3 : CREATE A CHARACTER NUMBER MAPPING
# Vocabulary: every distinct character that occurs in the corpus, in sorted order.
characters = sorted(set(text))

# Two lookup tables derived from the same enumeration:
#   n_to_char : integer code -> character
#   char_to_n : character    -> integer code
n_to_char = dict(enumerate(characters))
char_to_n = {symbol: code for code, symbol in n_to_char.items()}

In [17]:
# STEP 4 : DATA PRE-PROCESSING
# Each training sample is a window of `seq_length` consecutive characters and
# its label is the single character that follows the window.
# Example with the word "mannequin" and a window length of 5:
#   m, a, n, n, e -> q
#   a, n, n, e, q -> u
#   n, n, e, q, u -> i
#   and so on

length = len(text)
seq_length = 100

# Translate the corpus to integer codes once; every window is a slice of this list.
encoded_text = [char_to_n[char] for char in text]

# One sample per start offset that still leaves a following character for the label.
window_starts = range(length - seq_length)

# Training data: one list of `seq_length` integer codes per window.
X = [encoded_text[start:start + seq_length] for start in window_starts]
# Labels corresponding to Training data: the code of the character after each window.
Y = [encoded_text[start + seq_length] for start in window_starts]

In [18]:
# STEP 4 (continued): DATA PRE-PROCESSING
# The LSTM layer expects its input shaped as
#   (number of sequences, length of each sequence, number of features);
# here every time step carries one feature: the character's integer code.
# The codes are divided by the vocabulary size so the inputs are small
# floating-point values rather than raw integer codes.
# The labels are one-hot encoded.

n_sequences = len(X)
vocab_size = float(len(characters))

X_modified = np.reshape(X, (n_sequences, seq_length, 1)) / vocab_size
Y_modified = np_utils.to_categorical(Y)

In [19]:
# STEP 5: MODELLING
# Architecture: three stacked LSTM layers of 400 memory units, each followed by
# a 0.2 dropout layer, and a softmax Dense layer with one output per column of
# the one-hot label matrix.

# First LSTM layer: declares the (time steps, features) input shape and hands
# its whole output sequence to the next recurrent layer.
LSTM_layer_1 = LSTM(
    400,
    input_shape=(X_modified.shape[1], X_modified.shape[2]),
    return_sequences=True,
)
Dropout_layer_1 = Dropout(0.2)

# Second LSTM layer: another 400 memory units, again returning the full sequence.
LSTM_layer_2 = LSTM(400, return_sequences=True)
Dropout_layer_2 = Dropout(0.2)

# Third LSTM layer: return_sequences is left at its default here.
LSTM_layer_3 = LSTM(400)
Dropout_layer_3 = Dropout(0.2)

# Dense layer for results: one softmax output per label class.
Dense_layer_1 = Dense(Y_modified.shape[1], activation='softmax')

# Assemble the stack in order, then compile.
model = Sequential()
for layer in (
    LSTM_layer_1,
    Dropout_layer_1,
    LSTM_layer_2,
    Dropout_layer_2,
    LSTM_layer_3,
    Dropout_layer_3,
    Dense_layer_1,
):
    model.add(layer)

model.compile(loss='categorical_crossentropy', optimizer='adam')

In [20]:
# Running the model created
model.fit(X_modified, Y_modified, epochs=100, batch_size=100)

# Persist the trained weights. This must be called on `model` (the object
# trained above); no `models` name is defined in this notebook, so calling
# `models.save_weights` would raise NameError once training finishes.
model.save_weights('./weights.h5')

# Reload the weights that were just written to disk.
model.load_weights('./weights.h5')

In [23]:
# STEP 6 : GENERATING TEXT
# Seed the generator with the encoded window at index 100 of the training data.
# Take a copy: the loop appends to `string_mapped`, and appending to X[100]
# itself would leave that training row one element longer than the others.
string_mapped = list(X[100])

# The generated text, character by character, starting with the seed.
full_string = [n_to_char[value] for value in string_mapped]

# Same divisor that was used to scale the training inputs; constant across iterations.
n_characters = float(len(characters))

# Generating characters: predict one character, append it, slide the window by one.
for i in range(400):
    x = np.reshape(string_mapped, (1, len(string_mapped), 1))
    x = x / n_characters

    # Index of the highest-scoring entry in the model's output = predicted character code.
    pred_index = int(np.argmax(model.predict(x, verbose=0)))
    full_string.append(n_to_char[pred_index])

    # Drop the oldest code and add the prediction so the window length stays constant.
    string_mapped.append(pred_index)
    string_mapped = string_mapped[1:]

In [24]:
# STEP 7 : PRINTING THE OUTPUT
# `full_string` is a list of single characters; join them in one pass instead of
# rebuilding the string once per character.
txt = "".join(full_string)
print(txt)

the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contracted to time ou love sast dieeess,
 or mature's beauty were beauty's srete to thee,
 and therefore for my sinfuless and mose seem
 tines have proros darhon the show appear;
 and therefore for my self thy sweet self grows'st
 the linds of shat which thou deservest mew;
 shou danst not then i say mot tell me boing?
 and therefore from their thotue ont so much  and steetly griends have done thee soreng:
 whth


<p><strong>Further Improvements:</strong></p>
<p><span style="font-size: 10pt;">More processing can be used to print out even more meaningful sentences. Here, we can see that the machine has understood the concept of rhymes and generates words with proper spelling most of the time.<br /></span></p>
<p><span style="font-size: 10pt;">If more time can be devoted to the processing of data, even better results can be achieved.</span></p>