In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Embedding

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow.keras.utils as ku

In [None]:
# using an api to get the data
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sonnets.txt \
    -O /tmp/sonnets.txt
text = open('/tmp/sonnets.txt').read().splitlines()

### Tokenizing Text Data

In [None]:
# Tokenizer constructed with no arguments, i.e. the library defaults.
tokenizer = Tokenizer()

In [None]:
# Build the word -> index vocabulary from the loaded lines.
tokenizer.fit_on_texts(texts=text)

In [None]:
# Number of classes for the model: one per vocabulary word, plus one extra
# slot (presumably for index 0, which the Keras Tokenizer does not assign to
# any word -- confirm against the Tokenizer docs).
vocab_size = 1 + len(tokenizer.word_index)
vocab_size

### Creating Sequence and Padding Text 


In [None]:
# Expand every line into its growing token prefixes (first 2 tokens, first 3,
# ..., the whole line). Lines with fewer than 2 tokens contribute nothing.
input_sequence = []
for line in text:
    token_ids = tokenizer.texts_to_sequences([line])[0]
    input_sequence.extend(
        token_ids[:end] for end in range(2, len(token_ids) + 1)
    )

In [None]:
# Length of the longest n-gram prefix; every sequence is padded to this length.
# `default=0` keeps the result at 0 when there are no sequences at all.
max_len = max(map(len, input_sequence), default=0)
max_len

In [None]:
# Pad every prefix on the left (pad_sequences' default, spelled out here) so
# that the last column always holds the final token of the prefix.
input_sequence = pad_sequences(input_sequence, maxlen=max_len, padding="pre")
input_sequence

### Creating Predictors and Labels

In [None]:
# Model inputs: every column except the last one of each padded sequence.
predictors = input_sequence[:, :-1]
predictors

In [None]:
# Targets: the last column of each padded sequence (the token to predict).
labels = input_sequence[:, -1]
labels

### Creating Encoding for Classes


In [None]:
# One-hot encode the target token ids into vocab_size classes.
# The ids are re-derived from input_sequence rather than read from `labels`,
# so re-running this cell does not one-hot encode an already one-hot matrix.
labels = ku.to_categorical(input_sequence[:, -1], num_classes=vocab_size)

In [None]:
# Display the label array produced by the to_categorical step.
labels

### Building Models

#### Multilayer Bidirectional LSTM

In [None]:
# Start from an empty Sequential model; layers are attached in the next cell.
model = Sequential()

In [None]:
# Build the whole layer stack in one expression. Re-creating the model here
# (instead of calling model.add on an existing one) keeps the cell idempotent:
# re-running it no longer appends a second copy of every layer.
model = Sequential([
    # 100-dimensional embedding over inputs of length max_len - 1.
    Embedding(vocab_size, 100, input_length=max_len - 1),
    Dropout(0.2),
    # return_sequences=True so the second recurrent layer gets the full sequence.
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.2),
    Bidirectional(LSTM(128)),
    # One softmax output per vocabulary index.
    Dense(vocab_size, activation='softmax'),
])

In [None]:
# Print the model's summary as a sanity check of the architecture.
model.summary()

In [None]:
# Categorical cross-entropy matches the one-hot encoded labels; accuracy is
# tracked so it can be plotted after training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 50 epochs and keep the History object for the accuracy plot.
history = model.fit(x=predictors, y=labels, epochs=50)

In [None]:
# Plot per-epoch training accuracy. The title and axis labels are set so the
# figure can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='accuracy')
ax.set(title='Training accuracy', xlabel='Epoch', ylabel='Accuracy')
ax.legend()
plt.show()

#### The model has an accuracy > 60%

### Predicting a new poem

In [None]:
# Seed phrase that the generated words are appended to.
new_text = "Help me Obi Wan Kenobi, you're my only hope"
# How many words to generate after the seed phrase.
next_words = 100

In [None]:
# Greedy generation: repeatedly feed the text so far to the model and append
# the highest-probability next word.
for _ in range(next_words):
    tokens = tokenizer.texts_to_sequences([new_text])
    padded_sequence = pad_sequences(tokens, maxlen=max_len - 1)

    # Sequential.predict_classes was removed from tf.keras; taking the argmax
    # over the softmax output is the equivalent. verbose=0 avoids printing a
    # progress bar for every generated word.
    probabilities = model.predict(padded_sequence, verbose=0)
    predicted_index = int(np.argmax(probabilities, axis=-1)[0])

    # index_word is the tokenizer's reverse (index -> word) mapping, so this
    # is a direct lookup instead of a linear scan over word_index.
    output_word = tokenizer.index_word.get(predicted_index)
    if output_word is None:
        # The predicted index has no word (e.g. the padding index 0). The
        # input would not change, so later iterations would predict the same
        # index again; stop instead of reusing a stale or undefined word.
        break
    new_text = new_text + ' ' + output_word

In [None]:
# Show the seed phrase followed by the generated words.
print(new_text)