# **Import necessary libraries**

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow.keras.utils as ku 
import numpy as np
import pandas as pd

# **Data preparation**

In [None]:
# Read the Robert Frost collection CSV (path is relative to the working
# directory) into a DataFrame.
dataset = pd.read_csv('../input/robert-frost-collection/robert_frost_collection.csv')
# Preview the first rows to check what was parsed.
dataset.head()

In [None]:
# Discard the first row of the frame and keep everything after it.
# NOTE(review): presumably the first CSV record is an empty/placeholder row —
# confirm against the data before relying on this.
dataset = dataset[1:]
# Preview the frame again after the row was dropped.
dataset.head()

In [None]:
# Show the (rows, columns) shape of the current frame.
dataset.shape

In [None]:
# Flatten every entry of the Content column into one list of lines: each
# entry is split on newlines, and every resulting line is lower-cased and
# stripped of surrounding whitespace (blank lines are kept as empty strings).
text = list(dataset.Content)
corpus = [
    verse.lower().strip()
    for poem in text
    for verse in poem.split('\n')
]

In [None]:
# Report how many lines the corpus holds, then preview the first ten.
# (Message spelling fixed: "Lenght" -> "Length".)
print("Length of corpus: ", len(corpus))
corpus[0:10]

In [None]:
# Build the vocabulary from every line of the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
# One extra slot on top of the vocabulary size; pad_sequences below fills
# with 0, so that index must exist in the output layer as well.
total_words = len(tokenizer.word_index) + 1

# create input sequences using list of tokens: every encoded line contributes
# each of its prefixes of length >= 2, so the last token of a prefix can serve
# as the prediction target for the tokens before it
encoded_lines = tokenizer.texts_to_sequences(corpus)
input_sequences = [
    tokens[:end]
    for tokens in encoded_lines
    for end in range(2, len(tokens) + 1)
]

# pad sequences (at the front) up to the length of the longest prefix
max_sequence_len = max(len(seq) for seq in input_sequences)
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# create predictors and label: all columns but the last are the model input,
# the last column is the word to predict
predictors = input_sequences[:, :-1]
label = input_sequences[:, -1]

# one-hot encode the targets over the whole vocabulary
label = ku.to_categorical(label, num_classes=total_words)

In [None]:
# Print the shapes of the training arrays and the vocabulary size.
for caption, value in (
    ("Input sequence: ", input_sequences.shape),
    ("Predictors: ", predictors.shape),
    ("Label: ", label.shape),
    ("Total words: ", total_words),
):
    print(caption, value)

# **Creating Model**

In [None]:
# Next-word prediction network: a 100-dimensional embedding over the
# vocabulary, a bidirectional LSTM that returns the full sequence, a second
# LSTM that reduces it to one vector, and a softmax over every word index.
model = Sequential()
# Input length is one less than the padded n-gram length because the last
# token of each n-gram is split off as the label.
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(150, return_sequences = True)))
model.add(LSTM(100))
model.add(Dense(total_words, activation='softmax'))
# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# emitted a stray "None" line after the table.
model.summary()

In [None]:
# Train on the prefix -> next-word pairs for 120 epochs with progress output;
# the returned History object keeps the per-epoch metrics.
history = model.fit(
    x=predictors,
    y=label,
    epochs=120,
    verbose=1,
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch training metrics recorded in the History object.
acc = history.history['accuracy']
loss = history.history['loss']

# One x position per recorded epoch (0-based).
epochs = range(len(acc))

# Figure 1: training accuracy.
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.title('Training accuracy')
# The curve's label is only drawn once legend() is called; this call was
# missing here although the loss figure below had it.
plt.legend()

# Start a second figure so the loss curve is not drawn over the accuracy one.
plt.figure()

# Figure 2: training loss.
plt.plot(epochs, loss, 'b', label='Training Loss')
plt.title('Training loss')
plt.legend()

plt.show()

In [None]:
def generate_text(seed_text, next_words, model=model, max_sequence_len=max_sequence_len):
    """Extend ``seed_text`` with ``next_words`` greedily predicted words.

    On every step the current text is encoded with the module-level
    ``tokenizer``, padded at the front to ``max_sequence_len - 1`` tokens and
    passed to ``model.predict``; the index with the highest score is mapped
    back to a word and appended after a single space.

    Args:
        seed_text: Text to continue.
        next_words: Number of prediction steps to run.
        model: Object whose ``predict`` returns one score per word index for
            each input row. Defaults to the module-level ``model`` as it was
            when this function was defined.
        max_sequence_len: Padded n-gram length; the model input is one token
            shorter. Defaults to the module-level value at definition time.

    Returns:
        ``seed_text`` followed by the generated words. If a predicted index
        has no word in the vocabulary, an empty word is appended, i.e. only
        the separating space is added (same as the previous behaviour).
    """
    # Reverse vocabulary (index -> word) maintained by the tokenizer; a dict
    # lookup replaces scanning every (word, index) pair on each step.
    index_to_word = tokenizer.index_word
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        scores = model.predict(token_list, verbose=0)
        # argmax over the last axis yields one index per input row; there is
        # exactly one row, so take it as a plain int rather than comparing
        # ints against a one-element array.
        predicted = int(np.argmax(scores, axis=-1)[0])
        output_word = index_to_word.get(predicted, "")
        seed_text += " " + output_word
    return seed_text

# **Generating Text**

In [None]:
# Continue the seed "I wish" with 10 predicted words and print the result.
print(generate_text("I wish", 10))

In [None]:
# Continue the single-word seed "Green" with 10 predicted words.
print(generate_text("Green", 10))

In [None]:
# Continue the seed "Down the river" with 10 predicted words.
print(generate_text("Down the river", 10))

In [None]:
# Continue the single-word seed "Deep" with a longer run of 15 predicted words.
print(generate_text("Deep", 15))

In [None]:
# Continue the seed "Blowing in the wind" with 10 predicted words.
print(generate_text("Blowing in the wind", 10))