# Import libraries

In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional

# Import the Data

## Tokenize the data

In [4]:
# Load the poetry corpus. The context manager closes the file handle as soon
# as the text has been read, instead of leaving it open until the object
# happens to be garbage-collected.
with open('Laurences_generated_poetry.txt') as corpus_file:
    data = corpus_file.read()

# One lower-cased entry per line of the file.
corpus = data.lower().split('\n')

# Build the word -> index vocabulary from every line of the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# +1 so the size also covers index 0, which is not present in word_index.
total_words = len(tokenizer.word_index) + 1

## Text to Sequences

In [5]:
# Turn every corpus line into its growing token prefixes:
# [w1, w2], [w1, w2, w3], ... up to the full line. Lines that tokenize to
# fewer than two tokens contribute nothing.
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# Left-pad every prefix with zeros up to the length of the longest one.
max_sequence_len = max(map(len, input_sequences))
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

## Splitting Label and Sentences

In [6]:
# For each row, everything except the last column is the model input and the
# last column is the label.
xs, labels = input_sequences[:, :-1], input_sequences[:, -1]

# One-hot encode the labels across the whole vocabulary.
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

# Making The Model

In [7]:
# Embedding -> bidirectional LSTM -> softmax over the vocabulary.
model = Sequential([
    Embedding(total_words, 100),
    Bidirectional(LSTM(150)),
    Dense(total_words, activation='softmax'),
])

# The targets are one-hot vectors, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the (input, one-hot label) pairs built above.
model.fit(xs, ys, epochs=100, verbose=1)

Epoch 1/100
[1m377/377[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 29ms/step - accuracy: 0.0652 - loss: 6.9413
Epoch 2/100
[1m377/377[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 30ms/step - accuracy: 0.0726 - loss: 6.2302
Epoch 3/100
[1m377/377[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 34ms/step - accuracy: 0.0890 - loss: 5.9037
Epoch 4/100
[1m377/377[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step - accuracy: 0.1015 - loss: 5.5914
Epoch 5/100
[1m377/377[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 29ms/step - accuracy: 0.1135 - loss: 5.2951
Epoch 6/100
[1m377/377[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step - accuracy: 0.1233 - loss: 4.9661
Epoch 7/100
[1m377/377[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 29ms/step - accuracy: 0.1529 - loss: 4.6261
Epoch 8/100
[1m377/377[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 26ms/step - accuracy: 0.1719 - loss: 4.3433
Epoch 9/100
[1m

KeyboardInterrupt: 