In [1]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow.keras.utils as ku
import tensorflow as tf
from tensorflow import keras 
import numpy as np
import pandas as pd

In [None]:
path = "path-to-dataset"
# Read the whole corpus in one go. The context manager closes the file handle
# as soon as the text is in memory, and the explicit encoding keeps non-ASCII
# characters from depending on the platform's default codec.
# NOTE(review): assumes the dataset file is UTF-8 encoded — confirm.
with open(path, "r", encoding="utf-8") as corpus_file:
    poems_data = corpus_file.read()

In [10]:
# Lower-case the raw text and treat every newline-separated line as one training text.
corpus_lower = poems_data.lower()
data_corpus = corpus_lower.split("\n")

# Fit a word-level vocabulary on those lines.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data_corpus)

# Vocabulary size plus one: Tokenizer word indices start at 1, which leaves
# index 0 free for the padding value inserted by pad_sequences.
total_words = 1 + len(tokenizer.word_index)

# Uncomment to inspect the corpus / vocabulary:
#print(data_corpus)
#print(tokenizer.word_index)
#print(total_words)

In [11]:
# Turn each line into its growing token prefixes: [w1, w2], [w1, w2, w3], ...
# Lines with fewer than two tokens contribute nothing.
input_seqs = []
for line in data_corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_seqs.extend(token_list[:end] for end in range(2, len(token_list) + 1))

# Left-pad every prefix with zeros up to the length of the longest one.
max_seqs_len = max(len(seq) for seq in input_seqs)
padded_seqs = pad_sequences(input_seqs, maxlen=max_seqs_len, padding='pre')
input_seqs = np.array(padded_seqs)

# The last token of each padded row is the label; everything before it is the predictor.
preds = input_seqs[:, :-1]
labels = input_seqs[:, -1]

# One-hot encode the labels over the whole vocabulary.
labels = ku.to_categorical(labels, num_classes=total_words)

In [12]:
# Next-word prediction network: embedding -> bidirectional LSTM -> dropout -> LSTM -> dense head.
model = Sequential()
# Inputs are the padded prefixes with their final token removed, hence max_seqs_len - 1.
model.add(Embedding(total_words, 100, input_length=max_seqs_len - 1))
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(Dropout(0.2))
model.add(LSTM(100))
# Dense `units` is a count of neurons and must be an integer; `/` yields a
# float in Python 3, so use floor division instead.
model.add(Dense(total_words // 2, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
# One softmax output per vocabulary index, matching the one-hot labels.
model.add(Dense(total_words, activation='softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer='RMSprop',
    metrics=['accuracy']
)

# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 22, 100)           238000    
                                                                 
 bidirectional_1 (Bidirectio  (None, 22, 300)          301200    
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 22, 300)           0         
                                                                 
 lstm_3 (LSTM)               (None, 100)               160400    
                                                                 
 dense_2 (Dense)             (None, 1190)              120190    
                                                                 
 dense_3 (Dense)             (None, 2380)              2834580   
                                                      

In [None]:
import os

checkpoint_path = "your-directory-path/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that writes the model's weights (not the full model) to
# checkpoint_path during training, logging each save.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
)

In [16]:
import os

checkpoint_path = "your-directory-path/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Restore previously saved weights only when a checkpoint is actually on disk.
# This cell sits before the training cell, so on a fresh run (no checkpoint
# written yet) an unconditional load_weights() would raise and stop the
# notebook. A TensorFlow-format checkpoint saved under this prefix is
# accompanied by a "<prefix>.index" file, which is what is probed here.
if os.path.exists(checkpoint_path + ".index"):
    load_status = model.load_weights(checkpoint_path)
else:
    print(f"No checkpoint found at {checkpoint_path!r}; keeping the current weights.")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f262f469250>

In [None]:
# Fit on the (prefix, next-word) pairs. cp_callback is passed so that weights
# are checkpointed while training runs; the returned History object is kept
# for plotting the training curves.
model_history = model.fit(
    preds,
    labels,
    epochs=250,
    verbose=1,
    callbacks=[cp_callback],
)

In [None]:
import matplotlib.pyplot as plt

# Training metrics recorded by model.fit, one value per epoch.
model_acc = model_history.history['accuracy']
model_loss = model_history.history['loss']

nepochs = range(len(model_acc))

# Figure 1: training accuracy.
plt.plot(nepochs, model_acc, 'b', label='Training accuracy')
plt.title('Training accuracy')
plt.ylabel("Accuracy")
plt.xlabel("Epochs")
plt.legend()  # the curve carries a label, so show it (as the loss figure does)

plt.figure()

# Figure 2: training loss.
plt.plot(nepochs, model_loss, 'b', label='Training Loss')
plt.title('Training loss')
plt.ylabel("Loss")  # this axis shows the loss values, not accuracy
plt.xlabel("Epochs")
plt.legend()

plt.show()

In [17]:
start_txt = "the weather is nice"
nwords = 110

# Greedy generation: repeatedly feed the text produced so far to the model and
# append the single most probable next word.
for _ in range(nwords):
    token_list = tokenizer.texts_to_sequences([start_txt])[0]
    # Same left-padding and length as the training inputs (pad_sequences also
    # truncates input longer than maxlen).
    token_list = pad_sequences([token_list], maxlen=max_seqs_len-1, padding='pre')
    predicted_word = int(np.argmax(model.predict(token_list, verbose=0)))
    # index_word is the Tokenizer's index -> word map, so the reverse lookup
    # needs no scan over word_index. An index with no word (e.g. the padding
    # index 0) yields "", as the original search did.
    output_word = tokenizer.index_word.get(predicted_word, "")
    start_txt += " " + output_word

# Split into words and display them n per row.
start_txt = start_txt.split()
n = 6
[' '.join(start_txt[i:i+n]) for i in range(0, len(start_txt), n)]

['the weather is nice glee done',
 'mirth out out like furnace for',
 'fine plashless was no mouth up',
 'with her fearful storm have come',
 'was ever gloom their comet down',
 'the snow to fatal song enough',
 "an england's worm her door is",
 'year or road over night i',
 'those for this forests and her',
 'mouth and like what a thoughts',
 'in the hour everything cottage grieve',
 'happen mildly simple the bay lash',
 'the a a thoughts of the',
 'a is forget a is she',
 'sound’s the road up was those',
 'who once it dreams in he',
 'know love anyway throw their hair',
 'lives express rooms climb on your',
 'work and shot who see my']