In [28]:
import numpy as np
import tensorflow as tf
import tensorflow.keras.utils as utils

from tensorflow.keras import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Dropout, LSTM, Bidirectional, Embedding

In [29]:
# Read the sonnets and keep one lower-cased line per corpus entry.
with open('./DATA/shakespeare-sonnets.txt', 'r') as f:
    corpus = f.read().lower().split('\n')

# Fit the vocabulary on every line. Tokenizer word ids start at 1, so the
# number of classes is the vocabulary size plus one (id 0 is the pad value).
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

# Turn each line into all of its prefixes of length >= 2: the last token of a
# prefix is the word to predict, everything before it is the context.
text_seq = [
    line_ids[:end]
    for line_ids in tokenizer.texts_to_sequences(corpus)
    for end in range(2, len(line_ids) + 1)
]

# Left-pad every prefix to the length of the longest one so they stack into
# a single 2-D integer array.
max_len = max(len(prefix) for prefix in text_seq)
text_seq = np.array(pad_sequences(text_seq, maxlen=max_len))

# Inputs are all columns but the last; the last column is the target word id,
# one-hot encoded over the whole vocabulary.
X_train = text_seq[:, :-1]
labels = utils.to_categorical(text_seq[:, -1], num_classes=total_words)

In [42]:
# Next-word prediction network: embedding -> BiLSTM -> dropout -> LSTM ->
# regularised hidden Dense -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(total_words, 90))
# return_sequences=True so the following LSTM receives the full sequence.
model.add(Bidirectional(LSTM(150, return_sequences=True)))
model.add(Dropout(0.3))
model.add(LSTM(100))
# Layer widths must be integers: use floor division instead of `/`, which
# yields a float (e.g. 1605.5) that Keras either truncates or rejects.
model.add(Dense(total_words // 2, 'relu', kernel_regularizer=l2(0.001)))
# One output unit per vocabulary id; matches the one-hot `labels`.
model.add(Dense(total_words, 'softmax'))

model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, None, 90)          288990    
_________________________________________________________________
bidirectional_7 (Bidirection (None, None, 300)         289200    
_________________________________________________________________
dropout_4 (Dropout)          (None, None, 300)         0         
_________________________________________________________________
lstm_9 (LSTM)                (None, 100)               160400    
_________________________________________________________________
dense_8 (Dense)              (None, 1605)              162105    
_________________________________________________________________
dense_9 (Dense)              (None, 3211)              5156866   
Total params: 6,057,561
Trainable params: 6,057,561
Non-trainable params: 0
____________________________________________

In [45]:
# Train on the padded prefixes against their one-hot next-word targets.
# NOTE(review): the recorded log below reads "Epoch n/200", so it came from a
# run with a different `epochs` value than the one written here.
model.fit(
    x=X_train,
    y=labels,
    epochs=100,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded on `model.history` by the preceding fit call.
acc = model.history.history['accuracy']
loss = model.history.history['loss']

epochs = range(len(acc))

# Accuracy curve.
plt.figure(figsize=(10, 6))
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.title('Training accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# The line label is only rendered if a legend is requested for this figure.
plt.legend()

# Loss curve.
plt.figure(figsize=(10, 6))
plt.plot(epochs, loss, 'b', label='Training Loss')
plt.title('Training loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [90]:
# Greedy text generation: repeatedly feed the text so far to the model and
# append the most probable next word.
seed_text = 'oh my'
next_words = 20
# Reverse lookup: token id -> word.
index2words = {idx: word for word, idx in tokenizer.word_index.items()}
for _ in range(next_words):
    test_seq = tokenizer.texts_to_sequences([seed_text])[0]
    # Same padded width as the training inputs (max_len minus the target column).
    test_seq = pad_sequences([test_seq], maxlen=max_len-1)
    # `Sequential.predict_classes` was removed from recent TensorFlow releases;
    # taking the argmax of the softmax output is the equivalent operation.
    predicted = np.argmax(model.predict(test_seq, verbose=0), axis=-1)
    print(predicted)
    next_word = index2words.get(int(predicted[0]))
    if next_word is None:
        # The id has no vocabulary entry (e.g. the padding id 0): stop here
        # instead of failing with a KeyError.
        break
    output_word = ' ' + next_word
    seed_text = seed_text + output_word
print(seed_text)


[56]
[32]
[1015]
[8]
[10]
[7]
[73]
[87]
[196]
[18]
[2404]
[19]
[303]
[18]
[603]
[11]
[62]
[383]
[19]
[7]
oh my heart doth plead that thou in him dost lie me mistaking thee bring me told with thine compare thee in
