In [1]:
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np 

In [2]:
# Raw training text; every "\n"-separated piece becomes one training line.
data = 'In the town of Athy one jeremy Lanigan \n Battered way......'
corpus = data.lower().split("\n")

# Build the word -> index vocabulary from the corpus lines.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# One slot more than the number of distinct words, so that index 0
# (never assigned to a word) is also a valid class/embedding row.
total_words = 1 + len(tokenizer.word_index)

print(tokenizer.word_index)

{'in': 1, 'the': 2, 'town': 3, 'of': 4, 'athy': 5, 'one': 6, 'jeremy': 7, 'lanigan': 8, 'battered': 9, 'way': 10}


In [3]:
# Vocabulary size used for the model: len(tokenizer.word_index) + 1.
print(total_words)

11


In [4]:
# Inspect the lower-cased, newline-split lines the tokenizer was fitted on.
corpus

['in the town of athy one jeremy lanigan ', ' battered way......']

In [14]:
# Expand every corpus line into its growing token prefixes: for tokens
# [a, b, c] this yields [a, b] and [a, b, c]. The final token of each prefix
# is the word to be predicted from the tokens before it.
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# Length of the longest prefix.
max_sequence_len = max(map(len, input_sequences))
print(input_sequences)
print(max_sequence_len)

[[1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7, 8], [9, 10]]
8


In [18]:
# Left-pad every prefix with zeros to a common width of max_sequence_len.
input_sequences = np.asarray(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# Split each row: all but the last column are the predictors, the last
# column is the index of the word to predict.
xs, labels = input_sequences[:, :-1], input_sequences[:, -1]

In [19]:
# Show the padded n-gram matrix (padding='pre', so zeros fill from the left).
input_sequences

array([[ 0,  0,  0,  0,  0,  0,  1,  2],
       [ 0,  0,  0,  0,  0,  1,  2,  3],
       [ 0,  0,  0,  0,  1,  2,  3,  4],
       [ 0,  0,  0,  1,  2,  3,  4,  5],
       [ 0,  0,  1,  2,  3,  4,  5,  6],
       [ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 1,  2,  3,  4,  5,  6,  7,  8],
       [ 0,  0,  0,  0,  0,  0,  9, 10]])

In [20]:
# Predictors: every column of input_sequences except the last.
xs

array([[0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1, 2],
       [0, 0, 0, 0, 1, 2, 3],
       [0, 0, 0, 1, 2, 3, 4],
       [0, 0, 1, 2, 3, 4, 5],
       [0, 1, 2, 3, 4, 5, 6],
       [1, 2, 3, 4, 5, 6, 7],
       [0, 0, 0, 0, 0, 0, 9]])

In [21]:
# Targets: the last column of input_sequences.
labels

array([ 2,  3,  4,  5,  6,  7,  8, 10])

In [22]:
# One-hot encode the target indices into total_words classes so they can be
# used with a categorical cross-entropy loss.
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

In [25]:
# Inspect the one-hot targets (one row per sequence, total_words columns).
ys

array([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)

In [26]:
# Next-word classifier: embedding -> bidirectional LSTM -> softmax over the
# vocabulary. Inputs are the padded prefixes minus their last token, hence
# an input length of max_sequence_len - 1.
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(150)))
model.add(Dense(total_words, activation='softmax'))

# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by current Keras optimizers.
adam = Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

# Targets are one-hot rows (ys), matching the categorical cross-entropy loss.
history = model.fit(xs, ys, epochs=5, verbose=1)
print(model)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
<tensorflow.python.keras.engine.sequential.Sequential object at 0x0000019DCEABA488>


In [28]:
# Generate text greedily: repeatedly predict the most likely next word for
# the current text and append it.
seed_text = "I made poetry machine"
next_words = 100

# Reverse vocabulary lookup, built once instead of scanning word_index on
# every generation step.
index_to_word = {index: word for word, index in tokenizer.word_index.items()}

for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    # Same left-padding and width as the training predictors.
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    # Sequential.predict_classes was removed from Keras; take the argmax of
    # the softmax output instead (equivalent for a softmax classifier).
    probabilities = model.predict(token_list, verbose=0)
    predicted = int(np.argmax(probabilities, axis=-1)[0])
    # An index with no word (e.g. 0) contributes an empty word, as before.
    output_word = index_to_word.get(predicted, "")
    seed_text += " " + output_word
print(seed_text)




I made poetry machine way way way way town of athy jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan lanigan lanigan lanigan lanigan lanigan one jeremy lanigan lanigan
