In [51]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
import numpy as np

In [52]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense

In [53]:
# source text
data = """ Jack and Jill went up the hill\n To fetch a pail of water\n Jack fell down and broke his crown\n
And Jill came tumbling after\n """


In [54]:
# integer encode the text

tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
encoded = tokenizer.texts_to_sequences([data])[0]

In [55]:
vocab_size = len(tokenizer.word_index) + 1
print(f"Vocab Size: {vocab_size}")

Vocab Size: 22


In [56]:
# create word to word sequences
sequences = []

for i in range(1, len(encoded)):
    sequence = encoded[i-1:i+1]
    sequences.append(sequence)
print(f"Total Sequences: {len(sequences)}")
sequences = np.array(sequences)

Total Sequences: 24


In [57]:
x, y = sequences[:, 0], sequences[:, 1]
y = to_categorical(y, num_classes=vocab_size)

In [58]:
# define the model
def define_model(vocab_size):
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size,
                        output_dim=10,
                        input_length=1))
    model.add(LSTM(50))
    model.add(Dense(vocab_size, activation="softmax"))

    # compile the model
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam", metrics=["accuracy"])

    # print the summary
    model.summary()
    return model

In [59]:
model = define_model(vocab_size)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 1, 10)             220       
_________________________________________________________________
lstm_4 (LSTM)                (None, 50)                12200     
_________________________________________________________________
dense_4 (Dense)              (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________


In [60]:
# train the model
model.fit(x, y, epochs=500, verbose=1)

Train on 24 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
E

<tensorflow.python.keras.callbacks.History at 0x289b2ba1c50>

In [61]:
tokenizer.word_index

{'and': 1,
 'jack': 2,
 'jill': 3,
 'went': 4,
 'up': 5,
 'the': 6,
 'hill': 7,
 'to': 8,
 'fetch': 9,
 'a': 10,
 'pail': 11,
 'of': 12,
 'water': 13,
 'fell': 14,
 'down': 15,
 'broke': 16,
 'his': 17,
 'crown': 18,
 'came': 19,
 'tumbling': 20,
 'after': 21}

In [64]:
rev_dict = {}
for k, v in tokenizer.word_index.items():
    rev_dict[v] = k

rev_dict[0] = "<OOV>"
rev_dict

{1: 'and',
 2: 'jack',
 3: 'jill',
 4: 'went',
 5: 'up',
 6: 'the',
 7: 'hill',
 8: 'to',
 9: 'fetch',
 10: 'a',
 11: 'pail',
 12: 'of',
 13: 'water',
 14: 'fell',
 15: 'down',
 16: 'broke',
 17: 'his',
 18: 'crown',
 19: 'came',
 20: 'tumbling',
 21: 'after',
 0: '<OOV>'}

In [68]:
for word, index in tokenizer.word_index.items():
    predicted_class = model.predict_classes((index, ))[0]
    print(f"INPUT:{word} => OUTPUT:{rev_dict[predicted_class]}")

INPUT:and => OUTPUT:jill
INPUT:jack => OUTPUT:and
INPUT:jill => OUTPUT:came
INPUT:went => OUTPUT:up
INPUT:up => OUTPUT:the
INPUT:the => OUTPUT:hill
INPUT:hill => OUTPUT:to
INPUT:to => OUTPUT:fetch
INPUT:fetch => OUTPUT:a
INPUT:a => OUTPUT:pail
INPUT:pail => OUTPUT:of
INPUT:of => OUTPUT:water
INPUT:water => OUTPUT:jack
INPUT:fell => OUTPUT:down
INPUT:down => OUTPUT:and
INPUT:broke => OUTPUT:his
INPUT:his => OUTPUT:crown
INPUT:crown => OUTPUT:and
INPUT:came => OUTPUT:tumbling
INPUT:tumbling => OUTPUT:after
INPUT:after => OUTPUT:down


10