In [1]:
from numpy import array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
# from keras.utils.vis_utils import plot_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Embedding

In [2]:
# generate a sequence from the model
def generate_seq(model, tokenizer, seed_text, n_words):
    """Extend ``seed_text`` with up to ``n_words`` words predicted by ``model``.

    Each step encodes the current text with ``tokenizer``, asks ``model`` for
    class probabilities, takes the most probable class index and maps it back
    to a word through ``tokenizer.word_index``. The predicted word then
    becomes the input for the next step.

    Args:
        model: object exposing ``predict(x, verbose=0)`` that returns one row
            of class scores per input sample.
        tokenizer: object exposing ``texts_to_sequences`` and ``word_index``.
        seed_text: text to start from; only its last known word is used as
            the model input.
        n_words: maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Fewer than ``n_words`` words are appended when the current
        text contains no word known to the tokenizer, or when the predicted
        index has no entry in ``tokenizer.word_index``.
    """
    in_text, result = seed_text, seed_text

    # invert word -> index once instead of scanning the dict on every step
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    # generate a fixed number of words
    for _ in range(n_words):

        # encode the text as integers
        encoded = tokenizer.texts_to_sequences([in_text])[0]
        if len(encoded) == 0:
            # no known word to condition on, so there is nothing to predict from
            break

        # feed exactly one sample holding one word index (the last known word);
        # a multi-word seed would otherwise be treated as several samples
        encoded = array(encoded[-1:]).reshape(1, 1)

        # predict a word in the vocabulary; argmax over the class scores
        # replaces the deprecated `predict_classes`
        yhat = model.predict(encoded, verbose=0).argmax(axis=-1)
        print('yhat :', yhat)

        # map predicted word index to word
        out_word = index_to_word.get(int(yhat[0]))
        if out_word is None:
            # predicted index is not assigned to any word
            break

        # append to input
        in_text, result = out_word, result + ' ' + out_word
    return result

In [3]:
# define the model

def define_model(vocab_size, embedding_dim=10, lstm_units=50):
    """Build and compile the one-word-in, one-word-out language model.

    The network is Embedding -> LSTM -> Dense(softmax). It takes a single
    word index per sample (``input_length=1``) and outputs a probability
    for each of the ``vocab_size`` classes.

    Args:
        vocab_size: number of classes; used both as the embedding input
            dimension and as the width of the softmax output.
        embedding_dim: size of each word embedding vector (default 10).
        lstm_units: number of LSTM units (default 50).

    Returns:
        The compiled ``Sequential`` model. A summary is printed as a side
        effect.
    """
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim, input_length=1))
    model.add(LSTM(lstm_units))
    model.add(Dense(vocab_size, activation='softmax'))

    # compile network; categorical cross-entropy expects one-hot targets
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # summarize defined model
    model.summary()
    # plot_model(model, to_file='model.png', show_shapes=True)
    return model

In [4]:
# source text
# Nursery rhyme used as the entire training corpus. Every verse ends with a
# "\n" escape followed by a physical line break, so the resulting string has
# two newline characters between verses.
data = """ Jack and Jill went up the hill\n
To fetch a pail of water\n
Jack fell down and broke his crown\n
And Jill came tumbling after\n """

In [5]:
# integer encode text
# Fit the word -> index mapping on the corpus, then turn the corpus itself
# into its list of word indices (one text in, one sequence out).
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
(encoded,) = tokenizer.texts_to_sequences([data])

In [6]:
# inspect the word indices of the corpus, in reading order
encoded

[2,
 1,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 2,
 14,
 15,
 1,
 16,
 17,
 18,
 1,
 3,
 19,
 20,
 21]

In [7]:
# number of word tokens in the corpus
len(encoded)

25

In [8]:
# determine the vocabulary size
# word indices start at 1, so one extra slot is needed for index 0
vocab_size = 1 + len(tokenizer.word_index)
print('Vocabulary Size: %d' % vocab_size)

Vocabulary Size: 22


In [9]:
# create word -> word sequences
# pair every word index with the index of the word that follows it
sequences = [[current, following] for current, following in zip(encoded, encoded[1:])]

print('Total Sequences: %d' % len(sequences))

Total Sequences: 24


In [10]:
# inspect the [current word, next word] index pairs
sequences

[[2, 1],
 [1, 3],
 [3, 4],
 [4, 5],
 [5, 6],
 [6, 7],
 [7, 8],
 [8, 9],
 [9, 10],
 [10, 11],
 [11, 12],
 [12, 13],
 [13, 2],
 [2, 14],
 [14, 15],
 [15, 1],
 [1, 16],
 [16, 17],
 [17, 18],
 [18, 1],
 [1, 3],
 [3, 19],
 [19, 20],
 [20, 21]]

In [11]:
# split into X and y elements
sequences = array(sequences)
# column 0 is the input word index, column 1 the index of the word after it
X = sequences[:, 0]
y = sequences[:, 1]

In [12]:
# inspect the input word indices
X

array([ 2,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,  2, 14, 15,  1,
       16, 17, 18,  1,  3, 19, 20])

In [13]:
# inspect the target word indices (still integers at this point)
y

array([ 1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,  2, 14, 15,  1, 16,
       17, 18,  1,  3, 19, 20, 21])

In [14]:
# one hot encode outputs
# Encode from the integer targets stored in `sequences` instead of from `y`
# itself, so that re-running this cell does not one-hot encode a matrix that
# is already one-hot.
y = to_categorical(sequences[:, 1], num_classes=vocab_size)

In [15]:
# inspect the one-hot encoded targets (one row per training pair)
y

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,

In [16]:
# number of training inputs
len(X)

24

In [17]:
# number of training targets; should equal the number of inputs
len(y)

24

In [18]:
# define model
# output layer and embedding are both sized by the vocabulary
model = define_model(vocab_size=vocab_size)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 1, 10)             220       
_________________________________________________________________
lstm (LSTM)                  (None, 50)                12200     
_________________________________________________________________
dense (Dense)                (None, 22)                1122      
Total params: 13,542
Trainable params: 13,542
Non-trainable params: 0
_________________________________________________________________


In [19]:
# fit network
# verbose=2 logs one line per epoch
model.fit(x=X, y=y, epochs=500, verbose=2)

Epoch 1/500
1/1 - 0s - loss: 3.0903 - accuracy: 0.0417
Epoch 2/500
1/1 - 0s - loss: 3.0895 - accuracy: 0.1250
Epoch 3/500
1/1 - 0s - loss: 3.0887 - accuracy: 0.1667
Epoch 4/500
1/1 - 0s - loss: 3.0879 - accuracy: 0.2083
Epoch 5/500
1/1 - 0s - loss: 3.0871 - accuracy: 0.2083
Epoch 6/500
1/1 - 0s - loss: 3.0862 - accuracy: 0.1250
Epoch 7/500
1/1 - 0s - loss: 3.0854 - accuracy: 0.1250
Epoch 8/500
1/1 - 0s - loss: 3.0846 - accuracy: 0.1250
Epoch 9/500
1/1 - 0s - loss: 3.0837 - accuracy: 0.1250
Epoch 10/500
1/1 - 0s - loss: 3.0829 - accuracy: 0.1250
Epoch 11/500
1/1 - 0s - loss: 3.0820 - accuracy: 0.1250
Epoch 12/500
1/1 - 0s - loss: 3.0811 - accuracy: 0.1250
Epoch 13/500
1/1 - 0s - loss: 3.0802 - accuracy: 0.1250
Epoch 14/500
1/1 - 0s - loss: 3.0793 - accuracy: 0.1250
Epoch 15/500
1/1 - 0s - loss: 3.0783 - accuracy: 0.1250
Epoch 16/500
1/1 - 0s - loss: 3.0774 - accuracy: 0.1250
Epoch 17/500
1/1 - 0s - loss: 3.0764 - accuracy: 0.1250
Epoch 18/500
1/1 - 0s - loss: 3.0754 - accuracy: 0.1250
E

Epoch 147/500
1/1 - 0s - loss: 2.1954 - accuracy: 0.5000
Epoch 148/500
1/1 - 0s - loss: 2.1815 - accuracy: 0.5000
Epoch 149/500
1/1 - 0s - loss: 2.1675 - accuracy: 0.5000
Epoch 150/500
1/1 - 0s - loss: 2.1535 - accuracy: 0.5000
Epoch 151/500
1/1 - 0s - loss: 2.1395 - accuracy: 0.5000
Epoch 152/500
1/1 - 0s - loss: 2.1255 - accuracy: 0.5000
Epoch 153/500
1/1 - 0s - loss: 2.1114 - accuracy: 0.5000
Epoch 154/500
1/1 - 0s - loss: 2.0974 - accuracy: 0.5000
Epoch 155/500
1/1 - 0s - loss: 2.0832 - accuracy: 0.5000
Epoch 156/500
1/1 - 0s - loss: 2.0691 - accuracy: 0.5000
Epoch 157/500
1/1 - 0s - loss: 2.0549 - accuracy: 0.5000
Epoch 158/500
1/1 - 0s - loss: 2.0408 - accuracy: 0.5000
Epoch 159/500
1/1 - 0s - loss: 2.0266 - accuracy: 0.5000
Epoch 160/500
1/1 - 0s - loss: 2.0124 - accuracy: 0.5000
Epoch 161/500
1/1 - 0s - loss: 1.9981 - accuracy: 0.5417
Epoch 162/500
1/1 - 0s - loss: 1.9839 - accuracy: 0.5417
Epoch 163/500
1/1 - 0s - loss: 1.9696 - accuracy: 0.5417
Epoch 164/500
1/1 - 0s - loss: 

Epoch 291/500
1/1 - 0s - loss: 0.6088 - accuracy: 0.8750
Epoch 292/500
1/1 - 0s - loss: 0.6037 - accuracy: 0.8750
Epoch 293/500
1/1 - 0s - loss: 0.5986 - accuracy: 0.8750
Epoch 294/500
1/1 - 0s - loss: 0.5936 - accuracy: 0.8750
Epoch 295/500
1/1 - 0s - loss: 0.5887 - accuracy: 0.8750
Epoch 296/500
1/1 - 0s - loss: 0.5838 - accuracy: 0.8750
Epoch 297/500
1/1 - 0s - loss: 0.5790 - accuracy: 0.8750
Epoch 298/500
1/1 - 0s - loss: 0.5742 - accuracy: 0.8750
Epoch 299/500
1/1 - 0s - loss: 0.5696 - accuracy: 0.8750
Epoch 300/500
1/1 - 0s - loss: 0.5650 - accuracy: 0.8750
Epoch 301/500
1/1 - 0s - loss: 0.5604 - accuracy: 0.8750
Epoch 302/500
1/1 - 0s - loss: 0.5559 - accuracy: 0.8750
Epoch 303/500
1/1 - 0s - loss: 0.5515 - accuracy: 0.8750
Epoch 304/500
1/1 - 0s - loss: 0.5471 - accuracy: 0.8750
Epoch 305/500
1/1 - 0s - loss: 0.5428 - accuracy: 0.8750
Epoch 306/500
1/1 - 0s - loss: 0.5385 - accuracy: 0.8750
Epoch 307/500
1/1 - 0s - loss: 0.5343 - accuracy: 0.8750
Epoch 308/500
1/1 - 0s - loss: 

Epoch 435/500
1/1 - 0s - loss: 0.2720 - accuracy: 0.8750
Epoch 436/500
1/1 - 0s - loss: 0.2713 - accuracy: 0.8750
Epoch 437/500
1/1 - 0s - loss: 0.2706 - accuracy: 0.8750
Epoch 438/500
1/1 - 0s - loss: 0.2698 - accuracy: 0.8750
Epoch 439/500
1/1 - 0s - loss: 0.2691 - accuracy: 0.8750
Epoch 440/500
1/1 - 0s - loss: 0.2684 - accuracy: 0.8750
Epoch 441/500
1/1 - 0s - loss: 0.2678 - accuracy: 0.8750
Epoch 442/500
1/1 - 0s - loss: 0.2671 - accuracy: 0.8750
Epoch 443/500
1/1 - 0s - loss: 0.2664 - accuracy: 0.8750
Epoch 444/500
1/1 - 0s - loss: 0.2658 - accuracy: 0.8750
Epoch 445/500
1/1 - 0s - loss: 0.2651 - accuracy: 0.8750
Epoch 446/500
1/1 - 0s - loss: 0.2645 - accuracy: 0.8750
Epoch 447/500
1/1 - 0s - loss: 0.2639 - accuracy: 0.8750
Epoch 448/500
1/1 - 0s - loss: 0.2632 - accuracy: 0.8750
Epoch 449/500
1/1 - 0s - loss: 0.2626 - accuracy: 0.8750
Epoch 450/500
1/1 - 0s - loss: 0.2620 - accuracy: 0.8750
Epoch 451/500
1/1 - 0s - loss: 0.2614 - accuracy: 0.8750
Epoch 452/500
1/1 - 0s - loss: 

<tensorflow.python.keras.callbacks.History at 0x272d8da1940>

In [20]:
""" Jack and Jill went up the hill\n
To fetch a pail of water\n
Jack fell down and broke his crown\n
And Jill came tumbling after\n """

' Jack and Jill went up the hill\n\nTo fetch a pail of water\n\nJack fell down and broke his crown\n\nAnd Jill came tumbling after\n '

In [21]:
# evaluate
# ask the trained model for the single word that follows 'Jack'
print(generate_seq(model, tokenizer, seed_text='Jack', n_words=1))

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
yhat : [1]
Jack and


In [22]:
# single word that follows 'tumbling'
print(generate_seq(model, tokenizer, seed_text='tumbling', n_words=1))

yhat : [21]
tumbling after


In [23]:
# single word that follows 'pail'
print(generate_seq(model, tokenizer, seed_text='pail', n_words=1))

yhat : [12]
pail of


In [24]:
# two-word continuation: each predicted word becomes the next input
print(generate_seq(model, tokenizer, seed_text='Jill', n_words=2))

yhat : [19]
yhat : [20]
Jill came tumbling


In [25]:
# 'after' is the last word of the corpus, so it never appears as a training
# input; whatever the model predicts after it was not learned from the rhyme
print(generate_seq(model, tokenizer, seed_text='tumbling', n_words=2))

yhat : [21]
yhat : [7]
tumbling after hill
