In [None]:
import numpy as np
import requests
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, LSTM, Dropout, Activation, Embedding, Bidirectional

In [None]:
# retrieve the data from the URL
# The timeout keeps this cell from hanging forever on a stalled connection,
# and raise_for_status() stops the notebook on an HTTP error instead of
# letting an error page be tokenized as if it were the rhyme corpus.
response = requests.get(
    'https://raw.githubusercontent.com/kellerdevin/Nursery-Rhyme/main/nursery_rhymes.txt',
    timeout=30,
)
response.raise_for_status()
lines = response.text.split('\n')

# parse the data: keep only non-blank lines, with surrounding whitespace removed
data = [line.strip() for line in lines if line.strip()]

# show a short preview rather than dumping the whole corpus into the output
print(len(data), 'lines loaded')
print(data[:5])



In [None]:
# tokenization: fit a Tokenizer on the cleaned lines so every distinct word
# gets an integer id
token = Tokenizer()
token.fit_on_texts(data)

# one slot per counted word plus one extra; this value sizes both the
# Embedding input and the softmax output of the model
vocab_size = 1 + len(token.word_counts)

# encode the data: each line becomes a list of word ids
encoded_text = token.texts_to_sequences(data)

print(encoded_text)

[[1, 175, 10, 498], [1, 175, 10, 498, 12, 69, 80, 585], [19, 21, 3, 965, 71], [1, 723, 10, 498, 5, 202, 499, 585], [2, 130, 34, 586, 62], [1, 123, 10, 498, 291, 18, 499, 585], [2, 203, 1, 723, 264, 966], [1, 723, 10, 498, 724, 88, 499, 585], [2, 587, 725, 726, 111, 265], [1378, 727, 71], [292, 727, 71, 49, 160, 967, 588], [18, 728, 293, 17, 28, 1379], [292, 727, 71, 49, 160, 37, 266], [18, 728, 293, 330, 28, 588, 111, 265], [131, 4, 47, 500], [131, 4, 47, 500], [13, 729, 730], [131, 4, 47, 500, 13, 729, 731], [11, 51, 78, 3, 968], [21, 3, 8, 1380], [11, 51, 78, 3, 968, 27, 1, 969, 331, 732], [131, 4, 47, 500], [13, 729, 730], [131, 4, 47, 500, 2, 4, 47, 1381, 55], [11, 51, 78, 3, 970], [2, 3, 367, 10, 1382], [11, 51, 78, 3, 970, 27, 1, 969, 331, 7], [1, 52, 7, 1, 267], [1, 52, 7, 1, 267], [53, 733, 43], [2, 734, 1, 241, 4, 1383], [5, 30, 65, 1, 971], [2, 589, 15, 735], [14, 590, 424, 1384, 1385], [368, 176], [368, 176, 132, 3, 112, 52], [137, 4, 1, 266], [42, 368, 176, 4, 1, 112, 52], 

In [None]:
# prepare training data
# Every prefix of at least two tokens becomes one training sample; the padding
# step later takes the last token of each sample as the label and the tokens
# before it as the context.
# The upper bound is len(d) + 1 so that the complete line is emitted as well.
# Stopping at len(d) would mean the final word of a line is never used as a
# label and two-word lines contribute no samples at all. Lines shorter than
# two tokens yield an empty range and are skipped automatically.
datalist = [d[:i] for d in encoded_text for i in range(2, len(d) + 1)]

# report the sample count instead of printing every prefix
print(len(datalist))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[2, 931, 114, 1151, 513]
[2, 931, 114, 1151, 513, 2]
[292, 1965]
[99, 473]
[99, 473, 4]
[99, 473, 4, 11]
[1057, 47]
[1057, 47, 532]
[1057, 47, 532, 20]
[1057, 47, 532, 20, 6]
[1057, 47, 532, 20, 6, 31]
[66, 187]
[66, 187, 2]
[66, 187, 2, 59]
[99, 473]
[99, 473, 4]
[99, 473, 4, 11]
[1, 123]
[1, 123, 10]
[1, 123]
[1, 123, 10]
[30, 26]
[30, 26, 1]
[14, 218]
[14, 218, 1272]
[1, 123]
[1, 123, 10]
[1, 123, 10, 559]
[1, 123, 10, 559, 53]
[1, 123, 10, 559, 53, 43]
[1, 123, 10, 559, 53, 43, 1]
[2, 445]
[2, 445, 30]
[2, 445, 30, 26]
[1, 1966]
[41, 22]
[41, 22, 1]
[41, 22, 1, 550]
[41, 22, 1, 550, 10]
[41, 22, 1, 550, 10, 1]
[41, 22, 1, 550, 10, 1, 71]
[41, 22, 1, 550, 10, 1, 71, 99]
[41, 22, 1, 550, 10, 1, 71, 99, 860]
[41, 22, 1, 550, 10, 1, 71, 99, 860, 6]
[25, 223]
[25, 223, 1]
[25, 223, 1, 1967]
[25, 223, 1, 1967, 22]
[25, 223, 1, 1967, 22, 151]
[25, 223, 1, 1967, 22, 151, 26]
[25, 223, 1, 1967, 22, 151, 26, 4]
[25, 223, 1, 196

In [None]:
# padding
# Bring every prefix to max_length columns (padding is applied at the front)
# so the samples stack into one 2-D array.
max_length = 20
sequences = pad_sequences(datalist, padding='pre', maxlen=max_length)

# Split each row: all columns but the last are the model input, the last
# column is the word id to predict, one-hot encoded over the vocabulary.
X = sequences[:, :-1]
y = to_categorical(sequences[:, -1], num_classes=vocab_size)

# width of the model input, reused by the Embedding layer and at generation time
seq_length = X.shape[1]

print(X.shape)

(11187, 19)


In [None]:
# create the model: the same stack of layers, declared in one list
model = Sequential([
    # word ids -> 50-dimensional vectors, seq_length ids per sample
    Embedding(vocab_size, 50, input_length=seq_length),
    # first recurrent layer returns the full sequence for the next LSTM
    LSTM(100, return_sequences=True),
    LSTM(100),
    Dense(100, activation='relu'),
    # one output unit per vocabulary slot
    Dense(vocab_size, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# train the model
model.fit(X, y, epochs=50, batch_size=32)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7efffb211670>

In [None]:
# nursery rhyme generation
def generate_nursery_rhyme(seed_text):
    num_words = 20
    num_lines = 30
    for i in range(num_lines):
        text = []
        for _ in range(num_words):
            encoded = token.texts_to_sequences([seed_text])
            encoded = pad_sequences(encoded, maxlen=seq_length, padding='pre')

            y_pred = np.argmax(model.predict(encoded), axis=-1)

            predicted_word = ""
            for word, index in token.word_index.items():
                if index == y_pred:
                    predicted_word = word
                    break

            seed_text = seed_text + ' ' + predicted_word
            text.append(predicted_word)

        seed_text = text[-1]
        text = ' '.join(text)
        print(text)

In [None]:
# generate nursery rhyme, starting from a two-word seed
seed_text = "sleep tight"
generate_nursery_rhyme(seed_text=seed_text)

baby and the little boy and a heigh and a heigh that jack you met a little waddle gammon and
the little boy and the little waddle leave the town began to the little waddle leave the town e i
met a little guinea which he haw not very girls you you suppose you do you do you a pie
sing saddle think a little man and he came to the little jolly bouncing and he came to the little
boy and the sun's dog may grind the crooked snooks won't make a little waddle quoth and the the waddle
goes the little man and he came to the little jolly bouncing and he came to the little waddle leave
a gay and sew a little louder behind a ting and a heigh and a heigh that jack you met
up a silver of the heigh and a heigh and a heigh that jack you met a little waddle leave
a gay and sew a little louder behind a ting and a heigh and a heigh that jack you met
up a silver of the heigh and a heigh and a heigh that jack you met a little waddle leave
a gay and sew a little louder behind a ting and a heigh and a heigh th