In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [None]:
# Toy training corpus: one short sentence per line. The string keeps the
# leading and trailing newline of the original triple-quoted literal.
text = "\n" + "\n".join([
    "i love ai",
    "i love deep learning",
    "i love machine learning",
    "i love neural networks",
    "machine learning is fun",
    "ai is fun",
    "learning ai is important",
    "i build ai projects",
    "ai can learn patterns",
    "deep ai is powerful",
    "ai will change the world",
    "practice ai every day",
    "models help ai learn",
    "ai needs good data",
    "understanding ai is key",
    "training ai takes time",
    "optimizers improve ai",
    "ai can solve problems",
    "ai improves with practice",
    "evaluation of ai matters",
    "data makes ai better",
    "ai helps humans",
    "building ai is enjoyable",
    "i enjoy learning ai",
]) + "\n"

In [None]:
# Build the word-level vocabulary. The whole corpus is passed to the
# tokenizer as a single document, and the resulting word -> integer index
# mapping is printed for inspection.
# NOTE(review): this Tokenizer class is marked deprecated in recent
# TensorFlow releases — confirm it exists in the version this notebook targets.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts([text])
print("Word index: ", tokenizer.word_index)

In [None]:
# Turn every sentence into its growing token-id prefixes: the first two
# tokens, the first three, and so on up to the full sentence. The last id of
# each prefix later serves as the word to predict.
ngram_prefixes = []

for sentence in text.strip().split("\n"):
  token_ids = tokenizer.texts_to_sequences([sentence])[0]
  ngram_prefixes.extend(
      token_ids[:end] for end in range(2, len(token_ids) + 1)
  )

# Left-pad every prefix to the length of the longest one so the result is a
# rectangular array whose final column always holds the target word.
max_len = max(len(prefix) for prefix in ngram_prefixes)
sequences = pad_sequences(ngram_prefixes, maxlen=max_len, padding="pre")
print("Sequences: ", sequences)

In [None]:
# Split each padded row into model input (everything but the last token)
# and target (the last token), then one-hot encode the targets.
vocab_size = len(tokenizer.word_index) + 1
X, target_ids = sequences[:, :-1], sequences[:, -1]
y = tf.keras.utils.to_categorical(target_ids, num_classes=vocab_size)

In [None]:
# Next-word classifier: embed the token ids, summarise the prefix with a GRU,
# and emit a softmax distribution with one slot per vocabulary index.
vocab_size = len(tokenizer.word_index) + 1

model = tf.keras.Sequential(
    [
        tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=20),
        tf.keras.layers.GRU(units=64),
        tf.keras.layers.Dense(units=vocab_size, activation="softmax"),
    ]
)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# targets `y` are one-hot vectors), and accuracy reported as a metric.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
# Train for 100 epochs on the prefix -> next-word pairs; verbose=0 silences
# the per-epoch progress output.
# NOTE(review): no random seed is set anywhere in the visible cells, so the
# trained weights (and therefore the generated text) will vary between runs.
model.fit(X, y, epochs=100, verbose=0)

In [None]:
def generate_words(seed_text, next_words):
  """Greedily extend ``seed_text`` with the model's most likely next words.

  On every step the text produced so far is tokenized, left-padded to the
  model's input length (``max_len - 1``) and fed to ``model``; the word
  with the highest predicted probability is appended.

  Args:
    seed_text: Starting text. Words the tokenizer does not know are dropped
      by ``texts_to_sequences`` and therefore do not influence the prediction.
    next_words: Number of words to append.

  Returns:
    ``seed_text`` followed by the generated words, separated by spaces.
  """
  for _ in range(next_words):
    seed_token_list = tokenizer.texts_to_sequences([seed_text])[0]
    seed_token_list = pad_sequences([seed_token_list], maxlen=max_len-1, padding="pre")

    probabilities = model.predict(seed_token_list, verbose=0)[0]
    # Slot 0 is the padding index: it is not a word and has no entry in
    # tokenizer.index_word, so selecting it would raise KeyError. Take the
    # argmax over the real word slots only and shift back by one.
    next_word_index = int(np.argmax(probabilities[1:])) + 1
    next_word = tokenizer.index_word[next_word_index]
    seed_text += " " + next_word
  return seed_text

In [None]:
def generate_words_top3(seed_text, next_words):
  """Extend ``seed_text`` by sampling each next word from the model's top three.

  On every step the text produced so far is tokenized, left-padded to the
  model's input length (``max_len - 1``) and fed to ``model``. One of the
  three most probable words is then picked uniformly at random (the pick is
  not weighted by probability) and appended.

  The pick uses NumPy's global random state, so output differs between
  calls unless that state is seeded beforehand.

  Args:
    seed_text: Starting text. Words the tokenizer does not know are dropped
      by ``texts_to_sequences`` and therefore do not influence the prediction.
    next_words: Number of words to append.

  Returns:
    ``seed_text`` followed by the generated words, separated by spaces.
  """
  for _ in range(next_words):
    seed_token_list = tokenizer.texts_to_sequences([seed_text])[0]
    seed_token_list = pad_sequences([seed_token_list], maxlen=max_len-1, padding="pre")

    predicts = model.predict(seed_token_list, verbose=0)[0]
    # Slot 0 is the padding index: it is not a word and has no entry in
    # tokenizer.index_word, so sampling it would raise KeyError. Rank only
    # the real word slots and shift the positions back by one.
    top_words_indices = np.argsort(predicts[1:])[-3:][::-1] + 1

    next_word_index = int(np.random.choice(top_words_indices))
    next_word = tokenizer.index_word[next_word_index]

    seed_text += " " + next_word
  return seed_text

In [None]:
# Demo: extend the seed "i love" by six words sampled from the model's
# top-three predictions and print the resulting text.
print(generate_words_top3("i love", 6))