In [3]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
import numpy as np

# Toy training corpus for the next-word model.
data = "Deep learning is amazing. Deep learning builds intelligent systems."

# Normalise the text: lower-case it and drop periods/commas so that
# ``data.split()`` below yields bare words.
data = data.lower().replace(".", "").replace(",", "")

tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
word_index = tokenizer.word_index
# +1 because Keras Tokenizer indices start at 1; index 0 is left for padding.
vocab_size = len(word_index) + 1

# Build every prefix of the corpus that has at least two words. In each
# prefix the last word is the prediction target and the words before it
# are the model input.
words = data.split()
sequences = [' '.join(words[:end]) for end in range(2, len(words) + 1)]

encoded = tokenizer.texts_to_sequences(sequences)
max_len = max(len(seq) for seq in encoded)

# Pad once and slice the resulting 2-D array, instead of padding twice and
# slicing row by row: all columns but the last are the inputs, the last
# column holds the target word index.
padded = pad_sequences(encoded, maxlen=max_len)
X = padded[:, :-1]
y = to_categorical(padded[:, -1], num_classes=vocab_size)

# Next-word classifier: word embeddings -> a single SimpleRNN layer ->
# softmax over the whole vocabulary.
model = Sequential([
    # ``input_length`` is intentionally not passed: the argument is
    # deprecated in Keras 3 (it only triggers a warning there) and the
    # sequence length is taken from the training data when fit() runs.
    Embedding(input_dim=vocab_size, output_dim=10),
    SimpleRNN(50),
    Dense(vocab_size, activation='softmax')
])

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train silently (verbose=0) for 200 epochs on the prefix/next-word pairs.
model.fit(X, y, epochs=200, verbose=0)

def generate_next_word(seed_text: str) -> str:
    """Predict the word most likely to follow ``seed_text``.

    The seed is normalised like the training text (lower-cased, periods and
    commas removed), encoded with the module-level ``tokenizer``, brought to
    ``max_len - 1`` tokens with ``pad_sequences`` and fed to ``model``.

    Args:
        seed_text: Text whose continuation should be predicted.

    Returns:
        The vocabulary word with the highest predicted probability, or
        ``"[unknown]"`` when the seed contains no in-vocabulary word or the
        predicted index has no entry in the tokenizer's vocabulary.
    """
    seed_text = seed_text.lower().replace(".", "").replace(",", "")
    tokens = tokenizer.texts_to_sequences([seed_text])[0]
    if not tokens:
        # Nothing in the seed is known to the tokenizer; padding it would
        # feed the model an all-zero input and produce an arbitrary word.
        return "[unknown]"

    padded = pad_sequences([tokens], maxlen=max_len - 1)
    # predict() returns one row per input sequence; there is exactly one here.
    probabilities = model.predict(padded, verbose=0)[0]
    predicted_index = int(np.argmax(probabilities))
    # index_word is the tokenizer's index -> word mapping, so no linear scan
    # over word_index is needed.
    return tokenizer.index_word.get(predicted_index, "[unknown]")

# Demo: ask the trained model for the word that follows a short seed phrase
# and report it.
seed = "deep learning"
next_word = generate_next_word(seed)
message = f"Input: '{seed}' → Predicted next word: '{next_word}'"
print(message)




Input: 'deep learning' → Predicted next word: 'is'
