### Collect data 

In [15]:
sentences = [
    "The cat sat on the",
    "The cat sat on the table",
    "The dog barked at the",
    "The dog barked at the mouse"
]


### Prepare the Data 

In [16]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

sequences = tokenizer.texts_to_sequences(sentences)

X, y = [], []
for sequence in sequences:
    for i in range(1, len(sequence)):
        X.append(sequence[:i])
        y.append(sequence[i])

max_sequence_length = max(len(x) for x in X)
X = pad_sequences(X, maxlen=max_sequence_length)
y = np.array(y) - 1  


### Building Model 

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=10, input_length=max_sequence_length))
model.add(SimpleRNN(units=50))
model.add(Dense(len(tokenizer.word_index), activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


### Train the Model 

In [18]:
model.fit(X, y, epochs=50, verbose=1)


Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.0000e+00 - loss: 2.1936
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - accuracy: 0.1111 - loss: 2.1789
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step - accuracy: 0.1111 - loss: 2.1641
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.2222 - loss: 2.1490
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.2222 - loss: 2.1335
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - accuracy: 0.2222 - loss: 2.1175
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.2222 - loss: 2.1007
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step - accuracy: 0.2222 - loss: 2.0832
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

<keras.src.callbacks.history.History at 0x2db5c001b80>

### Generated the Text 

In [19]:
def predict_next_word(model, tokenizer, text):
    sequence = tokenizer.texts_to_sequences([text])[0]
    sequence = pad_sequences([sequence], maxlen=max_sequence_length)
    prediction = model.predict(sequence, verbose=0)
    predicted_word_index = np.argmax(prediction)
    predicted_word = tokenizer.index_word.get(predicted_word_index + 1, "unknown")
    return predicted_word

# Example usage
input_text = "The dog barked at the"
next_word = predict_next_word(model, tokenizer, input_text)
print(f"Next word prediction: {next_word}")


Next word prediction: table
