<a href="https://colab.research.google.com/github/tharun-0-0-6/sdc/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#RNN Real-Life Example: Predict the Next Word in a Sentence
# Step 1: Import libraries
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Embedding, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Step 2: A tiny corpus of everyday sentences used as the training text
sentences = [
    "I like to eat pizza",
    "I like to play football",
    "I enjoy watching movies",
    "I love coding in python",
    "I like to read books",
    "I enjoy drinking coffee",
    "I like to go jogging",
    "I love eating burgers",
]

# Step 3: Build the vocabulary from the corpus
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
# One extra slot on top of the vocabulary size, so that the largest word
# index is still a valid position in the one-hot / softmax vectors below.
total_words = 1 + len(tokenizer.word_index)

# Expand every encoded sentence into all of its prefixes of length >= 2,
# e.g. [a, b, c, d] -> [a, b], [a, b, c], [a, b, c, d].
# The last id of each prefix is the word to predict from the ids before it.
input_sequences = [
    encoded[:stop]
    for encoded in tokenizer.texts_to_sequences(sentences)
    for stop in range(2, len(encoded) + 1)
]

# Left-pad every prefix to the length of the longest one so they stack
# into a single 2-D array.
max_len = max(map(len, input_sequences))
input_sequences = np.asarray(
    pad_sequences(input_sequences, maxlen=max_len, padding='pre')
)

# Everything but the last column is the model input; the last column is the
# target word id, which is then one-hot encoded via rows of an identity matrix.
X = input_sequences[:, :-1]
y = input_sequences[:, -1]
y = np.eye(total_words)[y]

# Step 4: Define RNN model
# Embedding (10-dim word vectors) -> SimpleRNN (64 units) -> softmax over the
# whole vocabulary. `input_length` is intentionally not passed to Embedding:
# it is deprecated in current Keras and the input shape is inferred on the
# first call to fit().
model = Sequential([
    Embedding(total_words, 10),
    SimpleRNN(64),
    Dense(total_words, activation='softmax'),
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step 5: Train
# A single pass (the fit() default) leaves the network essentially untrained,
# so the prediction below would be close to random. The dataset is tiny, so a
# few hundred epochs still finish in seconds.
EPOCHS = 200
history = model.fit(X, y, epochs=EPOCHS, verbose=0)
print(
    f"Trained for {EPOCHS} epochs - "
    f"accuracy: {history.history['accuracy'][-1]:.4f} - "
    f"loss: {history.history['loss'][-1]:.4f}"
)

# ---------------- USER INPUT SECTION ----------------

input_text = input("\n💬 Enter a starting phrase (e.g., 'I like to'): ").strip().lower()
# Words the tokenizer has never seen are silently dropped here, so the
# resulting list can be empty even for non-empty input.
token_list = tokenizer.texts_to_sequences([input_text])[0]

if not token_list:
    # Nothing the model knows about: predicting from an all-padding input
    # would only produce a meaningless guess, so say so instead.
    print("\n⚠️ None of the words in that phrase are in the vocabulary; cannot predict.")
else:
    # Same left-padding and length as the training inputs.
    token_list = pad_sequences([token_list], maxlen=max_len-1, padding='pre')

    # Predict next word
    predicted = model.predict(token_list, verbose=0)
    # Index 0 is the padding slot and maps to no word, so exclude it from the
    # argmax (and shift the result back by one) to always land on a real word.
    predicted_index = int(np.argmax(predicted[0][1:])) + 1

    # Map the index back to its word via the tokenizer's reverse lookup table.
    word = tokenizer.index_word.get(predicted_index)
    if word is not None:
        print(f"\n🧠 Predicted Next Word: '{word}'")
    else:
        print(f"\n⚠️ Predicted index {predicted_index} has no word in the vocabulary.")




1/1 ━━━━━━━━━━━━━━━━━━━━ 4s 4s/step - accuracy: 0.0345 - loss: 3.1339

💬 Enter a starting phrase (e.g., 'I like to'): I

🧠 Predicted Next Word: 'coding'
