<a href="https://colab.research.google.com/github/tharun-0-0-6/sdc/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#LSTM Real-Life Example: Next Word Prediction from Sentence Start
# Step 1: Import libraries
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Step 2: Sample dataset
# Toy training corpus, one sentence per line. The order is significant: it
# determines the order in which training samples are generated downstream.
sentences = """\
I like to eat pizza
I like to play football
I enjoy watching movies
I love coding in python
I like to read books
I enjoy drinking coffee
I like to go jogging
I love eating burgers""".splitlines()

# Step 3: Tokenize
# Learn a word -> integer-id vocabulary from the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
# Word ids start at 1; the extra slot accounts for id 0, which the padding
# step below uses as its fill value.
total_words = 1 + len(tokenizer.word_index)

# Step 4: Create input sequences
# For every sentence emit each prefix of length >= 2; the last token of a
# prefix becomes the label and the tokens before it become the input.
input_sequences = []
for sentence in sentences:
    encoded = tokenizer.texts_to_sequences([sentence])[0]
    for end in range(2, len(encoded) + 1):
        input_sequences.append(encoded[:end])

# Pad sequences
# Left-pad with zeros so every row has the length of the longest prefix and
# the label always sits in the final column.
max_len = max(map(len, input_sequences))
input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='pre')

# Split X and y
X = input_sequences[:, :-1]  # everything except the last column: the context
y = input_sequences[:, -1]   # last column: the id of the word to predict
y = np.eye(total_words)[y]  # One-hot encode: row k of the identity matrix for id k

# Step 5: Build LSTM model
# Pipeline: word ids -> 10-dimensional embeddings -> 64-unit LSTM -> softmax
# over the vocabulary (one probability per word id).
# NOTE: `input_length` is deliberately not passed to Embedding. Keras 3
# deprecates the argument (it only triggers a warning); the sequence length is
# inferred from the data the first time the model is called.
model = Sequential([
    Embedding(total_words, 10),
    LSTM(64),
    Dense(total_words, activation='softmax')
])
# categorical_crossentropy matches the one-hot encoded labels in `y`.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step 6: Train
# The whole dataset fits in a single default-size batch, so one epoch is just
# one gradient step and leaves the network essentially untrained. Train for
# many epochs instead so the model can actually learn the corpus.
EPOCHS = 300
history = model.fit(X, y, epochs=EPOCHS, verbose=0)  # verbose=0: avoid one log line per epoch
print(
    f"Trained for {EPOCHS} epochs - "
    f"accuracy: {history.history['accuracy'][-1]:.4f} - "
    f"loss: {history.history['loss'][-1]:.4f}"
)

# ---------------- USER INPUT SECTION ----------------

# Read a starting phrase and convert it to word ids. Words that are not in the
# fitted vocabulary are dropped by the tokenizer.
input_text = input("\n💬 Enter a starting phrase (e.g., 'I like to'): ").strip().lower()
token_list = tokenizer.texts_to_sequences([input_text])[0]

if not token_list:
    # Nothing recognisable was typed: an all-padding input would still yield
    # a "prediction", but it would not depend on the phrase at all.
    print("\n⚠️ None of the words in that phrase are in the vocabulary, so no prediction was made.")
else:
    # Left-pad (or truncate, keeping the most recent words) to the length the
    # model was trained on.
    token_list = pad_sequences([token_list], maxlen=max_len - 1, padding='pre')

    # Predict next word
    predicted = model.predict(token_list, verbose=0)
    # `predicted` has one row per input sequence; take the most likely id of
    # the single row and convert it to a plain int for the dictionary lookup.
    predicted_index = int(np.argmax(predicted, axis=-1)[0])

    # Retrieve predicted word via the tokenizer's id -> word mapping.
    word = tokenizer.index_word.get(predicted_index)
    if word is None:
        # Id 0 is the padding slot and has no word attached to it.
        print("\n⚠️ The model predicted the padding token, so there is no word to show.")
    else:
        print(f"\n🧠 Predicted Next Word: '{word}'")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.0345 - loss: 3.1360

💬 Enter a starting phrase (e.g., 'I like to'): i like to

🧠 Predicted Next Word: 'love'
