In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense


In [2]:
with open('/content/Sheet_1.csv', 'r', encoding='utf-8') as file:
    data = file.read().lower()


In [3]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
vocab_size = len(tokenizer.word_index) + 1
sequences = []

In [4]:
tokens = tokenizer.texts_to_sequences([data])[0]
for i in range(1, len(tokens)):
    seq = tokens[:i+1]
    sequences.append(seq)

In [5]:
max_sequence_len = max([len(seq) for seq in sequences])
sequences = np.array(tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=max_sequence_len, padding='pre'))

In [6]:
# Features and Labels
X, y = sequences[:, :-1], sequences[:, -1]
y = to_categorical(y, num_classes=vocab_size)

In [7]:
# Build the Model
model = Sequential([
    Embedding(vocab_size, 50, input_length=max_sequence_len-1),
    LSTM(150, return_sequences=False),
    Dense(vocab_size, activation='softmax')
])



In [8]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


In [9]:
# Train the Model
model.fit(X, y, epochs=20, verbose=2)

Epoch 1/20
88/88 - 391s - 4s/step - accuracy: 0.0327 - loss: 6.0842
Epoch 2/20
88/88 - 444s - 5s/step - accuracy: 0.0355 - loss: 5.6747
Epoch 3/20
88/88 - 440s - 5s/step - accuracy: 0.0501 - loss: 5.5250
Epoch 4/20
88/88 - 445s - 5s/step - accuracy: 0.0664 - loss: 5.3746
Epoch 5/20
88/88 - 391s - 4s/step - accuracy: 0.0817 - loss: 5.2443
Epoch 6/20
88/88 - 443s - 5s/step - accuracy: 0.0927 - loss: 5.1400
Epoch 7/20
88/88 - 446s - 5s/step - accuracy: 0.1016 - loss: 5.0266
Epoch 8/20
88/88 - 434s - 5s/step - accuracy: 0.1083 - loss: 4.9189
Epoch 9/20
88/88 - 390s - 4s/step - accuracy: 0.1097 - loss: 4.8017
Epoch 10/20
88/88 - 390s - 4s/step - accuracy: 0.1243 - loss: 4.6867
Epoch 11/20
88/88 - 385s - 4s/step - accuracy: 0.1367 - loss: 4.5641
Epoch 12/20
88/88 - 447s - 5s/step - accuracy: 0.1442 - loss: 4.4341
Epoch 13/20
88/88 - 438s - 5s/step - accuracy: 0.1587 - loss: 4.3119
Epoch 14/20
88/88 - 445s - 5s/step - accuracy: 0.1729 - loss: 4.1826
Epoch 15/20
88/88 - 384s - 4s/step - accura

<keras.src.callbacks.history.History at 0x7b06f5202b60>

In [10]:
# Predict Function
def predict_next_word(seed_text, num_words=1):
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = tf.keras.preprocessing.sequence.pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = model.predict(token_list, verbose=0)
        next_word = tokenizer.index_word[np.argmax(predicted)]
        seed_text += " " + next_word
    return seed_text


In [13]:
seed_text = "I am friendly with"
next_words = predict_next_word(seed_text, num_words=3)
print("Generated Text:", next_words)

Generated Text: I am friendly with a same friend
