In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Location of the quotes CSV; the path string is kept verbatim.
DATASET_PATH = "/content/qoute_dataset.csv"

ds = pd.read_csv(DATASET_PATH)

# Drop missing quotes *before* casting to str: astype(str) would otherwise turn
# NaN into the literal text "nan", which the tokenizer would then learn as if it
# were a real quote.
quotes = ds['quote'].dropna().astype(str).str.lower().tolist()

In [None]:
# Fit a word-level vocabulary on the full list of quotes.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(quotes)

# One slot more than the number of distinct words: the padded rows built later
# are filled with 0, so the model's input/output size must also cover index 0.
total_words = 1 + len(tokenizer.word_index)
print("Total unique words:", total_words)

Total unique words: 8214


In [None]:
from tensorflow.keras.utils import to_categorical

# Expand every tokenized quote into all of its prefixes of length >= 2, so that
# each prefix contributes one (context, next word) training example.
input_sequences = []
for token_ids in tokenizer.texts_to_sequences(quotes):
    input_sequences.extend(
        token_ids[:end] for end in range(2, len(token_ids) + 1)
    )

# Left-pad every prefix to the length of the longest one so they stack into a
# single rectangular integer array.
max_seq_len = max(map(len, input_sequences))
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre')
)

# The last column is the word to predict; everything before it is the context.
X, labels = input_sequences[:, :-1], input_sequences[:, -1]

# One-hot encode the target word over the whole vocabulary.
y = to_categorical(labels, num_classes=total_words)

print("Example X:", X[0])
print("Example y:", y[0])

Example X: [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Embedding, Input, LSTM

# Next-word classifier: embedded context tokens -> two stacked LSTMs -> softmax
# over the vocabulary.
model = Sequential()

# Declare the input shape with an explicit Input layer instead of the
# `input_length` argument of Embedding, which Keras 3 deprecates. Declaring the
# shape up front also lets model.summary() below report a built model.
model.add(Input(shape=(max_seq_len - 1,)))
model.add(Embedding(input_dim=total_words, output_dim=64))

# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(100, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(100))

# One output unit per vocabulary index, matching the one-hot encoded targets.
model.add(Dense(total_words, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()




In [None]:
# Training hyperparameters, named so they are easy to find and tune.
EPOCHS = 30
BATCH_SIZE = 128

# Train on the full set of (context, next word) pairs; `history` keeps the
# object returned by fit().
history = model.fit(X, y, epochs=EPOCHS, batch_size=BATCH_SIZE)


Epoch 1/30
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 87ms/step - accuracy: 0.0350 - loss: 7.0022
Epoch 2/30
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 90ms/step - accuracy: 0.0509 - loss: 6.3243
Epoch 3/30
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 92ms/step - accuracy: 0.0722 - loss: 6.0552
Epoch 4/30
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 92ms/step - accuracy: 0.0855 - loss: 5.8877
Epoch 5/30
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 91ms/step - accuracy: 0.1003 - loss: 5.7333
Epoch 6/30
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 91ms/step - accuracy: 0.1040 - loss: 5.6142
Epoch 7/30
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 91ms/step - accuracy: 0.1111 - loss: 5.4868
Epoch 8/30
[1m677/677[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 91ms/step - accuracy: 0.1172 - loss: 5.3905
Epoch 9/30
[1m677/677[

In [None]:
def predict_next_word(model, tokenizer, text_seq, max_seq_len):
    """Return the word the model scores highest as the continuation of ``text_seq``.

    Args:
        model: Object whose ``predict(batch, verbose=0)`` returns one row of
            scores per input row, indexed by vocabulary index.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and the
            ``index_word`` mapping (vocabulary index -> word).
        text_seq: Seed text to continue.
        max_seq_len: Padded sequence length used to build the training data;
            the context fed to the model is ``max_seq_len - 1`` tokens long.

    Returns:
        The predicted word, or ``None`` when the predicted index has no entry
        in the tokenizer's vocabulary (for example the padding index 0).
    """
    token_ids = tokenizer.texts_to_sequences([text_seq])[0]
    padded = pad_sequences([token_ids], maxlen=max_seq_len - 1, padding='pre')
    scores = model.predict(padded, verbose=0)

    # argmax over the last axis yields one index per batch row; the batch holds
    # a single row, so take element 0 and convert it to a plain int instead of
    # comparing vocabulary indices against a one-element array.
    predicted_index = int(np.argmax(scores, axis=-1)[0])

    # Direct reverse lookup instead of scanning every (word, index) pair.
    return tokenizer.index_word.get(predicted_index)


In [None]:
# Single-step demo: extend a seed phrase by the one word the model predicts.
seed_text = "once a king said that"
next_word = predict_next_word(model, tokenizer, seed_text, max_seq_len)
print(" ".join([seed_text, next_word]))


once a king said that we


In [None]:
def generate_text(model, tokenizer, seed_text, next_words, max_seq_len):
    """Extend ``seed_text`` by repeatedly appending the predicted next word.

    Each step passes the text generated so far to ``predict_next_word`` and
    appends the result, separated by a single space.

    Args:
        model: Passed through to ``predict_next_word``.
        tokenizer: Passed through to ``predict_next_word``.
        seed_text: Starting text.
        next_words: Maximum number of words to append.
        max_seq_len: Passed through to ``predict_next_word``.

    Returns:
        The seed text followed by up to ``next_words`` generated words. Fewer
        words are appended if a prediction yields no word.
    """
    generated = seed_text
    for _ in range(next_words):
        next_word = predict_next_word(model, tokenizer, generated, max_seq_len)
        if next_word is None:
            # No vocabulary word for the predicted index: stop here instead of
            # raising TypeError on `str + None`.
            break
        generated += " " + next_word
    return generated

# Multi-step demo: let the model append ten words to a seed phrase.
generated_quote = generate_text(
    model, tokenizer, "we are going to part just to", 10, max_seq_len
)
print(generated_quote)


we are going to part just to be happy by the world we are bound to be
