**IMPORTING REQUIRED LIBRARIES**

In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
import pickle
import os
import numpy as np

**UPLOADING TEXT FILE**

In [4]:
from google.colab import files
uploaded = files.upload()

Saving pizza.txt to pizza.txt


**STORING THE FILE CONTENTS INTO A LIST**
**AND PREPROCESSING**



In [5]:
file = open("pizza.txt","r",encoding="utf8")

lines=[]
for i in file:
  lines.append(i)

#converting list to string
data = ''
for i in lines:
  data = ' '.join(lines)

#replacing unnecessary characters with space
data = data.replace('\n','').replace("\r",'').replace('\ufeff','').replace('"','').replace('*','').replace(',','')

#removing unnecessary spaces
data = data.split()
data = ' '.join(data)
print(data[:500])
data[:500]
print('Length: ',len(data))

Pizza the delectable and iconic dish that has transcended borders and captivated taste buds worldwide is a testament to the extraordinary fusion of flavors creativity and cultural significance. Originating from the sun-kissed lands of Italy pizza has evolved into an art form that unites people from diverse backgrounds in a shared love for its mouthwatering combinations. Its history stretches back centuries with roots tracing back to ancient civilizations like the Greeks Romans and Egyptians who 
Length:  10823


**CREATING TOKENIZER OBJECT AND FITTING TO "data" VARIABLE.**





In [6]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])

#saving the tokenizer for predict function
pickle.dump(tokenizer,open('token.pkl','wb'))

sequence_data = tokenizer.texts_to_sequences([data])[0]
sequence_data[:15]
print("Length: ",len(sequence_data))

Length:  1707


***Calculating the size of the vocabulary that the tokenizer object has learned. ***

In [7]:
vocab_size = len(tokenizer.word_index)+1
print(vocab_size)

687


In [8]:
sequences = []
for i in range(3,len(sequence_data)):
  words = sequence_data[i-3:i+1]
  sequences.append(words)

print("Length of sequences: ",len(sequences))
sequences = np.array(sequences)
sequences[:10]

Length of sequences:  1704


array([[  3,   1, 233,   2],
       [  1, 233,   2,  45],
       [233,   2,  45,  35],
       [  2,  45,  35,  13],
       [ 45,  35,  13,   8],
       [ 35,  13,   8, 114],
       [ 13,   8, 114, 234],
       [  8, 114, 234,   2],
       [114, 234,   2, 235],
       [234,   2, 235,  74]])

In [9]:
x=[]
y=[]
for i in sequences:
  x.append(i[0:3])
  y.append(i[3])

#x data acts as input which is used for prediction.
x = np.array(x)
#y is response data which is predicted based on x
y = np.array(y)

In [10]:
print("Data: \n",x[:10])
print("Response: \n",y[:10])

Data: 
 [[  3   1 233]
 [  1 233   2]
 [233   2  45]
 [  2  45  35]
 [ 45  35  13]
 [ 35  13   8]
 [ 13   8 114]
 [  8 114 234]
 [114 234   2]
 [234   2 235]]
Response: 
 [  2  45  35  13   8 114 234   2 235  74]


In [11]:
y = to_categorical(y, num_classes=vocab_size)
y[:5]

array([[0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [12]:
model = Sequential()
model.add(Embedding(vocab_size,10,input_length=3))
model.add(LSTM(1000,return_sequences=True))
model.add(LSTM(1000))
model.add(Dense(1000,activation="relu"))
model.add(Dense(vocab_size,activation="softmax"))

In [13]:
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 10)             6870      
                                                                 
 lstm (LSTM)                 (None, 3, 1000)           4044000   
                                                                 
 lstm_1 (LSTM)               (None, 1000)              8004000   
                                                                 
 dense (Dense)               (None, 1000)              1001000   
                                                                 
 dense_1 (Dense)             (None, 687)               687687    
                                                                 
Total params: 13743557 (52.43 MB)
Trainable params: 13743557 (52.43 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [14]:
from tensorflow.keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint("next_words.h5",monitor="loss",verbose=1,save_best_only=True)
model.compile(loss="categorical_crossentropy",optimizer=Adam(learning_rate=0.001))
model.fit(x,y,epochs=30,batch_size=64,callbacks=[checkpoint])

Epoch 1/30
Epoch 1: loss improved from inf to 6.24007, saving model to next_words.h5


  saving_api.save_model(


Epoch 2/30
Epoch 2: loss improved from 6.24007 to 5.81841, saving model to next_words.h5
Epoch 3/30
Epoch 3: loss improved from 5.81841 to 5.68987, saving model to next_words.h5
Epoch 4/30
Epoch 4: loss improved from 5.68987 to 5.60726, saving model to next_words.h5
Epoch 5/30
Epoch 5: loss improved from 5.60726 to 5.54234, saving model to next_words.h5
Epoch 6/30
Epoch 6: loss improved from 5.54234 to 5.47359, saving model to next_words.h5
Epoch 7/30
Epoch 7: loss improved from 5.47359 to 5.38967, saving model to next_words.h5
Epoch 8/30
Epoch 8: loss improved from 5.38967 to 5.31834, saving model to next_words.h5
Epoch 9/30
Epoch 9: loss improved from 5.31834 to 5.26205, saving model to next_words.h5
Epoch 10/30
Epoch 10: loss improved from 5.26205 to 5.21231, saving model to next_words.h5
Epoch 11/30
Epoch 11: loss improved from 5.21231 to 5.15654, saving model to next_words.h5
Epoch 12/30
Epoch 12: loss improved from 5.15654 to 5.09076, saving model to next_words.h5
Epoch 13/30
Epo

<keras.src.callbacks.History at 0x7bcdcdba7340>

In [15]:
from tensorflow.keras.models import load_model


#load the model and tokenizer
model = load_model('next_words.h5')
tokenizer = pickle.load(open('token.pkl','rb'))

def Predict_Next_Words(model,tokenizer,text):
  sequence = tokenizer.texts_to_sequences([text])
  sequence = np.array(sequence)
  preds = np.argmax(model.predict(sequence))
  predict_word = ""

  for key,value in tokenizer.word_index.items():
    if value == preds:
      predicted_word = key
      break

  print(predicted_word)
  return predicted_word

In [None]:
while (True):
  text = input("Enter your line: ")

  if text=="0":
    print("Execution Terminated...")
    break
  else:
    try:
      text = text.split(" ")
      text = text[-3:]
      print(text)
      Predict_Next_Words(model,tokenizer,text)
    except Exception as e:
      print("Error Occured: ",e)
      continue

Enter your line: Pizza has woven 
['has', 'woven', '']
flavors
Enter your line: pizza has transcended
['pizza', 'has', 'transcended']
to
