### Importing all the necessary libraries:

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
import pickle
import numpy as np

### Reading the file:

In [2]:
# Load the raw corpus. The context manager guarantees the file handle is
# closed as soon as the lines have been read.
with open("textfile.txt", "r", encoding="utf8") as file:
    lines = file.readlines()

# Join every line into one string (done once, not once per line).
data1 = ' '.join(lines)

# Drop line breaks, the byte-order mark and curly double quotes.
data1 = data1.replace('\n', '').replace('\r', '').replace('\ufeff', '').replace('“','').replace('”','')

# Collapse any run of whitespace into a single space.
data1 = ' '.join(data1.split())

# Keep only the first 1000 characters of the cleaned text.
data1 = data1[:1000]
data1

'One morning, when Gregor Samsa woke from troubled dreams, he found himself transformed in his bed into a horrible vermin. He lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightly domed and divided by arches into stiff sections. The bedding was hardly able to cover it and seemed ready to slide off any moment. His many legs, pitifully thin compared with the size of the rest of him, waved about helplessly as he looked. "What\'s happened to me?" he thought. It wasn\'t a dream. His room, a proper human room although a little too small, lay peacefully between its four familiar walls. A collection of textile samples lay spread out on the table - Samsa was a travelling salesman - and above it there hung a picture that he had recently cut out of an illustrated magazine and housed in a nice, gilded frame. It showed a lady fitted out with a fur hat and fur boa who sat upright, raising a heavy fur muff that covered the whole of her lower arm toward

In [3]:
# Character length of data1 after the [:1000] slice applied when it was built.
len(data1)

1000

### Tokenization:

In [4]:
# Fit the tokenizer on the cleaned text (passed as a one-element list).
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data1])

# Persist the fitted tokenizer. The context manager flushes and closes the
# file, so the pickle is complete on disk when this cell finishes.
# NOTE(review): the file name is spelled 'tokennizer.pkl'; it is kept as-is
# because other code may load the tokenizer by this exact name.
with open('tokennizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

# Encode the text as integer token ids; [0] selects the single input text.
sequence_data1 = tokenizer.texts_to_sequences([data1])[0]
sequence_data1[:10]

[21, 22, 23, 24, 12, 25, 26, 27, 28, 2]

In [5]:
# One more than the number of entries in the tokenizer's word index; this
# value sizes the embedding input and the softmax output further down.
known_words = tokenizer.word_index
vocab_size = 1 + len(known_words)
print(vocab_size)

131


In [6]:
# Number of token ids in the encoded text.
len(sequence_data1)

185

In [7]:
# Slide a window of four consecutive token ids across the encoded text,
# advancing one token at a time.
sequences = [
    sequence_data1[start:start + 4]
    for start in range(len(sequence_data1) - 3)
]

print("Length of sequences: ", len(sequences))
sequences = np.array(sequences)
sequences[:10]

Length of sequences:  182


array([[21, 22, 23, 24],
       [22, 23, 24, 12],
       [23, 24, 12, 25],
       [24, 12, 25, 26],
       [12, 25, 26, 27],
       [25, 26, 27, 28],
       [26, 27, 28,  2],
       [27, 28,  2, 29],
       [28,  2, 29, 30],
       [ 2, 29, 30, 31]])

In [8]:
# Split every 4-token window: the first three ids are the model input,
# the fourth id is the target to predict.
X = np.array([window[0:3] for window in sequences])
Y = np.array([window[3] for window in sequences])

In [9]:
# Preview the first ten inputs and their targets.
for label, values in (("Data1: ", X), ("Responses: ", Y)):
    print(label, values[:10])

Data1:  [[21 22 23]
 [22 23 24]
 [23 24 12]
 [24 12 25]
 [12 25 26]
 [25 26 27]
 [26 27 28]
 [27 28  2]
 [28  2 29]
 [ 2 29 30]]
Responses:  [24 12 25 26 27 28  2 29 30 31]


In [10]:
# One-hot encode the integer targets into vectors of length vocab_size.
# NOTE(review): this overwrites Y in place, so re-running this cell without
# first re-running the cell that builds X and Y feeds the already-encoded
# array back into to_categorical.
Y = to_categorical(Y, num_classes=vocab_size)
Y[:5]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0.,

### Model Creation:

In [11]:
# Next-word model: embedding -> two stacked LSTMs -> dense hidden layer ->
# softmax over the vocabulary.
model = Sequential([
    # Maps each of the 3 input token ids to a 10-dimensional vector.
    Embedding(vocab_size, 10, input_length=3),
    # return_sequences=True so the second LSTM receives the full sequence.
    LSTM(1000, return_sequences=True),
    LSTM(1000),
    Dense(1000, activation="relu"),
    # One output unit per vocabulary index.
    Dense(vocab_size, activation="softmax"),
])

In [12]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 10)             1310      
                                                                 
 lstm (LSTM)                 (None, 3, 1000)           4044000   
                                                                 
 lstm_1 (LSTM)               (None, 1000)              8004000   
                                                                 
 dense (Dense)               (None, 1000)              1001000   
                                                                 
 dense_1 (Dense)             (None, 131)               131131    
                                                                 
Total params: 13181441 (50.28 MB)
Trainable params: 13181441 (50.28 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
# Categorical cross-entropy matches the one-hot encoded targets in Y.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")

# Train for 70 epochs in mini-batches of 64 samples.
model.fit(X, Y, batch_size=64, epochs=70)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


<keras.src.callbacks.History at 0x170e550e650>

In [14]:
""" We are using the tokenizer and models trained and we are creating the sequence of the text entered and thenusing our model 
    to predict and return the predicted word. 
"""

def Predict_Next_Words(model, tokenizer, text):
    """Predict the word that follows ``text`` and print it.

    Args:
        model: Object exposing ``predict``; the index of its highest output
            value is used as the predicted token id.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences`` and
            ``index_word``.
        text: The context to continue. It is passed to
            ``tokenizer.texts_to_sequences`` as a single sample.

    Returns:
        The predicted word, or an empty string when the tokenizer yields no
        token ids for ``text`` or the predicted id has no word attached.
    """
    sequence = np.array(tokenizer.texts_to_sequences([text]))

    if sequence.size == 0:
        # Nothing to feed the model (e.g. empty input or no recognised
        # words): report "no prediction" instead of failing inside predict().
        predicted_word = ""
    else:
        preds = int(np.argmax(model.predict(sequence)))
        # index_word is the id -> word mapping, so no scan of word_index is
        # needed; ids without a word fall back to "".
        predicted_word = tokenizer.index_word.get(preds, "")

    print(predicted_word)
    return predicted_word

In [18]:
# Keep asking for input until the user enters "0".
# Each line is reduced to its last three words (the model's input length)
# and passed to Predict_Next_Words; any error is reported and the loop
# carries on with the next prompt.

while True:
    text = input("Enter your line: ")

    if text == "0":
        print("Execution completed.....")
        break

    try:
        # split() without an argument ignores leading, trailing and repeated
        # spaces, so empty strings never take a slot in the 3-word window.
        words = text.split()[-3:]
        print(words)

        Predict_Next_Words(model, tokenizer, words)

    except Exception as e:
        # Best-effort interactive loop: show the problem and prompt again.
        print("Error occurred: ", e)

Enter your line:  0


Execution completed.....
