In [1]:
import builtins

import numpy as np
import tensorflow as tf
import keras
from keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.utils import to_categorical

In [2]:
# Tiny training corpus: one sentence per entry.
# NOTE(review): several words look misspelled ("learing", "amzing", "leaning",
# "perfrom"). The tokenizer gives each distinct spelling its own vocabulary id,
# so e.g. "learing" and "learning" are unrelated tokens -- confirm whether
# that is intended before correcting the text.
sentences = [
    "i love machine learning",
    "machine learing is amzing",
    "deep learning is a subset of machine learning",
    "i enjoy leaning new things",
    "AI can perfrom many tasks",
]

In [3]:
# Build the word -> integer-id vocabulary from the training sentences.
my_tokenizer = Tokenizer()
my_tokenizer.fit_on_texts(sentences)

In [4]:
# Show the fitted vocabulary (word -> integer id).
my_tokenizer.word_index

{'machine': 1,
 'learning': 2,
 'i': 3,
 'is': 4,
 'love': 5,
 'learing': 6,
 'amzing': 7,
 'deep': 8,
 'a': 9,
 'subset': 10,
 'of': 11,
 'enjoy': 12,
 'leaning': 13,
 'new': 14,
 'things': 15,
 'ai': 16,
 'can': 17,
 'perfrom': 18,
 'many': 19,
 'tasks': 20}

In [5]:
# Vocabulary size used for the embedding input and the softmax output.
# word_index ids start at 1, so one extra slot is reserved for id 0.
total_words = 1 + len(my_tokenizer.word_index)
total_words

21

In [6]:
# Build the training examples: for every sentence, collect each leading run of
# token ids of length 2 .. len(sentence). The last id of each run later serves
# as the word to predict from the ids before it.
input_sequence = []
for sentence in sentences:
    # texts_to_sequences takes a list of texts and returns one id-list per
    # text; a single sentence is passed, so unwrap the only element.
    token_list = my_tokenizer.texts_to_sequences([sentence])[0]
    input_sequence.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

In [7]:
# Show the generated n-gram prefix sequences (still ragged, unpadded lists).
input_sequence

[[3, 5],
 [3, 5, 1],
 [3, 5, 1, 2],
 [1, 6],
 [1, 6, 4],
 [1, 6, 4, 7],
 [8, 2],
 [8, 2, 4],
 [8, 2, 4, 9],
 [8, 2, 4, 9, 10],
 [8, 2, 4, 9, 10, 11],
 [8, 2, 4, 9, 10, 11, 1],
 [8, 2, 4, 9, 10, 11, 1, 2],
 [3, 12],
 [3, 12, 13],
 [3, 12, 13, 14],
 [3, 12, 13, 14, 15],
 [16, 17],
 [16, 17, 18],
 [16, 17, 18, 19],
 [16, 17, 18, 19, 20]]

In [8]:
# NOTE(review): this repeats the previous cell's display; input_sequence is not
# modified in between, so this cell is a candidate for removal.
input_sequence

[[3, 5],
 [3, 5, 1],
 [3, 5, 1, 2],
 [1, 6],
 [1, 6, 4],
 [1, 6, 4, 7],
 [8, 2],
 [8, 2, 4],
 [8, 2, 4, 9],
 [8, 2, 4, 9, 10],
 [8, 2, 4, 9, 10, 11],
 [8, 2, 4, 9, 10, 11, 1],
 [8, 2, 4, 9, 10, 11, 1, 2],
 [3, 12],
 [3, 12, 13],
 [3, 12, 13, 14],
 [3, 12, 13, 14, 15],
 [16, 17],
 [16, 17, 18],
 [16, 17, 18, 19],
 [16, 17, 18, 19, 20]]

In [9]:
# The longest prefix sets the common row width for the padded matrix.
max_sequence_len = max(len(seq) for seq in input_sequence)
print(max_sequence_len)

# Left-pad ("pre") with zeros so every row has the same length and the final
# token of each prefix stays in the last column.
input_sequence = np.array(
    pad_sequences(input_sequence, maxlen=max_sequence_len, padding="pre")
)

8


In [10]:
# Show the zero-padded sequences as a 2-D integer array.
input_sequence

array([[ 0,  0,  0,  0,  0,  0,  3,  5],
       [ 0,  0,  0,  0,  0,  3,  5,  1],
       [ 0,  0,  0,  0,  3,  5,  1,  2],
       [ 0,  0,  0,  0,  0,  0,  1,  6],
       [ 0,  0,  0,  0,  0,  1,  6,  4],
       [ 0,  0,  0,  0,  1,  6,  4,  7],
       [ 0,  0,  0,  0,  0,  0,  8,  2],
       [ 0,  0,  0,  0,  0,  8,  2,  4],
       [ 0,  0,  0,  0,  8,  2,  4,  9],
       [ 0,  0,  0,  8,  2,  4,  9, 10],
       [ 0,  0,  8,  2,  4,  9, 10, 11],
       [ 0,  8,  2,  4,  9, 10, 11,  1],
       [ 8,  2,  4,  9, 10, 11,  1,  2],
       [ 0,  0,  0,  0,  0,  0,  3, 12],
       [ 0,  0,  0,  0,  0,  3, 12, 13],
       [ 0,  0,  0,  0,  3, 12, 13, 14],
       [ 0,  0,  0,  3, 12, 13, 14, 15],
       [ 0,  0,  0,  0,  0,  0, 16, 17],
       [ 0,  0,  0,  0,  0, 16, 17, 18],
       [ 0,  0,  0,  0, 16, 17, 18, 19],
       [ 0,  0,  0, 16, 17, 18, 19, 20]], dtype=int32)

In [11]:
# Split each padded row: every column but the last is the model input
# (the context), and the last column is the target word id.
input_data, output = input_sequence[:, :-1], input_sequence[:, -1]

In [12]:
# Sanity check: first context row, then its target id on the next line.
print(input_data[0], output[0], sep="\n")

[0 0 0 0 0 0 3]
5


In [13]:
# One-hot encode the target ids (one column per vocabulary slot), which is the
# label format categorical cross-entropy expects.
output_c = to_categorical(output, num_classes=total_words)
output_c[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0.])

In [14]:
# Seed the Python, NumPy and backend random generators before any weights are
# created, so that Restart & Run All gives repeatable initialisation and
# training. (Some GPU kernels may still be nondeterministic.)
SEED = 42
keras.utils.set_random_seed(SEED)

# Next-word model: token ids -> 100-d embeddings -> 150-unit simple RNN ->
# softmax over the vocabulary slots.
RNN = keras.Sequential([
    layers.Embedding(input_dim=total_words, output_dim=100),
    layers.SimpleRNN(units=150),
    layers.Dense(units=total_words, activation='softmax'),
])

In [15]:
# Targets are one-hot vectors, hence categorical cross-entropy.
RNN.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [16]:
# Train on the (context, one-hot target) pairs; the returned History object is
# left as the cell's displayed value.
RNN.fit(
    x=input_data,
    y=output_c,
    epochs=100,
    verbose=1,
)

Epoch 1/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 2s 2s/step - accuracy: 0.0952 - loss: 3.0341
Epoch 2/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 55ms/step - accuracy: 0.2381 - loss: 2.9606
Epoch 3/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 56ms/step - accuracy: 0.3810 - loss: 2.8878
Epoch 4/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 56ms/step - accuracy: 0.3333 - loss: 2.8133
Epoch 5/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 57ms/step - accuracy: 0.2857 - loss: 2.7363
Epoch 6/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 59ms/step - accuracy: 0.2857 - loss: 2.6572
Epoch 7/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 56ms/step - accuracy: 0.2857 - loss: 2.5775
Epoch 8/100
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 58ms/step - accuracy: 0.2857 - loss: 2.4982
Epoch 9/100
1/1 ━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x788018891100>

In [19]:
# Read a seed phrase and predict the id of the word most likely to follow it.
#
# The built-in input() is reached through the `builtins` module rather than
# `__builtins__`: the latter is a CPython implementation detail (it may be a
# module or a dict depending on context), while `builtins.input` always works,
# even if the name `input` has been rebound in the notebook namespace.
send_text = builtins.input("Enter any text:")

# Convert the phrase to vocabulary ids; words the tokenizer has never seen are
# dropped, so the list can come back empty.
token_list = my_tokenizer.texts_to_sequences([send_text])[0]
if not token_list:
    # An all-padding row would still yield "a" prediction, but a meaningless
    # one -- fail loudly instead of reporting an arbitrary word.
    raise ValueError(
        "None of the entered words are in the tokenizer vocabulary; "
        "cannot predict a next word."
    )

# Match the training input width: the model was fit on rows of
# max_sequence_len - 1 context tokens, left-padded with zeros.
token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')

# One probability per vocabulary slot; take the most probable id of the single
# row in the batch.
predicted_probabilities = RNN.predict(token_list, verbose=0)
predicted = np.argmax(predicted_probabilities, axis=1)[0]

Enter any text:AI can perfrom many tasks


In [20]:
# Show the predicted vocabulary id (an integer index, not yet a word).
print(predicted)

10


In [21]:
# Map the predicted id back to its word. The tokenizer already keeps the
# inverse vocabulary (id -> word) in `index_word`, so a direct lookup replaces
# a manual scan over `word_index`. Ids with no word (such as the padding id 0)
# fall back to an empty string, as before.
predicted_word = my_tokenizer.index_word.get(int(predicted), "")
print(f"The next word is :{predicted_word}")

The next word is :subset
