**CONSIDERING THE SENTENCE : 'India is ready to produce over five billion covid vaccine doses next year to help the world in the fight against the pandemic'**.

CONSTRUCT A RECCURENT NEURAL NETWORK (RNN) USING THE ABOVE SENTENCE AS TRAINING DATA AND EVALUATE ITS PERFORMANCE

In [1]:
## IMPORT NECESSARY LIBRARIES
from keras.preprocessing.text import Tokenizer
import numpy as np
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Embedding, Dense, SimpleRNN

In [2]:
## DATA PREPROCESSING
t = Tokenizer()

In [3]:
## DEFINING THE DATA
data = 'India is ready to produce over five billion covid vaccine doses next year to help the world in the fight against the pandemic'

In [4]:
 t.fit_on_texts([data])

In [5]:
wo_indx = t.word_index
print(wo_indx)

{'the': 1, 'to': 2, 'india': 3, 'is': 4, 'ready': 5, 'produce': 6, 'over': 7, 'five': 8, 'billion': 9, 'covid': 10, 'vaccine': 11, 'doses': 12, 'next': 13, 'year': 14, 'help': 15, 'world': 16, 'in': 17, 'fight': 18, 'against': 19, 'pandemic': 20}


In [6]:
## NUMERIC ENCODING OF THE DATA
encoded_data = t.texts_to_sequences([data])[0]   # produces the text data in sequence format
encoded_data

[3,
 4,
 5,
 2,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 2,
 15,
 1,
 16,
 17,
 1,
 18,
 19,
 1,
 20]

In [7]:
## CREATING THE FEATURE AND TARGET VALUES FOR PREDICTING A WORD WHEN A SINGLE WORD IS GIVEN
n = len(encoded_data)
dt_seq = list()
for i in range(1,n):
  seq = encoded_data[i-1:i+1]
  dt_seq.append(seq)

In [8]:
dt_seq

[[3, 4],
 [4, 5],
 [5, 2],
 [2, 6],
 [6, 7],
 [7, 8],
 [8, 9],
 [9, 10],
 [10, 11],
 [11, 12],
 [12, 13],
 [13, 14],
 [14, 2],
 [2, 15],
 [15, 1],
 [1, 16],
 [16, 17],
 [17, 1],
 [1, 18],
 [18, 19],
 [19, 1],
 [1, 20]]

In [9]:
 encoded_data[1:3]

[4, 5]

In [10]:
seq = np.array(dt_seq)
seq

array([[ 3,  4],
       [ 4,  5],
       [ 5,  2],
       [ 2,  6],
       [ 6,  7],
       [ 7,  8],
       [ 8,  9],
       [ 9, 10],
       [10, 11],
       [11, 12],
       [12, 13],
       [13, 14],
       [14,  2],
       [ 2, 15],
       [15,  1],
       [ 1, 16],
       [16, 17],
       [17,  1],
       [ 1, 18],
       [18, 19],
       [19,  1],
       [ 1, 20]])

In [11]:
## SEPARATING INTO FEATURES AND LABELS
X = seq[:, 0]
y = seq[:, 1]

In [12]:
y

array([ 4,  5,  2,  6,  7,  8,  9, 10, 11, 12, 13, 14,  2, 15,  1, 16, 17,
        1, 18, 19,  1, 20])

In [13]:
## CONVERTING y VALUES INTO CATEGORICAL
voc_size = len(t.word_index)+1
y_cat = to_categorical(y, num_classes = voc_size)
y_cat.shape

(22, 21)

In [14]:
y_cat

array([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
        0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0.,

In [15]:
## CONSTRUCTION OF RNN
model = Sequential()
model.add(Embedding(voc_size, 10, input_length = 1))
model.add(SimpleRNN(50))
model.add(Dense(voc_size, 'softmax'))
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 1, 10)             210       
                                                                 
 simple_rnn (SimpleRNN)      (None, 50)                3050      
                                                                 
 dense (Dense)               (None, 21)                1071      
                                                                 
Total params: 4331 (16.92 KB)
Trainable params: 4331 (16.92 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [16]:
## MODEL COMPILING
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [17]:
## MODEL TRAINING
model.fit(X, y_cat, epochs = 500)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.src.callbacks.History at 0x7b57c33cc3d0>

In [18]:
## MODEL PREDICTION
pred = np.argmax(model.predict(X), axis = 1)
pred



array([ 4,  5,  2, 15,  7,  8,  9, 10, 11, 12, 13, 14,  2, 15,  1, 18, 17,
        1, 18, 19,  1, 18])

In [19]:
## DECODING THE PREDICTED VALUES
word = list(wo_indx.keys())  # collecting the keys and storing it as a list
word

['the',
 'to',
 'india',
 'is',
 'ready',
 'produce',
 'over',
 'five',
 'billion',
 'covid',
 'vaccine',
 'doses',
 'next',
 'year',
 'help',
 'world',
 'in',
 'fight',
 'against',
 'pandemic']

In [20]:
pr_words = ''
for i in range(1, len(pred)+1):
  pr_words = pr_words+' '+word[pred[i-1]-1]
pr_words

' is ready to help over five billion covid vaccine doses next year to help the fight in the fight against the fight'

In [21]:
## OUT OF SAMPLE PREDICTION : WE SHALL PREDICT THE NEXT TERM WHEN A WORD SAY, 'FIVE' IS GIVEN FOR PREDICTION

print('Predicted word is', word  [np.argmax(model.predict (np.array([8]) )) -1])

Predicted word is billion
