In [12]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model

In [13]:
# Word index: word -> integer id lookup table shipped with the IMDB dataset
word_index = imdb.get_word_index()
# Inverted table (integer id -> word), used to turn encoded reviews back into text
reverse_word_index = dict((index, word) for word, index in word_index.items())

In [14]:
# Load the model stored in the .h5 file and print its layer summary
MODEL_PATH = 'simple_rnn_imdb.h5'
model = load_model(MODEL_PATH)
model.summary()



In [15]:
# Inspect the loaded model's weight variables; the cell output is their full repr.
# NOTE(review): this prints every weight array — consider showing only names and shapes.
model.weights

[<Variable path=sequential/embedding/embeddings, shape=(1000, 256), dtype=float32, value=[[ 0.05888874 -0.04441324  0.01000248 ... -0.07152284  0.03158737
   -0.04884933]
  [-0.03052714 -0.00756482  0.00105636 ...  0.03473257  0.01622052
   -0.04564217]
  [-0.01885738  0.03390862 -0.05103505 ...  0.00576836  0.01366715
   -0.00826852]
  ...
  [-0.03948885  0.02295834  0.02294054 ... -0.00864452 -0.05552452
    0.00255836]
  [-0.00275307  0.01587846 -0.0468618  ...  0.03357592 -0.01088676
   -0.02630242]
  [-0.01563612  0.03885478 -0.04275705 ... -0.03389168 -0.02406293
   -0.01622603]]>,
 <Variable path=sequential/simple_rnn/simple_rnn_cell/kernel, shape=(256, 256), dtype=float32, value=[[ 0.10827423 -0.02845699 -0.02425099 ... -0.00583292 -0.03156288
    0.00824131]
  [ 0.01622746  0.01183468 -0.04113931 ... -0.08821131 -0.09240621
   -0.00291857]
  [-0.05286611 -0.05780099  0.06871456 ...  0.10630649 -0.08926094
    0.03144714]
  ...
  [ 0.09769413  0.01113985 -0.05696069 ...  0.1082

In [16]:
# Helper functions
Max_Vocab_size = 1000  # encoded ids at or above this value are treated as out-of-vocabulary


def decoder_review(encoded_review):
    """Turn a sequence of encoded token ids back into a space-separated string.

    Each id is shifted down by 3 before it is looked up in ``reverse_word_index``;
    ids that have no entry after the shift are rendered as '?'.
    """
    decoded_words = []
    for token_id in encoded_review:
        decoded_words.append(reverse_word_index.get(token_id - 3, '?'))
    return ' '.join(decoded_words)

def preprocess_text(text, maxlen=500):
    """Encode a raw review string into a padded batch of token ids.

    The text is lower-cased and split on whitespace. Each word is looked up in
    ``word_index`` and shifted by 3 (ids 0, 1 and 2 are reserved for padding,
    start-of-sequence and unknown word). Words that are missing from
    ``word_index``, or whose shifted id is not below ``Max_Vocab_size``, are
    encoded as the out-of-vocabulary id 2.

    Args:
        text: Review text to encode.
        maxlen: Length passed to ``sequence.pad_sequences``. Defaults to 500,
            the value this notebook has always used.

    Returns:
        The result of ``sequence.pad_sequences`` for a one-element batch
        holding the encoded review.
    """
    oov_id = 2
    index_offset = 3

    encoded_review = []
    for word in text.lower().split():
        raw_index = word_index.get(word)
        if raw_index is None:
            # Previously the fallback was word_index.get(word, 2) + 3 == 5, which is
            # below Max_Vocab_size and therefore encoded unknown words as a real word.
            encoded_review.append(oov_id)
            continue
        shifted_index = raw_index + index_offset
        encoded_review.append(shifted_index if shifted_index < Max_Vocab_size else oov_id)

    # pad_sequences expects a batch, hence the wrapping list.
    padded_review = sequence.pad_sequences([encoded_review], maxlen=maxlen)
    return padded_review

- words = text.lower().split()
"This movie is great" → ["this", "movie", "is", "great"]

- encoded_review = [word_index.get(word,2)+3 for word in words]
word_index -> A dictionary that maps words to unique integer IDs.
Example:
{"movie": 1, "great": 2, "bad": 3}
word_index.get(word, 2)
Looks up the integer index of word.
If the word is not found, it returns 2 (default value).
Typically, this default represents an out-of-vocabulary (OOV) token.
+3 Shifts all word indices by 3.
This is commonly done to reserve the first few indices for special tokens, such as:
0 → padding
1 → start of sequence
2 → unknown word
By adding 3, actual vocabulary indices start from 3 onward.

- paddded_review = sequence.pad_sequences([encoded_review],maxlen=500)
A utility function (here imported from tensorflow.keras.preprocessing.sequence) that ensures all sequences have the same length.
Wraps the list in another list because pad_sequences expects a batch of sequences, not a single sequence.
If the sequence has fewer than 500 tokens, it is left-padded with zeros.
If it has more than 500 tokens, it is truncated from the beginning, because pad_sequences defaults to truncating='pre'.
Result:
paddded_review is a 2D NumPy array with shape (1, 500).
This fixed-length format is required by most neural network models.

We also handle out-of-vocabulary (OOV) words inside the comprehension: the model was trained with a vocabulary limited to 1000 indices, so any word whose shifted index is 1000 or more is replaced with index 2, the reserved id for OOV words.

In [17]:
# Prediction function
def predict_sentiment(review, threshold=0.4):
    """Classify a raw review string as positive or negative.

    Args:
        review: Review text; it is encoded and padded by ``preprocess_text``.
        threshold: Score above which the review is labelled 'Positive'.
            Defaults to 0.4, the cut-off this notebook has always used.

    Returns:
        A ``(sentiment, score)`` tuple where ``sentiment`` is 'Positive' or
        'Negative' and ``score`` is the first value of the model's prediction
        for the review.
    """
    preprocessed_input = preprocess_text(review)

    prediction = model.predict(preprocessed_input)
    score = prediction[0][0]

    # Label spelling fixed: this was previously returned as 'Postive'.
    sentiment = 'Positive' if score > threshold else 'Negative'

    return sentiment, score
    

In [18]:
# Testing: score one hand-written review and show the label with its raw score
example_review = 'The actor was amazing and actress was very pretty'
sentiment, prediction = predict_sentiment(example_review)
print(sentiment, prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
Postive 0.7123227
