In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

import string
from tensorflow.keras.layers import SimpleRNN,Dense,Embedding
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence




In [2]:
## load imdb datasets

max_features=10000 #vocab size
(X_train,y_train),(X_test,y_test)=imdb.load_data(num_words=max_features)

print(f"X_train shape:{X_train.shape},y_train shape:{y_train.shape}")
print(f"X_test shape:{X_test.shape},y_test shape:{y_test.shape}")

X_train shape:(25000,),y_train shape:(25000,)
X_test shape:(25000,),y_test shape:(25000,)


In [3]:
word_index=imdb.get_word_index()
reverse_word_idx={value:key for key,value in word_index.items()}

In [4]:
model=load_model('simpleRNN.h5')
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, None, 128)         1280000   
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 128)               32896     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1,313,025
Trainable params: 1,313,025
Non-trainable params: 0
_________________________________________________________________


In [5]:
model.get_weights()

[array([[ 0.08432928, -0.00850521, -0.0163107 , ..., -0.01255569,
          0.05199545, -0.07474913],
        [ 0.00529887, -0.00595103,  0.04377968, ..., -0.00782422,
         -0.02508279, -0.05335869],
        [ 0.03100554,  0.02180002, -0.02006264, ..., -0.00444644,
          0.02150405,  0.00736132],
        ...,
        [-0.05059847, -0.03139113, -0.00849327, ...,  0.03443751,
          0.07184823, -0.05871859],
        [-0.06502292, -0.02094806, -0.01889172, ..., -0.08494218,
         -0.00740682, -0.00208867],
        [-0.02692566,  0.06272104,  0.02601198, ...,  0.0534294 ,
          0.0369684 ,  0.02736332]], dtype=float32),
 array([[-0.14796004,  0.02011113, -0.14641397, ...,  0.01397226,
          0.11391848, -0.00547336],
        [-0.13988473, -0.10025615,  0.12109255, ...,  0.04874684,
          0.00410993,  0.0705054 ],
        [-0.1304452 ,  0.14605968,  0.1549289 , ..., -0.06360789,
         -0.0357086 ,  0.02052478],
        ...,
        [-0.00528872,  0.03239422,  0.0

In [6]:
def decode_review(encoded_review):
    return " ".join(
    [reverse_word_idx.get(i-3,'?') 
     for i in encoded_review])

def preprocess_text(text):
    text = text.lower().translate(str.maketrans('', '', string.punctuation))
    words=text.lower().split()
    
    encoded_review = [word_index.get(word, 2) + 3 if word_index.get(word, 2) < max_features else 2 for word in words]
    padded_review=sequence.pad_sequences([encoded_review],maxlen=500)
    return padded_review

In [7]:
## prediction function

def predict_sentiment(review):
    preprocessed_ip=preprocess_text(review)
    prediction=model.predict(preprocessed_ip)
    sentiment='Positive' if prediction[0][0]>0.5 else 'Negative'
    return sentiment,prediction[0][0]

In [8]:
#example review  for prediction
example="I absolutely loved this movie! The storyline was captivating, and the actors gave outstanding performances."

In [12]:
preprocess_text(example)

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0, 

In [9]:
sentiment,score=predict_sentiment(example)
print(f"Review:{example}\nSentiment:{sentiment}\nPrediction Score:{score}")

Review:I absolutely loved this movie! The storyline was captivating, and the actors gave outstanding performances.
Sentiment:Positive
Prediction Score:0.809910237789154


In [10]:
encoded_review=[word_index.get(word,2)+3 for word in example]

In [11]:
decode_review(encoded_review)

'and and a b s o l u t e l y and l o v e d and t h i s and m o v i e and and and h e and s t o r y l i n e and w a s and c a p t i v a t i n g and and a n d and t h e and a c t o r s and g a v e and o u t s t a n d i n g and p e r f o r m a n c e s and'