In [25]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model

In [26]:
word_index = imdb.get_word_index()
reverse_word_index = {value : key for key, value in word_index.items()}

In [27]:
model = load_model('Simple_RNN_iMDB.h5')
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 500, 128)          1280000   
                                                                 
 simple_rnn_2 (SimpleRNN)    (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1313025 (5.01 MB)
Trainable params: 1313025 (5.01 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
model.get_weights()

[array([[ 0.10638119,  0.03054436,  0.01856315, ..., -0.02670215,
         -0.05520982, -0.02278676],
        [ 0.07969575, -0.02072045,  0.02109982, ..., -0.01008854,
         -0.06855376, -0.02143191],
        [ 0.03953219, -0.06827509, -0.09729802, ..., -0.06044528,
         -0.02817632, -0.07017292],
        ...,
        [ 0.13084446,  0.00947882, -0.06953914, ...,  0.07871789,
          0.05126293,  0.00740908],
        [-0.01065067, -0.03457505,  0.13097683, ...,  0.03423046,
         -0.07449023, -0.0262621 ],
        [ 0.03950193,  0.14092702, -0.04468695, ...,  0.12350299,
         -0.01865359, -0.06716891]], dtype=float32),
 array([[-0.18966979, -0.09702833, -0.1426304 , ..., -0.10785474,
         -0.09604283, -0.13920325],
        [ 0.15307654, -0.09950901,  0.04252868, ...,  0.17229027,
         -0.05985273,  0.05417472],
        [-0.05346216, -0.12725954,  0.17280272, ...,  0.01653329,
         -0.07123855,  0.00797818],
        ...,
        [-0.1625687 , -0.07071172, -0.0

In [None]:
# Function to decode a review from its encoded (numeric) form back to human-readable words
def decode_review(encoded_review):
    return ' '.join([
        reverse_word_index.get(i - 3, '?')  # Subtracting 3 to adjust for reserved indices
        for i in encoded_review
    ])
    # The IMDB dataset reserves indices 0, 1, and 2 for special tokens
    # So we subtract 3 to get the correct word from the reverse index
    # If the word index is not found, replace with '?'



# Function to preprocess raw text input (e.g., user-provided review) into a format compatible with the model
def preprocess_text(text):
    # Convert the text to lowercase and split into words
    words = text.lower().split()
    
    # Encode each word using the IMDB word_index dictionary
    # Add 3 to each index to account for the reserved indices (0: padding, 1: start, 2: unknown)
    # If a word is not found, default to 2 ('unknown' token), then add 3
    encoded_review = [word_index.get(word, 2) + 3 for word in words]

    # Pad the encoded review to match the model's expected input length (500)
    # This ensures the review is in the correct shape (1 x 500)
    padded_review = sequence.pad_sequences([encoded_review], maxlen=500)
    
    return padded_review

In [None]:
# Function to predict the sentiment of a given text review
def predict_sentiment(review):
    # Step 1: Preprocess the input review to match the format expected by the model (padded sequence)
    preprocessed_input = preprocess_text(review)
    
    # Step 2: Use the trained model to predict sentiment
    # The model outputs a probability score between 0 and 1
    prediction = model.predict(preprocessed_input)
    
    # Step 3: Determine sentiment based on prediction threshold (0.5)
    # If score > 0.5 → Positive sentiment, else → Negative sentiment
    sentiment = 'Positive' if prediction[0][0] > 0.5 else 'Negative'
    
    # Step 4: Return the predicted sentiment label and the actual probability score
    return sentiment, prediction[0][0]

In [None]:
# Define a sample review as a plain English sentence
sample_review = "I really loved the movie and how the plot evolved right throughout!"

# Use the predict_sentiment function to get the sentiment label and confidence score
sentiment, score = predict_sentiment(sample_review)

# Display the original review text
print(f'Review : {sample_review}')

# Display the predicted sentiment (Positive/Negative)
print(f'Sentiment : {sentiment}')

# Display the raw prediction score (probability value between 0 and 1)
# This shows how confident the model is about the prediction
print(f'Prediction Score : {score}')

Review : I really loved the movie and how the plot evolved right throughout!
Sentiment : Positive
Prediction Score : 0.6991913318634033
