In [26]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model

In [27]:
# Fetch the IMDB word -> integer mapping, then invert it so that integer ids
# can be turned back into words when decoding a review.
word_index = imdb.get_word_index()
reverse_word_index = dict((idx, word) for word, idx in word_index.items())

In [28]:
# Restore the saved model from its HDF5 file and print its layer summary.
model_path = 'simple_rnn_imdb.h5'
model = load_model(model_path)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 500, 128)          1280000   
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 128)               32896     
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1313025 (5.01 MB)
Trainable params: 1313025 (5.01 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [29]:
# Display the model's weight arrays as the cell output (this is a full dump).
layer_weights = model.get_weights()
layer_weights

[array([[ 0.00947382,  0.01929809,  0.0298148 , ..., -0.0021825 ,
          0.10895613,  0.01611679],
        [ 0.03787903,  0.01867275,  0.10138227, ...,  0.0144468 ,
          0.00958856,  0.06334078],
        [ 0.00989471, -0.03457222,  0.09148368, ..., -0.05803472,
         -0.09943514, -0.04511643],
        ...,
        [ 0.01152453,  0.01248157, -0.03300486, ..., -0.0218094 ,
          0.13268742,  0.02631973],
        [-0.01433709, -0.03544721,  0.01546449, ...,  0.10407382,
          0.05056367, -0.05251538],
        [-0.11561546, -0.24086633, -0.28398496, ...,  0.1644279 ,
          0.17871264,  0.32127747]], dtype=float32),
 array([[-0.10507565,  0.12180346,  0.13256705, ...,  0.02742503,
         -0.04096394, -0.03514663],
        [ 0.10018555,  0.03656873,  0.01882052, ...,  0.10254601,
         -0.09688514,  0.11143976],
        [-0.07070509,  0.13685982, -0.06808463, ..., -0.10663579,
          0.03846698, -0.00780517],
        ...,
        [ 0.13677657, -0.08597669,  0.1

In [30]:
# Step 2 Helper function 
# Function to decode reviews 

def decode_review(encode_review):
    """Turn a sequence of integer ids back into a space-separated string.

    Each id is shifted down by 3 and looked up in ``reverse_word_index``;
    ids with no entry after the shift are rendered as ``'?'``.
    """
    decoded_words = []
    for token_id in encode_review:
        decoded_words.append(reverse_word_index.get(token_id - 3, '?'))
    return ' '.join(decoded_words)

## Function to preprocess user input
import re

def preprocess_text(text, max_len=500, max_features=10000):
    """Convert a raw review string into a padded id sequence for the model.

    The text is lower-cased, every character other than ``a-z``, ``0-9``,
    apostrophe and whitespace is replaced by a space, and the result is split
    on whitespace. Each token is looked up in ``word_index`` and shifted by 3
    (ids 0, 1 and 2 are reserved for padding, start and unknown). Tokens that
    are not in ``word_index``, or whose shifted id is ``>= max_features``,
    are encoded as the unknown id 2.

    Args:
        text: The review text to encode.
        max_len: Length the encoded sequence is pre-padded (or truncated) to.
            Defaults to 500, the sequence length shown in the model summary.
        max_features: Number of ids the embedding layer accepts; only ids in
            ``0 .. max_features - 1`` are emitted. Defaults to 10000.

    Returns:
        The batch produced by ``sequence.pad_sequences`` for this single
        review (one row of ``max_len`` ids).

    NOTE(review): sequences returned by ``imdb.load_data`` begin with the
    start id 1; this function does not prepend it. Confirm against the
    training notebook whether inference inputs should include it.
    """
    unknown_id = 2    # id used for out-of-vocabulary words
    index_offset = 3  # shift applied to word_index ranks to skip reserved ids

    # Replace punctuation with a space instead of deleting it, so that
    # "good,bad" or "well-made" split into separate words rather than fusing
    # into one unknown token. Apostrophes are kept so contractions such as
    # "don't" stay intact for the vocabulary lookup instead of becoming "dont".
    cleaned = re.sub(r"[^a-z0-9'\s]", ' ', text.lower())

    encoded_review = []
    for word in cleaned.split():
        rank = word_index.get(word)
        if rank is None:
            # Quoted words such as 'great' carry surrounding apostrophes;
            # retry the lookup without them before giving up.
            rank = word_index.get(word.strip("'"))

        if rank is None:
            encoded_review.append(unknown_id)
            continue

        idx = rank + index_offset
        # Ids at or above max_features fall outside the embedding table.
        encoded_review.append(idx if idx < max_features else unknown_id)

    padded_review = sequence.pad_sequences(
        [encoded_review], maxlen=max_len, padding='pre'
    )
    return padded_review



In [31]:
## Prediction Function 

def predict_sentiment(review):
    """Classify a raw review string with the loaded model.

    The review is encoded with ``preprocess_text`` and handed to
    ``model.predict``; the first element of the first output row is taken as
    the score.

    Returns:
        A ``(label, score)`` tuple, where ``label`` is ``'Positive'`` when the
        score is strictly greater than 0.5 and ``'Negative'`` otherwise.
    """
    model_input = preprocess_text(review)
    score = model.predict(model_input)[0][0]

    if score > 0.5:
        label = 'Positive'
    else:
        label = 'Negative'

    return label, score

In [32]:
## Step 4: User input and prediction
# Score a sample review and report the label and raw score.

example_review = "This Movie was fantastic ! The Acting was great and the plot was thrilling."
sentiment, score = predict_sentiment(example_review)

print(
    f'Review : {example_review}',
    f'Sentiment : {sentiment}',
    f'Prediction Score:  {score}',
    sep='\n',
)

# Second check: a short, unambiguously positive review.
print("\n" + "="*50)
test_review = "this movie is excellent and amazing"
sentiment2, score2 = predict_sentiment(test_review)
print(
    f'Test Review : {test_review}',
    f'Sentiment : {sentiment2}',
    f'Prediction Score:  {score2}',
    sep='\n',
)


Review : This Movie was fantastic ! The Acting was great and the plot was thrilling.
Sentiment : Positive
Prediction Score:  0.5371066331863403

Test Review : this movie is excellent and amazing
Sentiment : Positive
Prediction Score:  0.7851273417472839
