In [1]:
# Step 1: Import Libraries and Load the Model
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model




In [2]:

# Load the IMDB dataset word index
word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}

In [3]:
# Load the pre-trained model with ReLU activation
model = load_model('simple_rnn_imdb.h5')
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 128)          1280000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 128)               32896     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 1313025 (5.01 MB)
Trainable params: 1313025 (5.01 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [4]:
model.get_weights()

[array([[-5.2977335e-02,  2.5795788e-02, -3.2376125e-02, ...,
         -4.0818319e-02, -6.7335621e-02, -3.8745716e-02],
        [-2.7059784e-02,  1.2193329e-02, -5.2689113e-02, ...,
          4.5338450e-03, -3.6179133e-02, -5.1954456e-05],
        [-3.3282422e-02, -4.1059460e-02, -2.8996443e-02, ...,
          7.4259186e-04,  1.8552084e-02, -5.9478026e-02],
        ...,
        [ 7.6222867e-02,  1.3686722e-02, -2.9517278e-02, ...,
         -8.2608916e-02, -6.4662598e-02,  5.6041749e-03],
        [-3.7850052e-02, -4.4610586e-02,  3.3544008e-02, ...,
          2.2625256e-02,  4.4362023e-02,  5.5474188e-02],
        [ 8.4423855e-02,  6.1524481e-02,  3.6922544e-02, ...,
          4.2671889e-02,  9.0871276e-03, -7.7781506e-02]], dtype=float32),
 array([[ 0.12677956, -0.07839283,  0.14951017, ..., -0.02837391,
          0.00810305, -0.07194644],
        [ 0.17004952,  0.01080087, -0.03505855, ..., -0.00849971,
         -0.01092332, -0.07963824],
        [ 0.16486824, -0.04412608,  0.09999849

In [5]:
# Step 2: Helper Functions
# Function to decode reviews
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i - 3, '?') for i in encoded_review])

# Function to preprocess user input
def preprocess_text(text):
    words = text.lower().split()
    encoded_review = [word_index.get(word, 2) + 3 for word in words]
    padded_review = sequence.pad_sequences([encoded_review], maxlen=500)
    return padded_review

### Step 2: Helper Functions

1. **Function to Decode Reviews**
   ```python
   def decode_review(encoded_review):
       return ' '.join([reverse_word_index.get(i - 3, '?') for i in encoded_review])
   ```

   **Explanation**:
   - **Purpose**: This function converts a sequence of integers (representing an encoded review) back into a human-readable string of words.
   - **How it works**:
     - The function takes an `encoded_review`, which is a list of integers.
     - It iterates through each integer `i` in the `encoded_review`.
     - **`i - 3`**: Adjusts the integer index by subtracting 3, accounting for special tokens reserved in the dataset (like padding, start, or unknown tokens).
     - **`reverse_word_index.get(i - 3, '?')`**: Looks up the word corresponding to the adjusted index. If the index doesn't exist in `reverse_word_index`, it returns `'?'`.
     - The decoded words are then joined into a single string using `' '.join([...])` and returned.

2. **Function to Preprocess User Input**
   ```python
   def preprocess_text(text):
       words = text.lower().split()
       encoded_review = [word_index.get(word, 2) + 3 for word in words]
       padded_review = sequence.pad_sequences([encoded_review], maxlen=500)
       return padded_review
   ```

   **Explanation**:
   - **Purpose**: This function converts a user-provided string of text into a format suitable for input into a neural network (such as for sentiment analysis).
   - **How it works**:
     - **Lowercase and Split**: The input `text` is converted to lowercase and split into individual words using `split()`.
     - **Encode Words**: Each word is then converted to its corresponding integer index using `word_index.get(word, 2) + 3`. 
       - **`word_index.get(word, 2)`**: Retrieves the index of the word from `word_index`. If the word isn’t found, it assigns it the index `2`, which usually stands for "unknown" or "OOV" (out of vocabulary).
       - **`+3`**: Offsets the index to match the same adjustment used in the decoding process.
     - **Padding**: The list of encoded indices is padded using `sequence.pad_sequences` to a fixed length (`maxlen=500`), ensuring that all reviews have the same length regardless of their original size.
     - The padded sequence is then returned, ready for use in a model.

### Summary:
- **`decode_review`** converts an encoded review back to a readable string.
- **`preprocess_text`** transforms raw text input into a numerical format that the model can process, ensuring uniform input size.

These helper functions are essential for preparing and interpreting data when working with models in natural language processing (NLP).

In [6]:
### Prediction  function

def predict_sentiment(review):
    preprocessed_input=preprocess_text(review)

    prediction=model.predict(preprocessed_input)

    sentiment = 'Positive' if prediction[0][0] > 0.5 else 'Negative'
    
    return sentiment, prediction[0][0]



In [7]:
# Step 4: User Input and Prediction
# Example review for prediction
example_review = "This movie was fantastic! The acting was great and the plot was thrilling."

sentiment,score=predict_sentiment(example_review)

print(f'Review: {example_review}')
print(f'Sentiment: {sentiment}')
print(f'Prediction Score: {score}')

Review: This movie was fantastic! The acting was great and the plot was thrilling.
Sentiment: Positive
Prediction Score: 0.8901410698890686


In [8]:
import os
print(os.getcwd())


c:\Users\Kempsly\Desktop\Python-programming\NLP-DeepLearning\SimpleRNN


In [9]:
import os

# Check if the model file exists
file_path = "simple_rnn_imdb.h5"
if os.path.isfile(file_path):
    print("Model file found.")
else:
    print("Model file not found.")


Model file found.
