# In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Conv1D, MaxPooling1D
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb

# Settings shared by the data pipeline and by both models further down.
max_features = 10000  # vocabulary cap handed to imdb.load_data(num_words=...)
maxlen = 500  # target length handed to pad_sequences(maxlen=...)

# Word -> rank mapping; needed later to encode the hand-written reviews.
word_index = imdb.get_word_index()

# Fetch the train/test splits, restricted to the `max_features` vocabulary.
print("Loading data...")
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Bring every review to exactly `maxlen` tokens so the samples stack into
# one 2-D array per split.
print("Pad sequences (samples x time)")
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)
print("x_train shape:", x_train.shape)
print("x_test shape:", x_test.shape)

# Hand-written sample reviews (a mix of positive, negative and lukewarm
# wording) used as a qualitative check on the trained models.
# Each entry is a single string; adjacent literals inside the parentheses are
# joined by the compiler, which only serves to keep source lines short.
hardcoded_reviews = [
    (
        "This movie was absolutely fantastic! "
        "The performances were incredible and the story was captivating."
    ),
    (
        "I really did not like this movie. "
        "It was too slow and the plot was very predictable."
    ),
    (
        "The film had some good moments, but overall it was just okay. "
        "Nothing too special."
    ),
    (
        "An amazing experience! "
        "I would definitely watch it again. "
        "Highly recommended!"
    ),
    (
        "Terrible movie. "
        "The acting was bad, and the story made no sense."
    ),
]

# Encode the reviews using the IMDB word index
def encode_review(review, vocab=None, num_words=None, index_from=3):
    """Convert raw review text into the integer ids used by the training data.

    The ids mirror what ``imdb.load_data`` produces with its default settings:
    the sequence starts with the start marker ``1``, each known word is its
    rank in the word index shifted by ``index_from``, and anything unknown or
    outside the ``num_words`` vocabulary becomes the out-of-vocabulary id
    ``2``. Keeping ids below ``num_words`` is also what keeps them inside the
    range of an ``Embedding(num_words, ...)`` layer.

    Args:
        review: Review text as a plain string.
        vocab: Mapping of lower-case word -> frequency rank. Defaults to the
            module-level ``word_index``.
        num_words: Vocabulary size; ids at or above it are replaced by the
            out-of-vocabulary id. Defaults to the module-level
            ``max_features``.
        index_from: Offset added to every rank (``imdb.load_data`` default 3).

    Returns:
        A list of ints, beginning with the start marker ``1``.
    """
    if vocab is None:
        vocab = word_index
    if num_words is None:
        num_words = max_features

    # Reserved ids, matching the imdb.load_data defaults (0 is padding).
    start_char, oov_char = 1, 2

    # Swap punctuation for spaces so "fantastic!" is looked up as "fantastic".
    # The apostrophe is deliberately kept so contractions stay one token.
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    cleaned = review.lower().translate(
        str.maketrans(punctuation, " " * len(punctuation))
    )

    encoded = [start_char]
    for word in cleaned.split():
        rank = vocab.get(word)
        token_id = oov_char if rank is None else rank + index_from
        # Words rarer than the vocabulary cap were collapsed to the
        # out-of-vocabulary id in the training data; do the same here.
        if token_id >= num_words:
            token_id = oov_char
        encoded.append(token_id)
    return encoded

# Turn each hand-written review into a list of ids, then pad the lot to the
# same length (`maxlen`) as the training sequences.
encoded_reviews = list(map(encode_review, hardcoded_reviews))
padded_reviews = pad_sequences(encoded_reviews, maxlen=maxlen)

# Phase 1: Simple RNN baseline
print("Building Simple RNN model...")
model_rnn = Sequential([
    Embedding(max_features, 32),     # token id -> 32-dimensional vector
    SimpleRNN(32),                   # recurrent layer with 32 units
    Dense(1, activation='sigmoid'),  # single sigmoid output for the binary label
])

model_rnn.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Fit on the padded training set; 20% of it is set aside for validation.
history_rnn = model_rnn.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)

# Phase 2: Hybrid model - a convolution/pooling front end feeding a Simple RNN
print("Building Hybrid RNN+CNN model...")
model_hybrid = Sequential([
    Embedding(max_features, 32),         # token id -> 32-dimensional vector
    Conv1D(32, 7, activation='relu'),    # 32 filters, kernel size 7
    MaxPooling1D(5),                     # pool size 5
    SimpleRNN(32),                       # recurrent layer with 32 units
    Dense(1, activation='sigmoid'),      # single sigmoid output for the binary label
])

model_hybrid.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Same training settings as the Simple RNN so the two runs are comparable.
history_hybrid = model_hybrid.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
)

# Score both trained models on the test split.
# Each `evaluate` call returns [loss, accuracy] because the models were
# compiled with a single 'acc' metric.
print("Evaluating Simple RNN model...")
results_rnn = model_rnn.evaluate(x_test, y_test)
print(f"Test Loss, Test Accuracy for Simple RNN model: {results_rnn}")

print("Evaluating Hybrid RNN+CNN model...")
results_hybrid = model_hybrid.evaluate(x_test, y_test)
print(f"Test Loss, Test Accuracy for Hybrid RNN+CNN model: {results_hybrid}")

# Side-by-side summary of the two result lists.
rnn_loss, rnn_accuracy = results_rnn[0], results_rnn[1]
hybrid_loss, hybrid_accuracy = results_hybrid[0], results_hybrid[1]

print("Summary of Model Performances:")
print(f"Simple RNN model - Loss: {rnn_loss}, Accuracy: {rnn_accuracy}")
print(f"Hybrid RNN+CNN model - Loss: {hybrid_loss}, Accuracy: {hybrid_accuracy}")

# Run every hand-written review through both models and report the verdicts.
for i, (review, test_review) in enumerate(zip(hardcoded_reviews, padded_reviews)):
    # `predict` works on batches, so present the single sequence as a batch of one.
    single_batch = np.expand_dims(test_review, axis=0)
    rnn_prediction = model_rnn.predict(single_batch)
    hybrid_prediction = model_hybrid.predict(single_batch)

    # Sigmoid output: above 0.5 is reported as positive sentiment.
    rnn_score = rnn_prediction[0][0]
    hybrid_score = hybrid_prediction[0][0]
    rnn_label = "Positive" if rnn_score > 0.5 else "Negative"
    hybrid_label = "Positive" if hybrid_score > 0.5 else "Negative"

    print(f"\nReview {i + 1}: {review}")
    print("Simple RNN model prediction:", rnn_label, f"({rnn_score})")
    print("Hybrid RNN+CNN model prediction:", hybrid_label, f"({hybrid_score})")


# Conclusion
## This project demonstrates the use of RNN and hybrid RNN+CNN models for sentiment analysis. The models are trained on the IMDB dataset and evaluated for accuracy. The predictions on hardcoded reviews help in understanding the strengths and limitations of each model.

