In [36]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Input, Embedding, SimpleRNN, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

# Load the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

# Pad sequences to a fixed length
max_length = 500
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_length)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_length)

# Define the model architecture using Functional API
vocab_size = 10000  # Number of unique words to consider
embedding_dim = 16  # Dimension of the embedding layer
rnn_units = 32      # Number of RNN units

# Functional API: Define the input and the model structure
inputs = Input(shape=(max_length,))
x = Embedding(vocab_size, embedding_dim)(inputs)
x = SimpleRNN(rnn_units)(x)
outputs = Dense(1, activation='sigmoid')(x)

# Build the model
model = Model(inputs, outputs)

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()
# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(
    x_train, y_train, 
    epochs=10, 
    batch_size=64, 
    validation_data=(x_test, y_test), 
    callbacks=[early_stopping]
)

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

# Predict on a sample text
# Predict on a sample text
sample_text = "The movie was not good. The animation and the graphics were terrible. I would not recommend this movie."

# Tokenize and preprocess the sample text
word_index = imdb.get_word_index()

# Convert the sample text to integer sequences
def preprocess_text(text):
    words = text.lower().split()
    encoded_text = [word_index.get(word, 2) + 3 for word in words]  # +3 because IMDB indices are offset
    padded_text = tf.keras.preprocessing.sequence.pad_sequences([encoded_text], maxlen=max_length)
    return padded_text

sample_padded = preprocess_text(sample_text)

# Predict sentiment
predictions = model.predict(sample_padded)
print(f'Prediction: {"Positive" if predictions[0][0] > 0.5 else "Negative"} with score: {predictions[0][0]}')


Epoch 1/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 165ms/step - accuracy: 0.5424 - loss: 0.6821 - val_accuracy: 0.7844 - val_loss: 0.4929
Epoch 2/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 163ms/step - accuracy: 0.7899 - loss: 0.4547 - val_accuracy: 0.7863 - val_loss: 0.4691
Epoch 3/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 162ms/step - accuracy: 0.8122 - loss: 0.4324 - val_accuracy: 0.7998 - val_loss: 0.4687
Epoch 4/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 161ms/step - accuracy: 0.8968 - loss: 0.2595 - val_accuracy: 0.7872 - val_loss: 0.5068
Epoch 5/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 160ms/step - accuracy: 0.9508 - loss: 0.1498 - val_accuracy: 0.8024 - val_loss: 0.5467
Epoch 6/10
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 172ms/step - accuracy: 0.9684 - loss: 0.0968 - val_accuracy: 0.7735 - val_loss: 0.6474
[1m782/78

In [37]:
model.save('simpleRNN_IMDB_sentiment Analysis_part 2.h5')



In [39]:
from tensorflow.keras.models import load_model
model = load_model('simpleRNN_IMDB_sentiment Analysis_part 2.h5')



In [40]:
# Predict on a sample text
# Predict on a sample text
sample_text = "And then we have Jake with his closet which totally ruins all the film! I expected to see a BOOGEYMAN similar movie, and instead i watched a drama with some meaningless thriller spots"

# Tokenize and preprocess the sample text
word_index = imdb.get_word_index()

# Convert the sample text to integer sequences
def preprocess_text(text):
    words = text.lower().split()
    encoded_text = [word_index.get(word, 2) + 3 for word in words]  # +3 because IMDB indices are offset
    padded_text = tf.keras.preprocessing.sequence.pad_sequences([encoded_text], maxlen=max_length)
    return padded_text

sample_padded = preprocess_text(sample_text)

# Predict sentiment
predictions = model.predict(sample_padded)
print(f'Prediction: {"Positive" if predictions[0][0] > 0.5 else "Negative"} with score: {predictions[0][0]}')






[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 445ms/step
Prediction: Negative with score: 0.185366690158844


I made some changes to the preprocess_text() and predict_sentiment() functions as i want to give multiple input here.

In [42]:
def preprocess_text(text):
    words = text.lower().split()
    encoded_text = [word_index.get(word, 2) + 3 for word in words]  # +3 because IMDB indices are offset
    padded_text = tf.keras.preprocessing.sequence.pad_sequences([encoded_text], maxlen=max_length)
    return padded_text

# Function to predict sentiment of a review
def predict_sentiment(review):
    preprocessed_input = preprocess_text(review)
    prediction = model.predict(preprocessed_input)
    sentiment = 'Positive' if prediction[0][0] > 0.5 else 'Negative'
    score = prediction[0][0]
    return sentiment, score

# Test the function on example reviews
example_reviews = [
    "The movie was great fantastic, full of suspense and drama!",
    "I didn't like the movie, the plot was terrible and the acting was bad.",
    "It was an okay movie, not too bad but not great either.",
    "Absolutely loved this film, will watch it again.",
    "Worst movie I've ever seen, do not recommend!"
]

for review in example_reviews:
    sentiment, score = predict_sentiment(review)
    print(f'Review: "{review}"\nPredicted Sentiment: {sentiment}, Score: {score:.4f}\n')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
Review: "The movie was great fantastic, full of suspense and drama!"
Predicted Sentiment: Positive, Score: 0.6470

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
Review: "I didn't like the movie, the plot was terrible and the acting was bad."
Predicted Sentiment: Negative, Score: 0.0787

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
Review: "It was an okay movie, not too bad but not great either."
Predicted Sentiment: Negative, Score: 0.3337

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
Review: "Absolutely loved this film, will watch it again."
Predicted Sentiment: Negative, Score: 0.4196

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Review: "Worst movie I've ever seen, do not recommend!"
Predicted Sentiment: Negative, Score: 0.1793



### Some of the reason why the model is not able to predict the positive review as postive are:
- The first point is what I made is a Simple RNN and it doesn't have the capability of LSTM RNN or GRU RNN.
- The IMDB dataset has long term dependencies.
- The model which I made may not be accurate to fit the review properly.

