In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, SimpleRNN
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Load the dataset
# Reads the IMDB reviews CSV (resolved relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="IMDB Dataset.csv")


In [3]:
# Encode labels
# Map the string labels to integers explicitly instead of using
# DataFrame.replace(..., inplace=True): replacing strings with ints relies on
# pandas' silent object -> int downcasting, which is deprecated (FutureWarning)
# and would leave the column as object dtype once that behaviour is removed.
# Values that are not keys of the mapping (e.g. already-encoded 0/1) pass
# through unchanged, so re-running this cell is safe; the final cast raises if
# any remaining value cannot be converted to an integer.
label_to_id = {"positive": 1, "negative": 0}
df["sentiment"] = (
    df["sentiment"]
    .map(lambda label: label_to_id.get(label, label))
    .astype("int64")
)

  df.replace({"sentiment": {"positive": 1, "negative": 0}}, inplace=True)


In [4]:
# Split the data
# 80/20 train/test partition of the full frame; the fixed seed keeps the split repeatable.
train_data, test_data = train_test_split(
    df,
    random_state=42,
    test_size=0.2,
)


In [5]:
# Tokenize the text data
# The vocabulary is fitted on the training reviews only (num_words=5000 caps it),
# then both splits are converted to integer sequences and padded/truncated to 200 tokens.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data["review"])

train_sequences = tokenizer.texts_to_sequences(train_data["review"])
X_train = pad_sequences(train_sequences, maxlen=200)

test_sequences = tokenizer.texts_to_sequences(test_data["review"])
X_test = pad_sequences(test_sequences, maxlen=200)

In [6]:
# Prepare labels
# Extract the encoded sentiment column of each split as its underlying array.
Y_train, Y_test = (
    split["sentiment"].values for split in (train_data, test_data)
)

In [7]:
# Build the RNN model
# - The embedding vocabulary size is read from the tokenizer so the two values
#   cannot drift apart (it was a duplicated literal 5000).
# - `input_length` is intentionally not passed: Keras 3 deprecates that
#   Embedding argument, and the sequence length is taken from the padded
#   inputs when the model is first called.
model_rnn = Sequential(
    [
        Embedding(input_dim=tokenizer.num_words, output_dim=128),
        SimpleRNN(128, dropout=0.2, recurrent_dropout=0.2),
        # Single sigmoid unit: output is the probability of the positive class (label 1).
        Dense(1, activation="sigmoid"),
    ]
)



In [8]:
# Compile the model
# Binary target with a sigmoid output, so binary cross-entropy is the loss; accuracy is tracked.
model_rnn.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)


In [9]:
# Train the model
# Keep the History object returned by fit() so the per-epoch metrics remain
# available afterwards (history_rnn.history) instead of being echoed as a bare
# repr and thrown away.
history_rnn = model_rnn.fit(
    X_train,
    Y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.2,  # 20% of the training data is held out for validation
)

Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 78ms/step - accuracy: 0.5159 - loss: 0.7017 - val_accuracy: 0.6019 - val_loss: 0.6554
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 98ms/step - accuracy: 0.6243 - loss: 0.6354 - val_accuracy: 0.6709 - val_loss: 0.5963
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 103ms/step - accuracy: 0.6927 - loss: 0.5714 - val_accuracy: 0.6799 - val_loss: 0.5960
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 95ms/step - accuracy: 0.7401 - loss: 0.5158 - val_accuracy: 0.7490 - val_loss: 0.5267
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 91ms/step - accuracy: 0.7679 - loss: 0.4844 - val_accuracy: 0.7623 - val_loss: 0.5332
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 89ms/step - accuracy: 0.7713 - loss: 0.4728 - val_accuracy: 0.6309 - val_loss: 0.6752
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x2a17e780710>

In [10]:
# Evaluate the model
# evaluate() returns the loss followed by the compiled metric (accuracy) on the test split.
loss, accuracy = model_rnn.evaluate(x=X_test, y=Y_test)
for report_line in (f"RNN Test Loss: {loss}", f"RNN Test Accuracy: {accuracy}"):
    print(report_line)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.7488 - loss: 0.5691
RNN Test Loss: 0.5662838220596313
RNN Test Accuracy: 0.7487999796867371


In [11]:
# Prediction function for RNN
def predict_sentiment_rnn(review: str, maxlen: int = 200, threshold: float = 0.5) -> str:
    """Classify a single review as "positive" or "negative" with the trained RNN.

    Uses the notebook-level ``tokenizer`` and ``model_rnn``.

    Args:
        review: Raw review text.
        maxlen: Length the token sequence is padded/truncated to. Defaults to
            200, the length used for the training sequences in this notebook.
        threshold: Decision boundary on the model's sigmoid output. Defaults
            to 0.5.

    Returns:
        "positive" if the predicted probability is strictly greater than
        ``threshold``, otherwise "negative".
    """
    # texts_to_sequences works on a list of texts, so wrap the single review.
    sequence = tokenizer.texts_to_sequences([review])
    padded_sequence = pad_sequences(sequence, maxlen=maxlen)
    # One input row and one output unit: [0][0] is the positive-class probability.
    probability = float(model_rnn.predict(padded_sequence)[0][0])
    return "positive" if probability > threshold else "negative"


In [12]:
# Example usage
# Sanity check on an unambiguously positive review.
new_review = "This movie was fantastic. I loved it."
rnn_message = f"RNN Prediction: {predict_sentiment_rnn(new_review)}"
print(rnn_message)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 248ms/step
RNN Prediction: positive


In [13]:
# Sanity check on a short, clearly negative review.
new_review = "This movie was shit."
rnn_message = f"RNN Prediction: {predict_sentiment_rnn(new_review)}"
print(rnn_message)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
RNN Prediction: negative
