In [11]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [12]:
df = pd.read_csv('IMDB Dataset.csv')



In [13]:

max_words = 10000  # Vocabulary size
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(df['review'])  # Fit tokenizer on text

# Convert text to sequences
X = tokenizer.texts_to_sequences(df['review'])

# Apply padding to ensure equal-length input sequences
max_length = 500  # Max sequence length
X = pad_sequences(X, maxlen=max_length, padding='post', truncating='post')

In [14]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['sentiment'])

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
model = Sequential([
    Embedding(input_dim=max_words, output_dim=128, input_length=max_length),  # Word Embedding
    SimpleRNN(64, activation='relu', return_sequences=True),  # First RNN Layer
    SimpleRNN(32, activation='relu'),  # Second RNN Layer
    Dense(1, activation='sigmoid')  # Output Layer (Binary Classification)
])



In [17]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [18]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=32, verbose=1)

Epoch 1/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 140ms/step - accuracy: 0.5048 - loss: 0.6932 - val_accuracy: 0.5018 - val_loss: 0.6932
Epoch 2/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 138ms/step - accuracy: 0.5195 - loss: 0.6854 - val_accuracy: 0.4982 - val_loss: 0.6999
Epoch 3/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 137ms/step - accuracy: 0.5362 - loss: 0.6594 - val_accuracy: 0.5004 - val_loss: 0.7351
Epoch 4/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 145ms/step - accuracy: 0.5423 - loss: 0.6435 - val_accuracy: 0.4974 - val_loss: 0.7470
Epoch 5/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 152ms/step - accuracy: 0.5452 - loss: 0.6395 - val_accuracy: 0.4989 - val_loss: 0.8097


In [19]:
model.save('rnn_imdb_sentiment.h5')

