In [None]:
# ====================================
# Step 1: Import libraries
# ====================================
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Embedding, SimpleRNN, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# ====================================
# Step 2: Load dataset
# ====================================
# The CSV is read without a header row, so the three columns are named here.
raw_columns = ['polarity', 'title', 'text']
df = pd.read_csv('data/test.csv', header=None)
df.columns = raw_columns

# Show how many rows carry each raw label before the labels are remapped.
print(df['polarity'].value_counts())

# Build a single text field per row; a missing title or body becomes ''.
title_part = df['title'].fillna('')
text_part = df['text'].fillna('')
df['content'] = title_part + " " + text_part

# Remap labels so that 1 (Negative) -> 0 and 2 (Positive) -> 1.
label_map = {1: 0, 2: 1}
df['polarity'] = df['polarity'].replace(label_map)

# Model input (combined text) and target (binary label).
X, y = df['content'], df['polarity']

# ====================================
# Step 3: Tokenization and Padding
# ====================================
MAX_WORDS = 5000            # vocabulary cap passed to the Tokenizer
MAX_SEQUENCE_LENGTH = 100   # every review is padded/truncated to this length

# Pick the train/test rows BEFORE fitting the tokenizer, so the vocabulary
# (and the word-frequency ranking that num_words relies on) is learned from
# training text only. Fitting on the full corpus lets held-out reviews
# influence preprocessing and makes the reported test accuracy optimistic.
# Splitting row positions with the same test_size/random_state is expected to
# select the same rows as splitting the padded matrix directly.
train_idx, test_idx = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42
)

tokenizer = Tokenizer(num_words=MAX_WORDS, oov_token="<OOV>")
tokenizer.fit_on_texts(X.iloc[train_idx])

# Encode every review with the train-fitted vocabulary; words the tokenizer
# has not seen (or that fall outside the top MAX_WORDS) map to "<OOV>".
sequences = tokenizer.texts_to_sequences(X)
X_pad = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH, padding='post')

X_train, X_test = X_pad[train_idx], X_pad[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# ====================================
# Step 4: Build RNN Model
# ====================================
# Binary sentiment classifier: embedding -> SimpleRNN -> sigmoid probability.
rnn_model = Sequential([
    # The inputs are post-padded with zeros, so without masking the SimpleRNN
    # keeps stepping through trailing padding and its final state (the only
    # thing the Dense layer sees) is dominated by padding rather than by the
    # review text. mask_zero=True treats index 0 as padding so those timesteps
    # are skipped. Index 0 is never assigned to a word by the Keras Tokenizer,
    # so input_dim=MAX_WORDS still covers every real token id.
    Embedding(input_dim=MAX_WORDS, output_dim=64, mask_zero=True),
    SimpleRNN(64),
    Dense(1, activation='sigmoid')
])
rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
rnn_model.summary()

# ====================================
# Step 5: Build LSTM Model
# ====================================
# Same layout as the RNN model, with an LSTM as the recurrent layer:
# 64-dim embeddings -> 64-unit LSTM -> single sigmoid output.
lstm_model = Sequential()
lstm_model.add(Embedding(input_dim=MAX_WORDS, output_dim=64))
lstm_model.add(LSTM(64))
lstm_model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the 0/1 polarity labels.
lstm_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
lstm_model.summary()

# ====================================
# Step 6: Train Models
# ====================================
# Both networks are trained with identical settings; 10% of the training
# split is held back by Keras for per-epoch validation.
fit_settings = dict(epochs=5, batch_size=32, validation_split=0.1)

training_plan = (
    ("\nTraining RNN...", rnn_model),
    ("\nTraining LSTM...", lstm_model),
)
for banner, net in training_plan:
    print(banner)
    net.fit(X_train, y_train, **fit_settings)

# ====================================
# Step 7: Evaluate Models
# ====================================
# evaluate() returns the loss followed by the compiled metrics; both models
# were compiled with a single 'accuracy' metric, so it sits at position 1.
ACCURACY_INDEX = 1

rnn_scores = rnn_model.evaluate(X_test, y_test, verbose=0)
lstm_scores = lstm_model.evaluate(X_test, y_test, verbose=0)

rnn_acc = rnn_scores[ACCURACY_INDEX]
lstm_acc = lstm_scores[ACCURACY_INDEX]

print(f"\nRNN Test Accuracy: {rnn_acc:.4f}")
print(f"LSTM Test Accuracy: {lstm_acc:.4f}")

# ====================================
# Step 8: Save Models and Tokenizer
# ====================================
# Destination files for the trained networks (locations unchanged).
RNN_MODEL_PATH = r'C:\Users\user\AmazonReview\Review\rnn_model.keras'
LSTM_MODEL_PATH = r'C:\Users\user\AmazonReview\Review\lstm_model.keras'

# Make sure the target folder exists before saving: a missing directory would
# otherwise make the save fail and throw away the training run above.
# dirname() can be empty (e.g. when backslashes are not path separators on
# the current OS), in which case there is nothing to create.
for model_path in (RNN_MODEL_PATH, LSTM_MODEL_PATH):
    parent_dir = os.path.dirname(model_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

rnn_model.save(RNN_MODEL_PATH)
lstm_model.save(LSTM_MODEL_PATH)

# The fitted tokenizer is needed at inference time to encode new text the same
# way as the training data. It is written to the current working directory.
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

print("\n✅ RNN, LSTM models and tokenizer saved successfully!")



Training RNN...
Epoch 1/5
[1m9000/9000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 24ms/step - accuracy: 0.5789 - loss: 0.6581 - val_accuracy: 0.5677 - val_loss: 0.6668
Epoch 2/5
[1m9000/9000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 17ms/step - accuracy: 0.5600 - loss: 0.6633 - val_accuracy: 0.5377 - val_loss: 0.6717
Epoch 3/5
[1m9000/9000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 17ms/step - accuracy: 0.5890 - loss: 0.6487 - val_accuracy: 0.5934 - val_loss: 0.6382
Epoch 4/5
[1m9000/9000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 17ms/step - accuracy: 0.6312 - loss: 0.6222 - val_accuracy: 0.5610 - val_loss: 0.6804
Epoch 5/5
[1m9000/9000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 18ms/step - accuracy: 0.5859 - loss: 0.6488 - val_accuracy: 0.5506 - val_loss: 0.6797

Training LSTM...
Epoch 1/5
[1m9000/9000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 33ms/step - accuracy: 0.8242 - loss: 0.3719 - val_accur