In [8]:
import pickle
import os

# Load preprocessed data
with open("ML/models/X_train_padded.pkl", "rb") as f:
    X_train_padded = pickle.load(f)

with open("ML/models/X_test_padded.pkl", "rb") as f:
    X_test_padded = pickle.load(f)

with open("ML/models/y_train.pkl", "rb") as f:
    y_train = pickle.load(f)

with open("ML/models/y_test.pkl", "rb") as f:
    y_test = pickle.load(f)

# Load tokenizer
with open("ML/models/tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

print("✅ Processed data and tokenizer loaded successfully from 'ML/models'!")


✅ Processed data and tokenizer loaded successfully from 'ML/models'!


In [9]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# Convert to NumPy arrays (if not already)
y_train_encoded = np.array(y_train_encoded)
y_test_encoded = np.array(y_test_encoded)

print("✅ Loaded Data:")
print(" - X_train_padded shape:", X_train_padded.shape)  # e.g., (8044, 500)
print(" - X_test_padded shape :", X_test_padded.shape)   # e.g., (2012, 500)
print(" - y_train_encoded shape:", y_train_encoded.shape)  # e.g., (8044,)
print(" - y_test_encoded shape :", y_test_encoded.shape)   # e.g., (2012,)


✅ Loaded Data:
 - X_train_padded shape: (8044, 500)
 - X_test_padded shape : (2012, 500)
 - y_train_encoded shape: (8044,)
 - y_test_encoded shape : (2012,)


In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping


# Determine vocabulary size from tokenizer (add +1 for padding)
vocab_size = len(tokenizer.word_index) + 1  
embedding_dim = 128           # Dimension for embedding vectors
max_sequence_length = X_train_padded.shape[1]  # e.g., 500

model = Sequential()
model.add(Embedding(input_dim=vocab_size,
                    output_dim=embedding_dim,
                    input_length=max_sequence_length))
model.add(Bidirectional(LSTM(64, return_sequences=True)))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(32)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

# Compile the model
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Display the model summary (shows input shape details)
model.summary()




In [11]:

# Use EarlyStopping to prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)


In [12]:

history = model.fit(
    X_train_padded, y_train_encoded,
    validation_data=(X_test_padded, y_test_encoded),
    epochs=10,          # Adjust epochs as needed
    batch_size=64,      # Adjust batch size as needed
    callbacks=[early_stop]
)


Epoch 1/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 611ms/step - accuracy: 0.7285 - loss: 0.5058 - val_accuracy: 0.9140 - val_loss: 0.2148
Epoch 2/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 547ms/step - accuracy: 0.9627 - loss: 0.1267 - val_accuracy: 0.9105 - val_loss: 0.3215
Epoch 3/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 543ms/step - accuracy: 0.9824 - loss: 0.0595 - val_accuracy: 0.9195 - val_loss: 0.2817
Epoch 4/10
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 574ms/step - accuracy: 0.9919 - loss: 0.0303 - val_accuracy: 0.9185 - val_loss: 0.3103


In [14]:
model_dir = "ML/models"
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_padded, y_test_encoded)
print("Test Accuracy:", test_accuracy)

# ------------------------------
# 5. Save the Trained Model
# ------------------------------
model_save_path = os.path.join(model_dir, "bidirectional_lstm_model.h5")
model.save(model_save_path)
print("✅ Bidirectional LSTM model saved successfully at:", model_save_path)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 51ms/step - accuracy: 0.9172 - loss: 0.2033




Test Accuracy: 0.9140158891677856
✅ Bidirectional LSTM model saved successfully at: ML/models\bidirectional_lstm_model.h5
