# **BIDIRECTIONAL LSTM**

In [65]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from gensim.models import KeyedVectors
import optuna

In [67]:
# ----------------
# Load dataset
# ----------------

path_file = "datasets/Unipi_NDF/df_ndf.csv"
df = pd.read_csv(path_file, sep="\t", encoding="utf-8")
df.head()

texts = df["texts"].astype(str).tolist()
labels = df["labels"].values

In [68]:
# ------------------------------
# Preprocessing and tokenization
# ------------------------------
max_words = 20000     # vocabulary size
max_len = 300         # maximum sequence length

tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>") # tokenizer with OOV token
tokenizer.fit_on_texts(texts) # create vocabulary {word: index}

sequences = tokenizer.texts_to_sequences(texts) # convert texts to sequences of integers
X = pad_sequences(sequences, maxlen=max_len, padding="post", truncating="post") # padd/truncate sequences to max_len

encoder = LabelEncoder() # encode labels to integers
y = encoder.fit_transform(labels)

# split train/validation/test (0.6 / 0.2 / 0.2): stratify to maintain label distribution
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, stratify=y, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

In [69]:
# -----------------------------------------------------------
# Load pre-trained Word2Vec embeddings (Google News)
# -----------------------------------------------------------

print("Loading pre-trained Word2Vec model (may take time)...")
w2v_path = "Word2Vec_GoogleNews300/word2vec-google-news-300.model"
w2v_model = KeyedVectors.load(w2v_path, mmap='r') # load model with memory mapping (mmap='r' for only reading)

embedding_dim = 300                             # dimension of Word2Vec embeddings
word_index = tokenizer.word_index               # vocabulary from tokenizer {word: index}
num_words = min(max_words, len(word_index) + 1) # number of words to consider (max_words or vocab size)

# create embedding matrix: each row corresponds to a word index from tokenizer, each column to an embedding dimension
embedding_matrix = np.zeros((num_words, embedding_dim))
for word, i in word_index.items():
    if i >= max_words: # skip because we only consider top max_words
        continue
    if word in w2v_model: # if word has a pre-trained embedding
        embedding_matrix[i] = w2v_model[word]

Loading pre-trained Word2Vec model (may take time)...


In [70]:
# ------------------------------
# Optuna objective function
# ------------------------------

def objectiveBiLSTM(trial):
    num_units = trial.suggest_categorical("num_units", [16, 32, 64, 96, 128])            # number of LSTM units
    dropout = trial.suggest_categorical("dropout", [0.2, 0.4, 0.6, 0.8])                 # dropout rate
    hidden_units = trial.suggest_categorical("hidden_units", [8, 16, 32, 64])            # number of units in dense layer
    learning_rate = trial.suggest_categorical("learning_rate", [1e-5, 1e-4, 1e-3, 1e-2]) # learning rate for Adam optimizer

    # BiLSTM model
    model = Sequential([
        Embedding(num_words, embedding_dim, weights=[embedding_matrix], input_length=max_len, trainable=True), # embedding layer with pre-trained weights
        Bidirectional(LSTM(num_units, return_sequences=False, dropout=dropout, recurrent_dropout=0.0)),
        Dense(hidden_units, activation='relu'),
        Dropout(dropout),
        Dense(1, activation='sigmoid')
    ])

    # Compile model with Adam optimizer
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Train model with early stopping
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=0)
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=8,
        callbacks=[es], # early stopping
        verbose=0
    )

    # dummy_input = np.zeros((1, max_len), dtype='int32')  # input fittizio per inizializzare i layer
    # model(dummy_input)  # forza la costruzione del modello
    # model.summary()

    preds_val = (model.predict(X_val) > 0.5).astype(int)
    f1 = f1_score(y_val, preds_val)
    return f1

In [71]:
# ----------------------------
# Hyperparameter optimization
# ----------------------------

study = optuna.create_study(direction="maximize") # maximize F1-score
study.optimize(objectiveBiLSTM, n_trials=50) # 50 trials for demonstration

print("Best parameters:", study.best_params)

[I 2025-10-21 17:50:36,732] A new study created in memory with name: no-name-b3503c06-a6de-448f-bc4b-21ac122ee401


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 17:50:57,053] Trial 0 finished with value: 0.8372093023255814 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 0 with value: 0.8372093023255814.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 17:51:12,355] Trial 1 finished with value: 0.0 and parameters: {'num_units': 64, 'dropout': 0.6, 'hidden_units': 16, 'learning_rate': 0.01}. Best is trial 0 with value: 0.8372093023255814.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step


[I 2025-10-21 17:51:21,374] Trial 2 finished with value: 0.7469879518072289 and parameters: {'num_units': 96, 'dropout': 0.4, 'hidden_units': 64, 'learning_rate': 0.01}. Best is trial 0 with value: 0.8372093023255814.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step


[I 2025-10-21 17:53:09,470] Trial 3 finished with value: 0.044444444444444446 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 16, 'learning_rate': 1e-05}. Best is trial 0 with value: 0.8372093023255814.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step


[I 2025-10-21 17:53:26,993] Trial 4 finished with value: 0.9069767441860465 and parameters: {'num_units': 16, 'dropout': 0.8, 'hidden_units': 64, 'learning_rate': 0.01}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


[I 2025-10-21 17:53:40,161] Trial 5 finished with value: 0.8470588235294118 and parameters: {'num_units': 96, 'dropout': 0.2, 'hidden_units': 16, 'learning_rate': 0.001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step


[I 2025-10-21 17:53:55,575] Trial 6 finished with value: 0.8888888888888888 and parameters: {'num_units': 128, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step


[I 2025-10-21 17:54:12,155] Trial 7 finished with value: 0.8333333333333334 and parameters: {'num_units': 64, 'dropout': 0.4, 'hidden_units': 8, 'learning_rate': 0.001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step


[I 2025-10-21 17:56:01,903] Trial 8 finished with value: 0.0 and parameters: {'num_units': 128, 'dropout': 0.6, 'hidden_units': 16, 'learning_rate': 1e-05}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step


[I 2025-10-21 17:57:41,447] Trial 9 finished with value: 0.5084745762711864 and parameters: {'num_units': 32, 'dropout': 0.8, 'hidden_units': 64, 'learning_rate': 0.0001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step


[I 2025-10-21 17:57:52,539] Trial 10 finished with value: 0.7222222222222222 and parameters: {'num_units': 16, 'dropout': 0.8, 'hidden_units': 64, 'learning_rate': 0.01}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step


[I 2025-10-21 17:58:58,942] Trial 11 finished with value: 0.7567567567567568 and parameters: {'num_units': 128, 'dropout': 0.8, 'hidden_units': 32, 'learning_rate': 0.0001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step


[I 2025-10-21 17:59:08,097] Trial 12 finished with value: 0.5084745762711864 and parameters: {'num_units': 128, 'dropout': 0.2, 'hidden_units': 32, 'learning_rate': 0.01}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step


[I 2025-10-21 17:59:17,177] Trial 13 finished with value: 0.12244897959183673 and parameters: {'num_units': 32, 'dropout': 0.8, 'hidden_units': 8, 'learning_rate': 0.001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step


[I 2025-10-21 17:59:32,638] Trial 14 finished with value: 0.525 and parameters: {'num_units': 128, 'dropout': 0.8, 'hidden_units': 64, 'learning_rate': 0.01}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step


[I 2025-10-21 17:59:51,482] Trial 15 finished with value: 0.8863636363636364 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step


[I 2025-10-21 18:01:43,108] Trial 16 finished with value: 0.6153846153846154 and parameters: {'num_units': 128, 'dropout': 0.4, 'hidden_units': 64, 'learning_rate': 1e-05}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


[I 2025-10-21 18:02:18,123] Trial 17 finished with value: 0.8863636363636364 and parameters: {'num_units': 16, 'dropout': 0.2, 'hidden_units': 32, 'learning_rate': 0.0001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step


[I 2025-10-21 18:02:31,447] Trial 18 finished with value: 0.0 and parameters: {'num_units': 32, 'dropout': 0.8, 'hidden_units': 8, 'learning_rate': 0.01}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 18:02:49,024] Trial 19 finished with value: 0.8181818181818182 and parameters: {'num_units': 96, 'dropout': 0.6, 'hidden_units': 64, 'learning_rate': 0.001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 18:03:14,058] Trial 20 finished with value: 0.813953488372093 and parameters: {'num_units': 64, 'dropout': 0.8, 'hidden_units': 32, 'learning_rate': 0.01}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 18:03:33,631] Trial 21 finished with value: 0.8631578947368421 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 4 with value: 0.9069767441860465.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 18:03:53,319] Trial 22 finished with value: 0.9213483146067416 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step


[I 2025-10-21 18:04:10,965] Trial 23 finished with value: 0.8505747126436781 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step


[I 2025-10-21 18:04:29,621] Trial 24 finished with value: 0.7848101265822784 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


[I 2025-10-21 18:04:44,944] Trial 25 finished with value: 0.8695652173913043 and parameters: {'num_units': 128, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step


[I 2025-10-21 18:06:34,774] Trial 26 finished with value: 0.0 and parameters: {'num_units': 16, 'dropout': 0.2, 'hidden_units': 64, 'learning_rate': 1e-05}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step


[I 2025-10-21 18:07:16,193] Trial 27 finished with value: 0.7901234567901234 and parameters: {'num_units': 16, 'dropout': 0.4, 'hidden_units': 8, 'learning_rate': 0.0001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step


[I 2025-10-21 18:07:30,563] Trial 28 finished with value: 0.8604651162790697 and parameters: {'num_units': 128, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step


[I 2025-10-21 18:07:54,813] Trial 29 finished with value: 0.8354430379746836 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 18:08:06,075] Trial 30 finished with value: 0.6666666666666666 and parameters: {'num_units': 32, 'dropout': 0.8, 'hidden_units': 64, 'learning_rate': 0.01}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step


[I 2025-10-21 18:08:25,674] Trial 31 finished with value: 0.8095238095238095 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 18:08:41,070] Trial 32 finished with value: 0.7777777777777778 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 18:09:01,562] Trial 33 finished with value: 0.8913043478260869 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step


[I 2025-10-21 18:09:19,156] Trial 34 finished with value: 0.9010989010989011 and parameters: {'num_units': 64, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step


[I 2025-10-21 18:09:30,193] Trial 35 finished with value: 0.5757575757575758 and parameters: {'num_units': 64, 'dropout': 0.6, 'hidden_units': 16, 'learning_rate': 0.01}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


[I 2025-10-21 18:09:47,256] Trial 36 finished with value: 0.8863636363636364 and parameters: {'num_units': 64, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


[I 2025-10-21 18:10:07,731] Trial 37 finished with value: 0.8541666666666666 and parameters: {'num_units': 64, 'dropout': 0.4, 'hidden_units': 16, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step


[I 2025-10-21 18:11:56,964] Trial 38 finished with value: 0.0 and parameters: {'num_units': 96, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 1e-05}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step


[I 2025-10-21 18:12:08,033] Trial 39 finished with value: 0.8292682926829268 and parameters: {'num_units': 64, 'dropout': 0.2, 'hidden_units': 64, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step


[I 2025-10-21 18:12:19,021] Trial 40 finished with value: 0.8051948051948052 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 16, 'learning_rate': 0.01}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step


[I 2025-10-21 18:12:35,624] Trial 41 finished with value: 0.8292682926829268 and parameters: {'num_units': 64, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


[I 2025-10-21 18:12:51,229] Trial 42 finished with value: 0.8636363636363636 and parameters: {'num_units': 96, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


[I 2025-10-21 18:13:09,180] Trial 43 finished with value: 0.8863636363636364 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step


[I 2025-10-21 18:14:07,507] Trial 44 finished with value: 0.37037037037037035 and parameters: {'num_units': 128, 'dropout': 0.8, 'hidden_units': 8, 'learning_rate': 0.0001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 90ms/step


[I 2025-10-21 18:14:29,836] Trial 45 finished with value: 0.8571428571428571 and parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step


[I 2025-10-21 18:14:45,311] Trial 46 finished with value: 0.8636363636363636 and parameters: {'num_units': 64, 'dropout': 0.4, 'hidden_units': 64, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step


[I 2025-10-21 18:16:35,900] Trial 47 finished with value: 0.0 and parameters: {'num_units': 128, 'dropout': 0.8, 'hidden_units': 32, 'learning_rate': 1e-05}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


[I 2025-10-21 18:16:47,013] Trial 48 finished with value: 0.4406779661016949 and parameters: {'num_units': 32, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.01}. Best is trial 22 with value: 0.9213483146067416.


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


[I 2025-10-21 18:17:00,108] Trial 49 finished with value: 0.8333333333333334 and parameters: {'num_units': 16, 'dropout': 0.2, 'hidden_units': 64, 'learning_rate': 0.001}. Best is trial 22 with value: 0.9213483146067416.


Best parameters: {'num_units': 16, 'dropout': 0.6, 'hidden_units': 32, 'learning_rate': 0.001}


In [73]:
# -----------
# Final training and evaluation
# -----------

best_params = study.best_params
best_model = Sequential([
    Embedding(num_words, embedding_dim, weights=[embedding_matrix], input_length=max_len, trainable=True),
    Bidirectional(LSTM(best_params["num_units"], return_sequences=False, dropout=best_params["dropout"], recurrent_dropout=0.0)),
    Dense(best_params["hidden_units"], activation='relu'),
    Dropout(best_params["dropout"]),
    Dense(1, activation='sigmoid')
])
best_model.compile(
    optimizer=Adam(learning_rate=best_params["learning_rate"]),
    loss='binary_crossentropy',
    metrics=['accuracy']
)
best_model.fit(
    np.concatenate((X_train, X_val)), 
    np.concatenate((y_train, y_val)),
    epochs=50,
    batch_size=8,
    callbacks=[EarlyStopping(monitor='loss', patience=2, restore_best_weights=True, verbose=0)],
    verbose=0
)
y_pred = (best_model.predict(X_test) > 0.5).astype(int)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Weighted F1-score:", f1_score(y_test, y_pred, average="weighted"))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.90      0.89        68
           1       0.83      0.81      0.82        43

    accuracy                           0.86       111
   macro avg       0.86      0.86      0.86       111
weighted avg       0.86      0.86      0.86       111

Confusion Matrix:
[[61  7]
 [ 8 35]]
Weighted F1-score: 0.8645631462290972
