In [48]:
# Import necessary libraries

# Data manipulation
import pandas as pd
import numpy as np

# Statistical functions
from scipy.stats import zscore

# For concurrency (running functions in parallel)
from concurrent.futures import ThreadPoolExecutor

# For caching (to speed up repeated function calls)
from functools import lru_cache

# For progress tracking
from tqdm import tqdm


# Text Preprocessing and NLP
import nltk

# Stopwords (common words to ignore) from NLTK
from nltk.corpus import stopwords

# Tokenizing sentences/words
from nltk.tokenize import word_tokenize

# Part-of-speech tagging
from nltk import pos_tag

# Lemmatization (converting words to their base form)
from nltk.stem import WordNetLemmatizer


# Model Imports
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from gensim.models import Word2Vec

In [49]:
import os
import sys
from pathlib import Path

# Resolve the project root once per kernel session. The guard matters: the first
# run changes the current directory to the project root, so recomputing
# `Path.cwd().parent` on a re-run would walk one directory further up.
# NOTE: the variable name is misspelled ("workding"); it is kept unchanged in
# case other cells or modules reference it.
if "workding_dir" not in locals():
    workding_dir = str(Path.cwd().parent)
os.chdir(workding_dir)
# Extend sys.path only when needed so re-running the cell does not accumulate
# duplicate entries.
if workding_dir not in sys.path:
    sys.path.append(workding_dir)
print("working dir:", workding_dir)

working dir: /home/dariusng2103/projects/dm_project/DM-Fake-News-Detection


### Load datasets

In [50]:
from datasets import load_dataset, concatenate_datasets, Dataset

# Load every CSV shard into a single DatasetDict with four splits. Paths are
# relative to the current working directory.
# NOTE(review): the variable is named like the `datasets` package it was
# imported from; the name is reused by later cells, so it is left unchanged.
datasets = load_dataset(
    "csv",
    data_files={
        # Original articles, training shards.
        "train": [
            "dataset/train_data_1.csv",
            "dataset/train_data_2.csv",
            "dataset/train_data_3.csv",
            "dataset/train_data_4.csv",
        ],
        # Original articles, held-out split.
        "test": "dataset/test_data.csv",
        # Rewritten counterparts of the training shards.
        "rewritten_train": [
            "dataset/rewritten_train_data_1.csv",
            "dataset/rewritten_train_data_2.csv",
            "dataset/rewritten_train_data_3.csv",
            "dataset/rewritten_train_data_4.csv",
        ],
        # Rewritten counterpart of the held-out split.
        "rewritten_test": "dataset/rewritten_test_data.csv",
    },
)
# Bare expression: displays the split names, features and row counts.
datasets

DatasetDict({
    train: Dataset({
        features: ['label', 'full_content', 'processed_full_content'],
        num_rows: 54441
    })
    test: Dataset({
        features: ['label', 'full_content', 'processed_full_content'],
        num_rows: 6050
    })
    rewritten_train: Dataset({
        features: ['label', 'full_content', 'processed_full_content'],
        num_rows: 54441
    })
    rewritten_test: Dataset({
        features: ['label', 'full_content', 'processed_full_content'],
        num_rows: 6050
    })
})

### LSTM with Word2Vec (best model)

In [51]:
# Set seeds for reproducibility
import tensorflow as tf
import numpy as np
from gensim.models import Word2Vec
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

# Seed TensorFlow and NumPy's legacy global RNG for the notebook session.
# Python's built-in `random` module is not seeded here.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Train and process Word2Vec embeddings
def train_word2vec_embeddings(train_texts, word_index, max_words=10000, embedding_dim=100):
    """Train Word2Vec on the training texts and build a Keras embedding matrix.

    Args:
        train_texts: Iterable of document strings used to train Word2Vec.
        word_index: Mapping ``word -> integer index`` as exposed by a fitted
            Keras ``Tokenizer`` (``tokenizer.word_index``).
        max_words: Upper bound on the number of rows of the embedding matrix;
            words whose index is not below the resulting ``vocab_size`` are skipped.
        embedding_dim: Dimensionality of the trained word vectors.

    Returns:
        tuple: ``(embedding_matrix, vocab_size)``. ``embedding_matrix`` has shape
        ``(vocab_size, embedding_dim)``; row ``i`` holds the vector of the word
        whose index is ``i``. Rows with no trained vector (including row 0,
        which no word maps to) remain all zeros.
    """
    # Imported locally so the function does not rely on another cell's imports;
    # this is the same module the notebook already uses for `Tokenizer`.
    from tensorflow.keras.preprocessing.text import text_to_word_sequence

    # Split documents with the same default rules the Keras Tokenizer applies
    # (lower-casing, punctuation filtering). A plain `str.split()` can yield
    # tokens such as "Word," that never match a `word_index` key, which would
    # silently leave those embedding rows at zero.
    sentences = [text_to_word_sequence(text) for text in train_texts]

    # min_count=1 keeps every token, so any word the tokenizer indexed can be
    # looked up below.
    word2vec = Word2Vec(sentences=sentences, vector_size=embedding_dim, window=5, min_count=1, workers=4)

    # +1 because Keras word indices start at 1; index 0 is reserved for padding.
    vocab_size = min(max_words, len(word_index) + 1)
    embedding_matrix = np.zeros((vocab_size, embedding_dim))

    for word, i in word_index.items():
        if i < vocab_size and word in word2vec.wv:
            embedding_matrix[i] = word2vec.wv[word]

    return embedding_matrix, vocab_size


def create_lstm_model(vocab_size, embedding_matrix, lstm_units=128, dropout_rate=0.3, learning_rate=0.001, l2_lambda=0.01):
    """Build and compile the LSTM binary classifier.

    The network is: a trainable Embedding layer initialised from
    ``embedding_matrix``, a single LSTM layer (with input dropout) that returns
    only its final state, a Dropout layer, and a one-unit sigmoid output with
    L2 kernel regularisation.

    Args:
        vocab_size: Number of rows of the embedding table (Embedding input dim).
        embedding_matrix: 2-D array of initial embedding weights; its second
            dimension is used as the embedding size.
        lstm_units: Number of LSTM units.
        dropout_rate: Rate used both for the LSTM's input dropout and for the
            standalone Dropout layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        l2_lambda: L2 penalty factor applied to the output layer's kernel.

    Returns:
        The compiled Keras ``Sequential`` model (binary cross-entropy loss,
        accuracy metric).
    """
    embedding_dim = embedding_matrix.shape[1]

    # Layers in forward order: embedding -> LSTM -> dropout -> sigmoid output.
    layer_stack = [
        Embedding(vocab_size, embedding_dim, weights=[embedding_matrix], trainable=True),
        LSTM(units=lstm_units, return_sequences=False, dropout=dropout_rate),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid', kernel_regularizer=l2(l2_lambda)),
    ]
    classifier = Sequential(layer_stack)

    optimizer = Adam(learning_rate)
    classifier.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return classifier


# Early-stopping callback shared at notebook level: watches `val_loss`, allows
# 3 epochs without improvement, and restores the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)



### Define function to train the model

In [52]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def train_model(
    train_data, val_data, epochs=10, batch_size=64, lstm_units=64, dropout_rate=0.3, grid_search=False
):
    """Train the Word2Vec-initialised LSTM classifier and score it on ``val_data``.

    The Keras tokenizer is fitted on the training texts only and then applied
    to both splits. The fitted tokenizer is not returned, so anyone scoring new
    text with the returned model must refit a tokenizer on the same training
    texts with the same vocabulary limit.

    Args:
        train_data: Table-like object with ``"processed_full_content"`` (text)
            and ``"label"`` columns, used for fitting.
        val_data: Same structure; used for early stopping and for the reported
            metrics.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.
        lstm_units: Number of LSTM units.
        dropout_rate: Dropout rate forwarded to the model builder.
        grid_search: When True, return the metrics dict instead of the model.

    Returns:
        The trained Keras model, or — when ``grid_search`` is True — a dict with
        keys ``"accuracy"``, ``"precision"``, ``"recall"`` and ``"f1_score"``.
    """
    # Re-seed on every call so repeated runs start from the same RNG state.
    seed = 42
    tf.random.set_seed(seed)
    np.random.seed(seed)

    max_words = 10000  # Maximum vocabulary size
    max_sequence_length = 300  # Max length of tokenized sequences
    embedding_dim = 100  # Embedding dimension

    print(f"\n🚀 Training LSTM with lstm_units={lstm_units}, dropout_rate={dropout_rate}")

    train_texts = train_data["processed_full_content"]
    val_texts = val_data["processed_full_content"]
    # Plain NumPy arrays: drops any pandas index left over from `.iloc`
    # sub-setting and gives Keras and scikit-learn one label format.
    y_train = np.asarray(train_data["label"])
    y_val = np.asarray(val_data["label"])

    # Fit the vocabulary on the training texts only.
    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(train_texts)

    X_train = pad_sequences(tokenizer.texts_to_sequences(train_texts), maxlen=max_sequence_length)
    X_val = pad_sequences(tokenizer.texts_to_sequences(val_texts), maxlen=max_sequence_length)

    # Use the vocab size returned together with the matrix so the Embedding
    # layer's input dimension always equals the number of matrix rows.
    embedding_matrix, vocab_size = train_word2vec_embeddings(
        train_texts, tokenizer.word_index, max_words, embedding_dim
    )

    model = create_lstm_model(
        vocab_size=vocab_size,
        embedding_matrix=embedding_matrix,
        lstm_units=lstm_units,
        dropout_rate=dropout_rate,
    )

    # A fresh callback per fit: the function no longer depends on a global
    # defined in another cell, and no callback state is shared between runs.
    stop_early = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

    model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=[stop_early],
        verbose=1,
    )

    # Threshold the sigmoid outputs at 0.5 and flatten (n, 1) -> (n,).
    y_pred = (model.predict(X_val) > 0.5).astype(int).reshape(-1)

    result = {
        "accuracy": accuracy_score(y_val, y_pred),
        "precision": precision_score(y_val, y_pred),
        "recall": recall_score(y_val, y_pred),
        "f1_score": f1_score(y_val, y_pred),
    }
    print("\n🏆 Training Results:")
    for key, value in result.items():
        print(f"🔹 {key.capitalize()}: {value:.4f}")

    return result if grid_search else model


### Define the model evaluation function

In [53]:
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
import pandas as pd
from tqdm import tqdm


def evaluate_model(model, train_data, val_data):
    """Score a trained model on ``val_data`` and print/return the metrics.

    The tokenizer is rebuilt here by fitting it on ``train_data``. The
    resulting word indices only match the model's embedding rows if
    ``train_data`` holds the same texts the model was trained on.

    Args:
        model: Trained Keras model exposing ``predict``.
        train_data: Table-like object with a ``"processed_full_content"``
            column; used only to refit the tokenizer.
        val_data: Table-like object with ``"processed_full_content"`` and
            ``"label"`` columns to evaluate on.

    Returns:
        dict: ``{"accuracy", "precision", "recall", "f1_score"}`` so runs can be
        compared programmatically. End the call with ``;`` in a notebook cell
        to suppress the echoed dict.
    """
    print("Evaluating Model")

    # Must match the values used at training time.
    max_words = 10000
    max_sequence_length = 300

    train_texts = train_data["processed_full_content"]
    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(train_texts)

    y_val = np.asarray(val_data["label"])
    val_texts = val_data["processed_full_content"]

    X_val = pad_sequences(
        tokenizer.texts_to_sequences(val_texts), maxlen=max_sequence_length
    )
    # Threshold the sigmoid outputs at 0.5 and flatten (n, 1) -> (n,), the same
    # way the training function does.
    y_pred = (model.predict(X_val) > 0.5).astype(int).reshape(-1)

    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1: {f1:.4f}")

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1_score": f1}

### Define GridSearch Function

In [54]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

def do_grid_search(data):
    """Grid-search LSTM hyperparameters with stratified 5-fold cross-validation.

    Every combination of ``lstm_units`` and ``dropout_rate`` is trained once per
    fold via ``train_model(..., grid_search=True)``; combinations are ranked by
    their mean validation F1-score.

    Args:
        data: pandas DataFrame with ``"processed_full_content"`` and ``"label"``
            columns.

    Returns:
        dict | None: ``{"lstm_units": ..., "dropout_rate": ...}`` for the best
        combination, or None when no combination scored above 0 (for example
        because every fold failed).

    Raises:
        ValueError: If ``data`` is empty.
    """
    if data.empty:
        raise ValueError("The dataset is empty. Please provide valid data.")

    param_grid = {"lstm_units": [64, 128], "dropout_rate": [0.2, 0.3]}
    results = []
    best_score = 0
    best_params = None

    # Loop-invariant: with a fixed integer random_state the splitter yields the
    # same folds on every `split` call, so all combinations share the folds.
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    texts = data["processed_full_content"]
    labels = data["label"].to_numpy()

    for lstm_unit in param_grid["lstm_units"]:
        for dropout_rate in param_grid["dropout_rate"]:
            print(f"\n Testing lstm_units={lstm_unit}, dropout_rate={dropout_rate}")

            fold_scores = []

            for fold, (train_idx, val_idx) in enumerate(kfold.split(texts, labels), 1):
                print(f"Fold {fold}")

                # Many models are built in this loop; reset Keras' global state
                # so objects from earlier folds can be released.
                tf.keras.backend.clear_session()

                try:
                    result = train_model(
                        train_data=data.iloc[train_idx],
                        val_data=data.iloc[val_idx],
                        lstm_units=lstm_unit,
                        dropout_rate=dropout_rate,
                        grid_search=True,
                    )

                    fold_score = result["f1_score"]
                    fold_scores.append(fold_score)
                    print(f"✔️ Fold {fold} F1-score: {fold_score:.4f}")

                except Exception as e:
                    # Deliberate best effort: a failing fold is reported and
                    # skipped; the number of folds that succeeded is recorded
                    # in the summary below.
                    print(f"Fold {fold} failed due to error: {e}")
                    continue

            avg_score = np.mean(fold_scores) if fold_scores else 0
            print(f"Average F1-score: {avg_score:.4f}")

            results.append(
                {
                    "lstm_units": lstm_unit,
                    "dropout_rate": dropout_rate,
                    "avg_f1_score": avg_score,
                    "folds_completed": len(fold_scores),
                }
            )

            if avg_score > best_score:
                best_score = avg_score
                best_params = {"lstm_units": lstm_unit, "dropout_rate": dropout_rate}

    # One compact table of every combination tried.
    print("\nGrid search summary:")
    print(pd.DataFrame(results).to_string(index=False))

    # Edge case: best_params stays None if no combination scored above 0.
    if best_params is not None:
        print("Best Parameters Found:")
        print(f"LSTM Units: {best_params['lstm_units']}")
        print(f"Dropout Rate: {best_params['dropout_rate']}")
        print(f"Best F1-Score: {best_score:.4f}")
    else:
        print("Grid search failed—no valid results.")

    return best_params


### Set training and validation data

In [55]:
# Materialise the original-article splits as pandas DataFrames.
train_data = datasets["train"].to_pandas()
val_data = datasets["test"].to_pandas()
# NOTE(review): `val_data` is the "test" split, and `data` merges it with the
# training split. Anything cross-validated on `data` therefore also sees test
# rows — confirm this is intended.
data = pd.concat([train_data, val_data], ignore_index=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60491 entries, 0 to 60490
Data columns (total 3 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   label                   60491 non-null  int64 
 1   full_content            60491 non-null  object
 2   processed_full_content  60491 non-null  object
dtypes: int64(1), object(2)
memory usage: 1.4+ MB


### Perform grid search for optimal hyperparameters

In [27]:
%%time

# Cross-validated hyperparameter search over the merged original data.
# The result is None when no combination scored above 0.
best_params = do_grid_search(data)
best_params


 Testing lstm_units=64, dropout_rate=0.2
Fold 1

🚀 Training LSTM with lstm_units=64, dropout_rate=0.2
Epoch 1/10
[1m757/757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 18ms/step - accuracy: 0.8857 - loss: 0.2930 - val_accuracy: 0.9495 - val_loss: 0.1513
Epoch 2/10
[1m757/757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 18ms/step - accuracy: 0.9581 - loss: 0.1265 - val_accuracy: 0.9683 - val_loss: 0.1023
Epoch 3/10
[1m757/757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 17ms/step - accuracy: 0.9699 - loss: 0.0991 - val_accuracy: 0.9717 - val_loss: 0.0951
Epoch 4/10
[1m757/757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 18ms/step - accuracy: 0.9739 - loss: 0.0863 - val_accuracy: 0.9757 - val_loss: 0.0822
Epoch 5/10
[1m757/757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 18ms/step - accuracy: 0.9809 - loss: 0.0685 - val_accuracy: 0.9778 - val_loss: 0.0733
Epoch 6/10
[1m757/757[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m

{'lstm_units': 128, 'dropout_rate': 0.2}

### Train the model

In [None]:
%%time

# Final fit on the original training split with the selected hyperparameters;
# `val_data` drives validation during training and the printed metrics.
# NOTE(review): raises TypeError if `best_params` is None (failed grid search).
model = train_model(train_data, val_data, lstm_units=best_params["lstm_units"], dropout_rate=best_params["dropout_rate"])
model.summary()


🚀 Training LSTM with lstm_units=128, dropout_rate=0.2
Epoch 1/10
[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 17ms/step - accuracy: 0.9015 - loss: 0.2472 - val_accuracy: 0.9691 - val_loss: 0.1048
Epoch 2/10
[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.9678 - loss: 0.0997 - val_accuracy: 0.9704 - val_loss: 0.0883
Epoch 3/10
[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 16ms/step - accuracy: 0.9747 - loss: 0.0778 - val_accuracy: 0.9798 - val_loss: 0.0691
Epoch 4/10
[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 15ms/step - accuracy: 0.9804 - loss: 0.0616 - val_accuracy: 0.9790 - val_loss: 0.0682
Epoch 5/10
[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - accuracy: 0.9869 - loss: 0.0483 - val_accuracy: 0.9803 - val_loss: 0.0692
Epoch 6/10
[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - accuracy: 0.9891 - loss: 0.0420 - v

CPU times: user 3min, sys: 45.9 s, total: 3min 46s
Wall time: 2min 55s


### Save the model in .keras

In [40]:
# Persist the model trained on the original data.
# NOTE(review): assumes the `results/` directory already exists — confirm.
model.save("results/LSTM_model_original.keras")

### Load the model again to see if results are the same

In [41]:
# load model
from tensorflow.keras.models import load_model

# Reload the saved model into a separate variable so it can be compared with
# the in-memory `model`.
model2 = load_model("results/LSTM_model_original.keras")
model2.summary()

### Model evaluation

In [42]:
%%time

# In-memory model on the original test split; `train_data` is passed only so
# the tokenizer can be refitted.
evaluate_model(model, train_data, val_data)

Evaluating Model
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
Accuracy: 0.9815
Precision: 0.9760
Recall: 0.9819
F1: 0.9789
CPU times: user 9.09 s, sys: 213 ms, total: 9.31 s
Wall time: 9.45 s


In [43]:
%%time

# Same evaluation for the model reloaded from disk.
# NOTE(review): the recorded output differs slightly from the in-memory model's
# (F1 0.9770 vs 0.9789). Worth investigating, since the stated goal is to check
# that a save/load round trip gives the same results.
evaluate_model(model2, train_data, val_data)

Evaluating Model
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
Accuracy: 0.9798
Precision: 0.9741
Recall: 0.9800
F1: 0.9770
CPU times: user 9.11 s, sys: 194 ms, total: 9.31 s
Wall time: 8.7 s


### Load LLM-rewritten data

In [44]:
# Materialise the LLM-rewritten splits and build one frame holding all four
# splits (original + rewritten, train + test).
val_data_rewritten = datasets["rewritten_test"].to_pandas()
train_data_rewritten = datasets["rewritten_train"].to_pandas()
# NOTE(review): presumably each rewritten row paraphrases an original row. If
# so, randomly drawn folds over `data_combined` can put an article and its
# rewrite on opposite sides of a split — confirm. The test splits are also
# included here.
data_combined = pd.concat([train_data, train_data_rewritten, val_data, val_data_rewritten], ignore_index=True)
data_combined.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120982 entries, 0 to 120981
Data columns (total 3 columns):
 #   Column                  Non-Null Count   Dtype 
---  ------                  --------------   ----- 
 0   label                   120982 non-null  int64 
 1   full_content            120982 non-null  object
 2   processed_full_content  120982 non-null  object
dtypes: int64(1), object(2)
memory usage: 2.8+ MB


### Evaluate the model trained on the original data against the rewritten test set

In [45]:
%%time

# Model trained on original articles, scored on the rewritten test split;
# the tokenizer is still fitted on the original `train_data`.
evaluate_model(model, train_data, val_data_rewritten)

Evaluating Model
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
Accuracy: 0.8217
Precision: 0.8388
Recall: 0.7333
F1: 0.7825
CPU times: user 8.94 s, sys: 202 ms, total: 9.14 s
Wall time: 8.57 s


In [46]:
%%time

# Same check for the reloaded copy of the original-data model.
evaluate_model(model2, train_data, val_data_rewritten)

Evaluating Model
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
Accuracy: 0.8279
Precision: 0.8464
Recall: 0.7412
F1: 0.7903
CPU times: user 8.89 s, sys: 106 ms, total: 9 s
Wall time: 9.09 s


### Rerun grid search with rewritten data

In [56]:
%%time

# Repeat the hyperparameter search on original + rewritten data.
# The result is None when no combination scored above 0.
best_params_combined = do_grid_search(data_combined)
best_params_combined


 Testing lstm_units=64, dropout_rate=0.2
Fold 1

🚀 Training LSTM with lstm_units=64, dropout_rate=0.2
Epoch 1/10
[1m1513/1513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 18ms/step - accuracy: 0.8466 - loss: 0.3540 - val_accuracy: 0.9093 - val_loss: 0.2217
Epoch 2/10
[1m1513/1513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 18ms/step - accuracy: 0.9299 - loss: 0.1848 - val_accuracy: 0.9393 - val_loss: 0.1640
Epoch 3/10
[1m1513/1513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 18ms/step - accuracy: 0.9423 - loss: 0.1546 - val_accuracy: 0.9447 - val_loss: 0.1469
Epoch 4/10
[1m1513/1513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 18ms/step - accuracy: 0.9488 - loss: 0.1371 - val_accuracy: 0.9490 - val_loss: 0.1423
Epoch 5/10
[1m1513/1513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 17ms/step - accuracy: 0.9565 - loss: 0.1209 - val_accuracy: 0.9499 - val_loss: 0.1381
Epoch 6/10
[1m1513/1513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

{'lstm_units': 128, 'dropout_rate': 0.2}

### Retrain the model with rewritten data

In [57]:
%%time

# Build train/validation frames that each contain the original rows followed by
# their rewritten counterparts, then retrain and save the model.
train_data_combined = pd.concat([train_data, train_data_rewritten], ignore_index=True)
val_data_combined = pd.concat([val_data, val_data_rewritten], ignore_index=True)
# NOTE(review): raises TypeError if `best_params_combined` is None.
model_combined = train_model(train_data_combined, val_data_combined, 
                             lstm_units=best_params_combined["lstm_units"], 
                             dropout_rate=best_params_combined["dropout_rate"])
# NOTE(review): assumes the `results/` directory already exists — confirm.
model_combined.save("results/LSTM_model_combined.keras")


🚀 Training LSTM with lstm_units=128, dropout_rate=0.2
Epoch 1/10
[1m1702/1702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 17ms/step - accuracy: 0.8596 - loss: 0.3229 - val_accuracy: 0.9279 - val_loss: 0.1830
Epoch 2/10
[1m1702/1702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 16ms/step - accuracy: 0.9354 - loss: 0.1644 - val_accuracy: 0.9394 - val_loss: 0.1494
Epoch 3/10
[1m1702/1702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 17ms/step - accuracy: 0.9499 - loss: 0.1327 - val_accuracy: 0.9465 - val_loss: 0.1389
Epoch 4/10
[1m1702/1702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 16ms/step - accuracy: 0.9576 - loss: 0.1143 - val_accuracy: 0.9475 - val_loss: 0.1363
Epoch 5/10
[1m1702/1702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 17ms/step - accuracy: 0.9628 - loss: 0.1023 - val_accuracy: 0.9497 - val_loss: 0.1330
Epoch 6/10
[1m1702/1702[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 16ms/step - accuracy: 0.9679 - loss

In [58]:
# Combined-data model on the combined (original + rewritten) test rows.
evaluate_model(model_combined, train_data_combined, val_data_combined)

Evaluating Model
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
Accuracy: 0.9538
Precision: 0.9566
Recall: 0.9369
F1: 0.9467


In [59]:
# Combined-data model on the original test rows only.
evaluate_model(model_combined, train_data_combined, val_data)

Evaluating Model
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
Accuracy: 0.9792
Precision: 0.9813
Recall: 0.9709
F1: 0.9761


In [60]:
# Combined-data model on the rewritten test rows only.
evaluate_model(model_combined, train_data_combined, val_data_rewritten)

Evaluating Model
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy: 0.9245
Precision: 0.9287
Recall: 0.8961
F1: 0.9121


In [61]:
# Reload the combined-data model from disk for a save/load comparison.
# `load_model` is imported in an earlier cell.
model_combined2 = load_model("results/LSTM_model_combined.keras")
model_combined2.summary()

In [62]:
# Reloaded combined-data model on the combined test rows.
evaluate_model(model_combined2, train_data_combined, val_data_combined)

Evaluating Model
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
Accuracy: 0.9512
Precision: 0.9518
Recall: 0.9358
F1: 0.9437


In [63]:
# Reloaded combined-data model on the original test rows only.
evaluate_model(model_combined2, train_data_combined, val_data)

Evaluating Model
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
Accuracy: 0.9813
Precision: 0.9851
Recall: 0.9720
F1: 0.9785


In [64]:
# Reloaded combined-data model on the rewritten test rows only.
evaluate_model(model_combined2, train_data_combined, val_data_rewritten)

Evaluating Model
[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
Accuracy: 0.9208
Precision: 0.9241
Recall: 0.8923
F1: 0.9079
