# **CNN**

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1" # "0" o "1"

In [2]:
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("Using GPU:", gpus[0])
    except RuntimeError as e:
        print(e)
else:
    print("No GPU found, using CPU.")


2025-10-27 15:24:38.384742: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-27 15:24:38.444631: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-27 15:24:39.690284: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Using GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [3]:
from utils import *

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from gensim.models import KeyedVectors

In [4]:
def prepare_data(datasets, max_words=20000, max_len=300):
    """
    Tokenization and label encoding for multiple datasets.

    Args:
        datasets (dict): A dictionary where keys are dataset names and values are pandas DataFrames with 'texts' and 'labels' columns.
        max_words (int): Maximum number of words to keep in the tokenizer vocabulary.
        max_len (int): Maximum length of sequences after padding/truncating.

    Returns:
        processed_datasets (dict): A dictionary with the same keys as input, where each value is another dict with 'train', 'val', 'test' splits containing (X, y) tuples.
        tokenizer (Tokenizer): Fitted Keras Tokenizer.
        encoder (LabelEncoder): Fitted sklearn LabelEncoder.
    """
    # Unisci tutti i testi di tutti i dataset
    all_texts = []
    for df in datasets.values():
        all_texts.extend(df["texts"].astype(str).tolist())

    # Tokenizer
    tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>") # tokenizer with OOV token
    tokenizer.fit_on_texts(all_texts) # create vocabulary {word: index}

    # LabelEncoder globale
    all_labels = np.concatenate([df["labels"].values for df in datasets.values()])
    encoder = LabelEncoder().fit(all_labels)

    # Applica tokenizzazione e encoding a ogni dataset
    processed_datasets = {}
    for name, df in datasets.items():
        seq = tokenizer.texts_to_sequences(df["texts"].astype(str).tolist()) # convert texts to sequences of integers
        X = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post') # padd/truncate sequences to max_len
        y = encoder.transform(df["labels"].values) # encode labels to integers

        # Split train/val/test
        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, stratify=y, random_state=42)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

        processed_datasets[name] = {
            "train": (X_train, y_train),
            "val": (X_val, y_val),
            "test": (X_test, y_test)
        }

    return processed_datasets, tokenizer, encoder

In [5]:
def load_word2vec(tokenizer, max_words=20000, embedding_dim=300):
    """
    Load pre-trained Word2Vec embeddings and create embedding matrix

    Args:
        tokenizer: Keras Tokenizer object with fitted vocabulary
        max_words: maximum number of words to consider from tokenizer
        embedding_dim: dimension of Word2Vec embeddings

    Returns:
        embedding_matrix: numpy array of shape (num_words, embedding_dim)
        num_words: actual number of words considered
    """
    
    print("Loading pre-trained Word2Vec model (may take time)...")
    w2v_path = "../Word2Vec_GoogleNews300/word2vec-google-news-300.model"
    w2v_model = KeyedVectors.load(w2v_path, mmap='r') # load model with memory mapping (mmap='r' for only reading) 

    word_index = tokenizer.word_index               # vocabulary from tokenizer {word: index}
    num_words = min(max_words, len(word_index) + 1) # number of words to consider (max_words or vocab size)

    # create embedding matrix: each row corresponds to a word index from tokenizer, each column to an embedding dimension
    embedding_matrix = np.zeros((num_words, embedding_dim))
    for word, i in word_index.items():
        if i >= max_words: # skip because we only consider top max_words
            continue
        if word in w2v_model: # if word has a pre-trained embedding
            embedding_matrix[i] = w2v_model[word]

    return embedding_matrix, num_words

In [None]:
# -----------------------
# Model building function
# -----------------------

def build_model(embedding_matrix, num_words, max_len=300, embedding_dim=300,
                filter_size=4, num_filters=96, dropout=0.4,
                hidden_units=32, learning_rate=1e-4):
    """
    Build and compile the CNN model.

    Args:
        embedding_matrix: Pre-trained embedding matrix
        num_words: Number of words in the vocabulary
        max_len: Maximum length of input sequences
        embedding_dim: Dimension of word embeddings
        filter_size: Size of the convolutional filters
        num_filters: Number of convolutional filters
        dropout: Dropout rate
        hidden_units: Number of units in the dense hidden layer
        learning_rate: Learning rate for the Adam optimizer

    Returns:
        model: Compiled Keras CNN model
    """
    model = Sequential([
        Embedding(num_words,
                  embedding_dim,
                  weights=[embedding_matrix],
                  input_length=max_len,
                  trainable=True),
        Dropout(dropout),
        Conv1D(num_filters, filter_size, activation='relu'),
        GlobalMaxPooling1D(),
        Dense(hidden_units, activation='relu'),
        Dropout(dropout),
        Dense(1, activation='sigmoid')
    ])

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    
    return model

## VERSION 1: Dataset (Simple)

In [7]:
dataset_df = data_loading() # load datasets

for name, df in dataset_df.items():
    print(f"Dataset: {name}, Number of samples: {len(df)}")

Dataset: Celebrity, Number of samples: 500
Dataset: CIDII, Number of samples: 722
Dataset: FaKES, Number of samples: 842
Dataset: FakeVsSatire, Number of samples: 486
Dataset: Horne, Number of samples: 326
Dataset: Infodemic, Number of samples: 10559
Dataset: ISOT, Number of samples: 44271
Dataset: Kaggle_clement, Number of samples: 39105
Dataset: Kaggle_meg, Number of samples: 12845
Dataset: LIAR_PLUS, Number of samples: 12784
Dataset: Politifact, Number of samples: 504
Dataset: Unipi_NDF, Number of samples: 554


  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT


In [8]:
# --------------------------------
# Fine-tuning on multiple datasets
# --------------------------------

# preprocessing on all datasets
datasets, tokenizer, encoder = prepare_data(dataset_df)
embedding_matrix, num_words = load_word2vec(tokenizer)

# initialize model
model = build_model(embedding_matrix, num_words)

results = {}

# sequential training
for i, (name, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on {name} ===")
    
    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    # early stopping
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=1)

    # fine-tune on train + val
    model.fit(
        np.concatenate([X_train, X_val]),
        np.concatenate([y_train, y_val]),
        epochs=10,
        batch_size=8,
        validation_data=(X_val, y_val),
        callbacks=[es],
        verbose=1
    )

    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype(int)
    print(f"Classification Report after {name}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after {name}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after {name}:", f1_score(y_test, y_pred, average="weighted"))


    # evaluation on all datasets
    print("\n--- Evaluation on all datasets ---")
    results[name] = {}
    for test_name, test_data in datasets.items(): # for each dataset
        X_te, y_te = test_data["test"]
        preds = model.predict(X_te)
        preds = (preds > 0.5).astype(int)
        f1 = f1_score(y_te, preds, average="weighted")
        results[name][test_name] = f1
        print(f"Evaluation on {test_name}: Weighted F1 = {f1:.4f}")


Loading pre-trained Word2Vec model (may take time)...


I0000 00:00:1761565947.018184 3391636 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 29192 MB memory:  -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:d8:00.0, compute capability: 7.0



=== Phase 1: Training/Fine-tuning on Celebrity ===
Epoch 1/10


2025-10-27 12:52:28.794851: I external/local_xla/xla/service/service.cc:163] XLA service 0x793c6401ca30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-10-27 12:52:28.794882: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): Tesla V100S-PCIE-32GB, Compute Capability 7.0
2025-10-27 12:52:28.839535: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-10-27 12:52:29.000613: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91002


[1m35/50[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 5ms/step - accuracy: 0.4448 - loss: 0.7871

I0000 00:00:1761565951.020361 3392366 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.4725 - loss: 0.7606 - val_accuracy: 0.5400 - val_loss: 0.6861
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5175 - loss: 0.7261 - val_accuracy: 0.6400 - val_loss: 0.6772
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5225 - loss: 0.7062 - val_accuracy: 0.6500 - val_loss: 0.6713
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5325 - loss: 0.7147 - val_accuracy: 0.6900 - val_loss: 0.6626
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5200 - loss: 0.6928 - val_accuracy: 0.6900 - val_loss: 0.6554
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5750 - loss: 0.6742 - val_accuracy: 0.7800 - val_loss: 0.6434
Epoch 7/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━

In [9]:
# ---------------
# Results summary
# ---------------

print("\n=== Results Summary ===")
for name, res in results.items():
    print(f"\nResults after training on {name}:")
    for test_name, f1 in res.items():
        print(f"  Test on {test_name}: Weighted F1 = {f1:.4f}")


=== Results Summary ===

Results after training on Celebrity:
  Test on Celebrity: Weighted F1 = 0.6692
  Test on CIDII: Weighted F1 = 0.4962
  Test on FaKES: Weighted F1 = 0.4911
  Test on FakeVsSatire: Weighted F1 = 0.4344
  Test on Horne: Weighted F1 = 0.4007
  Test on Infodemic: Weighted F1 = 0.5141
  Test on ISOT: Weighted F1 = 0.5222
  Test on Kaggle_clement: Weighted F1 = 0.5361
  Test on Kaggle_meg: Weighted F1 = 0.7685
  Test on LIAR_PLUS: Weighted F1 = 0.5265
  Test on Politifact: Weighted F1 = 0.6725
  Test on Unipi_NDF: Weighted F1 = 0.5001

Results after training on CIDII:
  Test on Celebrity: Weighted F1 = 0.3967
  Test on CIDII: Weighted F1 = 0.8416
  Test on FaKES: Weighted F1 = 0.4065
  Test on FakeVsSatire: Weighted F1 = 0.4650
  Test on Horne: Weighted F1 = 0.5609
  Test on Infodemic: Weighted F1 = 0.4782
  Test on ISOT: Weighted F1 = 0.4335
  Test on Kaggle_clement: Weighted F1 = 0.4970
  Test on Kaggle_meg: Weighted F1 = 0.9155
  Test on LIAR_PLUS: Weighted F1 = 0

## VERSION 2: Dataset by Topic

In [7]:
dataset_df = data_by_topic()

for topic, df in dataset_df.items():
    print(f"Topic: {topic}, Number of samples: {len(df)}")

Topic: politics, Number of samples: 97476
Topic: general, Number of samples: 12845
Topic: covid, Number of samples: 10559
Topic: syria, Number of samples: 842
Topic: islam, Number of samples: 722
Topic: notredame, Number of samples: 554
Topic: gossip, Number of samples: 500


  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT


In [8]:
# -------------------------------
# Fine-tuning on Dataset by Topic
# -------------------------------

# preprocessing on all datasets
datasets, tokenizer, encoder = prepare_data(dataset_df)
embedding_matrix, num_words = load_word2vec(tokenizer)

# initialize model
model = build_model(embedding_matrix, num_words)

results = {}

# sequential training
for i, (topic, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on topic: {topic} ===")

    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    # early stopping
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=1)

    # fine-tune on train + val
    model.fit(
        np.concatenate([X_train, X_val]),
        np.concatenate([y_train, y_val]),
        epochs=10,
        batch_size=8,
        validation_data=(X_val, y_val),
        callbacks=[es],
        verbose=1
    )

    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype(int)
    print(f"Classification Report after topic {topic}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after topic {topic}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after topic {topic}:", f1_score(y_test, y_pred, average="weighted"))


    # evaluation on all topics
    print("\n--- Evaluation on all topics ---")
    results[topic] = {}
    for test_topic, test_data in datasets.items(): # for each topic
        X_te, y_te = test_data["test"]
        preds = model.predict(X_te)
        preds = (preds > 0.5).astype(int)
        f1 = f1_score(y_te, preds, average="weighted")
        results[topic][test_topic] = f1
        print(f"Evaluation on topic {test_topic}: Weighted F1 = {f1:.4f}")

Loading pre-trained Word2Vec model (may take time)...


I0000 00:00:1761575181.076534 3491979 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31134 MB memory:  -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:d8:00.0, compute capability: 7.0



=== Phase 1: Training/Fine-tuning on topic: politics ===
Epoch 1/10


2025-10-27 15:26:22.955776: I external/local_xla/xla/service/service.cc:163] XLA service 0x721abc009b30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-10-27 15:26:22.955810: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): Tesla V100S-PCIE-32GB, Compute Capability 7.0
2025-10-27 15:26:23.001295: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-10-27 15:26:23.168953: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91002


[1m  37/9748[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m41s[0m 4ms/step - accuracy: 0.5067 - loss: 0.7457 

I0000 00:00:1761575184.967475 3492771 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m9748/9748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 5ms/step - accuracy: 0.9046 - loss: 0.1967 - val_accuracy: 0.9407 - val_loss: 0.1124
Epoch 2/10
[1m9748/9748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 5ms/step - accuracy: 0.9390 - loss: 0.1177 - val_accuracy: 0.9494 - val_loss: 0.1001
Epoch 3/10
[1m9748/9748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 5ms/step - accuracy: 0.9457 - loss: 0.1058 - val_accuracy: 0.9559 - val_loss: 0.0902
Epoch 4/10
[1m9748/9748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 5ms/step - accuracy: 0.9506 - loss: 0.0980 - val_accuracy: 0.9609 - val_loss: 0.0834
Epoch 5/10
[1m9748/9748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 5ms/step - accuracy: 0.9544 - loss: 0.0904 - val_accuracy: 0.9665 - val_loss: 0.0741
Epoch 6/10
[1m9748/9748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 5ms/step - accuracy: 0.9599 - loss: 0.0833 - val_accuracy: 0.9728 - val_loss: 0.0663
Epoch 7/10
[1m9748/9

In [9]:
# ---------------
# Results summary
# ---------------

print("\n=== Results Summary ===")
for topic, res in results.items():
    print(f"\nResults after training on topic {topic}:")
    for test_topic, f1 in res.items():
        print(f"  Test on topic {test_topic}: Weighted F1 = {f1:.4f}")


=== Results Summary ===

Results after training on topic politics:
  Test on topic politics: Weighted F1 = 0.9412
  Test on topic general: Weighted F1 = 0.3805
  Test on topic covid: Weighted F1 = 0.4206
  Test on topic syria: Weighted F1 = 0.4462
  Test on topic islam: Weighted F1 = 0.4118
  Test on topic notredame: Weighted F1 = 0.3092
  Test on topic gossip: Weighted F1 = 0.4430

Results after training on topic general:
  Test on topic politics: Weighted F1 = 0.4058
  Test on topic general: Weighted F1 = 0.9695
  Test on topic covid: Weighted F1 = 0.3619
  Test on topic syria: Weighted F1 = 0.3633
  Test on topic islam: Weighted F1 = 0.4333
  Test on topic notredame: Weighted F1 = 0.4654
  Test on topic gossip: Weighted F1 = 0.3333

Results after training on topic covid:
  Test on topic politics: Weighted F1 = 0.7490
  Test on topic general: Weighted F1 = 0.3708
  Test on topic covid: Weighted F1 = 0.9335
  Test on topic syria: Weighted F1 = 0.4883
  Test on topic islam: Weighted F

## VERSION 3: Dataset by Date

In [10]:
dataset_df = data_by_date()

for date, df in dataset_df.items():
    print(f"Date: {date}, Number of samples: {len(df)}")

  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT


Date: 2011-2013, Number of samples: 55
Date: 2014, Number of samples: 114
Date: 2015, Number of samples: 84
Date: 2016, Number of samples: 49687
Date: 2017, Number of samples: 16657
Date: 2020, Number of samples: 10559


In [11]:
# ------------------------------
# Fine-tuning on Dataset by Date
# ------------------------------

# preprocessing on all datasets
datasets, tokenizer, encoder = prepare_data(dataset_df)
embedding_matrix, num_words = load_word2vec(tokenizer)

# initialize model
model = build_model(embedding_matrix, num_words)

results = {}

# sequential training
for i, (date, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on date: {date} ===")
    
    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    # early stopping
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=1)

    # fine-tune on train + val
    model.fit(
        np.concatenate([X_train, X_val]),
        np.concatenate([y_train, y_val]),
        epochs=10,
        batch_size=8,
        validation_data=(X_val, y_val),
        callbacks=[es],
        verbose=1
    )

    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype(int)
    print(f"Classification Report after date {date}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after date {date}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after date {date}:", f1_score(y_test, y_pred, average="weighted"))


    # evaluation on all dates
    print("\n--- Evaluation on all dates ---")
    results[date] = {}
    for test_date, test_data in datasets.items(): # for each date
        X_te, y_te = test_data["test"]
        preds = model.predict(X_te)
        preds = (preds > 0.5).astype(int)
        f1 = f1_score(y_te, preds, average="weighted")
        results[date][test_date] = f1
        print(f"Evaluation on {test_date}: Weighted F1 = {f1:.4f}")
    

Loading pre-trained Word2Vec model (may take time)...

=== Phase 1: Training/Fine-tuning on date: 2011-2013 ===
Epoch 1/10




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 492ms/step - accuracy: 0.4773 - loss: 0.7796 - val_accuracy: 0.5455 - val_loss: 0.6953
Epoch 2/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.5227 - loss: 0.7129 - val_accuracy: 0.4545 - val_loss: 0.6906
Epoch 3/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.5227 - loss: 0.7051 - val_accuracy: 0.3636 - val_loss: 0.6861
Epoch 4/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.6818 - loss: 0.5989 - val_accuracy: 0.4545 - val_loss: 0.6815
Epoch 5/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5227 - loss: 0.7556 - val_accuracy: 0.6364 - val_loss: 0.6760
Epoch 6/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.4318 - loss: 0.7827 - val_accuracy: 0.6364 - val_loss: 0.6689
Epoch 7/10
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [12]:
# ---------------
# Results summary
# ---------------

print("\n=== Results Summary ===")
for date, res in results.items():
    print(f"\nResults after training on date {date}:")
    for test_date, f1 in res.items():
        print(f"  Test on date {test_date}: Weighted F1 = {f1:.4f}")


=== Results Summary ===

Results after training on date 2011-2013:
  Test on date 2011-2013: Weighted F1 = 0.5606
  Test on date 2014: Weighted F1 = 0.2690
  Test on date 2015: Weighted F1 = 0.4938
  Test on date 2016: Weighted F1 = 0.3890
  Test on date 2017: Weighted F1 = 0.2089
  Test on date 2020: Weighted F1 = 0.3686

Results after training on date 2014:
  Test on date 2011-2013: Weighted F1 = 0.3961
  Test on date 2014: Weighted F1 = 0.3944
  Test on date 2015: Weighted F1 = 0.4759
  Test on date 2016: Weighted F1 = 0.4834
  Test on date 2017: Weighted F1 = 0.7877
  Test on date 2020: Weighted F1 = 0.4752

Results after training on date 2015:
  Test on date 2011-2013: Weighted F1 = 0.5606
  Test on date 2014: Weighted F1 = 0.2899
  Test on date 2015: Weighted F1 = 0.4078
  Test on date 2016: Weighted F1 = 0.3989
  Test on date 2017: Weighted F1 = 0.2922
  Test on date 2020: Weighted F1 = 0.3880

Results after training on date 2016:
  Test on date 2011-2013: Weighted F1 = 0.3528
