# **CNN-BERT (FakeBERT)**

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # "0" o "1"

In [2]:
import tensorflow as tf
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("Using GPU:", gpus[0])
    except RuntimeError as e:
        print(e)
else:
    print("No GPU found, using CPU.")


2025-10-27 17:57:49.842064: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-27 17:57:49.899115: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-27 17:57:51.101399: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Using GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [None]:
from utils import *

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from transformers import BertTokenizer, TFBertModel

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# ---------------
# BERT Embeddings
# ---------------

tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
bert_model = TFBertModel.from_pretrained("bert-base-cased", from_pt=True)

def get_bert_embeddings(texts, max_len=128, batch_size=16):
    """
    Get BERT embeddings for a list of texts.

    Args:
        texts: List or array of input texts
        max_len: Maximum length for padding/truncation
        batch_size: Batch size for processing texts

    Returns:
        Numpy array of BERT embeddings with shape (num_texts, max_len, 768)
    """
    
    embeddings = []

    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i:i+batch_size].tolist()
        input_enc = tokenizer(
            batch_texts,
            truncation=True,
            padding="max_length",
            max_length=max_len,
            return_tensors='tf'
        )
        outputs = bert_model(input_enc)
        batch_emb = outputs.last_hidden_state  # (batch, max_len, 768)
        embeddings.append(batch_emb.numpy())

        # libera memoria GPU tra un batch e l’altro
        del input_enc, outputs, batch_emb
        tf.keras.backend.clear_session()

    return np.concatenate(embeddings, axis=0)


def generate_bert_embeddings(datasets):
    """
    Generate BERT embeddings for all datasets.
    
    Args:
        datasets: Dictionary of datasets with train/val/test splits.
    
    Returns:
        Updated datasets with BERT embeddings.
    """

    for name, data in datasets.items():
        print(f"\n=== Generating embeddings for dataset: {name} ===")

        X_train, y_train = data["train"]
        X_val, y_val = data["val"]
        X_test, y_test = data["test"]

        X_train_emb = get_bert_embeddings(X_train)
        X_val_emb = get_bert_embeddings(X_val)
        X_test_emb = get_bert_embeddings(X_test)

        datasets[name] = {
            "train": (X_train_emb, y_train),
            "val": (X_val_emb, y_val),
            "test": (X_test_emb, y_test)
        }
    
    return datasets

I0000 00:00:1761583819.027517 3874635 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 27088 MB memory:  -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:af:00.0, compute capability: 7.0
TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining 

In [None]:
# ------------------------------
# Build model function
# ------------------------------

def build_model(max_len=128, cnn_filters=96, kernel_size=4,
                dense_units=32, learning_rate=1e-4):
    """
    Builds a CNN model on top of BERT embeddings (not end-to-end fine-tuning).

    Args:
        max_len (int): Maximum sequence length.
        cnn_filters (int): Number of filters in Conv1D layer.
        kernel_size (int): Size of convolution kernel.
        dense_units (int): Units in dense hidden layer.
        learning_rate (float): Learning rate for Adam optimizer.

    Returns:
        model (tf.keras.Model): Compiled CNN-BERT model.
    """
    model = Sequential([
        Input(shape=(max_len, 768)),  # BERT base hidden size
        Conv1D(filters=cnn_filters, kernel_size=kernel_size, activation='relu'),
        GlobalMaxPooling1D(),
        Dense(dense_units, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model

## VERSION 1: Dataset (Simple)

In [None]:
dataset_df = data_loading() # load datasets

for name, df in dataset_df.items():
    print(f"Dataset: {name}, Number of samples: {len(df)}")

Dataset: Celebrity, Number of samples: 500
Dataset: CIDII, Number of samples: 722
Dataset: FaKES, Number of samples: 842
Dataset: FakeVsSatire, Number of samples: 486
Dataset: Horne, Number of samples: 326
Dataset: Infodemic, Number of samples: 10559
Dataset: ISOT, Number of samples: 44271
Dataset: Kaggle_clement, Number of samples: 39105
Dataset: Kaggle_meg, Number of samples: 12845
Dataset: LIAR_PLUS, Number of samples: 12784
Dataset: Politifact, Number of samples: 504
Dataset: Unipi_NDF, Number of samples: 554


  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT


In [None]:
# --------------------------------
# Fine-tuning on multiple datasets
# --------------------------------

print("\nSplitting datasets into train/val/test...")
datasets = {name: split_dataset(df) for name, df in dataset_df.items()} # split all datasets in train/val/test
print("\nComputing BERT embeddings for all datasets...")
datasets = generate_bert_embeddings(datasets) # get BERT embeddings for all datasets

model = build_model() # initialize model

results = {}

# sequential training
for i, (name, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on {name} ===")
    
    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    # early stopping
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=0)

    # fine-tune on train + val
    model.fit(
        np.concatenate([X_train, X_val]),
        np.concatenate([y_train, y_val]),
        epochs=10,
        batch_size=16,
        validation_split=0.1,
        callbacks=[es],
        verbose=1
    )

    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype(int)
    print(f"Classification Report after {name}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after {name}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after {name}:", f1_score(y_test, y_pred, average="weighted"))


    # evaluation on all datasets
    print("\n--- Evaluation on all datasets ---")
    results[name] = {}
    for test_name, test_data in datasets.items(): # for each dataset
        X_te, y_te = test_data["test"]
        preds = model.predict(get_bert_embeddings(X_te))
        preds = (preds > 0.5).astype(int)
        f1 = f1_score(y_te, preds, average="weighted")
        results[name][test_name] = f1
        print(f"Evaluation on {test_name}: Weighted F1 = {f1:.4f}")


TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.



=== Phase 1: Training/Fine-tuning on Celebrity ===
Epoch 1/10


2025-10-27 17:50:41.884200: I external/local_xla/xla/service/service.cc:163] XLA service 0x70b65000bdb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-10-27 17:50:41.884295: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): Tesla V100S-PCIE-32GB, Compute Capability 7.0
2025-10-27 17:50:41.948241: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-10-27 17:50:42.095371: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91002


[1m17/23[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 7ms/step - accuracy: 0.5916 - loss: 0.9231

I0000 00:00:1761583843.186846 3874922 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 96ms/step - accuracy: 0.5583 - loss: 0.8195 - val_accuracy: 0.4750 - val_loss: 0.7714
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.5972 - loss: 0.6637 - val_accuracy: 0.7000 - val_loss: 0.6599
Epoch 3/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7583 - loss: 0.5561 - val_accuracy: 0.6250 - val_loss: 0.6450
Epoch 4/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8333 - loss: 0.4803 - val_accuracy: 0.7250 - val_loss: 0.6108
Epoch 5/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8806 - loss: 0.4360 - val_accuracy: 0.6500 - val_loss: 0.6119
Epoch 6/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9222 - loss: 0.3768 - val_accuracy: 0.6000 - val_loss: 0.6117
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 115 and the array at index 1 has size 128

In [None]:
# ---------------
# Results summary
# ---------------

print("\n=== Results Summary ===")
for name, res in results.items():
    print(f"\nResults after training on {name}:")
    for test_name, f1 in res.items():
        print(f"  Test on {test_name}: Weighted F1 = {f1:.4f}")

## VERSION 2: Dataset by Topic

In [None]:
dataset_df = data_by_topic()

for topic, df in dataset_df.items():
    print(f"Topic: {topic}, Number of samples: {len(df)}")

Topic: politics, Number of samples: 97476
Topic: general, Number of samples: 12845
Topic: covid, Number of samples: 10559
Topic: syria, Number of samples: 842
Topic: islam, Number of samples: 722
Topic: notredame, Number of samples: 554
Topic: gossip, Number of samples: 500


  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT


In [None]:
# -------------------------------
# Fine-tuning on Dataset by Topic
# -------------------------------

datasets = {topic: split_dataset(df) for topic, df in dataset_df.items()} # split all datasets in train/val/test
model = build_model() # initialize model

results = {}

# sequential training
for i, (topic, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on topic: {topic} ===")

    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    # get BERT embeddings
    X_train_emb = get_bert_embeddings(X_train)
    X_val_emb = get_bert_embeddings(X_val)
    X_test_emb = get_bert_embeddings(X_test)

    # early stopping
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=1)

    # fine-tune on train + val
    model.fit(
        np.concatenate([X_train_emb, X_val_emb]),
        np.concatenate([y_train, y_val]),
        epochs=10,
        batch_size=16,
        validation_data=(X_val_emb, y_val),
        callbacks=[es],
        verbose=1
    )

    y_pred = model.predict(X_test_emb)
    y_pred = (y_pred > 0.5).astype(int)
    print(f"Classification Report after topic {topic}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after topic {topic}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after topic {topic}:", f1_score(y_test, y_pred, average="weighted"))


    # evaluation on all topics
    print("\n--- Evaluation on all topics ---")
    results[topic] = {}
    for test_topic, test_data in datasets.items(): # for each topic
        X_te, y_te = test_data["test"]
        preds = model.predict(get_bert_embeddings(X_te))
        preds = (preds > 0.5).astype(int)
        f1 = f1_score(y_te, preds, average="weighted")
        results[topic][test_topic] = f1
        print(f"Evaluation on topic {test_topic}: Weighted F1 = {f1:.4f}")

Loading pre-trained Word2Vec model (may take time)...





=== Phase 1: Training/Fine-tuning on topic: politics ===
Epoch 1/10
[1m1219/1219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 50ms/step - accuracy: 0.9011 - loss: 0.2092 - val_accuracy: 0.9386 - val_loss: 0.1234
Epoch 2/10
[1m1219/1219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 49ms/step - accuracy: 0.9370 - loss: 0.1248 - val_accuracy: 0.9446 - val_loss: 0.1093
Epoch 3/10
[1m1219/1219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 54ms/step - accuracy: 0.9419 - loss: 0.1141 - val_accuracy: 0.9496 - val_loss: 0.0979
Epoch 4/10
[1m1219/1219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 49ms/step - accuracy: 0.9459 - loss: 0.1052 - val_accuracy: 0.9530 - val_loss: 0.0922
Epoch 5/10
[1m1219/1219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 49ms/step - accuracy: 0.9503 - loss: 0.0980 - val_accuracy: 0.9567 - val_loss: 0.0838
Epoch 6/10
[1m1219/1219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 49ms/step - accuracy: 0.9527 - l

In [None]:
# ---------------
# Results summary
# ---------------

print("\n=== Results Summary ===")
for topic, res in results.items():
    print(f"\nResults after training on topic {topic}:")
    for test_topic, f1 in res.items():
        print(f"  Test on topic {test_topic}: Weighted F1 = {f1:.4f}")


=== Results Summary ===

Results after training on topic politics:
  Test on topic politics: Weighted F1 = 0.9425
  Test on topic general: Weighted F1 = 0.3113
  Test on topic covid: Weighted F1 = 0.4602
  Test on topic syria: Weighted F1 = 0.5949
  Test on topic islam: Weighted F1 = 0.3845
  Test on topic notredame: Weighted F1 = 0.3730
  Test on topic gossip: Weighted F1 = 0.3658

Results after training on topic general:
  Test on topic politics: Weighted F1 = 0.3552
  Test on topic general: Weighted F1 = 0.9671
  Test on topic covid: Weighted F1 = 0.3600
  Test on topic syria: Weighted F1 = 0.3633
  Test on topic islam: Weighted F1 = 0.4333
  Test on topic notredame: Weighted F1 = 0.4654
  Test on topic gossip: Weighted F1 = 0.3333

Results after training on topic covid:
  Test on topic politics: Weighted F1 = 0.7028
  Test on topic general: Weighted F1 = 0.0921
  Test on topic covid: Weighted F1 = 0.9398
  Test on topic syria: Weighted F1 = 0.3016
  Test on topic islam: Weighted F

## VERSION 3: Dataset by Date

In [None]:
dataset_df = data_by_date()

for date, df in dataset_df.items():
    print(f"Date: {date}, Number of samples: {len(df)}")

  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT


Date: 2011-2013, Number of samples: 55
Date: 2014, Number of samples: 114
Date: 2015, Number of samples: 84
Date: 2016, Number of samples: 49687
Date: 2017, Number of samples: 16657
Date: 2020, Number of samples: 10559


In [None]:
# ------------------------------
# Fine-tuning on Dataset by Date
# ------------------------------

datasets = {date: split_dataset(df) for date, df in dataset_df.items()} # split all datasets in train/val/test
model = build_model() # initialize model

results = {}

# sequential training
for i, (date, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on date: {date} ===")
    
    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    # get BERT embeddings
    X_train_emb = get_bert_embeddings(X_train)
    X_val_emb = get_bert_embeddings(X_val)
    X_test_emb = get_bert_embeddings(X_test)

    # early stopping
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=1)

    # fine-tune on train + val
    model.fit(
        np.concatenate([X_train_emb, X_val_emb]),
        np.concatenate([y_train, y_val]),
        epochs=10,
        batch_size=64,
        validation_data=(X_val_emb, y_val),
        callbacks=[es],
        verbose=1
    )

    y_pred = model.predict(X_test_emb)
    y_pred = (y_pred > 0.5).astype(int)
    print(f"Classification Report after date {date}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after date {date}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after date {date}:", f1_score(y_test, y_pred, average="weighted"))


    # evaluation on all dates
    print("\n--- Evaluation on all dates ---")
    results[date] = {}
    for test_date, test_data in datasets.items(): # for each date
        X_te, y_te = test_data["test"]
        preds = model.predict(get_bert_embeddings(X_te))
        preds = (preds > 0.5).astype(int)
        f1 = f1_score(y_te, preds, average="weighted")
        results[date][test_date] = f1
        print(f"Evaluation on {test_date}: Weighted F1 = {f1:.4f}")
    

Loading pre-trained Word2Vec model (may take time)...

=== Phase 1: Training/Fine-tuning on date: 2011-2013 ===
Epoch 1/10




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5682 - loss: 0.6833 - val_accuracy: 0.3636 - val_loss: 0.6976
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 299ms/step - accuracy: 0.3864 - loss: 0.7038 - val_accuracy: 0.3636 - val_loss: 0.6966
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 280ms/step - accuracy: 0.5227 - loss: 0.6950 - val_accuracy: 0.3636 - val_loss: 0.6954
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 279ms/step - accuracy: 0.4773 - loss: 0.6987 - val_accuracy: 0.3636 - val_loss: 0.6942
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 270ms/step - accuracy: 0.5455 - loss: 0.6894 - val_accuracy: 0.5455 - val_loss: 0.6930
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 270ms/step - accuracy: 0.5000 - loss: 0.6895 - val_accuracy: 0.5455 - val_loss: 0.6919
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [None]:
# ---------------
# Results summary
# ---------------

print("\n=== Results Summary ===")
for date, res in results.items():
    print(f"\nResults after training on date {date}:")
    for test_date, f1 in res.items():
        print(f"  Test on date {test_date}: Weighted F1 = {f1:.4f}")


=== Results Summary ===

Results after training on date 2011-2013:
  Test on date 2011-2013: Weighted F1 = 0.3636
  Test on date 2014: Weighted F1 = 0.5602
  Test on date 2015: Weighted F1 = 0.4594
  Test on date 2016: Weighted F1 = 0.3755
  Test on date 2017: Weighted F1 = 0.5813
  Test on date 2020: Weighted F1 = 0.5200

Results after training on date 2014:
  Test on date 2011-2013: Weighted F1 = 0.5377
  Test on date 2014: Weighted F1 = 0.4283
  Test on date 2015: Weighted F1 = 0.5394
  Test on date 2016: Weighted F1 = 0.3577
  Test on date 2017: Weighted F1 = 0.3460
  Test on date 2020: Weighted F1 = 0.5202

Results after training on date 2015:
  Test on date 2011-2013: Weighted F1 = 0.6169
  Test on date 2014: Weighted F1 = 0.3473
  Test on date 2015: Weighted F1 = 0.5394
  Test on date 2016: Weighted F1 = 0.3244
  Test on date 2017: Weighted F1 = 0.2466
  Test on date 2020: Weighted F1 = 0.5265

Results after training on date 2016:
  Test on date 2011-2013: Weighted F1 = 0.2338
