# **CNN-BERT (FakeBERT)**

In [1]:
import os
# Index of the physical GPU exposed to this process: "0" or "1".
# Set here, ahead of the TensorFlow import in the next cell.
CUDA_DEVICE_INDEX = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_DEVICE_INDEX

In [2]:
import tensorflow as tf
# Restrict TensorFlow to the first GPU it can see and enable memory growth on it;
# fall back to the CPU when no GPU is listed.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU found, using CPU.")
else:
    first_gpu = gpus[0]
    try:
        tf.config.set_visible_devices(first_gpu, 'GPU')
        tf.config.experimental.set_memory_growth(first_gpu, True)
        print("Using GPU:", first_gpu)
    except RuntimeError as err:
        # Report the configuration failure and keep going with TensorFlow's defaults.
        print(err)

2025-10-28 14:44:39.728346: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-28 14:44:39.786037: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-28 14:44:41.051385: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Using GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [3]:
from utils import *

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from transformers import BertTokenizer, TFBertModel

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# ---------------
# BERT Embeddings
# ---------------

# Tokenizer and encoder are loaded from the same checkpoint so their vocabularies match.
# from_pt=True loads the PyTorch weights into the TensorFlow model class.
BERT_CHECKPOINT = "bert-base-cased"
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = TFBertModel.from_pretrained(BERT_CHECKPOINT, from_pt=True)

def get_bert_embeddings(texts, max_len=128, batch_size=16):
    """
    Get BERT embeddings for a collection of texts.

    Args:
        texts: List, NumPy array or pandas Series of input texts
        max_len: Maximum length for padding/truncation
        batch_size: Number of texts encoded per forward pass (must be >= 1)

    Returns:
        Numpy array of BERT embeddings with shape (num_texts, max_len, hidden_size);
        hidden_size is taken from the loaded model (768 for BERT base).
        An empty input yields an array of shape (0, max_len, hidden_size).

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Normalise once to a plain Python list so that lists, arrays and Series are
    # all handled the same way (plain lists have no .tolist()).
    texts = texts.tolist() if hasattr(texts, "tolist") else list(texts)
    num_texts = len(texts)

    if num_texts == 0:
        # Nothing to encode: return a correctly shaped empty array instead of failing.
        return np.empty((0, max_len, bert_model.config.hidden_size), dtype=np.float32)

    # Allocated lazily from the first batch so shape/dtype come from the model itself.
    # Writing into one preallocated array avoids holding every batch *and* a
    # concatenated copy in host memory at the same time.
    embeddings = None

    for start in range(0, num_texts, batch_size):
        batch_texts = texts[start:start + batch_size]
        input_enc = tokenizer(
            batch_texts,
            truncation=True,
            padding="max_length",
            max_length=max_len,
            return_tensors='tf'
        )
        outputs = bert_model(input_enc)
        batch_emb = outputs.last_hidden_state.numpy()  # (batch, max_len, hidden_size)

        if embeddings is None:
            embeddings = np.empty((num_texts,) + batch_emb.shape[1:], dtype=batch_emb.dtype)
        embeddings[start:start + len(batch_texts)] = batch_emb

        # release GPU memory between one batch and the next
        del input_enc, outputs, batch_emb
        tf.keras.backend.clear_session()

    return embeddings


def generate_bert_embeddings(datasets):
    """
    Replace the inputs of every train/val/test split with their BERT embeddings.

    The dictionary is updated in place: each entry is overwritten with a new
    dict holding the same three splits, where the first element of each pair
    has been passed through get_bert_embeddings and the labels are kept as is.

    Args:
        datasets: Dictionary mapping a dataset name to a dict with
            "train", "val" and "test" entries, each an (inputs, labels) pair.

    Returns:
        The same dictionary object, now holding embeddings instead of raw inputs.
    """
    split_names = ("train", "val", "test")

    for name, data in datasets.items():
        print(f"\n=== Generating embeddings for dataset: {name} ===")

        # Read all three splits before embedding anything, so a missing split
        # fails before any expensive BERT forward pass is run.
        raw_splits = [(split, data[split]) for split in split_names]

        embedded = {}
        for split, (inputs, labels) in raw_splits:
            embedded[split] = (get_bert_embeddings(inputs), labels)

        # Overwriting an existing key does not resize the dict, so this is safe
        # while iterating over it.
        datasets[name] = embedded

    return datasets

I0000 00:00:1761659086.616286 4162294 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 28602 MB memory:  -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:d8:00.0, compute capability: 7.0
TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining 

In [5]:
# ------------------------------
# Build model function
# ------------------------------

def build_model(max_len=128, cnn_filters=96, kernel_size=4,
                dense_units=32, learning_rate=1e-4,
                embedding_dim=768, dropout_rate=0.3):
    """
    Builds a CNN model on top of BERT embeddings (not end-to-end fine-tuning).

    The network consumes precomputed embeddings of shape (max_len, embedding_dim)
    and outputs a single sigmoid probability for binary classification.

    Args:
        max_len (int): Maximum sequence length.
        cnn_filters (int): Number of filters in Conv1D layer.
        kernel_size (int): Size of convolution kernel.
        dense_units (int): Units in dense hidden layer.
        learning_rate (float): Learning rate for Adam optimizer.
        embedding_dim (int): Size of each token embedding; the default 768 is
            the BERT base hidden size.
        dropout_rate (float): Dropout rate applied after the dense hidden layer.

    Returns:
        model (tf.keras.Model): Compiled CNN-BERT model.
    """
    model = Sequential([
        Input(shape=(max_len, embedding_dim)),
        Conv1D(filters=cnn_filters, kernel_size=kernel_size, activation='relu'),
        GlobalMaxPooling1D(),  # collapse the sequence axis: one value per filter
        Dense(dense_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model

## VERSION 1: Dataset (Simple)

In [6]:
# Load every dataset, report its size, split it and replace its inputs with BERT embeddings.
dataset_df = data_loading()

for name in dataset_df:
    print(f"Dataset: {name}, Number of samples: {len(dataset_df[name])}")

print("\nSplitting datasets into train/val/test...")
datasets = {}
for name, df in dataset_df.items():
    datasets[name] = split_dataset(df)  # train/val/test split of one dataset

print("\nComputing BERT embeddings for all datasets...")
datasets = generate_bert_embeddings(datasets)

  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT
TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


Dataset: Celebrity, Number of samples: 500
Dataset: CIDII, Number of samples: 722
Dataset: FaKES, Number of samples: 842
Dataset: FakeVsSatire, Number of samples: 486
Dataset: Horne, Number of samples: 326
Dataset: Infodemic, Number of samples: 10559
Dataset: ISOT, Number of samples: 44271
Dataset: Kaggle_clement, Number of samples: 39105
Dataset: Kaggle_meg, Number of samples: 12845
Dataset: LIAR_PLUS, Number of samples: 12784
Dataset: Politifact, Number of samples: 504
Dataset: Unipi_NDF, Number of samples: 554

Splitting datasets into train/val/test...

Computing BERT embeddings for all datasets...

=== Generating embeddings for dataset: Celebrity ===

=== Generating embeddings for dataset: CIDII ===

=== Generating embeddings for dataset: FaKES ===

=== Generating embeddings for dataset: FakeVsSatire ===

=== Generating embeddings for dataset: Horne ===

=== Generating embeddings for dataset: Infodemic ===

=== Generating embeddings for dataset: ISOT ===

=== Generating embeddings 

In [7]:
# --------------------------------
# Fine-tuning on multiple datasets
# --------------------------------

model = build_model()  # single model instance shared by every phase below

results = {}  # results[trained_on][evaluated_on] -> weighted F1

# Sequential training: the weights are never reset, so each phase continues
# from where the previous dataset left off.
for i, name in enumerate(datasets):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on {name} ===")

    data = datasets[name]
    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    # Fit on train + val. The concatenated arrays are passed inline so they can be
    # freed as soon as fit() returns; validation_split holds out 10% of them and
    # early stopping watches the loss on that held-out part.
    model.fit(
        np.concatenate([X_train, X_val]),
        np.concatenate([y_train, y_val]),
        epochs=10,
        batch_size=16,
        validation_split=0.1,
        callbacks=[
            EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=0)
        ],
        verbose=1
    )

    # Threshold the sigmoid outputs at 0.5 and report on this phase's own test split.
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    print(f"Classification Report after {name}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after {name}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after {name}:", f1_score(y_test, y_pred, average="weighted"))

    # Score the current weights on the test split of every dataset.
    print("\n--- Evaluation on all datasets ---")
    phase_scores = {}
    results[name] = phase_scores
    for test_name, test_data in datasets.items():
        X_te, y_te = test_data["test"]
        preds = (model.predict(X_te) > 0.5).astype(int)
        f1 = f1_score(y_te, preds, average="weighted")
        phase_scores[test_name] = f1
        print(f"Evaluation on {test_name}: Weighted F1 = {f1:.4f}")



=== Phase 1: Training/Fine-tuning on Celebrity ===
Epoch 1/10


2025-10-28 15:53:39.606037: I external/local_xla/xla/service/service.cc:163] XLA service 0x76211800e560 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-10-28 15:53:39.606088: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): Tesla V100S-PCIE-32GB, Compute Capability 7.0
2025-10-28 15:53:39.657556: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-10-28 15:53:39.810393: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91002


[1m16/23[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 7ms/step - accuracy: 0.4615 - loss: 1.0325

I0000 00:00:1761663220.952619 4162589 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 99ms/step - accuracy: 0.4861 - loss: 0.8677 - val_accuracy: 0.4750 - val_loss: 0.7064
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.5944 - loss: 0.6626 - val_accuracy: 0.5500 - val_loss: 0.7043
Epoch 3/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7083 - loss: 0.5625 - val_accuracy: 0.5500 - val_loss: 0.6846
Epoch 4/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7944 - loss: 0.5041 - val_accuracy: 0.5750 - val_loss: 0.6791
Epoch 5/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8722 - loss: 0.4310 - val_accuracy: 0.5750 - val_loss: 0.6745
Epoch 6/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9111 - loss: 0.3808 - val_accuracy: 0.4750 - val_loss: 0.7212
Epoch 7/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━

I0000 00:00:1761663412.562555 4162294 cuda_executor.cc:508] failed to allocate 16.00GiB (17179869184 bytes) from device: RESOURCE_EXHAUSTED: : CUDA_ERROR_OUT_OF_MEMORY: out of memory


Epoch 1/10
[1m1993/1993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 9ms/step - accuracy: 0.9961 - loss: 0.0140 - val_accuracy: 0.9989 - val_loss: 0.0058
Epoch 2/10
[1m1993/1993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.9997 - loss: 0.0019 - val_accuracy: 0.9992 - val_loss: 0.0037
Epoch 3/10
[1m1993/1993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 1.0000 - loss: 3.9608e-04 - val_accuracy: 0.9992 - val_loss: 0.0057
Epoch 4/10
[1m1993/1993[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9999 - loss: 2.4138e-04 - val_accuracy: 0.9989 - val_loss: 0.0065
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step
Classification Report after ISOT:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4284
           1       1.00      1.00      1.00      4571

    accuracy                           1.00      8855
   ma

In [8]:
# ---------------
# Results summary
# ---------------

# Print the full matrix: one section per training phase, one line per evaluated dataset.
print("\n=== Results Summary ===")
for name in results:
    print(f"\nResults after training on {name}:")
    for test_name, f1 in results[name].items():
        print(f"  Test on {test_name}: Weighted F1 = {f1:.4f}")


=== Results Summary ===

Results after training on Celebrity:
  Test on Celebrity: Weighted F1 = 0.5998
  Test on CIDII: Weighted F1 = 0.6211
  Test on FaKES: Weighted F1 = 0.4830
  Test on FakeVsSatire: Weighted F1 = 0.5925
  Test on Horne: Weighted F1 = 0.5543
  Test on Infodemic: Weighted F1 = 0.3161
  Test on ISOT: Weighted F1 = 0.5308
  Test on Kaggle_clement: Weighted F1 = 0.7384
  Test on Kaggle_meg: Weighted F1 = 0.4941
  Test on LIAR_PLUS: Weighted F1 = 0.4400
  Test on Politifact: Weighted F1 = 0.4370
  Test on Unipi_NDF: Weighted F1 = 0.3437

Results after training on CIDII:
  Test on Celebrity: Weighted F1 = 0.4725
  Test on CIDII: Weighted F1 = 0.9312
  Test on FaKES: Weighted F1 = 0.3016
  Test on FakeVsSatire: Weighted F1 = 0.4278
  Test on Horne: Weighted F1 = 0.2631
  Test on Infodemic: Weighted F1 = 0.3105
  Test on ISOT: Weighted F1 = 0.3729
  Test on Kaggle_clement: Weighted F1 = 0.3182
  Test on Kaggle_meg: Weighted F1 = 0.0465
  Test on LIAR_PLUS: Weighted F1 = 0

## VERSION 2: Dataset by Topic

In [None]:
# Load the data grouped by topic, report each group's size, split it and embed it with BERT.
dataset_df = data_by_topic()

for topic in dataset_df:
    print(f"Topic: {topic}, Number of samples: {len(dataset_df[topic])}")

print("\nSplitting datasets into train/val/test...")
datasets = {}
for topic, df in dataset_df.items():
    datasets[topic] = split_dataset(df)  # train/val/test split of one topic

print("\nComputing BERT embeddings for all datasets...")
datasets = generate_bert_embeddings(datasets)

In [None]:
# -------------------------------
# Fine-tuning on Dataset by Topic
# -------------------------------

model = build_model()  # single model instance shared by every phase below

results = {}  # results[trained_on_topic][evaluated_on_topic] -> weighted F1

# Sequential training: the weights are never reset, so each phase continues
# from where the previous topic left off.
for i, (topic, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on topic: {topic} ===")

    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    # early stopping on the validation loss, restoring the best epoch's weights
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=1)

    # Fine-tune on the train split only. The validation split must stay unseen:
    # if it were also part of the training data, val_loss would be measured on
    # samples the model has already fitted and early stopping would be meaningless.
    model.fit(
        X_train,
        y_train,
        epochs=10,
        batch_size=16,
        validation_data=(X_val, y_val),
        callbacks=[es],
        verbose=1
    )

    # Threshold the sigmoid outputs at 0.5; ravel() turns the (n, 1) column into 1-D labels.
    y_pred = (model.predict(X_test) > 0.5).astype(int).ravel()
    print(f"Classification Report after topic {topic}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after topic {topic}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after topic {topic}:", f1_score(y_test, y_pred, average="weighted"))


    # evaluation on all topics with the current weights
    print("\n--- Evaluation on all topics ---")
    results[topic] = {}
    for test_topic, test_data in datasets.items(): # for each topic
        X_te, y_te = test_data["test"]
        preds = (model.predict(X_te) > 0.5).astype(int).ravel()
        f1 = f1_score(y_te, preds, average="weighted")
        results[topic][test_topic] = f1
        print(f"Evaluation on topic {test_topic}: Weighted F1 = {f1:.4f}")

In [None]:
# ---------------
# Results summary
# ---------------

# Print the full matrix: one section per training phase, one line per evaluated topic.
print("\n=== Results Summary ===")
for topic in results:
    print(f"\nResults after training on topic {topic}:")
    for test_topic, f1 in results[topic].items():
        print(f"  Test on topic {test_topic}: Weighted F1 = {f1:.4f}")

## VERSION 3: Dataset by Date

In [None]:
# Load the data grouped by date, report each group's size, split it and embed it with BERT.
dataset_df = data_by_date()

for date in dataset_df:
    print(f"Date: {date}, Number of samples: {len(dataset_df[date])}")

print("\nSplitting datasets into train/val/test...")
datasets = {}
for date, df in dataset_df.items():
    datasets[date] = split_dataset(df)  # train/val/test split of one date group

print("\nComputing BERT embeddings for all datasets...")
datasets = generate_bert_embeddings(datasets)

In [None]:
# ------------------------------
# Fine-tuning on Dataset by Date
# ------------------------------

model = build_model()  # single model instance shared by every phase below

results = {}  # results[trained_on_date][evaluated_on_date] -> weighted F1

# Sequential training: the weights are never reset, so each phase continues
# from where the previous date group left off.
for i, (date, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on date: {date} ===")

    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    # early stopping on the validation loss, restoring the best epoch's weights
    es = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=1)

    # Fine-tune on the train split only. The validation split must stay unseen:
    # if it were also part of the training data, val_loss would be measured on
    # samples the model has already fitted and early stopping would be meaningless.
    model.fit(
        X_train,
        y_train,
        epochs=10,
        batch_size=64,
        validation_data=(X_val, y_val),
        callbacks=[es],
        verbose=1
    )

    # Threshold the sigmoid outputs at 0.5; ravel() turns the (n, 1) column into 1-D labels.
    y_pred = (model.predict(X_test) > 0.5).astype(int).ravel()
    print(f"Classification Report after date {date}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after date {date}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after date {date}:", f1_score(y_test, y_pred, average="weighted"))


    # evaluation on all dates with the current weights
    print("\n--- Evaluation on all dates ---")
    results[date] = {}
    for test_date, test_data in datasets.items(): # for each date
        X_te, y_te = test_data["test"]
        preds = (model.predict(X_te) > 0.5).astype(int).ravel()
        f1 = f1_score(y_te, preds, average="weighted")
        results[date][test_date] = f1
        print(f"Evaluation on {test_date}: Weighted F1 = {f1:.4f}")
    

In [None]:
# ---------------
# Results summary
# ---------------

# Print the full matrix: one section per training phase, one line per evaluated date.
print("\n=== Results Summary ===")
for date in results:
    print(f"\nResults after training on date {date}:")
    for test_date, f1 in results[date].items():
        print(f"  Test on date {test_date}: Weighted F1 = {f1:.4f}")