# **SUPPORT VECTOR MACHINE**

In [1]:
import os

# Select which physical GPU this kernel may see ("0" or "1").
# setdefault keeps a value already exported by the launcher/scheduler instead
# of silently overriding it; GPU index 1 is only the fallback choice.
# This cell has to run before the cell that imports torch and queries CUDA.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")

In [2]:
import torch

# Use the GPU when torch reports a usable CUDA runtime, otherwise the CPU.
# NOTE(review): `device` is not referenced by any later cell visible in this
# notebook (the model is a scikit-learn pipeline) — confirm it is still needed.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [3]:
from utils import *

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, f1_score, confusion_matrix

In [4]:
# -----------------------
# Model building function
# -----------------------

def build_model(C=10.0, penalty="l1", loss="squared_hinge", dual=False,
                max_features=5000, ngram_range=(1, 2), stop_words="english",
                max_iter=2000, random_state=42):
    """
    Builds an unfitted scikit-learn Pipeline with TF-IDF vectorization and a linear SVM classifier.

    The returned pipeline holds no learned state. Every call to its ``fit``
    re-learns both the TF-IDF vocabulary and the SVM weights from the data
    passed in, so fitting it on a second dataset replaces (does not extend)
    whatever was learned from the first one.

    Args:
        C (float): Inverse of regularization strength for SVM.
        penalty (str): Regularization type for SVM ("l1" or "l2").
        loss (str): Loss function to use for SVM ("hinge" or "squared_hinge").
        dual (bool): Whether to solve the dual optimization problem.
            scikit-learn rejects some penalty/loss/dual combinations
            (e.g. penalty="l1" needs dual=False and loss="squared_hinge").
        max_features (int | None): Keep only the most frequent terms in the
            TF-IDF vocabulary; None keeps all of them.
        ngram_range (tuple[int, int]): Min/max n-gram size; (1, 2) means
            unigrams + bigrams.
        stop_words (str | list | None): Stop-word list handed to the vectorizer.
        max_iter (int): Maximum number of solver iterations for the SVM.
        random_state (int | None): Seed forwarded to the SVM for reproducibility.

    Returns:
        Pipeline: A scikit-learn Pipeline object with steps 'tfidf' and 'clf'.
    """

    # TF-IDF vectorization lives inside the pipeline so the vocabulary is
    # learned only from the data given to fit(), never from test data.
    vectorizer = TfidfVectorizer(
        max_features=max_features,
        ngram_range=ngram_range,
        stop_words=stop_words,
    )

    # Linear SVM classifier
    classifier = LinearSVC(
        C=C,
        penalty=penalty,
        loss=loss,
        dual=dual,
        max_iter=max_iter,
        random_state=random_state,
    )

    return Pipeline([
        ('tfidf', vectorizer),
        ('clf', classifier),
    ])

## VERSION 1: Dataset (Simple)

In [5]:
# --------------------------------------------------
# Per-dataset training and cross-dataset evaluation
# --------------------------------------------------
# NOTE: this is NOT incremental fine-tuning. Pipeline.fit() re-learns the
# TF-IDF vocabulary and the LinearSVC weights from scratch on every call, so
# nothing is carried over from one phase to the next: each phase is an
# independent model trained on a single dataset. A fresh pipeline is built per
# phase to make that explicit (the numbers are the same as when re-fitting one
# shared pipeline). The "Fine-tuning" wording in the printed headers is kept
# only so that the output matches previously recorded runs.

datasets_df = data_loading() # load datasets (mapping: dataset name -> data)

# split all datasets in train/val/test; each split is unpacked below as an (X, y) pair
datasets = {name: split_dataset(df) for name, df in datasets_df.items()}

results = {} # results[trained_on][tested_on] = weighted F1

# one independent training run per dataset
for i, (name, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on {name} ===")

    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    model = build_model() # fresh, unfitted pipeline for this phase
    # no hyper-parameter search is done here, so the validation split is merged into the training data
    model.fit(pd.concat([X_train, X_val]), pd.concat([y_train, y_val]))

    # in-domain evaluation: test split of the dataset just trained on
    y_pred = model.predict(X_test)
    print(f"Classification Report after {name}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after {name}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after {name}:", f1_score(y_test, y_pred, average="weighted"))


    # cross-dataset evaluation: the model trained on `name` scored on every test split
    print("\n--- Evaluation on all datasets ---")
    results[name] = {}
    for test_name, test_data in datasets.items(): # for each dataset
        X_te, y_te = test_data["test"]
        preds = model.predict(X_te)
        f1 = f1_score(y_te, preds, average="weighted")
        results[name][test_name] = f1
        print(f"Evaluation on {test_name}: Weighted F1 = {f1:.4f}")


  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT



=== Phase 1: Training/Fine-tuning on Celebrity ===




Classification Report after Celebrity:
              precision    recall  f1-score   support

           0       0.58      0.68      0.62        50
           1       0.61      0.50      0.55        50

    accuracy                           0.59       100
   macro avg       0.59      0.59      0.59       100
weighted avg       0.59      0.59      0.59       100

Confusion Matrix after Celebrity:
[[34 16]
 [25 25]]

Weighted F1-score after Celebrity: 0.5866518802298619

--- Evaluation on all datasets ---
Evaluation on Celebrity: Weighted F1 = 0.5867
Evaluation on CIDII: Weighted F1 = 0.5747
Evaluation on FaKES: Weighted F1 = 0.4011
Evaluation on FakeVsSatire: Weighted F1 = 0.4506
Evaluation on Horne: Weighted F1 = 0.6382
Evaluation on Infodemic: Weighted F1 = 0.4439
Evaluation on ISOT: Weighted F1 = 0.4919
Evaluation on Kaggle_clement: Weighted F1 = 0.5220
Evaluation on Kaggle_meg: Weighted F1 = 0.8742
Evaluation on LIAR_PLUS: Weighted F1 = 0.4593
Evaluation on Politifact: Weighted F1 



Classification Report after FaKES:
              precision    recall  f1-score   support

           0       0.54      0.61      0.57        89
           1       0.49      0.42      0.46        80

    accuracy                           0.52       169
   macro avg       0.52      0.52      0.51       169
weighted avg       0.52      0.52      0.52       169

Confusion Matrix after FaKES:
[[54 35]
 [46 34]]

Weighted F1-score after FaKES: 0.516965739474774

--- Evaluation on all datasets ---
Evaluation on Celebrity: Weighted F1 = 0.4987
Evaluation on CIDII: Weighted F1 = 0.5146
Evaluation on FaKES: Weighted F1 = 0.5170
Evaluation on FakeVsSatire: Weighted F1 = 0.3765
Evaluation on Horne: Weighted F1 = 0.5922
Evaluation on Infodemic: Weighted F1 = 0.4655
Evaluation on ISOT: Weighted F1 = 0.4428
Evaluation on Kaggle_clement: Weighted F1 = 0.4627
Evaluation on Kaggle_meg: Weighted F1 = 0.7731
Evaluation on LIAR_PLUS: Weighted F1 = 0.4932
Evaluation on Politifact: Weighted F1 = 0.5034
Eval



Classification Report after FakeVsSatire:
              precision    recall  f1-score   support

           0       0.77      0.66      0.71        41
           1       0.78      0.86      0.82        57

    accuracy                           0.78        98
   macro avg       0.77      0.76      0.76        98
weighted avg       0.78      0.78      0.77        98

Confusion Matrix after FakeVsSatire:
[[27 14]
 [ 8 49]]

Weighted F1-score after FakeVsSatire: 0.7722610096670247

--- Evaluation on all datasets ---
Evaluation on Celebrity: Weighted F1 = 0.4858
Evaluation on CIDII: Weighted F1 = 0.3953
Evaluation on FaKES: Weighted F1 = 0.4445
Evaluation on FakeVsSatire: Weighted F1 = 0.7723
Evaluation on Horne: Weighted F1 = 0.5961
Evaluation on Infodemic: Weighted F1 = 0.3526
Evaluation on ISOT: Weighted F1 = 0.4438
Evaluation on Kaggle_clement: Weighted F1 = 0.4097
Evaluation on Kaggle_meg: Weighted F1 = 0.4682
Evaluation on LIAR_PLUS: Weighted F1 = 0.4095
Evaluation on Politifact: Wei



Classification Report after Horne:
              precision    recall  f1-score   support

           0       0.80      0.90      0.85        41
           1       0.80      0.64      0.71        25

    accuracy                           0.80        66
   macro avg       0.80      0.77      0.78        66
weighted avg       0.80      0.80      0.80        66

Confusion Matrix after Horne:
[[37  4]
 [ 9 16]]

Weighted F1-score after Horne: 0.7977475908510391

--- Evaluation on all datasets ---
Evaluation on Celebrity: Weighted F1 = 0.5806
Evaluation on CIDII: Weighted F1 = 0.6254
Evaluation on FaKES: Weighted F1 = 0.4748
Evaluation on FakeVsSatire: Weighted F1 = 0.6646
Evaluation on Horne: Weighted F1 = 0.7977
Evaluation on Infodemic: Weighted F1 = 0.4667
Evaluation on ISOT: Weighted F1 = 0.5524
Evaluation on Kaggle_clement: Weighted F1 = 0.5690
Evaluation on Kaggle_meg: Weighted F1 = 0.6869
Evaluation on LIAR_PLUS: Weighted F1 = 0.4951
Evaluation on Politifact: Weighted F1 = 0.6395
Eva



Classification Report after Kaggle_meg:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      2514
           1       0.17      0.15      0.16        55

    accuracy                           0.97      2569
   macro avg       0.58      0.57      0.57      2569
weighted avg       0.96      0.97      0.97      2569

Confusion Matrix after Kaggle_meg:
[[2476   38]
 [  47    8]]

Weighted F1-score after Kaggle_meg: 0.9654685906366277

--- Evaluation on all datasets ---
Evaluation on Celebrity: Weighted F1 = 0.4172
Evaluation on CIDII: Weighted F1 = 0.4487
Evaluation on FaKES: Weighted F1 = 0.3803
Evaluation on FakeVsSatire: Weighted F1 = 0.2468
Evaluation on Horne: Weighted F1 = 0.4761
Evaluation on Infodemic: Weighted F1 = 0.3920
Evaluation on ISOT: Weighted F1 = 0.3323
Evaluation on Kaggle_clement: Weighted F1 = 0.3963
Evaluation on Kaggle_meg: Weighted F1 = 0.9655
Evaluation on LIAR_PLUS: Weighted F1 = 0.4097
Evaluation on Politifact: W



Classification Report after Politifact:
              precision    recall  f1-score   support

           0       0.79      0.95      0.87        65
           1       0.87      0.56      0.68        36

    accuracy                           0.81       101
   macro avg       0.83      0.75      0.77       101
weighted avg       0.82      0.81      0.80       101

Confusion Matrix after Politifact:
[[62  3]
 [16 20]]

Weighted F1-score after Politifact: 0.7997070893530032

--- Evaluation on all datasets ---
Evaluation on Celebrity: Weighted F1 = 0.4179
Evaluation on CIDII: Weighted F1 = 0.5136
Evaluation on FaKES: Weighted F1 = 0.4573
Evaluation on FakeVsSatire: Weighted F1 = 0.4824
Evaluation on Horne: Weighted F1 = 0.6848
Evaluation on Infodemic: Weighted F1 = 0.5102
Evaluation on ISOT: Weighted F1 = 0.5614
Evaluation on Kaggle_clement: Weighted F1 = 0.5794
Evaluation on Kaggle_meg: Weighted F1 = 0.8264
Evaluation on LIAR_PLUS: Weighted F1 = 0.4452
Evaluation on Politifact: Weighted 

In [6]:
# ---------------
# Results summary
# ---------------

# Dump the nested results mapping: outer key = dataset the model was trained
# on, inner key = dataset whose test split was scored.
print("\n=== Results Summary ===")
for trained_on, scores_by_test_set in results.items():
    print(f"\nResults after training on {trained_on}:")
    for evaluated_on, weighted_f1 in scores_by_test_set.items():
        print(f"  Test on {evaluated_on}: Weighted F1 = {weighted_f1:.4f}")


=== Results Summary ===

Results after training on Celebrity:
  Test on Celebrity: Weighted F1 = 0.5867
  Test on CIDII: Weighted F1 = 0.5747
  Test on FaKES: Weighted F1 = 0.4011
  Test on FakeVsSatire: Weighted F1 = 0.4506
  Test on Horne: Weighted F1 = 0.6382
  Test on Infodemic: Weighted F1 = 0.4439
  Test on ISOT: Weighted F1 = 0.4919
  Test on Kaggle_clement: Weighted F1 = 0.5220
  Test on Kaggle_meg: Weighted F1 = 0.8742
  Test on LIAR_PLUS: Weighted F1 = 0.4593
  Test on Politifact: Weighted F1 = 0.5542
  Test on Unipi_NDF: Weighted F1 = 0.4245

Results after training on CIDII:
  Test on Celebrity: Weighted F1 = 0.3967
  Test on CIDII: Weighted F1 = 0.8663
  Test on FaKES: Weighted F1 = 0.3803
  Test on FakeVsSatire: Weighted F1 = 0.4202
  Test on Horne: Weighted F1 = 0.6812
  Test on Infodemic: Weighted F1 = 0.4553
  Test on ISOT: Weighted F1 = 0.4186
  Test on Kaggle_clement: Weighted F1 = 0.4727
  Test on Kaggle_meg: Weighted F1 = 0.9002
  Test on LIAR_PLUS: Weighted F1 = 0

## VERSION 2: Dataset by Topic

In [7]:
# ----------------------------------------------
# Per-topic training and cross-topic evaluation
# ----------------------------------------------
# NOTE: this is NOT incremental fine-tuning. Pipeline.fit() re-learns the
# TF-IDF vocabulary and the LinearSVC weights from scratch on every call, so
# nothing is carried over from one phase to the next: each phase is an
# independent model trained on a single topic. A fresh pipeline is built per
# phase to make that explicit (the numbers are the same as when re-fitting one
# shared pipeline). The "Fine-tuning" wording in the printed headers is kept
# only so that the output matches previously recorded runs.

dataset_df = data_by_topic() # mapping: topic -> data

# split all datasets in train/val/test; each split is unpacked below as an (X, y) pair
datasets = {topic: split_dataset(df) for topic, df in dataset_df.items()}

results = {} # results[trained_on][tested_on] = weighted F1

# one independent training run per topic
for i, (topic, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on topic: {topic} ===")

    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    model = build_model() # fresh, unfitted pipeline for this phase
    # no hyper-parameter search is done here, so the validation split is merged into the training data
    model.fit(pd.concat([X_train, X_val]), pd.concat([y_train, y_val]))

    # in-domain evaluation: test split of the topic just trained on
    y_pred = model.predict(X_test)
    print(f"Classification Report after topic {topic}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after topic {topic}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after topic {topic}:", f1_score(y_test, y_pred, average="weighted"))


    # cross-topic evaluation: the model trained on `topic` scored on every test split
    print("\n--- Evaluation on all topics ---")
    results[topic] = {}
    for test_topic, test_data in datasets.items(): # for each topic
        X_te, y_te = test_data["test"]
        preds = model.predict(X_te)
        f1 = f1_score(y_te, preds, average="weighted")
        results[topic][test_topic] = f1
        print(f"Evaluation on topic {test_topic}: Weighted F1 = {f1:.4f}")

  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT



=== Phase 1: Training/Fine-tuning on topic: politics ===
Classification Report after topic politics:
              precision    recall  f1-score   support

         0.0       0.94      0.94      0.94     10094
         1.0       0.93      0.93      0.93      9402

    accuracy                           0.93     19496
   macro avg       0.93      0.93      0.93     19496
weighted avg       0.93      0.93      0.93     19496

Confusion Matrix after topic politics:
[[9476  618]
 [ 657 8745]]

Weighted F1-score after topic politics: 0.934597057541472

--- Evaluation on all topics ---
Evaluation on topic politics: Weighted F1 = 0.9346
Evaluation on topic general: Weighted F1 = 0.3693
Evaluation on topic covid: Weighted F1 = 0.3599
Evaluation on topic syria: Weighted F1 = 0.5463
Evaluation on topic islam: Weighted F1 = 0.4834
Evaluation on topic notredame: Weighted F1 = 0.4718
Evaluation on topic gossip: Weighted F1 = 0.3743

=== Phase 2: Training/Fine-tuning on topic: general ===




Classification Report after topic general:
              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98      2514
         1.0       0.17      0.15      0.16        55

    accuracy                           0.97      2569
   macro avg       0.58      0.57      0.57      2569
weighted avg       0.96      0.97      0.97      2569

Confusion Matrix after topic general:
[[2476   38]
 [  47    8]]

Weighted F1-score after topic general: 0.9654685906366277

--- Evaluation on all topics ---
Evaluation on topic politics: Weighted F1 = 0.3690
Evaluation on topic general: Weighted F1 = 0.9655
Evaluation on topic covid: Weighted F1 = 0.3920
Evaluation on topic syria: Weighted F1 = 0.3803
Evaluation on topic islam: Weighted F1 = 0.4487
Evaluation on topic notredame: Weighted F1 = 0.4654
Evaluation on topic gossip: Weighted F1 = 0.4172

=== Phase 3: Training/Fine-tuning on topic: covid ===
Classification Report after topic covid:
              precision    rec



Classification Report after topic syria:
              precision    recall  f1-score   support

         0.0       0.54      0.61      0.57        89
         1.0       0.49      0.42      0.46        80

    accuracy                           0.52       169
   macro avg       0.52      0.52      0.51       169
weighted avg       0.52      0.52      0.52       169

Confusion Matrix after topic syria:
[[54 35]
 [46 34]]

Weighted F1-score after topic syria: 0.516965739474774

--- Evaluation on all topics ---
Evaluation on topic politics: Weighted F1 = 0.4530
Evaluation on topic general: Weighted F1 = 0.7731
Evaluation on topic covid: Weighted F1 = 0.4655
Evaluation on topic syria: Weighted F1 = 0.5170
Evaluation on topic islam: Weighted F1 = 0.5146
Evaluation on topic notredame: Weighted F1 = 0.5759
Evaluation on topic gossip: Weighted F1 = 0.4987

=== Phase 5: Training/Fine-tuning on topic: islam ===
Classification Report after topic islam:
              precision    recall  f1-score  



Classification Report after topic gossip:
              precision    recall  f1-score   support

         0.0       0.58      0.68      0.62        50
         1.0       0.61      0.50      0.55        50

    accuracy                           0.59       100
   macro avg       0.59      0.59      0.59       100
weighted avg       0.59      0.59      0.59       100

Confusion Matrix after topic gossip:
[[34 16]
 [25 25]]

Weighted F1-score after topic gossip: 0.5866518802298619

--- Evaluation on all topics ---
Evaluation on topic politics: Weighted F1 = 0.5023
Evaluation on topic general: Weighted F1 = 0.8742
Evaluation on topic covid: Weighted F1 = 0.4439
Evaluation on topic syria: Weighted F1 = 0.4011
Evaluation on topic islam: Weighted F1 = 0.5747
Evaluation on topic notredame: Weighted F1 = 0.4245
Evaluation on topic gossip: Weighted F1 = 0.5867


In [8]:
# ---------------
# Results summary
# ---------------

# Dump the nested results mapping: outer key = topic the model was trained
# on, inner key = topic whose test split was scored.
print("\n=== Results Summary ===")
for trained_on, scores_by_topic in results.items():
    print(f"\nResults after training on topic {trained_on}:")
    for evaluated_on, weighted_f1 in scores_by_topic.items():
        print(f"  Test on topic {evaluated_on}: Weighted F1 = {weighted_f1:.4f}")


=== Results Summary ===

Results after training on topic politics:
  Test on topic politics: Weighted F1 = 0.9346
  Test on topic general: Weighted F1 = 0.3693
  Test on topic covid: Weighted F1 = 0.3599
  Test on topic syria: Weighted F1 = 0.5463
  Test on topic islam: Weighted F1 = 0.4834
  Test on topic notredame: Weighted F1 = 0.4718
  Test on topic gossip: Weighted F1 = 0.3743

Results after training on topic general:
  Test on topic politics: Weighted F1 = 0.3690
  Test on topic general: Weighted F1 = 0.9655
  Test on topic covid: Weighted F1 = 0.3920
  Test on topic syria: Weighted F1 = 0.3803
  Test on topic islam: Weighted F1 = 0.4487
  Test on topic notredame: Weighted F1 = 0.4654
  Test on topic gossip: Weighted F1 = 0.4172

Results after training on topic covid:
  Test on topic politics: Weighted F1 = 0.4845
  Test on topic general: Weighted F1 = 0.3855
  Test on topic covid: Weighted F1 = 0.9314
  Test on topic syria: Weighted F1 = 0.5101
  Test on topic islam: Weighted F

## VERSION 3: Dataset by Date

In [9]:
# Load the data grouped by publication period and report the size of each
# group, to show how unevenly the samples are spread before training on them.
dataset_df = data_by_date()

for period, period_df in dataset_df.items():
    n_samples = len(period_df)
    print(f"Date: {period}, Number of samples: {n_samples}")

  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT


Date: 2011-2013, Number of samples: 55
Date: 2014, Number of samples: 114
Date: 2015, Number of samples: 84
Date: 2016, Number of samples: 49687
Date: 2017, Number of samples: 16657
Date: 2020, Number of samples: 10559


In [10]:
# ----------------------------------------------
# Per-period training and cross-period evaluation
# ----------------------------------------------
# NOTE: this is NOT incremental fine-tuning. Pipeline.fit() re-learns the
# TF-IDF vocabulary and the LinearSVC weights from scratch on every call, so
# nothing is carried over from one phase to the next: each phase is an
# independent model trained on a single date bucket. A fresh pipeline is built
# per phase to make that explicit (the numbers are the same as when re-fitting
# one shared pipeline). The "Fine-tuning" wording in the printed headers is
# kept only so that the output matches previously recorded runs.

dataset_df = data_by_date() # mapping: date bucket -> data

# split all datasets in train/val/test; each split is unpacked below as an (X, y) pair
datasets = {date: split_dataset(df) for date, df in dataset_df.items()}

results = {} # results[trained_on][tested_on] = weighted F1

# one independent training run per date bucket
for i, (date, data) in enumerate(datasets.items()):
    print(f"\n=== Phase {i+1}: Training/Fine-tuning on date: {date} ===")

    X_train, y_train = data["train"]
    X_val, y_val = data["val"]
    X_test, y_test = data["test"]

    model = build_model() # fresh, unfitted pipeline for this phase
    # no hyper-parameter search is done here, so the validation split is merged into the training data
    model.fit(pd.concat([X_train, X_val]), pd.concat([y_train, y_val]))

    # in-domain evaluation: test split of the date bucket just trained on
    y_pred = model.predict(X_test)
    print(f"Classification Report after date {date}:")
    print(classification_report(y_test, y_pred))
    print(f"Confusion Matrix after date {date}:")
    print(confusion_matrix(y_test, y_pred))
    print(f"\nWeighted F1-score after date {date}:", f1_score(y_test, y_pred, average="weighted"))


    # cross-period evaluation: the model trained on `date` scored on every test split
    print("\n--- Evaluation on all dates ---")
    results[date] = {}
    for test_date, test_data in datasets.items(): # for each date
        X_te, y_te = test_data["test"]
        preds = model.predict(X_te)
        f1 = f1_score(y_te, preds, average="weighted")
        results[date][test_date] = f1
        print(f"Evaluation on date {test_date}: Weighted F1 = {f1:.4f}")

  dfKaggleMeg['date'] = pd.to_datetime(dfKaggleMeg['date'], errors='coerce') # convert date column to datetime, coerce errors to NaT



=== Phase 1: Training/Fine-tuning on date: 2011-2013 ===
Classification Report after date 2011-2013:
              precision    recall  f1-score   support

         0.0       0.40      0.40      0.40         5
         1.0       0.50      0.50      0.50         6

    accuracy                           0.45        11
   macro avg       0.45      0.45      0.45        11
weighted avg       0.45      0.45      0.45        11

Confusion Matrix after date 2011-2013:
[[2 3]
 [3 3]]

Weighted F1-score after date 2011-2013: 0.45454545454545453

--- Evaluation on all dates ---
Evaluation on date 2011-2013: Weighted F1 = 0.4545
Evaluation on date 2014: Weighted F1 = 0.3380
Evaluation on date 2015: Weighted F1 = 0.3827




Evaluation on date 2016: Weighted F1 = 0.6458
Evaluation on date 2017: Weighted F1 = 0.7272
Evaluation on date 2020: Weighted F1 = 0.3871

=== Phase 2: Training/Fine-tuning on date: 2014 ===
Classification Report after date 2014:
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        12
         1.0       0.40      0.73      0.52        11

    accuracy                           0.35        23
   macro avg       0.20      0.36      0.26        23
weighted avg       0.19      0.35      0.25        23

Confusion Matrix after date 2014:
[[ 0 12]
 [ 3  8]]

Weighted F1-score after date 2014: 0.2468443197755961

--- Evaluation on all dates ---
Evaluation on date 2011-2013: Weighted F1 = 0.2338
Evaluation on date 2014: Weighted F1 = 0.2468
Evaluation on date 2015: Weighted F1 = 0.3992
Evaluation on date 2016: Weighted F1 = 0.4278
Evaluation on date 2017: Weighted F1 = 0.3039
Evaluation on date 2020: Weighted F1 = 0.4082

=== Phase 3: Trainin



Classification Report after date 2017:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      3318
         1.0       0.29      0.14      0.19        14

    accuracy                           0.99      3332
   macro avg       0.64      0.57      0.59      3332
weighted avg       0.99      0.99      0.99      3332

Confusion Matrix after date 2017:
[[3313    5]
 [  12    2]]

Weighted F1-score after date 2017: 0.9940503071618532

--- Evaluation on all dates ---
Evaluation on date 2011-2013: Weighted F1 = 0.2841
Evaluation on date 2014: Weighted F1 = 0.6217
Evaluation on date 2015: Weighted F1 = 0.3827
Evaluation on date 2016: Weighted F1 = 0.3778
Evaluation on date 2017: Weighted F1 = 0.9941
Evaluation on date 2020: Weighted F1 = 0.3621

=== Phase 6: Training/Fine-tuning on date: 2020 ===
Classification Report after date 2020:
              precision    recall  f1-score   support

         0.0       0.94      0.91      0.92      1106
   

In [11]:
# ---------------
# Results summary
# ---------------

# Dump the nested results mapping: outer key = date bucket the model was
# trained on, inner key = date bucket whose test split was scored.
print("\n=== Results Summary ===")
for trained_on, scores_by_date in results.items():
    print(f"\nResults after training on date {trained_on}:")
    for evaluated_on, weighted_f1 in scores_by_date.items():
        print(f"  Test on date {evaluated_on}: Weighted F1 = {weighted_f1:.4f}")


=== Results Summary ===

Results after training on date 2011-2013:
  Test on date 2011-2013: Weighted F1 = 0.4545
  Test on date 2014: Weighted F1 = 0.3380
  Test on date 2015: Weighted F1 = 0.3827
  Test on date 2016: Weighted F1 = 0.6458
  Test on date 2017: Weighted F1 = 0.7272
  Test on date 2020: Weighted F1 = 0.3871

Results after training on date 2014:
  Test on date 2011-2013: Weighted F1 = 0.2338
  Test on date 2014: Weighted F1 = 0.2468
  Test on date 2015: Weighted F1 = 0.3992
  Test on date 2016: Weighted F1 = 0.4278
  Test on date 2017: Weighted F1 = 0.3039
  Test on date 2020: Weighted F1 = 0.4082

Results after training on date 2015:
  Test on date 2011-2013: Weighted F1 = 0.3961
  Test on date 2014: Weighted F1 = 0.3453
  Test on date 2015: Weighted F1 = 0.5193
  Test on date 2016: Weighted F1 = 0.4075
  Test on date 2017: Weighted F1 = 0.8908
  Test on date 2020: Weighted F1 = 0.3901

Results after training on date 2016:
  Test on date 2011-2013: Weighted F1 = 0.1818
