In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from tqdm import tqdm

from utils.datasets import get_datasets
from utils.models import get_models

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DATASET_SAMPLE_SIZE = 10


def _sample_evenly(dataframe, sample_size):
    """Return at most `sample_size` rows of `dataframe`, picked at a regular stride.

    The stride is ``len(dataframe) // sample_size``, clamped to a minimum of 1.
    Without the clamp, a frame with fewer than `sample_size` rows (including an
    empty one) would produce a stride of 0 and ``iloc[::0]`` raises
    ``ValueError: slice step cannot be zero``. With the clamp such frames are
    returned unchanged.
    """
    step = max(1, len(dataframe) // sample_size)
    return dataframe.iloc[::step].head(sample_size)


# get_datasets() is called twice so the two lists can be reduced differently
# below (presumably each call returns fresh dataset objects — TODO confirm).
datasets = get_datasets()
datasets_without_center = get_datasets()

for dataset in datasets:
    dataset.dataframe = _sample_evenly(dataset.dataframe, DATASET_SAMPLE_SIZE)

for dataset in datasets_without_center:
    # Filter out "center" rows first, then sample, so the sample is drawn
    # from the remaining rows only.
    non_center_rows = dataset.dataframe[dataset.dataframe["leaning"] != "center"]
    dataset.dataframe = _sample_evenly(non_center_rows, DATASET_SAMPLE_SIZE)

models = get_models()

In [3]:
TRUNCATE_TOKENS = True

# accuracy_results[model_index][dataset_index] is a display string of the form
# "<correct>/<evaluated> (<accuracy in percent> %)".
accuracy_results = [["" for _ in range(len(datasets))] for _ in range(len(models))]

for model_index, model in enumerate(models):
    # Models that do not support the "center" leaning are evaluated on the
    # datasets from which "center" rows were removed.
    dataset_pool = datasets if model.supports_center_leaning else datasets_without_center

    for dataset_index in range(len(datasets)):
        dataset = dataset_pool[dataset_index]

        predictions = []
        for body in tqdm(dataset.dataframe["body"]):
            try:
                predictions.append(model.predict(body, TRUNCATE_TOKENS))
            except RuntimeError:
                # With truncation enabled a RuntimeError is re-raised; without
                # it the sample is recorded as None and skipped when scoring
                # (presumably the input was too long for the model — TODO confirm).
                if TRUNCATE_TOKENS:
                    raise
                predictions.append(None)

        # Score only the samples for which the model produced a prediction.
        valid_indices = [i for i, prediction in enumerate(predictions) if prediction is not None]
        predictions = [predictions[i].value for i in valid_indices]
        true_leanings = dataset.dataframe["leaning"].iloc[valid_indices].tolist()

        # Fall back to a float 0.0 (not int 0) when nothing could be scored, so
        # the percentage renders as "0.0 %" like every other cell of the table.
        accuracy = accuracy_score(true_leanings, predictions) if predictions else 0.0

        accuracy_results[model_index][dataset_index] = (
            f"{len(valid_indices) * accuracy:.0f}/{len(valid_indices)} ({np.round(accuracy * 100, 2)} %)"
        )

100%|██████████| 10/10 [00:14<00:00,  1.44s/it]
100%|██████████| 10/10 [00:09<00:00,  1.08it/s]
100%|██████████| 10/10 [00:02<00:00,  4.93it/s]
100%|██████████| 10/10 [00:11<00:00,  1.14s/it]
100%|██████████| 10/10 [00:11<00:00,  1.14s/it]
100%|██████████| 10/10 [01:09<00:00,  6.92s/it]
100%|██████████| 10/10 [00:28<00:00,  2.81s/it]
100%|██████████| 10/10 [00:03<00:00,  2.96it/s]
100%|██████████| 10/10 [00:31<00:00,  3.11s/it]
100%|██████████| 10/10 [00:43<00:00,  4.32s/it]
100%|██████████| 10/10 [00:06<00:00,  1.56it/s]
100%|██████████| 10/10 [00:04<00:00,  2.12it/s]
100%|██████████| 10/10 [00:00<00:00, 10.37it/s]
100%|██████████| 10/10 [00:05<00:00,  1.68it/s]
100%|██████████| 10/10 [00:05<00:00,  1.91it/s]
100%|██████████| 10/10 [00:09<00:00,  1.04it/s]
100%|██████████| 10/10 [00:07<00:00,  1.33it/s]
100%|██████████| 10/10 [00:01<00:00,  5.54it/s]
100%|██████████| 10/10 [00:11<00:00,  1.18s/it]
100%|██████████| 10/10 [00:10<00:00,  1.10s/it]
100%|██████████| 10/10 [00:05<00:00,  1.

In [4]:
# Build the summary table: one row per model, one column per dataset, each
# cell holding the accuracy string stored in accuracy_results.
model_names = [model.name for model in models]
dataset_names = [dataset.name for dataset in datasets]

results_df = pd.DataFrame(accuracy_results, index=model_names, columns=dataset_names)
results_df

Unnamed: 0,article_bias_prediction,commoncrawl_news_articles,qbias,webis_bias_flipper_18,webis_news_bias_20
PoliticalBiasBert,5/10 (50.0 %),3/10 (30.0 %),5/10 (50.0 %),5/10 (50.0 %),3/10 (30.0 %)
PoliticalBiasPredictionAllsidesDeberta,5/10 (50.0 %),5/10 (50.0 %),7/10 (70.0 %),6/10 (60.0 %),7/10 (70.0 %)
DistilBertPoliticalBias,3/10 (30.0 %),1/10 (10.0 %),5/10 (50.0 %),3/10 (30.0 %),6/10 (60.0 %)
BertPoliticalBiasFineTune,3/10 (30.0 %),5/10 (50.0 %),6/10 (60.0 %),4/10 (40.0 %),2/10 (20.0 %)
DistilBertPoliticalFinetune,5/10 (50.0 %),4/10 (40.0 %),5/10 (50.0 %),6/10 (60.0 %),3/10 (30.0 %)
