In [9]:
from itertools import chain
from pathlib import Path

from datasets import concatenate_datasets

from utils.dataset_utils import get_politicalness_datasets, \
    get_politicalness_datasets_from_leaning_datasets
from utils.model_utils import evaluate_models, get_custom_politicalness_models

In [10]:
# Size passed to take_even_class_distribution_sample for every dataset.
DATASET_SAMPLE_SIZE = 1_000

# Zero-argument factory that chains both politicalness dataset sources into a
# single iterable.
GET_DATASETS = lambda: chain(
    get_politicalness_datasets(),
    get_politicalness_datasets_from_leaning_datasets(),
)

# For each source dataset: draw a sample (class-balanced, judging by the method
# name), apply transform_for_inference, and convert it with to_huggingface.
# Materialised as a list because the result is consumed by several later cells.
datasets = [
    (
        source_dataset
        .take_even_class_distribution_sample(DATASET_SAMPLE_SIZE)
        .transform_for_inference()
        .to_huggingface()
    )
    for source_dataset in GET_DATASETS()
]

In [None]:
# Wrapped in a zero-argument callable so each evaluation run can request the
# models again rather than share a single generator that may already be
# exhausted.
GET_MODELS = lambda: get_custom_politicalness_models(
    Path("dataset_benchmark", "leave_one_in", "politicalness")
)

# Evaluate the models against each sampled dataset separately.
results = evaluate_models(GET_MODELS, datasets)

In [None]:
# Display the `count` attribute of the per-dataset evaluation results
# (presumably evaluated sample counts -- confirm against evaluate_models).
results.count

In [None]:
# Display the `accuracy` attribute of the per-dataset evaluation results.
results.accuracy

In [None]:
# Display the `f1` attribute of the per-dataset evaluation results.
results.f1

In [None]:
# Display the `precision` attribute of the per-dataset evaluation results.
results.precision

In [None]:
# Display the `recall` attribute of the per-dataset evaluation results.
results.recall

In [None]:
# Pool all sampled datasets into a single dataset and evaluate the models on
# it. evaluate_models is given a list, hence the one-element list wrapper.
concatenated_dataset = concatenate_datasets(datasets)

concatenated_results = evaluate_models(GET_MODELS, [concatenated_dataset])

In [None]:
# Display the `count` attribute of the pooled-dataset evaluation results
# (presumably evaluated sample counts -- confirm against evaluate_models).
concatenated_results.count

In [None]:
# Display the `accuracy` attribute of the pooled-dataset evaluation results.
concatenated_results.accuracy

In [None]:
# Display the `f1` attribute of the pooled-dataset evaluation results.
concatenated_results.f1

In [None]:
# Display the `precision` attribute of the pooled-dataset evaluation results.
concatenated_results.precision

In [None]:
# Display the `recall` attribute of the pooled-dataset evaluation results.
concatenated_results.recall