In [None]:
from itertools import chain
from pathlib import Path

from datasets import DatasetInfo, concatenate_datasets

from utils.base_directory import base_directory
from utils.dataset_utils import get_politicalness_datasets, \
    get_politicalness_datasets_from_leaning_datasets, politicalness_label_mapping
from utils.model_utils import evaluate_models, get_custom_politicalness_models


In [None]:
def GET_DATASETS():
    """Return a fresh iterator over every politicalness dataset.

    Chains the output of ``get_politicalness_datasets()`` with the output of
    ``get_politicalness_datasets_from_leaning_datasets()``. A new iterator is
    built on each call, so the datasets can be iterated more than once.
    """
    return chain(
        get_politicalness_datasets(),
        get_politicalness_datasets_from_leaning_datasets(),
    )


# Size passed to `take_even_class_sample_by_size` for every dataset.
DATASET_SAMPLE_SIZE = 1_000

# Sample each dataset, prepare it for inference and convert it to a
# Hugging Face dataset; materialised as a list so later cells can reuse it.
# NOTE(review): if `take_even_class_sample_by_size` samples randomly, a fixed
# seed is needed for the metrics below to be reproducible — confirm.
datasets = [
    dataset
    .take_even_class_sample_by_size(DATASET_SAMPLE_SIZE)
    .transform_for_inference()
    .to_huggingface()
    for dataset in GET_DATASETS()
]

In [None]:
def GET_MODELS():
    """Return the custom politicalness models of the leave-one-in benchmark.

    Kept as a zero-argument callable (rather than a single precomputed value)
    so that the model generator can be created again for every evaluation run.
    """
    return get_custom_politicalness_models(
        Path("dataset_benchmark") / "leave_one_in" / "politicalness"
    )


# Evaluate the models on each of the sampled datasets.
results = evaluate_models(
    GET_MODELS,
    datasets,
)

In [None]:
# Show the `count` attribute of the results from evaluating the models on the individual datasets.
results.count

In [None]:
# Show the `accuracy` attribute of the results from evaluating the models on the individual datasets.
results.accuracy

In [None]:
# Show the `f1` attribute of the results from evaluating the models on the individual datasets.
results.f1

In [None]:
# Show the `precision` attribute of the results from evaluating the models on the individual datasets.
results.precision

In [None]:
# Show the `recall` attribute of the results from evaluating the models on the individual datasets.
results.recall

In [None]:
# Save the confusion-matrix images of the individual-dataset results under
# <base_directory>/analysis/model_evaluation/politicalness/confusion_matrix_results/separate.
separate_confusion_matrix_dir = (
    Path(base_directory)
    / "analysis"
    / "model_evaluation"
    / "politicalness"
    / "confusion_matrix_results"
    / "separate"
)
results.save_confusion_matrix_images(
    separate_confusion_matrix_dir,
    politicalness_label_mapping,
)

In [None]:
# Concatenate all sampled datasets into a single dataset named "aggregate"
# and evaluate the same models on it.
aggregate_info = DatasetInfo(dataset_name="aggregate")
aggregate_dataset = concatenate_datasets(datasets, info=aggregate_info)

aggregate_results = evaluate_models(GET_MODELS, [aggregate_dataset])

In [None]:
# Show the `count` attribute of the results from evaluating the models on the concatenated dataset.
aggregate_results.count

In [None]:
# Show the `accuracy` attribute of the results from evaluating the models on the concatenated dataset.
aggregate_results.accuracy

In [None]:
# Show the `f1` attribute of the results from evaluating the models on the concatenated dataset.
aggregate_results.f1

In [None]:
# Show the `precision` attribute of the results from evaluating the models on the concatenated dataset.
aggregate_results.precision

In [None]:
# Show the `recall` attribute of the results from evaluating the models on the concatenated dataset.
aggregate_results.recall

In [None]:
# Save the confusion-matrix images of the aggregate-dataset results under
# <base_directory>/analysis/model_evaluation/politicalness/confusion_matrix_results/aggregate.
aggregate_confusion_matrix_dir = (
    Path(base_directory)
    / "analysis"
    / "model_evaluation"
    / "politicalness"
    / "confusion_matrix_results"
    / "aggregate"
)
aggregate_results.save_confusion_matrix_images(
    aggregate_confusion_matrix_dir,
    politicalness_label_mapping,
)