In [1]:
from pathlib import Path

from utils.dataset_utils import get_leaning_datasets, leaning_with_center_label_mapping
from utils.model_utils import evaluate_models, \
    get_custom_leaning_models

In [2]:
# Zero-argument factory: every call asks get_leaning_datasets() for a fresh
# iterable, so the datasets can be walked once per evaluation variant below.
GET_DATASETS = lambda: get_leaning_datasets()
# Size handed to take_even_class_distribution_sample() for every dataset.
DATASET_SAMPLE_SIZE = 1_000

# NOTE(review): the recorded output of this cell is
# "TypeError: Dataset.take_even_class_distribution_sample() missing 1 required
# positional argument: 'size'". The call below does pass one positional argument,
# so presumably the method is being invoked on something other than a Dataset
# instance (or its signature changed) -- confirm against utils.dataset_utils.

# Variant 1: sample each dataset and transform it with the mapping that includes "center".
datasets_with_center = [
    dataset.take_even_class_distribution_sample(DATASET_SAMPLE_SIZE)
    .transform_for_inference(leaning_with_center_label_mapping)
    .to_huggingface()
    for dataset in GET_DATASETS()
]

# Variant 2: drop the "center" rows first, then sample and transform with the default mapping.
datasets_without_center = []
for dataset in GET_DATASETS():
    non_center_rows = dataset.dataframe[dataset.dataframe["leaning"] != "center"]
    # Build the cleaned frame with .assign() instead of writing a column into the
    # filtered slice; this avoids pandas' chained-assignment (SettingWithCopy) path.
    # The .cat accessor requires "leaning" to be categorical, as in the original code.
    dataset.dataframe = non_center_rows.assign(
        leaning=non_center_rows["leaning"].cat.remove_unused_categories()
    )

    dataset = dataset.take_even_class_distribution_sample(DATASET_SAMPLE_SIZE)
    datasets_without_center.append(dataset.transform_for_inference().to_huggingface())

TypeError: Dataset.take_even_class_distribution_sample() missing 1 required positional argument: 'size'

In [None]:
# Kept as a zero-argument lambda rather than a stored generator: each call
# produces a fresh generator of models, so it can be consumed more than once.
GET_MODELS = lambda: get_custom_leaning_models(
    Path("dataset_benchmark", "leave_one_in", "political_leaning")
)
# Forwarded as the third positional argument of evaluate_models().
TRUNCATE_TOKENS = True


def get_filtered_models(supports_center_leaning_class: bool):
    """Lazily yield the models from GET_MODELS() whose center-class support matches.

    Args:
        supports_center_leaning_class: Value compared (with ``==``) against each
            model's ``supports_center_leaning_class`` attribute.

    Yields:
        Every model for which the comparison is truthy, in the order produced
        by GET_MODELS().
    """
    for candidate in GET_MODELS():
        if candidate.supports_center_leaning_class == supports_center_leaning_class:
            yield candidate


# Models that support the center class, evaluated on the datasets that were
# transformed with the center-inclusive label mapping.
evaluate_models(
    lambda: get_filtered_models(supports_center_leaning_class=True),
    datasets_with_center,
    TRUNCATE_TOKENS,
)

In [None]:
# Models without center-class support, evaluated on the datasets from which
# the "center" rows were removed.
evaluate_models(
    lambda: get_filtered_models(supports_center_leaning_class=False),
    datasets_without_center,
    TRUNCATE_TOKENS,
)