In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from tqdm import tqdm

from utils.datasets import get_datasets
from utils.models import get_dataset_benchmark_models, get_existing_models

In [None]:
DATASET_SAMPLE_SIZE = 1000


def _subsample_evenly(dataframe, sample_size):
    """Return at most ``sample_size`` rows spread evenly over ``dataframe``.

    Frames that already have ``sample_size`` rows or fewer are returned
    unchanged. Otherwise exactly ``sample_size`` rows are selected at the
    positions ``i * len(dataframe) // sample_size``, which cover the whole
    frame from the first row towards the last.

    A plain ``iloc[::len // sample_size].head(sample_size)`` only covers the
    whole frame when the length is an exact multiple of ``sample_size``; e.g.
    1999 rows give a stride of 1 and the last 999 rows are never sampled.
    For exact multiples both approaches select the same rows.
    """
    row_count = len(dataframe)
    if row_count <= sample_size:
        return dataframe

    # Integer arithmetic keeps the positions strictly increasing (hence
    # unique) whenever row_count > sample_size, with no float rounding.
    positions = [i * row_count // sample_size for i in range(sample_size)]
    return dataframe.iloc[positions]


# Loaded twice so the "without center" variants can be filtered separately.
# NOTE(review): presumably each get_datasets() call returns independent
# objects — confirm, otherwise the two loops below would affect each other.
datasets = get_datasets()
datasets_without_center = get_datasets()

for dataset in datasets:
    dataset.dataframe = _subsample_evenly(dataset.dataframe, DATASET_SAMPLE_SIZE)

for dataset in datasets_without_center:
    # Drop "center" rows before sampling so the sample size applies to the
    # rows that remain.
    dataset.dataframe = _subsample_evenly(
        dataset.dataframe[dataset.dataframe["leaning"] != "center"],
        DATASET_SAMPLE_SIZE,
    )

In [None]:
TRUNCATE_TOKENS = True
MODELS = get_existing_models()

# One row per model; each cell is a "correct/total (percent %)" string for one dataset.
accuracy_results = []

for model in MODELS:
    print(f"evaluating {model.name} on:")
    model_row = []
    accuracy_results.append(model_row)

    for dataset_index in range(len(datasets)):
        # Models without a "center" class are scored on the center-free copy
        # of the same dataset.
        if model.supports_center_leaning:
            dataset = datasets[dataset_index]
        else:
            dataset = datasets_without_center[dataset_index]
        print(f"  {dataset.name}")

        outcomes = []
        for body in tqdm(dataset.dataframe["body"]):
            try:
                outcome = model.predict(body, TRUNCATE_TOKENS)
            except RuntimeError:
                # With truncation enabled a RuntimeError is unexpected, so it
                # is re-raised; without it the sample is recorded as skipped.
                if TRUNCATE_TOKENS:
                    raise
                outcome = None
            outcomes.append(outcome)

        # Positions of the samples that actually produced a prediction.
        scored_positions = [
            position for position, outcome in enumerate(outcomes) if outcome is not None
        ]
        predicted_labels = [outcomes[position].value for position in scored_positions]

        if len(predicted_labels) > 0:
            true_labels = dataset.dataframe["leaning"].iloc[scored_positions].tolist()
            accuracy = accuracy_score(true_labels, predicted_labels)
        else:
            # Nothing could be scored; report zero accuracy.
            accuracy = 0

        scored_count = len(scored_positions)
        model_row.append(
            f"{scored_count * accuracy:.0f}/{scored_count} ({np.round(accuracy * 100, 2)} %)"
        )

In [None]:
# Rows of accuracy_results were appended while iterating MODELS, and each row
# has one entry per element of `datasets`, so the labels must come from those
# same sequences. Building the index from a different model list could
# mislabel rows or raise on a length mismatch.
results_df = pd.DataFrame(
    accuracy_results,
    index=[model.name for model in MODELS],
    columns=[dataset.name for dataset in datasets],
)

results_df