In [None]:
# Development convenience: load IPython's autoreload extension and use mode 2,
# so imported modules are reloaded before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
from size_matters.aggregation.aggregators import (
    Aggregator,
    StandardApprovalAggregator,
    CondorcetAggregator,
    EuclidAggregator,
    DiceAggregator,
    JaccardAggregator,
)
import pandas as pd
from pathlib import Path
from size_matters.utils.inventory import COLUMNS
from sklearn.metrics import accuracy_score

In [None]:
# Notebook configuration.
# FOLDER_PATH: dataset folder (relative to the notebook) holding raw.csv and the exported CSVs.
FOLDER_PATH = Path("..") / "data" / "countries"

# ALTERNATIVES: the candidate answers; they become the indicator columns of the annotations.
ALTERNATIVES = [
    "France",
    "Tunisia",
    "Egypt",
    "Greece",
    "Spain",
]

# EXPORT: set to True to write the preprocessed tables to CSV files in FOLDER_PATH.
EXPORT = False

## Load and preprocess raw data

In [None]:
# Read the raw export and drop the "Horodateur" and "Score" columns in one chain,
# leaving the pseudo column plus the remaining columns (presumably one per
# question - confirm against raw.csv).
raw = pd.read_csv(FOLDER_PATH / "raw.csv").drop(columns=["Horodateur", "Score"])

In [None]:
# Split the rows by pseudo: rows whose pseudo equals "GroudTruth" (spelling as
# matched here) go to `groundtruth`, every other row goes to `annotations`.
# The pseudo column itself is removed from both.
is_groundtruth = raw["Choose you pseudo"] == "GroudTruth"
answers_only = raw.drop(columns=["Choose you pseudo"])

groundtruth = answers_only[is_groundtruth]
annotations = answers_only[~is_groundtruth]

### Prepare ground truth

In [None]:
# Build the one-hot ground-truth table: one row per question and one indicator
# column per distinct answer found in the ground-truth row.
#
# The ground-truth row is selected by position rather than by the index label 0,
# so the cell no longer depends on that row being the first line of raw.csv.
# Requiring exactly one row also makes an accidental re-run of this cell (when
# `groundtruth` is already one-hot encoded) fail loudly.
if len(groundtruth) != 1:
    raise ValueError(
        f"Expected exactly one ground-truth row, found {len(groundtruth)}."
    )

# Series indexed by the question columns; the categorical dtype drives the
# indicator columns produced by get_dummies.
truth_answers = groundtruth.iloc[0].astype("category")
groundtruth = pd.get_dummies(truth_answers)
groundtruth.index.name = COLUMNS.question

In [None]:
# Optionally persist the one-hot ground truth (only when EXPORT is enabled).
if EXPORT:
    groundtruth_csv = FOLDER_PATH / "groundtruth.csv"
    groundtruth.to_csv(groundtruth_csv)

### Prepare annotations

In [None]:
# Reshape the voters' answers to long format: one entry per (question, voter).
# Missing answers are dropped explicitly instead of relying on stack() doing it
# implicitly: the newer stack implementation keeps NaN, and a NaN entry would
# break the membership test below (`in` on a float raises TypeError).
answers = annotations.T.stack().dropna()
answers.index.names = [COLUMNS.question, COLUMNS.voter]

# Each cell may hold several approved alternatives separated by ", ".
multi_answers = answers.str.split(", ", expand=False)

# One 0/1 indicator column per alternative, in the order given by ALTERNATIVES.
# Answers that are not listed in ALTERNATIVES are ignored.
annotations = pd.DataFrame(
    {
        alternative: multi_answers.apply(lambda chosen: int(alternative in chosen))
        for alternative in ALTERNATIVES
    },
    index=multi_answers.index,
    columns=ALTERNATIVES,
)

In [None]:
# Optionally persist the indicator-encoded annotations (only when EXPORT is enabled).
if EXPORT:
    annotations_csv = FOLDER_PATH / "annotations.csv"
    annotations.to_csv(annotations_csv)

## Aggregate and compare performances

In [None]:
# The aggregation rules to compare, each instantiated with its default arguments.
AGGREGATORS: list[Aggregator] = [
    aggregator_class()
    for aggregator_class in (
        StandardApprovalAggregator,
        CondorcetAggregator,
        EuclidAggregator,
        DiceAggregator,
        JaccardAggregator,
    )
]

In [None]:
def _accuracy_of(aggregator: Aggregator) -> float:
    """Aggregate the annotations with `aggregator` and score the result against the ground truth."""
    # NOTE(review): accuracy_score receives pandas objects here and presumably
    # compares them by position, not by label - confirm that aggregate() returns
    # its rows and columns in the same order as `groundtruth`.
    predictions = aggregator.aggregate(annotations)
    return accuracy_score(groundtruth, predictions)


# Accuracy of every aggregator, keyed by the aggregator's `type` attribute.
accuracies = pd.Series(
    {aggregator.type: _accuracy_of(aggregator) for aggregator in AGGREGATORS}
)

In [None]:
# Show the accuracy obtained by each aggregator.
accuracies