# Results Analysis on SummEval

## Imports

In [6]:
import pandas as pd
import ast
from scipy.stats import kendalltau

## Analysis

In [8]:
files = {
    "Single": "Results/single.csv",
    "Parallel": "Results/parallel.csv",
    "Cooperative": "Results/cooperative.csv",
    "Competitive": "Results/competitive.csv"
}

def parse_and_evaluate(path):
    df = pd.read_csv(path)
    parsed = {dim: df[dim].apply(ast.literal_eval) for dim in df.columns}
    gt = pd.DataFrame({dim: parsed[dim].apply(lambda x: x["ground_truth"]) for dim in parsed})
    sys = pd.DataFrame({dim: parsed[dim].apply(lambda x: x["system_decision"]) for dim in parsed})
    dev = pd.DataFrame({dim: parsed[dim].apply(lambda x: x["deviation"]) for dim in parsed})
    
    kendalls = {dim: kendalltau(gt[dim], sys[dim]).correlation for dim in gt.columns}
    avg_kendall = sum(kendalls.values()) / len(kendalls)
    avg_dev_dim = dev.mean().to_dict()
    avg_dev_all = dev.values.flatten().mean()
    avg_gt_dim = gt.mean().to_dict()
    avg_sys_dim = sys.mean().to_dict()
    avg_gt_all = gt.values.flatten().mean()
    avg_sys_all = sys.values.flatten().mean()

    summary = pd.DataFrame({
        "Kendall Tau": kendalls,
        "Ø Abweichung": avg_dev_dim,
        "Ø Ground Truth": avg_gt_dim,
        "Ø System": avg_sys_dim
    })

    overview = {
        "Ø Kendall Tau (Dimensionen)": avg_kendall,
        "Ø Abweichung (gesamt)": avg_dev_all,
        "Ø Ground Truth (gesamt)": avg_gt_all,
        "Ø System (gesamt)": avg_sys_all
    }

    return summary.round(3), overview

for name, path in files.items():
    summary, overview = parse_and_evaluate(path)
    print(f"\n==== {name} – Tabelle pro Dimension ====")
    print(summary)
    print(f"\n==== {name} – Durchschnittswerte Gesamt ====")
    for k, v in overview.items():
        print(f"{k}: {v:.3f}")


==== Single – Tabelle pro Dimension ====
             Kendall Tau  Ø Abweichung  Ø Ground Truth  Ø System
relevance            NaN         1.333           3.667     3.000
coherence          0.816         0.667           3.333     2.667
fluency            0.000         1.333           4.333     3.000
consistency        1.000         0.000           4.000     4.000

==== Single – Durchschnittswerte Gesamt ====
Ø Kendall Tau (Dimensionen): nan
Ø Abweichung (gesamt): 0.833
Ø Ground Truth (gesamt): 3.833
Ø System (gesamt): 3.167

==== Parallel – Tabelle pro Dimension ====
             Kendall Tau  Ø Abweichung  Ø Ground Truth  Ø System
relevance          0.816         1.000           3.667     2.667
coherence          0.816         0.667           3.333     2.667
fluency            0.000         1.333           4.333     3.000
consistency        1.000         1.000           4.000     3.000

==== Parallel – Durchschnittswerte Gesamt ====
Ø Kendall Tau (Dimensionen): 0.658
Ø Abweichung (ges