# Results Analysis on SummEval

## Imports

In [1]:
import pandas as pd
import ast
from scipy.stats import kendalltau

## Data

In [2]:
# Load the evaluation results from CSV into a DataFrame.
# NOTE(review): the analysis below runs ast.literal_eval on every column and
# then indexes each cell with "ground_truth", "system_decision" and
# "deviation", so each cell is expected to be a Python-literal dict string
# carrying those keys -- confirm against the code that writes this file.
single_results = pd.read_csv("Results/single.csv")

## Analysis

In [3]:
def parse_dict_column(col: pd.Series) -> pd.Series:
    """Parse every cell of *col* from its Python-literal string form.

    Each element is passed through ``ast.literal_eval``, so a cell such as
    ``"{'a': 1}"`` becomes the dict ``{'a': 1}``. The index and name of the
    input Series are carried over to the result.

    Args:
        col: Series whose elements are strings containing Python literals.

    Returns:
        A Series of the evaluated objects, aligned with ``col``.

    Raises:
        ValueError, SyntaxError: propagated from ``ast.literal_eval`` when a
            cell is not a valid Python literal.
    """
    evaluated = col.map(ast.literal_eval)
    return evaluated

# === Convert the stringified dictionary cells into real dictionaries ===
parsed_data = {
    column: parse_dict_column(single_results[column])
    for column in single_results.columns
}


def _extract_field(field):
    """Return a DataFrame holding ``cell[field]`` for every parsed cell."""
    return pd.DataFrame({
        column: cells.apply(lambda cell: cell[field])
        for column, cells in parsed_data.items()
    })


# === Extract ground truth, system decision and deviation ===
gt_df = _extract_field("ground_truth")
system_df = _extract_field("system_decision")
deviation_df = _extract_field("deviation")

# === Kendall's tau per dimension (ground truth vs. system decision) ===
kendall_per_dim = {
    column: kendalltau(gt_df[column], system_df[column]).correlation
    for column in gt_df
}

# === Unweighted mean of the per-dimension Kendall's tau values ===
tau_values = kendall_per_dim.values()
avg_kendall_tau = sum(tau_values) / len(kendall_per_dim)

# === Mean values per dimension: deviation, ground truth, system ===
avg_dev_per_dim = deviation_df.mean().to_dict()
avg_gt_per_dim = gt_df.mean().to_dict()
avg_sys_per_dim = system_df.mean().to_dict()

# === Overall means, taken over all cells of each frame at once ===
avg_dev_overall = deviation_df.to_numpy().ravel().mean()
avg_gt_overall = gt_df.to_numpy().ravel().mean()
avg_sys_overall = system_df.to_numpy().ravel().mean()

# === Per-dimension summary table (one row per dimension) ===
summary_columns = {
    "Kendall Tau": kendall_per_dim,
    "Ø Abweichung": avg_dev_per_dim,
    "Ø Ground Truth": avg_gt_per_dim,
    "Ø System": avg_sys_per_dim,
}
summary_table = pd.DataFrame(summary_columns)

# === Overview of the overall values ===
gesamtwerte = {
    "Ø Kendall Tau (Dimensionen)": avg_kendall_tau,
    "Ø Abweichung (gesamt)": avg_dev_overall,
    "Ø Ground Truth (gesamt)": avg_gt_overall,
    "Ø System (gesamt)": avg_sys_overall,
}

# === Output ===
print("==== Tabelle pro Dimension ====")
print(summary_table.round(3))
print("\n==== Durchschnittswerte Gesamt ====")
for label, value in gesamtwerte.items():
    print(f"{label}: {value:.3f}")

==== Tabelle pro Dimension ====
             Kendall Tau  Ø Abweichung  Ø Ground Truth  Ø System
relevance          0.395          1.17            3.74      2.61
coherence          0.402          1.12            3.36      2.32
fluency            0.404          1.58            4.58      3.00
consistency        0.540          0.84            4.57      3.77

==== Durchschnittswerte Gesamt ====
Ø Kendall Tau (Dimensionen): 0.435
Ø Abweichung (gesamt): 1.177
Ø Ground Truth (gesamt): 4.062
Ø System (gesamt): 2.925
