# SummEval – Exploração e Estatísticas

In [1]:
import json
import pandas as pd
import numpy as np
from pathlib import Path
from scipy.stats import spearmanr

DATA_PATH = Path("C:\\Users\\thiago.ouverney\\Projetos\\pyAutoSummarizer\\data\\model_annotations.aligned.jsonl")


## 1. Carregamento dos dados
Função auxiliar para ler `.jsonl` e criar um `DataFrame`.

In [2]:
def load_summ_eval(jsonl_path: Path) -> pd.DataFrame:
    """Lê arquivo SummEval (.jsonl) e devolve DataFrame tabular."""
    records = []
    with jsonl_path.open(encoding="utf-8") as fh:
        for line in fh:
            data = json.loads(line)
            rec = {
                "doc_id": data["id"],
                "model_id": data["model_id"],
                "summary": data["decoded"],
            }
            # Médias de especialistas e turkers
            for metric in ["coherence", "consistency", "fluency", "relevance"]:
                rec[f"expert_{metric}"] = np.mean([ann[metric] for ann in data["expert_annotations"]])
                rec[f"turker_{metric}"] = np.mean([ann[metric] for ann in data["turker_annotations"]])
            records.append(rec)
    df = pd.DataFrame(records)
    df["summary_len"] = df["summary"].str.split().apply(len)
    return df

df = load_summ_eval(DATA_PATH)
df.head()


Unnamed: 0,doc_id,model_id,summary,expert_coherence,turker_coherence,expert_consistency,turker_consistency,expert_fluency,turker_fluency,expert_relevance,turker_relevance,summary_len
0,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M11,paul merson was brought on with only seven min...,1.333333,3.0,1.0,3.0,3.0,4.0,1.666667,3.0,65
1,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M13,paul merson has restarted his row with andros ...,2.333333,2.0,5.0,3.0,5.0,2.0,2.666667,3.0,46
2,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M1,paul merson has restarted his row with andros ...,2.333333,3.8,5.0,4.2,5.0,3.8,2.666667,4.6,63
3,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M14,paul merson has restarted his row with andros ...,1.666667,5.0,5.0,5.0,5.0,5.0,2.666667,4.0,47
4,dm-test-8764fb95bfad8ee849274873a92fb8d6b400eee2,M15,paul merson has restarted his row with andros ...,3.333333,2.0,5.0,4.0,3.333333,2.0,4.0,4.0,81


## 2. Estatísticas básicas

In [3]:
def compute_basic_stats(df: pd.DataFrame) -> pd.DataFrame:
    return pd.DataFrame({
        "N_doc": [df["doc_id"].nunique()],
        "N_sys": [df["model_id"].nunique()],
        "L_sum_mean": [df["summary_len"].mean()],
        "L_sum_std": [df["summary_len"].std()],
    })

basic_stats = compute_basic_stats(df)
basic_stats


Unnamed: 0,N_doc,N_sys,L_sum_mean,L_sum_std
0,100,16,62.99,19.800914


## 3. Médias das notas (especialistas)

In [4]:
def mean_scores(df: pd.DataFrame, prefix: str) -> pd.Series:
    cols = [f"{prefix}_{m}" for m in ["coherence", "consistency", "fluency", "relevance"]]
    return df[cols].mean()

expert_means = mean_scores(df, "expert")
expert_means


expert_coherence      3.412500
expert_consistency    4.660417
expert_fluency        4.672917
expert_relevance      3.777083
dtype: float64

## 4. Correlação Spearman – Expert × Turker

In [5]:
def spearman_expert_turker(df: pd.DataFrame) -> pd.Series:
    rho = {}
    for m in ["coherence", "consistency", "fluency", "relevance"]:
        rho[m], _ = spearmanr(df[f"expert_{m}"], df[f"turker_{m}"])
    rho["mean"] = np.mean(list(rho.values()))
    return pd.Series(rho, name="rho_E_T")

rho_e_t = spearman_expert_turker(df)
rho_e_t


coherence      0.039070
consistency   -0.010086
fluency        0.050965
relevance      0.090638
mean           0.042647
Name: rho_E_T, dtype: float64

## 5. Correlação média entre Turkers

In [6]:
def mean_turker_pairwise_corr(jsonl_path: Path) -> float:
    import collections
    turker_data = collections.defaultdict(lambda: collections.defaultdict(list))
    with jsonl_path.open(encoding="utf-8") as fh:
        for line in fh:
            data = json.loads(line)
            annots = data["turker_annotations"]
            if len(annots) < 2:
                continue
            for idx, ann in enumerate(annots):
                for m in ["coherence", "consistency", "fluency", "relevance"]:
                    turker_data[idx][m].append(ann[m])
    pairwise = []
    for m in ["coherence", "consistency", "fluency", "relevance"]:
        keys = list(turker_data.keys())
        for i, a in enumerate(keys):
            for b in keys[i+1:]:
                rho, _ = spearmanr(turker_data[a][m], turker_data[b][m])
                pairwise.append(rho)
    return float(np.mean(pairwise))

rho_T = mean_turker_pairwise_corr(DATA_PATH)
rho_T


0.49577505280708467

## 6. Sumário consolidado

In [7]:
summary_df = pd.DataFrame({
    "Descrição": [
        "Número total de artigos",
        "Número total de resumos de modelos",
        "Comprimento médio do resumo (tokens)",
        "Desvio-padrão do comprimento de resumo",
        "Média das notas de coerência (especialistas)",
        "Média das notas de consistência",
        "Média das notas de fluência",
        "Média das notas de relevância",
        "Correlação Spearman média Expert × Turker",
        "Correlação Spearman média entre Turkers",
    ],
    "Símbolo": [
        "N_doc", "N_sys", "¯L_sum", "σ_{L_sum}",
        "μ_coh^{exp}", "μ_cons^{exp}", "μ_flu^{exp}", "μ_rel^{exp}",
        "ρ_{E×T}", "ρ_T"
    ],
    "Valor": [
        int(basic_stats.at[0, "N_doc"]),
        int(basic_stats.at[0, "N_sys"]),
        round(basic_stats.at[0, "L_sum_mean"], 2),
        round(basic_stats.at[0, "L_sum_std"], 2),
        round(expert_means["coherence"], 2),
        round(expert_means["consistency"], 2),
        round(expert_means["fluency"], 2),
        round(expert_means["relevance"], 2),
        round(rho_e_t["mean"], 3),
        round(rho_T, 3)
    ]
})
summary_df


KeyError: 'coherence'