In [29]:
from __future__ import annotations

import argparse
import glob
import os
import re
import unicodedata
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

import pandas as pd


In [30]:
# ============================================================
# CSV normalisation: align every result file's columns on the GOLD file
# - Takes every CSV file found in RESULTS_DIR
# - Writes new CSV files into OUT_DIR, in which:
#     * columns follow EXACTLY the same order as in the GOLD file
#     * missing columns are added (empty / NA)
#     * extra columns are dropped
# - The file name is preserved
# ============================================================

# Single project root: every path below is derived from it.
PROJECT_DIR = "/Users/quentinnippert/Documents/hackaton_week/SciencePo_project/evaluation_résultats_LLM"

RESULTS_DIR = f"{PROJECT_DIR}/resultats_LLM"
GOLD_CSV = f"{PROJECT_DIR}/Gold.csv"
PATTERN = "*.csv"

OUT_DIR = f"{RESULTS_DIR}/normalized_outputs"  # the normalised files are saved here

In [31]:
def read_csv_any(path: str) -> pd.DataFrame:
    """Read a CSV file that is delimited by either ',' or ';'.

    The file is first parsed with the default ',' separator. It is parsed
    again with ';' when either:

    - the first attempt raises ``pandas.errors.ParserError``, or
    - the first attempt yields a single column whose header still contains
      a ';' (a ';'-delimited file usually parses "successfully" under ','
      but collapses into one column, so no exception is raised).

    Parameters
    ----------
    path : str
        Path of the CSV file to read.

    Returns
    -------
    pd.DataFrame
        The parsed table.

    Raises
    ------
    Any error raised by ``pandas.read_csv`` other than a ``ParserError`` on
    the first attempt (e.g. ``FileNotFoundError``) propagates unchanged.
    """
    try:
        df = pd.read_csv(path)
    except pd.errors.ParserError:
        # Ragged rows under ',' -> most likely a ';'-delimited file.
        return pd.read_csv(path, sep=";")

    # Silent failure mode: everything landed in one column named e.g. "a;b;c".
    if df.shape[1] == 1 and ";" in str(df.columns[0]):
        return pd.read_csv(path, sep=";")

    return df

# 1) Read the gold file; its header defines the target column order
gold_df = read_csv_any(GOLD_CSV)
gold_cols = gold_df.columns.tolist()

# 2) Collect the model result files (the gold file itself is excluded)
files = [
    path
    for path in sorted(glob.glob(os.path.join(RESULTS_DIR, PATTERN)))
    if os.path.abspath(path) != os.path.abspath(GOLD_CSV)
]

if len(files) == 0:
    raise RuntimeError(f"Нет файлов в {RESULTS_DIR} по паттерну {PATTERN}")

# 3) Make sure the output folder exists
os.makedirs(OUT_DIR, exist_ok=True)

# 4) Bring every file to the gold columns and save it under the same name
for f in files:
    df = read_csv_any(f)

    # Add the gold columns this file does not have
    for c in [col for col in gold_cols if col not in df.columns]:
        df[c] = pd.NA

    # Selecting by gold_cols both drops extra columns and applies the gold order
    df_norm = df.loc[:, gold_cols]

    out_path = os.path.join(OUT_DIR, os.path.basename(f))
    df_norm.to_csv(out_path, index=False)

print(f"Ready, fichiers sauvegardés ici: {OUT_DIR}")
print(f"Nombre des fichiers: {len(files)}")
print(f"Nombre des coulonnes(gold): {len(gold_cols)}")


Ready, fichiers sauvegardés ici: /Users/quentinnippert/Documents/hackaton_week/SciencePo_project/evaluation_résultats_LLM/resultats_LLM/normalized_outputs
Nombre des fichiers: 3
Nombre des coulonnes(gold): 32


In [53]:
# ----------------------------
# Parameters to edit
# ----------------------------
# NOTE(review): RESULTS_DIR, GOLD_CSV and PATTERN are also assigned in the
# normalisation cell earlier in the notebook; this cell rebinds RESULTS_DIR to
# the folder of normalised files.

# Single project root: every path below is derived from it.
PROJECT_DIR = "/Users/quentinnippert/Documents/hackaton_week/SciencePo_project/evaluation_résultats_LLM"

# Folder holding the CSV files produced by the models (one file = one model)
RESULTS_DIR = f"{PROJECT_DIR}/resultats_LLM/normalized_outputs"

# Annotated (gold) CSV file holding the expected results
GOLD_CSV = f"{PROJECT_DIR}/Gold.csv"

# (Optional) glob pattern used to filter the files
PATTERN = "*.csv"

# (Optional) column used to align rows; with None rows are compared by index 0..n-1
ID_COL = None   # e.g. "id", or leave None

# Similarity threshold (Jaccard) on the "content words" (0..1)
JACCARD_THRESHOLD = 0.80

# Output file for the summary table
OUT_SUMMARY_CSV = f"{PROJECT_DIR}/summary.csv"

# Whether to also export one per-column accuracy CSV per model, and where
EXPORT_PER_MODEL_PER_COLUMN = True
OUT_PER_COLUMN_DIR = f"{PROJECT_DIR}/per_model_columns"


In [54]:
# Function words ignored when answers are compared token by token.
STOPWORDS = set(
    (
        # FR articles / determiners / pronouns / common function words
        "le la les un une des du de d au aux "
        "ce cet cette ces mon ma mes ton ta tes son sa ses "
        "notre nos votre vos leur leurs "
        "et ou mais donc or ni car "
        "a à en dans sur sous chez par pour avec sans vers entre "
        "que qui quoi dont où "
        "ne pas plus moins très "
        # EN tiny set (in case of mixed-language answers)
        "the a an and or of to in on for with without"
    ).split()
)


In [55]:
# ----------------------------
# Lecture CSV + préparation index
# ----------------------------

def read_csv_any(path: str) -> pd.DataFrame:
    """Read a CSV file that is delimited by either ',' or ';'.

    NOTE(review): read_csv_any is also defined in an earlier cell of this
    notebook; consider keeping a single definition.

    The file is first parsed with the default ',' separator. It is parsed
    again with ';' when either:

    - the first attempt raises ``pandas.errors.ParserError``, or
    - the first attempt yields a single column whose header still contains
      a ';' (a ';'-delimited file usually parses "successfully" under ','
      but collapses into one column, so no exception is raised).

    Parameters
    ----------
    path : str
        Path of the CSV file to read.

    Returns
    -------
    pd.DataFrame
        The parsed table.

    Raises
    ------
    Any error raised by ``pandas.read_csv`` other than a ``ParserError`` on
    the first attempt (e.g. ``FileNotFoundError``) propagates unchanged.
    """
    try:
        df = pd.read_csv(path)
    except pd.errors.ParserError:
        # Ragged rows under ',' -> most likely a ';'-delimited file.
        return pd.read_csv(path, sep=";")

    # Silent failure mode: everything landed in one column named e.g. "a;b;c".
    if df.shape[1] == 1 and ";" in str(df.columns[0]):
        return pd.read_csv(path, sep=";")

    return df


def prepare_df(df: pd.DataFrame, id_col: Optional[str]) -> pd.DataFrame:
    """Optionally index a table by its identifier column.

    Parameters
    ----------
    df : pd.DataFrame
        Table as read from CSV.
    id_col : Optional[str]
        Name of the column to use as index, or None to leave ``df`` as is.

    Returns
    -------
    pd.DataFrame
        ``df`` itself when ``id_col`` is None, otherwise the result of
        ``df.set_index(id_col)``.

    Raises
    ------
    ValueError
        If ``id_col`` is given but is not a column of ``df``.
    """
    # Nothing to do when rows are aligned by position.
    if id_col is None:
        return df

    if id_col in df.columns:
        return df.set_index(id_col)

    raise ValueError(f"Colonne ID '{id_col}' introuvable dans le CSV.")


In [56]:
# ----------------------------
# Détection NR / manquant
# ----------------------------

def is_missing_or_nr(x) -> bool:
    """Tell whether a cell carries no information.

    Returns True for:

    - None and any scalar that pandas considers missing
      (float NaN, ``pd.NA``, ``pd.NaT``, ...)
    - strings that, once stripped and lower-cased, are one of:
      '' (empty), 'nr', 'nan', 'non précisé', 'non precise'

    Any other value (including non-scalar objects such as lists) is treated
    as informative and yields False.
    """
    if isinstance(x, str):
        # NFC so that a decomposed "e + combining accent" still matches the
        # precomposed literals below.
        s = unicodedata.normalize("NFC", x).strip().lower()
        return s in {
            "",
            "nr",
            "nan",
            "non précisé",
            "non precise",   # in case the accents were dropped
        }

    # pd.isna returns an array for list-likes, so only ask it about scalars.
    # This also covers pd.NA, which align_tables uses to fill absent columns.
    return bool(pd.api.types.is_scalar(x) and pd.isna(x))


In [57]:
# ----------------------------
# Normalisation "info vs info"
# ----------------------------

def normalize_info_text(x) -> str:
    """Normalise an informative (non-missing) cell for comparison.

    Steps, in order:

    - a float with an integral value is rendered like the integer
      (``5.0`` -> ``"5"``), so an int in one table matches the same number
      stored as float in the other
    - Unicode NFC normalisation, so a decomposed accent is not split off the
      word by the punctuation step below
    - strip and lower-case
    - "/" (with any surrounding whitespace) -> " et "
    - every character that is neither a word character nor whitespace -> " "
    - runs of whitespace collapsed to one space, ends stripped

    Parameters
    ----------
    x : Any
        Cell value; converted with ``str``.

    Returns
    -------
    str
        The normalised text.
    """
    # 5.0 and 5 would otherwise normalise to "5 0" and "5".
    if isinstance(x, float) and x.is_integer():
        x = int(x)

    s = unicodedata.normalize("NFC", str(x)).strip().lower()
    s = re.sub(r"\s*/\s*", " et ", s)
    s = re.sub(r"[^\w\s]", " ", s, flags=re.UNICODE)
    s = re.sub(r"\s+", " ", s).strip()
    return s


def content_words(s: str) -> List[str]:
    """Return the content tokens of an already-normalised string.

    The string is split on whitespace. A token is kept when it is not in
    STOPWORDS and is either longer than one character or made of digits.
    Single digits are kept on purpose: dropping them made answers that
    differ only by a one-digit number (e.g. "3 enfants" / "4 enfants")
    look identical to the Jaccard comparison.

    Parameters
    ----------
    s : str
        Text to tokenise; an empty string yields an empty list.

    Returns
    -------
    List[str]
        Kept tokens, in their original order (duplicates preserved).
    """
    if not s:
        return []
    return [
        tok
        for tok in s.split()
        if tok not in STOPWORDS and (len(tok) > 1 or tok.isdigit())
    ]


def jaccard(a: List[str], b: List[str]) -> float:
    """Jaccard similarity between the sets of tokens in ``a`` and ``b``.

    Returns 1.0 when both inputs are empty, 0.0 when exactly one is empty,
    and ``|A ∩ B| / |A ∪ B|`` otherwise (duplicates are ignored).
    """
    left, right = set(a), set(b)
    union = left | right
    if not union:
        # Two empty token sets are considered identical.
        return 1.0
    return len(left & right) / len(union)


In [58]:
# ----------------------------
# Règle finale de comparaison d'une cellule
# ----------------------------

def cell_equal(pred, gold, jaccard_threshold: float = JACCARD_THRESHOLD) -> bool:
    """Decide whether a predicted cell matches the gold cell.

    NOTE(review): an identical definition of cell_equal follows in the next
    cell of this notebook; consider keeping a single one.

    Rules:
    1) gold missing + pred missing => True
    2) gold missing + pred info    => False
    3) gold info    + pred missing => False
    4) gold info    + pred info    => True when the normalised texts are
       equal, otherwise True when the Jaccard similarity of their content
       words is >= ``jaccard_threshold``

    "Missing" is whatever ``is_missing_or_nr`` reports as such.
    """
    gold_is_empty = is_missing_or_nr(gold)
    pred_is_empty = is_missing_or_nr(pred)

    # Rules 1-3: as soon as one side is empty, they match only if both are.
    if gold_is_empty or pred_is_empty:
        return gold_is_empty and pred_is_empty

    # Rule 4: both sides are informative.
    gold_text = normalize_info_text(gold)
    pred_text = normalize_info_text(pred)
    if pred_text == gold_text:
        return True

    gold_tokens = content_words(gold_text)
    pred_tokens = content_words(pred_text)

    if not (gold_tokens or pred_tokens):
        # No content words on either side and the texts are already known
        # to differ, so there is nothing left that could match.
        return False

    return jaccard(pred_tokens, gold_tokens) >= jaccard_threshold


In [59]:
# ----------------------------
# Règle finale de comparaison d'une cellule
# ----------------------------

def cell_equal(pred, gold, jaccard_threshold: float = JACCARD_THRESHOLD) -> bool:
    """Decide whether a predicted cell matches the gold cell.

    NOTE(review): cell_equal is also defined, identically, in the previous
    cell of this notebook; consider keeping a single one.

    Rules:
    1) gold missing + pred missing => True
    2) gold missing + pred info    => False
    3) gold info    + pred missing => False
    4) gold info    + pred info    => True when the normalised texts are
       equal, otherwise True when the Jaccard similarity of their content
       words is >= ``jaccard_threshold``

    "Missing" is whatever ``is_missing_or_nr`` reports as such.
    """
    gold_is_empty = is_missing_or_nr(gold)
    pred_is_empty = is_missing_or_nr(pred)

    # Rules 1-3: as soon as one side is empty, they match only if both are.
    if gold_is_empty or pred_is_empty:
        return gold_is_empty and pred_is_empty

    # Rule 4: both sides are informative.
    gold_text = normalize_info_text(gold)
    pred_text = normalize_info_text(pred)
    if pred_text == gold_text:
        return True

    gold_tokens = content_words(gold_text)
    pred_tokens = content_words(pred_text)

    if not (gold_tokens or pred_tokens):
        # No content words on either side and the texts are already known
        # to differ, so there is nothing left that could match.
        return False

    return jaccard(pred_tokens, gold_tokens) >= jaccard_threshold


In [60]:
# ----------------------------
# Alignement des tables (colonnes + index)
# ----------------------------

def align_tables(pred: pd.DataFrame, gold: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Align ``pred`` on the rows and columns of ``gold``.

    - columns of ``gold`` absent from ``pred`` are added, filled with NaN
    - columns of ``pred`` absent from ``gold`` are dropped
    - columns are put in the order of ``gold``
    - rows are aligned on the index of ``gold``; index labels absent from
      ``pred`` become all-NaN rows

    Absent cells are filled with NaN rather than ``pd.NA``, so that a
    float-NaN check recognises them as missing.

    Parameters
    ----------
    pred : pd.DataFrame
        Model output table. Not modified.
    gold : pd.DataFrame
        Reference table. Not modified.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame]
        ``(aligned_pred, gold_copy)`` sharing index and columns.

    Raises
    ------
    ValueError
        Raised by pandas when ``pred`` has duplicate labels on an axis that
        has to be reindexed.
    """
    gold = gold.copy()
    # One reindex call handles missing/extra columns, column order and rows.
    pred = pred.reindex(index=gold.index, columns=gold.columns)
    return pred, gold


In [61]:
# ----------------------------
# Résultats + métriques
# ----------------------------

@dataclass
class CompareResult:
    """Scores of one model table against the gold table, as built by compare_df."""

    # Label of the evaluated model
    model_name: str
    # Share of matching cells over all compared cells
    cell_accuracy: float
    # Unweighted mean of the per-column accuracies
    macro_col_accuracy: float
    # Column name -> share of matching cells in that column
    per_col_accuracy: Dict[str, float]
    # Number of cells that were compared
    n_cells: int


def compare_df(pred: pd.DataFrame, gold: pd.DataFrame, model_name: str) -> CompareResult:
    """Score a model table against the gold table, cell by cell.

    ``pred`` is first aligned on ``gold`` with ``align_tables``; every cell
    is then judged with ``cell_equal``.

    Returns a CompareResult holding:
    - cell_accuracy: matching cells / all cells
    - macro_col_accuracy: mean of the per-column accuracies
    - per_col_accuracy: accuracy of each gold column
    - n_cells: number of compared cells

    Every ratio is 0.0 when its denominator is zero.
    """
    pred, gold = align_tables(pred, gold)

    row_count = len(gold)
    column_names = list(gold.columns)

    per_col_accuracy: Dict[str, float] = {}
    hits_total = 0

    # Cells are addressed by position, so the column label is only used as the dict key.
    for position, column in enumerate(column_names):
        hits = sum(
            1
            for row in range(row_count)
            if cell_equal(pred.iat[row, position], gold.iat[row, position])
        )
        hits_total += hits
        per_col_accuracy[column] = hits / row_count if row_count else 0.0

    cells_total = row_count * len(column_names)

    if cells_total:
        cell_accuracy = hits_total / cells_total
    else:
        cell_accuracy = 0.0

    if per_col_accuracy:
        macro_col_accuracy = sum(per_col_accuracy.values()) / len(per_col_accuracy)
    else:
        macro_col_accuracy = 0.0

    return CompareResult(
        model_name=model_name,
        cell_accuracy=cell_accuracy,
        macro_col_accuracy=macro_col_accuracy,
        per_col_accuracy=per_col_accuracy,
        n_cells=cells_total,
    )


In [62]:
# ----------------------------
# Exécution : comparer tous les CSV du dossier
# ----------------------------

# Gold table, indexed by ID_COL when one is configured
gold = prepare_df(read_csv_any(GOLD_CSV), ID_COL)

# Every CSV of the results folder except the gold file itself
files = [
    path
    for path in sorted(glob.glob(os.path.join(RESULTS_DIR, PATTERN)))
    if os.path.abspath(path) != os.path.abspath(GOLD_CSV)
]

if len(files) == 0:
    raise RuntimeError(f"Aucun fichier trouvé dans {RESULTS_DIR} avec le motif {PATTERN}")

# Folder for the optional per-column exports
if EXPORT_PER_MODEL_PER_COLUMN:
    os.makedirs(OUT_PER_COLUMN_DIR, exist_ok=True)

results: List[CompareResult] = []

for f in files:
    # The model is named after its file, without the extension
    model_name = os.path.splitext(os.path.basename(f))[0]
    pred = prepare_df(read_csv_any(f), ID_COL)

    res = compare_df(pred, gold, model_name=model_name)
    results.append(res)

    if not EXPORT_PER_MODEL_PER_COLUMN:
        continue

    # Optional export: per-column accuracy of this model, best columns first
    per_col_df = pd.DataFrame(
        {
            "column": list(res.per_col_accuracy),
            "accuracy": list(res.per_col_accuracy.values()),
        }
    )
    per_col_df = per_col_df.sort_values("accuracy", ascending=False).reset_index(drop=True)
    per_col_df.to_csv(
        os.path.join(OUT_PER_COLUMN_DIR, f"per_column_{model_name}.csv"),
        index=False,
    )

# Summary table: one row per model, best cell accuracy first
summary = pd.DataFrame(
    [{"model": r.model_name, "cell_accuracy": r.cell_accuracy} for r in results]
)
summary = summary.sort_values(["cell_accuracy"], ascending=False).reset_index(drop=True)

summary


Unnamed: 0,model,cell_accuracy
0,Mistral_Large_3,0.974432
1,ChatBot_ChatGPT,0.400568
2,MiniMistral,0.323864


In [63]:
# ----------------------------
# Sauvegarde du récapitulatif
# ----------------------------

# Write the summary table without its positional index, then report where it went
summary.to_csv(path_or_buf=OUT_SUMMARY_CSV, index=False)
print("Saved:", OUT_SUMMARY_CSV, sep=" ")



Saved: /Users/quentinnippert/Documents/hackaton_week/SciencePo_project/evaluation_résultats_LLM/summary.csv
