In [1]:
# Mount Google Drive at /content/drive; the result paths used later in this
# notebook are all located under that mount point.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
import json, pandas as pd
from pathlib import Path

# Root folder holding every evaluation artefact read or written by this notebook.
BASE = Path("/content/drive/MyDrive/agentic-rag-telecom-thesis/results")

# Input files per evaluated system, keyed by system name:
#   "ragas_candidates" - RAGAS score CSVs in order of preference (the "_fixed" export first)
#   "traces"           - the JSONL trace file for that system
SYSTEMS = {
    name: {
        "ragas_candidates": [
            BASE / f"{name}_ragas_fixed.csv",
            BASE / f"{name}_ragas.csv",
        ],
        "traces": BASE / "traces" / f"{name}_traces.jsonl",
    }
    for name in ("baseline", "react", "planner")
}

def load_jsonl(p: Path) -> pd.DataFrame:
    """Read a JSON Lines file into a DataFrame, one row per JSON document.

    Blank and whitespace-only lines are skipped, so a trailing newline or an
    empty separator line no longer aborts the load. A leading UTF-8 byte-order
    mark is tolerated.

    Args:
        p: Path of the ``.jsonl`` file to read.

    Returns:
        A DataFrame built from the parsed records, in file order. It is empty
        when the file contains no records.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with the file path and 1-based line number.
        OSError: If the file cannot be opened.
    """
    rows = []
    # "utf-8-sig" decodes plain UTF-8 identically but strips a BOM if present;
    # json.loads rejects text that starts with a BOM.
    with open(p, "r", encoding="utf-8-sig") as f:
        for lineno, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                rows.append(json.loads(line))
            except json.JSONDecodeError as exc:
                # Re-raise the same exception type with the location added.
                raise json.JSONDecodeError(
                    f"{p}:{lineno}: {exc.msg}", exc.doc, exc.pos
                ) from exc
    return pd.DataFrame(rows)

def normalize_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with known column-name variants renamed to canonical names.

    The mapping is ``question -> query``, ``answer -> prediction`` and
    ``ground_truth -> reference``. Only columns actually present are renamed;
    every other column is left as is. The input frame is not modified.
    """
    canonical = {
        "question": "query",
        "answer": "prediction",
        "ground_truth": "reference",
    }
    present = {old: new for old, new in canonical.items() if old in df.columns}
    return df.rename(columns=present)

def pick_existing(paths):
    """Return the first entry of ``paths`` whose ``.exists()`` is true, else ``None``.

    Entries are checked in the order given, so earlier entries take priority.
    """
    return next((candidate for candidate in paths if candidate.exists()), None)

# --- Tunables for the qualitative failure-candidate selection ---------------
N_WORST = 3                    # rows kept per system (lowest faithfulness first)
QUERY_PREVIEW_CHARS = 120      # length of the truncated query shown in the table
LOW_CONTEXT_PRECISION = 0.25   # below this, the retrieved context is treated as off-topic
MERGE_SUFFIXES = ("_ragas", "_trace")
# Columns used below that pandas renames with MERGE_SUFFIXES when they exist in
# BOTH the RAGAS CSV and the trace file.
COALESCE_COLS = [
    "faithfulness", "answer_relevancy", "context_precision",
    "query", "prediction", "reference", "contexts", "retrieved",
]


def _is_empty_contexts(value) -> bool:
    """Return True when a ``contexts`` cell holds no retrieved context at all."""
    if value is None:
        return True
    if isinstance(value, str):
        # A CSV round-trip stores an empty list as the text "[]".
        return value.strip() in ("", "[]")
    if isinstance(value, (list, tuple)):
        return len(value) == 0
    if isinstance(value, float):
        return bool(pd.isna(value))
    return False


def _coalesce_suffixed(df: pd.DataFrame, names, suffixes=MERGE_SUFFIXES) -> pd.DataFrame:
    """Rebuild un-suffixed columns that the merge split into ``<name><suffix>`` pairs.

    For each name missing from ``df``, the suffixed variants are combined in
    ``suffixes`` order (first non-null value wins) and stored under the plain
    name. The suffixed columns are kept. Returns a new frame.
    """
    out = df.copy()
    for name in names:
        if name in out.columns:
            continue
        variants = [name + s for s in suffixes if name + s in out.columns]
        if not variants:
            continue
        combined = out[variants[0]]
        for extra in variants[1:]:
            combined = combined.combine_first(out[extra])
        out[name] = combined
    return out


def guess(row):
    """Heuristically label the likely failure mode of one merged result row.

    This is only a starting point for manual review and can be overwritten.

    Args:
        row: A row of the merged frame (anything supporting ``in``, ``[]`` and
            ``.get`` by column name).

    Returns:
        ``"retrieval miss / empty contexts"`` when a ``contexts`` column is
        present and its value is missing or empty;
        ``"irrelevant context / retrieval drift"`` when context precision is
        below ``LOW_CONTEXT_PRECISION``;
        otherwise ``"unsupported claim / synthesis error"``.
    """
    # Only judge contexts when the column exists, so a frame without it is not
    # reported as a retrieval miss on every row.
    if "contexts" in row and _is_empty_contexts(row["contexts"]):
        return "retrieval miss / empty contexts"

    cp = float("nan")
    for col in ("context_precision", "context_precision_trace", "context_precision_ragas"):
        try:
            # float() also accepts NumPy integer scalars and numeric strings.
            candidate = float(row.get(col, None))
        except (TypeError, ValueError):
            continue
        if not pd.isna(candidate):
            cp = candidate
            break

    # NaN compares False, so an unknown precision falls through to the default.
    if cp < LOW_CONTEXT_PRECISION:
        return "irrelevant context / retrieval drift"
    return "unsupported claim / synthesis error"


picked_all = []

# NOTE: the loop variable was previously called `sys`, which rebinds the name of
# the standard-library module for the rest of the kernel session.
for system_name, paths in SYSTEMS.items():
    ragas_path = pick_existing(paths["ragas_candidates"])
    if ragas_path is None:
        raise FileNotFoundError(f"Missing RAGAS CSV for {system_name}. Looked for: {paths['ragas_candidates']}")
    if not paths["traces"].exists():
        raise FileNotFoundError(f"Missing traces JSONL for {system_name}: {paths['traces']}")

    ragas = normalize_cols(pd.read_csv(ragas_path))
    traces = normalize_cols(load_jsonl(paths["traces"]))

    # Ensure we have an 'i' key to merge on. When a side has no 'i', fall back
    # to its 1-based row position.
    positional_key = False
    if "i" not in ragas.columns:
        ragas["i"] = range(1, len(ragas) + 1)
        positional_key = True
    if "i" not in traces.columns:
        traces["i"] = range(1, len(traces) + 1)
        positional_key = True
    if positional_key and len(ragas) != len(traces):
        # Row position is only a valid key when both files list the same
        # examples in the same order; differing lengths show that they do not.
        print(
            f"WARNING [{system_name}]: merging on row position but RAGAS has "
            f"{len(ragas)} rows and traces have {len(traces)}; rows may be misaligned."
        )

    merged = ragas.merge(traces, on="i", how="left", suffixes=MERGE_SUFFIXES)
    # Columns present in both inputs come back suffixed; restore the plain
    # names (RAGAS value preferred) so the selection below can find them.
    merged = _coalesce_suffixed(merged, COALESCE_COLS)

    if "faithfulness" not in merged.columns:
        raise KeyError(f"No 'faithfulness' column for {system_name} (RAGAS file: {ragas_path})")

    # Bottom-N by faithfulness. 'i' is the tie-break so that the choice among
    # equal scores is reproducible from run to run.
    low = merged.sort_values(["faithfulness", "i"], ascending=True).head(N_WORST).copy()
    low["system"] = system_name
    low["query_short"] = low["query"].astype(str).str.slice(0, QUERY_PREVIEW_CHARS)

    # Heuristic failure guess (optional; you can overwrite manually)
    low["failure_guess"] = low.apply(guess, axis=1)

    keep_cols = [c for c in [
        "system","i","faithfulness","answer_relevancy","context_precision",
        "query_short","query","prediction","reference","failure_guess","contexts","retrieved"
    ] if c in low.columns]
    picked_all.append(low[keep_cols])

picked = pd.concat(picked_all, ignore_index=True)

out_csv = BASE / "qual_failure_candidates.csv"
picked.to_csv(out_csv, index=False)
print("Saved:", out_csv)

# Print a readable view (no giant contexts)
view_cols = [c for c in ["system","i","faithfulness","answer_relevancy","context_precision","query_short","failure_guess"] if c in picked.columns]
display(picked[view_cols])

Saved: /content/drive/MyDrive/agentic-rag-telecom-thesis/results/qual_failure_candidates.csv


Unnamed: 0,system,i,faithfulness,answer_relevancy,context_precision,query_short,failure_guess
0,baseline,1,0.0,,0.0,I do have an enhanced document and I am trying...,irrelevant context / retrieval drift
1,baseline,4,0.0,,0.0,"Yes, my credit is not bad.",irrelevant context / retrieval drift
2,baseline,5,0.0,,0.0,I need to manage my VA health benefits.,irrelevant context / retrieval drift
3,react,2,0.0,0.311966,0.0,NO,irrelevant context / retrieval drift
4,react,4,0.0,0.0,0.0,"Yes, my credit is not bad.",irrelevant context / retrieval drift
5,react,7,0.0,0.0,1.0,If some of my loans are still in the grace per...,unsupported claim / synthesis error
6,planner,1,0.0,0.0,0.0,I do have an enhanced document and I am trying...,irrelevant context / retrieval drift
7,planner,2,0.0,0.0,0.0,NO,irrelevant context / retrieval drift
8,planner,4,0.0,0.0,0.0,"Yes, my credit is not bad.",irrelevant context / retrieval drift
