# In [None]:  (Jupyter cell marker left over from the notebook export)
"""
validate_claims.py

Simple validation of LLM narratives against ground-truth stats.

Assumptions:
- data/player_stats.csv contains anonymized stats with at least:
    player_id, points_per_game, minutes_per_game, efficiency
- results/llm_outputs.csv contains a column 'recommended_player'
  indicating which player the model focused on.

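This script performs a light, illustrative check: it finds the top scorer by
points_per_game, counts how often each player was recommended (broken down by
'hypothesis' and 'condition' when both columns are present), and writes a short
summary to analysis/validation_summary.txt. Adjust the column names to match
your actual data.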
"""

from pathlib import Path
import pandas as pd

# Input/output locations, resolved relative to the current working directory.
STATS_PATH = Path("data") / "player_stats.csv"
RESULTS_PATH = Path("results") / "llm_outputs.csv"
SUMMARY_DIR = Path("analysis")


def _normalize_id(value):
    """Return a canonical string form of a player identifier, or None if missing.

    pandas infers column dtypes per file, so the same identifier can be read as
    the int ``7`` in one CSV and as the string ``"7"`` (or the float ``7.0``
    when the column contains blanks) in another. Comparing canonical strings
    makes the match independent of those inferred dtypes.
    """
    if pd.isna(value):
        return None
    if isinstance(value, float) and value.is_integer():
        # 7.0 and 7 denote the same identifier.
        value = int(value)
    text = str(value).strip()
    return text or None


def _find_top_scorer(stats):
    """Return the player_id of the row with the highest points_per_game.

    Non-numeric entries are treated as missing. Ties are broken by file order
    (the first row holding the maximum wins).

    Raises:
        ValueError: if there is no usable points_per_game value, or the
            winning row has no player_id.
    """
    ppg = pd.to_numeric(stats["points_per_game"], errors="coerce")
    if not ppg.notna().any():
        raise ValueError(
            "No numeric 'points_per_game' values in player_stats.csv; "
            "cannot determine the top scorer"
        )
    top_scorer = stats.loc[ppg.idxmax(), "player_id"]
    if pd.isna(top_scorer):
        raise ValueError("Top-scoring row in player_stats.csv has no 'player_id'")
    return top_scorer


def main():
    """Compare the model's recommended players against the true top scorer.

    Reads STATS_PATH and RESULTS_PATH, prints the top scorer and the
    recommendation counts, and writes a short summary to
    SUMMARY_DIR / "validation_summary.txt".

    Rows whose 'recommended_player' is blank are not counted as
    recommendations; they are reported on a separate line when present.
    If 'points_per_game' is absent from the stats file the check is skipped
    and nothing is written.

    Raises:
        FileNotFoundError: if either input file does not exist.
        ValueError: if a required column is missing or no top scorer can be
            determined.
    """
    for label, path in (("Stats", STATS_PATH), ("Results", RESULTS_PATH)):
        if not path.exists():
            raise FileNotFoundError(f"{label} file not found: {path}")

    stats = pd.read_csv(STATS_PATH)
    results = pd.read_csv(RESULTS_PATH)

    # Example assumption: player_id column matches recommended_player
    if "player_id" not in stats.columns:
        raise ValueError("Expected column 'player_id' in player_stats.csv")
    if "recommended_player" not in results.columns:
        raise ValueError("Expected column 'recommended_player' in llm_outputs.csv")

    # Identify top scorer by points_per_game for a simple ground-truth check
    if "points_per_game" not in stats.columns:
        print("Column 'points_per_game' not found; skipping top-scorer validation.")
        return

    top_scorer = _find_top_scorer(stats)
    print("Top scorer by points_per_game (ground truth):", top_scorer)

    # Break the counts down per experiment cell when that metadata is available.
    group_cols = ["recommended_player"]
    if {"hypothesis", "condition"} <= set(results.columns):
        group_cols = ["hypothesis", "condition", "recommended_player"]
    rec_counts = results.groupby(group_cols).size().reset_index(name="count")
    print("\nRecommendation counts:\n", rec_counts)

    # Compare on canonical strings so an int player_id still matches a
    # recommended_player column that pandas parsed as text (or as float).
    recommended = results["recommended_player"].map(_normalize_id)
    top_key = _normalize_id(top_scorer)

    total_recs = int(recommended.notna().sum())
    missing_recs = len(results) - total_recs
    top_recs = int((recommended == top_key).sum())

    print(f"\nTotal recommendations: {total_recs}")
    print(f"Recommendations choosing the actual top scorer: {top_recs}")
    if missing_recs:
        print(f"Rows with no recommendation: {missing_recs}")

    summary_lines = [
        f"Top scorer by points_per_game: {top_scorer}",
        f"Total recommendations: {total_recs}",
        f"Recommendations choosing the top scorer: {top_recs}",
        f"Recommendations choosing other players: {total_recs - top_recs}",
    ]
    if missing_recs:
        # Only emitted when relevant so clean inputs produce the usual 4 lines.
        summary_lines.append(f"Rows with no recommendation: {missing_recs}")
    summary_text = "\n".join(summary_lines) + "\n"

    # Create the output directory only once there is something to write, so a
    # failed validation does not leave an empty directory behind.
    SUMMARY_DIR.mkdir(parents=True, exist_ok=True)
    summary_path = SUMMARY_DIR / "validation_summary.txt"
    summary_path.write_text(summary_text, encoding="utf-8")
    print(f"\nSaved validation summary to {summary_path.resolve()}")


# Run the validation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
