# Alfred FPL — Eval Loop

Run the **real Alfred pipeline** end-to-end and inspect responses, charts, and prompt logs inline.

**Prerequisites:**
- `OPENAI_API_KEY` in `.env`
- `FPL_DEV_USER_ID` in `.env` (from `seed_demo.py`)
- Supabase seeded (`python scripts/sync.py --from-gw 22`)

**Workflow:** Run a question → see Alfred's response → check `prompt_logs/` for what the LLM saw → tweak prompts → re-run.

In [None]:
# Cell 0: Setup
import os
import sys
import asyncio
import time
import glob
import json
from pathlib import Path
from IPython.display import display, Markdown, Image, HTML

# Enable prompt logging. This is an unconditional assignment, so it overrides
# any ALFRED_LOG_PROMPTS value already present in the process environment.
os.environ["ALFRED_LOG_PROMPTS"] = "1"
# Use mini for all calls (cheaper for iteration). setdefault only writes the
# value when the variable is not already set, so a value exported in the
# shell before launching the notebook still wins.
# NOTE(review): this runs before load_dotenv() below; python-dotenv does not
# override existing variables by default, so an ALFRED_USE_ADVANCED_MODELS
# entry in .env would presumably be ignored — confirm this is intended.
os.environ.setdefault("ALFRED_USE_ADVANCED_MODELS", "false")

# Add src to path. The location is resolved from the *parent* of the current
# working directory, so this presumably expects the notebook to be run from a
# direct subdirectory of the repository root — TODO confirm.
sys.path.insert(0, str(Path.cwd().parent / "src"))

# Load .env from that same parent directory.
from dotenv import load_dotenv
load_dotenv(Path.cwd().parent / ".env")

# Register FPL domain + import alfred
import alfred_fpl  # noqa: F401 — triggers domain registration
from alfred.graph.workflow import run_alfred
from alfred.memory.conversation import initialize_conversation
from alfred_fpl.config import settings

# Resolve the dev user id from settings and fail fast when it is missing.
USER_ID = settings.fpl_dev_user_id
assert USER_ID, "FPL_DEV_USER_ID not set in .env — run seed_demo.py first"

# Status banner: one print call, one line per item (only the first 8
# characters of the user id are shown).
print(
    f"User: {USER_ID[:8]}...",
    f"Model mode: {'mini-only' if os.environ.get('ALFRED_USE_ADVANCED_MODELS') == 'false' else 'advanced'}",
    "Prompt logs: prompt_logs/",
    "Ready!",
    sep="\n",
)

In [None]:
# Cell 1: Conversation state + helper

# Module-level conversation state shared by every ask_alfred() call:
# ask_alfred rebinds it to the conversation returned by run_alfred after each
# turn, and replaces it with a fresh one when called with reset=True.
conversation = initialize_conversation()


async def ask_alfred(question: str, reset: bool = False) -> str:
    """Send a question to Alfred and display the response.

    Prints the question, awaits ``run_alfred`` with the module-level
    ``conversation``, rebinds ``conversation`` to the updated state that
    ``run_alfred`` returns, and renders the response as Markdown together
    with the elapsed time.

    Args:
        question: The user's question.
        reset: If True, start a fresh conversation before asking.

    Returns:
        The raw response string.
    """
    global conversation
    if reset:
        conversation = initialize_conversation()
        print("[conversation reset]")

    print(f"You: {question}")
    print("-" * 40)

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()
    response, conversation = await run_alfred(
        user_message=question,
        user_id=USER_ID,
        conversation=conversation,
    )
    elapsed = time.perf_counter() - start

    print(f"Alfred ({elapsed:.1f}s):")
    display(Markdown(response))

    return response


def show_latest_prompt_log():
    """Display the most recent prompt log file.

    Looks in ``prompt_logs/`` under the parent of the current working
    directory. The most recently modified ``*.json`` file is preferred;
    ``*.md`` files are only considered when no JSON log exists. JSON logs are
    pretty-printed with a two-space indent (falling back to the raw text when
    the file is not valid JSON). At most the first 3000 characters are
    printed, followed by a notice of how many characters were omitted.

    Returns:
        None. All output goes to stdout.
    """
    max_chars = 3000

    log_dir = Path.cwd().parent / "prompt_logs"
    if not log_dir.exists():
        print("No prompt_logs/ directory found.")
        return

    logs = sorted(log_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True)
    if not logs:
        # Try .md files
        logs = sorted(log_dir.glob("*.md"), key=lambda p: p.stat().st_mtime, reverse=True)
    if not logs:
        print("No log files found in prompt_logs/")
        return

    latest = logs[0]
    print(f"Latest log: {latest.name}")
    print("=" * 60)

    text = latest.read_text(encoding="utf-8")
    if latest.suffix == ".json":
        try:
            text = json.dumps(json.loads(text), indent=2)
        except json.JSONDecodeError:
            # Not valid JSON after all: show the raw file content instead.
            pass

    print(text[:max_chars])

    # Measure the truncation against the text that was actually displayed.
    # Re-indented JSON can be much longer than the raw file, so using the raw
    # length would hide or misreport how much was cut off.
    if len(text) > max_chars:
        print(f"\n... [{len(text) - max_chars} more chars]")


print("Helpers ready: ask_alfred(question), show_latest_prompt_log()")

---
## Scenario 1: Squad View
Tests the squad subdomain — should read squad data and display formation/players.

In [None]:
response = await ask_alfred("show my squad", reset=True)

## Scenario 2: Scouting
Tests the scouting subdomain — should query players, filter, rank.

In [None]:
response = await ask_alfred("show me the best value midfielders under 8m", reset=True)

In [None]:
# Follow-up turn (same conversation)
response = await ask_alfred("compare the top 2 by form over the last 5 gameweeks")

## Scenario 3: Fixtures
Tests fixture analysis — should compute the Fixture Difficulty Rating (FDR) and may produce a heatmap.

In [None]:
response = await ask_alfred("which teams have the easiest fixtures next 5 GWs?", reset=True)

## Scenario 4: League
Tests league standings + rival comparison.

In [None]:
response = await ask_alfred("show my league standings", reset=True)

In [None]:
# Follow-up turn (same conversation): no reset, so this continues from
# whatever conversation state the previous ask_alfred() call left behind.
response = await ask_alfred("compare my squad with the league leader")

## Scenario 5: Transfers
Tests the transfer-planning flow — a squad view followed by a search for forwards in the same conversation.

In [None]:
response = await ask_alfred("show my squad", reset=True)

In [None]:
# Follow-up turn (same conversation): no reset, so this continues from
# whatever conversation state the previous ask_alfred() call left behind.
response = await ask_alfred("who are the cheapest performing forwards?")

## Scenario 6: Market
Tests market/transfer trends.

In [None]:
response = await ask_alfred("show me the most transferred-in players this week", reset=True)

---
## Prompt Inspector
View what the LLM actually received.

In [None]:
show_latest_prompt_log()

In [None]:
# Show the 20 most recently modified entries in prompt_logs/, newest first,
# each with its size in bytes.
log_dir = Path.cwd().parent / "prompt_logs"
if not log_dir.exists():
    print("No prompt_logs/ directory yet.")
else:
    logs = sorted(
        log_dir.iterdir(),
        key=lambda entry: entry.stat().st_mtime,
        reverse=True,
    )
    for f in logs[:20]:
        size = f.stat().st_size
        print(f"  {f.name}  ({size:,} bytes)")

---
## Ad-hoc Testing
Type your own questions here.

In [None]:
# response = await ask_alfred("your question here", reset=True)