# Athanor — Stage 3: Hypothesis Generator

**Input:** Stage 2 `gap_report.json`  
**Process:** Human-in-loop approval → Claude generates falsifiable hypothesis + experiment design per gap  
**Output:** `outputs/hypotheses/<domain>/hypothesis_report.json`  

**This closes the core loop:**  
`Ingest → Map → Gap-find → Hypothesize → Design experiment → Test computationally → Surface → Repeat`


In [None]:
import os, sys, json, time
from pathlib import Path

# Put the parent directory first on the import path (presumably so the
# project's `athanor` package resolves from this notebook's folder — confirm).
sys.path.insert(0, str(Path("..").resolve()))

from dotenv import load_dotenv
# Read environment variables from the `.env` file in the parent directory.
load_dotenv(Path("..") / ".env")

from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich.text import Text
import plotly.graph_objects as go

# Shared rich console used by the following cells for styled output.
console = Console()
print("✓ Imports OK")


## 1. Configuration

In [None]:
# Single source of truth for the domain slug: it labels the run AND names the
# per-domain output folder, so changing it here keeps the two in sync.
DOMAIN = "information_theory"

# Notebook-wide settings read by the cells below.
CONFIG = {
    "domain": DOMAIN,
    "claude_model": "claude-opus-4-5",

    # Stage 2 input
    "gap_report_path": Path("..") / "outputs" / "gaps" / "gap_report.json",

    # Stage 3 outputs (one folder per domain)
    "output_dir": Path("..") / "outputs" / "hypotheses" / DOMAIN,

    # Generate hypotheses for all gaps, or only approved ones?
    # Set to True after running §3 (human review)
    "approved_only": False,

    # Cap: how many hypotheses to generate (controls cost)
    "max_hypotheses": 10,
}

# Create the output folder up front so later cells can write into it.
CONFIG["output_dir"].mkdir(parents=True, exist_ok=True)

# Only report a missing key here; the key is read later, when hypotheses are
# actually generated, so a cached run can still proceed without it.
if not os.environ.get("ANTHROPIC_API_KEY"):
    console.print("[bold red]ANTHROPIC_API_KEY not set![/]")
else:
    console.print(f"[bold green]✓ Config ready[/] — model: {CONFIG['claude_model']}")


## 2. Load Stage 2 Gap Report

In [None]:
from athanor.gaps.models import GapReport

# Stop early with a readable hint when the Stage 2 report is absent.
report_path = CONFIG["gap_report_path"]
if not report_path.exists():
    console.print(f"[bold red]Missing:[/] {report_path}\nRun Stage 2 first.")
    raise FileNotFoundError(report_path)

# Parse the report and keep only the first `max_hypotheses` ranked gaps.
gap_report = GapReport.model_validate_json(report_path.read_text())
ranked = gap_report.ranked[:CONFIG["max_hypotheses"]]

# Column header -> rich styling options, in display order.
column_specs = (
    ("#", {"style": "dim", "width": 3}),
    ("Concept A", {"style": "cyan"}),
    ("Concept B", {"style": "cyan"}),
    ("Score", {"style": "bold green"}),
    ("N", {"style": "magenta"}),
    ("T", {"style": "blue"}),
    ("I", {"style": "red"}),
    ("✓?", {"style": "yellow"}),
    ("Research Question", {"style": "white"}),
)

table = Table(title=f"Gap Report — {gap_report.domain} ({len(gap_report.analyses)} gaps)", show_lines=True)
for header, options in column_specs:
    table.add_column(header, **options)

# Approval state -> table glyph; anything else falls back to the dash.
approval_marks = {None: "—", True: "✓", False: "✗"}
question_limit = 70

for position, gap in enumerate(ranked, start=1):
    mark = approval_marks.get(getattr(gap, "approved", None), "—")

    # Long research questions are cut to the limit and marked with an ellipsis.
    question = gap.research_question
    ellipsis = "…" if len(question) > question_limit else ""

    table.add_row(
        str(position),
        gap.concept_a,
        gap.concept_b,
        f"{gap.composite_score:.2f}",
        str(gap.novelty),
        str(gap.tractability),
        str(gap.impact),
        mark,
        question[:question_limit] + ellipsis,
    )

console.print(table)


## 3. Human-in-Loop Review

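Set `approved=True` for gaps you want hypotheses for.  
Set `approved=False` to reject a gap.  
Leave `approved=None` for gaps you have not reviewed.  
With `approved_only=True` in CONFIG, only gaps marked `True` are processed; with `approved_only=False`, every listed gap is processed regardless of its approval status.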

Edit the dict below and re-run this cell before §4.

In [None]:
# ── Edit this dict to approve/reject gaps ─────────────────────────────────────
# Key:   "{concept_a} ↔ {concept_b}"  (as shown in the table above; either order)
# Value: True = approve | False = reject | (omit = leave as None)
# Only gaps marked True are processed when CONFIG["approved_only"] is True;
# when it is False, every listed gap is processed regardless of this dict.
APPROVALS: dict[str, bool] = {
    # Example — replace with actual concept pairs from your table:
    # "entropy ↔ neural networks": True,
    # "channel capacity ↔ algorithmic complexity": True,
}

# Apply approvals, remembering which keys matched a listed gap.
# NOTE: a gap whose key is absent keeps whatever `approved` value it already
# has, so deleting a key and re-running this cell does not undo an earlier
# approval made in the same session.
matched_keys: set[str] = set()
for analysis in ranked:
    key = f"{analysis.concept_a} ↔ {analysis.concept_b}"
    key_rev = f"{analysis.concept_b} ↔ {analysis.concept_a}"
    hits = [k for k in (key, key_rev) if k in APPROVALS]
    if hits:
        # The forward spelling wins when both orderings are present.
        analysis.approved = APPROVALS[hits[0]]
        matched_keys.update(hits)
    # else: leave as None

# Surface keys that matched nothing (typo, different wording, or a gap beyond
# the max_hypotheses cut-off) instead of ignoring them silently.
unmatched_keys = [k for k in APPROVALS if k not in matched_keys]
if unmatched_keys:
    console.print(
        f"[yellow]Warning: {len(unmatched_keys)} approval key(s) matched no gap in the table above:[/]"
    )
    for stale_key in unmatched_keys:
        # markup=False so brackets inside a concept name are printed literally.
        console.print(f"  • {stale_key}", markup=False)

approved_count = sum(1 for a in ranked if getattr(a, "approved", None) is True)
rejected_count = sum(1 for a in ranked if getattr(a, "approved", None) is False)
unreviewed = sum(1 for a in ranked if getattr(a, "approved", None) is None)

console.print(f"[bold]Review status:[/] ✓ {approved_count} approved | ✗ {rejected_count} rejected | — {unreviewed} unreviewed")
if CONFIG["approved_only"] and approved_count == 0:
    console.print("[yellow]Warning: approved_only=True but no gaps approved. Set approvals above or set approved_only=False.[/]")


## 4. Generate Hypotheses

Claude produces for each gap:
- A falsifiable **hypothesis statement**
- The proposed **mechanism**
- A specific, measurable **prediction**
- **Falsification criteria** (what would definitively refute it)
- A full **experiment design** with steps, tools, effort estimate
- Flag: **computational** vs. wet-lab primary

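Results are cached in `hypothesis_cache.json` inside the output directory, so re-running this cell loads them without new API calls. Delete that file to regenerate (for example, after changing approvals or `max_hypotheses`).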

In [None]:
from athanor.hypotheses import HypothesisGenerator, HypothesisReport
from tqdm.notebook import tqdm

cache_path = CONFIG["output_dir"] / "hypothesis_cache.json"

if cache_path.exists():
    # The cache is used whenever the file exists; it is not keyed on CONFIG or
    # on the approvals, so delete the file to force regeneration.
    console.print("[yellow]Loading cached hypothesis report…[/]")
    hyp_report = HypothesisReport.model_validate_json(cache_path.read_text())
else:
    # Raises KeyError here if ANTHROPIC_API_KEY is not set.
    generator = HypothesisGenerator(
        domain=CONFIG["domain"],
        model=CONFIG["claude_model"],
        api_key=os.environ["ANTHROPIC_API_KEY"],
    )

    # With approved_only=False every ranked gap is a target; otherwise only
    # gaps explicitly marked approved=True are.
    targets = [
        a for a in ranked
        if not CONFIG["approved_only"] or getattr(a, "approved", None) is True
    ]

    hyp_report = HypothesisReport(
        domain=CONFIG["domain"],
        query=gap_report.query,
        n_gaps_considered=len(targets),
    )

    skipped = 0  # gaps for which the generator returned nothing usable
    for i, analysis in enumerate(tqdm(targets, desc="Generating hypotheses")):
        console.print(f"  [{i+1}/{len(targets)}] [cyan]{analysis.concept_a}[/] ↔ [cyan]{analysis.concept_b}[/]")
        # NOTE(review): this calls a private generator method; a falsy return
        # presumably signals a failed generation — confirm against the class.
        hyp = generator._generate_one(analysis)
        if hyp:
            hyp_report.hypotheses.append(hyp)
        else:
            skipped += 1
        time.sleep(0.5)  # brief pause between requests

    if skipped:
        console.print(f"[yellow]Warning: {skipped} of {len(targets)} gaps produced no hypothesis.[/]")

    # Never cache an empty report: the cache short-circuits every later run,
    # so an empty one would block regeneration until the file is deleted.
    if hyp_report.hypotheses:
        cache_path.write_text(hyp_report.model_dump_json(indent=2))
        console.print(f"[green]✓ Cached → {cache_path}[/]")
    else:
        console.print("[yellow]No hypotheses generated — nothing cached, so the next run will try again.[/]")

console.print(f"\n[bold green]✓ {len(hyp_report.hypotheses)} hypotheses generated[/]")
console.print(f"  Computational: {len(hyp_report.computational)} | Pending review: {len(hyp_report.pending_review)}")
