### Imports and config loading

In [None]:
import asyncio
import dspy
from dspy import GEPA

from prompt_optimization.config import (
    LMConfig,
    get_default_config,
    patch_romaconfig,
)
from prompt_optimization.datasets import (
    load_aimo_datasets,
    load_frames_dataset,
    load_seal0_dataset,
    load_simpleqa_verified_dataset,
)
from prompt_optimization.judge import ComponentJudge
from prompt_optimization.metrics import MetricWithFeedback
from prompt_optimization.optimizer import create_optimizer
from prompt_optimization.prompts import (
    AGGREGATOR_PROMPT,
    ATOMIZER_DEMOS,
    ATOMIZER_PROMPT,
    GRADER_PROMPT,
    PLANNER_DEMOS,
    PLANNER_PROMPT,
)
from roma_dspy.config import load_config
from roma_dspy.core.engine.solve import RecursiveSolver
from roma_dspy.core.modules.recursive_solver import RecursiveSolverModule
from roma_dspy.utils import AsyncParallelExecutor

# NOTE(review): presumably makes DSPy include tracebacks in its error output —
# confirm against the DSPy settings documentation.
dspy.settings.provide_traceback = True  # optional but mirrors the old notebook
# Base configuration for this run, loaded from the "test" profile; the cells
# below mutate it in place.
opt_cfg = load_config(profile="test")

### Configure run settings and agent prompts

In [None]:
# All the knobs that used to be tweaked one by one in the notebook, applied to
# the loaded config in a single pass (insertion order is preserved).
# NOTE(review): the GEPA cell further down passes max_metric_calls=32 directly
# instead of reading opt_cfg.max_metric_calls — confirm which budget is meant.
_RUN_KNOBS = {
    "train_size": 32,
    "val_size": 8,
    "test_size": 8,
    "dataset_seed": 42,
    "max_metric_calls": 225,
    "max_depth": 1,
    "enable_logging": False,
}
for _knob, _setting in _RUN_KNOBS.items():
    setattr(opt_cfg, _knob, _setting)

In [None]:
# Install the instruction prompt for each agent on the config.
# NOTE(review): ATOMIZER_DEMOS / PLANNER_DEMOS are imported at the top of the
# file, but no few-shot demos are attached in this cell — confirm whether that
# is intentional.
for _agent, _prompt in (
    ("atomizer", ATOMIZER_PROMPT),
    ("planner", PLANNER_PROMPT),
    ("aggregator", AGGREGATOR_PROMPT),
):
    getattr(opt_cfg.agents, _agent).signature_instructions = _prompt

### Initialize the solver, judge, metric, and datasets

In [None]:
# Solver options: depth and logging come from the tuned config; checkpointing
# is switched off for this run.
_solver_options = {
    "config": opt_cfg,
    "max_depth": opt_cfg.max_depth,
    "enable_logging": opt_cfg.enable_logging,
    "enable_checkpoints": False,
}
solver = RecursiveSolver(**_solver_options)

# Module wrapper around the solver; this is the object handed to
# optimizer.compile() later on.
solver_module = RecursiveSolverModule(solver=solver)

In [None]:
# LM configuration handed to the ComponentJudge in the next cell.
# NOTE(review): confirm that max_tokens=128000 is within the output limit the
# provider accepts for this model.
judge_lm = LMConfig(
    "openrouter/anthropic/claude-sonnet-4.5",
    cache=True,
    max_tokens=128000,
    temperature=0.75,
)

In [None]:
# Judge built from the grader prompt and the judge LM config; the arguments
# are passed by keyword because that is required after the refactor.
judge = ComponentJudge(
    lm_config=judge_lm,
    prompt=GRADER_PROMPT,
)

# Metric wrapping the judge; this is what GEPA receives as `metric`.
metric = MetricWithFeedback(judge)

In [None]:

# Split sizes and sampling seed are taken from the knobs set on opt_cfg.
_split_request = {
    "train_size": opt_cfg.train_size,
    "val_size": opt_cfg.val_size,
    "test_size": opt_cfg.test_size,
    "seed": opt_cfg.dataset_seed,
}
# load_frames_dataset returns the three splits in train / val / test order.
train_set, val_set, test_set = load_frames_dataset(**_split_request)

### Baseline eval on the test set (currently disabled)

In [None]:
# executor = AsyncParallelExecutor(max_concurrency=4)

# results = await executor.execute_batch(solver_module, test_set)

In [None]:
# print(results)

### Prompt optimization with GEPA

In [None]:
# LM passed to GEPA as its reflection model.
# NOTE(review): confirm that max_tokens=128000 is within the output limit the
# provider accepts for this model.
reflection_lm = dspy.LM(
    model="openrouter/anthropic/claude-sonnet-4.5",
    temperature=0.75,
    max_tokens=128000,
)

optimizer = GEPA(
    # What is scored, and which LM reflects on the results.
    metric=metric,
    reflection_lm=reflection_lm,
    reflection_minibatch_size=8,
    component_selector="round_robin",
    add_format_failure_as_feedback=True,
    # Budget and parallelism.
    # NOTE(review): opt_cfg.max_metric_calls is set to 225 earlier in the file
    # but is not used here — confirm which budget is intended.
    max_metric_calls=32,
    # auto="light",
    num_threads=6,
    # Bookkeeping.
    track_stats=True,
    log_dir="logs/frames_test",
    # use_wandb=True,
    # wandb_init_kwargs={"project": "aime_test"},
)

In [None]:
# Run the optimizer on the solver module, using the train split as the
# training set and the val split as the validation set.
optimized_program = optimizer.compile(solver_module, trainset=train_set, valset=val_set)

In [None]:
# Bare expression at the end of the cell: the notebook echoes its value, which
# lists the named predictors of the optimized program for inspection.
optimized_program.named_predictors()