### Imports and config loading

In [None]:
import asyncio
import dspy
from prompt_optimization.seed_prompts import ATOMIZER_PROMPT, PLANNER_PROMPT, AGGREGATOR_PROMPT, ATOMIZER_DEMOS, PLANNER_DEMOS
from dspy import GEPA

from prompt_optimization import (
    get_default_config,
    LMConfig,
    patch_romaconfig,
    load_aimo_datasets,
    ComponentJudge,
    MetricWithFeedback,
    create_optimizer,
)
from prompt_optimization.seed_prompts import (
    ATOMIZER_PROMPT,
    ATOMIZER_DEMOS,
    PLANNER_PROMPT,
    PLANNER_DEMOS,
    AGGREGATOR_PROMPT,
)
from roma_dspy.config import load_config
from roma_dspy.core.engine.solve import RecursiveSolver
from roma_dspy.core.modules.recursive_solver import RecursiveSolverModule
from roma_dspy.utils import AsyncParallelExecutor

# Turn on DSPy's `provide_traceback` setting for this session.
dspy.settings.provide_traceback = True  # optional but mirrors the old notebook
# Base configuration object, loaded from the "test" profile; every later cell
# mutates or reads this same `opt_cfg` instance.
# NOTE(review): `get_default_config` and `patch_romaconfig` are imported above but
# never called in this notebook -- confirm the object returned by `load_config`
# accepts the extra attributes (`*_lm`, `train_size`, ...) assigned below.
opt_cfg = load_config(profile="test")

### Configure LLMs

In [None]:
# Model identifiers used by more than one role, named once so that swapping a
# model is a single-line change.
_QWEN_235B = "cerebras/qwen-3-235b-a22b-instruct-2507"
_GPT_OSS_120B = "cerebras/gpt-oss-120b"
_CLAUDE_SONNET = "openrouter/anthropic/claude-sonnet-4.5"

# One LMConfig per role; each role keeps its own temperature and token budget.
opt_cfg.atomizer_lm = LMConfig(_QWEN_235B, temperature=0.35, max_tokens=128_000)
opt_cfg.planner_lm = LMConfig(_QWEN_235B, temperature=0.3, max_tokens=128_000)
opt_cfg.executor_lm = LMConfig(_GPT_OSS_120B, temperature=0.6, max_tokens=128_000)
opt_cfg.aggregator_lm = LMConfig(_GPT_OSS_120B, temperature=0.4, max_tokens=64_000)
# The judge is the only role that passes cache=True.
opt_cfg.judge_lm = LMConfig(
    _CLAUDE_SONNET, temperature=0.75, max_tokens=128_000, cache=True
)
opt_cfg.reflection_lm = LMConfig(_CLAUDE_SONNET, temperature=0.9, max_tokens=64_000)

In [None]:
# All run-size and parallelism knobs in one table, applied to `opt_cfg` in the
# order listed.
_run_knobs = {
    # Dataset split sizes and sampling seed (consumed by load_aimo_datasets below).
    "train_size": 32,
    "val_size": 8,
    "test_size": 8,
    "dataset_seed": 42,
    # Budget / parallelism settings.
    # NOTE(review): nothing visible in this notebook reads these four back --
    # confirm where they are consumed.
    "max_metric_calls": 225,
    "num_threads": 8,
    "max_parallel": 4,
    "concurrency": 4,
    # Forwarded to RecursiveSolver when the solver is built.
    "max_depth": 1,
    "enable_logging": True,
}
for _knob, _setting in _run_knobs.items():
    setattr(opt_cfg, _knob, _setting)

In [None]:
# Install the seed prompts as the signature instructions of each agent.
# NOTE(review): ATOMIZER_DEMOS and PLANNER_DEMOS are imported but never applied
# anywhere in this notebook, so no few-shot examples are actually attached here
# -- confirm whether that is intentional.
_agents = opt_cfg.agents
_agents.atomizer.signature_instructions = ATOMIZER_PROMPT
_agents.planner.signature_instructions = PLANNER_PROMPT
_agents.aggregator.signature_instructions = AGGREGATOR_PROMPT

### Initialize the solver, judge/metric, and datasets

In [None]:
# Collect the constructor options first so they can be inspected or tweaked in
# one place; depth and logging come from the knobs set on `opt_cfg`.
_solver_options = {
    "config": opt_cfg,
    "max_depth": opt_cfg.max_depth,
    "enable_logging": opt_cfg.enable_logging,
    "enable_checkpoints": False,
}
solver = RecursiveSolver(**_solver_options)

# Module wrapper around the solver; this is the object later handed to the
# optimizer's compile() call.
solver_module = RecursiveSolverModule(solver=solver)

In [None]:
# Build the judge from the judge LM configured on `opt_cfg`.
judge = ComponentJudge(lm_config=opt_cfg.judge_lm)  # keyword required after the refactor
# Wrap the judge in the metric object that is later passed to GEPA as `metric`.
metric = MetricWithFeedback(judge)

In [None]:
# Load the three AIMO splits using the sizes and seed configured on `opt_cfg`,
# then unpack them in (train, val, test) order.
_splits = load_aimo_datasets(
    seed=opt_cfg.dataset_seed,
    train_size=opt_cfg.train_size,
    val_size=opt_cfg.val_size,
    test_size=opt_cfg.test_size,
)
train_set, val_set, test_set = _splits

### Perform a baseline eval (note: the commented-out cell below uses `test_set`, not the val set)

In [None]:
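# Baseline evaluation, disabled by default. Uncomment both statements to define
# `results`, which the print cell below reads. The top-level `await` requires a
# notebook / IPython kernel.
# executor = AsyncParallelExecutor(max_concurrency=4)

# results = await executor.execute_batch(solver_module, test_set)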

In [None]:
# `results` is only bound when the (currently commented-out) evaluation cell has
# been executed. Guard the print so that running the notebook top to bottom does
# not stop here with a NameError.
if "results" in globals():
    print(results)
else:
    print("No evaluation results yet: run the evaluation cell above first.")

### Prompt tuning with GEPA

In [None]:
# LM handed to GEPA as `reflection_lm`, built separately to keep the call readable.
# NOTE(review): this is constructed directly rather than from
# `opt_cfg.reflection_lm`, and its temperature / max_tokens (0.75 / 128000)
# differ from that config (0.9 / 64_000) -- confirm which is intended.
_reflection_lm = dspy.LM(
    model="openrouter/anthropic/claude-sonnet-4.5",
    temperature=0.75,
    max_tokens=128_000,
)

# NOTE(review): `max_metric_calls=12` and `num_threads=12` are hard-coded here
# and do not match `opt_cfg.max_metric_calls` (225) / `opt_cfg.num_threads` (8)
# set earlier -- confirm whether this small budget is a deliberate smoke test.
optimizer = GEPA(
    metric=metric,
    reflection_lm=_reflection_lm,
    reflection_minibatch_size=8,
    component_selector="round_robin",
    # Budget (alternative: auto="light").
    max_metric_calls=12,
    num_threads=12,
    add_format_failure_as_feedback=True,
    # Run bookkeeping.
    track_stats=True,
    log_dir="logs/aime_test",
    # Optional Weights & Biases logging:
    # use_wandb=True,
    # wandb_init_kwargs={"project": "aime_test"},
)

In [None]:
# Run the GEPA optimization on the solver module, using the train split as
# `trainset` and the val split as `valset`. The test split is not passed to the
# optimizer.
optimized_program = optimizer.compile(
    solver_module,
    trainset=train_set,
    valset=val_set,
)

In [None]:
# Bare expression as the last line of the cell: the notebook displays its repr.
optimized_program