### Imports and configuration loading

In [1]:
import asyncio
import dspy
from prompt_optimization.seed_prompts import ATOMIZER_PROMPT, PLANNER_PROMPT, AGGREGATOR_PROMPT, ATOMIZER_DEMOS, PLANNER_DEMOS
from dspy import GEPA

from prompt_optimization import (
    get_default_config,
    LMConfig,
    patch_romaconfig,
    load_aimo_datasets,
    load_simpleqa_verified_dataset,
    load_frames_dataset,
    load_seal0_dataset,
    ComponentJudge,
    MetricWithFeedback,
    create_optimizer,
)
from prompt_optimization.seed_prompts import (
    ATOMIZER_PROMPT,
    ATOMIZER_DEMOS,
    PLANNER_PROMPT,
    PLANNER_DEMOS,
    AGGREGATOR_PROMPT,
)
from prompt_optimization.grader_prompt import GRADER_PROMPT
from roma_dspy.config import load_config
from roma_dspy.core.engine.solve import RecursiveSolver
from roma_dspy.core.modules.recursive_solver import RecursiveSolverModule
from roma_dspy.utils import AsyncParallelExecutor

# Turn on DSPy's `provide_traceback` setting. Optional; kept so this notebook
# behaves like the earlier notebook it was ported from.
dspy.settings.provide_traceback = True
# Load the configuration for the "test" profile. Later cells attach the
# optimization knobs and the seed prompts to this same object.
opt_cfg = load_config(profile="test")

[32m2025-10-17 13:54:37.128[0m | [34m[1mDEBUG   [0m | [36mroma_dspy.config.manager[0m:[36mload_config[0m:[36m57[0m - [34m[1mLoading config: path=None, profile=test, overrides=None, env_prefix=ROMA_[0m
[32m2025-10-17 13:54:37.128[0m | [34m[1mDEBUG   [0m | [36mroma_dspy.config.manager[0m:[36mload_config[0m:[36m66[0m - [34m[1mInitialized empty base config (defaults applied in validation)[0m
[32m2025-10-17 13:54:37.136[0m | [34m[1mDEBUG   [0m | [36mroma_dspy.config.manager[0m:[36m_load_yaml[0m:[36m129[0m - [34m[1mLoaded and cached config from config/defaults/config.yaml[0m
[32m2025-10-17 13:54:37.137[0m | [34m[1mDEBUG   [0m | [36mroma_dspy.config.manager[0m:[36mload_config[0m:[36m81[0m - [34m[1mMerged default config from config/defaults/config.yaml[0m
[32m2025-10-17 13:54:37.146[0m | [34m[1mDEBUG   [0m | [36mroma_dspy.config.manager[0m:[36m_load_yaml[0m:[36m129[0m - [34m[1mLoaded and cached config from config/profiles/tes

### Configure optimization settings and seed prompts

In [2]:
# All tunable run settings live in one mapping so they can be reviewed and
# adjusted together, then attached to the shared config object in order.
optimization_knobs = {
    # Dataset split sizes and sampling seed (read by the dataset-loading cell).
    "train_size": 32,
    "val_size": 8,
    "test_size": 8,
    "dataset_seed": 42,
    # Metric-call budget knob.
    "max_metric_calls": 225,
    # Solver settings (read when the RecursiveSolver is constructed).
    "max_depth": 1,
    "enable_logging": False,
}
for knob_name, knob_value in optimization_knobs.items():
    setattr(opt_cfg, knob_name, knob_value)

In [3]:
# Install the seed prompts as the signature instructions of the atomizer,
# planner, and aggregator agents.
# NOTE(review): only prompts are set here. ATOMIZER_DEMOS and PLANNER_DEMOS are
# imported at the top of the notebook but are not attached in this cell —
# confirm whether the few-shot demos should be configured as well.
opt_cfg.agents.atomizer.signature_instructions = ATOMIZER_PROMPT
opt_cfg.agents.planner.signature_instructions = PLANNER_PROMPT
opt_cfg.agents.aggregator.signature_instructions = AGGREGATOR_PROMPT

### Initialize the solver, judge, metric, and dataset

In [4]:
# Constructor arguments for the recursive solver. Depth and logging come from
# the knobs attached to opt_cfg; checkpointing is switched off for this run.
solver_kwargs = {
    "config": opt_cfg,
    "max_depth": opt_cfg.max_depth,
    "enable_logging": opt_cfg.enable_logging,
    "enable_checkpoints": False,
}
solver = RecursiveSolver(**solver_kwargs)

# Wrap the solver in the module object that is evaluated and optimized below.
solver_module = RecursiveSolverModule(solver=solver)

[32m2025-10-17 13:54:42.425[0m | [34m[1mDEBUG   [0m | [36mroma_dspy.core.factory.agent_factory[0m:[36m_resolve_signature[0m:[36m119[0m - [34m[1mUsing default signature for atomizer[0m
[32m2025-10-17 13:54:42.427[0m | [1mINFO    [0m | [36mroma_dspy.core.factory.agent_factory[0m:[36mcreate_agent[0m:[36m91[0m - [1mCreated atomizer agent (task_type=default, signature=default)[0m
[32m2025-10-17 13:54:42.427[0m | [34m[1mDEBUG   [0m | [36mroma_dspy.core.registry.agent_registry[0m:[36mregister_agent[0m:[36m180[0m - [34m[1mRegistered atomizer instance #1 (task_type=default)[0m
[32m2025-10-17 13:54:42.428[0m | [34m[1mDEBUG   [0m | [36mroma_dspy.core.factory.agent_factory[0m:[36m_resolve_signature[0m:[36m119[0m - [34m[1mUsing default signature for planner[0m
[32m2025-10-17 13:54:42.429[0m | [1mINFO    [0m | [36mroma_dspy.core.factory.agent_factory[0m:[36mcreate_agent[0m:[36m91[0m - [1mCreated planner agent (task_type=default, signat

In [7]:
# LM settings for the judge that grades solver outputs.
# NOTE(review): max_tokens=128000 is very large — confirm the provider accepts
# this output limit for the chosen model.
judge_lm = LMConfig(
    "openrouter/anthropic/claude-sonnet-4.5",
    temperature=0.75,
    max_tokens=128000,
    cache=True,
)

In [8]:
# Build the judge from the grader prompt and the judge LM settings.
# Arguments are passed by keyword, which is required after the refactor.
judge = ComponentJudge(prompt=GRADER_PROMPT, lm_config=judge_lm)
# Wrap the judge in the metric object that is later handed to the optimizer.
metric = MetricWithFeedback(judge)

In [9]:

# Load the three dataset splits via load_frames_dataset, using the split sizes
# and sampling seed configured on opt_cfg, then unpack them by role.
frames_splits = load_frames_dataset(
    train_size=opt_cfg.train_size,
    val_size=opt_cfg.val_size,
    test_size=opt_cfg.test_size,
    seed=opt_cfg.dataset_seed,
)
train_set, val_set, test_set = frames_splits

### Evaluate the unoptimized solver (optional baseline)

In [10]:
# Optional baseline run (currently disabled). Uncomment both statements to
# execute `solver_module` over `test_set` with max_concurrency=4 before
# running the optimizer.
# NOTE(review): this targets `test_set`; switch to `val_set` if the validation
# split is the one that should be measured.
# executor = AsyncParallelExecutor(max_concurrency=4)

# results = await executor.execute_batch(solver_module, test_set)

In [11]:
# Disabled together with the baseline run in the previous cell; uncomment
# (after enabling that cell) to inspect `results`.
# print(results)

### Prompt optimization with GEPA

In [None]:
# LM passed to GEPA as `reflection_lm`.
reflection_lm = dspy.LM(
    model="openrouter/anthropic/claude-sonnet-4.5",
    temperature=0.75,
    max_tokens=128000,
)

# Configure the GEPA optimizer.
#
# The metric-call budget is read from `opt_cfg.max_metric_calls` (set in the
# configuration cell) rather than from a separate hard-coded literal, so the
# knob defined there actually controls the run.
# NOTE: this raises the budget from the previous literal 32 to the configured
# value (225); lower the knob in the configuration cell for a quick smoke test.
#
# Left disabled on purpose: the `auto="light"` budget preset (an alternative
# to `max_metric_calls`) and Weights & Biases logging (`use_wandb`,
# `wandb_init_kwargs`).
optimizer = GEPA(
    metric=metric,
    component_selector="round_robin",
    max_metric_calls=opt_cfg.max_metric_calls,
    add_format_failure_as_feedback=True,
    num_threads=6,
    track_stats=True,
    log_dir="logs/frames_test",
    reflection_minibatch_size=8,
    reflection_lm=reflection_lm,
)

In [None]:
# Run the optimizer on the solver module, using the training split for
# optimization and the validation split as `valset`.
# NOTE(review): presumably long-running, since the metric is backed by an LM
# judge — consider timing this cell (e.g. `%%time`) so readers know the cost.
optimized_program = optimizer.compile(
    solver_module,
    trainset=train_set,
    valset=val_set,
)

In [None]:
# Display the optimized program's named predictors to inspect the result of
# the optimization run.
optimized_program.named_predictors()