From b15e8cd6d0956ddf3196fc16facacfe67343de79 Mon Sep 17 00:00:00 2001
From: Isaac Miller
Date: Mon, 6 Oct 2025 11:49:11 -0400
Subject: [PATCH 1/5] fix(MIPROv2): account for .compile() parameters before
 determining whether the program is zero-shot

---
 dspy/teleprompt/mipro_optimizer_v2.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/dspy/teleprompt/mipro_optimizer_v2.py b/dspy/teleprompt/mipro_optimizer_v2.py
index cf76f7daf9..c28870bece 100644
--- a/dspy/teleprompt/mipro_optimizer_v2.py
+++ b/dspy/teleprompt/mipro_optimizer_v2.py
@@ -127,6 +127,15 @@ def compile(
             if self.max_errors is not None
             else dspy.settings.max_errors
         )
+
+        # Update max demos if specified
+        initial_max_bootstrapped_demos = self.max_bootstrapped_demos
+        if max_bootstrapped_demos is not None:
+            self.max_bootstrapped_demos = max_bootstrapped_demos
+        initial_max_labeled_demos = self.max_labeled_demos
+        if max_labeled_demos is not None:
+            self.max_labeled_demos = max_labeled_demos
+
         zeroshot_opt = (self.max_bootstrapped_demos == 0) and (self.max_labeled_demos == 0)
 
         # If auto is None, and num_trials is not provided (but num_candidates is), raise an error that suggests a good num_trials value
@@ -149,11 +158,6 @@ def compile(
         seed = seed or self.seed
         self._set_random_seeds(seed)
 
-        # Update max demos if specified
-        if max_bootstrapped_demos is not None:
-            self.max_bootstrapped_demos = max_bootstrapped_demos
-        if max_labeled_demos is not None:
-            self.max_labeled_demos = max_labeled_demos
 
         # Set training & validation sets
         trainset, valset = self._set_and_validate_datasets(trainset, valset)
@@ -199,7 +203,10 @@ def compile(
 
             # If zero-shot, discard demos
             if zeroshot_opt:
+                logger.info("ZEROSHOT IS TRUE")
                 demo_candidates = None
+            else:
+                logger.info("ZEROSHOT IS FALSE")
 
             with dspy.context(lm=self.task_model):
                 # Step 3: Find optimal prompt parameters
@@ -216,6 +223,10 @@ def compile(
                 seed,
             )
 
+        # Reset max demos
+        self.max_bootstrapped_demos = initial_max_bootstrapped_demos
+        self.max_labeled_demos = initial_max_labeled_demos
+
         return best_program
 
     def _set_random_seeds(self, seed):
@@ -447,6 +458,7 @@ def _optimize_prompt_parameters(
         logger.info(
             "We will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination using Bayesian Optimization.\n"
         )
+        logger.info(f"INSIDE OPTIMIZE PROMPT PARAMS: {demo_candidates}")
 
         # Compute the adjusted total trials that we will run (including full evals)
         run_additional_full_eval_at_end = 1 if num_trials % minibatch_full_eval_steps != 0 else 0

From 6d12f223ee850b96b2c49bb851452ab1ff8ee651 Mon Sep 17 00:00:00 2001
From: Isaac Miller
Date: Mon, 6 Oct 2025 11:50:33 -0400
Subject: [PATCH 2/5] Remove extra logs

---
 dspy/teleprompt/mipro_optimizer_v2.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/dspy/teleprompt/mipro_optimizer_v2.py b/dspy/teleprompt/mipro_optimizer_v2.py
index c28870bece..d7dd2e307c 100644
--- a/dspy/teleprompt/mipro_optimizer_v2.py
+++ b/dspy/teleprompt/mipro_optimizer_v2.py
@@ -203,10 +203,7 @@ def compile(
 
             # If zero-shot, discard demos
             if zeroshot_opt:
-                logger.info("ZEROSHOT IS TRUE")
                 demo_candidates = None
-            else:
-                logger.info("ZEROSHOT IS FALSE")
 
             with dspy.context(lm=self.task_model):
                 # Step 3: Find optimal prompt parameters
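Reviewer note: a minimal sketch of the behavior PATCH 1/5 targets (the metric, program, and trainset below are illustrative stand-ins, not taken from the diff). Before the fix, zeroshot_opt was computed from the constructor-time budgets alone, so demo limits passed to .compile() were ignored when deciding whether the run was zero-shot:

    import dspy
    from dspy.teleprompt import MIPROv2

    def exact_match(example, prediction, trace=None):  # toy metric, for illustration only
        return example.answer == prediction.answer

    program = dspy.Predict("question -> answer")
    trainset = [
        dspy.Example(question="2 + 2?", answer="4").with_inputs("question"),
        dspy.Example(question="3 + 3?", answer="6").with_inputs("question"),
    ]

    # Constructed with nonzero demo budgets...
    optimizer = MIPROv2(metric=exact_match, max_bootstrapped_demos=4, max_labeled_demos=4)

    # ...then compiled with both budgets forced to 0. Before this patch,
    # zeroshot_opt was evaluated against self.* prior to applying these
    # overrides, so the run was not treated as zero-shot and bootstrapped
    # demo candidates were kept.
    compiled = optimizer.compile(
        program,
        trainset=trainset,
        max_bootstrapped_demos=0,
        max_labeled_demos=0,
    )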
From 8dc8d0d48a4b7a4fd075c6afb5d577da3f67462b Mon Sep 17 00:00:00 2001
From: Isaac Miller
Date: Mon, 6 Oct 2025 11:52:12 -0400
Subject: [PATCH 3/5] Remove log

---
 dspy/teleprompt/mipro_optimizer_v2.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dspy/teleprompt/mipro_optimizer_v2.py b/dspy/teleprompt/mipro_optimizer_v2.py
index d7dd2e307c..cdea76b8a6 100644
--- a/dspy/teleprompt/mipro_optimizer_v2.py
+++ b/dspy/teleprompt/mipro_optimizer_v2.py
@@ -455,7 +455,6 @@ def _optimize_prompt_parameters(
         logger.info(
             "We will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination using Bayesian Optimization.\n"
         )
-        logger.info(f"INSIDE OPTIMIZE PROMPT PARAMS: {demo_candidates}")
 
         # Compute the adjusted total trials that we will run (including full evals)
         run_additional_full_eval_at_end = 1 if num_trials % minibatch_full_eval_steps != 0 else 0
From 2e898bcd34875817bf89f8d3ef15f53993cc2b79 Mon Sep 17 00:00:00 2001
From: Isaac Miller
Date: Tue, 7 Oct 2025 09:37:23 -0400
Subject: [PATCH 4/5] Fix merge conflict

---
 dspy/teleprompt/mipro_optimizer_v2.py | 143 ++++++++++++++++++--------
 1 file changed, 101 insertions(+), 42 deletions(-)

diff --git a/dspy/teleprompt/mipro_optimizer_v2.py b/dspy/teleprompt/mipro_optimizer_v2.py
index cdea76b8a6..bc00685556 100644
--- a/dspy/teleprompt/mipro_optimizer_v2.py
+++ b/dspy/teleprompt/mipro_optimizer_v2.py
@@ -128,15 +128,14 @@ def compile(
             else dspy.settings.max_errors
         )
 
-        # Update max demos if specified
-        initial_max_bootstrapped_demos = self.max_bootstrapped_demos
-        if max_bootstrapped_demos is not None:
-            self.max_bootstrapped_demos = max_bootstrapped_demos
-        initial_max_labeled_demos = self.max_labeled_demos
-        if max_labeled_demos is not None:
-            self.max_labeled_demos = max_labeled_demos
+        effective_max_bootstrapped_demos = (
+            max_bootstrapped_demos if max_bootstrapped_demos is not None else self.max_bootstrapped_demos
+        )
+        effective_max_labeled_demos = (
+            max_labeled_demos if max_labeled_demos is not None else self.max_labeled_demos
+        )
 
-        zeroshot_opt = (self.max_bootstrapped_demos == 0) and (self.max_labeled_demos == 0)
+        zeroshot_opt = (effective_max_bootstrapped_demos == 0) and (effective_max_labeled_demos == 0)
 
         # If auto is None, and num_trials is not provided (but num_candidates is), raise an error that suggests a good num_trials value
         if self.auto is None and (self.num_candidates is not None and num_trials is None):
@@ -162,13 +161,42 @@ def compile(
         # Set training & validation sets
         trainset, valset = self._set_and_validate_datasets(trainset, valset)
 
+        num_instruct_candidates = (
+            self.num_instruct_candidates
+            if self.num_instruct_candidates is not None
+            else self.num_candidates
+        )
+        num_fewshot_candidates = (
+            self.num_fewshot_candidates
+            if self.num_fewshot_candidates is not None
+            else self.num_candidates
+        )
+
         # Set hyperparameters based on run mode (if set)
-        num_trials, valset, minibatch = self._set_hyperparams_from_run_mode(
-            student, num_trials, minibatch, zeroshot_opt, valset
+        (
+            num_trials,
+            valset,
+            minibatch,
+            num_instruct_candidates,
+            num_fewshot_candidates,
+        ) = self._set_hyperparams_from_run_mode(
+            student,
+            num_trials,
+            minibatch,
+            zeroshot_opt,
+            valset,
+            num_instruct_candidates,
+            num_fewshot_candidates,
         )
 
         if self.auto:
-            self._print_auto_run_settings(num_trials, minibatch, valset)
+            self._print_auto_run_settings(
+                num_trials,
+                minibatch,
+                valset,
+                num_fewshot_candidates,
+                num_instruct_candidates,
+            )
 
         if minibatch and minibatch_size > len(valset):
             raise ValueError(f"Minibatch size cannot exceed the size of the valset. Valset size: {len(valset)}.")
@@ -186,8 +214,19 @@ def compile(
         )
 
         with dspy.context(lm=self.task_model):
+
             # Step 1: Bootstrap few-shot examples
-            demo_candidates = self._bootstrap_fewshot_examples(program, trainset, seed, teacher)
+            demo_candidates = self._bootstrap_fewshot_examples(
+                program,
+                trainset,
+                seed,
+                teacher,
+                num_fewshot_candidates=num_fewshot_candidates,
+                max_bootstrapped_demos=effective_max_bootstrapped_demos,
+                max_labeled_demos=effective_max_labeled_demos,
+                max_errors=effective_max_errors,
+                metric_threshold=self.metric_threshold,
+            )
 
             # Step 2: Propose instruction candidates
             instruction_candidates = self._propose_instructions(
@@ -199,6 +238,7 @@ def compile(
                 data_aware_proposer,
                 tip_aware_proposer,
                 fewshot_aware_proposer,
+                num_instruct_candidates=num_instruct_candidates,
             )
 
             # If zero-shot, discard demos
@@ -220,10 +260,6 @@ def compile(
                 seed,
             )
 
-        # Reset max demos
-        self.max_bootstrapped_demos = initial_max_bootstrapped_demos
-        self.max_labeled_demos = initial_max_labeled_demos
-
         return best_program
 
     def _set_random_seeds(self, seed):
@@ -242,13 +278,17 @@ def _set_num_trials_from_num_candidates(self, program, zeroshot_opt, num_candida
     def _set_hyperparams_from_run_mode(
         self,
         program: Any,
-        num_trials: int,
+        num_trials: int | None,
         minibatch: bool,
         zeroshot_opt: bool,
         valset: list,
-    ) -> tuple[int, list, bool]:
+        num_instruct_candidates: int | None,
+        num_fewshot_candidates: int | None,
+    ) -> tuple[int, list, bool, int, int]:
         if self.auto is None:
-            return num_trials, valset, minibatch
+            if num_instruct_candidates is None or num_fewshot_candidates is None:
+                raise ValueError("num_candidates must be provided when auto is None.")
+            return num_trials, valset, minibatch, num_instruct_candidates, num_fewshot_candidates
 
         auto_settings = AUTO_RUN_SETTINGS[self.auto]
 
@@ -258,12 +298,12 @@ def _set_hyperparams_from_run_mode(
         # Set num instruct candidates to 1/2 of N if optimizing with few-shot examples, otherwise set to N
         # This is because we've found that it's generally better to spend optimization budget on few-shot examples
        # When they are allowed.
-        self.num_instruct_candidates = auto_settings["n"] if zeroshot_opt else int(auto_settings["n"] * 0.5)
-        self.num_fewshot_candidates = auto_settings["n"]
+        num_instruct_candidates = auto_settings["n"] if zeroshot_opt else int(auto_settings["n"] * 0.5)
+        num_fewshot_candidates = auto_settings["n"]
 
         num_trials = self._set_num_trials_from_num_candidates(program, zeroshot_opt, auto_settings["n"])
 
-        return num_trials, valset, minibatch
+        return num_trials, valset, minibatch, num_instruct_candidates, num_fewshot_candidates
 
     def _set_and_validate_datasets(self, trainset: list, valset: list | None):
         if not trainset:
@@ -282,13 +322,20 @@ def _set_and_validate_datasets(self, trainset: list, valset: list | None):
 
         return trainset, valset
 
-    def _print_auto_run_settings(self, num_trials: int, minibatch: bool, valset: list):
+    def _print_auto_run_settings(
+        self,
+        num_trials: int,
+        minibatch: bool,
+        valset: list,
+        num_fewshot_candidates: int,
+        num_instruct_candidates: int,
+    ):
         logger.info(
             f"\nRUNNING WITH THE FOLLOWING {self.auto.upper()} AUTO RUN SETTINGS:"
             f"\nnum_trials: {num_trials}"
             f"\nminibatch: {minibatch}"
-            f"\nnum_fewshot_candidates: {self.num_fewshot_candidates}"
-            f"\nnum_instruct_candidates: {self.num_instruct_candidates}"
+            f"\nnum_fewshot_candidates: {num_fewshot_candidates}"
+            f"\nnum_instruct_candidates: {num_instruct_candidates}"
             f"\nvalset size: {len(valset)}\n"
         )
 
@@ -301,18 +348,19 @@ def _estimate_lm_calls(
         minibatch_full_eval_steps: int,
         valset: list,
         program_aware_proposer: bool,
+        num_instruct_candidates: int,
     ) -> tuple[str, str]:
         num_predictors = len(program.predictors())
 
         # Estimate prompt model calls
         estimated_prompt_model_calls = (
             10  # Data summarizer calls
-            + self.num_instruct_candidates * num_predictors  # Candidate generation
+            + num_instruct_candidates * num_predictors  # Candidate generation
             + (num_predictors + 1 if program_aware_proposer else 0)  # Program-aware proposer
         )
 
         prompt_model_line = (
             f"{YELLOW}- Prompt Generation: {BLUE}{BOLD}10{ENDC}{YELLOW} data summarizer calls + "
-            f"{BLUE}{BOLD}{self.num_instruct_candidates}{ENDC}{YELLOW} * "
+            f"{BLUE}{BOLD}{num_instruct_candidates}{ENDC}{YELLOW} * "
             f"{BLUE}{BOLD}{num_predictors}{ENDC}{YELLOW} lm calls in program "
             f"+ ({BLUE}{BOLD}{num_predictors + 1}{ENDC}{YELLOW}) lm calls in program-aware proposer "
             f"= {BLUE}{BOLD}{estimated_prompt_model_calls}{ENDC}{YELLOW} prompt model calls{ENDC}"
@@ -339,38 +387,48 @@ def _estimate_lm_calls(
 
         return prompt_model_line, task_model_line
 
-    def _bootstrap_fewshot_examples(self, program: Any, trainset: list, seed: int, teacher: Any) -> list | None:
+    def _bootstrap_fewshot_examples(
+        self,
+        program: Any,
+        trainset: list,
+        seed: int,
+        teacher: Any,
+        *,
+        num_fewshot_candidates: int,
+        max_bootstrapped_demos: int,
+        max_labeled_demos: int,
+        max_errors: int | None,
+        metric_threshold: float | None,
+    ) -> list | None:
         logger.info("\n==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==")
-        if self.max_bootstrapped_demos > 0:
+        if max_bootstrapped_demos > 0:
             logger.info(
                 "These will be used as few-shot example candidates for our program and for creating instructions.\n"
             )
         else:
             logger.info("These will be used for informing instruction proposal.\n")
 
-        logger.info(f"Bootstrapping N={self.num_fewshot_candidates} sets of demonstrations...")
+        logger.info(f"Bootstrapping N={num_fewshot_candidates} sets of demonstrations...")
 
-        zeroshot = self.max_bootstrapped_demos == 0 and self.max_labeled_demos == 0
+        zeroshot = max_bootstrapped_demos == 0 and max_labeled_demos == 0
 
-        # try:
-        effective_max_errors = (
-            self.max_errors if self.max_errors is not None else dspy.settings.max_errors
-        )
+        if max_errors is None:
+            max_errors = dspy.settings.max_errors
 
         demo_candidates = create_n_fewshot_demo_sets(
             student=program,
-            num_candidate_sets=self.num_fewshot_candidates,
+            num_candidate_sets=num_fewshot_candidates,
             trainset=trainset,
-            max_labeled_demos=(LABELED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else self.max_labeled_demos),
+            max_labeled_demos=(LABELED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else max_labeled_demos),
             max_bootstrapped_demos=(
-                BOOTSTRAPPED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else self.max_bootstrapped_demos
+                BOOTSTRAPPED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else max_bootstrapped_demos
             ),
             metric=self.metric,
-            max_errors=effective_max_errors,
+            max_errors=max_errors,
             teacher=teacher,
             teacher_settings=self.teacher_settings,
             seed=seed,
-            metric_threshold=self.metric_threshold,
+            metric_threshold=metric_threshold,
             rng=self.rng,
         )  # NOTE: Bootstrapping is essential to MIPRO!
@@ -392,6 +450,7 @@ def _propose_instructions(
         data_aware_proposer: bool,
         tip_aware_proposer: bool,
         fewshot_aware_proposer: bool,
+        num_instruct_candidates: int,
    ) -> dict[int, list[str]]:
         logger.info("\n==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==")
         logger.info(
@@ -416,12 +475,12 @@ def _propose_instructions(
             init_temperature=self.init_temperature,
         )
 
-        logger.info(f"\nProposing N={self.num_instruct_candidates} instructions...\n")
+        logger.info(f"\nProposing N={num_instruct_candidates} instructions...\n")
         instruction_candidates = proposer.propose_instructions_for_program(
             trainset=trainset,
             program=program,
             demo_candidates=demo_candidates,
-            N=self.num_instruct_candidates,
+            N=num_instruct_candidates,
             trial_logs={},
         )
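Reviewer note: the core of PATCH 4/5, reduced to a toy sketch (the class and names below are illustrative, not the real API). Per-call overrides are resolved into effective_* locals and threaded through as explicit, keyword-only parameters instead of mutating optimizer state and restoring it afterwards. Repeated or concurrent compile() calls can then no longer observe each other's temporary values, and there is no state left to restore if an error is raised mid-run:

    class Optimizer:
        def __init__(self, max_demos: int = 4):
            self.max_demos = max_demos

        def compile(self, max_demos: int | None = None) -> int:
            # Resolve the override into a local; self.max_demos is never touched.
            effective_max_demos = max_demos if max_demos is not None else self.max_demos
            return self._bootstrap(max_demos=effective_max_demos)

        def _bootstrap(self, *, max_demos: int) -> int:
            # Keyword-only (note the bare *), mirroring the reworked
            # _bootstrap_fewshot_examples signature in this patch.
            return max_demos

    opt = Optimizer(max_demos=4)
    assert opt.compile(max_demos=0) == 0  # override honored for this call only
    assert opt.max_demos == 4             # configured budget left intact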
From 5e5013384458a5d85123f0944a7dcf0a95b86ad9 Mon Sep 17 00:00:00 2001
From: Isaac Miller
Date: Tue, 7 Oct 2025 09:38:25 -0400
Subject: [PATCH 5/5] Remove extra whitespace

---
 dspy/teleprompt/mipro_optimizer_v2.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dspy/teleprompt/mipro_optimizer_v2.py b/dspy/teleprompt/mipro_optimizer_v2.py
index bc00685556..c96d71d3f5 100644
--- a/dspy/teleprompt/mipro_optimizer_v2.py
+++ b/dspy/teleprompt/mipro_optimizer_v2.py
@@ -214,7 +214,6 @@ def compile(
         )
 
         with dspy.context(lm=self.task_model):
-
             # Step 1: Bootstrap few-shot examples
             demo_candidates = self._bootstrap_fewshot_examples(
                 program,
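Reviewer note: taken together, the series makes the zero-shot decision a pure function of the effective demo budgets. A distilled sketch of that invariant (the helper below is hypothetical, written only to state the rule, not code from the patch):

    def is_zeroshot(configured_bootstrapped: int, configured_labeled: int,
                    override_bootstrapped: int | None = None,
                    override_labeled: int | None = None) -> bool:
        # Overrides passed to .compile() win; constructor values are the fallback.
        eff_boot = override_bootstrapped if override_bootstrapped is not None else configured_bootstrapped
        eff_labeled = override_labeled if override_labeled is not None else configured_labeled
        return eff_boot == 0 and eff_labeled == 0

    assert is_zeroshot(4, 4, 0, 0) is True   # the case PATCH 1/5 fixes
    assert is_zeroshot(0, 0) is True         # constructor-time zero-shot still detected
    assert is_zeroshot(4, 4) is False
    assert is_zeroshot(0, 0, 4, 4) is False  # overrides can also opt back in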