Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 102 additions & 36 deletions dspy/teleprompt/mipro_optimizer_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,15 @@ def compile(
if self.max_errors is not None
else dspy.settings.max_errors
)
zeroshot_opt = (self.max_bootstrapped_demos == 0) and (self.max_labeled_demos == 0)

effective_max_bootstrapped_demos = (
max_bootstrapped_demos if max_bootstrapped_demos is not None else self.max_bootstrapped_demos
)
effective_max_labeled_demos = (
max_labeled_demos if max_labeled_demos is not None else self.max_labeled_demos
)

zeroshot_opt = (effective_max_bootstrapped_demos == 0) and (effective_max_labeled_demos == 0)

# If auto is None, and num_trials is not provided (but num_candidates is), raise an error that suggests a good num_trials value
if self.auto is None and (self.num_candidates is not None and num_trials is None):
Expand All @@ -149,22 +157,46 @@ def compile(
seed = seed or self.seed
self._set_random_seeds(seed)

# Update max demos if specified
if max_bootstrapped_demos is not None:
self.max_bootstrapped_demos = max_bootstrapped_demos
if max_labeled_demos is not None:
self.max_labeled_demos = max_labeled_demos

# Set training & validation sets
trainset, valset = self._set_and_validate_datasets(trainset, valset)

num_instruct_candidates = (
self.num_instruct_candidates
if self.num_instruct_candidates is not None
else self.num_candidates
)
num_fewshot_candidates = (
self.num_fewshot_candidates
if self.num_fewshot_candidates is not None
else self.num_candidates
)

# Set hyperparameters based on run mode (if set)
num_trials, valset, minibatch = self._set_hyperparams_from_run_mode(
student, num_trials, minibatch, zeroshot_opt, valset
(
num_trials,
valset,
minibatch,
num_instruct_candidates,
num_fewshot_candidates,
) = self._set_hyperparams_from_run_mode(
student,
num_trials,
minibatch,
zeroshot_opt,
valset,
num_instruct_candidates,
num_fewshot_candidates,
)

if self.auto:
self._print_auto_run_settings(num_trials, minibatch, valset)
self._print_auto_run_settings(
num_trials,
minibatch,
valset,
num_fewshot_candidates,
num_instruct_candidates,
)

if minibatch and minibatch_size > len(valset):
raise ValueError(f"Minibatch size cannot exceed the size of the valset. Valset size: {len(valset)}.")
Expand All @@ -183,7 +215,17 @@ def compile(

with dspy.context(lm=self.task_model):
# Step 1: Bootstrap few-shot examples
demo_candidates = self._bootstrap_fewshot_examples(program, trainset, seed, teacher)
demo_candidates = self._bootstrap_fewshot_examples(
program,
trainset,
seed,
teacher,
num_fewshot_candidates=num_fewshot_candidates,
max_bootstrapped_demos=effective_max_bootstrapped_demos,
max_labeled_demos=effective_max_labeled_demos,
max_errors=effective_max_errors,
metric_threshold=self.metric_threshold,
)

# Step 2: Propose instruction candidates
instruction_candidates = self._propose_instructions(
Expand All @@ -195,6 +237,7 @@ def compile(
data_aware_proposer,
tip_aware_proposer,
fewshot_aware_proposer,
num_instruct_candidates=num_instruct_candidates,
)

# If zero-shot, discard demos
Expand Down Expand Up @@ -234,13 +277,17 @@ def _set_num_trials_from_num_candidates(self, program, zeroshot_opt, num_candida
def _set_hyperparams_from_run_mode(
self,
program: Any,
num_trials: int,
num_trials: int | None,
minibatch: bool,
zeroshot_opt: bool,
valset: list,
) -> tuple[int, list, bool]:
num_instruct_candidates: int | None,
num_fewshot_candidates: int | None,
) -> tuple[int, list, bool, int, int]:
if self.auto is None:
return num_trials, valset, minibatch
if num_instruct_candidates is None or num_fewshot_candidates is None:
raise ValueError("num_candidates must be provided when auto is None.")
return num_trials, valset, minibatch, num_instruct_candidates, num_fewshot_candidates

auto_settings = AUTO_RUN_SETTINGS[self.auto]

Expand All @@ -250,12 +297,12 @@ def _set_hyperparams_from_run_mode(
# Set num instruct candidates to 1/2 of N if optimizing with few-shot examples, otherwise set to N
# This is because we've found that it's generally better to spend optimization budget on few-shot examples
# When they are allowed.
self.num_instruct_candidates = auto_settings["n"] if zeroshot_opt else int(auto_settings["n"] * 0.5)
self.num_fewshot_candidates = auto_settings["n"]
num_instruct_candidates = auto_settings["n"] if zeroshot_opt else int(auto_settings["n"] * 0.5)
num_fewshot_candidates = auto_settings["n"]

num_trials = self._set_num_trials_from_num_candidates(program, zeroshot_opt, auto_settings["n"])

return num_trials, valset, minibatch
return num_trials, valset, minibatch, num_instruct_candidates, num_fewshot_candidates

def _set_and_validate_datasets(self, trainset: list, valset: list | None):
if not trainset:
Expand All @@ -274,13 +321,20 @@ def _set_and_validate_datasets(self, trainset: list, valset: list | None):

return trainset, valset

def _print_auto_run_settings(self, num_trials: int, minibatch: bool, valset: list):
def _print_auto_run_settings(
self,
num_trials: int,
minibatch: bool,
valset: list,
num_fewshot_candidates: int,
num_instruct_candidates: int,
):
logger.info(
f"\nRUNNING WITH THE FOLLOWING {self.auto.upper()} AUTO RUN SETTINGS:"
f"\nnum_trials: {num_trials}"
f"\nminibatch: {minibatch}"
f"\nnum_fewshot_candidates: {self.num_fewshot_candidates}"
f"\nnum_instruct_candidates: {self.num_instruct_candidates}"
f"\nnum_fewshot_candidates: {num_fewshot_candidates}"
f"\nnum_instruct_candidates: {num_instruct_candidates}"
f"\nvalset size: {len(valset)}\n"
)

Expand All @@ -293,18 +347,19 @@ def _estimate_lm_calls(
minibatch_full_eval_steps: int,
valset: list,
program_aware_proposer: bool,
num_instruct_candidates: int,
) -> tuple[str, str]:
num_predictors = len(program.predictors())

# Estimate prompt model calls
estimated_prompt_model_calls = (
10 # Data summarizer calls
+ self.num_instruct_candidates * num_predictors # Candidate generation
+ num_instruct_candidates * num_predictors # Candidate generation
+ (num_predictors + 1 if program_aware_proposer else 0) # Program-aware proposer
)
prompt_model_line = (
f"{YELLOW}- Prompt Generation: {BLUE}{BOLD}10{ENDC}{YELLOW} data summarizer calls + "
f"{BLUE}{BOLD}{self.num_instruct_candidates}{ENDC}{YELLOW} * "
f"{BLUE}{BOLD}{num_instruct_candidates}{ENDC}{YELLOW} * "
f"{BLUE}{BOLD}{num_predictors}{ENDC}{YELLOW} lm calls in program "
f"+ ({BLUE}{BOLD}{num_predictors + 1}{ENDC}{YELLOW}) lm calls in program-aware proposer "
f"= {BLUE}{BOLD}{estimated_prompt_model_calls}{ENDC}{YELLOW} prompt model calls{ENDC}"
Expand All @@ -331,38 +386,48 @@ def _estimate_lm_calls(

return prompt_model_line, task_model_line

def _bootstrap_fewshot_examples(self, program: Any, trainset: list, seed: int, teacher: Any) -> list | None:
def _bootstrap_fewshot_examples(
self,
program: Any,
trainset: list,
seed: int,
teacher: Any,
*,
num_fewshot_candidates: int,
max_bootstrapped_demos: int,
max_labeled_demos: int,
max_errors: int | None,
metric_threshold: float | None,
) -> list | None:
logger.info("\n==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==")
if self.max_bootstrapped_demos > 0:
if max_bootstrapped_demos > 0:
logger.info(
"These will be used as few-shot example candidates for our program and for creating instructions.\n"
)
else:
logger.info("These will be used for informing instruction proposal.\n")

logger.info(f"Bootstrapping N={self.num_fewshot_candidates} sets of demonstrations...")
logger.info(f"Bootstrapping N={num_fewshot_candidates} sets of demonstrations...")

zeroshot = self.max_bootstrapped_demos == 0 and self.max_labeled_demos == 0
zeroshot = max_bootstrapped_demos == 0 and max_labeled_demos == 0

# try:
effective_max_errors = (
self.max_errors if self.max_errors is not None else dspy.settings.max_errors
)
if max_errors is None:
max_errors = dspy.settings.max_errors

demo_candidates = create_n_fewshot_demo_sets(
student=program,
num_candidate_sets=self.num_fewshot_candidates,
num_candidate_sets=num_fewshot_candidates,
trainset=trainset,
max_labeled_demos=(LABELED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else self.max_labeled_demos),
max_labeled_demos=(LABELED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else max_labeled_demos),
max_bootstrapped_demos=(
BOOTSTRAPPED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else self.max_bootstrapped_demos
BOOTSTRAPPED_FEWSHOT_EXAMPLES_IN_CONTEXT if zeroshot else max_bootstrapped_demos
),
metric=self.metric,
max_errors=effective_max_errors,
max_errors=max_errors,
teacher=teacher,
teacher_settings=self.teacher_settings,
seed=seed,
metric_threshold=self.metric_threshold,
metric_threshold=metric_threshold,
rng=self.rng,
)
# NOTE: Bootstrapping is essential to MIPRO!
Expand All @@ -384,6 +449,7 @@ def _propose_instructions(
data_aware_proposer: bool,
tip_aware_proposer: bool,
fewshot_aware_proposer: bool,
num_instruct_candidates: int,
) -> dict[int, list[str]]:
logger.info("\n==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==")
logger.info(
Expand All @@ -408,12 +474,12 @@ def _propose_instructions(
init_temperature=self.init_temperature,
)

logger.info(f"\nProposing N={self.num_instruct_candidates} instructions...\n")
logger.info(f"\nProposing N={num_instruct_candidates} instructions...\n")
instruction_candidates = proposer.propose_instructions_for_program(
trainset=trainset,
program=program,
demo_candidates=demo_candidates,
N=self.num_instruct_candidates,
N=num_instruct_candidates,
trial_logs={},
)

Expand Down