In [3]:
import argparse
import json
import random
import textwrap
from pathlib import Path

import dspy
from dspy.teleprompt import LabeledFewShot
from dspy.evaluate import Evaluate
from ccf.pylib.lm_data import PROMPT, Instance, TraitExtractor, score_prediction

from pprint import pp

In [9]:
args = argparse.Namespace(
    examples=Path("../data/fna/Asteraceae_examples_2025-03-17a.json"),
    model="ollama_chat/gemma3:27b",
    api_base="http://localhost:11434",
    api_key="",
    train_split=0.1,
    dev_split=0.5,
    test_split=0.4,
    seed=2203673,
)

In [10]:
random.seed(args.seed)

with args.examples.open() as f:
    example_data = json.load(f)

In [11]:
def example_splits(instances: list[Instance], train_split, dev_split):
    examples = [
        dspy.Example(text=i.text, prompt=PROMPT, traits=i.traits).with_inputs(
            "text", "prompt"
        )
        for i in instances
    ]
    random.shuffle(examples)

    total = len(examples)
    split1 = round(total * train_split)
    split2 = split1 + round(total * dev_split)

    dataset = {
        "train": examples[:split1],
        "dev": examples[split1:split2],
        "test": examples[split2:],
    }

    return dataset

In [12]:
instances = [Instance.dict_to_instance(d) for d in example_data]
dataset = example_splits(instances, args.train_split, args.dev_split)

lm = dspy.LM(args.model, api_base=args.api_base, api_key=args.api_key)
dspy.configure(lm=lm)

trait_extractor = dspy.Predict(TraitExtractor)

evaluator = Evaluate(
    devset=dataset["dev"],
    metric=score_prediction,
    num_threads=1,
    display_progress=True,
    display_table=True,
)

evaluator(trait_extractor, devset=dataset["dev"])

Average Metric: 180.56 / 203 (88.9%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 203/203 [00:02<00:00, 68.07it/s]


2025/03/18 11:10:40 INFO dspy.evaluate.evaluate: Average Metric: 180.56017689206732 / 203 (88.9%)


Unnamed: 0,text,prompt,example_traits,pred_traits,score_prediction
0,"<b>Perennials,</b> 15–25 cm. <b>Stems</b> erect, puberulent to his...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='15–25 cm', leaf_shape='elliptic-lanceolate to...","Traits(plant_height='15–25 cm', leaf_shape='elliptic-lanceolate to...",✔️ [1.000]
1,"<b>Annuals,</b> 30–150(–400+) cm. <b>Stems</b> erect. <b>Leaves</b...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='30–150(–400+) cm', leaf_shape='rounded-deltat...","Traits(plant_height='30–150(–400+) cm', leaf_shape='rounded-deltat...",✔️ [0.929]
2,<b>Plants </b>10–80(–200) cm; nodal spines 0. <b>Leaves</b>: petio...,"What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='10–80(–200) cm', leaf_shape='suborbiculate to...","Traits(plant_height='10–80(–200) cm', leaf_shape='suborbiculate to...",✔️ [0.857]
3,"<b>Biennials </b>or perennials, 60–150(–200) cm; crown sprouts fro...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='60–150(–200) cm', leaf_shape='linear or linea...","Traits(plant_height='60–150(–200) cm', leaf_shape='linear or linea...",✔️ [0.752]
4,"<b>Annuals,</b> to 60 cm. <b>Stems</b> often spreading or prostrat...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='60 cm', leaf_shape='tapering to winged petiol...","Traits(plant_height='to 60 cm', leaf_shape='lanceolate to oblanceo...",✔️ [0.954]
...,...,...,...,...,...
198,"<b>Perennials,</b> 10–60 cm; taproots vertical, thick, fibrous, ca...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='10–60 cm', leaf_shape='oblanceolate, lyrate',...","Traits(plant_height='10–60 cm', leaf_shape='oblanceolate, lyrate t...",✔️ [0.927]
199,"<b>Perennials,</b> 10–60(–100+) cm. <b>Stems</b> erect. <b>Leaves<...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='10–60(–100+) cm', leaf_shape='deltate to lanc...","Traits(plant_height='10–60(–100+) cm', leaf_shape='deltate to lanc...",✔️ [0.929]
200,"<b>Perennials,</b> 50–300 cm (rhizomes often elongate, slender, pl...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='50–300 cm', leaf_shape='ovate to lanceolate',...","Traits(plant_height='50–300 cm', leaf_shape='broadly ovate to lanc...",✔️ [0.898]
201,<b>Plants </b>50–200 cm; rhizomes short- to long-creeping. <b>Stem...,"What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='50–200 cm', leaf_shape='tapering to bases; bl...","Traits(plant_height='50–200 cm', leaf_shape='oblanceolate (basal/m...",✔️ [0.709]


88.95

In [13]:
mipro_optimizer = dspy.MIPROv2(
    metric=score_prediction,
    auto="medium",
)

optimized_trait_extractor = mipro_optimizer.compile(
    trait_extractor,
    trainset=dataset["train"],
    max_bootstrapped_demos=4,
    requires_permission_to_run=False,
    minibatch=False
)

2025/03/18 11:14:57 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING MEDIUM AUTO RUN SETTINGS:
num_trials: 25
minibatch: False
num_candidates: 19
valset size: 32

2025/03/18 11:14:57 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/03/18 11:14:57 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/03/18 11:14:57 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=19 sets of demonstrations...


Bootstrapping set 1/19
Bootstrapping set 2/19
Bootstrapping set 3/19


 44%|████████████████████████████████████████████████████████████████▉                                                                                 | 4/9 [00:31<00:38,  7.78s/it]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 4/19


 44%|████████████████████████████████████████████████████████████████▉                                                                                 | 4/9 [00:26<00:32,  6.56s/it]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 5/19


 44%|████████████████████████████████████████████████████████████████▉                                                                                 | 4/9 [00:25<00:31,  6.39s/it]


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 6/19


 11%|████████████████▏                                                                                                                                 | 1/9 [00:06<00:52,  6.58s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/19


 11%|████████████████▏                                                                                                                                 | 1/9 [00:07<00:57,  7.21s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 8/19


 22%|████████████████████████████████▍                                                                                                                 | 2/9 [00:12<00:43,  6.27s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 9/19


 11%|████████████████▏                                                                                                                                 | 1/9 [00:07<01:00,  7.57s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 10/19


 11%|████████████████▏                                                                                                                                 | 1/9 [00:06<00:52,  6.51s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 11/19


 22%|████████████████████████████████▍                                                                                                                 | 2/9 [00:15<00:52,  7.54s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 12/19


 22%|████████████████████████████████▍                                                                                                                 | 2/9 [00:12<00:44,  6.37s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 13/19


 11%|████████████████▏                                                                                                                                 | 1/9 [00:06<00:48,  6.10s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 14/19


 22%|████████████████████████████████▍                                                                                                                 | 2/9 [00:11<00:41,  5.91s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 15/19


 33%|████████████████████████████████████████████████▋                                                                                                 | 3/9 [00:19<00:39,  6.53s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 16/19


 33%|████████████████████████████████████████████████▋                                                                                                 | 3/9 [00:18<00:37,  6.21s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 17/19


 22%|████████████████████████████████▍                                                                                                                 | 2/9 [00:13<00:46,  6.71s/it]


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Bootstrapping set 18/19


 33%|████████████████████████████████████████████████▋                                                                                                 | 3/9 [00:19<00:38,  6.46s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 19/19


 44%|████████████████████████████████████████████████████████████████▉                                                                                 | 4/9 [00:27<00:34,  6.95s/it]
2025/03/18 11:19:25 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/03/18 11:19:25 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.


2025/03/18 11:19:38 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2025/03/18 11:34:14 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/03/18 11:34:14 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Analyze species descriptions and extract trait information.

2025/03/18 11:34:14 INFO dspy.teleprompt.mipro_optimizer_v2: 1: You are an expert botanist tasked with extracting specific trait information from species descriptions. Your goal is to meticulously analyze the provided text and populate a `Traits` object with the requested data. If a particular trait is not explicitly mentioned in the text, leave the corresponding field in the `Traits` object empty. Pay close attention to units and ranges when recording measurements.

Here's a breakdown of the fields in the `Traits` object:

*   `plant_height`: The height of the plant, including any specified range.
*   `leaf_shape`: The shape of the leaves.
*   `leaf_length`: The length of the

Average Metric: 28.10 / 32 (87.8%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:36<00:00,  4.88s/it]

2025/03/18 11:36:51 INFO dspy.evaluate.evaluate: Average Metric: 28.100887200701536 / 32 (87.8%)
2025/03/18 11:36:51 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 87.82

2025/03/18 11:36:51 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 25 =====



Average Metric: 30.20 / 32 (94.4%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:02<00:00,  5.71s/it]

2025/03/18 11:39:54 INFO dspy.evaluate.evaluate: Average Metric: 30.203338324871112 / 32 (94.4%)
2025/03/18 11:39:54 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 94.39
2025/03/18 11:39:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 94.39 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 7'].
2025/03/18 11:39:54 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39]
2025/03/18 11:39:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 94.39


2025/03/18 11:39:54 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 25 =====



Average Metric: 29.69 / 32 (92.8%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:11<00:00,  5.99s/it]

2025/03/18 11:43:06 INFO dspy.evaluate.evaluate: Average Metric: 29.686686021948557 / 32 (92.8%)
2025/03/18 11:43:06 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.77 with parameters ['Predictor 0: Instruction 10', 'Predictor 0: Few-Shot Set 7'].
2025/03/18 11:43:06 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77]
2025/03/18 11:43:06 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 94.39


2025/03/18 11:43:06 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 25 =====



Average Metric: 29.53 / 32 (92.3%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:57<00:00,  5.54s/it]

2025/03/18 11:46:03 INFO dspy.evaluate.evaluate: Average Metric: 29.525926340721323 / 32 (92.3%)
2025/03/18 11:46:03 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.27 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 18'].
2025/03/18 11:46:03 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27]
2025/03/18 11:46:03 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 94.39


2025/03/18 11:46:03 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 25 =====



Average Metric: 28.85 / 32 (90.1%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:27<00:00,  6.49s/it]

2025/03/18 11:49:31 INFO dspy.evaluate.evaluate: Average Metric: 28.845821696908036 / 32 (90.1%)
2025/03/18 11:49:31 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 90.14 with parameters ['Predictor 0: Instruction 15', 'Predictor 0: Few-Shot Set 2'].
2025/03/18 11:49:31 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14]
2025/03/18 11:49:31 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 94.39


2025/03/18 11:49:31 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 25 =====



Average Metric: 30.02 / 32 (93.8%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:11<00:00,  5.99s/it]

2025/03/18 11:52:43 INFO dspy.evaluate.evaluate: Average Metric: 30.01970825759037 / 32 (93.8%)
2025/03/18 11:52:43 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 93.81 with parameters ['Predictor 0: Instruction 8', 'Predictor 0: Few-Shot Set 18'].
2025/03/18 11:52:43 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81]
2025/03/18 11:52:43 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 94.39


2025/03/18 11:52:43 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 25 =====



Average Metric: 30.07 / 32 (94.0%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:13<00:00,  6.03s/it]

2025/03/18 11:55:56 INFO dspy.evaluate.evaluate: Average Metric: 30.066068677772112 / 32 (94.0%)
2025/03/18 11:55:56 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 93.96 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 1'].
2025/03/18 11:55:56 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96]
2025/03/18 11:55:56 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 94.39


2025/03/18 11:55:56 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 25 =====



Average Metric: 29.55 / 32 (92.3%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:44<00:00,  5.13s/it]

2025/03/18 11:58:40 INFO dspy.evaluate.evaluate: Average Metric: 29.54738068070874 / 32 (92.3%)
2025/03/18 11:58:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.34 with parameters ['Predictor 0: Instruction 7', 'Predictor 0: Few-Shot Set 12'].
2025/03/18 11:58:40 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34]
2025/03/18 11:58:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 94.39


2025/03/18 11:58:40 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 25 =====



Average Metric: 28.78 / 32 (89.9%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:03<00:00,  5.74s/it]

2025/03/18 12:01:44 INFO dspy.evaluate.evaluate: Average Metric: 28.78181255008578 / 32 (89.9%)
2025/03/18 12:01:44 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 89.94 with parameters ['Predictor 0: Instruction 11', 'Predictor 0: Few-Shot Set 13'].
2025/03/18 12:01:44 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94]
2025/03/18 12:01:44 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 94.39


2025/03/18 12:01:44 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 25 =====



Average Metric: 30.03 / 32 (93.8%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [02:54<00:00,  5.46s/it]

2025/03/18 12:04:39 INFO dspy.evaluate.evaluate: Average Metric: 30.027591316917025 / 32 (93.8%)
2025/03/18 12:04:39 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 93.84 with parameters ['Predictor 0: Instruction 5', 'Predictor 0: Few-Shot Set 4'].
2025/03/18 12:04:39 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84]
2025/03/18 12:04:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 94.39


2025/03/18 12:04:39 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 25 =====



Average Metric: 30.40 / 32 (95.0%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:15<00:00,  6.11s/it]

2025/03/18 12:07:54 INFO dspy.evaluate.evaluate: Average Metric: 30.40097623223648 / 32 (95.0%)
2025/03/18 12:07:54 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 95.0
2025/03/18 12:07:54 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 95.0 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 3'].
2025/03/18 12:07:54 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0]
2025/03/18 12:07:54 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:07:54 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 12 / 25 =====



Average Metric: 30.40 / 32 (95.0%): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 3076.41it/s]

2025/03/18 12:07:55 INFO dspy.evaluate.evaluate: Average Metric: 30.40097623223648 / 32 (95.0%)
2025/03/18 12:07:55 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 95.0 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 3'].
2025/03/18 12:07:55 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0]
2025/03/18 12:07:55 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:07:55 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 25 =====



Average Metric: 29.38 / 32 (91.8%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:17<00:00,  6.18s/it]

2025/03/18 12:11:13 INFO dspy.evaluate.evaluate: Average Metric: 29.376497569512733 / 32 (91.8%)
2025/03/18 12:11:13 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 91.8 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 9'].
2025/03/18 12:11:13 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8]
2025/03/18 12:11:13 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:11:13 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 14 / 25 =====



Average Metric: 30.40 / 32 (95.0%): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 1510.41it/s]

2025/03/18 12:11:13 INFO dspy.evaluate.evaluate: Average Metric: 30.40097623223648 / 32 (95.0%)
2025/03/18 12:11:13 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 95.0 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 3'].
2025/03/18 12:11:13 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0]
2025/03/18 12:11:13 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:11:13 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 15 / 25 =====



Average Metric: 30.02 / 32 (93.8%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:21<00:00,  6.30s/it]

2025/03/18 12:14:35 INFO dspy.evaluate.evaluate: Average Metric: 30.015725956505605 / 32 (93.8%)
2025/03/18 12:14:35 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 93.8 with parameters ['Predictor 0: Instruction 4', 'Predictor 0: Few-Shot Set 3'].
2025/03/18 12:14:35 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8]
2025/03/18 12:14:35 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:14:35 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 16 / 25 =====



Average Metric: 30.15 / 32 (94.2%): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:15<00:00,  6.12s/it]

2025/03/18 12:17:51 INFO dspy.evaluate.evaluate: Average Metric: 30.15113896293488 / 32 (94.2%)
2025/03/18 12:17:51 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 94.22 with parameters ['Predictor 0: Instruction 13', 'Predictor 0: Few-Shot Set 3'].
2025/03/18 12:17:51 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22]
2025/03/18 12:17:51 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:17:51 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 17 / 25 =====



Average Metric: 30.06 / 32 (93.9%): 100%|██████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:01<00:00,  5.66s/it]                  

2025/03/18 12:20:52 INFO dspy.evaluate.evaluate: Average Metric: 30.05887605992618 / 32 (93.9%)
2025/03/18 12:20:52 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 93.93 with parameters ['Predictor 0: Instruction 9', 'Predictor 0: Few-Shot Set 16'].
2025/03/18 12:20:52 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22, 93.93]
2025/03/18 12:20:52 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:20:52 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 18 / 25 =====



Average Metric: 29.65 / 32 (92.6%): 100%|██████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:09<00:00,  5.91s/it]

2025/03/18 12:24:01 INFO dspy.evaluate.evaluate: Average Metric: 29.646520519591874 / 32 (92.6%)
2025/03/18 12:24:01 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.65 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 6'].
2025/03/18 12:24:01 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22, 93.93, 92.65]
2025/03/18 12:24:01 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:24:01 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 19 / 25 =====



Average Metric: 30.37 / 32 (94.9%): 100%|██████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:14<00:00,  6.09s/it]

2025/03/18 12:27:16 INFO dspy.evaluate.evaluate: Average Metric: 30.372841780573538 / 32 (94.9%)
2025/03/18 12:27:16 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 94.92 with parameters ['Predictor 0: Instruction 18', 'Predictor 0: Few-Shot Set 3'].
2025/03/18 12:27:16 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22, 93.93, 92.65, 94.92]
2025/03/18 12:27:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:27:16 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 20 / 25 =====



Average Metric: 30.07 / 32 (94.0%): 100%|██████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:18<00:00,  6.19s/it]

2025/03/18 12:30:34 INFO dspy.evaluate.evaluate: Average Metric: 30.070251253419308 / 32 (94.0%)
2025/03/18 12:30:34 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 93.97 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 17'].
2025/03/18 12:30:34 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22, 93.93, 92.65, 94.92, 93.97]
2025/03/18 12:30:34 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:30:34 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 21 / 25 =====



Average Metric: 28.65 / 32 (89.5%): 100%|██████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:15<00:00,  6.10s/it]

2025/03/18 12:33:49 INFO dspy.evaluate.evaluate: Average Metric: 28.654356596155594 / 32 (89.5%)
2025/03/18 12:33:49 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 89.54 with parameters ['Predictor 0: Instruction 16', 'Predictor 0: Few-Shot Set 15'].
2025/03/18 12:33:50 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22, 93.93, 92.65, 94.92, 93.97, 89.54]
2025/03/18 12:33:50 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:33:50 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 22 / 25 =====



Average Metric: 29.60 / 32 (92.5%): 100%|██████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:25<00:00,  6.42s/it]

2025/03/18 12:37:15 INFO dspy.evaluate.evaluate: Average Metric: 29.59675166556525 / 32 (92.5%)
2025/03/18 12:37:15 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 92.49 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 3'].
2025/03/18 12:37:15 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22, 93.93, 92.65, 94.92, 93.97, 89.54, 92.49]
2025/03/18 12:37:15 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:37:15 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 23 / 25 =====



Average Metric: 30.40 / 32 (95.0%): 100%|████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 1442.38it/s]

2025/03/18 12:37:16 INFO dspy.evaluate.evaluate: Average Metric: 30.40097623223648 / 32 (95.0%)
2025/03/18 12:37:16 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 95.0 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 3'].
2025/03/18 12:37:16 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22, 93.93, 92.65, 94.92, 93.97, 89.54, 92.49, 95.0]
2025/03/18 12:37:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:37:16 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 24 / 25 =====



Average Metric: 29.98 / 32 (93.7%): 100%|██████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:00<00:00,  5.63s/it]

2025/03/18 12:40:16 INFO dspy.evaluate.evaluate: Average Metric: 29.983525628326902 / 32 (93.7%)
2025/03/18 12:40:16 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 93.7 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 14'].
2025/03/18 12:40:16 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22, 93.93, 92.65, 94.92, 93.97, 89.54, 92.49, 95.0, 93.7]
2025/03/18 12:40:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:40:16 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 25 / 25 =====



Average Metric: 30.40 / 32 (95.0%): 100%|████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 3306.67it/s]

2025/03/18 12:40:16 INFO dspy.evaluate.evaluate: Average Metric: 30.40097623223648 / 32 (95.0%)
2025/03/18 12:40:16 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 95.0 with parameters ['Predictor 0: Instruction 12', 'Predictor 0: Few-Shot Set 3'].
2025/03/18 12:40:16 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [87.82, 94.39, 92.77, 92.27, 90.14, 93.81, 93.96, 92.34, 89.94, 93.84, 95.0, 95.0, 91.8, 95.0, 93.8, 94.22, 93.93, 92.65, 94.92, 93.97, 89.54, 92.49, 95.0, 93.7, 95.0]
2025/03/18 12:40:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 95.0


2025/03/18 12:40:16 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 95.0!





In [14]:
evaluator(optimized_trait_extractor, devset=dataset["dev"])

Average Metric: 193.20 / 203 (95.2%): 100%|██████████████████████████████████████████████████████████████████████████████████████| 203/203 [20:17<00:00,  6.00s/it]

2025/03/18 13:16:01 INFO dspy.evaluate.evaluate: Average Metric: 193.19519160250113 / 203 (95.2%)





Unnamed: 0,text,prompt,example_traits,pred_traits,score_prediction
0,"<b>Perennials,</b> 15–25 cm. <b>Stems</b> erect, puberulent to his...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='15–25 cm', leaf_shape='elliptic-lanceolate to...","Traits(plant_height='15–25 cm', leaf_shape='elliptic-lanceolate to...",✔️ [0.857]
1,"<b>Annuals,</b> 30–150(–400+) cm. <b>Stems</b> erect. <b>Leaves</b...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='30–150(–400+) cm', leaf_shape='rounded-deltat...","Traits(plant_height='30–150(–400+) cm', leaf_shape='rounded-deltat...",✔️ [0.857]
2,<b>Plants </b>10–80(–200) cm; nodal spines 0. <b>Leaves</b>: petio...,"What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='10–80(–200) cm', leaf_shape='suborbiculate to...","Traits(plant_height='10–80(–200) cm', leaf_shape='suborbiculate to...",✔️ [0.929]
3,"<b>Biennials </b>or perennials, 60–150(–200) cm; crown sprouts fro...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='60–150(–200) cm', leaf_shape='linear or linea...","Traits(plant_height='60–150(–200) cm', leaf_shape='linear or linea...",✔️ [0.837]
4,"<b>Annuals,</b> to 60 cm. <b>Stems</b> often spreading or prostrat...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='60 cm', leaf_shape='tapering to winged petiol...","Traits(plant_height='to 60 cm', leaf_shape='lanceolate to oblanceo...",✔️ [0.954]
...,...,...,...,...,...
198,"<b>Perennials,</b> 10–60 cm; taproots vertical, thick, fibrous, ca...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='10–60 cm', leaf_shape='oblanceolate, lyrate',...","Traits(plant_height='10–60 cm', leaf_shape='oblanceolate', leaf_le...",✔️ [0.968]
199,"<b>Perennials,</b> 10–60(–100+) cm. <b>Stems</b> erect. <b>Leaves<...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='10–60(–100+) cm', leaf_shape='deltate to lanc...","Traits(plant_height='10–60(–100+) cm', leaf_shape='deltate to lanc...",✔️ [0.857]
200,"<b>Perennials,</b> 50–300 cm (rhizomes often elongate, slender, pl...","What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='50–300 cm', leaf_shape='ovate to lanceolate',...","Traits(plant_height='50–300 cm', leaf_shape='broadly ovate to lanc...",✔️ [0.898]
201,<b>Plants </b>50–200 cm; rhizomes short- to long-creeping. <b>Stem...,"What is the plant size, leaf shape, leaf length, leaf width, leaf ...","Traits(plant_height='50–200 cm', leaf_shape='tapering to bases; bl...","Traits(plant_height='50–200 cm', leaf_shape='oblanceolate', leaf_l...",✔️ [0.891]


95.17

In [15]:
dspy.inspect_history(n=1)





[34m[2025-03-18T13:16:01.896011][0m

[31mSystem message:[0m

Your input fields are:
1. `text` (str): the species description text
2. `prompt` (str): extract these traits

Your output fields are:
1. `traits` (Traits): the extracted traits

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## text ## ]]
{text}

[[ ## prompt ## ]]
{prompt}

[[ ## traits ## ]]
{traits}        # note: the value you produce must adhere to the JSON schema: {"type": "object", "properties": {"deciduousness": {"type": "string", "default": "", "title": "Deciduousness"}, "elevation": {"type": "string", "default": "", "title": "Elevation"}, "fruit_length": {"type": "string", "default": "", "title": "Fruit Length"}, "fruit_type": {"type": "string", "default": "", "title": "Fruit Type"}, "fruit_width": {"type": "string", "default": "", "title": "Fruit Width"}, "habitat": {"type": "string", "default": "", "title": "Habitat"}, "leaf_length": {"type": "string", 

In [17]:
optimized_trait_extractor.save("../data/fna/optimized_trait_extractor_2025-03-18a.pkl")

In [18]:
import pandas as pd

In [19]:
obj = pd.read_pickle("../data/fna/optimized_trait_extractor_2025-03-18a.pkl")

In [20]:
obj

{'lm': None,
 'traces': [],
 'train': [],
 'demos': [Example({'augmented': True, 'text': '<b>Annuals </b>or perennials, 10–70(–100) cm, herbaceous to suffrutescent. <b>Leaf</b> blades oblong to oblanceolate, mid-cauline (2.5–)4–13 mm wide. <b>Heads</b> essentially sessile, often surpassed by distal leaves that continue to very base of heads. <b>Involucres</b> (6–)10–15 × 12–30 mm. <b>Phyllaries</b> in 3–4 series, loose, subequal, apices erect to spreading, 1.3–1.7 mm wide, herbaceous. <b>Ray</b> florets 17–38; corollas 8.5–19 mm. <b>Disc</b> florets: corolla tubes longer than limbs. <b>2n</b> = 12.\nPhenology: Flowering (Jul–)Sep–Dec (sporadically Feb–Jun).\nHabitat: Sandy sites, along waterways, canal banks, dunes, beaches, salt flats\nElevation: 0–10 m', 'prompt': '\n    What is the plant size,\n    leaf shape, leaf length, leaf width, leaf thickness,\n    seed length, seed width,\n    fruit type, fruit length, fruit width,\n    deciduousness, phenology, habitat, elevation?\n    If i

In [21]:
optimized_trait_extractor.save("../data/fna/optimized_trait_extractor_2025-03-18a.json")

RuntimeError: Failed to save state to ../data/fna/optimized_trait_extractor_2025-03-18a.json with error: Traits(plant_height='10–70(–100) cm', leaf_shape='oblong to oblanceolate', leaf_length='', leaf_width='4–13 mm', leaf_thickness='', fruit_type='', fruit_length='', fruit_width='', seed_length='', seed_width='', deciduousness='', phenology='Flowering (Jul–)Sep–Dec (sporadically Feb–Jun)', habitat='Sandy sites, along waterways, canal banks, dunes, beaches, salt flats', elevation='0–10 m') is not JSON serializable. Your DSPy program may contain non json-serializable objects, please consider saving the state in .pkl by using `path` ending with `.pkl`, or saving the whole program by setting `save_program=True`.