In [1]:
import argparse
import random
from pathlib import Path

import dspy
from dspy.evaluate import Evaluate

from ccf.pylib import trait_extractor as te

In [2]:
# Run configuration for the whole notebook; every later cell reads its
# settings from `args` rather than hardcoding them.
args = argparse.Namespace(
    # Labelled examples to load and split.
    examples_json=Path("../data/fna/Asteraceae_examples_2025-03-19c.json"),
    # Model identifier and endpoint handed to dspy.LM; the endpoint is a
    # localhost URL and the key is left empty.
    model="ollama_chat/gemma3:27b",
    api_base="http://localhost:11434",
    api_key="",
    # Fractions of the examples assigned to each split.
    train_split=0.1,
    dev_split=0.5,
    test_split=0.4,
    # Seed applied to Python's `random` module in the next cell.
    seed=2203673,
)

# Fail fast on a mistyped fraction: test_split is never checked anywhere else
# in this notebook, so an inconsistent value would otherwise go unnoticed.
_split_total = args.train_split + args.dev_split + args.test_split
assert abs(_split_total - 1.0) < 1e-9, (
    f"train/dev/test splits sum to {_split_total}, expected 1.0"
)

In [3]:
# Seed Python's global RNG from the config cell so a fresh-kernel run is
# repeatable. Presumably te.split_examples draws from `random` — TODO confirm.
random.seed(args.seed)

In [4]:
# Load the labelled examples and partition them; later cells read
# dataset["train"] and dataset["dev"].
# NOTE(review): args.test_split is defined in the config cell but is not
# passed here — presumably split_examples treats the remainder as the test
# split; confirm against its signature.
examples = te.read_examples(args.examples_json)
dataset = te.split_examples(examples, args.train_split, args.dev_split)

# Build the language-model client from the config cell and register it as
# dspy's default LM. cache=False is passed explicitly (presumably so repeated
# evaluations re-query the model instead of reusing cached answers — confirm).
lm = dspy.LM(args.model, api_base=args.api_base, api_key=args.api_key, cache=False)
dspy.configure(lm=lm)

# Unoptimized baseline program: a plain dspy.Predict over the project's
# TraitExtractor (assumed to be a dspy signature — verify in trait_extractor).
trait_extractor = dspy.Predict(te.TraitExtractor)

In [7]:
# Dev-split scorer reused for both the baseline and the optimized program.
evaluator = Evaluate(
    # What is scored, and on which examples.
    metric=te.score_prediction,
    devset=dataset["dev"],
    # One worker thread at a time.
    num_threads=1,
    # Reporting: include tracebacks for failures, show the progress bar and
    # the per-example results table.
    provide_traceback=True,
    display_progress=True,
    display_table=True,
)

In [8]:
# Baseline: score the unoptimized predictor on the dev split (the same devset
# is already bound on the evaluator at construction).
# NOTE(review): the saved output for this cell logs an item-processing error
# and ends in KeyboardInterrupt after 1/203 items, so no baseline score was
# recorded — re-run to completion before comparing with the optimized program.
evaluator(trait_extractor, devset=dataset["dev"])

  0%|                                                                                                                                                                                  | 0/203 [00:00<?, ?it/s]

2025/03/19 14:16:45 ERROR dspy.utils.parallelizer: Error processing item Example({'text': '<b>Perennials,</b> 15–25 cm. <b>Stems</b> erect, puberulent to hispidulous. <b>Petioles</b> 5–12 mm. <b>Leaf</b> blades (3-nerved) elliptic-lanceolate to ovate-lanceolate or deltate-ovate, mostly 1.5–4 × 0.7–2.2 cm, margins shallowly dentate to serrate. <b>Heads</b> usually in clusters of 2–6. <b>Involucres</b> cylindric, 5.5–7.5(–8) mm. <b>Phyllaries</b> in 4–6 series, apices of the inner appressed, rounded (not petaloid or expanded). <b>Corollas</b> blue to lavender.\nPhenology: Flowering mostly Aug–Nov, sometimes year round.\nHabitat: Coastal rock barrens, edges of rockland hammocks, undisturbed sites\nElevation: 0–10 m', 'family': 'Asteraceae', 'taxon': 'Chromolaena frustrata', 'prompt': '\n    What is the plant size,\n    leaf shape, leaf length, leaf width, leaf thickness,\n    seed length, seed width,\n    fruit type, fruit length, fruit width,\n    deciduousness, phenology, habitat, eleva

Average Metric: 0.00 / 203 (0.0%):   0%|▋                                                                                                                                      | 1/203 [00:06<20:56,  6.22s/it]

KeyboardInterrupt: 

In [None]:
# MIPROv2 prompt optimizer, scored with the same metric the evaluator uses.
mipro_optimizer = dspy.MIPROv2(metric=te.score_prediction, auto="medium")

# Optimize the baseline predictor using the train split only; the dev split is
# kept out of compilation so the evaluation cells can score on it.
optimized_trait_extractor = mipro_optimizer.compile(
    trait_extractor,
    trainset=dataset["train"],
    max_bootstrapped_demos=4,
    minibatch=False,
    requires_permission_to_run=False,
)

In [None]:
# Score the MIPROv2-compiled program on the dev split with the same evaluator,
# so the result is directly comparable to the baseline run.
evaluator(optimized_trait_extractor, devset=dataset["dev"])

In [None]:
# Show the single most recent entry (n=1) of dspy's LM call history, for a
# manual look at the prompt the optimized program sends.
dspy.inspect_history(n=1)

In [None]:
# Persist the optimized program next to the source data.
# NOTE(review): the filename is stamped 2025-03-18a while the examples file in
# the config cell is 2025-03-19c — confirm the stamp is intended before
# overwriting an earlier artifact.
optimized_trait_extractor.save("../data/fna/optimized_trait_extractor_2025-03-18a.json")