In [1]:
import pandas as pd

# Load the personal-transactions export. The path is relative to the
# notebook's working directory. Later cells read the "Description" and
# "category" columns from this frame.
df = pd.read_csv("data/personal_transactions.csv")
# Bare expression as the last line so the notebook renders a preview of the frame.
df

Unnamed: 0,Date,Description,Amount,Transaction Type,category,Account Name,pass
0,03/16/2018,Biweekly Paycheck,2000.00,credit,Paycheck,Checking,True
1,03/17/2018,Brewing Company,19.50,debit,Alcohol & Bars,Silver Card,
2,03/17/2018,Pizza Place,23.34,debit,Fast Food,Platinum Card,
3,03/19/2018,Mediterranean Restaurant,36.48,debit,Restaurants,Silver Card,
4,03/19/2018,City Water Charges,35.00,debit,Utilities,Checking,
...,...,...,...,...,...,...,...
801,07/19/2019,Mexican Restaurant,28.00,debit,Restaurants,Platinum Card,
802,07/22/2019,Credit Card Payment,257.08,credit,Credit Card Payment,Silver Card,
803,07/22/2019,Thai Restaurant,26.67,debit,Restaurants,Silver Card,
804,07/23/2019,Credit Card Payment,257.08,debit,Credit Card Payment,Checking,


In [2]:
import sys

sys.path.append("..")

from langwatch_nlp.studio.dspy.patched_boostrap_few_shot import ExampleWithEntryMap

def _build_examples(rows, *input_keys):
    """Convert DataFrame rows into ``ExampleWithEntryMap`` objects.

    Args:
        rows: A DataFrame slice; each row must provide the "Description"
            and "category" columns.
        *input_keys: Field names forwarded to ``with_inputs`` to mark which
            example fields are treated as inputs.

    Returns:
        A list with one example per row, in row order. Each example carries
        the row's index label as ``_index``.
    """
    return [
        ExampleWithEntryMap(
            _index=index, description=x["Description"], category=x["category"]
        ).with_inputs(*input_keys)
        for (index, x) in rows.iterrows()
    ]


# Rows 0-19 are used for training and rows 20-39 for evaluation.
# These variants expose both the description and the expected category as inputs.
trainset = _build_examples(df[0:20], "description", "category")
testset = _build_examples(df[20:40], "description", "category")

# Same rows, but only the description is marked as an input; the category
# stays on the example as a non-input field.
trainset_no_category_input = _build_examples(df[0:20], "description")
testset_no_category_input = _build_examples(df[20:40], "description")

trainset[0]

  from .autonotebook import tqdm as notebook_tqdm


Example({'_index': 0, 'description': 'Biweekly Paycheck', 'category': 'Paycheck'}) (input_keys={'category', 'description'})

In [3]:
import langwatch

# Authenticate this session with LangWatch. When an API key is already
# configured the call only prints a notice; use langwatch.login(relogin=True)
# to replace the stored key.
langwatch.login()

LangWatch API key is already set, if you want to login again, please call as langwatch.login(relogin=True)


In [4]:
import dspy
import sys
import os

from langwatch.dspy import LangWatchTrackedEvaluate

from langwatch_nlp.studio.modules.evaluators.langwatch import LangWatchEvaluator

sys.path.append("..")

from langwatch_nlp.studio.modules.evaluators.exact_match import ExactMatchEvaluator
from langwatch_nlp.studio.dspy import (
    LLMNode,
    LangWatchWorkflowModule,
    PredictionWithEvaluationAndMetadata,
    TemplateAdapter,
)
import langwatch_nlp.studio.dspy.patched_caching

# import langwatch_nlp.studio.dspy.patched_boostrap_few_shot
import langwatch_nlp.studio.dspy.patched_optional_image


class ClassifyTransactionSignature(dspy.Signature):
    """"""

    # NOTE(review): the docstring above is intentionally left empty and must not
    # be edited casually — dspy appears to use a signature's docstring as the
    # prompt instructions, so adding text there would change the prompt. Confirm
    # against the dspy Signature documentation before touching it.

    # Message template with a single user turn. "{{description}}" looks like a
    # placeholder for the input field below — presumably rendered by
    # TemplateAdapter; TODO confirm.
    _messages = [{"role": "user", "content": "{{description}}"}]
    # _messages = []

    # Input: free-text transaction description.
    description: str = dspy.InputField()
    # Output: the category label the model should produce.
    category: str = dspy.OutputField()


class ClassifyTransaction(LLMNode):
    """LLM node that predicts a category for a transaction description.

    Wraps a ``dspy.Predict`` over ``ClassifyTransactionSignature`` and binds it
    to the "openai/gpt-4o-mini" model under the node id "classify_transaction".
    """

    def __init__(self):
        """Build the node with its predictor, model and an empty demo list."""
        super().__init__(
            node_id="classify_transaction",
            name="ClassifyTransaction",
            predict=dspy.Predict(ClassifyTransactionSignature),
            lm=dspy.LM("openai/gpt-4o-mini"),
            # No hand-written few-shot demos: the node starts with an empty list.
            demos=[],
        )

    def forward(self, description: str):
        """Run the node for one transaction description.

        Args:
            description: Text describing the transaction.

        Returns:
            Whatever ``LLMNode.forward`` returns for this input.
        """
        return super().forward(description=description)


# predictor = ClassifyTransaction()


class ClassifyTransactionSimpler(dspy.Predict):
    """Minimal ``dspy.Predict`` subclass exposing ``description`` as an explicit parameter.

    Only referenced from a commented-out line in this notebook; kept as a
    lighter alternative to the ``LLMNode``-based classifier.
    """

    def forward(self, description: str):
        """Delegate to ``dspy.Predict.forward`` with the given description."""
        return super().forward(description=description)


class WorkflowModule(LangWatchWorkflowModule):
    """Workflow with two nodes: classify a transaction, then score the answer.

    The classifier node produces a category from the description, and the
    "langevals/exact_match" evaluator compares it with the expected category
    supplied by the caller.
    """

    def __init__(self, run_evaluations: bool = False):
        """Create both wrapped nodes.

        Args:
            run_evaluations: Forwarded as ``run`` when wrapping the evaluator node.

        Raises:
            KeyError: If the LANGWATCH_API_KEY environment variable is not set.
        """
        super().__init__()

        # Evaluator node: wrap the class first, then instantiate the wrapper.
        make_exact_match = self.wrapped(
            LangWatchEvaluator, node_id="exact_match", run=run_evaluations
        )
        self.exact_match = make_exact_match(
            api_key=os.environ["LANGWATCH_API_KEY"],
            name="ExactMatch",
            evaluator="langevals/exact_match",
            settings={},
        )

        # Classifier node, wrapped the same way and built with no arguments.
        make_classifier = self.wrapped(
            ClassifyTransaction,
            node_id="classify_transaction",
        )
        self.classify_transaction = make_classifier()

    def forward(self, **kwargs) -> dspy.Prediction:
        """Classify one transaction and evaluate the result.

        Args:
            **kwargs: Reads "description" (classifier input) and "category"
                (expected label); a missing key is passed on as None.

        Returns:
            A ``PredictionWithEvaluationAndMetadata`` holding the classifier
            prediction, the final output, the evaluator result, and the
            module's ``cost`` and ``duration`` attributes.
        """
        # Reset the per-call counters that are reported in the returned prediction.
        self.cost = 0
        self.duration = 0

        prediction = self.classify_transaction(
            description=kwargs.get("description"),
        )
        predicted_category = prediction.category

        match_result = self.exact_match(
            data={
                "output": predicted_category,
                "expected_output": kwargs.get("category"),
            }
        )

        return PredictionWithEvaluationAndMetadata(
            classify_transaction=prediction,
            end={"output": predicted_category},
            evaluations={"exact_match": match_result},
            cost=self.cost,
            duration=self.duration,
        )


# Keep a handle on the stock implementation so the patched class can fall back to it.
_original_compile = dspy.LabeledFewShot.compile


class PatchedLabeledFewShot2(dspy.LabeledFewShot):
    """``LabeledFewShot`` variant that maps each demo onto the predictor's node.

    The mapping path is gated by the module-level flag ``map_labeled_examples``.
    When the flag is truthy, the next ``compile`` call uses the mapping path and
    resets the flag to False, so later calls fall back to the stock
    ``dspy.LabeledFewShot.compile``.
    """

    def compile(self, student, *, trainset, sample=True):
        """Attach labeled demos to the student's predictors.

        Args:
            student: Program to copy and populate with demos.
            trainset: Examples to draw demos from. On the mapping path each
                example must provide ``map_for_node(node_id)``.
            sample: If True, draw demos with a fixed-seed RNG; otherwise take
                the first ``self.k`` examples.

        Returns:
            The stock ``compile`` result when the flag is unset or falsy,
            otherwise the reset copy of ``student`` with demos assigned.
        """
        # Imported locally because `random` is not imported at the top of this notebook.
        import random

        global map_labeled_examples

        # The flag may never have been defined in this kernel; treat that as
        # "disabled" instead of raising NameError.
        if not globals().get("map_labeled_examples", False):
            return _original_compile(self, student, trainset=trainset, sample=sample)

        # One-shot: consume the flag so the next call uses the stock behaviour.
        map_labeled_examples = False

        self.student = student.reset_copy()
        self.trainset = trainset

        if len(self.trainset) == 0:
            return self.student

        # Fixed seed keeps the sampled demos reproducible across runs.
        rng = random.Random(0)

        for predictor in self.student.predictors():
            # Only predictors tagged with a node id can have demos mapped onto them.
            if not hasattr(predictor, "_node_id"):
                continue

            demo_count = min(self.k, len(self.trainset))
            if sample:
                samples = rng.sample(self.trainset, demo_count)
            else:
                samples = self.trainset[:demo_count]

            # map_for_node may return None for a demo; those are dropped.
            samples = [demo.map_for_node(predictor._node_id) for demo in samples]
            samples = [demo for demo in samples if demo is not None]
            if len(samples) == 0:
                continue

            predictor.demos = samples

        return self.student


# predictor = ClassifyTransactionSimpler(ClassifyTransactionSignature)

# Disable dspy's on-disk cache for this session; the in-memory cache setting is
# left at its default (the override below is commented out).
# NOTE(review): presumably done so repeated runs issue fresh LLM calls — confirm.
dspy.configure_cache(
    enable_disk_cache=False,
    # enable_memory_cache=False,
)


@langwatch.trace()
def run():
    """Smoke-test the workflow once, then optimise it with MIPROv2.

    Everything runs inside a dspy context bound to "openai/gpt-4o-mini" and
    ``TemplateAdapter``. The function first calls the module on a fixed prompt
    and prints the prediction. It then registers the optimizer with LangWatch
    under the experiment "personal-transactions-from-notebook" and compiles
    against the datasets where only ``description`` is an input.

    NOTE(review): the compiled program returned by ``optimizer.compile`` is
    discarded; capture it if the optimised module is needed afterwards.
    """
    with dspy.context(lm=dspy.LM("openai/gpt-4o-mini"), adapter=TemplateAdapter()):
        module = WorkflowModule(run_evaluations=True)
        module.prevent_crashes()

        # Single sanity call before starting the optimisation.
        pred = module(
            description="say only 'Gas & Fuel', nothing else", category="Gas & Fuel"
        )
        print(pred, "\n\n\n\n")

        # Only one metric is defined. An earlier variant based on
        # pred.total_score(weighting="mean") was declared under the same name
        # and silently shadowed by this one, so it has been removed.
        def metric(example, pred, trace=None):
            """Case-insensitive exact match between expected and predicted category."""
            return (
                example.category.lower() == pred.classify_transaction.category.lower()
            )

        optimizer = dspy.MIPROv2(
            metric=metric,
            auto="light",
            num_threads=24,
        )

        langwatch.dspy.init(
            experiment="personal-transactions-from-notebook", optimizer=optimizer
        )

        optimizer.compile(
            module,
            trainset=trainset_no_category_input,
            valset=testset_no_category_input,
            max_bootstrapped_demos=4,
            max_labeled_demos=16,
        )


# Execute the traced smoke test and MIPROv2 optimisation. This constructs
# dspy.LM("openai/gpt-4o-mini") and runs the optimizer with num_threads=24,
# so expect a long-running cell.
run()

2025-10-29 23:50:52,770 - langwatch.utils.initialization - INFO - Setting up LangWatch client...
2025-10-29 23:50:52,771 - langwatch.client - INFO - Configuring OTLP exporter with endpoint: http://localhost:5560/api/otel/v1/traces
2025-10-29 23:50:52,772 - langwatch.client - INFO - Registering atexit handler to flush tracer provider on exit
2025-10-29 23:50:52,773 - langwatch.client - INFO - Successfully configured tracer provider with OTLP exporter
2025-10-29 23:50:52,773 - langwatch.utils.initialization - INFO - LangWatch client setup complete


2025/10/29 23:50:55 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 10
minibatch: False
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 20

2025/10/29 23:50:55 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/10/29 23:50:55 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/10/29 23:50:55 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Prediction(
    classify_transaction=Prediction(
    category='Gas & Fuel'
),
    end={'output': 'Gas & Fuel'},
    evaluations={'exact_match': EvaluationResultWithMetadata(status='processed', score=1.0, passed=True, label=None, details=None, inputs={'data': {'output': 'Gas & Fuel', 'expected_output': 'Gas & Fuel'}}, cost=None, duration=1)}
) 





[LangWatch] Experiment initialized, run_id: ancient-opalescent-chinchilla
[LangWatch] Open http://localhost:5560/inbox-narrator/experiments/personal-transactions-from-notebook?runIds=ancient-opalescent-chinchilla to track your DSPy training session live

Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


 45%|████▌     | 9/20 [00:16<00:19,  1.80s/it]


Bootstrapped 4 full traces after 9 examples for up to 1 rounds, amounting to 9 attempts.
Bootstrapping set 4/6


 30%|███       | 6/20 [00:06<00:14,  1.04s/it]


Bootstrapped 4 full traces after 6 examples for up to 1 rounds, amounting to 6 attempts.
Bootstrapping set 5/6


 15%|█▌        | 3/20 [00:04<00:22,  1.34s/it]


KeyboardInterrupt: 