In [1]:
# ! pip install -U dspy

In [2]:
# ! ollama pull llama3.2

In [3]:
import pandas as pd
import tqdm

### Trying out the framework on a simple example

In [4]:
import dspy
llm = dspy.LM('ollama_chat/llama3.2', api_base='http://localhost:11434', api_key='', temperature = 0.2)
dspy.configure(lm=llm)
dspy.configure_cache(enable_memory_cache=False, enable_disk_cache=False)

In [5]:
simple_model = dspy.Predict("question -> answer: int", cache = False)
simple_model(question="I have 5 different balls and I randomly select 4. How many possible combinations of the balls I can get?")

Prediction(
    answer=5
)

In [6]:
# llm.history[-1]

In [7]:
dspy.inspect_history(n = 1)





[34m[2025-06-20T20:56:29.355580][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str):
Your output fields are:
1. `answer` (int):
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## question ## ]]
{question}

Outputs will be a JSON object with the following fields.

{
  "answer": "{answer}        # note: the value you produce must be a single int value"
}
In adhering to this structure, your objective is: 
        Given the fields `question`, produce the fields `answer`.


[31mUser message:[0m

[[ ## question ## ]]
I have 5 different balls and I randomly select 4. How many possible combinations of the balls I can get?

Respond with a JSON object in the following order of fields: `answer` (must be formatted as a valid Python int).


[31mResponse:[0m

[32m{
  "answer": 5
}[0m







In [8]:
cot_model = dspy.ChainOfThought("question -> answer: int")
cot_model(question="I have 5 different balls and I randomly select 4. How many possible combinations of the balls I can get?")

Prediction(
    reasoning="This is a combination problem, where order does not matter. The number of combinations of n items taken r at a time can be calculated using the formula C(n, r) = n! / [r!(n-r)!]. In this case, we have 5 balls and we're selecting 4, so we calculate C(5, 4).",
    answer=5
)

In [9]:
dspy.inspect_history(n = 1)





[34m[2025-06-20T20:56:36.896674][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str):
Your output fields are:
1. `reasoning` (str): 
2. `answer` (int):
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## question ## ]]
{question}

Outputs will be a JSON object with the following fields.

{
  "reasoning": "{reasoning}",
  "answer": "{answer}        # note: the value you produce must be a single int value"
}
In adhering to this structure, your objective is: 
        Given the fields `question`, produce the fields `answer`.


[31mUser message:[0m

[[ ## question ## ]]
I have 5 different balls and I randomly select 4. How many possible combinations of the balls I can get?

Respond with a JSON object in the following order of fields: `reasoning`, then `answer` (must be formatted as a valid Python int).


[31mResponse:[0m

[32m{
  "reasoning": "This is a combination 

In [10]:
dspy.configure(adapter=dspy.JSONAdapter())

In [11]:
print(cot_model(question="I have 5 different balls and I randomly select 4. How many possible combinations of the balls I can get?"))

Prediction(
    reasoning='This is a combination problem, where the order of selection does not matter. The number of combinations can be calculated using the formula C(n, k) = n! / (k!(n-k)!), where n is the total number of items and k is the number of items to select. In this case, we have 5 balls and want to select 4, so we calculate C(5, 4).',
    answer=5
)


In [12]:
dspy.inspect_history(n = 1)





[34m[2025-06-20T20:56:40.166777][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str):
Your output fields are:
1. `reasoning` (str): 
2. `answer` (int):
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## question ## ]]
{question}

Outputs will be a JSON object with the following fields.

{
  "reasoning": "{reasoning}",
  "answer": "{answer}        # note: the value you produce must be a single int value"
}
In adhering to this structure, your objective is: 
        Given the fields `question`, produce the fields `answer`.


[31mUser message:[0m

[[ ## question ## ]]
I have 5 different balls and I randomly select 4. How many possible combinations of the balls I can get?

Respond with a JSON object in the following order of fields: `reasoning`, then `answer` (must be formatted as a valid Python int).


[31mResponse:[0m

[32m{
  "reasoning": "This is a combination 

In [13]:
print(cot_model(question="I have 25 different balls and I randomly select 9. How many possible combinations of the balls I can get?"))

Prediction(
    reasoning='This is a combination problem, where the order of selection does not matter. The number of combinations can be calculated using the formula C(n, k) = n! / (k!(n-k)!), where n is the total number of items and k is the number of items to choose.',
    answer=55
)


In [14]:
import math

In [15]:
n = 25
k = 9
round(math.factorial(n)/math.factorial(k)/math.factorial(n-k))

2042975

In [16]:
# ! brew install deno

In [17]:
from dspy import PythonInterpreter

In [18]:
def evaluate_math(expr: str) -> str:
    # Executes Python and returns the output as string
    with PythonInterpreter() as interp:
        return interp(expr)

In [19]:
react_model = dspy.ReAct(
    signature="question -> answer: int",  # expects an int answer
    tools=[evaluate_math]
)

In [20]:
response = react_model(question="I have 25 different balls and I randomly select 9. How many possible combinations of the balls I can get?")

In [21]:
response.answer

53130

In [22]:
response.trajectory

{'thought_0': 'To find the number of possible combinations of the balls I can get, we need to calculate the number of combinations of 9 items from a set of 25 items.',
 'tool_name_0': 'evaluate_math',
 'tool_args_0': {'expr': '{25} choose {9}'},
 'observation_0': 'Execution error in evaluate_math: \nTraceback (most recent call last):\n  File "/Users/marie/Documents/github/llm_env/lib/python3.11/site-packages/dspy/predict/react.py", line 89, in forward\n    trajectory[f"observation_{idx}"] = self.tools[pred.next_tool_name](**pred.next_tool_args)\n                                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/Users/marie/Documents/github/llm_env/lib/python3.11/site-packages/dspy/utils/callback.py", line 326, in sync_wrapper\n    return fn(instance, *args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/Users/marie/Documents/github/llm_env/lib/python3.11/site-packages/dspy/adapters/types/tool.py", line 166, in __call__\n    result = s

In [23]:
# we can use MLflow for observability - https://dspy.ai/tutorials/observability/
# no registration needed

In [24]:
# ! pip install -U mlflow

In [25]:
# # It is highly recommended to use SQL store when using MLflow tracing
# mlflow server --backend-store-uri sqlite:///mydb.sqlite
# python3 -m mlflow server --backend-store-uri sqlite:///mydb.sqlite
# If you don't specify a different port via --port flag, you MLflow server will be hosted at port 5000.

In [26]:
# import mlflow
# # Tell MLflow about the server URI.
# mlflow.set_tracking_uri("http://127.0.0.1:5000")
# # Create a unique name for your experiment.
# mlflow.set_experiment("DSPy")
# mlflow.dspy.autolog()

In [27]:
llm = dspy.LM('ollama_chat/llama3.2', api_base='http://localhost:11434', api_key='', temperature = 0.3)
dspy.configure(lm=llm)
dspy.configure_cache(enable_memory_cache=False, enable_disk_cache=False)
react_model = dspy.ReAct(
    signature="question -> answer: int",  # expects an int answer
    tools=[evaluate_math]
)

In [28]:
response = react_model(question="I have 25 different balls and I randomly select 9. How many possible combinations of the balls I can get?")

In [29]:
response.answer

53130

In [30]:
response.trajectory

{'thought_0': 'To find the number of possible combinations, we can use the concept of combinations in mathematics.',
 'tool_name_0': 'evaluate_math',
 'tool_args_0': {'expr': '{25}\n{9}'},
 'observation_0': {},
 'thought_1': 'The number of possible combinations can be calculated using the combination formula C(n, k) = n! / (k!(n-k)!). Here, we have n=25 balls and k=9 selected balls.',
 'tool_name_1': 'evaluate_math',
 'tool_args_1': {'expr': '{25}²{9}'},
 'observation_1': 'Execution error in evaluate_math: \nTraceback (most recent call last):\n  File "/Users/marie/Documents/github/llm_env/lib/python3.11/site-packages/dspy/predict/react.py", line 89, in forward\n    trajectory[f"observation_{idx}"] = self.tools[pred.next_tool_name](**pred.next_tool_args)\n                                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/Users/marie/Documents/github/llm_env/lib/python3.11/site-packages/dspy/utils/callback.py", line 326, in sync_wrapper\n    return fn

### NPS

In [31]:
import json
with open('nps_comments.json', 'r') as f:
    nps_data = json.loads(f.read())

In [32]:
topics = set()

for rec in nps_data: 
    for t in rec['topics']: 
        topics.add(t)

In [33]:
print(list(topics))

['Difficult Product Discovery', 'Damaged or Incorrect Items', 'Customs and Import Charges', 'Complicated Returns or Exchanges', 'Website or App Bugs', 'Inaccurate Product Descriptions or Photos', 'Confusing Loyalty or Discount Systems', 'Unresponsive or Generic Customer Support', 'Limited Size or Shade Availability', 'Slow or Unreliable Shipping']


In [34]:
from typing import Literal, List

class NPSTopic(dspy.Signature):
    """Classify NPS topics"""

    comment: str = dspy.InputField()
    answer: List[Literal['Slow or Unreliable Shipping', 'Inaccurate Product Descriptions or Photos', 'Limited Size or Shade Availability', 
                    'Unresponsive or Generic Customer Support', 'Website or App Bugs', 'Confusing Loyalty or Discount Systems', 
                    'Complicated Returns or Exchanges', 'Customs and Import Charges', 'Difficult Product Discovery', 
                    'Damaged or Incorrect Items']] = dspy.OutputField()

In [35]:
nps_data[0]

{'topics': ['Limited Size or Shade Availability'],
 'comment': "Absolutely frustrated! Every time I find something I love, it's sold out in my size. What's the point of having a wishlist if nothing is ever available?"}

In [36]:
nps_topic_model = dspy.ChainOfThought(NPSTopic)

In [37]:
response = nps_topic_model(comment = "Absolutely frustrated! Every time I find something I love, it's sold out in my size. What's the point of having a wishlist if nothing is ever available?")

In [38]:
dspy.inspect_history(n = 1)





[34m[2025-06-20T20:58:09.216473][0m

[31mSystem message:[0m

Your input fields are:
1. `comment` (str):
Your output fields are:
1. `reasoning` (str): 
2. `answer` (list[Literal['Slow or Unreliable Shipping', 'Inaccurate Product Descriptions or Photos', 'Limited Size or Shade Availability', 'Unresponsive or Generic Customer Support', 'Website or App Bugs', 'Confusing Loyalty or Discount Systems', 'Complicated Returns or Exchanges', 'Customs and Import Charges', 'Difficult Product Discovery', 'Damaged or Incorrect Items']]):
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## comment ## ]]
{comment}

Outputs will be a JSON object with the following fields.

{
  "reasoning": "{reasoning}",
  "answer": "{answer}        # note: the value you produce must adhere to the JSON schema: {\"type\": \"array\", \"items\": {\"type\": \"string\", \"enum\": [\"Slow or Unreliable Shipping\", \"Inaccura

In [39]:
nps_df = pd.DataFrame(nps_data)

In [40]:
nps_df['id'] = list(map(lambda x: x + 1, range(nps_df.shape[0])))

In [41]:
tmp = []

for rec in tqdm.tqdm(nps_df.to_dict('records')):
    response = nps_topic_model(comment = rec['comment'])
    res = {
        'id': rec['id'],
        'model_topics': response.answer
    }

    tmp.append(res)

100%|█████████████████████████████████████████| 105/105 [06:17<00:00,  3.59s/it]


In [42]:
ini_model_topics_df = pd.DataFrame(tmp)

In [43]:
ini_model_topics_df

Unnamed: 0,id,model_topics
0,1,[Limited Size or Shade Availability]
1,2,"[Slow or Unreliable Shipping, Inaccurate Produ..."
2,3,[Inaccurate Product Descriptions or Photos]
3,4,[Confusing Loyalty or Discount Systems]
4,5,[Website or App Bugs]
...,...,...
100,101,"[Customs and Import Charges, Inaccurate Produc..."
101,102,"[Slow or Unreliable Shipping, Inaccurate Produ..."
102,103,"[Slow or Unreliable Shipping, Inaccurate Produ..."
103,104,"[Slow or Unreliable Shipping, Inaccurate Produ..."


In [44]:
nps_df = nps_df.merge(ini_model_topics_df)

In [45]:
nps_df.sample(5).to_dict('records')

[{'topics': ['Limited Size or Shade Availability'],
  'comment': "Absolutely frustrated! Every time I find something I love, it's sold out in my size. What's the point of having a wishlist if nothing is ever available?",
  'id': 1,
  'model_topics': ['Limited Size or Shade Availability']},
 {'topics': ['Customs and Import Charges'],
  'comment': 'Got hit with unexpected customs fees that doubled my order cost. Should have been disclosed before checkout.',
  'id': 27,
  'model_topics': ['Customs and Import Charges']},
 {'topics': ['Difficult Product Discovery'],
  'comment': "Search results are completely random. Looking for 'foundation' brings up nail polish. Makes no sense.",
  'id': 36,
  'model_topics': ['Slow or Unreliable Shipping',
   'Inaccurate Product Descriptions or Photos']},
 {'topics': ['Slow or Unreliable Shipping'],
  'comment': 'Premium shipping costs more than the product but still takes a week. What exactly am I paying premium for?',
  'id': 85,
  'model_topics': ['Sl

In [46]:
def compare_topics(l1, l2):
    l1_fmt = ', '.join(sorted(l1))
    l2_fmt = ', '.join(sorted(l2))
    if l1_fmt == l2_fmt: 
        return 1 
    return 0


nps_df['model_accuracy'] = list(map(
    compare_topics,
    nps_df.topics,
    nps_df.model_topics
))

In [47]:
round(100*nps_df.model_accuracy.mean(), 2)

54.29

In [48]:
import random
random.random()

0.1454613971032902

In [49]:
trainset = []
valset = []
for rec in nps_data: 
    if random.random() <= 0.5:
        trainset.append(
            dspy.Example(
                comment = rec['comment'],
                answer = rec['topics']
            ).with_inputs('comment')
        )
    else: 
        valset.append(
            dspy.Example(
                comment = rec['comment'],
                answer = rec['topics']
            ).with_inputs('comment')
        )

In [50]:
# tp = dspy.MIPROv2(metric=dspy.evaluate.answer_exact_match, auto="light", num_threads=24)

In [51]:
def list_exact_match(example, pred, trace=None):
    """Custom metric for comparing lists of topics"""
    try:
        pred_answer = pred.answer
        expected_answer = example.answer
        
        # Convert to sets for order-independent comparison
        if isinstance(pred_answer, list) and isinstance(expected_answer, list):
            return set(pred_answer) == set(expected_answer)
        else:
            return pred_answer == expected_answer
    except Exception as e:
        print(f"Error in metric: {e}")
        return False

In [52]:
tp = dspy.MIPROv2(metric=list_exact_match, auto="light", num_threads=24)

In [53]:
opt_nps_topic_model =  tp.compile(
    nps_topic_model, 
    trainset=trainset, 
    valset=valset,
    requires_permission_to_run = False, provide_traceback=True)

2025/06/20 21:04:26 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 10
minibatch: True
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 61

2025/06/20 21:04:26 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/06/20 21:04:26 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/06/20 21:04:26 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


 11%|█████                                       | 5/44 [00:16<02:07,  3.26s/it]


Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.
Bootstrapping set 4/6


 16%|███████                                     | 7/44 [00:23<02:04,  3.36s/it]


Bootstrapped 3 full traces after 7 examples for up to 1 rounds, amounting to 7 attempts.
Bootstrapping set 5/6


  2%|█                                           | 1/44 [00:04<02:55,  4.08s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 6/6


  5%|██                                          | 2/44 [00:06<02:15,  3.23s/it]
2025/06/20 21:05:17 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/06/20 21:05:17 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.
Error getting source code: unhashable type: 'dict'.

Running without program aware proposer.


2025/06/20 21:05:58 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...

2025/06/20 21:06:08 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/06/20 21:06:08 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Classify NPS topics

2025/06/20 21:06:08 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Help a customer find their perfect shade by recommending available products within a 3-day timeframe, or else risk losing them to a competitor brand that prioritizes product availability.

2025/06/20 21:06:08 INFO dspy.teleprompt.mipro_optimizer_v2: 2: Classify NPS (Net Promoter Score) topics as positive, negative, or neutral, ensuring that product discovery and filtering systems are considered to provide relevant recommendations for skincare and beauty products.

2025/06/20 21:06:08 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/06/20 21:06:08 INFO dspy.teleprompt.mipro_optimizer_v2: ==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==
2025/06/20 21:

Average Metric: 35.00 / 61 (57.4%): 100%|███████| 61/61 [03:17<00:00,  3.24s/it]

2025/06/20 21:09:26 INFO dspy.evaluate.evaluate: Average Metric: 35 / 61 (57.4%)
2025/06/20 21:09:26 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 57.38

2025/06/20 21:09:26 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 13 - Minibatch ==



Average Metric: 25.00 / 35 (71.4%): 100%|███████| 35/35 [02:12<00:00,  3.79s/it]

2025/06/20 21:11:38 INFO dspy.evaluate.evaluate: Average Metric: 25 / 35 (71.4%)
2025/06/20 21:11:38 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 71.43 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/06/20 21:11:39 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43]
2025/06/20 21:11:39 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38]
2025/06/20 21:11:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 57.38


2025/06/20 21:11:39 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 13 - Minibatch ==



Average Metric: 21.00 / 35 (60.0%): 100%|███████| 35/35 [02:10<00:00,  3.73s/it]

2025/06/20 21:13:49 INFO dspy.evaluate.evaluate: Average Metric: 21 / 35 (60.0%)
2025/06/20 21:13:49 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 60.0 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/06/20 21:13:49 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43, 60.0]
2025/06/20 21:13:49 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38]
2025/06/20 21:13:49 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 57.38


2025/06/20 21:13:49 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 13 - Minibatch ==



Average Metric: 26.00 / 35 (74.3%): 100%|███████| 35/35 [01:59<00:00,  3.41s/it]

2025/06/20 21:15:49 INFO dspy.evaluate.evaluate: Average Metric: 26 / 35 (74.3%)
2025/06/20 21:15:49 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 74.29 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/06/20 21:15:49 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43, 60.0, 74.29]
2025/06/20 21:15:49 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38]
2025/06/20 21:15:49 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 57.38


2025/06/20 21:15:49 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 13 - Minibatch ==



Average Metric: 26.00 / 35 (74.3%): 100%|███████| 35/35 [02:00<00:00,  3.45s/it]

2025/06/20 21:17:49 INFO dspy.evaluate.evaluate: Average Metric: 26 / 35 (74.3%)
2025/06/20 21:17:49 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 74.29 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 2'].
2025/06/20 21:17:49 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43, 60.0, 74.29, 74.29]
2025/06/20 21:17:49 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38]
2025/06/20 21:17:49 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 57.38


2025/06/20 21:17:49 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 13 - Minibatch ==



Average Metric: 23.00 / 35 (65.7%): 100%|███████| 35/35 [01:55<00:00,  3.31s/it]

2025/06/20 21:19:45 INFO dspy.evaluate.evaluate: Average Metric: 23 / 35 (65.7%)
2025/06/20 21:19:45 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 65.71 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5'].
2025/06/20 21:19:45 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43, 60.0, 74.29, 74.29, 65.71]
2025/06/20 21:19:45 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38]
2025/06/20 21:19:45 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 57.38


2025/06/20 21:19:45 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 13 - Full Evaluation =====
2025/06/20 21:19:45 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 74.29) from minibatch trials...



Average Metric: 40.00 / 61 (65.6%): 100%|███████| 61/61 [03:01<00:00,  2.98s/it]

2025/06/20 21:22:47 INFO dspy.evaluate.evaluate: Average Metric: 40 / 61 (65.6%)
2025/06/20 21:22:47 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 65.57
2025/06/20 21:22:47 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38, 65.57]
2025/06/20 21:22:47 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 65.57
2025/06/20 21:22:47 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/06/20 21:22:47 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 8 / 13 - Minibatch ==



Average Metric: 24.00 / 35 (68.6%): 100%|███████| 35/35 [01:59<00:00,  3.41s/it]

2025/06/20 21:24:47 INFO dspy.evaluate.evaluate: Average Metric: 24 / 35 (68.6%)
2025/06/20 21:24:47 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.57 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/06/20 21:24:47 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43, 60.0, 74.29, 74.29, 65.71, 68.57]
2025/06/20 21:24:47 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38, 65.57]
2025/06/20 21:24:47 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 65.57


2025/06/20 21:24:47 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 9 / 13 - Minibatch ==



Average Metric: 26.00 / 35 (74.3%): 100%|███████| 35/35 [02:01<00:00,  3.46s/it]

2025/06/20 21:26:48 INFO dspy.evaluate.evaluate: Average Metric: 26 / 35 (74.3%)
2025/06/20 21:26:48 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 74.29 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/06/20 21:26:48 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43, 60.0, 74.29, 74.29, 65.71, 68.57, 74.29]
2025/06/20 21:26:48 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38, 65.57]
2025/06/20 21:26:48 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 65.57


2025/06/20 21:26:48 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 10 / 13 - Minibatch ==



Average Metric: 21.00 / 35 (60.0%): 100%|███████| 35/35 [01:52<00:00,  3.20s/it]

2025/06/20 21:28:40 INFO dspy.evaluate.evaluate: Average Metric: 21 / 35 (60.0%)
2025/06/20 21:28:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 60.0 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4'].
2025/06/20 21:28:40 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43, 60.0, 74.29, 74.29, 65.71, 68.57, 74.29, 60.0]
2025/06/20 21:28:40 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38, 65.57]
2025/06/20 21:28:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 65.57


2025/06/20 21:28:40 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 11 / 13 - Minibatch ==



Average Metric: 28.00 / 35 (80.0%): 100%|███████| 35/35 [01:57<00:00,  3.37s/it]

2025/06/20 21:30:38 INFO dspy.evaluate.evaluate: Average Metric: 28 / 35 (80.0%)
2025/06/20 21:30:38 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 80.0 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/06/20 21:30:38 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43, 60.0, 74.29, 74.29, 65.71, 68.57, 74.29, 60.0, 80.0]
2025/06/20 21:30:38 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38, 65.57]
2025/06/20 21:30:38 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 65.57


2025/06/20 21:30:38 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 12 / 13 - Minibatch ==



Average Metric: 24.00 / 35 (68.6%): 100%|███████| 35/35 [02:03<00:00,  3.52s/it]

2025/06/20 21:32:41 INFO dspy.evaluate.evaluate: Average Metric: 24 / 35 (68.6%)
2025/06/20 21:32:41 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.57 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/06/20 21:32:41 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [71.43, 60.0, 74.29, 74.29, 65.71, 68.57, 74.29, 60.0, 80.0, 68.57]
2025/06/20 21:32:41 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38, 65.57]
2025/06/20 21:32:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 65.57


2025/06/20 21:32:41 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 13 - Full Evaluation =====
2025/06/20 21:32:41 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 74.29) from minibatch trials...



Average Metric: 48.00 / 61 (78.7%): 100%|███████| 61/61 [03:40<00:00,  3.61s/it]

2025/06/20 21:36:22 INFO dspy.evaluate.evaluate: Average Metric: 48 / 61 (78.7%)
2025/06/20 21:36:22 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 78.69
2025/06/20 21:36:22 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [57.38, 65.57, 78.69]
2025/06/20 21:36:22 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 78.69
2025/06/20 21:36:22 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/06/20 21:36:22 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 78.69!





In [54]:
opt_nps_topic_model(comment = "Absolutely frustrated! Every time I find something I love, it's sold out in my size. What's the point of having a wishlist if nothing is ever available?"
)

Prediction(
    reasoning='The customer is expressing frustration with the limited availability of products in their size, which suggests a problem with product inventory management.',
    answer=['Limited Size or Shade Availability']
)

In [55]:
dspy.inspect_history(n = 1)





[34m[2025-06-20T21:36:26.502410][0m

[31mSystem message:[0m

Your input fields are:
1. `comment` (str):
Your output fields are:
1. `reasoning` (str): 
2. `answer` (list[Literal['Slow or Unreliable Shipping', 'Inaccurate Product Descriptions or Photos', 'Limited Size or Shade Availability', 'Unresponsive or Generic Customer Support', 'Website or App Bugs', 'Confusing Loyalty or Discount Systems', 'Complicated Returns or Exchanges', 'Customs and Import Charges', 'Difficult Product Discovery', 'Damaged or Incorrect Items']]):
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## comment ## ]]
{comment}

Outputs will be a JSON object with the following fields.

{
  "reasoning": "{reasoning}",
  "answer": "{answer}        # note: the value you produce must adhere to the JSON schema: {\"type\": \"array\", \"items\": {\"type\": \"string\", \"enum\": [\"Slow or Unreliable Shipping\", \"Inaccura

In [56]:
tmp = []

for e in tqdm.tqdm(valset):
    comment = e.comment 
    prev_resp = nps_topic_model(comment = comment) 
    new_resp = opt_nps_topic_model(comment = comment)

    tmp.append(
        {
        'comment': comment,
        'sot_answer': e.answer,
        'prev_answer': prev_resp.answer,
        'new_answer': new_resp.answer
        }
    )

100%|███████████████████████████████████████████| 61/61 [08:40<00:00,  8.53s/it]


In [57]:
cmp_df = pd.DataFrame(tmp)

In [58]:
def list_exact_match_raw(expected_answer, pred_answer, trace=None):
    """Custom metric for comparing lists of topics"""
    try:
        # Convert to sets for order-independent comparison
        if isinstance(pred_answer, list) and isinstance(expected_answer, list):
            return set(pred_answer) == set(expected_answer)
        else:
            return pred_answer == expected_answer
    except Exception as e:
        print(f"Error in metric: {e}")
        return False

In [59]:
cmp_df['prev_accuracy'] = list(map(
    list_exact_match_raw,
    cmp_df.sot_answer,
    cmp_df.prev_answer))

cmp_df['new_accuracy'] = list(map(
    list_exact_match_raw,
    cmp_df.sot_answer,
    cmp_df.new_answer))

In [60]:
cmp_df[['prev_accuracy', 'new_accuracy']].mean()*100

prev_accuracy    62.295082
new_accuracy     81.967213
dtype: float64

In [61]:
tp2 = dspy.BootstrapFewShotWithRandomSearch(list_exact_match, num_threads=24, max_bootstrapped_demos = 10)

Going to sample between 1 and 10 traces per predictor.
Will attempt to bootstrap 16 candidate sets.


In [62]:
opt2_nps_topic_model =  tp2.compile(
    nps_topic_model, 
    trainset=trainset, 
    valset=valset)

Average Metric: 39.00 / 61 (63.9%): 100%|███████| 61/61 [02:58<00:00,  2.92s/it]

2025/06/20 21:48:05 INFO dspy.evaluate.evaluate: Average Metric: 39 / 61 (63.9%)



New best score: 63.93 for seed -3
Scores so far: [63.93]
Best score so far: 63.93
Average Metric: 43.00 / 61 (70.5%): 100%|███████| 61/61 [02:49<00:00,  2.79s/it]

2025/06/20 21:50:55 INFO dspy.evaluate.evaluate: Average Metric: 43 / 61 (70.5%)



New best score: 70.49 for seed -2
Scores so far: [63.93, 70.49]
Best score so far: 70.49


 34%|██████████████▋                            | 15/44 [01:02<02:00,  4.16s/it]


Bootstrapped 10 full traces after 15 examples for up to 1 rounds, amounting to 15 attempts.
Average Metric: 47.00 / 61 (77.0%): 100%|███████| 61/61 [03:03<00:00,  3.01s/it]

2025/06/20 21:55:01 INFO dspy.evaluate.evaluate: Average Metric: 47 / 61 (77.0%)



New best score: 77.05 for seed -1
Scores so far: [63.93, 70.49, 77.05]
Best score so far: 77.05


 23%|█████████▊                                 | 10/44 [01:01<03:28,  6.12s/it]


Bootstrapped 7 full traces after 10 examples for up to 1 rounds, amounting to 10 attempts.
Average Metric: 42.00 / 61 (68.9%): 100%|███████| 61/61 [02:52<00:00,  2.83s/it]

2025/06/20 21:58:55 INFO dspy.evaluate.evaluate: Average Metric: 42 / 61 (68.9%)



Scores so far: [63.93, 70.49, 77.05, 68.85]
Best score so far: 77.05


  7%|███                                         | 3/44 [00:24<05:36,  8.20s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Average Metric: 45.00 / 61 (73.8%): 100%|███████| 61/61 [03:11<00:00,  3.13s/it]

2025/06/20 22:02:31 INFO dspy.evaluate.evaluate: Average Metric: 45 / 61 (73.8%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77]
Best score so far: 77.05


  9%|████                                        | 4/44 [00:29<04:51,  7.28s/it]


Bootstrapped 1 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.
Average Metric: 39.00 / 61 (63.9%): 100%|███████| 61/61 [11:29<00:00, 11.30s/it]

2025/06/20 22:14:30 INFO dspy.evaluate.evaluate: Average Metric: 39 / 61 (63.9%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93]
Best score so far: 77.05


 11%|█████                                       | 5/44 [00:31<04:08,  6.36s/it]


Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.
Average Metric: 48.00 / 61 (78.7%): 100%|███████| 61/61 [03:12<00:00,  3.15s/it]

2025/06/20 22:18:14 INFO dspy.evaluate.evaluate: Average Metric: 48 / 61 (78.7%)



New best score: 78.69 for seed 3
Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69]
Best score so far: 78.69


 16%|███████                                     | 7/44 [01:02<05:29,  8.90s/it]


Bootstrapped 4 full traces after 7 examples for up to 1 rounds, amounting to 7 attempts.
Average Metric: 41.00 / 61 (67.2%): 100%|███████| 61/61 [03:56<00:00,  3.88s/it]

2025/06/20 22:23:13 INFO dspy.evaluate.evaluate: Average Metric: 41 / 61 (67.2%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21]
Best score so far: 78.69


 48%|████████████████████▌                      | 21/44 [03:07<03:25,  8.95s/it]


Bootstrapped 10 full traces after 21 examples for up to 1 rounds, amounting to 21 attempts.
Average Metric: 43.00 / 61 (70.5%): 100%|███████| 61/61 [04:10<00:00,  4.11s/it]

2025/06/20 22:30:31 INFO dspy.evaluate.evaluate: Average Metric: 43 / 61 (70.5%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49]
Best score so far: 78.69


 48%|████████████████████▌                      | 21/44 [03:15<03:34,  9.32s/it]


Bootstrapped 10 full traces after 21 examples for up to 1 rounds, amounting to 21 attempts.
Average Metric: 40.00 / 61 (65.6%): 100%|███████| 61/61 [04:11<00:00,  4.12s/it]

2025/06/20 22:37:58 INFO dspy.evaluate.evaluate: Average Metric: 40 / 61 (65.6%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57]
Best score so far: 78.69


 18%|████████                                    | 8/44 [01:06<04:57,  8.26s/it]


Bootstrapped 6 full traces after 8 examples for up to 1 rounds, amounting to 8 attempts.
Average Metric: 46.00 / 61 (75.4%): 100%|███████| 61/61 [04:19<00:00,  4.25s/it]

2025/06/20 22:43:24 INFO dspy.evaluate.evaluate: Average Metric: 46 / 61 (75.4%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57, 75.41]
Best score so far: 78.69


 16%|███████                                     | 7/44 [01:05<05:44,  9.32s/it]


Bootstrapped 4 full traces after 7 examples for up to 1 rounds, amounting to 7 attempts.
Average Metric: 43.00 / 61 (70.5%): 100%|███████| 61/61 [04:13<00:00,  4.15s/it]

2025/06/20 22:48:43 INFO dspy.evaluate.evaluate: Average Metric: 43 / 61 (70.5%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57, 75.41, 70.49]
Best score so far: 78.69


 30%|████████████▋                              | 13/44 [01:30<03:36,  6.97s/it]


Bootstrapped 8 full traces after 13 examples for up to 1 rounds, amounting to 13 attempts.
Average Metric: 48.00 / 61 (78.7%): 100%|███████| 61/61 [03:28<00:00,  3.42s/it]

2025/06/20 22:53:42 INFO dspy.evaluate.evaluate: Average Metric: 48 / 61 (78.7%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57, 75.41, 70.49, 78.69]
Best score so far: 78.69


 32%|█████████████▋                             | 14/44 [01:36<03:27,  6.92s/it]


Bootstrapped 10 full traces after 14 examples for up to 1 rounds, amounting to 14 attempts.
Average Metric: 29.00 / 61 (47.5%): 100%|███████| 61/61 [03:45<00:00,  3.69s/it]

2025/06/20 22:59:05 INFO dspy.evaluate.evaluate: Average Metric: 29 / 61 (47.5%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57, 75.41, 70.49, 78.69, 47.54]
Best score so far: 78.69


 23%|█████████▊                                 | 10/44 [01:24<04:46,  8.43s/it]


Bootstrapped 8 full traces after 10 examples for up to 1 rounds, amounting to 10 attempts.
Average Metric: 49.00 / 61 (80.3%): 100%|███████| 61/61 [04:25<00:00,  4.35s/it]

2025/06/20 23:04:55 INFO dspy.evaluate.evaluate: Average Metric: 49 / 61 (80.3%)



New best score: 80.33 for seed 11
Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57, 75.41, 70.49, 78.69, 47.54, 80.33]
Best score so far: 80.33


 27%|███████████▋                               | 12/44 [01:35<04:14,  7.94s/it]


Bootstrapped 8 full traces after 12 examples for up to 1 rounds, amounting to 12 attempts.
Average Metric: 42.00 / 61 (68.9%): 100%|███████| 61/61 [04:10<00:00,  4.10s/it]

2025/06/20 23:10:41 INFO dspy.evaluate.evaluate: Average Metric: 42 / 61 (68.9%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57, 75.41, 70.49, 78.69, 47.54, 80.33, 68.85]
Best score so far: 80.33


 16%|███████                                     | 7/44 [01:02<05:27,  8.86s/it]


Bootstrapped 5 full traces after 7 examples for up to 1 rounds, amounting to 7 attempts.
Average Metric: 47.00 / 61 (77.0%): 100%|███████| 61/61 [03:27<00:00,  3.40s/it]

2025/06/20 23:15:10 INFO dspy.evaluate.evaluate: Average Metric: 47 / 61 (77.0%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57, 75.41, 70.49, 78.69, 47.54, 80.33, 68.85, 77.05]
Best score so far: 80.33


 11%|█████                                       | 5/44 [00:46<05:59,  9.22s/it]


Bootstrapped 2 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.
Average Metric: 39.00 / 61 (63.9%): 100%|███████| 61/61 [03:37<00:00,  3.56s/it]

2025/06/20 23:19:33 INFO dspy.evaluate.evaluate: Average Metric: 39 / 61 (63.9%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57, 75.41, 70.49, 78.69, 47.54, 80.33, 68.85, 77.05, 63.93]
Best score so far: 80.33


 11%|█████                                       | 5/44 [00:46<06:05,  9.38s/it]


Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.
Average Metric: 40.00 / 61 (65.6%): 100%|███████| 61/61 [03:46<00:00,  3.71s/it]

2025/06/20 23:24:07 INFO dspy.evaluate.evaluate: Average Metric: 40 / 61 (65.6%)



Scores so far: [63.93, 70.49, 77.05, 68.85, 73.77, 63.93, 78.69, 67.21, 70.49, 65.57, 75.41, 70.49, 78.69, 47.54, 80.33, 68.85, 77.05, 63.93, 65.57]
Best score so far: 80.33
19 candidate programs found.


In [63]:
tmp = []

for e in tqdm.tqdm(valset):
    comment = e.comment 
    prev_resp = nps_topic_model(comment = comment) 
    new_resp = opt_nps_topic_model(comment = comment)
    new_reason_resp = opt2_nps_topic_model(comment = comment)

    tmp.append(
        {
        'comment': comment,
        'sot_answer': e.answer,
        'prev_answer': prev_resp.answer,
        'new_answer': new_resp.answer,
        'new_reason_answer': new_reason_resp.answer
        }
    )

100%|████████████████████████████████████████| 61/61 [2:06:46<00:00, 124.70s/it]


In [64]:
cmp_df = pd.DataFrame(tmp)

In [65]:
cmp_df['prev_accuracy'] = list(map(
    list_exact_match_raw,
    cmp_df.sot_answer,
    cmp_df.prev_answer))

cmp_df['new_accuracy'] = list(map(
    list_exact_match_raw,
    cmp_df.sot_answer,
    cmp_df.new_answer))

cmp_df['new_reason_accuracy'] = list(map(
    list_exact_match_raw,
    cmp_df.sot_answer,
    cmp_df.new_reason_answer))

In [66]:
cmp_df[['prev_accuracy', 'new_accuracy', 'new_reason_accuracy']].mean()*100

prev_accuracy          55.737705
new_accuracy           75.409836
new_reason_accuracy    77.049180
dtype: float64

In [67]:
opt2_nps_topic_model(comment = "Absolutely frustrated! Every time I find something I love, it's sold out in my size. What's the point of having a wishlist if nothing is ever available?"
)

Prediction(
    reasoning='Not supplied for this particular example. ',
    answer=['Limited Size or Shade Availability', 'Difficult Product Discovery']
)

In [68]:
dspy.inspect_history(n = 1)





[34m[2025-06-21T01:30:56.644792][0m

[31mSystem message:[0m

Your input fields are:
1. `comment` (str):
Your output fields are:
1. `reasoning` (str): 
2. `answer` (list[Literal['Slow or Unreliable Shipping', 'Inaccurate Product Descriptions or Photos', 'Limited Size or Shade Availability', 'Unresponsive or Generic Customer Support', 'Website or App Bugs', 'Confusing Loyalty or Discount Systems', 'Complicated Returns or Exchanges', 'Customs and Import Charges', 'Difficult Product Discovery', 'Damaged or Incorrect Items']]):
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## comment ## ]]
{comment}

Outputs will be a JSON object with the following fields.

{
  "reasoning": "{reasoning}",
  "answer": "{answer}        # note: the value you produce must adhere to the JSON schema: {\"type\": \"array\", \"items\": {\"type\": \"string\", \"enum\": [\"Slow or Unreliable Shipping\", \"Inaccura