# DSPy Simple Introduction

### Source: https://x.com/MaximeRivest/article/1929861781448536081
### Author: Maxime Rivest, May 2025

In [None]:
# Optional setup: uncomment the two lines below to install this notebook's
# dependencies with uv before running the remaining cells.
#! uv pip install dspy
#! uv pip install attachments "datar[pandas]"

In [None]:
from datar.dplyr import mutate, summarise, n
from datar.tibble import tibble
import datar.base as b
from datar import f
from attachments import Attachments
import dspy

# Default LM for every DSPy call in this notebook: the `devstral` model,
# addressed through the `ollama_chat` provider at the local endpoint below.
lm = dspy.LM(
    'ollama_chat/devstral',
    api_base='http://localhost:11434',
    api_key='',
)
# Alternative model, left disabled:
# lm=dspy.LM('xai/grok-3-mini')
dspy.configure(lm=lm)


In [None]:
#Setup dspy program
class count_ai_occurrences(dspy.Signature):
    """Count the number times the word 'Artificial Intelligence' or 'AI' or any other reference to AI or AI-related terms appears
       in the paragraph"""
    # NOTE: the docstring above is prompt text, not just documentation. The
    # optimizer log lists it verbatim as instruction candidate 0, so editing
    # it changes what the LM is asked to do.

    # Input field: the text the LM has to inspect.
    paragraph: str = dspy.InputField(
        desc="The paragraph to count the AI mentions in",
    )

    # Output field: a single integer count. Its description is also shown to
    # the LM in the system message.
    ai_occurrences_count: int = dspy.OutputField(
        desc="The number of times the word 'Artificial Intelligence' or 'AI' appears in the paragraph",
    )

# Baseline (un-optimized) predictor built directly from the signature.
dspy_module = dspy.Predict(count_ai_occurrences)


def count_ai_occurrences_f(paragraph):
    """Return the AI-mention count the baseline predictor gives for one paragraph.

    The call goes through `dspy_module`; only the `ai_occurrences_count`
    field of the resulting prediction is returned.
    """
    prediction = dspy_module(paragraph=paragraph)
    return prediction.ai_occurrences_count


In [68]:
# Load the Wikipedia article on artificial intelligence. The bracketed DSL
# suffix appended to the URL asks `attachments` to split the page into paragraphs.
attachments_dsl = "[images: false][select: p,title,h1,h2,h3,h4,h5,h6][split: paragraphs]"
a = Attachments(
    "https://en.wikipedia.org/wiki/Artificial_intelligence"
    + attachments_dsl
)


In [69]:
# Put the text of the first 20 chunks into a `paragraphs` column, then add
# `flash_response`: the baseline predictor's count for each paragraph.
df = mutate(
    tibble(paragraphs=[chunk.text for chunk in a[:20]]),
    flash_response=f.paragraphs.apply(count_ai_occurrences_f),
)


In [70]:
# Display the paragraphs next to the baseline counts (`flash_response`).
df

Unnamed: 0,paragraphs,flash_response
,<object>,<int64>
0.0,# https://en.wikipedia.org/wiki/Artificial_int...,2
1.0,# Artificial intelligence - Wikipedia,1
2.0,## Contents,0
3.0,# Artificial intelligence,1
4.0,Artificial intelligence (AI) is the capability...,3
5.0,High-profile applications of AI include advanc...,10
6.0,Various subfields of AI research are centered ...,10
7.0,Artificial intelligence was founded as an acad...,10
8.0,## Goals,0


In [None]:
# Gold labels: run the same counting function again, but with Claude Sonnet
# as the LM for the duration of the `with` block only. Its answers are stored
# in `resp_sonnet` and treated as the reference values from here on.
with dspy.context(lm=dspy.LM('anthropic/claude-sonnet-4-20250514')):
    df_with_goldset_col = df >> mutate(
        resp_sonnet=f.paragraphs.apply(count_ai_occurrences_f)
    )


In [72]:
# Baseline score: percentage of rows where the default LM's count is exactly
# equal to the Sonnet (gold) count.
(
    df_with_goldset_col
    >> mutate(exact_match=f.resp_sonnet == f.flash_response)
    >> summarise(baseline_precision=b.sum(f.exact_match) / n() * 100)
)


Unnamed: 0,baseline_precision
,<float64>
0.0,70.0


In [73]:
# Convert every row into a dspy.Example for the optimizer.
# `with_inputs('paragraph')` tells DSPy which field is fed to the program;
# the remaining field (`ai_occurrences_count`) is the label the metric compares against.
trainset = [
    dspy.Example(
        paragraph=row['paragraphs'],              # input text
        ai_occurrences_count=row["resp_sonnet"],  # target taken from the gold column
    ).with_inputs('paragraph')
    for row in df_with_goldset_col.to_dict(orient='records')
]


In [74]:
# Metric handed to the optimizer.
def exact_match(x, y, trace=None):
    """Report whether two objects carry the same `ai_occurrences_count`.

    Args:
        x: First object exposing an `ai_occurrences_count` attribute.
        y: Second object exposing an `ai_occurrences_count` attribute.
        trace: Accepted but not used by this metric.

    Returns:
        The result of comparing the two counts with `==`.
    """
    left_count = x.ai_occurrences_count
    right_count = y.ai_occurrences_count
    return left_count == right_count


In [75]:
# Create a MIPROv2 optimizer that scores candidate programs with `exact_match`.
optimizer = dspy.MIPROv2(metric=exact_match)

# Optimize the baseline predictor on the training examples. Judging by the log
# printed below, this tries combinations of instructions and few-shot sets and
# returns the best-scoring program.
optimized_dspy_module = optimizer.compile(
    dspy_module,
    trainset=trainset,
    requires_permission_to_run=False,
)


2025/06/09 09:20:17 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 10
minibatch: False
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 16



[93m[1mProjected Language Model (LM) Calls[0m

Based on the parameters you have set, the maximum number of LM calls is projected as follows:

[93m- Prompt Generation: [94m[1m10[0m[93m data summarizer calls + [94m[1m3[0m[93m * [94m[1m1[0m[93m lm calls in program + ([94m[1m2[0m[93m) lm calls in program-aware proposer = [94m[1m15[0m[93m prompt model calls[0m
[93m- Program Evaluation: [94m[1m16[0m[93m examples in val set * [94m[1m10[0m[93m batches = [94m[1m160[0m[93m LM program calls[0m

[93m[1mEstimated Cost Calculation:[0m

[93mTotal Cost = (Number of calls to task model * (Avg Input Token Length per Call * Task Model Price per Input Token + Avg Output Token Length per Call * Task Model Price per Output Token)
            + (Number of program calls * (Avg Input Token Length per Call * Task Prompt Price per Input Token + Avg Output Token Length per Call * Prompt Model Price per Output Token).[0m

For a preliminary estimate of potential costs, we

2025/06/09 09:20:37 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/06/09 09:20:37 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/06/09 09:20:37 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...



No input received within 20 seconds. Proceeding with execution...
Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


100%|██████████| 4/4 [00:09<00:00,  2.26s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 4 attempts.
Bootstrapping set 4/6


 75%|███████▌  | 3/4 [00:00<00:00, 660.07it/s]


Bootstrapped 2 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 5/6


 25%|██▌       | 1/4 [00:01<00:04,  1.50s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 6/6


100%|██████████| 4/4 [00:04<00:00,  1.08s/it]
2025/06/09 09:20:52 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/06/09 09:20:52 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 4 attempts.


2025/06/09 09:21:05 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...

2025/06/09 09:22:11 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/06/09 09:22:11 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Count the number times the word 'Artificial Intelligence' or 'AI' or any other reference to AI or AI-related terms appears in the paragraph

2025/06/09 09:22:11 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Count the occurrences of "AI" or any reference to artificial intelligence in the given paragraph. Consider case insensitivity and variations like "Artificial Intelligence," "A.I.," etc.

2025/06/09 09:22:11 INFO dspy.teleprompt.mipro_optimizer_v2: 2: Analyze the given paragraph and return the number of occurrences where "Artificial Intelligence," "AI," or any related AI terms are mentioned. Ensure to count all direct and contextual references accurately.

2025/06/09 09:22:11 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/06/09 09:

Average Metric: 11.00 / 16 (68.8%): 100%|██████████| 16/16 [00:00<00:00, 2579.32it/s]

2025/06/09 09:22:11 INFO dspy.evaluate.evaluate: Average Metric: 11 / 16 (68.8%)
2025/06/09 09:22:11 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 68.75

2025/06/09 09:22:11 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 10 =====



Average Metric: 8.00 / 16 (50.0%): 100%|██████████| 16/16 [00:26<00:00,  1.67s/it]

2025/06/09 09:22:37 INFO dspy.evaluate.evaluate: Average Metric: 8 / 16 (50.0%)
2025/06/09 09:22:37 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 50.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/06/09 09:22:37 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0]
2025/06/09 09:22:37 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 68.75


2025/06/09 09:22:37 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 10 =====



Average Metric: 11.00 / 16 (68.8%): 100%|██████████| 16/16 [00:24<00:00,  1.54s/it]

2025/06/09 09:23:02 INFO dspy.evaluate.evaluate: Average Metric: 11 / 16 (68.8%)
2025/06/09 09:23:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.75 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/06/09 09:23:02 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0, 68.75]
2025/06/09 09:23:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 68.75


2025/06/09 09:23:02 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 10 =====



Average Metric: 12.00 / 16 (75.0%): 100%|██████████| 16/16 [00:27<00:00,  1.75s/it]

2025/06/09 09:23:30 INFO dspy.evaluate.evaluate: Average Metric: 12 / 16 (75.0%)
2025/06/09 09:23:30 INFO dspy.teleprompt.mipro_optimizer_v2: [92mBest full score so far![0m Score: 75.0
2025/06/09 09:23:30 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 75.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/06/09 09:23:30 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0, 68.75, 75.0]
2025/06/09 09:23:30 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0


2025/06/09 09:23:30 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 10 =====



Average Metric: 12.00 / 16 (75.0%): 100%|██████████| 16/16 [00:31<00:00,  1.98s/it]

2025/06/09 09:24:02 INFO dspy.evaluate.evaluate: Average Metric: 12 / 16 (75.0%)
2025/06/09 09:24:02 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 75.0 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 2'].
2025/06/09 09:24:02 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0, 68.75, 75.0, 75.0]
2025/06/09 09:24:02 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0


2025/06/09 09:24:02 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 10 =====



Average Metric: 11.00 / 16 (68.8%): 100%|██████████| 16/16 [00:33<00:00,  2.12s/it]

2025/06/09 09:24:36 INFO dspy.evaluate.evaluate: Average Metric: 11 / 16 (68.8%)
2025/06/09 09:24:36 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.75 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5'].
2025/06/09 09:24:36 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0, 68.75, 75.0, 75.0, 68.75]
2025/06/09 09:24:36 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0


2025/06/09 09:24:36 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 10 =====



Average Metric: 11.00 / 16 (68.8%): 100%|██████████| 16/16 [00:00<00:00, 2828.38it/s]

2025/06/09 09:24:36 INFO dspy.evaluate.evaluate: Average Metric: 11 / 16 (68.8%)
2025/06/09 09:24:36 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.75 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/06/09 09:24:36 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0, 68.75, 75.0, 75.0, 68.75, 68.75]
2025/06/09 09:24:36 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0


2025/06/09 09:24:36 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 10 =====



Average Metric: 11.00 / 16 (68.8%): 100%|██████████| 16/16 [00:31<00:00,  1.99s/it]

2025/06/09 09:25:07 INFO dspy.evaluate.evaluate: Average Metric: 11 / 16 (68.8%)
2025/06/09 09:25:07 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.75 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/06/09 09:25:07 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0, 68.75, 75.0, 75.0, 68.75, 68.75, 68.75]
2025/06/09 09:25:07 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0


2025/06/09 09:25:07 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 10 =====



Average Metric: 8.00 / 16 (50.0%): 100%|██████████| 16/16 [00:31<00:00,  1.96s/it]

2025/06/09 09:25:39 INFO dspy.evaluate.evaluate: Average Metric: 8 / 16 (50.0%)
2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 50.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4'].
2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0, 68.75, 75.0, 75.0, 68.75, 68.75, 68.75, 50.0]
2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0


2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 10 =====



Average Metric: 11.00 / 16 (68.8%): 100%|██████████| 16/16 [00:00<00:00, 3199.32it/s]

2025/06/09 09:25:39 INFO dspy.evaluate.evaluate: Average Metric: 11 / 16 (68.8%)
2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 68.75 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0, 68.75, 75.0, 75.0, 68.75, 68.75, 68.75, 50.0, 68.75]
2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0


2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 10 =====



Average Metric: 12.00 / 16 (75.0%): 100%|██████████| 16/16 [00:00<00:00, 4089.01it/s]

2025/06/09 09:25:39 INFO dspy.evaluate.evaluate: Average Metric: 12 / 16 (75.0%)
2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 75.0 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [68.75, 50.0, 68.75, 75.0, 75.0, 68.75, 68.75, 68.75, 50.0, 68.75, 75.0]
2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 75.0


2025/06/09 09:25:39 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 75.0!





In [76]:
def count_ai_occurrences_opt(paragraph):
    """Return the AI-mention count the optimized predictor gives for one paragraph.

    Same contract as `count_ai_occurrences_f`, except that the call goes
    through `optimized_dspy_module`.
    """
    prediction = optimized_dspy_module(paragraph=paragraph)
    return prediction.ai_occurrences_count


# That's all DSPy needs: the optimized program is called like a plain function.
count_ai_occurrences_opt("This is about Deep Neural Networks")


0

In [77]:
# Compare the baseline and the optimized predictor against the gold column.
# The "flash" / "flashlite" names are only labels: `flash_response` holds the
# answers of the default LM, `resp_flash_opt` those of the optimized program.
final_performance = (
    df_with_goldset_col
    # Run the optimized predictor on every paragraph.
    >> mutate(
        resp_flash_opt=f.paragraphs.apply(count_ai_occurrences_opt),
    )
    # Row-wise agreement with the gold (Sonnet) count, before and after optimization.
    >> mutate(
        flash_eq_sonnet=f.resp_sonnet == f.flash_response,
        flash_opt_eq_sonnet=f.resp_flash_opt == f.resp_sonnet,
    )
    # Share of matching rows, as a percentage (n() is the row count).
    >> summarise(
        flashlite_before_opt=b.sum(f.flash_eq_sonnet) / n() * 100,
        flashlite_after_opt=b.sum(f.flash_opt_eq_sonnet) / n() * 100,
    )
    # Difference between the two percentages (i.e. in percentage points).
    >> mutate(precision_increase=f.flashlite_after_opt - f.flashlite_before_opt)
)

f"The precision increased by {final_performance['precision_increase'].values[0]:.2f}% 🔥"

'The precision increased by 10.00% 🔥'

In [57]:
# Show the optimized predictor's repr: its signature, the selected instructions and the field definitions.
optimized_dspy_module

Predict(StringSignature(paragraph -> ai_occurrences_count
    instructions='You are an expert AI text analyst. Given a paragraph of text, your task is to meticulously scan and count every occurrence of the terms "Artificial Intelligence," "AI," or any other related references, such as "machine learning," "neural networks," "deep learning," or phrases that clearly allude to artificial intelligence concepts (e.g., in titles, URLs, or contextual mentions). Be thorough: consider variations in capitalization, abbreviations, and synonyms, but only count direct and unambiguous references. Return your response as: "Total occurrences: [count]. Breakdown: [list of specific instances found].'
    paragraph = Field(annotation=str required=True json_schema_extra={'desc': 'The paragraph to count the AI mentions in', '__dspy_field_type': 'input', 'prefix': 'Paragraph:'})
    ai_occurrences_count = Field(annotation=int required=True json_schema_extra={'desc': "The number of times the word 'Artificial 

In [62]:
# Print recent LM interactions recorded by DSPy (n=5 requested) to see the exact prompts that were sent.
dspy.inspect_history(n=5)





[34m[2025-06-03T22:11:18.601101][0m

[31mSystem message:[0m

Your input fields are:
1. `paragraph` (str): The paragraph to count the AI mentions in
Your output fields are:
1. `ai_occurrences_count` (int): The number of times the word 'Artificial Intelligence' or 'AI' appears in the paragraph
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## paragraph ## ]]
{paragraph}

[[ ## ai_occurrences_count ## ]]
{ai_occurrences_count}        # note: the value you produce must be a single int value

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        You are an expert AI text analyst. Given a paragraph of text, your task is to meticulously scan and count every occurrence of the terms "Artificial Intelligence," "AI," or any other related references, such as "machine learning," "neural networks," "deep learning," or phrases that clearly allude to artificial intelligence concepts (e.g., in titles, URLs, or cont