In [4]:
import dspy as ds
import pandas as pd
import numpy as np
from rich import print
from dspy.datasets.gsm8k import GSM8K, gsm8k_metric
from tqdm import tqdm
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot
from dspy.datasets import HotPotQA

tqdm.pandas()

%load_ext rich


The rich extension is already loaded. To reload it, use:
  %reload_ext rich


## GSM8K


In [3]:
turbo = ds.OpenAI(model="gpt-3.5-turbo-instruct", max_tokens=250)
ds.settings.configure(lm=turbo, log_openai_usage=True)

In [17]:
gsm8k = GSM8K()

train_gsm8k, dev_gsm8k = gsm8k.train[:10], gsm8k.dev[:10]

print(train_gsm8k)


100%|██████████| 7473/7473 [00:00<00:00, 26876.81it/s]
100%|██████████| 1319/1319 [00:00<00:00, 28036.56it/s]


In [28]:
print(train_gsm8k[0])

### Define the modules


In [33]:
class CoT(ds.Module):
    def __init__(self):
        super().__init__()
        self.prog = ds.ChainOfThought("question -> answer")

    def forward(self, question):
        return self.prog(question=question)

In [43]:
teleprompter = BootstrapFewShot(
    metric=gsm8k_metric, max_bootstrapped_demos=4, max_labeled_demos=4
)

optimized_cot = teleprompter.compile(CoT(), trainset=train_gsm8k, valset=dev_gsm8k)


 50%|█████     | 5/10 [00:08<00:08,  1.67s/it]


Bootstrapped 4 full traces after 6 examples in round 0.



prog = [1;35mChainOfThought[0m[1m([0m[1;35mStringSignature[0m[1m([0mquestion -> answer
    [33minstructions[0m=[32m'Given the fields `question`, produce the fields `answer`.'[0m
    question = [1;35mField[0m[1m([0m[33mannotation[0m=[35mstr[0m [33mrequired[0m=[3;92mTrue[0m [33mjson_schema_extra[0m=[1m{[0m[32m'__dspy_field_type'[0m: [32m'input'[0m, [32m'prefix'[0m: [32m'Question:'[0m, [32m'desc'[0m: [32m'$[0m[32m{[0m[32mquestion[0m[32m}[0m[32m'[0m[1m}[0m[1m)[0m
    answer = [1;35mField[0m[1m([0m[33mannotation[0m=[35mstr[0m [33mrequired[0m=[3;92mTrue[0m [33mjson_schema_extra[0m=[1m{[0m[32m'__dspy_field_type'[0m: [32m'output'[0m, [32m'prefix'[0m: [32m'Answer:'[0m, [32m'desc'[0m: [32m'$[0m[32m{[0m[32manswer[0m[32m}[0m[32m'[0m[1m}[0m[1m)[0m
[1m)[0m[1m)[0m

In [52]:
optimized_cot(question="What is the capital of France?")


[1;35mPrediction[0m[1m([0m
    [33mrationale[0m=[32m'find the answer. First, we know that the country is France. Then, we know that the capital is the most important city in a country. Therefore, the capital of France is Paris.'[0m,
    [33manswer[0m=[32m'Paris'[0m
[1m)[0m

In [53]:
evalute = Evaluate(
    devset=dev_gsm8k,
    metric=gsm8k_metric,
    num_threads=4,
    display_progress=True,
    display_table=1,
)

In [54]:
evalute(optimized_cot)

Average Metric: 9 / 10  (90.0): 100%|██████████| 10/10 [00:04<00:00,  2.46it/s]


Average Metric: 9 / 10  (90.0%)


  df.loc[:, metric_name] = df[metric_name].apply(


Unnamed: 0,question,gold_reasoning,example_answer,rationale,pred_answer,gsm8k_metric
0,"20 birds migrate on a seasonal basis from one lake to another, searching for food. If they fly from lake Jim to lake Disney in...",The birds' flying distance between Lake Jim through lake Disney to lake London is 50+60 = <<50+60=110>>110 miles. Since each bird flies the 110 miles...,2200,"calculate the combined distance traveled by the birds. First, we know that the birds flew 50 miles from lake Jim to lake Disney and then...",2200 miles,✔️ [True]


[1;36m90.0[0m

In [60]:
turbo.inspect_history()




Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?
Reasoning: Let's think step by step in order to find Marion's score. We know that Ella got 4 incorrect answers, which means she got 36 correct answers out of 40. We also know that Marion got 6 more than half of Ella's score, which is 6 more than 36/2 = 18. Therefore, Marion's score is 18 + 6 = 24.
Answer: 24

---

Question: Bridget counted 14 shooting stars in the night sky. Reginald counted two fewer shooting stars than did Bridget, but Sam counted four more shooting stars than did Reginald. How many more shooting stars did Sam count in the night sky than was the average n

[32m"\n\n\nGiven the fields `question`, produce the fields `answer`.\n\n---\n\nFollow the following format.\n\nQuestion: $[0m[32m{[0m[32mquestion[0m[32m}[0m[32m\nReasoning: Let's think step by step in order to $[0m[32m{[0m[32mproduce the answer[0m[32m}[0m[32m. We ...\nAnswer: $[0m[32m{[0m[32manswer[0m[32m}[0m[32m\n\n---\n\nQuestion: The result from the 40-item Statistics exam Marion and Ella took already came out. Ella got 4 incorrect answers while Marion got 6 more than half the score of Ella. What is Marion's score?\nReasoning: Let's think step by step in order to find Marion's score. We know that Ella got 4 incorrect answers, which means she got 36 correct answers out of 40. We also know that Marion got 6 more than half of Ella's score, which is 6 more than 36/2 = 18. Therefore, Marion's score is 18 + 6 = 24.\nAnswer: 24\n\n---\n\nQuestion: Bridget counted 14 shooting stars in the night sky. Reginald counted two fewer shooting stars than did Bridget, but Sam

## With retrieval module


In [5]:
turbo = ds.OpenAI(model="gpt-3.5-turbo")
colbertv2_wiki17_abstracts = ds.ColBERTv2(
    url="http://20.102.90.50:2017/wiki17_abstracts"
)

ds.settings.configure(lm=turbo, rm=colbertv2_wiki17_abstracts, log_openai_usage=True)


In [6]:
dataset_hpqa = HotPotQA(
    train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0
)

  table = cls._concat_blocks(blocks, axis=0)


In [24]:
trainset_hpqa = [x.with_inputs("question") for x in dataset_hpqa.train]
devset_hpqa = [x.with_inputs("question") for x in dataset_hpqa.dev]

len(trainset_hpqa), len(devset_hpqa)


[1m([0m[1;36m20[0m, [1;36m50[0m[1m)[0m

In [25]:
print(trainset_hpqa[2])

In [26]:
print(devset_hpqa[0])

### Signature

In [27]:
class BasicQA(ds.Signature):
    """Answer questions with short factoid layers"""

    question = ds.InputField()
    answer = ds.OutputField(desc='often between 1 and 5 words')

In [28]:
BasicQA


[1;35mBasicQA[0m[1m([0mquestion -> answer
    [33minstructions[0m=[32m'Answer questions with short factoid layers'[0m
    question = [1;35mField[0m[1m([0m[33mannotation[0m=[35mstr[0m [33mrequired[0m=[3;92mTrue[0m [33mjson_schema_extra[0m=[1m{[0m[32m'__dspy_field_type'[0m: [32m'input'[0m, [32m'prefix'[0m: [32m'Question:'[0m, [32m'desc'[0m: [32m'$[0m[32m{[0m[32mquestion[0m[32m}[0m[32m'[0m[1m}[0m[1m)[0m
    answer = [1;35mField[0m[1m([0m[33mannotation[0m=[35mstr[0m [33mrequired[0m=[3;92mTrue[0m [33mjson_schema_extra[0m=[1m{[0m[32m'desc'[0m: [32m'often between 1 and 5 words'[0m, [32m'__dspy_field_type'[0m: [32m'output'[0m, [32m'prefix'[0m: [32m'Answer:'[0m[1m}[0m[1m)[0m
[1m)[0m

In [29]:
generate_answer = ds.Predict(BasicQA)

In [41]:
pred = generate_answer(question=devset_hpqa[3].question)

In [42]:
print(f'Question: {devset_hpqa[3].question}')
print(f'Answer: {pred.answer}')

In [43]:
turbo.inspect_history(n=3)




Answer questions with short factoid layers

---

Follow the following format.

Question: ${question}
Answer: often between 1 and 5 words

---

Question: What river is near the Crichton Collegiate Church?
Answer: River Nith





[32m'\n\n\nAnswer questions with short factoid layers\n\n---\n\nFollow the following format.\n\nQuestion: $[0m[32m{[0m[32mquestion[0m[32m}[0m[32m\nAnswer: often between 1 and 5 words\n\n---\n\nQuestion: What river is near the Crichton Collegiate Church?\nAnswer:\x1b[0m[32m[[0m[32m32m River Nith\x1b[0m[32m[[0m[32m0m\n\n\n'[0m

In [49]:
generate_cot = ds.ChainOfThought(BasicQA)

pred = generate_cot(question=devset_hpqa[14].question)

print(f'Question: {devset_hpqa[14].question}')
print(f'Thought: {pred.rationale.split(".", 1)[1].strip()}')
print(f'Answer: {pred.answer}')


In [62]:
retrieve = ds.Retrieve(k=3)

top_k_passages = retrieve(devset_hpqa[14].question).passages

print(
    f"Top {retrieve.k} passages for question: {devset_hpqa[14].question} \n",
    "-" * 30,
    "\n",
)

for idx, passage in enumerate(top_k_passages):
    print(f"{idx+1}]", passage, "\n")


In [63]:
retrieve("Who is the president of the United States?").passages


[1m[[0m
    [32m'President of the United States [0m[32m([0m[32mdisambiguation[0m[32m)[0m[32m | The President of the United States [0m[32m([0m[32mof America[0m[32m)[0m[32m has been chief of the United States executive branch since 1789. The current U.S. President is Donald Trump.'[0m,
    [32m'President of the United States | The President of the United States [0m[32m([0m[32minformally referred to as "POTUS"[0m[32m)[0m[32m is the head of state and head of government of the United States. The president directs the executive branch of the federal government and is the commander-in-chief of the United States Armed Forces.'[0m,
    [32m'List of Presidents of the United States | The President of the United States is the elected head of state and head of government of the United States. The president leads the executive branch of the federal government and is the commander-in-chief of the United States Armed Forces. The president is indirectly elected to a four