In [1]:
import dspy

# Language model used for every generation step in this notebook.
mini = dspy.OpenAI(model="gpt-4o-mini")

# Retrieval model: a remote ColBERTv2 endpoint (the URL path names the
# `wiki17_abstracts` index).
colbertv2_wiki17_abstracts = dspy.ColBERTv2(
    url="http://20.102.90.50:2017/wiki17_abstracts",
)

# Register both as the defaults that DSPy modules pick up implicitly.
dspy.settings.configure(
    lm=mini,
    rm=colbertv2_wiki17_abstracts,
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dspy.datasets import HotPotQA

# Load a small HotPotQA split: 20 training examples, 50 dev examples, no test set.
dataset = HotPotQA(
    train_seed=1,
    train_size=20,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)

# Mark 'question' as the only input field; every other field on an example is
# treated as a label and/or metadata.
trainset = [example.with_inputs('question') for example in dataset.train]
devset = [example.with_inputs('question') for example in dataset.dev]

# Last expression of the cell: shown as the cell output.
len(trainset), len(devset)

  table = cls._concat_blocks(blocks, axis=0)


(20, 50)

In [3]:
# Signature for the final answering step: takes `context` and `question` as
# inputs and produces `answer`. SimplifiedBaleed wraps it in dspy.ChainOfThought.
#
# NOTE(review): the class docstring and the `desc=` strings are presumably
# rendered into the LM prompt (the inspect_history dump in this notebook shows
# exactly that for GenerateSearchQuery), so treat them as runtime text, not as
# free-form documentation. Field declaration order is kept as-is for the same reason.
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")  # passages gathered across hops
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

In [4]:
# Signature for one retrieval hop: takes the `context` gathered so far plus the
# original `question`, and produces a search `query`.
#
# The docstring and `desc=` strings below are prompt text: the inspect_history
# output later in this notebook shows the docstring as the first line of the
# prompt and "may contain relevant facts" next to "Context:". Do not edit them
# casually — changing a character changes what the LM is asked.
class GenerateSearchQuery(dspy.Signature):
    """Write a simple search query that will help answer a complex question"""

    context = dspy.InputField(desc="may contain relevant facts")  # empty on the first hop
    question = dspy.InputField()
    query = dspy.OutputField()

In [5]:
from dsp.utils import deduplicate

class SimplifiedBaleed(dspy.Module):
    """Multi-hop question answering program.

    For each hop it generates a search query from the question and the context
    gathered so far, retrieves passages for that query, and merges them into the
    context. After the last hop it generates the answer from the full context.
    """

    def __init__(self, passages_per_hop = 3, max_hops = 2):
        """Build the per-hop query generators, the retriever and the answerer.

        Args:
            passages_per_hop: `k` passed to `dspy.Retrieve`.
            max_hops: number of query/retrieve rounds; one query generator is
                created per hop.
        """
        super().__init__()

        self.max_hops = max_hops

        # One independent chain-of-thought query generator per hop.
        self.generate_query = [
            dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)
        ]
        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer `question`, returning a Prediction with `context` and `answer`."""
        gathered = []

        for hop_index in range(self.max_hops):
            hop_prediction = self.generate_query[hop_index](context=gathered, question=question)
            retrieved = self.retrieve(hop_prediction.query)
            # Append the new passages and drop repeats.
            gathered = deduplicate(gathered + retrieved.passages)

        answer_prediction = self.generate_answer(context=gathered, question=question)
        return dspy.Prediction(context=gathered, answer=answer_prediction.answer)

In [7]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment; the recorded
# cell output (True) is this call's return value.
# NOTE(review): this cell runs after the LM client was constructed in the first
# cell. If that client needs credentials from .env at construction time, this
# call would have to move ahead of it — confirm on a fresh kernel.
load_dotenv()

True

In [8]:
my_question = "What date was the original Evil Dead released?"

# Run the pipeline before any compilation has been applied to it.
uncompiled_baleen = SimplifiedBaleed()
pred = uncompiled_baleen(my_question)

print(f"Question: {my_question}")
print(f"Predicted Answer: {pred.answer}")

# Show only the first 200 characters of each passage to keep the printout short.
truncated_contexts = [passage[:200] + '...' for passage in pred.context]
print(f"Retrieved Contexts (truncated): {truncated_contexts}")

Question: What date was the original Evil Dead released?
Predicted Answer: 1981
Retrieved Contexts (truncated): ['Evil Dead (2013 film) | Evil Dead is a 2013 American supernatural horror film directed by Fede Alvarez (in his directorial debut), written by Rodo Sayagues and Alvarez and produced by Bruce Campbell, ...', 'The Evil Dead (disambiguation) | The Evil Dead is a 1981 horror film and a franchise of sequels and other media....', 'The Evil Dead | The Evil Dead is a 1981 American supernatural horror film written and directed by Sam Raimi and executive produced by Raimi and Bruce Campbell, who also stars alongside Ellen Sandweiss...', 'Evil Dead II | Evil Dead II (also known in publicity materials as Evil Dead 2: Dead by Dawn) is a 1987 American horror comedy film directed by Sam Raimi and a parody sequel to the 1981 horror film "Th...']


In [9]:
# Dump the 3 most recent prompt/completion exchanges recorded on the `mini` LM,
# to see what the pipeline actually sent for the question above.
mini.inspect_history(n=3)





Write a simple search query that will help answer a complex question

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the query}. We ...

Query: ${query}

---

Context: N/A

Question: What date was the original Evil Dead released?

Reasoning: Let's think step by step in order to[32m find the release date of the original Evil Dead movie. We need to identify the title of the movie and then search for its release information.

Query: "Evil Dead original release date"[0m







Write a simple search query that will help answer a complex question

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the query}. We ...

Query: ${query}

---

Context:
[1] «Evil Dead (2013 film) | Evil Dead is a 2013 American supernatural horror film directed by Fede Alvarez (in his directorial debut), 

In [10]:
def validate_context_and_answer_and_hops(example, pred, trace=None):
    """Metric: accept a prediction only if answer, context and hop queries all pass.

    Checks, in order (the first failure returns False):
      1. ``dspy.evaluate.answer_exact_match(example, pred)``.
      2. ``dspy.evaluate.answer_passage_match(example, pred)``.
      3. No hop is longer than 100 characters, where the hops are the question
         followed by every ``query`` output found in ``trace``.
      4. No query from the second one onward matches an earlier hop (the
         question or a previous query) according to
         ``dspy.evaluate.answer_exact_match_str(..., frac=0.8)``.

    Args:
        example: the gold example; must expose ``question``.
        pred: the program's prediction, forwarded to the ``dspy.evaluate`` helpers.
        trace: optional iterable of step tuples whose last element is that
            step's outputs. ``None`` (the default) is treated as an empty
            trace, so only the question itself is subject to check 3.

    Returns:
        bool: True when every check passes, False otherwise.
    """
    if not dspy.evaluate.answer_exact_match(example, pred):
        return False
    if not dspy.evaluate.answer_passage_match(example, pred):
        return False

    # `trace` defaults to None; iterating None would raise TypeError, so a
    # missing trace is handled exactly like an empty one.
    steps = trace if trace is not None else []
    hops = [example.question] + [outputs.query for *_, outputs in steps if 'query' in outputs]

    # `hops` always contains at least the question, so max() never sees an empty sequence.
    if max(len(hop) for hop in hops) > 100:
        return False

    # Start at index 2: hops[0] is the question and hops[1] the first query, so
    # the first comparison is the second query against everything before it.
    if any(
        dspy.evaluate.answer_exact_match_str(hops[idx], hops[:idx], frac=0.8)
        for idx in range(2, len(hops))
    ):
        return False

    return True

In [11]:
from dspy.teleprompt import BootstrapFewShot

# Few-shot bootstrapper that uses validate_context_and_answer_and_hops as its metric.
teleprompter = BootstrapFewShot(metric=validate_context_and_answer_and_hops)

# Student and teacher are both fresh pipelines; the teacher retrieves 2 passages per hop.
student_program = SimplifiedBaleed()
teacher_program = SimplifiedBaleed(passages_per_hop=2)

compiled_baleen = teleprompter.compile(
    student_program,
    teacher=teacher_program,
    trainset=trainset,
)

 90%|█████████ | 18/20 [01:05<00:07,  3.61s/it]

Bootstrapped 4 full traces after 19 examples in round 0.





In [12]:
from dspy.evaluate.evaluate import Evaluate

# Retrieval metric: did the pipeline's context include every gold document?
def gold_passages_retrieved(example, pred, trace=None):
    """Return True when every gold title appears among the retrieved passage titles.

    A passage's title is the text before the first " | " separator. Gold and
    retrieved titles are both passed through `dspy.evaluate.normalize_text`
    before comparison. `trace` is accepted for metric-signature compatibility
    and is not used.
    """
    normalize = dspy.evaluate.normalize_text

    wanted = {normalize(title) for title in example["gold_titles"]}
    retrieved = {normalize(passage.split(" | ")[0]) for passage in pred.context}

    return wanted <= retrieved

# Reusable evaluator bound to the dev set; the cells below call it with
# different programs and metrics.
evaluate_on_hotpotqa = Evaluate(
    devset=devset,
    num_threads=1,
    display_progress=True,
    display_table=5,
)


In [13]:
# Score both programs with the same retrieval metric. The uncompiled run passes
# display=False; the compiled run uses the evaluator's configured display settings.
uncompiled_baleen_retrieval_score = evaluate_on_hotpotqa(
    uncompiled_baleen,
    metric=gold_passages_retrieved,
    display=False,
)
compiled_baleen_retrieval_score = evaluate_on_hotpotqa(
    compiled_baleen,
    metric=gold_passages_retrieved,
)

print(f"## Retrieval Score for uncompiled Baleen: {uncompiled_baleen_retrieval_score}")
print(f"## Retrieval Score for compiled Baleen: {compiled_baleen_retrieval_score}")

Average Metric: 34 / 50  (68.0): 100%|██████████| 50/50 [02:52<00:00,  3.46s/it]

Average Metric: 34 / 50  (68.0%)



  df.loc[:, metric_name] = df[metric_name].apply(


Unnamed: 0,question,example_answer,gold_titles,context,pred_answer,gold_passages_retrieved
0,Are both Cangzhou and Qionghai in the Hebei province of China?,no,"{'Qionghai', 'Cangzhou'}","['Cangzhou | Cangzhou () is a prefecture-level city in eastern Hebei province, People\'s Republic of China. At the 2010 census, Cangzhou\'s built-up (""or metro"") area...",No,✔️ [True]
1,Who conducts the draft in which Marc-Andre Fleury was drafted to the Vegas Golden Knights for the 2017-18 season?,National Hockey League,"{'2017–18 Pittsburgh Penguins season', '2017 NHL Expansion Draft'}","[""Marc-André Fleury | Marc-André Fleury (born November 28, 1984) is a French-Canadian professional ice hockey goaltender playing for the Vegas Golden Knights of the National...",National Hockey League,✔️ [True]
2,"The Wings entered a new era, following the retirement of which Canadian retired professional ice hockey player and current general manager of the Tampa Bay...",Steve Yzerman,"{'2006–07 Detroit Red Wings season', 'Steve Yzerman'}","['Steve Yzerman | Stephen Gregory ""Steve"" Yzerman ( ; born May 9, 1965) is a Canadian retired professional ice hockey player and current general manager...",Steve Yzerman,✔️ [True]
3,What river is near the Crichton Collegiate Church?,the River Tyne,"{'Crichton Castle', 'Crichton Collegiate Church'}","[""Crichton Collegiate Church | Crichton Collegiate Church is situated about 0.6 mi south west of the hamlet of Crichton in Midlothian, Scotland. Crichton itself is...",River Tyne,✔️ [True]
4,In the 10th Century A.D. Ealhswith had a son called Æthelweard by which English king?,King Alfred the Great,"{'Ealhswith', 'Æthelweard (son of Alfred)'}","[""Æthelfrith of Mercia | Æthelfrith (fl. 880s – c. 904/915) was an ealdorman of southern Mercia, who flourished in the last two decades of the...",King Alfred the Great,False


## Retrieval Score for uncompiled Baleen: 70.0
## Retrieval Score for compiled Baleen: 68.0
