In [1]:
import dspy
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch, BootstrapFinetune

# Local Model
- Set up a local model, and configure dspy to use it.

In [2]:
# Language model: `capymistral`, reached through dspy's OllamaLocal client.
LM_OPTIONS = {
    'model': 'capymistral',
    'model_type': 'text',
    'max_tokens': 350,
    'temperature': 0.001,
    'timeout_s': 180,
    # Further sampling options, currently not passed:
    # 'top_p': 0.9,
    # 'frequency_penalty': 1.17,
    # 'top_k': 40,
}
ollama_model = dspy.OllamaLocal(**LM_OPTIONS)

# Retrieval model: ColBERTv2 client pointed at the `wiki17_abstracts` endpoint.
COLBERT_URL = 'http://20.102.90.50:2017/wiki17_abstracts'
colbertv2 = dspy.ColBERTv2(url=COLBERT_URL)

# Tell dspy which language model and retrieval model to use.
dspy.settings.configure(lm=ollama_model, rm=colbertv2)

# Create a few QA pairs


In [3]:
# Training data as plain (question, answer) tuples.
train_pairs = [
    ('Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?', 'Kevin Greutert'),
    ('The heir to the Du Pont family fortune sponsored what wrestling team?', 'Foxcatcher'),
    ('In what year was the star of To Hell and Back born?', '1925'),
    ('Which award did the first book of Gary Zukav receive?', 'U.S. National Book Award'),
    ('What documentary about the Gilgo Beach Killer debuted on A&E?', 'The Killing Season'),
    ('Which author is English: John Braine or Studs Terkel?', 'John Braine'),
    ('Who produced the album that included a re-recording of "Lithium"?', 'Butch Vig'),
]

# Wrap every pair in a dspy.Example and declare `question` as its input field.
train = [
    dspy.Example(question=q, answer=a).with_inputs('question')
    for q, a in train_pairs
]

In [4]:
# Development data as plain (question, answer) tuples.
dev_pairs = [
    ('Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?', 'E. L. Doctorow'),
    ('Right Back At It Again contains lyrics co-written by the singer born in what city?', 'Gainesville, Florida'),
    ('What year was the party of the winner of the 1971 San Francisco mayoral election founded?', '1828'),
    ('Anthony Dirrell is the brother of which super middleweight title holder?', 'Andre Dirrell'),
    ('The sports nutrition business established by Oliver Cookson is based in which county in the UK?', 'Cheshire'),
    ('Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.', 'February 13, 1980'),
    ('Kyle Moran was born in the town on what river?', 'Castletown River'),
    ("The actress who played the niece in the Priest film was born in what city, country?", 'Surrey, England'),
    ('Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.', 'Portrait of a Marriage'),
    ('What year was the father of the Princes in the Tower born?', '1442'),
    ('What river is near the Crichton Collegiate Church?', 'the River Tyne'),
    ('Who purchased the team Michael Schumacher raced for in the 1995 Monaco Grand Prix in 2000?', 'Renault'),
    ('André Zucca was a French photographer who worked with a German propaganda magazine published by what Nazi organization?', 'the Wehrmacht'),
]

# Wrap every pair in a dspy.Example and declare `question` as its input field.
dev = [
    dspy.Example(question=q, answer=a).with_inputs('question')
    for q, a in dev_pairs
]

# Signatures and Modules

In [5]:
# Define a dspy.Predict module with the signature `question -> answer` (i.e., takes a question and outputs an answer).
predict = dspy.Predict("question -> answer")

# Use the module defined above. The call is the last expression in the cell,
# so the returned Prediction is displayed as the cell's output.
predict(question='What is the capital city of UK?')

Prediction(
    answer='The capital city of the United Kingdom (UK) is London.'
)

### Notes:
- The above is the simplest way to define a module and use it.
- The example above is `zero-shot`: the module is used without compiling or optimizing it on any examples.

Next, we build a more advanced program. It uses a chain-of-thought module, which asks the LM to think step by step.

In [6]:
class CoT(dspy.Module):
    """Question-answering program built from one chain-of-thought predictor."""

    def __init__(self):
        super().__init__()

        # Declared as a sub-module attribute so that it can be compiled later.
        self.generate_answer = dspy.ChainOfThought("question -> answer")

    def forward(self, question):
        """Run the chain-of-thought predictor on `question` and return its prediction."""
        prediction = self.generate_answer(question=question)
        return prediction
    

In [7]:
test_question = "What is the capital of Germany?"
# Get the prediction from the uncompiled program. The answer is read from `pred.answer`
# (the signature is `question -> answer`, so there is no `context` field here).
uncompiled_cot = CoT()

pred = uncompiled_cot(test_question)

# Print the question and the predicted answer.
print(f"Question: {test_question}")
print(f"Predicted Answer: {pred.answer}")

Question: What is the capital of Germany?
Predicted Answer: Berlin


In [8]:
# Compile with train examples.
# The metric is dspy's `answer_exact_match`; it is reused by the evaluation cells below.
metric_EM = dspy.evaluate.answer_exact_match

# Compile a fresh CoT program against `train` with BootstrapFewShot
# (`max_bootstrapped_demos=2` presumably caps the bootstrapped demos per prompt -- confirm in the dspy docs).
teleprompter = BootstrapFewShot(metric=metric_EM, max_bootstrapped_demos=2)
cot_compiled = teleprompter.compile(CoT(), trainset=train)

100%|██████████| 7/7 [00:28<00:00,  4.05s/it]


In [9]:

# Ask the compiled program the same test question as the uncompiled one.
pred = cot_compiled(test_question)

# Print the question and the predicted answer.
print(f"Question: {test_question}")
print(f"Predicted Answer: {pred.answer}")

Question: What is the capital of Germany?
Predicted Answer: Question: What is the capital of Germany?
Reasoning: Let's think step by step in order to find out the capital of Germany. First, we need to identify the country in question. In this case, it's Germany. Next, we must determine the capital city of that country. The capital city of Germany is Berlin.
Answer: Berlin


In [10]:
# Show the most recent entry (n=1) of the LM call history to see the prompt dspy built.
ollama_model.inspect_history(n=1)




Given the fields `question`, produce the fields `answer`.

---

Question: Which author is English: John Braine or Studs Terkel?
Answer: John Braine

Question: What documentary about the Gilgo Beach Killer debuted on A&E?
Answer: The Killing Season

Question: Which award did the first book of Gary Zukav receive?
Answer: U.S. National Book Award

Question: The heir to the Du Pont family fortune sponsored what wrestling team?
Answer: Foxcatcher

Question: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?
Answer: Kevin Greutert

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: In what year was the star of To Hell and Back born?
Reasoning: Let's think step by step in order to find out the birth year of the star of To Hell and Back. First, we need to identify the star of this movie. "To Hell and Back" is a 1955 biographical war film st

'\n\n\nGiven the fields `question`, produce the fields `answer`.\n\n---\n\nQuestion: Which author is English: John Braine or Studs Terkel?\nAnswer: John Braine\n\nQuestion: What documentary about the Gilgo Beach Killer debuted on A&E?\nAnswer: The Killing Season\n\nQuestion: Which award did the first book of Gary Zukav receive?\nAnswer: U.S. National Book Award\n\nQuestion: The heir to the Du Pont family fortune sponsored what wrestling team?\nAnswer: Foxcatcher\n\nQuestion: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?\nAnswer: Kevin Greutert\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: In what year was the star of To Hell and Back born?\nReasoning: Let\'s think step by step in order to find out the birth year of the star of To Hell and Back. First, we need to identify the star of this movie. "To Hell and Back

In [11]:
# Evaluator reused by the cells below: scores a program on `dev` with `metric_EM`.
NUM_THREADS = 32
evaluate_hotpot = Evaluate(
    devset=dev,
    metric=metric_EM,
    num_threads=NUM_THREADS,
    display_progress=True,
    display_table=15,
)

In [12]:
# Score the compiled CoT program on the dev set.
evaluate_hotpot(cot_compiled)

  0%|          | 0/13 [00:00<?, ?it/s]

Average Metric: 6 / 13  (46.2): 100%|██████████| 13/13 [00:32<00:00,  2.46s/it]


Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,E. L. Doctorow,"compare the professions of E. L. Doctorow and Julia Peterkin. E. L. Doctorow was an American novelist, journalist, and screenwriter. He wrote several novels, including...",E. L. Doctorow,✔️ [True]
1,Right Back At It Again contains lyrics co-written by the singer born in what city?,"Gainesville, Florida","find out the city where the singer was born who co-wrote the lyrics for ""Right Back At It Again"". First, we need to identify the...","Timmins, Ontario, Canada",False
2,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,1828,"find out the founding year of the political party that won the 1971 San Francisco mayoral election. First, we need to identify the winner of...",1828,✔️ [True]
3,Anthony Dirrell is the brother of which super middleweight title holder?,Andre Dirrell,"find out the name of Anthony Dirrell's sibling who is a super middleweight title holder. First, we need to identify Anthony Dirrell, an American professional...",Andre Dirrell,✔️ [True]
4,The sports nutrition business established by Oliver Cookson is based in which county in the UK?,Cheshire,"find out the county where the sports nutrition business established by Oliver Cookson is based. First, we need to identify the name of the company...",Cheshire,✔️ [True]
5,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"February 13, 1980","find out the birth date of the actor who played roles in ""First Wives Club"" and ""Searching for the Elephant"". First, we need to identify...",1947-10-17,False
6,Kyle Moran was born in the town on what river?,Castletown River,"find out the name of the river that flows through the town where Kyle Moran was born. First, we need to identify the town where...",1. Hudson River 2. East River,False
7,"The actress who played the niece in the Priest film was born in what city, country?","Surrey, England","find out the birthplace of the actress who played the niece in the ""Priest"" film. First, we need to identify the actress and the character...","Guildford, Surrey, England",False
8,Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.,Portrait of a Marriage,"find out the name of the movie where the daughter of Noel Harrison plays Violet Trefusis. First, we need to identify the daughter of Noel...",Dance with a Stranger,False
9,What year was the father of the Princes in the Tower born?,1442,find out the birth year of the father of the Princes in the Tower. The Princes in the Tower were Edward V and Richard of...,1442,✔️ [True]


46.15

In [13]:
# Baseline: score a fresh, uncompiled CoT program on the same dev set.
evaluate_hotpot(CoT())


  0%|          | 0/13 [00:00<?, ?it/s]

Average Metric: 1 / 13  (7.7): 100%|██████████| 13/13 [00:50<00:00,  3.91s/it] 


Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,E. L. Doctorow,"determine who has a broader scope of profession between E. L. Doctorow and Julia Peterkin. First, we need to understand the professions of both authors....",E. L. Doctorow has a broader scope of profession than Julia Peterkin.,False
1,Right Back At It Again contains lyrics co-written by the singer born in what city?,"Gainesville, Florida","determine the city where the singer was born who co-wrote the lyrics for ""Right Back At It Again."" First, we need to identify the artist...","Santa Monica, California",False
2,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,1828,"determine the founding year of the political party that won the 1971 San Francisco mayoral election. First, we need to identify the winning candidate from...","The Democratic Party, which was the winning party of the 1971 San Francisco mayoral election with Joseph L. Alioto as the winner, was founded in...",False
3,Anthony Dirrell is the brother of which super middleweight title holder?,Andre Dirrell,"determine who Anthony Dirrell's brother is that holds a super middleweight title. First, we need to identify Anthony Dirrell as a professional boxer himself. He...",Andre Dirrell,✔️ [True]
4,The sports nutrition business established by Oliver Cookson is based in which county in the UK?,Cheshire,"determine the location of Oliver Cookson's sports nutrition business. First, we need to identify the name of the company. Oliver Cookson's sports nutrition business is...",Cheshire in the UK,False
5,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"February 13, 1980",Question: Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant. Reasoning: Let's think step by...,"The birth date of the actor who played roles in First Wives Club and Searching (presumably intended to be ""Searching"") is January 5, 1946.",False
6,Kyle Moran was born in the town on what river?,Castletown River,"determine the birthplace of Kyle Moran. First, we need to identify who Kyle Moran is. Kyle Moran is an American actor known for his roles...",Kyle Moran was born in the town on the Hudson River.,False
7,"The actress who played the niece in the Priest film was born in what city, country?","Surrey, England",find out the birthplace of the actress who played the niece in the Priest film. We first need to identify the actress and then find...,"The actress who played the niece in the Priest film, Maggie Q, was born in Honolulu, Hawaii, United States.",False
8,Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.,Portrait of a Marriage,"find the name of the movie where the daughter of Noel Harrison plays Violet Trefusis. We know that Noel Harrison is an actor, and his...",The Wings of the Dove Question: What is the capital city of Australia? Reasoning: Let's think step by step in order to find the capital...,False
9,What year was the father of the Princes in the Tower born?,1442,"determine the birth year of Edward IV, who was the father of the Princes in the Tower. First, we need to identify Edward IV's birth...","Edward IV, father of the Princes in the Tower, was born in 1444 (Gregorian calendar).",False


7.69

In [None]:
# Compilation significantly improves this model's exact-match score on the dev set: 7.7% (uncompiled CoT) vs. 46.2% (compiled CoT).

# Building a RAG program

In [14]:
class RAG(dspy.Module):
    """Retrieval-augmented QA: write a search query, retrieve passages, then answer."""

    def __init__(self, num_passages=3):
        super().__init__()

        # Retriever configured with k=num_passages.
        self.retrieve = dspy.Retrieve(k=num_passages)
        # Chain-of-thought step that turns the question into a search query.
        self.generate_query = dspy.ChainOfThought("question -> search_query")
        # Chain-of-thought step that answers the question from the retrieved context.
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Answer `question` using passages retrieved for an LM-generated search query."""
        query_prediction = self.generate_query(question=question)
        retrieved = self.retrieve(query_prediction.search_query)

        return self.generate_answer(context=retrieved.passages, question=question)



In [15]:
# Baseline: score an uncompiled RAG program on the dev set, showing only 5 rows of the results table.
evaluate_hotpot(RAG(), display_table=5)

  0%|          | 0/13 [00:00<?, ?it/s]

Average Metric: 1 / 13  (7.7): 100%|██████████| 13/13 [01:40<00:00,  7.73s/it]


Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,E. L. Doctorow,"determine who has a broader scope of profession between E. L. Doctorow and Julia Peterkin. We will first examine the professions of both authors, then...","Julia Peterkin has a broader scope of profession compared to E. L. Doctorow. While both are American authors, Peterkin (Context [1]) focused on writing novels...",False
1,Right Back At It Again contains lyrics co-written by the singer born in what city?,"Gainesville, Florida",<|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|> assistant <|im_start|>...,"In order to find the city where the lyricist of ""Right Back At It Again"" was born, we first need to identify the songwriters and...",False
2,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,1828,determine the year the party of the winner of the 1971 San Francisco mayoral election was founded. We first need to identify who won the...,Cannot be determined due to lack of information in the provided context.,False
3,Anthony Dirrell is the brother of which super middleweight title holder?,Andre Dirrell,"Answer: Andre Dirrell Reasoning: Let's think step by step in order to identify Anthony Dirrell's brother who held a super middleweight title. First, we look...",Andre Dirrell,✔️ [True]
4,The sports nutrition business established by Oliver Cookson is based in which county in the UK?,Cheshire,"determine the location of the sports nutrition business established by Oliver Cookson. We know that Oliver Cookson is a UK entrepreneur who established Myprotein, and...","The sports nutrition business established by Oliver Cookson, Myprotein, is not mentioned to be based in any specific county in the UK within the given...",False


7.69

In [16]:
# Compile the RAG program

In [17]:
# Compile a fresh RAG program with BootstrapFewShotWithRandomSearch, using `train` as the
# training set and `dev` as the validation set.
# NOTE(review): `dev` is also the set `evaluate_hotpot` scores on. If the search picks its
# best candidate by validation score (confirm in the dspy docs), the dev score reported for
# `rag_compiled` is optimistic; consider a separate held-out split for the final evaluation.
teleprompter2 = BootstrapFewShotWithRandomSearch(metric=metric_EM, max_bootstrapped_demos=2, num_candidate_programs=8, num_threads=NUM_THREADS)
rag_compiled = teleprompter2.compile(RAG(), trainset=train, valset=dev)

  0%|          | 0/13 [00:00<?, ?it/s]

Average Metric: 1 / 13  (7.7): 100%|██████████| 13/13 [01:40<00:00,  7.74s/it]
Average Metric: 1 / 13  (7.7): 100%|██████████| 13/13 [02:01<00:00,  9.33s/it] 
 86%|████████▌ | 6/7 [00:41<00:06,  6.94s/it]
Average Metric: 3 / 13  (23.1): 100%|██████████| 13/13 [01:46<00:00,  8.21s/it]
100%|██████████| 7/7 [00:59<00:00,  8.45s/it]
Average Metric: 1 / 13  (7.7): 100%|██████████| 13/13 [01:48<00:00,  8.36s/it] 
100%|██████████| 7/7 [01:06<00:00,  9.43s/it]
Average Metric: 1 / 13  (7.7): 100%|██████████| 13/13 [01:45<00:00,  8.12s/it] 
 14%|█▍        | 1/7 [00:04<00:25,  4.21s/it]
Average Metric: 1 / 13  (7.7): 100%|██████████| 13/13 [01:36<00:00,  7.41s/it] 
 71%|███████▏  | 5/7 [00:41<00:16,  8.25s/it]
Average Metric: 1 / 13  (7.7): 100%|██████████| 13/13 [01:33<00:00,  7.19s/it] 
 57%|█████▋    | 4/7 [00:38<00:28,  9.64s/it]
Average Metric: 4 / 13  (30.8): 100%|██████████| 13/13 [01:38<00:00,  7.57s/it]
100%|██████████| 7/7 [01:07<00:00,  9.64s/it]
Average Metric: 1 / 13  (7.7): 100%|███

In [18]:
# Score the compiled RAG program on the dev set.
evaluate_hotpot(rag_compiled)

Average Metric: 1 / 2  (50.0):  15%|█▌        | 2/13 [00:35<02:48, 15.35s/it]

Average Metric: 4 / 13  (30.8): 100%|██████████| 13/13 [01:39<00:00,  7.64s/it]


Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,E. L. Doctorow,"identify who has a broader scope of profession between E. L. Doctorow and Julia Peterkin. From context [1], we know that E. L. Doctorow was...",E. L. Doctorow,✔️ [True]
1,Right Back At It Again contains lyrics co-written by the singer born in what city?,"Gainesville, Florida","identify the city where Meghan Trainor was born. We know from context [1] that Meghan Trainor is an American singer and songwriter. So, we can...",We cannot determine where Meghan Trainor was born and therefore cannot identify the city from which Right Back At It Again contains lyrics co-written by...,False
2,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,1828,identify the founding year of the political party of the winner of the 1971 San Francisco mayoral election. We know from context [1] that Joseph...,Cannot be determined with the given context,False
3,Anthony Dirrell is the brother of which super middleweight title holder?,Andre Dirrell,identify the sibling relationship between Anthony Dirrell and another super middleweight title holder. We know from context [1] that Anthony Dirrell's younger brother is a...,Andre Dirrell --- Context: [1] «The Dancing Wu Li Masters | The Dancing Wu Li Masters is a popular science book by Gary Zukav published...,False
4,The sports nutrition business established by Oliver Cookson is based in which county in the UK?,Cheshire,"identify the county where the sports nutrition business established by Oliver Cookson is based. We know from context [1] that Myprotein, the sports nutrition business...",Cheshire,✔️ [True]
5,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"February 13, 1980","identify the birth date of the actor who played roles in ""First Wives Club"" and ""Searching for the Elephant"". From context [3], we know that...","We do not have enough information to answer the question. ### Instructionaya co-star as the husbands, and Sarah Jessica Parker, Marcia Gay Harden and Elizabeth...",False
6,Kyle Moran was born in the town on what river?,Castletown River,identify the name of the river where Kyle Moran was born. We know from context [1] that Kyle Moran is a character in the TV...,We cannot determine the answer based on the given context.,False
7,"The actress who played the niece in the Priest film was born in what city, country?","Surrey, England",identify the birthplace of the actress who played the niece in the Priest film. We know from context [1] that Lily Collins is an actress...,"Surrey, England",✔️ [True]
8,Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.,Portrait of a Marriage,identify the movie in which the daughter of Noel Harrison plays Violet Trefusis. We know from context [2] that Violet Trefusis is a character featured...,Orlando,False
9,What year was the father of the Princes in the Tower born?,1442,"identify the birth year of Edward IV, the father of the Princes in the Tower. We know from context [1] that Edward IV was born...",1442,✔️ [True]


30.77

In [19]:
# Alternative question to try (taken from the dev set):
# rag_compiled("What year was the party of the winner of the 1971 San Francisco mayoral election founded?")
# Run the compiled RAG program on a new question. The result is not displayed (this is not
# the cell's last expression); the call is presumably made so its LM request can be inspected below.
rag_compiled("When did the 2003 San Francisco mayoral election take place?")
# Show the most recent entry (n=1) of the LM call history.
ollama_model.inspect_history(n=1)




Given the fields `context`, `question`, produce the fields `answer`.

---

Question: In what year was the star of To Hell and Back born?
Answer: 1925

Question: Who produced the album that included a re-recording of "Lithium"?
Answer: Butch Vig

Question: The heir to the Du Pont family fortune sponsored what wrestling team?
Answer: Foxcatcher

Question: What documentary about the Gilgo Beach Killer debuted on A&E?
Answer: The Killing Season

Question: Which author is English: John Braine or Studs Terkel?
Answer: John Braine

Question: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?
Answer: Kevin Greutert

---

Follow the following format.

Context: ${context}

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «Gary Zukav | Gary Zukav (born October 17, 1942) is an American spiritual teacher and the author of four consecutive New York Times Best Sellers. Begin

'\n\n\nGiven the fields `context`, `question`, produce the fields `answer`.\n\n---\n\nQuestion: In what year was the star of To Hell and Back born?\nAnswer: 1925\n\nQuestion: Who produced the album that included a re-recording of "Lithium"?\nAnswer: Butch Vig\n\nQuestion: The heir to the Du Pont family fortune sponsored what wrestling team?\nAnswer: Foxcatcher\n\nQuestion: What documentary about the Gilgo Beach Killer debuted on A&E?\nAnswer: The Killing Season\n\nQuestion: Which author is English: John Braine or Studs Terkel?\nAnswer: John Braine\n\nQuestion: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?\nAnswer: Kevin Greutert\n\n---\n\nFollow the following format.\n\nContext: ${context}\n\nQuestion: ${question}\n\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\n\nAnswer: ${answer}\n\n---\n\nContext:\n[1] «Gary Zukav | Gary Zukav (born October 17, 1942) is an American spiritual teacher and the author of four cons