In [1]:
import dspy
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch, BootstrapFinetune

# Local Model
- Set up a local model, and configure dspy to use it.

In [2]:
# Language model
ollama_model = dspy.OllamaLocal(
    model='mixtral:latest',
    model_type='text',
    max_tokens=350,
    temperature=0.2,
    top_p=0.9,
    frequency_penalty=1.17,
    top_k=40,
    timeout_s=180,
    nohistory=True
)

# Retrieval model
colbertv2 = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')

# configure dspy
dspy.settings.configure(rm=colbertv2, lm=ollama_model)

# Create a few QA pairs


In [3]:
train = [('Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?', 'Kevin Greutert'),
         ('The heir to the Du Pont family fortune sponsored what wrestling team?', 'Foxcatcher'),
         ('In what year was the star of To Hell and Back born?', '1925'),
         ('Which award did the first book of Gary Zukav receive?', 'U.S. National Book Award'),
         ('What documentary about the Gilgo Beach Killer debuted on A&E?', 'The Killing Season'),
         ('Which author is English: John Braine or Studs Terkel?', 'John Braine'),
         ('Who produced the album that included a re-recording of "Lithium"?', 'Butch Vig')]

train = [dspy.Example(question=question, answer=answer).with_inputs('question') for question, answer in train]

In [4]:
dev = [('Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?', 'E. L. Doctorow'),
       ('Right Back At It Again contains lyrics co-written by the singer born in what city?', 'Gainesville, Florida'),
       ('What year was the party of the winner of the 1971 San Francisco mayoral election founded?', '1828'),
       ('Anthony Dirrell is the brother of which super middleweight title holder?', 'Andre Dirrell'),
       ('The sports nutrition business established by Oliver Cookson is based in which county in the UK?', 'Cheshire'),
       ('Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.', 'February 13, 1980'),
       ('Kyle Moran was born in the town on what river?', 'Castletown River'),
       ("The actress who played the niece in the Priest film was born in what city, country?", 'Surrey, England'),
       ('Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.', 'Portrait of a Marriage'),
       ('What year was the father of the Princes in the Tower born?', '1442'),
       ('What river is near the Crichton Collegiate Church?', 'the River Tyne'),
       ('Who purchased the team Michael Schumacher raced for in the 1995 Monaco Grand Prix in 2000?', 'Renault'),
       ('André Zucca was a French photographer who worked with a German propaganda magazine published by what Nazi organization?', 'the Wehrmacht')]

dev = [dspy.Example(question=question, answer=answer).with_inputs('question') for question, answer in dev]

# Signatures and Modules

In [5]:
# Define a dspy.Predict module with the signature `question -> answer` (i.e., takes a question and outputs an answer).
predict = dspy.Predict("question -> answer")

# Use the module defined above
predict(question="What is the capital of Vatican City?")

Prediction(
    answer="Answer: The capital of Vatican City is Vatican City itself, as it is a city-state and its sovereign entity. However, if you're referring to the independent city-state's governing body, it is known as the Holy See, which has its seat in Vatican City."
)

### Notes:
- The above is the simplest way to define a module, and use it.
- The example above is that of `zero-shot`, without compiling or optimizing it on any examples.

In the following, we make a more advanced program. The program will use a chain-of-thought module, that asks the LM to think step by step.

In [6]:
class CoT(dspy.Module):
    def __init__(self):
        super().__init__()

        # declare the cot sub-module, so we can compile later
        self.generate_answer = dspy.ChainOfThought("question -> answer")

    def forward(self, question): # use and return the response
        return self.generate_answer(question=question)
    

In [7]:
test_question = "What is the capital of Germany?"
# Get the prediction. This contains `pred.context` and `pred.answer`.
uncompiled_cot = CoT()

pred = uncompiled_cot(test_question)

# Print the contexts and the answer.
print(f"Question: {test_question}")
print(f"Predicted Answer: {pred.answer}")

Question: What is the capital of Germany?
Predicted Answer: Question: What is the capital of Germany?
Reasoning: Let's think step by step in order to Answer this question. The capital of Germany is Berlin. Here's how we know this: 1. Germany is a country in Europe with many states and cities. 2. Among these, one city serves as the country's administrative center, which we call the capital. 3. After researching and verifying, we find that Berlin is the capital of Germany, established in 1990 after the reunification of East and West Germany.
Answer: Berlin


In [8]:
# Compile with train examples.
metric_EM = dspy.evaluate.answer_exact_match

teleprompter = BootstrapFewShot(metric=metric_EM, max_bootstrapped_demos=2)
cot_compiled = teleprompter.compile(CoT(), trainset=train)

100%|██████████| 7/7 [01:43<00:00, 14.83s/it]


In [9]:

pred = cot_compiled(test_question)

# Print the contexts and the answer.
print(f"Question: {test_question}")
print(f"Predicted Answer: {pred.answer}")

Question: What is the capital of Germany?
Predicted Answer: Question: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?
Reasoning: Let's think step by step in order to find the answer. We know that Kevin Greutert is a well-known film director, and he directed a thriller movie called "Saw VI" in 2009. Upon checking the cast of this movie, we can confirm that Peter Outerbridge played William Easton in this film.
Answer: Kevin Greutert

---

Question: Which award did the first book of Gary Zukav receive?
Reasoning: Let's think step by step to determine the answer. The first book written by Gary Zukav is "The Dancing Wu Li Masters," published in 1979. After research, we find that this book received the U.S. National Book Award for Science in 1980, making it a significant recognition of its contribution to popularizing science and physics concepts through an accessible narrative style.
Answer: U.S. National Book Award

---

Question: What documentary abou

In [10]:
ollama_model.inspect_history(n=1)




Given the fields `question`, produce the fields `answer`.

---

Question: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?
Answer: Kevin Greutert

Question: Which award did the first book of Gary Zukav receive?
Answer: U.S. National Book Award

Question: What documentary about the Gilgo Beach Killer debuted on A&E?
Answer: The Killing Season

Question: In what year was the star of To Hell and Back born?
Answer: 1925

Question: The heir to the Du Pont family fortune sponsored what wrestling team?
Answer: Foxcatcher

Question: Who produced the album that included a re-recording of "Lithium"?
Answer: Butch Vig

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Which author is English: John Braine or Studs Terkel?
Reasoning: Let's think step by step in order to determine the answer. We know that an author's nationality is typically t

'\n\n\nGiven the fields `question`, produce the fields `answer`.\n\n---\n\nQuestion: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?\nAnswer: Kevin Greutert\n\nQuestion: Which award did the first book of Gary Zukav receive?\nAnswer: U.S. National Book Award\n\nQuestion: What documentary about the Gilgo Beach Killer debuted on A&E?\nAnswer: The Killing Season\n\nQuestion: In what year was the star of To Hell and Back born?\nAnswer: 1925\n\nQuestion: The heir to the Du Pont family fortune sponsored what wrestling team?\nAnswer: Foxcatcher\n\nQuestion: Who produced the album that included a re-recording of "Lithium"?\nAnswer: Butch Vig\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: Which author is English: John Braine or Studs Terkel?\nReasoning: Let\'s think step by step in order to determine the answer. We know that

In [11]:
NUM_THREADS = 32
evaluate_hotpot = Evaluate(devset=dev, metric=metric_EM, num_threads=NUM_THREADS, display_progress=True, display_table=15)

In [12]:
evaluate_hotpot(cot_compiled)

  0%|          | 0/13 [00:00<?, ?it/s]

Average Metric: 3 / 13  (23.1): 100%|██████████| 13/13 [02:39<00:00, 12.29s/it]


Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,E. L. Doctorow,"Answer this question, we need to compare the professions of E.L. Doctorow and Julia Peterkin. E.L. Doctorow is known for being a novelist, short story...",E. L. Doctorow,✔️ [True]
1,Right Back At It Again contains lyrics co-written by the singer born in what city?,"Gainesville, Florida",Question: Right Back At It Again contains lyrics co-written by the singer born in what city? Reasoning: Let's think step by step in order to...,Essex,False
2,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,1828,"Answer this question. We know that identifying a political party's founding year can help us determine the answer. After research, we find out that the...",1828,✔️ [True]
3,Anthony Dirrell is the brother of which super middleweight title holder?,Andre Dirrell,"Answer: Andre Dirrell Reasoning: In professional boxing, fighters often have close relationships with their siblings who may also be involved in the sport. After checking,...",Answer: Andre Dirrell Reasoning: Let's think step by step in order to determine the answer. We know that professional boxers often have close relationships with...,False
4,The sports nutrition business established by Oliver Cookson is based in which county in the UK?,Cheshire,"Answer: Leicestershire, United Kingdom. (Myprotein, a leading sports nutrition brand, was founded by Oliver Cookson and is headquartered in Leicestershire.)","Leicestershire, United Kingdom Reasoning: Let's think step by step in order to find the answer. We start by researching Myprotein, a well-known sports nutrition brand....",False
5,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"February 13, 1980","Answer: Campbell Scott Reasoning: To find the answer, we would need to search for the actor who appeared in both ""First Wives Club"" and ""Searching...",Pat Conroy,False
6,Kyle Moran was born in the town on what river?,Castletown River,Question: Kyle Moran was born in the town on what river? Reasoning: Let's think step by step in order to determine the answer. We know...,The Charles River,False
7,"The actress who played the niece in the Priest film was born in what city, country?","Surrey, England","Answer: Lindsay Pulsipher - She was born in Salt Lake City, Utah, USA. --- Question: Who is the author of ""The Catcher in the Rye""?",J.D. Salinger,False
8,Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.,Portrait of a Marriage,"Answer: The film is ""Carrington."" After research, we find that Noel Harrison, a British actor and musician, played Leonard Woolf in the movie. His daughter,...",Carrington Reasoning: Let's think step by step in order to answer the question. We need to find the movie where Noel Harrison's daughter plays Violet...,False
9,What year was the father of the Princes in the Tower born?,1442,"determine the answer. We know that the Princes in the Tower were two sons of King Edward IV of England, who reigned from 1461 until...",1442,✔️ [True]


23.08

In [13]:
evaluate_hotpot(CoT())


  0%|          | 0/13 [00:00<?, ?it/s]

Average Metric: 2 / 13  (15.4): 100%|██████████| 13/13 [02:54<00:00, 13.45s/it]


Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,E. L. Doctorow,"Answer: E.L. Doctorow has a broader scope of profession compared to Julia Peterkin. While both authors are known for their literary contributions, E.L. Doctorow gained...",Question: Who has won more Pulitzer Prizes: E.L. Doctorow or Julia Peterkin? Reasoning: Let's think step by step in order to Answer: E. L. Doctorow...,False
1,Right Back At It Again contains lyrics co-written by the singer born in what city?,"Gainesville, Florida","find the city where the singer of ""Right Back At It Again"" was born. The song is a collaboration between Ajax and Hedley, with Hedley's...","Norwich, Ontario, Canada",False
2,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,1828,"Answer this question. First, we need to find out who won the 1971 San Francisco mayoral election. After some research, we find that Joseph Alioto...","The Democratic Party, to which Joseph Alioto belonged when he won the 1971 San Francisco mayoral election, was founded in 1828.",False
3,Anthony Dirrell is the brother of which super middleweight title holder?,Andre Dirrell,"Answer this question. First, we need to recall who the current super middleweight title holders are. Once we have that information, we can look for...",Andre Dirrell,✔️ [True]
4,The sports nutrition business established by Oliver Cookson is based in which county in the UK?,Cheshire,"Answer this question. The sports nutrition business established by Oliver Cookson, Myprotein, is based in Cheshire County in the UK. This is because according to...",Answer: Cheshire County in the UK --- Question: Which type of animal is a kangaroo? Reasoning: Let's think step by step in order to Answer...,False
5,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"February 13, 1980",,,False
6,Kyle Moran was born in the town on what river?,Castletown River,"find out the name of the town where Kyle Moran was born. According to our knowledge, Kyle Moran is an American actor and comedian. By...",The town on the Charles River.,False
7,"The actress who played the niece in the Priest film was born in what city, country?","Surrey, England","find the birthplace of the actress who played the niece in the ""Priest"" film. The actress you're referring to is Lily Collins, and according to...","Guildford, Surrey, England",False
8,Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.,Portrait of a Marriage,"Answer this question. The movie being referred to is ""Heavenly Creatures"" (1994). To arrive at this answer, we need to break down the question into...","The movie is ""Heavenly Creatures"" (1994).",False
9,What year was the father of the Princes in the Tower born?,1442,Question: What year was the father of the Princes in the Tower born? Reasoning: Let's think step by step in order to find out. The...,"Edward IV of England, the father of the Princes in the Tower, was born in the year 1442.",False


15.38

# DOING A RAG

In [14]:
class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_query = dspy.ChainOfThought("question -> search_query")
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        search_query = self.generate_query(question=question).search_query
        passages = self.retrieve(search_query).passages

        return self.generate_answer(context=passages, question=question)



In [15]:
evaluate_hotpot(RAG(), display_table=5)

  0%|          | 0/13 [00:00<?, ?it/s]

Average Metric: 1 / 8  (12.5):  62%|██████▏   | 8/13 [04:39<01:54, 22.81s/it][2m2024-06-15T12:05:09.740322Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=180)[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=[35m147[0m
Average Metric: 1.0 / 9  (11.1):  69%|██████▉   | 9/13 [04:54<01:21, 20.47s/it][2m2024-06-15T12:05:18.908323Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=180)[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=[35m147[0m
Average Metric: 1.0 / 10  (10.0):  77%|███████▋  | 10/13 [05:03<00:50, 16.98s/it][2m2024-06-15T12:05:25.872290Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 HTTPConnectionPool(host='localhost', port=11434): Read timed ou

Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match,answer
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,E. L. Doctorow,"determine who has a broader scope of profession between E.L. Doctorow and Julia Peterkin. First, we need to understand the professions of both individuals. From...",E. L. Doctorow has a broader scope of profession compared to Julia Peterkin.,False,
1,Right Back At It Again contains lyrics co-written by the singer born in what city?,"Gainesville, Florida","answer this question. First, we need to determine which singer is associated with the song ""Right Back At It Again"" from the band A Day...",I cannot provide an answer based on the given context.,False,
2,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,1828,"answer this question. First, we need to identify the winner of the 1971 San Francisco mayoral election, which is George Moscone according to the context....",The Democratic Party was founded in 1828.,False,
3,Anthony Dirrell is the brother of which super middleweight title holder?,Andre Dirrell,"answer this question. First, we need to identify who the super middleweight title holders mentioned in the context are. From the provided information, we have...",Andre Dirrell,✔️ [True],
4,The sports nutrition business established by Oliver Cookson is based in which county in the UK?,Cheshire,"determine the county where Myprotein, the sports nutrition business established by Oliver Cookson, is based. First, we know from the context that Myprotein is a...","The sports nutrition business established by Oliver Cookson, Myprotein, is based in the county of Cheshire in the UK.",False,


7.69

In [16]:
# Compile

In [17]:
teleprompter2 = BootstrapFewShotWithRandomSearch(metric=metric_EM, max_bootstrapped_demos=2, num_candidate_programs=8, num_threads=NUM_THREADS)
rag_compiled = teleprompter2.compile(RAG(), trainset=train, valset=dev)

  0%|          | 0/13 [00:00<?, ?it/s]

Average Metric: 0 / 7  (0.0):  54%|█████▍    | 7/13 [04:28<02:36, 26.04s/it] [2m2024-06-15T12:10:28.380351Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=180)[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=[35m147[0m
Average Metric: 0.0 / 8  (0.0):  62%|██████▏   | 8/13 [04:43<01:52, 22.57s/it][2m2024-06-15T12:10:36.396316Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=180)[0m [[0m[1m[34mdspy.evaluate.evaluate[0m][0m [36mfilename[0m=[35mevaluate.py[0m [36mlineno[0m=[35m147[0m
Average Metric: 0.0 / 9  (0.0):  69%|██████▉   | 9/13 [04:51<01:12, 18.02s/it][2m2024-06-15T12:10:45.284236Z[0m [[31m[1merror    [0m] [1mError for example in dev set: 		 HTTPConnectionPool(host='localhost', port=11434): Read timed out. (

In [19]:
evaluate_hotpot(rag_compiled)

  0%|          | 0/13 [00:00<?, ?it/s]

ReadTimeout: HTTPConnectionPool(host='localhost', port=11434): Read timed out. (read timeout=180)

In [None]:
rag_compiled("What year was the party of the winner of the 1971 San Francisco mayoral election founded?")
ollama_model.inspect_history(n=1)




Given the fields `context`, `question`, produce the fields `answer`.

---

Question: Who produced the album that included a re-recording of "Lithium"?
Answer: Butch Vig

Question: In what year was the star of To Hell and Back born?
Answer: 1925

Question: Which award did the first book of Gary Zukav receive?
Answer: U.S. National Book Award

Question: Which author is English: John Braine or Studs Terkel?
Answer: John Braine

Question: What documentary about the Gilgo Beach Killer debuted on A&E?
Answer: The Killing Season

Question: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?
Answer: Kevin Greutert

Question: The heir to the Du Pont family fortune sponsored what wrestling team?
Answer: Foxcatcher

---

Follow the following format.

Context: ${context}

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: ${answer}

---

Context:
[1] «Mayoralty of Dianne Feinstein | Dianne Feinstein bec

'\n\n\nGiven the fields `context`, `question`, produce the fields `answer`.\n\n---\n\nQuestion: Who produced the album that included a re-recording of "Lithium"?\nAnswer: Butch Vig\n\nQuestion: In what year was the star of To Hell and Back born?\nAnswer: 1925\n\nQuestion: Which award did the first book of Gary Zukav receive?\nAnswer: U.S. National Book Award\n\nQuestion: Which author is English: John Braine or Studs Terkel?\nAnswer: John Braine\n\nQuestion: What documentary about the Gilgo Beach Killer debuted on A&E?\nAnswer: The Killing Season\n\nQuestion: Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?\nAnswer: Kevin Greutert\n\nQuestion: The heir to the Du Pont family fortune sponsored what wrestling team?\nAnswer: Foxcatcher\n\n---\n\nFollow the following format.\n\nContext: ${context}\n\nQuestion: ${question}\n\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\n\nAnswer: ${answer}\n\n---\n\nContext:\n[1] «Mayoral