In [1]:
import dspy
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch, BootstrapFinetune

# Local Model
- Set up a local model and configure DSPy to use it.

In [2]:
# Language model: a local model reached through dspy's Ollama client.
ollama_model = dspy.OllamaLocal(
    # which model to run, and in which mode
    model='deepseek-v2:latest',
    model_type='text',
    # sampling settings
    temperature=0.2,
    top_p=0.9,
    top_k=40,
    frequency_penalty=1.17,
    # per-request limits
    max_tokens=350,
    timeout_s=180,
    nohistory=True,
)

# Retrieval model: ColBERTv2 endpoint (URL path: wiki17_abstracts).
colbertv2 = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')

# Register both as dspy's default language model and retrieval model.
dspy.settings.configure(lm=ollama_model, rm=colbertv2)

# Create a few QA pairs


In [3]:
# Training data as (question, gold answer) pairs.
train = [
    ('Who was the director of the 2009 movie featuring Peter Outerbridge as William Easton?', 'Kevin Greutert'),
    ('The heir to the Du Pont family fortune sponsored what wrestling team?', 'Foxcatcher'),
    ('In what year was the star of To Hell and Back born?', '1925'),
    ('Which award did the first book of Gary Zukav receive?', 'U.S. National Book Award'),
    ('What documentary about the Gilgo Beach Killer debuted on A&E?', 'The Killing Season'),
    ('Which author is English: John Braine or Studs Terkel?', 'John Braine'),
    ('Who produced the album that included a re-recording of "Lithium"?', 'Butch Vig'),
]

# Wrap every pair in a dspy.Example and mark `question` as its input field.
train = [
    dspy.Example(question=q, answer=a).with_inputs('question')
    for q, a in train
]

In [4]:
# Development (evaluation) data as (question, gold answer) pairs.
dev = [
    ('Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?', 'E. L. Doctorow'),
    ('Right Back At It Again contains lyrics co-written by the singer born in what city?', 'Gainesville, Florida'),
    ('What year was the party of the winner of the 1971 San Francisco mayoral election founded?', '1828'),
    ('Anthony Dirrell is the brother of which super middleweight title holder?', 'Andre Dirrell'),
    ('The sports nutrition business established by Oliver Cookson is based in which county in the UK?', 'Cheshire'),
    ('Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.', 'February 13, 1980'),
    ('Kyle Moran was born in the town on what river?', 'Castletown River'),
    ("The actress who played the niece in the Priest film was born in what city, country?", 'Surrey, England'),
    ('Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.', 'Portrait of a Marriage'),
    ('What year was the father of the Princes in the Tower born?', '1442'),
    ('What river is near the Crichton Collegiate Church?', 'the River Tyne'),
    ('Who purchased the team Michael Schumacher raced for in the 1995 Monaco Grand Prix in 2000?', 'Renault'),
    ('André Zucca was a French photographer who worked with a German propaganda magazine published by what Nazi organization?', 'the Wehrmacht'),
]

# Wrap every pair in a dspy.Example and mark `question` as its input field.
dev = [
    dspy.Example(question=q, answer=a).with_inputs('question')
    for q, a in dev
]

# Signatures and Modules

In [5]:
# Define a dspy.Predict module from the string signature `question -> answer`
# (one input field, `question`; one output field, `answer`).
predict = dspy.Predict("question -> answer")

# Call the module with the input field as a keyword argument. As the last
# expression in the cell, the returned Prediction is displayed as the cell output.
predict(question="What is the capital of Vatican City?")

Prediction(
    answer='1. Question: Who was Marie Curie, and what did she achieve in her scientific career?  2. Answer'
)

### Notes:
- The above is the simplest way to define a module, and use it.
- The example above is `zero-shot`: the module is used as-is, without being compiled or optimized on any examples.

Next, we build a more advanced program. It uses a chain-of-thought module that asks the LM to think step by step.

In [6]:
class CoT(dspy.Module):
    """Single-step program that answers a question via a chain-of-thought sub-module."""

    def __init__(self):
        super().__init__()

        # Kept as an attribute so the sub-module can be compiled later.
        self.generate_answer = dspy.ChainOfThought("question -> answer")

    def forward(self, question):
        """Pass `question` to the chain-of-thought sub-module and return its prediction."""
        prediction = self.generate_answer(question=question)
        return prediction
    

In [7]:
test_question = "What is the capital of Germany?"
# Get the prediction from an uncompiled CoT program. CoT performs no retrieval,
# so the only field read from the prediction below is `pred.answer`.
uncompiled_cot = CoT()

pred = uncompiled_cot(test_question)

# Print the question and the predicted answer.
print(f"Question: {test_question}")
print(f"Predicted Answer: {pred.answer}")

Question: What is the capital of Germany?
Predicted Answer: Berlin


In [8]:
# Metric used both for bootstrapping and for evaluation: dspy's exact-match check.
metric_EM = dspy.evaluate.answer_exact_match

# Compile a fresh CoT program against the train examples.
teleprompter = BootstrapFewShot(
    metric=metric_EM,
    max_bootstrapped_demos=2,
)
cot_compiled = teleprompter.compile(CoT(), trainset=train)

 86%|████████▌ | 6/7 [00:13<00:02,  2.24s/it]


In [9]:

# Ask the compiled program the same test question as the uncompiled one.
pred = cot_compiled(test_question)

# Print the question and the predicted answer.
print(f"Question: {test_question}")
print(f"Predicted Answer: {pred.answer}")

Question: What is the capital of Germany?
Predicted Answer: Berlin


In [10]:
# Show the most recent (n=1) prompt/completion recorded by the local LM.
ollama_model.inspect_history(n=1)




Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: In what year was the star of To Hell and Back born?
Reasoning: Let's think step by step in order to 1. Identify which movie "To Hell and Back" is about, as it features Audie Murphy who starred in that film. The answer should be a birth year for this actor. 2. Research the details of Audie Murphy's life, focusing on his birth date since he was born around World War II when he became famous for his heroic actions during the war and later transitioned into acting after being awarded multiple medals including the Medal of Honor. 3. Determine that "To Hell and Back" is indeed about Audie Murphy's experiences in WWII as a soldier, making it essential to find out more details on him rather than just focusing on any other movie or event he might have been involved wi

'\n\n\nGiven the fields `question`, produce the fields `answer`.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let\'s think step by step in order to ${produce the answer}. We ...\nAnswer: ${answer}\n\n---\n\nQuestion: In what year was the star of To Hell and Back born?\nReasoning: Let\'s think step by step in order to 1. Identify which movie "To Hell and Back" is about, as it features Audie Murphy who starred in that film. The answer should be a birth year for this actor. 2. Research the details of Audie Murphy\'s life, focusing on his birth date since he was born around World War II when he became famous for his heroic actions during the war and later transitioned into acting after being awarded multiple medals including the Medal of Honor. 3. Determine that "To Hell and Back" is indeed about Audie Murphy\'s experiences in WWII as a soldier, making it essential to find out more details on him rather than just focusing on any other movie or event he might 

In [11]:
# Reusable evaluator: runs a program over the dev set and scores it with metric_EM.
NUM_THREADS = 32
evaluate_hotpot = Evaluate(
    devset=dev,
    metric=metric_EM,
    num_threads=NUM_THREADS,
    display_progress=True,
    display_table=15,
)

In [12]:
# Score the compiled CoT program on the dev set.
evaluate_hotpot(cot_compiled)

Average Metric: 1 / 13  (7.7): 100%|██████████| 13/13 [00:35<00:00,  2.70s/it] 


Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,E. L. Doctorow,"1. Identify the professions and fields each author is involved with, focusing on their primary areas of expertise within writing and literature as well as...",E.L. Doctorow,False
1,Right Back At It Again contains lyrics co-written by the singer born in what city?,"Gainesville, Florida","1. Identify which song ""Right Back At It Again"" is, as it features a well-known artist who has been involved with various music projects throughout...",Born in New York City,False
2,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,1828,"1. Identify which political party represents a candidate who won that particular election, focusing on historical context and relevant information about both candidates from that...",The Progressive Party was founded in 1968.,False
3,Anthony Dirrell is the brother of which super middleweight title holder?,Andre Dirrell,"1. Identify who Anthony Dirrell is, focusing on his connection as a sibling within their family and sport context since he shares familial ties with...",Andre Dirrell,✔️ [True]
4,The sports nutrition business established by Oliver Cookson is based in which county in the UK?,Cheshire,"1. Identify who Oliver Cookson is and what he does, as this will provide context for his involvement with a specific sport or industry related...",Lancashire,False
5,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"February 13, 1980","1. Identify which movie ""First Wives Club"" is about, as it features Bette Midler, Diane Keaton, and Goldie Hawn among others who starred together in...",Jeremy Allen White,False
6,Kyle Moran was born in the town on what river?,Castletown River,1. Identify who Kyle Moran is and any notable details about his life or career that might be relevant for determining which river he was...,"The question does not provide enough specific context about Kyle Moran's birthplace for us to determine which river he might have been born near, so...",False
7,"The actress who played the niece in the Priest film was born in what city, country?","Surrey, England","1. Identify which movie ""Priest"" is about and its main characters for context while searching for birth information of an actress involved with this project.2....","Vancouver, Canada",False
8,Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.,Portrait of a Marriage,"1. Identify who ""Noel Harrison"" is, as he might be an actor or a character from some context that could help determine his relationship with...",Emily Mortimer,False
9,What year was the father of the Princes in the Tower born?,1442,"1. Identify who ""the fathers of the princes"" are referring to, as there were two young kings known as ""Princes in the Tower,"" Edward V...",Edward V - 1470 and Richard Duke of York - 1473,False


7.69

In [13]:
# Baseline: score a fresh, uncompiled CoT program on the same dev set.
evaluate_hotpot(CoT())


Average Metric: 0 / 13  (0.0): 100%|██████████| 13/13 [01:01<00:00,  4.74s/it]


Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,Who has a broader scope of profession: E. L. Doctorow or Julia Peterkin?,E. L. Doctorow,"determine who has a broader scope of profession between E.L. Doctorow and Julia Peterkin, we need to consider their careers as writers and the breadth...",E.L. Doctorow has a broader scope of profession than Julia Peterkin due to his extensive body of work that spans various literary genres including novels...,False
1,Right Back At It Again contains lyrics co-written by the singer born in what city?,"Gainesville, Florida","1. Identify who wrote and sang ""Right Back at It Again"". This song is written and performed by Eminem, an American rapper known for his...","The singer of the song 'Right Back At It Again', Eminem who co-wrote the lyrics with Skylar Grey is from St. Joseph city which is...",False
2,What year was the party of the winner of the 1971 San Francisco mayoral election founded?,1828,"Answer: The Democratic Party, which won the 1971 San Francisco mayoral election, was founded on July 6-21, 1848.",Question: What year was the party of the winner of the 1975 New York City mayoral election founded? Reasoning: Let's think step by step in...,False
3,Anthony Dirrell is the brother of which super middleweight title holder?,Andre Dirrell,find out who Anthony Dirrell is related to as a sibling among super middleweight champions. We need to identify his siblings and their involvement in...,"1st step - Identify Anthony Dirrell's family members who are involved in professional sports, particularly boxing. In doing so, we can focus on those related...",False
4,The sports nutrition business established by Oliver Cookson is based in which county in the UK?,Cheshire,"1. Identify key information from the question - In this case, we need to find out where a specific sports nutrition company is located within...",​,False
5,Find the birth date of the actor who played roles in First Wives Club and Searching for the Elephant.,"February 13, 1980","1. Identify which actors appeared in both ""First Wives Club"" (1996) and ""Searching for the Elephant"" (2001). This will help narrow down potential candidates based...","96) and ""Searching for the Elephant"" (2001) are not publicly available due to privacy reasons. However, Goldie Hawn starred in both films; she was born...",False
6,Kyle Moran was born in the town on what river?,Castletown River,"1. Identify who Kyle Moran is and where he was born, which would provide context for determining his birthplace's river. This information can be sourced...",era and even up until the present day. This is because rivers often serve as vital sources for both drinking water needs (especially important during...,False
7,"The actress who played the niece in the Priest film was born in what city, country?","Surrey, England","1. Identify which movie and character we are talking about - ""The Priest"" is a British crime thriller film released in 2014. The actress who...","To find out where and which country she was born in, we would need to look up information about Lucy Boynton’s biography or personal details...",False
8,Name the movie in which the daughter of Noel Harrison plays Violet Trefusis.,Portrait of a Marriage,"1. Identify key elements from the question - we need to find a specific movie where an actress, who is also known as ""daughter of...",1. Identify key elements from the question - we need a specific film where an actress who is also known as 'daughter of Noel Harrison'...,False
9,What year was the father of the Princes in the Tower born?,1442,"1. Identify who is referred to as ""the Father of Princes"". In this context, it likely refers to King Edward III since he was both...",The father of Princes in the Tower was born around 1312.,False


0.0

# Building a RAG Program

In [14]:
class RAG(dspy.Module):
    """Three-stage QA program: write a search query, retrieve passages, answer from them."""

    def __init__(self, num_passages=3):
        """Create the sub-modules; `num_passages` is forwarded to the retriever as `k`."""
        super().__init__()

        # One sub-module per stage used in `forward`.
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_query = dspy.ChainOfThought("question -> search_query")
        self.generate_answer = dspy.ChainOfThought("context, question -> answer")

    def forward(self, question):
        """Answer `question` using passages retrieved for an LM-generated search query."""
        # Stage 1: turn the question into a search query.
        query_prediction = self.generate_query(question=question)

        # Stage 2: retrieve passages for that query.
        retrieved = self.retrieve(query_prediction.search_query)

        # Stage 3: answer the original question with the passages as context.
        return self.generate_answer(context=retrieved.passages, question=question)



In [15]:
# Score an uncompiled RAG program on the dev set; display_table=5 overrides the
# display_table=15 the evaluator was constructed with.
evaluate_hotpot(RAG(), display_table=5)

  0%|          | 0/13 [00:00<?, ?it/s]