In [1]:
# Reference: https://dspy-docs.vercel.app/docs/quick-start/minimal-example

In [2]:
import os

# Configure the OpenAI-compatible client through environment variables: the
# API base points at a server on localhost and the key is a fixed dummy value.
os.environ.update({
    'OPENAI_API_KEY': 'sk-111111111111111111111111111111111111111111111111',
    'OPENAI_API_BASE': 'http://127.0.0.1:5000/v1/',
})

In [3]:
import requests

# Ask the local server which model it is currently serving; the name is
# printed here and reused later when saving the compiled program.
model_info_url = 'http://127.0.0.1:5000/v1/internal/model/info'
# The timeout keeps this cell from hanging indefinitely when the server is not
# running, and raise_for_status() reports an HTTP error directly instead of
# letting it surface as a JSON decode error or KeyError on the next line.
resp = requests.get(model_info_url, timeout=10)
resp.raise_for_status()
model = resp.json()['model_name']
print(model)

gemma-7b-it-6.0bpw-h6-exl2


In [4]:
import warnings

# Disable future warnings for all modules
warnings.simplefilter('ignore', FutureWarning)


In [5]:
# Cache location for DSPy, set before `import dspy` in the next cell.
# NOTE(review): presumably the library reads this variable at import time,
# so the ordering matters - confirm.
os.environ.update(DSP_NOTEBOOK_CACHEDIR="./local_cache2")

In [6]:
import dspy

# Language-model client, routed to the server named by OPENAI_API_BASE.
# NOTE(review): the model label below is 'gpt-3.5-turbo-instruct', while the
# server reported a different model name in the earlier cell - confirm the
# label and model_type="chat" are what this backend expects.
turbo = dspy.OpenAI(
    model='gpt-3.5-turbo-instruct',
    max_tokens=250,
    temperature=0.01,
    api_base=os.environ['OPENAI_API_BASE'],
    model_type="chat",
)

# Retrieval-model client for the remote ColBERTv2 `wiki17_abstracts` endpoint.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(
    url='http://20.102.90.50:2017/wiki17_abstracts',
)

# Register both clients in DSPy's global settings.
dspy.settings.configure(rm=colbertv2_wiki17_abstracts, lm=turbo)

In [7]:
import pickle
from dspy.datasets import HotPotQA

# Load the dataset from the local pickle when it exists.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load a dataset.pkl that you created yourself.
try:
    with open('dataset.pkl', 'rb') as fh:
        dataset = pickle.load(fh)
except FileNotFoundError:
    # No pickle available (e.g. a fresh checkout): build the dataset directly so
    # the notebook still runs from top to bottom.
    # NOTE(review): these are the arguments from the previously commented-out
    # call; assumed to match how dataset.pkl was produced - confirm.
    dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)

# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
trainset = [x.with_inputs('question') for x in dataset.train]
devset = [x.with_inputs('question') for x in dataset.dev]

# Displayed as the cell output: (number of train examples, number of dev examples).
len(trainset), len(devset)

(20, 50)

In [8]:
# Signature for the answer-generation step of the RAG program: it takes the
# retrieved context plus the question and yields a short answer.
class GenerateAnswer(dspy.Signature):
    # The docstring below is runtime text, not just documentation: it appears
    # verbatim as the instruction line of the prompt printed by
    # `turbo.inspect_history` later in this notebook. Edit it deliberately.
    """Answer questions with short factoid answers."""

    # Input: the passages returned by the retriever (see RAG.forward).
    context = dspy.InputField(desc="may contain relevant facts")
    # Input: the question to answer.
    question = dspy.InputField()
    # Output: the answer text; `desc` is a hint about the expected length.
    answer = dspy.OutputField(desc="often between 1 and 5 words")

In [9]:
class RAG(dspy.Module):
    """Two-step program: retrieve passages for a question, then answer from them."""

    def __init__(self, num_passages=3):
        """Create the retriever and the answer generator.

        Args:
            num_passages: value passed as `k` to `dspy.Retrieve`.
        """
        super().__init__()

        # Attribute names are kept as-is: `generate_answer` is the predictor
        # name reported by `named_predictors()`.
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer `question` using retrieved passages as context.

        Returns:
            A `dspy.Prediction` with `context` (the retrieved passages) and
            `answer` (the generated answer).
        """
        passages = self.retrieve(question).passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=generated.answer)

In [10]:
from dspy.teleprompt import BootstrapFewShot

./local_cache2/compiler


In [11]:
def validate_context_and_answer(example, pred, trace=None):
    """Metric: accept a prediction only if both built-in checks pass.

    Runs `dspy.evaluate.answer_exact_match` and then
    `dspy.evaluate.answer_passage_match` on `(example, pred)`; both are always
    evaluated. `trace` is accepted but not used here.

    Returns:
        The exact-match result if it is falsy, otherwise the passage-match result.
    """
    checks = (
        dspy.evaluate.answer_exact_match,
        dspy.evaluate.answer_passage_match,
    )
    exact_match, passage_match = [check(example, pred) for check in checks]
    return exact_match and passage_match

In [12]:
# Create the BootstrapFewShot teleprompter that will compile the RAG program,
# using `validate_context_and_answer` as its metric.
teleprompter = BootstrapFewShot(
    metric=validate_context_and_answer,
)

In [13]:
# Compile a fresh RAG program against the training set. The progress bar and
# the "Bootstrapped N full traces" line below come from this call.
compiled_rag = teleprompter.compile(
    RAG(),
    trainset=trainset,
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [01:46<00:00,  5.34s/it]

Bootstrapped 0 full traces after 20 examples in round 0.





In [14]:
# Free-form question to try against the compiled RAG program.
my_question = 'What castle did David Gregory inherit?'

In [15]:
# Get the prediction. This contains `pred.context` and `pred.answer`
# (the two fields RAG.forward puts into the `dspy.Prediction` it returns).
pred = compiled_rag(my_question)

In [16]:
# Show the question, the predicted answer, and the retrieved contexts with
# each one cut to its first 200 characters.
print(f"Question: {my_question}")
print(f"Predicted Answer: {pred.answer}")
truncated_contexts = [passage[:200] + '...' for passage in pred.context]
print(f"Retrieved Contexts (truncated): {truncated_contexts}")

Question: What castle did David Gregory inherit?
Predicted Answer: ## Factoid Answers

**1.** Tae Kwon Do Times published articles by Scott Shaw.

**2.** The Waltz King was an ancestor of a composer known as The Waltz King.

**3.** Atlantic Ocean is the coast where Diogal Sakho was born.

**4.** The Victorians - Their Story In Pictures is a documentary series written by an author born in 1950.

**5.** The Organisation that allows a community to influence their operation or use and to enjoy the benefits arising was founded in 2010.

**6.** Buena Vista Distribution distributed the 19
Retrieved Contexts (truncated): ['David Gregory (physician) | David Gregory (20 December 1625 – 1720) was a Scottish physician and inventor. His surname is sometimes spelt as Gregorie, the original Scottish spelling. He inherited Kinn...', 'Gregory Tarchaneiotes | Gregory Tarchaneiotes (Greek: Γρηγόριος Ταρχανειώτης , Italian: "Gregorio Tracanioto" or "Tracamoto" ) was a "protospatharius" and the long-reigni

In [17]:
# Print the prompt history recorded by the `turbo` client; the output below
# shows the prompt that was sent. NOTE(review): n=1 presumably limits this to
# the most recent call - confirm.
turbo.inspect_history(n=1)





Answer questions with short factoid answers.

---

Question: Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?
Answer: Tae Kwon Do Times

Question: This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?
Answer: The Waltz King

Question: On the coast of what ocean is the birthplace of Diogal Sakho?
Answer: Atlantic

Question: The Victorians - Their Story In Pictures is a documentary series written by an author born in what year?
Answer: 1950

Question: The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?
Answer: 2010

Question: Which company distributed this 1977 American animated film produced by Walt Disney Productions for which Sherman Brothers wrote songs?
Answer: Buena Vista Distribution

Question: Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field

In [18]:
# Print the first demo attached to each predictor of the compiled program.
for name, parameter in compiled_rag.named_predictors():
    print(name)
    # A predictor may have an empty `demos` list (for example when compilation
    # attaches none), so guard the index instead of raising IndexError.
    print(parameter.demos[0] if parameter.demos else "(no demos)")
    print()

generate_answer
Example({'question': 'Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art?', 'answer': 'Tae Kwon Do Times'}) (input_keys={'question'})



In [19]:
from dspy.evaluate.evaluate import Evaluate

In [20]:
# Reusable evaluator over the dev set: single-threaded, no progress bar, and a
# results table limited by `display_table=5`. Called with different metrics below.
evaluate_on_hotpotqa = Evaluate(
    devset=devset,
    num_threads=1,
    display_progress=False,
    display_table=5,
)

In [21]:
# Score the compiled program on the dev set with DSPy's exact-match metric.
# The call is the last expression, so its return value is shown as cell output.
metric = dspy.evaluate.answer_exact_match
evaluate_on_hotpotqa(
    compiled_rag,
    metric=metric,
)

Average Metric: 0 / 50  (0.0%)


Unnamed: 0,question,example_answer,gold_titles,context,pred_answer,answer_exact_match
0,Are both Cangzhou and Qionghai in the Hebei province of China?,no,"{'Qionghai', 'Cangzhou'}","['Cangzhou | Cangzhou () is a prefecture-level city in eastern Hebei province, People\'s Republic of China. At the 2010 census, Cangzhou\'s built-up (""or metro"") area...","## Factoid Answers **Context:** The text provided describes various individuals and events related to music, film, and space exploration. **Question:** Which American actress who made...",False
1,Who conducts the draft in which Marc-Andre Fleury was drafted to the Vegas Golden Knights for the 2017-18 season?,National Hockey League,"{'2017 NHL Expansion Draft', '2017–18 Pittsburgh Penguins season'}",['2017–18 Pittsburgh Penguins season | The 2017–18 Pittsburgh Penguins season will be the 51st season for the National Hockey League ice hockey team that was...,"## Factoid Answers **Context:** **Question:** Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art? **Answer:** Tae Kwon Do Times...",False
2,"The Wings entered a new era, following the retirement of which Canadian retired professional ice hockey player and current general manager of the Tampa Bay...",Steve Yzerman,"{'2006–07 Detroit Red Wings season', 'Steve Yzerman'}","['Steve Yzerman | Stephen Gregory ""Steve"" Yzerman ( ; born May 9, 1965) is a Canadian retired professional ice hockey player and current general manager...","## Factoid Answers **Context:** **Question:** Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art? **Answer:** Tae Kwon Do Times...",False
3,What river is near the Crichton Collegiate Church?,the River Tyne,"{'Crichton Collegiate Church', 'Crichton Castle'}","[""Crichton Collegiate Church | Crichton Collegiate Church is situated about 0.6 mi south west of the hamlet of Crichton in Midlothian, Scotland. Crichton itself is...",## Factoid Answers **1.** Tae Kwon Do Times published articles by Scott Shaw. **2.** The Waltz King was an ancestor of a composer named Johann...,False
4,In the 10th Century A.D. Ealhswith had a son called Æthelweard by which English king?,King Alfred the Great,"{'Æthelweard (son of Alfred)', 'Ealhswith'}","[""Æthelweard of East Anglia | Æthelweard (died 854) was a 9th-century king of East Anglia, the long-lived Anglo-Saxon kingdom which today includes the English counties...",## Short Factoid Answers **Context:** **Question:** In the 10th Century A.D. Ealhswith had a son called Æthelweard by which English king? **Answer:** Alfred the Great...,False


0.0

In [22]:
def gold_passages_retrieved(example, pred, trace=None):
    """Metric: True when every gold title is among the retrieved passage titles.

    The title of a retrieved passage is the text before the first ' | ' in
    each entry of `pred.context`. Titles on both sides are passed through
    `dspy.evaluate.normalize_text` before comparison. `trace` is unused.
    """
    normalize = dspy.evaluate.normalize_text

    expected = {normalize(title) for title in example['gold_titles']}
    retrieved = {normalize(passage.partition(' | ')[0]) for passage in pred.context}

    return expected <= retrieved

In [23]:
# Evaluate retrieval quality: score the compiled program on the dev set with
# the `gold_passages_retrieved` metric and keep the returned score.
compiled_rag_retrieval_score = evaluate_on_hotpotqa(
    compiled_rag,
    metric=gold_passages_retrieved,
)

Average Metric: 13 / 50  (26.0%)


Unnamed: 0,question,example_answer,gold_titles,context,pred_answer,gold_passages_retrieved
0,Are both Cangzhou and Qionghai in the Hebei province of China?,no,"{'Qionghai', 'Cangzhou'}","['Cangzhou | Cangzhou () is a prefecture-level city in eastern Hebei province, People\'s Republic of China. At the 2010 census, Cangzhou\'s built-up (""or metro"") area...",## Short Factoid Answers **1.** Tae Kwon Do Times published articles by Scott Shaw. **Reasoning:** The text states that Tae Kwon Do Times published articles...,False
1,Who conducts the draft in which Marc-Andre Fleury was drafted to the Vegas Golden Knights for the 2017-18 season?,National Hockey League,"{'2017 NHL Expansion Draft', '2017–18 Pittsburgh Penguins season'}",['2017–18 Pittsburgh Penguins season | The 2017–18 Pittsburgh Penguins season will be the 51st season for the National Hockey League ice hockey team that was...,## Factoid Answers **1.** Tae Kwon Do Times published articles by Scott Shaw. **2.** The Waltz King was an ancestor of a composer named Johann...,✔️ [True]
2,"The Wings entered a new era, following the retirement of which Canadian retired professional ice hockey player and current general manager of the Tampa Bay...",Steve Yzerman,"{'2006–07 Detroit Red Wings season', 'Steve Yzerman'}","['Steve Yzerman | Stephen Gregory ""Steve"" Yzerman ( ; born May 9, 1965) is a Canadian retired professional ice hockey player and current general manager...","## Factoid Answers **Context:** **Question:** Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art? **Answer:** Tae Kwon Do Times...",✔️ [True]
3,What river is near the Crichton Collegiate Church?,the River Tyne,"{'Crichton Collegiate Church', 'Crichton Castle'}","[""Crichton Collegiate Church | Crichton Collegiate Church is situated about 0.6 mi south west of the hamlet of Crichton in Midlothian, Scotland. Crichton itself is...","## Factoid Answers **Context:** **Question:** Which magazine has published articles by Scott Shaw, Tae Kwon Do Times or Southwest Art? **Reasoning:** Tae Kwon Do Times...",✔️ [True]
4,In the 10th Century A.D. Ealhswith had a son called Æthelweard by which English king?,King Alfred the Great,"{'Æthelweard (son of Alfred)', 'Ealhswith'}","[""Æthelweard of East Anglia | Æthelweard (died 854) was a 9th-century king of East Anglia, the long-lived Anglo-Saxon kingdom which today includes the English counties...",## Short Factoid Answers **Context:** **Question:** In the 10th Century A.D. Ealhswith had a son called Æthelweard by which English king? **Answer:** Alfred the Great...,False


In [24]:
# Display the retrieval score returned by the evaluation above (the output
# below matches the percentage in that run's "Average Metric" line).
compiled_rag_retrieval_score

26.0

In [25]:
# Persist the compiled program to a JSON file named after the model that the
# local server reported earlier in the notebook.
output_path = f"rag-{model}.json"
compiled_rag.save(output_path)