In [1]:
import dspy
from dotenv import load_dotenv

In [12]:
# Retrieval model: a ColBERTv2 client pointed at the endpoint below.
COLBERT_WIKI17_URL = 'http://20.102.90.50:2017/wiki17_abstracts'
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url=COLBERT_WIKI17_URL)

In [13]:
# Language model: a locally served Ollama model. To try a hosted model instead,
# build a dspy.OpenAI(model='gpt-3.5-turbo-0125', max_tokens=300, api_key=...)
# client and read the key from the environment rather than hardcoding it.
#
# The sampling temperature is set on the LM client itself. It used to be passed
# to dspy.settings.configure(), which stores the key but is not where the LM
# client takes its generation parameters from.
# NOTE(review): confirm against the installed dspy version.
llm = dspy.OllamaLocal(
    "zephyr-7b-beta",
    model_type="text",
    stop=['\n\n'],
    max_tokens=300,
    temperature=0.7,
)

# Register the default language model and retrieval model for the cells below.
dspy.settings.configure(lm=llm, rm=colbertv2_wiki17_abstracts)

In [14]:
# Sanity check: echo the retriever object to confirm it was constructed.
colbertv2_wiki17_abstracts

<dsp.modules.colbertv2.ColBERTv2 at 0x7fbd77db42e0>

In [5]:
from dspy.datasets import HotPotQA


In [6]:
# Load a small HotPotQA sample: 20 training examples, 50 dev examples, no test split.
dataset = HotPotQA(
    train_seed=1,
    train_size=20,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)


def _with_question_input(examples):
    """Return the examples with 'question' marked as their only input field."""
    return [example.with_inputs('question') for example in examples]


# Everything other than 'question' on an example is a label or metadata.
trainset = _with_question_input(dataset.train)
devset = _with_question_input(dataset.dev)

Downloading builder script:   0%|          | 0.00/6.42k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/9.19k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/566M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/47.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/46.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/90447 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/7405 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7405 [00:00<?, ? examples/s]

  table = cls._concat_blocks(blocks, axis=0)


In [7]:
# Confirm the split sizes as a (train, dev) pair.
tuple(len(split) for split in (trainset, devset))

(20, 50)

In [10]:
# Peek at the first training example to see its fields and which keys are inputs.
trainset[0]

Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys={'question'})

In [8]:
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers"""
    # NOTE: the docstring above is runtime text, not only documentation: the
    # module repr shown later in this notebook reports it as the signature's
    # `instructions`, so rewording it changes the prompt.

    # Inputs supplied by the caller.
    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    # Output field; `desc` carries a hint about the expected answer length.
    answer = dspy.OutputField(desc="often between 1 and 5 words")
    

In [33]:
class RAG(dspy.Module):
    """Retrieve-then-answer pipeline: fetch passages, then generate an answer from them."""

    def __init__(self, num_passages=3):
        """Create the retriever and the answer generator.

        Args:
            num_passages: value passed to the retriever as `k`.
        """
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer `question` using retrieved passages as context.

        Returns:
            A dspy.Prediction carrying `context` (the retrieved passages)
            and `answer` (taken from the generator's prediction).
        """
        passages = self.retrieve(question).passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=generated.answer)

In [34]:
# Instantiate the uncompiled pipeline with the default num_passages.
rag = RAG()

In [35]:
# Show the module's repr, which lists its predictor and the signature fields.
rag

generate_answer = ChainOfThought(GenerateAnswer(context, question -> answer
    instructions='Answer questions with short factoid answers'
    context = Field(annotation=str required=True json_schema_extra={'desc': 'may contain relevant facts', '__dspy_field_type': 'input', 'prefix': 'Context:'})
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    answer = Field(annotation=str required=True json_schema_extra={'desc': 'often between 1 and 5 words', '__dspy_field_type': 'output', 'prefix': 'Answer:'})
))

In [36]:
# Call the answer generator on its own with a hand-written context (no retrieval).
rag.generate_answer(
    context="apples are brown",
    question="what color are apples",
)

Prediction(
    rationale='determine the color of apples. We know that apples grow on trees, and we also know that when they ripen, their skin turns brown. Therefore, the answer is ...',
    answer='brown'
)

In [39]:
# Run the full pipeline (retrieval + generation). The module is invoked
# directly instead of through .forward(): calling the module object is the
# convention DSPy documents, and it keeps any logic in Module.__call__ in play.
ans = rag(question="What color is an apple?")
ans

Prediction(
    context=['Apple Green (disambiguation) | Apple green is a colour. Apple Green may also refer to:', 'Candy apple red (color) | Candy apple red (occasionally known as apple-candy red) is the name code used by manufacturing companies to define a shade of red similar to the red sugar coating on candied apples. The typical method for producing a candy apple finish is to apply a metallic base-coat, followed by a translucent color coat. A final clear coat adds additional gloss.', 'Bicolored apple | Bicolored apple is a popular definition in the horticultural branch referring to apples characterized by a non-uniform skin color. Main cultivations are produced in Europe. The varieties considered to be bicolored include Gala, Champion, Idared, Ligol, Jonagored, Najdared and Gloster.'],
    answer='Color varies, see context.'
)

In [43]:
# Summarise the prediction: how many passages came back, and the answer text.
passage_count = len(ans.context)
(passage_count, ans.answer)

(3, 'Color varies, see context.')

In [None]:
# Inspect the evaluation helpers that the metric cell below relies on.
# This cell previously held the truncated expression `dspy.ev`, which was never
# executed and would stop a Restart & Run All.
dspy.evaluate

In [44]:
from dspy.teleprompt import BootstrapFewShot

# Metric for bootstrapping: a prediction passes only when its answer matches
# the reference AND the retrieved context also matches the answer.
def validate_context_and_answer(example, pred, trace=None):
    """Combine the exact-match and passage-match checks for one prediction.

    Args:
        example: the reference example.
        pred: the prediction to score.
        trace: accepted for the metric interface; not used here.

    Returns:
        The exact-match result when it is falsy, otherwise the
        passage-match result.
    """
    # Both checks are always evaluated, in this order.
    exact_match = dspy.evaluate.answer_exact_match(example, pred)
    passage_match = dspy.evaluate.answer_passage_match(example, pred)
    if not exact_match:
        return exact_match
    return passage_match



In [45]:
# Set up a basic teleprompter (optimizer) for compiling the pipeline.
# It is given validate_context_and_answer as its metric.
teleprompter = BootstrapFewShot(metric=validate_context_and_answer)


In [48]:
# Compile a fresh, uncompiled RAG instance against the training examples.
student = RAG()
compiled_rag = teleprompter.compile(student, trainset=trainset)

100%|██████████| 20/20 [01:52<00:00,  5.62s/it]

Bootstrapped 2 full traces after 20 examples in round 0.





In [49]:
# Print the 3 most recent LM interactions to inspect the prompts that were sent.
llm.inspect_history(n=3)





Answer questions with short factoid answers

---

Question: Samantha Cristoforetti and Mark Shuttleworth are both best known for being first in their field to go where?
Answer: space

Question: Having the combination of excellent foot speed and bat speed helped Eric Davis, create what kind of outfield for the Los Angeles Dodgers?
Answer: "Outfield of Dreams"

Question: which American actor was Candace Kita guest starred with
Answer: Bill Murray

Question: Tombstone stared an actor born May 17, 1955 known as who?
Answer: Bill Paxton

Question: The Organisation that allows a community to influence their operation or use and to enjoy the benefits arisingwas founded in what year?
Answer: 2010

Question: Which is taller, the Empire State Building or the Bank of America Tower?
Answer: The Empire State Building

Question: This American guitarist best known for her work with the Iron Maidens is an ancestor of a composer who was known as what?
Answer: The Waltz King

Question: Which magazin