In [1]:
import os
import tqdm as notebook_tqdm
from dotenv import load_dotenv

import dsp
import dspy
from dotdict import dotdict
from dspy.datasets.gsm8k import GSM8K, gsm8k_metric
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot, BootstrapFinetune

load_dotenv()
unify_api_key = os.getenv("UNIFY_KEY")



In [2]:
from dspy.datasets import HotPotQA

# Load the dataset.
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)

# Tell DSPy that the 'question' field is the input. Any other fields are labels and/or metadata.
trainset = [x.with_inputs('question') for x in dataset.train]
devset = [x.with_inputs('question') for x in dataset.dev]

len(trainset), len(devset)

(20, 50)

In [3]:
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

class RAG(dspy.Module):
    def __init__(self, num_passages=3):
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        context = self.retrieve(question).passages
        passages = [dotdict({"long_text": passage}) for passage in context]
        prediction = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=prediction.answer)

In [4]:
gemma = dsp.Unify(
    endpoint="gemma-7b-it@anyscale",
    max_tokens=150,
    api_key=unify_api_key,
)

claude = dsp.Unify(
    endpoint="claude-3-haiku@anthropic",
    max_tokens=150,
    api_key=unify_api_key,
)

unify_router = dsp.Unify(
    max_tokens=150,
    api_key=unify_api_key,
)


dspy.settings.configure(lm=gemma, rm=claude)

In [5]:
def validate_context_and_answer(example, pred, trace=None):
    answer_EM = dspy.evaluate.answer_exact_match(example, pred)
    answer_PM = dspy.evaluate.answer_passage_match(example, pred)
    return answer_EM and answer_PM

num_threads = 24

teleprompter = BootstrapFewShot(metric=validate_context_and_answer)

compiled_rag = teleprompter.compile(RAG(), trainset=trainset)

  0%|          | 0/20 [00:00<?, ?it/s]ERROR:dspy.teleprompt.bootstrap:2024-07-16T17:06:37.471652Z [error    ] Failed to run or to evaluate example Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys={'question'}) with <function validate_context_and_answer at 0x00000162EF212F20> due to 'str' object has no attribute 'long_text'. [dspy.teleprompt.bootstrap] filename=bootstrap.py lineno=211
  5%|▌         | 1/20 [00:02<00:52,  2.79s/it]ERROR:dspy.teleprompt.bootstrap:2024-07-16T17:06:39.450174Z [error    ] Failed to run or to evaluate example Example({'question': 'which  American actor was Candace Kita  guest starred with ', 'answer': 'Bill Murray'}) (input_keys={'question'}) with <function validate_context_and_answer at 0x00000162EF212F20> due to 'str' object has no attribute 'long_text'. [dspy.teleprompt.bootstrap] filename=bootstrap.py lineno=211
 10%|█         | 2/20 [00:04<00:41,  2.31s/it]ERROR:dspy.te

AttributeError: 'str' object has no attribute 'long_text'

In [None]:
evaluate_hotpot = Evaluate(devset=devset[:1000], metric=validate_context_and_answer, num_threads=num_threads, display_progress=True, display_table=0)
evaluate_hotpot(gemma)