# Preparing the LLM

In [13]:
import dspy

# Language model client used by the rest of this notebook.
# Requests are sent to `api_base`, a proxy on localhost:8080, so this cell needs
# that endpoint to be reachable (the smoke test below calls it immediately).
llm = dspy.OpenAI(
    model_type="chat",
    # Alternative model ids kept for quick switching; only one `model=` line is active.
    # model="groq/llama3-70b-8192",
    # model="azure/gpt-35-turbo-1106",
    model="openai/gpt-3.5-turbo",
    # model="gpt-3.5-turbo",
    # model="anthropic/claude-3-haiku-20240307",
    api_base="http://localhost:8080/proxy/v1/",
    max_tokens=2048,
    temperature=0,  # NOTE(review): presumably chosen to keep generations as repeatable as possible — confirm
)

# Smoke test: send a single prompt through the client and print whatever comes back.
print("LLM test response:", llm("hello there"))

# Retrieval client pointed at a remote ColBERTv2 `wiki17_abstracts` endpoint.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
# Configure DSPy's settings with this LM and retriever; later cells build
# dspy.Retrieve / dspy.ChainOfThought without passing either one explicitly.
dspy.settings.configure(lm=llm, rm=colbertv2_wiki17_abstracts)

LLM test response: ['Hello! How can I assist you today?']


# Preparing the Dataset

In [2]:
from dspy.datasets import HotPotQA

# Draw a small HotPotQA sample: 32 training examples, 50 dev examples, no test split.
dataset = HotPotQA(
    train_seed=1,
    train_size=32,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)

# Mark 'question' as the only input field on every example; the remaining
# fields stay available as labels and/or metadata.
trainset = [example.with_inputs("question") for example in dataset.train]
devset = [example.with_inputs("question") for example in dataset.dev]

# Cell output: the sizes of the two splits.
(len(trainset), len(devset))

  table = cls._concat_blocks(blocks, axis=0)


(32, 50)

# Defining the model

In [3]:
# Signature for the answer-generation step: two inputs (context and question)
# and one output (answer).
# NOTE(review): DSPy is understood to use a Signature's docstring and the field
# `desc` strings when building the prompt, so treat them as runtime text rather
# than documentation — confirm before rewording any of them.
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    # Supporting passages; RAG.forward passes the retrieved passages here.
    context = dspy.InputField(desc="may contain relevant facts")
    # The question to be answered.
    question = dspy.InputField()
    # The generated answer.
    answer = dspy.OutputField(desc="often between 1 and 5 words")


class RAG(dspy.Module):
    """Two-stage pipeline: retrieve passages for a question, then generate an answer from them."""

    def __init__(self, num_passages=3):
        """Create the retrieval and answer-generation stages.

        Args:
            num_passages: value passed as ``k`` to ``dspy.Retrieve``.
        """
        super().__init__()

        # Stage 1: passage retrieval.
        self.retrieve = dspy.Retrieve(k=num_passages)
        # Stage 2: chain-of-thought generation over the GenerateAnswer signature.
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer ``question`` using retrieved passages as context.

        Args:
            question: the question text; used both as the retrieval query and
                as the ``question`` input of the generation stage.

        Returns:
            A ``dspy.Prediction`` carrying the retrieved passages as ``context``
            and the generated text as ``answer``.
        """
        passages = self.retrieve(question).passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=generated.answer)


# Pick one dev example (index 18) to sanity-check the pipeline before any optimization.
dev_example = devset[18]
print(f"[Devset] Question: {dev_example.question}")
print(f"[Devset] Answer: {dev_example.answer}")
print(f"[Devset] Relevant Wikipedia Titles: {dev_example.gold_titles}")

# Instantiate the RAG module with its default number of passages.
# NOTE(review): this variable shares its name with the `generate_answer` attribute
# inside RAG, but here it holds the whole RAG instance — consider renaming if no
# other cell depends on it.
generate_answer = RAG()

# Run the full pipeline on the example's question.
pred = generate_answer(question=dev_example.question)

# Print the input and the prediction.
print(f"[Prediction] Question: {dev_example.question}")
print(f"[Prediction] Predicted Answer: {pred.answer}")

[Devset] Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
[Devset] Answer: English
[Devset] Relevant Wikipedia Titles: {'Restaurant: Impossible', 'Robert Irvine'}
[Prediction] Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
[Prediction] Predicted Answer: British


# Log in to LangWatch

In [4]:
import langwatch

# Point the client at the LangWatch instance on localhost:3000 before logging in.
langwatch.endpoint = "http://localhost:3000"
# The recorded output shows login() reports that the API key is already set when
# one exists; call langwatch.login(relogin=True) to log in again.
langwatch.login()

LangWatch API key is already set, if you want to login again, please call as langwatch.login(relogin=True)


In [48]:
# Refetching langwatch modules for development

import sys

# Drop the cached module objects, if present, so the imports that follow load
# langwatch and langwatch.dspy afresh instead of reusing the cached copies.
sys.modules.pop("langwatch", None)
sys.modules.pop("langwatch.dspy", None)

import langwatch
from langwatch.dspy import SerializableAndPydanticEncoder

# Start Training Session!

In [49]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch, LabeledFewShot, BootstrapFewShot, COPRO
import dspy.teleprompt
import dspy.evaluate

# Make DSPy's info-level log messages appear in the notebook output by replacing
# the logger's `info` method with the built-in print.
# NOTE(review): print does not interpolate logging-style format arguments
# (e.g. info("x=%s", x)) — confirm dspy.logger.info is only called with
# ready-made strings.
dspy.logger.info = print

def validate_context_and_answer(example, pred, trace=None):
    """Optimizer metric: succeed only when both DSPy answer checks succeed.

    Both ``dspy.evaluate.answer_exact_match`` and
    ``dspy.evaluate.answer_passage_match`` are always called, in that order,
    on the same ``(example, pred)`` pair.

    Args:
        example: the labelled example being scored.
        pred: the prediction produced for that example.
        trace: accepted for signature compatibility; not used here.

    Returns:
        The value of ``exact_match and passage_match``.
    """
    exact_match, passage_match = (
        dspy.evaluate.answer_exact_match(example, pred),
        dspy.evaluate.answer_passage_match(example, pred),
    )
    return exact_match and passage_match

# Set up a basic optimizer, which will compile our RAG program.
# The commented-out lines are alternative optimizers driven by the same metric.
# optimizer = BootstrapFewShotWithRandomSearch(metric=validate_context_and_answer, max_rounds=1)
# optimizer = BootstrapFewShot(metric=validate_context_and_answer, max_bootstrapped_demos=10, max_labeled_demos=10, max_rounds=3)
# Active choice: COPRO with breadth=2 and depth=3; the recorded run evaluates
# 2 prompt candidates at each of 3 depths.
optimizer = COPRO(metric=validate_context_and_answer, breadth=2, depth=3, track_stats=True)
# optimizer.num_candidate_sets = 0

# Register the optimizer with LangWatch under this experiment name before compiling;
# the recorded output shows this prints a run id and a tracking URL.
langwatch.dspy.init(experiment="dspy-visualizer-example", optimizer=optimizer)

# Compile a fresh RAG() against the training set.
# eval_kwargs asks for 64 threads, a progress bar, and no results table.
# NOTE(review): presumably these are forwarded to DSPy's evaluator — confirm.
compiled_rag = optimizer.compile(RAG(), trainset=trainset, eval_kwargs=dict(num_threads=64, display_progress=True, display_table=0))

Experiment initialized, run_id: great-weasel-of-action
Open http://localhost:3000/inbox-narrator/experiments/dspy-visualizer-example?runIds=great-weasel-of-action to track your DSPy training session live

Iteration Depth: 1/3.
At Depth 1/3, Evaluating Prompt Candidate #1/2 for Predictor 1 of 1.


Average Metric: 9 / 32  (28.1): 100%|██████████| 32/32 [00:00<00:00, 1205.31it/s]

Average Metric: 9 / 32 (28.1%)





At Depth 1/3, Evaluating Prompt Candidate #2/2 for Predictor 1 of 1.


Average Metric: 11 / 32  (34.4): 100%|██████████| 32/32 [00:00<00:00, 1176.77it/s]

Average Metric: 11 / 32 (34.4%)





Iteration Depth: 2/3.
At Depth 2/3, Evaluating Prompt Candidate #1/2 for Predictor 1 of 1.


Average Metric: 6 / 32  (18.8): 100%|██████████| 32/32 [00:00<00:00, 1002.41it/s]

Average Metric: 6 / 32 (18.8%)





At Depth 2/3, Evaluating Prompt Candidate #2/2 for Predictor 1 of 1.


Average Metric: 4 / 32  (12.5): 100%|██████████| 32/32 [00:00<00:00, 1120.10it/s]

Average Metric: 4 / 32 (12.5%)





Iteration Depth: 3/3.
At Depth 3/3, Evaluating Prompt Candidate #1/2 for Predictor 1 of 1.


Average Metric: 6 / 32  (18.8): 100%|██████████| 32/32 [00:00<00:00, 1555.44it/s]

Average Metric: 6 / 32 (18.8%)





At Depth 3/3, Evaluating Prompt Candidate #2/2 for Predictor 1 of 1.


Average Metric: 4 / 32  (12.5): 100%|██████████| 32/32 [00:00<00:00, 782.30it/s]

Average Metric: 4 / 32 (12.5%)





In [47]:
# Save the compiled program to test.json (path is relative to the working directory).
# `compiled_rag` is produced by the compile cell, so run this only after that cell.
compiled_rag.save("test.json")