# Preparing the LLM

In [1]:
import dspy

# Options for the chat model client. Other model identifiers that can be swapped in:
#   "groq/llama3-70b-8192"
#   "azure/gpt-35-turbo-1106"
#   "gpt-3.5-turbo"
#   "anthropic/claude-3-haiku-20240307"
llm_options = dict(
    model_type="chat",
    model="openai/gpt-3.5-turbo",
    api_base="http://localhost:8080/proxy/v1/",
    max_tokens=2048,
    temperature=0,
)
llm = dspy.OpenAI(**llm_options)

# Smoke test: send one prompt through the client before wiring it into DSPy.
test_response = llm("hello there")
print("LLM test response:", test_response)

# Retrieval model served from the wiki17_abstracts endpoint; register it together
# with the language model as the DSPy-wide defaults.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
dspy.settings.configure(lm=llm, rm=colbertv2_wiki17_abstracts)

LLM test response: ['Hello! How can I assist you today?']


# Preparing the Dataset

In [2]:
from dspy.datasets import HotPotQA

# Build the HotPotQA splits with fixed seeds: 32 train examples, 50 dev examples, no test split.
dataset = HotPotQA(
    train_seed=1,
    train_size=32,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)

# Mark 'question' as the only input field on every example; all remaining
# fields then act as labels and/or metadata.
trainset, devset = (
    [example.with_inputs('question') for example in split]
    for split in (dataset.train, dataset.dev)
)

# Display the size of both splits as the cell result.
(len(trainset), len(devset))

  table = cls._concat_blocks(blocks, axis=0)


(32, 50)

# Defining the Model

In [3]:
# # Refetching langwatch modules for development

# import sys

# if "langwatch" in sys.modules:
#     del sys.modules["langwatch"]
# if "langwatch.dspy" in sys.modules:
#     del sys.modules["langwatch.dspy"]

# import langwatch
# from langwatch.dspy import SerializableAndPydanticEncoder

In [4]:
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    # NOTE(review): the docstring above and the `desc` strings below are runtime text
    # handed to dspy (presumably used when building the prompt) — treat them as
    # behavior, not as documentation, and confirm before rewording.

    # Inputs: supporting context plus the question to answer.
    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    # Output: the answer text.
    answer = dspy.OutputField(desc="often between 1 and 5 words")


class RAG(dspy.Module):
    """Retrieve-then-answer program: fetch passages for a question, then generate an answer from them."""

    def __init__(self, num_passages=3):
        """Create the retriever (top `num_passages` passages) and the chain-of-thought answer generator."""
        super().__init__()

        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Return a Prediction holding the retrieved passages (`context`) and the generated `answer`."""
        retrieved = self.retrieve(question)
        passages = retrieved.passages
        answered = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=answered.answer)


# Pick one dev example and show its question, gold answer and gold Wikipedia titles.
dev_example = devset[18]
print(
    f"[Devset] Question: {dev_example.question}",
    f"[Devset] Answer: {dev_example.answer}",
    f"[Devset] Relevant Wikipedia Titles: {dev_example.gold_titles}",
    sep="\n",
)

# Run a freshly constructed (not yet compiled) RAG program on that question.
generate_answer = RAG()
pred = generate_answer(question=dev_example.question)

# Show the question next to the answer the program predicted.
print(
    f"[Prediction] Question: {dev_example.question}",
    f"[Prediction] Predicted Answer: {pred.answer}",
    sep="\n",
)

[Devset] Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
[Devset] Answer: English
[Devset] Relevant Wikipedia Titles: {'Restaurant: Impossible', 'Robert Irvine'}
[Prediction] Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
[Prediction] Predicted Answer: British


# Log in to LangWatch

In [5]:
import langwatch

# Point the LangWatch client at the instance running on localhost:3000.
langwatch.endpoint = "http://localhost:3000"
# Authenticate; as the cell output shows, this directs you to the /authorize page to get an API key.
langwatch.login()

Please go to http://localhost:3000/authorize to get your API key
LangWatch API key set


# Start Training Session!

In [6]:
from dspy.teleprompt import (
    BootstrapFewShotWithRandomSearch,
    LabeledFewShot,
    BootstrapFewShot,
    COPRO,
    MIPRO,
)
import dspy.teleprompt
import dspy.evaluate


def validate_context_and_answer(example, pred, trace=None):
    """Metric: truthy only when both the exact-match and the passage-match checks pass.

    Args:
        example: Reference example passed to both dspy.evaluate checks.
        pred: Program prediction passed to both dspy.evaluate checks.
        trace: Accepted for the metric call signature; not used here.

    Returns:
        The exact-match result when it is falsy, otherwise the passage-match result.
    """
    # Both checks are always evaluated, regardless of the first result.
    exact_match = dspy.evaluate.answer_exact_match(example, pred)
    passage_match = dspy.evaluate.answer_passage_match(example, pred)
    if not exact_match:
        return exact_match
    return passage_match


# Optimizer that will compile the RAG program, scored by validate_context_and_answer.
optimizer = BootstrapFewShotWithRandomSearch(
    metric=validate_context_and_answer,
    max_rounds=1,
    max_bootstrapped_demos=4,
    max_labeled_demos=4,
)
# Alternative optimizers to try instead:
# optimizer = BootstrapFewShot(metric=validate_context_and_answer, max_bootstrapped_demos=10, max_labeled_demos=10, max_rounds=3)
# optimizer = COPRO(metric=validate_context_and_answer, breadth=2, depth=3, track_stats=True)
# optimizer = MIPRO(metric=validate_context_and_answer, num_candidates=2, init_temperature=0.7)

# Register the optimizer with LangWatch under this experiment name before compiling.
langwatch.dspy.init(optimizer=optimizer, experiment="dspy-visualizer-example")

# Compile a fresh RAG program against the training examples.
compiled_rag = optimizer.compile(RAG(), trainset=trainset)
# Alternative compile calls matching the optimizers above:
# compiled_rag = optimizer.compile(RAG(), trainset=trainset, eval_kwargs=dict(num_threads=64, display_progress=True, display_table=0))
# compiled_rag = optimizer.compile( RAG(),
#     trainset=trainset,
#     num_trials=10,
#     max_bootstrapped_demos=3,
#     max_labeled_demos=5,
#     eval_kwargs=dict(num_threads=16, display_progress=True, display_table=0),
# )


[LangWatch] Experiment initialized, run_id: smiling-idealistic-raptor
[LangWatch] Open http://localhost:3000/inbox-narrator/experiments/dspy-visualizer-example?runIds=smiling-idealistic-raptor to track your DSPy training session live



Average Metric: 11 / 32  (34.4): 100%|██████████| 32/32 [00:00<00:00, 428.34it/s]



logging





Average Metric: 12 / 32  (37.5): 100%|██████████| 32/32 [00:00<00:00, 479.87it/s]




logging




 34%|███▍      | 11/32 [00:00<00:00, 1016.62it/s]
Average Metric: 11 / 32  (34.4): 100%|██████████| 32/32 [00:00<00:00, 447.95it/s]




logging




 41%|████      | 13/32 [00:00<00:00, 665.50it/s]
Average Metric: 13 / 32  (40.6): 100%|██████████| 32/32 [00:00<00:00, 539.73it/s]




logging




 12%|█▎        | 4/32 [00:00<00:00, 559.97it/s]
Average Metric: 11 / 32  (34.4): 100%|██████████| 32/32 [00:00<00:00, 457.01it/s]




logging




  3%|▎         | 1/32 [00:01<00:59,  1.91s/it]


KeyboardInterrupt: 

In [47]:
# Persist the compiled program to test.json.
# NOTE(review): the compile cell's recorded output ends in KeyboardInterrupt and this
# cell's execution count (47) is out of sequence, so `compiled_rag` presumably comes
# from an earlier completed run — on a fresh kernel, let the compile cell finish first.
compiled_rag.save("test.json")