In [None]:
%load_ext autoreload
%autoreload 2

import sys
import os

# try: # When on google Colab, let's clone the notebook so we download the cache.
#     import google.colab  # noqa: F401
#     repo_path = 'dspy'
#     !git -C $repo_path pull origin || git clone https://github.com/stanfordnlp/dspy $repo_path
# except:
#     repo_path = '.'
repo_path = 'dspy'
if repo_path not in sys.path:
    sys.path.append(repo_path)

# Set up the cache for this notebook
os.environ["DSP_NOTEBOOK_CACHEDIR"] = os.path.join(repo_path, 'cache')

import pkg_resources # Install the package if it's not installed
if "dspy-ai" not in {pkg.key for pkg in pkg_resources.working_set}:
    !pip install -U pip
    !pip install dspy-ai==2.4.17
    !pip install openai~=0.28.1
    # !pip install -e $repo_path

import dspy

In [1]:
import openai
import dspy

# Placeholder key for the OpenAI-compatible endpoint on localhost.
# NOTE(review): presumably the local server does not validate the key - confirm.
api_key = "dummy"
# mini = dspy.OpenAI(model='gpt-4o-mini') 
# SECURITY: a hardcoded provider API key used to sit in a comment here. It has
# been removed; revoke that key and load any real secret from the environment
# (e.g. os.environ[...]) instead of committing it to the notebook.


mini = dspy.LM(
    model="openai/qwen2.5-3b-instruct-mlx",  # Add 'openai/' prefix
    api_base="http://localhost:1234/v1",
    api_key=api_key,  # pass the variable, not the literal string "api_key"
    max_tokens=5000,
    use_completion_for_chat=True
)
dspy.settings.configure(lm=mini)

# Smoke test: one trivial question to confirm the LM endpoint responds.
p = dspy.Predict("question -> answer")
p(question="What is the capital of France?")

# Retrieval model: ColBERTv2 server (the URL names the index `wiki17_abstracts`).
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')

# Register both the language model and the retriever as DSPy defaults.
dspy.settings.configure(lm=mini, rm=colbertv2_wiki17_abstracts)


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m




[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m


[1;31mProvider List: https://docs.litellm.ai/docs/providers[0m



In [3]:
# mini = dspy.LM(
#     model="openai/qwen2.5-3b-instruct-mlx",  # Add 'openai/' prefix
#     api_base="http://localhost:1234/v1",
#     api_key="sk-fake-key",
#     max_tokens=5000,
#     use_completion_for_chat=True
# )
# dspy.settings.configure(lm=mini)

# p = dspy.Predict("question -> answer")
# p(question="What is the capital of France?")

In [4]:
from dspy.datasets import HotPotQA

# Build a small HotPotQA split: 20 training and 50 dev examples, no test set.
dataset = HotPotQA(
    train_seed=1,
    train_size=20,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)

# Mark 'question' as the only input field; every other field stays a label or metadata.
trainset = [example.with_inputs('question') for example in dataset.train]
devset = [example.with_inputs('question') for example in dataset.dev]

# Display the size of each split.
(len(trainset), len(devset))

In [5]:
# Peek at the first training example: its question and gold answer.
train_example = trainset[0]
print(
    f"Question : {train_example.question}",
    f"Answer: {train_example.answer}",
    sep="\n",
)

In [6]:
# Pick one dev example (index 18) that is reused as the running example below.
dev_example = devset[18]
print(
    f"Question: {dev_example.question}",
    f"Answer: {dev_example.answer}",
    f"Relevant Wikipedia Titles: {dev_example.gold_titles}",
    sep="\n",
)

In [7]:
# Compare which fields are inputs and which are labels in each split.
print(
    f"For this dataset, training examples have input keys {train_example.inputs().keys()} and label keys {train_example.labels().keys()}",
    f"For this dataset, dev examples have input keys {dev_example.inputs().keys()} and label keys {dev_example.labels().keys()}",
    sep="\n",
)

In [8]:
# Signature for direct question answering: one input field, one output field.
# NOTE: DSPy uses a Signature's docstring as the task instructions in the prompt,
# so the docstring below is runtime text - edit it deliberately.
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""
    
    # Input: the question text.
    question = dspy.InputField()
    # Output: the answer; `desc` carries a length hint for the field.
    answer = dspy.OutputField(desc="often between 1 and 5 words")

In [9]:
# Print the most recent call recorded in `mini`'s history (n=1).
mini.inspect_history(n=1)

In [10]:
# Build a plain predictor (no chain of thought) from the BasicQA signature.
generate_answer = dspy.Predict(BasicQA)

# Run it on the dev example selected earlier.
pred = generate_answer(question=dev_example.question)

# Show the question, the predicted answer, and the full prediction object.
print(
    f"Question: {dev_example.question}",
    f"Predicted Answer: {pred.answer}",
    f"pred: {pred}",
    sep="\n",
)

In [11]:
# Option 1: rebuild the LM client, attempting to request text-completion mode.
# NOTE(review): `use_completion_for_chat` and `chat_mode` are not parameters this
# notebook can confirm `dspy.LM` understands; DSPy documents `model_type="text"`
# for text completion - verify which one is intended before relying on this.
mini = dspy.LM(
    model="openai/qwen2.5-3b-instruct-mlx",
    api_base="http://localhost:1234/v1",
    api_key="sk-fake-key",  # placeholder value, not a real secret
    max_tokens=5000,
    use_completion_for_chat=True,
    chat_mode=False  # intended to force text completion mode (see note above)
)

# Option 2: Create a custom adapter that handles raw text responses
class SimpleTextAdapter(dspy.Adapter):
    """Minimal adapter: a plain "Question/Answer" prompt in, raw text out.

    Only the ``question`` input is used; ``signature`` and ``demos`` are ignored.
    """

    def format(self, signature, demos, inputs):
        """Return the prompt string built from ``inputs['question']``."""
        return f"Question: {inputs['question']}\nAnswer:"

    def parse(self, signature, completion):
        """Return the whitespace-trimmed completion under the ``"answer"`` key."""
        answer_text = completion.strip()
        return {"answer": answer_text}

# Make the rebuilt LM and the custom adapter the DSPy defaults.
dspy.settings.configure(
    lm=mini,
    adapter=SimpleTextAdapter()  # predictions are formatted and parsed by this adapter
)

# Rebuild the predictor and re-run the dev example under the new settings.
generate_answer = dspy.Predict(BasicQA)
pred = generate_answer(question=dev_example.question)

In [12]:
from dspy.evaluate.evaluate import Evaluate

# Evaluator over the dev set (single thread, progress bar, display_table=5).
# It is reused for every program evaluated in this notebook.
evaluate_on_hotpotqa = Evaluate(
    devset=devset,
    num_threads=1,
    display_progress=True,
    display_table=5,
)

# Score the basic `generate_answer` predictor with the exact-match metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_hotpotqa(generate_answer, metric=metric)

In [13]:
# # Define the predictor. Notice we're just changing the class. The signature BasicQA is unchanged.
# generate_answer_with_chain_of_thought = dspy.ChainOfThought(BasicQA)

# # Call the predictor on the same input.
# pred = generate_answer_with_chain_of_thought(question=dev_example.question)

# # Print the input, the chain of thought, and the prediction.
# print(f"Question: {dev_example.question}")
# print(f"Thought: {pred.rationale.split('.', 1)[1].strip()}")
# print(f"Predicted Answer: {pred.answer}")

In [14]:
class SimpleTextAdapter(dspy.Adapter):
    """Plain-text adapter that also supports signatures with a ``reasoning`` output.

    Only the ``question`` input is used; ``demos`` are ignored.
    """

    def format(self, signature, demos, inputs):
        """Build the prompt: a step-by-step lead-in when reasoning is requested, else a direct one."""
        wants_reasoning = 'reasoning' in signature.output_fields
        if wants_reasoning:
            return f"Question: {inputs['question']}\nLet's think about this step by step:\n1."
        return f"Question: {inputs['question']}\nAnswer:"

    def parse(self, signature, completion):
        """Turn the raw completion into the dict of output fields.

        Without a ``reasoning`` field the whole trimmed completion is the answer.
        With one, the text is split on a single ``"\\nTherefore, "`` marker; when
        that marker does not occur exactly once, the last line is taken as the
        answer (minus any ``"Answer:"`` text) and the preceding lines as reasoning.
        """
        if 'reasoning' not in signature.output_fields:
            return {"answer": completion.strip()}

        pieces = completion.split('\nTherefore, ')
        if len(pieces) == 2:
            thought, final = pieces
        else:
            # Marker absent or repeated: fall back to "last line is the answer".
            *body, tail = completion.strip().split('\n')
            thought = '\n'.join(body)
            final = tail.replace('Answer:', '')

        return {"reasoning": thought.strip(), "answer": final.strip()}

# Install the reasoning-aware adapter as the DSPy default.
dspy.settings.configure(
    lm=mini,
    adapter=SimpleTextAdapter()
)

# Same BasicQA signature, now wrapped in ChainOfThought, run on the dev example.
generate_answer_with_chain_of_thought = dspy.ChainOfThought(BasicQA)
pred = generate_answer_with_chain_of_thought(question=dev_example.question)

# Print the reasoning only when the prediction actually carries that attribute.
print(f"Question: {dev_example.question}")
if hasattr(pred, 'reasoning'):
    print(f"Thought: {pred.reasoning}")
print(f"Predicted Answer: {pred.answer}")

In [15]:
# Show only the reasoning text of the current prediction.
print(pred.reasoning)

In [16]:
# Print the most recent call recorded in `mini`'s history (n=1).
mini.inspect_history(n=1)

In [17]:
from dspy.evaluate.evaluate import Evaluate

# Evaluator over the dev set (single thread, progress bar, display_table=5).
evaluate_on_hotpotqa = Evaluate(
    devset=devset,
    num_threads=1,
    display_progress=True,
    display_table=5,
)

# Score the chain-of-thought predictor with the exact-match metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_hotpotqa(generate_answer_with_chain_of_thought, metric=metric)

In [18]:
# Fetch the top-3 passages for the dev question from the configured retriever.
retrieve = dspy.Retrieve(k=3)
retrieval_result = retrieve(dev_example.question)
topK_passages = retrieval_result.passages

print(f"Top {retrieve.k} passages for question: {dev_example.question} \n", '-' * 30, '\n')

# List the passages with 1-based numbering.
for rank, passage in enumerate(topK_passages, start=1):
    print(f'{rank}]', passage, '\n')

In [19]:
# Try the retriever on a different question; the cell displays the first passage returned.
retrieve("When was the first FIFA World Cup held?").passages[0]

In [20]:
# Signature for retrieval-augmented answering: context + question in, answer out.
# NOTE: DSPy uses a Signature's docstring as the task instructions in the prompt,
# so the docstring below is runtime text - edit it deliberately.
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    # Input: passages supplied by the caller as supporting context.
    context = dspy.InputField(desc="may contain relevant facts")
    # Input: the question text.
    question = dspy.InputField()
    # Output: the answer; `desc` carries a length hint for the field.
    answer = dspy.OutputField(desc="often between 1 and 5 words")

In [21]:
class RAG(dspy.Module):
    """Retrieve-then-answer program: fetch passages, then answer with chain of thought."""

    def __init__(self, num_passages=3):
        """Create the retriever (``k=num_passages``) and the answer generator."""
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer ``question`` and return a Prediction holding the passages used and the answer."""
        passages = self.retrieve(question).passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=generated.answer)

In [22]:
# Instantiate the uncompiled RAG program (default num_passages) and try one question.
rag_model = RAG()
pred = rag_model('When was the first rugby world cup?')

# The Prediction holds the retrieved context and the answer.
print(pred)

In [23]:
# Print the most recent call recorded in `mini`'s history (n=1).
mini.inspect_history(n=1)

In [24]:
from dspy.evaluate.evaluate import Evaluate

# Evaluator over the dev set (single thread, progress bar, display_table=5).
evaluate_on_hotpotqa = Evaluate(
    devset=devset,
    num_threads=1,
    display_progress=True,
    display_table=5,
)

# Score the uncompiled `rag_model` with the exact-match metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_hotpotqa(rag_model, metric=metric)

In [25]:
from dspy.teleprompt import LabeledFewShot

# Compile `rag_model` with the LabeledFewShot optimizer (k=8) using the training set.
labeled_fewshot_optimizer = LabeledFewShot(k=8)
rag_model_compiled = labeled_fewshot_optimizer.compile(
    student=rag_model,
    trainset=trainset,
)


In [26]:
from dspy.evaluate.evaluate import Evaluate

# Evaluator over the dev set (single thread, progress bar, display_table=5).
evaluate_on_hotpotqa = Evaluate(
    devset=devset,
    num_threads=1,
    display_progress=True,
    display_table=5,
)

# Score the LabeledFewShot-compiled program (`rag_model_compiled`) with exact match.
metric = dspy.evaluate.answer_exact_match
evaluate_on_hotpotqa(rag_model_compiled, metric=metric)

In [27]:
from dspy.teleprompt import BootstrapFewShot

# Validation logic: check that the predicted answer is correct.
# Also check that the retrieved context does actually contain that answer.
def validate_context_and_answer(example, pred, trace=None):
    """Metric: pass only when both the exact-match and the passage-match checks pass.

    Both checks are always evaluated; ``trace`` is accepted but not used.
    Returns the same value as ``exact_match and passage_match``.
    """
    exact_match = dspy.evaluate.answer_exact_match(example, pred)
    passage_match = dspy.evaluate.answer_passage_match(example, pred)
    if exact_match:
        return passage_match
    return exact_match

# Set up a BootstrapFewShot teleprompter that uses the combined
# exact-match + passage-match metric defined in this cell.
teleprompter = BootstrapFewShot(metric=validate_context_and_answer)

# Compile a fresh RAG instance against the full training set.
compiled_rag = teleprompter.compile(RAG(), trainset=trainset)

In [28]:
import dspy
import time
import random
from functools import wraps
from dspy.teleprompt import BootstrapFewShot
# Signature for retrieval-augmented answering: context + question in, answer out.
# NOTE(review): this re-declares a class of the same name from an earlier cell
# and shadows it - consider keeping a single definition.
# NOTE: DSPy uses a Signature's docstring as the task instructions in the prompt,
# so the docstring below is runtime text - edit it deliberately.
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    # Input: passages supplied by the caller as supporting context.
    context = dspy.InputField(desc="may contain relevant facts")
    # Input: the question text.
    question = dspy.InputField()
    # Output: the answer; `desc` carries a length hint for the field.
    answer = dspy.OutputField(desc="often between 1 and 5 words")
class RAG(dspy.Module):
    """Retrieve-then-answer program: fetch passages, then answer with chain of thought."""

    def __init__(self, num_passages=3):
        """Create the retriever (``k=num_passages``) and the answer generator."""
        super().__init__()
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Answer ``question`` and return a Prediction holding the passages used and the answer."""
        passages = self.retrieve(question).passages
        generated = self.generate_answer(context=passages, question=question)
        return dspy.Prediction(context=passages, answer=generated.answer)
    
def rate_limit_decorator(max_per_minute=28):  # Set slightly below limit of 30
    """Build a decorator that spaces out calls and retries once on transient errors.

    Calls made through the returned decorator are kept at least
    ``60 / max_per_minute`` seconds apart by sleeping before the call. The
    timestamp of the last attempt is shared by every function wrapped with the
    same decorator object, and no locking is used around it.

    If the wrapped call raises an exception whose message contains ``"429"``
    or ``"Connection error"``, the wrapper sleeps for a random 1-3 seconds and
    calls the function one more time. An exception from that second attempt,
    or any other exception, propagates to the caller.

    Args:
        max_per_minute: Maximum number of calls allowed per minute. Must be
            positive.

    Returns:
        A decorator that preserves the wrapped function's metadata.

    Raises:
        ValueError: If ``max_per_minute`` is not positive.
    """
    if max_per_minute <= 0:
        raise ValueError("max_per_minute must be positive")

    min_interval = 60.0 / max_per_minute
    # One-element list so the closure can rebind the value. ``None`` means
    # "no call has been attempted yet", which never triggers a wait.
    last_called = [None]

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if last_called[0] is not None:
                # Monotonic clock: immune to wall-clock adjustments.
                remaining = min_interval - (time.monotonic() - last_called[0])
                if remaining > 0:
                    time.sleep(remaining)

            try:
                return func(*args, **kwargs)
            except Exception as e:
                if "429" in str(e) or "Connection error" in str(e):
                    time.sleep(random.uniform(1, 3))  # Random backoff
                    return func(*args, **kwargs)  # Retry once
                raise
            finally:
                # Record every attempt - successful, retried, or failed - so the
                # next call is still spaced out after an error or a retry.
                last_called[0] = time.monotonic()
        return wrapper
    return decorator

# Apply rate limiting to LM
class RateLimitedLM(dspy.LM):
    """``dspy.LM`` whose ``__call__`` goes through ``rate_limit_decorator`` with its defaults."""

    @rate_limit_decorator()
    def __call__(self, *args, **kwargs):
        """Forward the call unchanged to ``dspy.LM.__call__`` once the decorator lets it through."""
        parent_call = super(RateLimitedLM, self).__call__
        return parent_call(*args, **kwargs)

# Rebuild the LM client using the rate-limited subclass defined in this cell.
# NOTE(review): `use_completion_for_chat` is not a parameter this notebook can
# confirm `dspy.LM` understands - verify it has the intended effect.
mini = RateLimitedLM(
    model="openai/qwen2.5-3b-instruct-mlx",
    api_base="http://localhost:1234/v1",
    api_key="sk-fake-key",  # placeholder value, not a real secret
    max_tokens=5000,
    use_completion_for_chat=True,
    num_retries=3  # Add retries for robustness
)

# Re-create the ColBERTv2 retriever and register LM + retriever as DSPy defaults.
# NOTE(review): confirm that `max_batch_size` is a setting DSPy actually reads.
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')
dspy.settings.configure(
    lm=mini,
    rm=colbertv2_wiki17_abstracts,
    max_batch_size=5,  # intended to reduce batch size to avoid hitting rate limits
)


# Validation logic: check that the predicted answer is correct.
# Also check that the retrieved context does actually contain that answer.
def validate_context_and_answer(example, pred, trace=None):
    """Metric: pass only when both the exact-match and the passage-match checks pass.

    Both checks are always evaluated; ``trace`` is accepted but not used.
    Returns the same value as ``exact_match and passage_match``.
    """
    exact_match = dspy.evaluate.answer_exact_match(example, pred)
    passage_match = dspy.evaluate.answer_passage_match(example, pred)
    if exact_match:
        return passage_match
    return exact_match

# BootstrapFewShot teleprompter with explicit caps on the demos it may use.
teleprompter = BootstrapFewShot(
    metric=validate_context_and_answer,
    max_bootstrapped_demos=5,  # cap on bootstrapped demos
    max_labeled_demos=5,  # cap on labeled demos
    # max_batch_size=3  # Control concurrent requests
)

# Compile a fresh RAG instance on the first 10 training examples only,
# with the rate-limited LM configured in this cell.
compiled_rag = teleprompter.compile(RAG(), trainset=trainset[:10])  # Start with smaller trainset

In [None]:
# NOTE(review): a bare `break` outside a loop is a SyntaxError. Presumably left
# here to halt "Run All" at this point - confirm, and remove the cell if unneeded.
break