In [2]:
%load_ext autoreload
%autoreload 2

import os
from dotenv import load_dotenv
load_dotenv()
assert 'OPENAI_API_KEY' in os.environ

In [3]:
import dspy

# Completion budget shared by both language models.
MAX_TOKENS = 4000

# Two OpenAI-backed LMs: `turbo` is installed as the default below,
# `gpt4` is kept around for cells that pass it explicitly.
turbo = dspy.OpenAI(model='gpt-3.5-turbo', max_tokens=MAX_TOKENS)
gpt4 = dspy.OpenAI(model='gpt-4', max_tokens=MAX_TOKENS)

# Make `turbo` the LM that DSPy modules use unless told otherwise.
dspy.settings.configure(lm=turbo)

In [4]:
# Smoke test: build a predictor from an inline signature string and ask one question.
# The call is the cell's last expression, so the resulting Prediction is displayed.
sanity_predictor = dspy.TypedPredictor("question -> answer")
sanity_predictor(question="What is the capital of France?")

Prediction(
    answer='Paris'
)

In [5]:
from dspy.datasets import HotPotQA

# Draw a small HotPotQA sample with fixed seeds: 20 train, 50 dev, no test examples.
dataset = HotPotQA(
    train_seed=1,
    train_size=20,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)

# Mark 'question' as the only input field; all remaining fields act as labels and/or metadata.
trainset = [example.with_inputs('question') for example in dataset.train]
devset = [example.with_inputs('question') for example in dataset.dev]

# Display the split sizes as the cell output.
len(trainset), len(devset)

(20, 50)

In [6]:
# Signature for the QA task: one input field (`question`) and one output field (`answer`).
# NOTE: per the DSPy signature docs, the class docstring is used as the prompt
# instructions and `desc` is passed along with its field, so both strings are
# runtime text -- edit them only when you intend to change the prompt.
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    question = dspy.InputField()  # the question to be answered
    answer = dspy.OutputField(desc="often between 1 and 5 words")  # length hint for the answer

In [7]:
from dspy.evaluate import Evaluate
from dspy.evaluate.metrics import answer_exact_match
from dspy.functional import TypedPredictor
from dspy.teleprompt.signature_opt_typed import optimize_signature

# Scores a program on the dev set with the exact-match metric, using 10 threads
# and a progress bar.
evaluator = Evaluate(
    devset=devset,
    metric=answer_exact_match,
    num_threads=10,
    display_progress=True,
)

# Search for a better signature for a TypedPredictor built on BasicQA:
# 4 initial candidate signatures and 8 evaluation iterations, with verbose logging.
# `gpt4` is handed over as `prompt_model` (presumably the LM that writes the
# candidate signatures -- confirm against the optimizer's documentation).
program = optimize_signature(
    student=TypedPredictor(BasicQA),
    evaluator=evaluator,
    prompt_model=gpt4,
    initial_prompts=4,
    n_iterations=8,
    verbose=True,
)

Found 1 typed predictors to optimize.
Generating 4 initial signatures for base...

Running eval iteration 0...


Average Metric: 16 / 50  (32.0): 100%|██████████| 50/50 [00:00<00:00, 4290.32it/s]
  df = df.applymap(truncate_cell)


Average Metric: 16 / 50  (32.0%)

Running eval iteration 1...


Average Metric: 16 / 50  (32.0): 100%|██████████| 50/50 [00:02<00:00, 22.35it/s]


Average Metric: 16 / 50  (32.0%)

Running eval iteration 2...


Average Metric: 19 / 50  (38.0): 100%|██████████| 50/50 [00:04<00:00, 10.28it/s]


Average Metric: 19 / 50  (38.0%)

Running eval iteration 3...


Average Metric: 11 / 50  (22.0): 100%|██████████| 50/50 [00:05<00:00,  8.63it/s]


Average Metric: 11 / 50  (22.0%)

Running eval iteration 4...


Average Metric: 15 / 50  (30.0): 100%|██████████| 50/50 [00:02<00:00, 24.53it/s]


Average Metric: 15 / 50  (30.0%)
Generating new signature for base...

Running eval iteration 5...


Average Metric: 18 / 50  (36.0): 100%|██████████| 50/50 [00:02<00:00, 21.89it/s]


Average Metric: 18 / 50  (36.0%)
Generating new signature for base...

Running eval iteration 6...


Average Metric: 6 / 50  (12.0): 100%|██████████| 50/50 [00:03<00:00, 13.65it/s]


Average Metric: 6 / 50  (12.0%)
Generating new signature for base...

Running eval iteration 7...


Average Metric: 17 / 50  (34.0): 100%|██████████| 50/50 [00:02<00:00, 19.56it/s]

Average Metric: 17 / 50  (34.0%)





In [16]:
# Print the signature (instructions plus field definitions) carried by the optimized program.
optimized_signature = program.signature
print(optimized_signature)

StringSignature(question -> answer
    instructions='You are highly intelligent. Please provide short, factual answers to the following questions.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Inquiry:', 'desc': '${question}'})
    answer = Field(annotation=str required=True json_schema_extra={'desc': 'usually between 1 and 5 words', '__dspy_field_type': 'output', 'prefix': 'Reply:'})
)
