In [1]:
%load_ext autoreload
%autoreload 2

import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if one exists) into os.environ so the
# OpenAI key never has to be hardcoded in the notebook.
load_dotenv()

# Fail fast with an actionable message. Checking truthiness (rather than mere
# membership) also rejects a key that is present but empty, which would
# otherwise only surface later as an opaque authentication error.
assert os.environ.get('OPENAI_API_KEY'), (
    "OPENAI_API_KEY is missing or empty: add it to your .env file or export it "
    "in the environment before running this notebook."
)

In [2]:
import dspy
# Two OpenAI-backed clients with the same 4000-token completion budget.
# `gpt4` is only used where it is passed explicitly (the optimizer cell hands
# it over as `prompt_model`).
turbo = dspy.OpenAI(
    max_tokens=4000,
    model='gpt-3.5-turbo',
)
gpt4 = dspy.OpenAI(
    max_tokens=4000,
    model='gpt-4',
)

# Make `turbo` the default LM for every DSPy call that does not name one.
dspy.settings.configure(lm=turbo)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Smoke test: build a typed predictor from the inline "question -> answer"
# signature string and call it once. No LM is passed here, so the call relies
# on the default configured via dspy.settings. The bare expression is the
# cell's displayed output.
dspy.TypedPredictor("question -> answer")(question="What is the capital of France?")

Prediction(
    answer='Paris'
)

In [4]:
from dspy.datasets import HotPotQA

# Build a small HotPotQA sample: 20 training and 50 dev examples, no test
# split. Seeds are fixed so the same examples are drawn on every run.
dataset = HotPotQA(
    train_seed=1,
    train_size=20,
    eval_seed=2023,
    dev_size=50,
    test_size=0,
)

# Mark 'question' as the only input field of each example; every other field
# is then treated as a label and/or metadata.
trainset = [example.with_inputs('question') for example in dataset.train]
devset = [example.with_inputs('question') for example in dataset.dev]

# Sanity check: display the two split sizes as the cell output.
len(trainset), len(devset)

(20, 50)

In [5]:
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""
    # NOTE: the docstring above is runtime text, not just documentation: it
    # appears as the signature's "description" in the prompts recorded in
    # this notebook, so edit it only on purpose.

    # Input field: the question to be answered. The explicit `str` annotation
    # matches the "type": "string" schema shown in the recorded prompt output.
    question: str = dspy.InputField()

    # Output field: the answer; `desc` is the length hint passed along with it.
    answer: str = dspy.OutputField(desc="often between 1 and 5 words")

In [6]:
from dspy.evaluate import Evaluate
from dspy.evaluate.metrics import answer_exact_match
from dspy.functional import TypedPredictor
from dspy.teleprompt.signature_opt_typed import optimize_signature

# Scorer used by the optimizer: exact-match on the answer over the dev split,
# evaluated with 10 threads and a progress bar.
evaluator = Evaluate(
    devset=devset,
    metric=answer_exact_match,
    num_threads=10,
    display_progress=True,
)

# Optimize the BasicQA signature used by a typed predictor. `gpt4` is handed
# over as the prompt model (the one asked to propose signature variants); the
# student itself names no LM, so it presumably runs on the default one.
# Starts from 2 initial prompts and runs for 8 iterations, logging progress.
program = optimize_signature(
    student=TypedPredictor(BasicQA),
    evaluator=evaluator,
    prompt_model=gpt4,
    initial_prompts=2,
    n_iterations=8,
    verbose=True,
)

Found 1 typed predictors to optimize.
Generating 2 initial signatures for base...

Running eval iteration 0...


Average Metric: 16 / 50  (32.0): 100%|██████████| 50/50 [00:00<00:00, 2233.25it/s]
  df = df.applymap(truncate_cell)


Average Metric: 16 / 50  (32.0%)

Running eval iteration 1...


Average Metric: 14 / 50  (28.0): 100%|██████████| 50/50 [00:02<00:00, 24.02it/s]


Average Metric: 14 / 50  (28.0%)

Running eval iteration 2...


Average Metric: 15 / 50  (30.0): 100%|██████████| 50/50 [00:02<00:00, 20.98it/s]


Average Metric: 15 / 50  (30.0%)
Generating new signature for base...

Running eval iteration 3...


Average Metric: 7 / 50  (14.0): 100%|██████████| 50/50 [00:36<00:00,  1.36it/s]


Average Metric: 7 / 50  (14.0%)
Generating new signature for base...

Running eval iteration 4...


Average Metric: 2 / 5  (40.0):   8%|▊         | 4/50 [00:00<00:04,  9.85it/s]

In [None]:
# Debugging aid: print recent calls recorded on the `gpt4` client (n=10,
# presumably the ten most recent), i.e. the prompts sent to the model that
# was passed to the optimizer as `prompt_model`.
gpt4.inspect_history(n=10)





Given the fields `basic_signature`, produce the fields `proposed_signatures`.

---

Follow the following format.

Basic Signature: ${basic_signature}
Reasoning: Let's think step by step in order to ${produce the proposed_signatures}. We ...
Proposed Signatures: A list of 2 very different variations of the basic signature. Respond with a single JSON object. JSON Schema: {"$defs": {"SignatureInfo_BasicQA_": {"properties": {"instructions": {"description": "The instructions for the task", "title": "Instructions", "type": "string"}, "question_prefix": {"description": "The prefix for question", "title": "Question Prefix", "type": "string"}, "question_desc": {"description": "The description for question", "title": "Question Desc", "type": "string"}, "answer_prefix": {"description": "The prefix for answer", "title": "Answer Prefix", "type": "string"}, "answer_desc": {"description": "The description for answer", "title": "Answer Desc", "type": "string"}}, "required": ["instructions", "quest

In [None]:
# Same inspection for the `turbo` client, which is the default LM configured
# through dspy.settings (n=10, presumably the ten most recent calls).
turbo.inspect_history(n=10)

In [None]:
# Look at the instruction text attached to the generic (unspecialised)
# GenerateSignature signature; the bare expression is the cell's output.
from dspy.teleprompt.signature_opt_typed import GenerateSignature
GenerateSignature.instructions

"You are an instruction optimizer for large language models.\n\n    I will give some task instructions I've tried, along with their corresponding validation scores.\n    - The instructions are arranged in order based on their scores, where higher scores indicate better quality.\n    - Your task is to propose a new instruction that will lead a good language model to perform the task even better.\n    - Be creative, and think out of the box.\n    - Don't repeat instructions, descriptions and prefixes that have already been attempted.\n    "

In [None]:
# Probe: specialise GenerateSignature to BasicQA, wrap it in a typed
# predictor, and call it with no inputs at all to see what the default LM
# returns for it.
dspy.TypedPredictor(GenerateSignature[BasicQA])()

Prediction(
    analysis='The previous instructions were clear and provided a specific format to follow for the response.',
    proposed_signature=BasicQA(question='What are the fields to produce?', answer='analysis, proposed_signature, score'),
    score=4.5
)

In [None]:
# Show only the latest call on the `turbo` client (n=1), presumably the
# prompt and completion behind the most recent prediction.
turbo.inspect_history(n=1)





Given the fields , produce the fields `analysis`, `proposed_signature`, `score`.

---

Follow the following format.

Analysis: Consider what made the previous instructions good or bad.
Proposed Signature: A signature that will likely lead to a high score.. Respond with a single JSON object. JSON Schema: {"description": "Answer questions with short factoid answers.", "properties": {"question": {"__dspy_field_type": "input", "desc": "${question}", "prefix": "Question:", "title": "Question", "type": "string"}, "answer": {"__dspy_field_type": "output", "desc": "often between 1 and 5 words", "prefix": "Answer:", "title": "Answer", "type": "string"}}, "required": ["question", "answer"], "title": "BasicQA", "type": "object"}
Score: The expected score for the new signature. Don't write anything after this number. (Respond with a single float value)

---

Analysis:[32m The previous instructions were clear and provided a specific format to follow for the response.

Proposed Signature:
```js

In [None]:
# Display the BasicQA-specialised signature to check its instructions and
# fields.
# NOTE(review): in the output saved with this notebook, the specialised
# signature shows the generic "Given the fields , produce the fields ..."
# instructions rather than the "instruction optimizer" text that the
# unspecialised GenerateSignature carries. Confirm whether subscripting is
# expected to drop the custom instructions.
GenerateSignature[BasicQA]

GenerateSignature[BasicQA]( -> analysis, proposed_signature, score
    instructions='Given the fields , produce the fields `analysis`, `proposed_signature`, `score`.'
    analysis = Field(annotation=str required=True json_schema_extra={'desc': 'Consider what made the previous instructions good or bad.', '__dspy_field_type': 'output', 'prefix': 'Analysis:'})
    proposed_signature = Field(annotation=BasicQA required=True json_schema_extra={'desc': 'A signature that will likely lead to a high score.', '__dspy_field_type': 'output', 'prefix': 'Proposed Signature:'})
    score = Field(annotation=float required=True json_schema_extra={'desc': "The expected score for the new signature. Don't write anything after this number.", '__dspy_field_type': 'output', 'prefix': 'Score:'})
)

In [None]:
# Display the unspecialised generic signature, presumably to compare its
# instructions and fields against the BasicQA-specialised version.
GenerateSignature

GenerateSignature( -> analysis, proposed_signature, score
    instructions="You are an instruction optimizer for large language models.\n\n    I will give some task instructions I've tried, along with their corresponding validation scores.\n    - The instructions are arranged in order based on their scores, where higher scores indicate better quality.\n    - Your task is to propose a new instruction that will lead a good language model to perform the task even better.\n    - Be creative, and think out of the box.\n    - Don't repeat instructions, descriptions and prefixes that have already been attempted.\n    "
    analysis = Field(annotation=str required=True json_schema_extra={'desc': 'Consider what made the previous instructions good or bad.', '__dspy_field_type': 'output', 'prefix': 'Analysis:'})
    proposed_signature = Field(annotation=~T required=True json_schema_extra={'desc': 'A signature that will likely lead to a high score.', '__dspy_field_type': 'output', 'prefix': 'Propo