In [100]:
import os
import dspy
import phoenix as px
from utils import launch_app_and_start_project
import json

In [2]:
px.launch_app()

üåç To view the Phoenix app in your browser, visit http://localhost:6006/
üìñ For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


<phoenix.session.session.ThreadSession at 0x10e3f8fd0>

In [26]:
launch_app_and_start_project(project_name='openai_dspy_test', llm_provider='dspy')

üî≠ OpenTelemetry Tracing Details üî≠
|  Phoenix Project: openai_dspy_test
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



<phoenix.otel.otel.TracerProvider at 0x149f21c10>

In [27]:
# OpenAI-backed language model client; a later cell registers it as the default LM
# through dspy.settings.configure. The API key is read from the OPENAI_API_KEY
# environment variable (os.getenv yields None when the variable is unset).
# NOTE(review): the near-zero temperature is presumably meant to keep outputs
# close to deterministic — confirm.
llm = dspy.OpenAI(model='gpt-4o-mini', temperature=0.0001,
                api_key=os.getenv("OPENAI_API_KEY"))

In [29]:
# llm("Who was the first president of United States?")

['The first president of the United States was George Washington. He served from April 30, 1789, to March 4, 1797.']

In [5]:
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url='http://20.102.90.50:2017/wiki17_abstracts')

In [6]:
dspy.settings.configure(lm=llm, rm=colbertv2_wiki17_abstracts)

In [7]:
# Examples

In [8]:
from dspy.datasets import HotPotQA

# Load HotPotQA with 20 training examples and 50 dev examples; test_size=0 asks
# for no test examples. Both seeds are pinned explicitly.
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=2023, dev_size=50, test_size=0)

  table = cls._concat_blocks(blocks, axis=0)


In [9]:
print(dataset.train[0])

Example({'question': 'At My Window was released by which American singer-songwriter?', 'answer': 'John Townes Van Zandt'}) (input_keys=None)


In [12]:
# Note that each Example has a question and an answer
# We need to tell dspy which part is the input. In this case it is question
# The remaining fields are labels and/or metadata

In [10]:
# Declare `question` as the input field of every example in both splits.
trainset = [example.with_inputs('question') for example in dataset.train]
devset = [example.with_inputs('question') for example in dataset.dev]

In [11]:
train_example = trainset[0]
print(f"Question: {train_example.question}")
print(f"Answer: {train_example.answer}")

Question: At My Window was released by which American singer-songwriter?
Answer: John Townes Van Zandt


In [12]:
dev_example = devset[18]
print(f"Question: {dev_example.question}")
print(f"Answer: {dev_example.answer}")
print(f"Relevant Wikipedia Titles: {dev_example.gold_titles}")

Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Answer: English
Relevant Wikipedia Titles: {'Restaurant: Impossible', 'Robert Irvine'}


In [13]:
# The dev set has a third field, `gold_titles`, holding the titles of the relevant Wikipedia articles.

# Building Blocks
## How to define a Signature
- A minimal description of the sub-task the LM is supposed to solve.
- A description of one or more input fields (e.g., input question) that we will give to the LM.
- A description of one or more output fields (e.g., the question's answer) that we will expect from the LM.

In [14]:
# Signature for plain question answering: one input field (`question`) and one
# output field (`answer`). The prompt inspected in a later cell shows the class
# docstring being sent as the task instruction and the `desc` text appearing as
# the format hint for the answer, so both are part of the program's behavior.
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

## How to define a Predictor
- It's a module.
- It knows how to use the LLM to implement a Signature.

In [15]:
generate_answer = dspy.Predict(BasicQA)

In [16]:
pred = generate_answer(question = dev_example.question)

In [17]:
pred

Prediction(
    answer='American'
)

In [18]:
llm.inspect_history(n=1)




Answer questions with short factoid answers.

---

Follow the following format.

Question: ${question}
Answer: often between 1 and 5 words

---

Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Answer:[32m American[0m





'\n\n\nAnswer questions with short factoid answers.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nAnswer: often between 1 and 5 words\n\n---\n\nQuestion: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?\nAnswer:\x1b[32m American\x1b[0m\n\n\n'

In [19]:
# Also use Chain of Thought
generate_chain_of_thought = dspy.ChainOfThought(BasicQA)
pred = generate_chain_of_thought(question=dev_example.question)
print(pred)

Prediction(
    rationale="determine the chef's nationality. The chef featured in Restaurant: Impossible is Robert Irvine, who is from the United Kingdom.",
    answer='British'
)


In [20]:
llm.inspect_history(n=1)




Answer questions with short factoid answers.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: often between 1 and 5 words

---

Question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?
Reasoning: Let's think step by step in order to[32m determine the chef's nationality. The chef featured in Restaurant: Impossible is Robert Irvine, who is from the United Kingdom.  
Answer: British[0m





"\n\n\nAnswer questions with short factoid answers.\n\n---\n\nFollow the following format.\n\nQuestion: ${question}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: often between 1 and 5 words\n\n---\n\nQuestion: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible?\nReasoning: Let's think step by step in order to\x1b[32m determine the chef's nationality. The chef featured in Restaurant: Impossible is Robert Irvine, who is from the United Kingdom.  \nAnswer: British\x1b[0m\n\n\n"

### As shown above, the predictor can be used with any type of Signature.
- The predictors are basically different ways in which LLMs can be used.
- Signatures describe the tasks that need to be done: each one defines the task and its expected inputs and outputs.


In [21]:
# Using Retrieval
retrieve = dspy.Retrieve(k=3)
topK_passages = retrieve(dev_example.question).passages

In [22]:
print(f"Top {retrieve.k} passages for question: {dev_example.question} \n", '-' * 30, '\n')

for idx, passage in enumerate(topK_passages):
    print(f'{idx+1}]', passage, '\n')

Top 3 passages for question: What is the nationality of the chef and restaurateur featured in Restaurant: Impossible? 
 ------------------------------ 

1] Restaurant: Impossible | Restaurant: Impossible is an American reality television series, featuring chef and restaurateur Robert Irvine, that aired on Food Network from 2011 to 2016. 

2] Jean Joho | Jean Joho is a French-American chef and restaurateur. He is chef/proprietor of Everest in Chicago (founded in 1986), Paris Club Bistro & Bar and Studio Paris in Chicago, The Eiffel Tower Restaurant in Las Vegas, and Brasserie JO in Boston. 

3] List of Restaurant: Impossible episodes | This is the list of the episodes for the American cooking and reality television series "Restaurant Impossible", produced by Food Network. The premise of the series is that within two days and on a budget of $10,000, celebrity chef Robert Irvine renovates a failing American restaurant with the goal of helping to restore it to profitability and prominence.

In [23]:
# Basic RAG

In [24]:
# Signature for retrieval-augmented question answering: takes the retrieved
# `context` together with the `question` and produces a short `answer`.
# NOTE(review): the docstring reads "Answer question" where BasicQA reads
# "Answer questions". It is left untouched because the docstring is presumably
# sent to the LM as the task instruction, as it is for BasicQA — confirm before
# editing, since changing it would change the prompt.
class GenerateAnswer(dspy.Signature):
    """Answer question with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

## Now let's make a program
- Basically, we will build a RAG (retrieval-augmented generation) pipeline.
- So we will need 1) a Retriever and 2) a Predictor.
- Then we will define how to use the two elements together, so that they can be evaluated and optimized.

In [30]:
class RAG(dspy.Module):
    """Two-step program: retrieve passages, then answer from them.

    Args:
        num_passages: value passed as `k` to `dspy.Retrieve` (default 3).
    """

    def __init__(self, num_passages=3):
        super().__init__()
        # Retrieval step followed by a chain-of-thought predictor over GenerateAnswer.
        self.retrieve = dspy.Retrieve(k=num_passages)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        """Return a `dspy.Prediction` carrying the retrieved `context` and the `answer`."""
        passages = self.retrieve(question).passages
        answer = self.generate_answer(context=passages, question=question).answer
        return dspy.Prediction(context=passages, answer=answer)

In [31]:
import dspy.evaluate
from dspy.teleprompt import BootstrapFewShot

# Validation logic: check that the predicted answer is correct.
# Also check that the retrieved context does actually contain that answer.

# This is like the metric or error we want to use to optimize on
def validate_context_and_answer(example, pred, trace=None):
    """Metric that requires both an exact answer match and a passage match.

    Args:
        example: the labelled example to compare against.
        pred: the program's prediction for that example.
        trace: accepted for compatibility with the caller; not used here.

    Returns:
        The exact-match result when it is falsy, otherwise the passage-match
        result (the same value `exact and passage` would produce).
    """
    # Both checks are always evaluated, regardless of the first result.
    exact_match = dspy.evaluate.answer_exact_match(example, pred)
    passage_match = dspy.evaluate.answer_passage_match(example, pred)
    return passage_match if exact_match else exact_match

# The teleprompter plays the role of the optimizer (loosely analogous to SGD or Adam);
# it is configured with the validation metric defined above.
teleprompter = BootstrapFewShot(metric=validate_context_and_answer)

# Compile a fresh RAG program against the training set.
compile_rag = teleprompter.compile(RAG(), trainset=trainset)


 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 11/20 [00:12<00:10,  1.16s/it]

Bootstrapped 4 full traces after 12 examples in round 0.





In [32]:
# Ask any question you like to this simple RAG program.
my_question = "What castle did David Gregory inherit?"

# Get the prediction. This contains `pred.context` and `pred.answer`.
pred = compile_rag(my_question)

# Print the contexts and the answer.
print(f"Question: {my_question}")
print(f"Predicted Answer: {pred.answer}")
print(f"Retrieved Contexts (truncated): {[c[:200] + '...' for c in pred.context]}")

Question: What castle did David Gregory inherit?
Predicted Answer: Kinnairdy Castle
Retrieved Contexts (truncated): ['David Gregory (physician) | David Gregory (20 December 1625 ‚Äì 1720) was a Scottish physician and inventor. His surname is sometimes spelt as Gregorie, the original Scottish spelling. He inherited Kinn...', 'Gregory Tarchaneiotes | Gregory Tarchaneiotes (Greek: ŒìœÅŒ∑Œ≥œåœÅŒπŒøœÇ Œ§Œ±œÅœáŒ±ŒΩŒµŒπœéœÑŒ∑œÇ , Italian: "Gregorio Tracanioto" or "Tracamoto" ) was a "protospatharius" and the long-reigning catepan of Italy from 998 t...', 'David Gregory (mathematician) | David Gregory (originally spelt Gregorie) FRS (? 1659 ‚Äì 10 October 1708) was a Scottish mathematician and astronomer. He was professor of mathematics at the University ...']


In [33]:
llm.inspect_history(n=1)




Who was the first president of United States?[32m The first president of the United States was George Washington. He served from April 30, 1789, to March 4, 1797.[0m





'\n\n\nWho was the first president of United States?\x1b[32m The first president of the United States was George Washington. He served from April 30, 1789, to March 4, 1797.\x1b[0m\n\n\n'

In [34]:
llm.history[-1]

{'prompt': 'Who was the first president of United States?',
 'response': {'id': 'chatcmpl-AAkDfXgCiTF2jipgvOGLC1Y3HVgMX',
  'choices': [{'finish_reason': 'stop',
    'index': 0,
    'logprobs': None,
    'message': {'content': 'The first president of the United States was George Washington. He served from April 30, 1789, to March 4, 1797.',
     'role': 'assistant',
     'function_call': None,
     'tool_calls': None,
     'refusal': None}}],
  'created': 1727124311,
  'model': 'gpt-4o-mini-2024-07-18',
  'object': 'chat.completion',
  'system_fingerprint': 'fp_1bb46167f9',
  'usage': {'completion_tokens': 31,
   'prompt_tokens': 16,
   'total_tokens': 47,
   'completion_tokens_details': {'reasoning_tokens': 0}}},
 'kwargs': {'stringify_request': '{"temperature": 0.0001, "max_tokens": 150, "top_p": 1, "frequency_penalty": 0, "presence_penalty": 0, "n": 1, "model": "gpt-4o-mini", "messages": [{"role": "user", "content": "Who was the first president of United States?"}]}'},
 'raw_kwargs'

In [35]:
# Show each predictor of the compiled program together with its first demo.
for predictor_name, predictor in compile_rag.named_predictors():
    first_demo = predictor.demos[0]
    print(predictor_name)
    print(first_demo)
    print()

generate_answer
Example({'augmented': True, 'context': ['Tae Kwon Do Times | Tae Kwon Do Times is a magazine devoted to the martial art of taekwondo, and is published in the United States of America. While the title suggests that it focuses on taekwondo exclusively, the magazine also covers other Korean martial arts. "Tae Kwon Do Times" has published articles by a wide range of authors, including He-Young Kimm, Thomas Kurz, Scott Shaw, and Mark Van Schuyver.', "Kwon Tae-man | Kwon Tae-man (born 1941) was an early Korean hapkido practitioner and a pioneer of the art, first in Korea and then in the United States. He formed one of the earliest dojang's for hapkido in the United States in Torrance, California, and has been featured in many magazine articles promoting the art.", 'Hee Il Cho | Cho Hee Il (born October 13, 1940) is a prominent Korean-American master of taekwondo, holding the rank of 9th "dan" in the martial art. He has written 11 martial art books, produced 70 martial art tra

In [36]:
from dspy.evaluate.evaluate import Evaluate

# Set up the `evaluate_on_hotpotqa` evaluator over the dev set
# (single-threaded, with a progress bar, display_table=5).
evaluate_on_hotpotqa = Evaluate(devset=devset, num_threads=1, display_progress=True, display_table=5)

# Evaluate the `compile_rag` program with the `answer_exact_match` metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_hotpotqa(compile_rag, metric=metric)

Average Metric: 28 / 50  (56.0): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [00:44<00:00,  1.13it/s]


Unnamed: 0,question,example_answer,gold_titles,context,pred_answer,answer_exact_match
0,Are both Cangzhou and Qionghai in the Hebei province of China?,no,"{'Cangzhou', 'Qionghai'}","['Cangzhou | Cangzhou () is a prefecture-level city in eastern Hebei province, People\'s Republic of China. At the 2010 census, Cangzhou\'s built-up (""or metro"") area...",No,‚úîÔ∏è [True]
1,Who conducts the draft in which Marc-Andre Fleury was drafted to the Vegas Golden Knights for the 2017-18 season?,National Hockey League,"{'2017 NHL Expansion Draft', '2017‚Äì18 Pittsburgh Penguins season'}",['2017‚Äì18 Pittsburgh Penguins season | The 2017‚Äì18 Pittsburgh Penguins season will be the 51st season for the National Hockey League ice hockey team that was...,National Hockey League,‚úîÔ∏è [True]
2,"The Wings entered a new era, following the retirement of which Canadian retired professional ice hockey player and current general manager of the Tampa Bay...",Steve Yzerman,"{'2006‚Äì07 Detroit Red Wings season', 'Steve Yzerman'}","['Steve Yzerman | Stephen Gregory ""Steve"" Yzerman ( ; born May 9, 1965) is a Canadian retired professional ice hockey player and current general manager...",Steve Yzerman,‚úîÔ∏è [True]
3,What river is near the Crichton Collegiate Church?,the River Tyne,"{'Crichton Castle', 'Crichton Collegiate Church'}","[""Crichton Collegiate Church | Crichton Collegiate Church is situated about 0.6 mi south west of the hamlet of Crichton in Midlothian, Scotland. Crichton itself is...",River Tyne,‚úîÔ∏è [True]
4,In the 10th Century A.D. Ealhswith had a son called √Üthelweard by which English king?,King Alfred the Great,"{'√Üthelweard (son of Alfred)', 'Ealhswith'}","[""√Üthelweard of East Anglia | √Üthelweard (died 854) was a 9th-century king of East Anglia, the long-lived Anglo-Saxon kingdom which today includes the English counties...",King Alfred the Great,‚úîÔ∏è [True]


56.0

In [42]:
# Create a Phoenix client
client = px.Client()

# Get the trace dataset
trace_dataset = client.get_trace_dataset(project_name='openai_dspy_test')


  df_attributes = pd.DataFrame.from_records(
  df_attributes = pd.DataFrame.from_records(


In [64]:
trace_df = trace_dataset.dataframe

In [116]:
collect = []
def output_parse(row):
    """Extract the token-usage record from one span row.

    Args:
        row: a span row offering attribute access to `span_kind` and item
            access to 'attributes.output.value' (e.g. a pandas Series
            yielded by `DataFrame.iterrows()`).

    Returns:
        The value stored under 'usage' in the JSON-decoded output of an
        'LLM' span, or None when the span is not an LLM span, its output is
        missing (None/NaN), is not valid JSON, is not a JSON object, or has
        no 'usage' entry.
    """
    if row.span_kind != 'LLM':
        return None

    raw_output = row['attributes.output.value']
    # A span without recorded output holds None/NaN here; json.loads would
    # raise TypeError on it, so skip anything that is not text.
    if not isinstance(raw_output, (str, bytes, bytearray)):
        return None

    try:
        payload = json.loads(raw_output)
    except ValueError:
        # Covers json.JSONDecodeError: the output is plain text, not JSON.
        return None

    if not isinstance(payload, dict):
        return None
    return payload.get('usage')
        

# Gather the parsed usage record (or None) for every span row.
collect.extend(output_parse(span_row) for _, span_row in trace_df.iterrows())

# Total tokens across all spans that yielded a usage record.
sum(usage['total_tokens'] for usage in collect if usage is not None)

157984