In [1]:
import os

import dspy
from dotenv import load_dotenv

# Copy variables from a local .env file into the process environment
# before the key is looked up.
load_dotenv()

# Evaluates to None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [2]:
# Language-model client; the API key is taken from the environment rather
# than being written into the notebook.
turbo = dspy.OpenAI(
    model='gpt-3.5-turbo',
    api_key=OPENAI_API_KEY,
)

# Register the client as the LM in DSPy's global settings.
dspy.settings.configure(lm=turbo)

In [3]:
# (sentence, label) pairs; the label is "Yes" when the sentence is
# grammatically correct and "No" otherwise. Sentences come in
# incorrect/corrected pairs.
sentences_data_test = [
    ("Bitcoin is for $7,094 this morning, which CoinDesk says.", "No"),
    ("Bitcoin goes for $7,094 this morning, according to CoinDesk.", "Yes"),
    ("The effect of widespread dud targets two face up attack position monsters on the field.", "No"),
    ("The effect of 'widespread dud' targets two face-up attack position monsters on the field.", "Yes"),
    ("tax on sales of stores for non residents are set at 21% for 2014 and 20% in 2015 payable on sales tentatively earned from the difference of the property value some time of purchase (price differences according to working time) and theyear to which sale couples (sales costs), based on the approved annual on the base approved by law).", "No"),
    ("Capital Gains tax on the sale of properties for non-residents is set at 21% for 2014 and 20% in 2015 payable on profits earned on the difference of the property value between the year of purchase (purchase price plus costs) and the year of sale (sales price minus costs), based on the approved annual percentage increase on the base value approved by law.", "Yes"),
    ("Much many brands and sellers still in the market.", "No"),
    # NOTE(review): this "Yes" sentence has no finite verb; confirm the label
    # against the source data.
    ("Many brands and sellers still in the market.", "Yes"),
    ("this is is the latest Maintenance release of Samba 3.6", "No"),
    # The corrected sentence previously still contained the duplicated "is is"
    # while being labelled grammatical; the duplicate word is removed here.
    ("This is the latest maintenance release of Samba 3.6.", "Yes")]

# One dspy.Example per pair; only `text` is marked as an input, so `answer`
# is the field left for the program to predict.
dataset = [
    dspy.Example(text=sentence, answer=label).with_inputs("text")
    for sentence, label in sentences_data_test
]
print(dataset[:3])

# DSPy signature for the grammaticality task: one input field (`text`) and
# one output field (`answer`).
# The class docstring is runtime text: it appears verbatim as the instruction
# at the top of the generated prompt, so edit it only to change the prompt.
class GC(dspy.Signature):
    """You are given a text. You must indicate with Yes/No whether the text is grammatically correct or not """
    text = dspy.InputField()  # the sentence to judge
    answer = dspy.OutputField(desc="Yes or No")  # `desc` is shown after "Answer:" in the prompt's format section



class CoT(dspy.Module):
    """Module that answers the `GC` signature through a chain-of-thought predictor."""

    def __init__(self):
        super().__init__()
        # Declared as an attribute so the sub-module can later be compiled
        # (e.g., taught a prompt).
        self.generate_answer = dspy.ChainOfThought(GC)

    def forward(self, text):
        """Run the chain-of-thought predictor on `text` and return its prediction."""
        prediction = self.generate_answer(text=text)
        return prediction

# Compile the chain-of-thought classifier with a basic teleprompter.
# (Despite its name, `compiled_rag` is a compiled `CoT` program; no retrieval
# is involved.)
from dspy.teleprompt import BootstrapFewShot

# Metric the teleprompter uses to score a prediction against the example.
metric_EM = dspy.evaluate.answer_exact_match

teleprompter = BootstrapFewShot(metric=metric_EM)

# Compile a fresh CoT instance using `dataset` as the training set.
compiled_rag = teleprompter.compile(
    CoT(),
    trainset=dataset,
)



[Example({'text': 'Bitcoin is for $7,094 this morning, which CoinDesk says.', 'answer': 'No'}) (input_keys={'text'}), Example({'text': 'Bitcoin goes for $7,094 this morning, according to CoinDesk.', 'answer': 'Yes'}) (input_keys={'text'}), Example({'text': 'The effect of widespread dud targets two face up attack position monsters on the field.', 'answer': 'No'}) (input_keys={'text'})]


 50%|█████     | 5/10 [00:00<00:00, 560.47it/s]

Bootstrapped 4 full traces after 6 examples in round 0.





In [4]:
# Uncompiled baseline: a fresh CoT module that has not been through the
# teleprompter (despite the name, no retrieval is involved).
rag = CoT()

# Run both programs on the same inputs and label every line of output, so it
# is always clear which program produced which prediction.
for text in ("this is correct not will do", "You can read well"):
    pred = compiled_rag(text)
    print('compiled', pred)
    pred = rag(text)
    print('plain', pred)

compiled Prediction(
    rationale='Answer: No',
    answer='No'
)
plain Prediction(
    rationale='Answer: No',
    answer='No'
)
Prediction(
    rationale='Answer: No',
    answer='No'
)
plain Prediction(
    rationale='Answer: Yes',
    answer='Yes'
)


In [5]:
# Show the two most recent prompt/completion pairs sent through `turbo`.
# The trailing `;` stops the notebook from also echoing the call's return
# value, which would repeat the same history as one raw escaped string.
turbo.inspect_history(n=2);




You are given a text. You must indicate with Yes/No whether the text is grammatically correct or not

---

Follow the following format.

Text: ${text}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: Yes or No

---

Text: You can read well
Reasoning: Let's think step by step in order to[32m Answer: Yes[0m





You are given a text. You must indicate with Yes/No whether the text is grammatically correct or not

---

Follow the following format.

Text: ${text}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: Yes or No

---

Text: You can read well
Reasoning: Let's think step by step in order to Answer: Yes
Answer:[32m Yes[0m





"\n\n\nYou are given a text. You must indicate with Yes/No whether the text is grammatically correct or not\n\n---\n\nFollow the following format.\n\nText: ${text}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: Yes or No\n\n---\n\nText: You can read well\nReasoning: Let's think step by step in order to\x1b[32m Answer: Yes\x1b[0m\n\n\n\n\n\nYou are given a text. You must indicate with Yes/No whether the text is grammatically correct or not\n\n---\n\nFollow the following format.\n\nText: ${text}\nReasoning: Let's think step by step in order to ${produce the answer}. We ...\nAnswer: Yes or No\n\n---\n\nText: You can read well\nReasoning: Let's think step by step in order to Answer: Yes\nAnswer:\x1b[32m Yes\x1b[0m\n\n\n"

In [6]:
from dspy.evaluate.evaluate import Evaluate

# Reusable evaluator over `dataset`: single-threaded, with a progress bar and
# a results table limited to 5 rows.
# NOTE(review): `dataset` is also the trainset passed to the teleprompter, so
# the score below is not a held-out estimate.
evaluate_on_mydata = Evaluate(
    devset=dataset,
    num_threads=1,
    display_progress=True,
    display_table=5,
)

# Score the compiled program with the `answer_exact_match` metric.
metric = dspy.evaluate.answer_exact_match
evaluate_on_mydata(compiled_rag, metric=metric)

Average Metric: 10 / 10  (100.0): 100%|██████████| 10/10 [00:00<00:00, 907.70it/s]


Unnamed: 0,text,example_answer,rationale,pred_answer,answer_exact_match
0,"Bitcoin is for $7,094 this morning, which CoinDesk says.",No,Answer: No,No,✔️ [True]
1,"Bitcoin goes for $7,094 this morning, according to CoinDesk.",Yes,Answer: Yes,Yes,✔️ [True]
2,The effect of widespread dud targets two face up attack position monsters on the field.,No,Answer: Yes,No,✔️ [True]
3,The effect of 'widespread dud' targets two face-up attack position monsters on the field.,Yes,Answer: Yes,Yes,✔️ [True]
4,tax on sales of stores for non residents are set at 21% for 2014 and 20% in 2015 payable on sales tentatively earned from the...,No,Answer: No,No,✔️ [True]


100.0

In [7]:
# Baseline: score the uncompiled `rag` program with the same evaluator
# (`evaluate_on_mydata`) and metric (`metric`) defined in the evaluation cell
# above, so the two scores are directly comparable.
evaluate_on_mydata(rag, metric=metric)

Average Metric: 7 / 10  (70.0): 100%|██████████| 10/10 [00:00<00:00, 1095.15it/s]


Unnamed: 0,text,example_answer,rationale,pred_answer,answer_exact_match
0,"Bitcoin is for $7,094 this morning, which CoinDesk says.",No,"Text: Bitcoin is for $7,094 this morning, which CoinDesk says. Reasoning: Let's think step by step in order to determine if this sentence is grammatically...",No,✔️ [True]
1,"Bitcoin goes for $7,094 this morning, according to CoinDesk.",Yes,Answer: Yes,Yes,✔️ [True]
2,The effect of widespread dud targets two face up attack position monsters on the field.,No,Answer: No,No,✔️ [True]
3,The effect of 'widespread dud' targets two face-up attack position monsters on the field.,Yes,Answer: No,No,False
4,tax on sales of stores for non residents are set at 21% for 2014 and 20% in 2015 payable on sales tentatively earned from the...,No,Answer: No,No,✔️ [True]


70.0