In [2]:
import dspy
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch, BootstrapFinetune
from dspy.primitives.assertions import assert_transform_module, backtrack_handler
import random
import pandas as pd
import psycopg2
from psycopg2 import Error
import os

In [3]:
from pathlib import Path
from dotenv import load_dotenv


# Location of the .env file that holds the Azure OpenAI and Postgres settings.
# Set the DOTENV_PATH environment variable to use a different file; the
# original machine-specific path is kept as the fallback so the existing
# setup keeps working unchanged.
dotenv_path = Path(os.getenv('DOTENV_PATH', r'C:\Users\Aashrith\CoE Internship\real-assistant\app\.env'))
load_dotenv(dotenv_path=dotenv_path)

# Settings read from the environment (populated by load_dotenv above).
AZURE_OPENAI_API_KEY = os.getenv('AZURE_OPENAI_API_KEY')
AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
DB_HOST = os.getenv("DB_HOST")
POSTGRES_DB = os.getenv("POSTGRES_DB")
POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD")

# os.getenv returns None for unset variables; report them here instead of
# letting a None surface later as an obscure client/connection error.
_missing_settings = [
    name
    for name in (
        'AZURE_OPENAI_API_KEY',
        'AZURE_OPENAI_ENDPOINT',
        'DB_HOST',
        'POSTGRES_DB',
        'POSTGRES_USER',
        'POSTGRES_PASSWORD',
    )
    if os.getenv(name) is None
]
if _missing_settings:
    print(f"Warning: environment variables not set (checked {dotenv_path}): {', '.join(_missing_settings)}")

In [5]:
# Azure OpenAI client (model name 'gpt-4o-mini-global-128k'), built from the
# endpoint and key loaded from the environment, then handed to dspy.configure
# as the `lm` setting.
gpt4_mini = dspy.AzureOpenAI(
    api_base=AZURE_OPENAI_ENDPOINT,
    api_version='2024-02-15-preview',
    model='gpt-4o-mini-global-128k',
    api_key=AZURE_OPENAI_API_KEY,
)
dspy.configure(lm=gpt4_mini)

In [4]:
def get_records(DB_HOST, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD):
    """Fetch every row of the `merges` table.

    Args:
        DB_HOST: Host name passed to psycopg2 as `host`.
        POSTGRES_DB: Database name passed to psycopg2 as `database`.
        POSTGRES_USER: User name passed to psycopg2 as `user`.
        POSTGRES_PASSWORD: Password passed to psycopg2 as `password`.

    Returns:
        The result of `cursor.fetchall()` for the columns
        (currentsection, previoussection, ismerge), or None when psycopg2
        raises an `Error` while connecting or querying (the error is printed).
    """
    connection = None
    try:
        connection = psycopg2.connect(database=POSTGRES_DB, user=POSTGRES_USER, password=POSTGRES_PASSWORD, host=DB_HOST)
        cursor = connection.cursor()
        try:
            cursor.execute("select currentsection, previoussection, ismerge from merges")
            return cursor.fetchall()
        finally:
            # Release the cursor even when the query fails.
            cursor.close()
    except Error as e:
        print(f"Connection error: {e}")
        # Explicit so callers can see that failure yields None, not a list.
        return None
    finally:
        # Always hand the connection back; the original leaked it on every call.
        if connection is not None:
            connection.close()

In [25]:
# Rows come back as (currentsection, previoussection, ismerge).
records = get_records(DB_HOST, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD)

# Instruction text placed in front of every pair of sections.
template = "The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based on the linguistic flow, content and section headers (if available).\n\n"

# The previous section is shown first, then the current one.
questions = [
    f"{template}Section 1:\n{previous_section}\n\nSection 2:\n{current_section}"
    for current_section, previous_section, _ in records
]

# ismerge == 1 is the positive label.
answers = [
    "Yes" if is_merge == 1 else "No"
    for _, _, is_merge in records
]

# One (question, answer) tuple per database row.
examples = list(zip(questions, answers))

In [26]:
# Shuffle reproducibly: the query that produced `examples` has no ORDER BY, so
# first put the pairs in a canonical order, then shuffle with a private seeded
# generator. The result is the same after every Restart & Run All, re-running
# this cell does not reshuffle an already-shuffled list, and the global
# `random` state is left untouched.
SHUFFLE_SEED = 42
examples.sort()
random.Random(SHUFFLE_SEED).shuffle(examples)

In [27]:
def _as_example(pair):
    """Wrap a (question, answer) tuple as a dspy.Example whose only input field is `question`."""
    question, answer = pair
    return dspy.Example(question=question, answer=answer).with_inputs('question')


# The first 25 pairs form the training set; everything after them is the dev set.
train = list(map(_as_example, examples[:25]))
dev = list(map(_as_example, examples[25:]))

In [17]:
# DSPy signature with one input field (`question`) and one output field (`answer`).
# NOTE(review): no other cell visible here references BasicQA; the CoT module
# builds its predictor from the inline 'question -> answer' string instead.
# Confirm whether this class is still needed.
class BasicQA(dspy.Signature):
    # NOTE(review): DSPy presumably uses this docstring as the prompt
    # instructions, so treat it as runtime text; confirm before rewording.
    """Answer questions."""

    # The text the model is asked about.
    question = dspy.InputField()
    # `desc` is passed to DSPy as the description of the output field.
    answer = dspy.OutputField(desc="only contains the answer")

In [18]:
class CoT(dspy.Module):
    """Single-step program that answers a question with one ChainOfThought predictor."""

    def __init__(self):
        super().__init__()
        # Inline signature: `question` in, `answer` out.
        cot_predictor = dspy.ChainOfThought('question -> answer')
        self.generate_answer = cot_predictor

    def forward(self, question):
        """Run the predictor on `question` and return its result unchanged."""
        prediction = self.generate_answer(question=question)
        return prediction

In [28]:
# Metric used both for optimization here and for the dev-set evaluation.
metric_EM = dspy.evaluate.answer_exact_match

# Optimizer settings: up to 2 bootstrapped demos, 8 candidate programs, 32 threads.
teleprompter1 = BootstrapFewShotWithRandomSearch(
    metric=metric_EM,
    max_bootstrapped_demos=2,
    num_candidate_programs=8,
    num_threads=32,
)

# Compile a fresh CoT program against the training examples.
cot_compiled = teleprompter1.compile(CoT(), trainset=train)

Going to sample between 1 and 2 traces per predictor.
Will attempt to train 8 candidate sets.


Average Metric: 15 / 25  (60.0): 100%|█████████████████████████████████████████████████| 25/25 [00:09<00:00,  2.72it/s]


Average Metric: 15 / 25  (60.0%)
Score: 60.0 for set: [0]
New best score: 60.0 for seed -3
Scores so far: [60.0]
Best score: 60.0


Average Metric: 20 / 25  (80.0): 100%|█████████████████████████████████████████████████| 25/25 [00:09<00:00,  2.57it/s]


Average Metric: 20 / 25  (80.0%)
Score: 80.0 for set: [16]
New best score: 80.0 for seed -2
Scores so far: [60.0, 80.0]
Best score: 80.0


 12%|█████████▉                                                                         | 3/25 [00:05<00:38,  1.76s/it]


Bootstrapped 2 full traces after 4 examples in round 0.


Average Metric: 22 / 25  (88.0): 100%|█████████████████████████████████████████████████| 25/25 [00:48<00:00,  1.95s/it]


Average Metric: 22 / 25  (88.0%)
Score: 88.0 for set: [16]
New best score: 88.0 for seed -1
Scores so far: [60.0, 80.0, 88.0]
Best score: 88.0
Average of max per entry across top 1 scores: 0.88
Average of max per entry across top 2 scores: 1.0
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  8%|██████▋                                                                            | 2/25 [00:05<01:04,  2.80s/it]


Bootstrapped 2 full traces after 3 examples in round 0.


Average Metric: 21 / 25  (84.0): 100%|█████████████████████████████████████████████████| 25/25 [00:13<00:00,  1.79it/s]


Average Metric: 21 / 25  (84.0%)
Score: 84.0 for set: [16]
Scores so far: [60.0, 80.0, 88.0, 84.0]
Best score: 88.0
Average of max per entry across top 1 scores: 0.88
Average of max per entry across top 2 scores: 0.96
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  8%|██████▋                                                                            | 2/25 [00:03<00:41,  1.81s/it]


Bootstrapped 1 full traces after 3 examples in round 0.


Average Metric: 17 / 20  (85.0):  80%|███████████████████████████████████████▏         | 20/25 [00:04<00:01,  4.56it/s]

Backing off 0.4 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}
Backing off 0.6 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}
Backing off 0.6 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}
Backing off 0.0 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}


Average Metric: 20 / 25  (80.0): 100%|█████████████████████████████████████████████████| 25/25 [00:47<00:00,  1.89s/it]


Average Metric: 20 / 25  (80.0%)
Score: 80.0 for set: [16]
Scores so far: [60.0, 80.0, 88.0, 84.0, 80.0]
Best score: 88.0
Average of max per entry across top 1 scores: 0.88
Average of max per entry across top 2 scores: 0.96
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  4%|███▎                                                                               | 1/25 [00:01<00:47,  1.99s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 20 / 25  (80.0): 100%|█████████████████████████████████████████████████| 25/25 [00:12<00:00,  1.94it/s]


Average Metric: 20 / 25  (80.0%)
Score: 80.0 for set: [16]
Scores so far: [60.0, 80.0, 88.0, 84.0, 80.0, 80.0]
Best score: 88.0
Average of max per entry across top 1 scores: 0.88
Average of max per entry across top 2 scores: 0.96
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  8%|██████▋                                                                            | 2/25 [00:03<00:44,  1.91s/it]


Bootstrapped 1 full traces after 3 examples in round 0.


Average Metric: 20 / 25  (80.0): 100%|█████████████████████████████████████████████████| 25/25 [00:41<00:00,  1.65s/it]


Average Metric: 20 / 25  (80.0%)
Score: 80.0 for set: [16]
Scores so far: [60.0, 80.0, 88.0, 84.0, 80.0, 80.0, 80.0]
Best score: 88.0
Average of max per entry across top 1 scores: 0.88
Average of max per entry across top 2 scores: 0.96
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  4%|███▎                                                                               | 1/25 [00:05<02:04,  5.18s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 17 / 21  (81.0):  80%|███████████████████████████████████████▏         | 20/25 [00:08<00:02,  2.22it/s]

Backing off 0.1 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}


Average Metric: 21 / 25  (84.0): 100%|█████████████████████████████████████████████████| 25/25 [00:16<00:00,  1.52it/s]


Average Metric: 21 / 25  (84.0%)
Score: 84.0 for set: [16]
Scores so far: [60.0, 80.0, 88.0, 84.0, 80.0, 80.0, 80.0, 84.0]
Best score: 88.0
Average of max per entry across top 1 scores: 0.88
Average of max per entry across top 2 scores: 0.96
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 12%|█████████▉                                                                         | 3/25 [00:11<01:21,  3.72s/it]


Bootstrapped 2 full traces after 4 examples in round 0.


Average Metric: 23 / 25  (92.0): 100%|█████████████████████████████████████████████████| 25/25 [00:35<00:00,  1.41s/it]


Average Metric: 23 / 25  (92.0%)
Score: 92.0 for set: [16]
New best score: 92.0 for seed 5
Scores so far: [60.0, 80.0, 88.0, 84.0, 80.0, 80.0, 80.0, 84.0, 92.0]
Best score: 92.0
Average of max per entry across top 1 scores: 0.92
Average of max per entry across top 2 scores: 1.0
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  4%|███▎                                                                               | 1/25 [00:02<00:56,  2.33s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 11 / 14  (78.6):  56%|███████████████████████████▍                     | 14/25 [00:05<00:05,  1.94it/s]

Backing off 0.8 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}


Average Metric: 12 / 15  (80.0):  60%|█████████████████████████████▍                   | 15/25 [00:08<00:12,  1.24s/it]

Backing off 0.3 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}
Backing off 0.2 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}
Backing off 1.0 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}


Average Metric: 13 / 17  (76.5):  64%|███████████████████████████████▎                 | 16/25 [00:10<00:11,  1.32s/it]

Backing off 0.3 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}
Backing off 1.0 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}


Average Metric: 15 / 19  (78.9):  76%|█████████████████████████████████████▏           | 19/25 [00:20<00:13,  2.24s/it]

Backing off 0.2 seconds after 2 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}


Average Metric: 19 / 25  (76.0): 100%|█████████████████████████████████████████████████| 25/25 [00:25<00:00,  1.00s/it]


Average Metric: 19 / 25  (76.0%)
Score: 76.0 for set: [16]
Scores so far: [60.0, 80.0, 88.0, 84.0, 80.0, 80.0, 80.0, 84.0, 92.0, 76.0]
Best score: 92.0
Average of max per entry across top 1 scores: 0.92
Average of max per entry across top 2 scores: 1.0
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 12%|█████████▉                                                                         | 3/25 [00:06<00:50,  2.28s/it]


Bootstrapped 2 full traces after 4 examples in round 0.


Average Metric: 15 / 20  (75.0):  80%|███████████████████████████████████████▏         | 20/25 [00:25<00:13,  2.67s/it]

Backing off 0.3 seconds after 1 tries calling function <function AzureOpenAI.request at 0x0000016012AEF8B0> with kwargs {}


Average Metric: 20 / 25  (80.0): 100%|█████████████████████████████████████████████████| 25/25 [00:29<00:00,  1.19s/it]

Average Metric: 20 / 25  (80.0%)
Score: 80.0 for set: [16]
Scores so far: [60.0, 80.0, 88.0, 84.0, 80.0, 80.0, 80.0, 84.0, 92.0, 76.0, 80.0]
Best score: 92.0
Average of max per entry across top 1 scores: 0.92
Average of max per entry across top 2 scores: 1.0
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0
11 candidate programs found.





In [29]:
NUM_THREADS = 32

# Evaluator over a copy of the dev set, scored with the same exact-match
# metric, showing a progress bar and a results table (display_table=15).
evaluate_hotpot = Evaluate(
    devset=list(dev),
    metric=metric_EM,
    num_threads=NUM_THREADS,
    display_progress=True,
    display_table=15,
)

# Last expression of the cell, so the returned score is displayed.
evaluate_hotpot(cot_compiled)

Average Metric: 10 / 10  (100.0): 100%|████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.76it/s]

Average Metric: 10 / 10  (100.0%)





Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",Yes,"determine if these two sections should be combined. The first section introduces the article, while the second section provides specific details about the effectiveness and...",Yes,✔️ [True]
1,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",Yes,"determine if these two sections should be combined. The first section introduces the article, while the second section provides specific details about the subordination of...",Yes,✔️ [True]
2,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",Yes,"determine if these two sections should be combined. The first section introduces the topic of environmental policies and regulations, specifically addressing the handling of hazardous...",Yes,✔️ [True]
3,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",Yes,"determine if these two sections should be combined. The first section introduces the article, while the second section elaborates on the Company's obligations regarding the...",Yes,✔️ [True]
4,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",Yes,"determine if these two sections should be combined. The first section introduces the article, while the second section provides important information about the entire agreement...",Yes,✔️ [True]
5,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",No,"determine if these two sections should be combined. The first section discusses the insurance requirements and responsibilities of the Tenant and Landlord, detailing the types...",No,✔️ [True]
6,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",Yes,determine if these two sections should be combined. The first section discusses the contributions required from both the Company and the Lessor for the operation...,Yes,✔️ [True]
7,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",Yes,"determine if these two sections should be combined. The first section introduces the article, while the second section provides specific details about the recording of...",Yes,✔️ [True]
8,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",Yes,"determine if these two sections should be combined. The first section introduces the article, while the second section provides important legal stipulations regarding the governing...",Yes,✔️ [True]
9,"The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based...",No,determine if these two sections should be combined. The first section discusses the responsibilities of the Tenant and Landlord in the event of damage to...,No,✔️ [True]


100.0

In [31]:
# Print the full question text of dev example 9; the evaluation table above
# truncates the question column.
print(dev[9].question)

The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based on the linguistic flow, content and section headers (if available).

Section 1:
SECTION 19
PREMISES UNTENANTABLE
If the Premises be damaged by fire or other causes, Tenant shall immediately notify
Landlord thereof, and the damages shall be repaired by Landlord, at its own expense; provided,
however, that if the damage or destruction was caused by the act or omission of Tenant or any
Invitee, then Tenant shall pay the amount by which such expenses exceed the insurance proceeds
actually received by Landlord on account of such damage or destruction. Landlord need only
repair the damaged structural parts of the Premises and is not required to repair or replace any
equipment, fixtures, furnishings or decorations unless originally installed by Landlord. Landlord
is also not responsible for delays due to settling insurance claims, obtaining estimates, l

: 

In [30]:
# Dump the last (n=1) interaction recorded on the LM client, to inspect the
# prompt that was sent to the model.
gpt4_mini.inspect_history(n=1)





Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: The contents of two document parts are listed below. If the two sections should be combined into one, return 'Yes'; if not, return 'No' based on the linguistic flow, content and section headers (if available). Section 1: money, Lessor will accept payments of rent from such Leasehold Mortgagee and this Agreement will not terminate, but will remain in full force and effect, pending Leasehold Mortgagee's cure of such default within the time periods described herein or resort to foreclosure or sale proceedings under its deed of trust or other security instruments. 2.18.4 If any default has been cured by a Leasehold Mortgagee or Assignee, Lessor agrees that upon completion of any foreclosure proceedings or sale under the deed of trust or other security instrument