In [5]:
import dspy
from dspy.evaluate import Evaluate
from dspy.teleprompt import BootstrapFewShot, BootstrapFewShotWithRandomSearch, BootstrapFinetune
from dspy.primitives.assertions import assert_transform_module, backtrack_handler
import random
import pandas as pd
import psycopg2
from psycopg2 import Error
import os

In [6]:
# Load the labelled tenant-summary examples from a local Excel workbook.
# Later cells read the "summary" column (model input) and the "label" column (gold tenant name).
# NOTE(review): absolute, user-specific Windows path — the notebook cannot be re-run on
# another machine without editing this line.
data = pd.read_excel(r"C:\Users\Aashrith\CoE Internship\real-assistant\lm-refinement\tenant-summary-data-examples.xlsx")
# Preview the first rows as the cell output.
data.head()

Unnamed: 0,documentid,tenant,summary,label
0,1558,Vaia-Dax LLC.,# Lease Abstract\n\n## Asset Class\n- **Indust...,Vaia-Dax LLC.
1,1556,Vaia-Dax LLC.,# Lease Abstract\n\n## Asset Class\n- **Indust...,Vaia-Dax LLC.
2,2049,Toughtam Inc,# Lease Abstract\n\n## Asset Class\n- **Type:*...,Toughtam Inc
3,2048,Tessiture Corp,# Lease Abstract\n\n## Asset Class\n- **Type:*...,Tessiture Corp
4,2047,Highvivalux Inc.,# Lease Abstract\n\n## Asset Class\n- **Type:*...,Highvivalux Inc.


In [None]:
# print(len(data))

In [7]:
# Instruction text prepended to every lease abstract to form the model's `question`.
template = "Extract the data from the following lease abstract.  The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and modify/change that information so that the output has only the tenant/lessee name and all other extra information (description about the tenant/lessee name or company, acronyms, etc.) related to the name is discarded.\n"

# One prompt per row: instruction followed by that row's lease summary.
questions = [template + summary for summary in data["summary"]]

# Gold tenant names, row-aligned with `questions`.
answers = list(data["label"])

# (question, answer) pairs; both lists come from the same frame, so they have equal length.
examples = list(zip(questions, answers))

In [19]:
def get_records(DB_HOST, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD):
    """Fetch ``(id, summary)`` rows from the ``documents`` table.

    Runs a fixed query that selects ``id`` and ``metadata ->> 'summary'`` for
    every row with ``id >= 352``, ordered by ``id``.

    Args:
        DB_HOST: Host name passed to ``psycopg2.connect(host=...)``.
        POSTGRES_DB: Database name.
        POSTGRES_USER: User name.
        POSTGRES_PASSWORD: Password for ``POSTGRES_USER``.

    Returns:
        The list of ``(id, summary)`` tuples returned by ``fetchall()``, or
        ``None`` if psycopg2 raised an ``Error`` (the error is printed).
    """
    connection = None
    try:
        connection = psycopg2.connect(database=POSTGRES_DB, user=POSTGRES_USER, password=POSTGRES_PASSWORD, host=DB_HOST)
        # The cursor context manager closes the cursor when the block exits.
        with connection.cursor() as cursor:
            cursor.execute("select id, (metadata ->> 'summary') as summary from documents where id >= 352 order by id")
            return cursor.fetchall()
    except Error as e:
        print(f"Connection error: {e}")
        # Same result as before on failure, now explicit: callers must handle None.
        return None
    finally:
        # Always release the connection; previously it was left open on both
        # the success and the error path.
        if connection is not None:
            connection.close()

In [20]:
from pathlib import Path
from dotenv import load_dotenv


# Load secrets and connection settings from the project's .env file into the process environment.
# NOTE(review): absolute, user-specific Windows path — not portable to another machine.
dotenv_path = Path(r'C:\Users\Aashrith\CoE Internship\real-assistant\app\.env')
load_dotenv(dotenv_path=dotenv_path)

# os.getenv returns None for any variable that is not set; nothing below checks for that.
AZURE_OPENAI_API_KEY = os.getenv('AZURE_OPENAI_API_KEY')
AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
DB_HOST=os.getenv("DB_HOST")
POSTGRES_DB=os.getenv("POSTGRES_DB")
POSTGRES_USER=os.getenv("POSTGRES_USER")
POSTGRES_PASSWORD=os.getenv("POSTGRES_PASSWORD")


# (id, summary) rows from the documents table. get_records prints the error and yields None
# when psycopg2 raises, so later indexing into `records` will fail in that case.
records = get_records(DB_HOST, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD)

admin


In [21]:
# Positions in `records` (not document ids) of the hand-labelled lease abstracts.
records_idx = [0, 3, 14, 20, 21, 25, 30, 56, 57, 69, 72, 74, 78, 88, 102]
# Prompt = instruction template + the summary column (index 1) of each selected row.
test_questions = [(template+records[i][1]) for i in records_idx]
# Gold tenant names, in the same order as `records_idx`.
# NOTE(review): "Kller Williams Realty" looks like a typo for "Keller" — confirm against the
# source document before changing it, since the metric used later is exact match.
test_answers = ["United States of America", "BSREP II Cypress MT LLC", "Kller Williams Realty", "Salmon River Mobile Vet LLC", "KBSII Fountainhead, LLC", "Oceanside Community Association", "Diane Huynh and Don Huynh, jointly and severally, dba 'Pho Mai'", "Westfield Community Development Corporation", "Rocky Mountain Institute", "Industrial Realty Group, LLC", "NorthShore University HealthSystem", "United Entertainment LLC", "Department of Water and Power, City of Los Angeles", "ABCDE, LLC (dba All Things Coffee, Coffee Shop)", "City of Seattle"]

# Fail fast if a label is added or removed without its index; the previous index-based
# pairing silently ignored surplus labels.
if len(test_questions) != len(test_answers):
    raise ValueError(
        f"records_idx has {len(test_questions)} entries but test_answers has {len(test_answers)}"
    )

test_examples = list(zip(test_questions, test_answers))

# Later cells slice this list into train ([:10]) and dev ([10:]) portions, so the shuffle
# decides the split. A private seeded generator keeps that split identical on every run
# and on every re-execution of this cell.
SHUFFLE_SEED = 0
random.Random(SHUFFLE_SEED).shuffle(test_examples)

# test = [dspy.Example(question=question, answer=answer).with_inputs('question') for question, answer in test_examples] 

# for q in test_questions:
#     pred = cot_compiled_with_assertions(q)
#     print(pred.answer)

In [22]:
# random.shuffle(examples)
# Hand-picked positions in `examples`; despite the name, `train_examples` below holds these
# hand-picked rows and is itself divided between the train and test sets.
examples_idx = [7,8,11,16,19,23,25,31,34,35,37,39,42,44,48,49,51,53]
picked_positions = set(examples_idx)  # set membership instead of scanning the list per row
train_examples = [examples[i] for i in examples_idx]
non_train_examples = [example for i, example in enumerate(examples) if i not in picked_positions]

# Seeded private generator: the split (and therefore every score computed from it) is the
# same on each run and on each re-execution of this cell. The unseeded module-level
# shuffle used before produced a different split every time.
SHUFFLE_SEED = 0
split_rng = random.Random(SHUFFLE_SEED)
split_rng.shuffle(train_examples)
split_rng.shuffle(non_train_examples)

# Train: 15 hand-picked rows + 7 other spreadsheet rows + the first 10 database examples.
train_set = train_examples[:15] + non_train_examples[:7] + test_examples[:10]
# Test: the remaining hand-picked rows + the next 13 spreadsheet rows + the remaining
# database examples.
test_set = train_examples[15:] + non_train_examples[7:20] + test_examples[10:]

In [23]:
def _to_dspy_examples(pairs):
    """Wrap (question, answer) tuples as dspy.Example objects with `question` marked as the input."""
    wrapped = []
    for question, answer in pairs:
        wrapped.append(dspy.Example(question=question, answer=answer).with_inputs('question'))
    return wrapped


# Training examples used for compilation, and the held-out set used for evaluation.
train = _to_dspy_examples(train_set)
dev = _to_dspy_examples(test_set)

In [24]:
# Azure OpenAI client for the 'gpt-4o-mini-global-128k' model (API version
# 2024-02-15-preview), using the endpoint and key read from the environment earlier.
gpt4_mini = dspy.AzureOpenAI(api_base=AZURE_OPENAI_ENDPOINT, api_version='2024-02-15-preview', model='gpt-4o-mini-global-128k',  api_key=AZURE_OPENAI_API_KEY)
# Register it as the language model in DSPy's global configuration.
dspy.configure(lm=gpt4_mini)

In [25]:
# Signature with one input (`question`) and one output (`answer`).
# It is not referenced by the modules defined in the visible cells, which pass the inline
# signature string 'question -> answer' instead.
# NOTE(review): DSPy presumably turns the class docstring and the field `desc` into prompt
# text — treat both as runtime strings and confirm before rewording them.
class BasicQA(dspy.Signature):
    """Answer questions."""

    # Input field: the full prompt text.
    question = dspy.InputField()
    # Output field: the extracted answer.
    answer = dspy.OutputField(desc="only contains the answer")

In [26]:
class CoT(dspy.Module):  
    """Module with a single chain-of-thought predictor mapping `question` to `answer`."""

    def __init__(self):
        super().__init__()
        # Inline signature string: one input field (question), one output field (answer).
        self.generate_answer = dspy.ChainOfThought('question -> answer')
    
    def forward(self, question):
        """Run the predictor on `question` and return its prediction unchanged."""
        return self.generate_answer(question=question)

In [27]:
def check_acronyms(pred):
    """Return True when the predicted answer carries no parenthesised acronym.

    The answer fails the check (returns False) when either:

    * it contains a parenthesised group holding a single whitespace-free token,
      e.g. ``"United States Postal Service (USPS)"``; or
    * the whole answer is wrapped in stand-alone ``(`` ... ``)`` tokens, which
      is the only case the previous implementation detected.

    Multi-word parentheticals such as ``"(dba All Things Coffee, Coffee Shop)"``
    are deliberately accepted. This is a heuristic: a one-word qualifier like
    ``"(Delaware)"`` is also flagged.

    Args:
        pred: Object with an ``answer`` attribute (string). A missing or
            ``None`` answer is treated as an empty string.

    Returns:
        bool: True if the answer passes, False if an acronym-style
        parenthetical was found.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    answer = getattr(pred, "answer", None) or ""

    # Previous rule, kept for backward compatibility: "( ... )" around the whole answer.
    tokens = answer.split(" ")
    fully_wrapped = tokens[0] == "(" and tokens[-1] == ")"

    # New rule: "(TOKEN)" anywhere in the answer, with optional padding inside the brackets.
    single_token_group = re.search(r"\(\s*[^\s()]+\s*\)", answer) is not None

    return not (fully_wrapped or single_token_group)

In [28]:
class CoT_with_Assertions(dspy.Module):
    """Chain-of-thought predictor whose output is passed through the acronym check."""

    def __init__(self):
        super().__init__()
        self.generate_answer = dspy.ChainOfThought('question -> answer')

    def forward(self, question):
        """Predict an answer for `question`, register the acronym suggestion, and return the prediction."""
        prediction = self.generate_answer(question=question)
        passes_check = check_acronyms(prediction)
        feedback = "Output shouldn't have any acronym/abbreviation/short hand notation of the tenant/lessee name in the parenthesis. Please remove them and provide the output."
        # The check result and the feedback message are handed to dspy.Suggest.
        dspy.Suggest(passes_check, feedback)
        return prediction

In [29]:
# Exact-match metric shipped with DSPy; used for both compilation and evaluation below.
metric_EM = dspy.evaluate.answer_exact_match

# Random-search few-shot optimiser: up to 2 bootstrapped demos per predictor and 8 candidate
# programs (the cell output confirms both numbers), scored with exact match on 32 threads.
teleprompter1 = BootstrapFewShotWithRandomSearch(metric=metric_EM, max_bootstrapped_demos=2, num_candidate_programs=8, num_threads=32)
# cot_compiled = teleprompter.compile(CoT(), trainset=train)

Going to sample between 1 and 2 traces per predictor.
Will attempt to train 8 candidate sets.


In [30]:
# Student program to be optimised: the plain chain-of-thought module.
cot = CoT()

# NOTE(review): teleprompter2 is created here but not used anywhere in the visible cells;
# the compile call below goes through teleprompter1.
teleprompter2 = BootstrapFewShot(metric=metric_EM, max_bootstrapped_demos=4)
# Compile the student using an assertion-wrapped CoT_with_Assertions as the teacher.
# NOTE(review): the student is the plain CoT, so despite its name the compiled program
# presumably does not run the dspy.Suggest check at inference time — confirm that is intended.
cot_compiled_with_assertions = teleprompter1.compile(student=cot, teacher = assert_transform_module(CoT_with_Assertions(), backtrack_handler), trainset=train)

Average Metric: 20 / 32  (62.5): 100%|█████████████████████████████████████████████████| 32/32 [00:06<00:00,  4.64it/s]


Average Metric: 20 / 32  (62.5%)
Score: 62.5 for set: [0]
New best score: 62.5 for seed -3
Scores so far: [62.5]
Best score: 62.5


Average Metric: 23 / 32  (71.9): 100%|█████████████████████████████████████████████████| 32/32 [00:05<00:00,  5.90it/s]


Average Metric: 23 / 32  (71.9%)
Score: 71.88 for set: [16]
New best score: 71.88 for seed -2
Scores so far: [62.5, 71.88]
Best score: 71.88


 12%|██████████▍                                                                        | 4/32 [00:01<00:13,  2.13it/s]


Bootstrapped 2 full traces after 5 examples in round 0.


Average Metric: 24 / 32  (75.0): 100%|█████████████████████████████████████████████████| 32/32 [00:56<00:00,  1.76s/it]


Average Metric: 24 / 32  (75.0%)
Score: 75.0 for set: [16]
New best score: 75.0 for seed -1
Scores so far: [62.5, 71.88, 75.0]
Best score: 75.0
Average of max per entry across top 1 scores: 0.75
Average of max per entry across top 2 scores: 0.875
Average of max per entry across top 3 scores: 0.96875
Average of max per entry across top 5 scores: 0.96875
Average of max per entry across top 8 scores: 0.96875
Average of max per entry across top 9999 scores: 0.96875


  6%|█████▏                                                                             | 2/32 [00:03<00:47,  1.59s/it]


Bootstrapped 2 full traces after 3 examples in round 0.


Average Metric: 23 / 32  (71.9): 100%|█████████████████████████████████████████████████| 32/32 [00:04<00:00,  7.72it/s]


Average Metric: 23 / 32  (71.9%)
Score: 71.88 for set: [16]
Scores so far: [62.5, 71.88, 75.0, 71.88]
Best score: 75.0
Average of max per entry across top 1 scores: 0.75
Average of max per entry across top 2 scores: 0.875
Average of max per entry across top 3 scores: 0.9375
Average of max per entry across top 5 scores: 0.96875
Average of max per entry across top 8 scores: 0.96875
Average of max per entry across top 9999 scores: 0.96875


  3%|██▌                                                                                | 1/32 [00:51<26:32, 51.38s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 25 / 32  (78.1): 100%|█████████████████████████████████████████████████| 32/32 [00:07<00:00,  4.34it/s]


Average Metric: 25 / 32  (78.1%)
Score: 78.12 for set: [16]
New best score: 78.12 for seed 1
Scores so far: [62.5, 71.88, 75.0, 71.88, 78.12]
Best score: 78.12
Average of max per entry across top 1 scores: 0.78125
Average of max per entry across top 2 scores: 0.96875
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  3%|██▌                                                                                | 1/32 [00:01<00:44,  1.42s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 23 / 32  (71.9): 100%|█████████████████████████████████████████████████| 32/32 [00:55<00:00,  1.74s/it]


Average Metric: 23 / 32  (71.9%)
Score: 71.88 for set: [16]
Scores so far: [62.5, 71.88, 75.0, 71.88, 78.12, 71.88]
Best score: 78.12
Average of max per entry across top 1 scores: 0.78125
Average of max per entry across top 2 scores: 0.96875
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  6%|█████▏                                                                             | 2/32 [00:03<00:50,  1.67s/it]


Bootstrapped 1 full traces after 3 examples in round 0.


Average Metric: 21 / 32  (65.6): 100%|█████████████████████████████████████████████████| 32/32 [00:04<00:00,  7.68it/s]


Average Metric: 21 / 32  (65.6%)
Score: 65.62 for set: [16]
Scores so far: [62.5, 71.88, 75.0, 71.88, 78.12, 71.88, 65.62]
Best score: 78.12
Average of max per entry across top 1 scores: 0.78125
Average of max per entry across top 2 scores: 0.96875
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  6%|█████▏                                                                             | 2/32 [00:03<00:57,  1.92s/it]


Bootstrapped 1 full traces after 3 examples in round 0.


Average Metric: 22 / 32  (68.8): 100%|█████████████████████████████████████████████████| 32/32 [00:56<00:00,  1.77s/it]


Average Metric: 22 / 32  (68.8%)
Score: 68.75 for set: [16]
Scores so far: [62.5, 71.88, 75.0, 71.88, 78.12, 71.88, 65.62, 68.75]
Best score: 78.12
Average of max per entry across top 1 scores: 0.78125
Average of max per entry across top 2 scores: 0.96875
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  9%|███████▊                                                                           | 3/32 [00:11<01:48,  3.72s/it]


Bootstrapped 2 full traces after 4 examples in round 0.


Average Metric: 21 / 32  (65.6): 100%|█████████████████████████████████████████████████| 32/32 [00:40<00:00,  1.27s/it]


Average Metric: 21 / 32  (65.6%)
Score: 65.62 for set: [16]
Scores so far: [62.5, 71.88, 75.0, 71.88, 78.12, 71.88, 65.62, 68.75, 65.62]
Best score: 78.12
Average of max per entry across top 1 scores: 0.78125
Average of max per entry across top 2 scores: 0.96875
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  3%|██▌                                                                                | 1/32 [00:04<02:31,  4.89s/it]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 22 / 32  (68.8): 100%|█████████████████████████████████████████████████| 32/32 [00:17<00:00,  1.84it/s]


Average Metric: 22 / 32  (68.8%)
Score: 68.75 for set: [16]
Scores so far: [62.5, 71.88, 75.0, 71.88, 78.12, 71.88, 65.62, 68.75, 65.62, 68.75]
Best score: 78.12
Average of max per entry across top 1 scores: 0.78125
Average of max per entry across top 2 scores: 0.96875
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  9%|███████▊                                                                           | 3/32 [00:05<00:51,  1.78s/it]


Bootstrapped 2 full traces after 4 examples in round 0.


Average Metric: 6 / 13  (46.2):  41%|████████████████████▎                             | 13/32 [00:04<00:10,  1.84it/s]

Backing off 0.0 seconds after 1 tries calling function <function AzureOpenAI.request at 0x000001EEEF4CF790> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function AzureOpenAI.request at 0x000001EEEF4CF790> with kwargs {}


Average Metric: 21 / 32  (65.6): 100%|█████████████████████████████████████████████████| 32/32 [00:53<00:00,  1.69s/it]

Average Metric: 21 / 32  (65.6%)
Score: 65.62 for set: [16]
Scores so far: [62.5, 71.88, 75.0, 71.88, 78.12, 71.88, 65.62, 68.75, 65.62, 68.75, 65.62]
Best score: 78.12
Average of max per entry across top 1 scores: 0.78125
Average of max per entry across top 2 scores: 0.96875
Average of max per entry across top 3 scores: 1.0
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0
11 candidate programs found.





In [34]:
# Worker threads used by the evaluator.
NUM_THREADS = 32

# Exact-match evaluator over a copy of the dev set, with a progress bar and a 15-row results table.
evaluate_hotpot = Evaluate(
    devset=list(dev),
    metric=metric_EM,
    num_threads=NUM_THREADS,
    display_progress=True,
    display_table=15,
)

# Score the compiled program; the returned value is shown as the cell output.
evaluate_hotpot(cot_compiled_with_assertions)

Average Metric: 18 / 21  (85.7): 100%|█████████████████████████████████████████████████| 21/21 [00:00<00:00, 26.32it/s]


Average Metric: 18 / 21  (85.7%)


Unnamed: 0,question,example_answer,rationale,pred_answer,answer_exact_match
0,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,"Crush Technology LLC (later renamed Mo-Kan Rock & Gravel Company LLC, and then TEAM Excavating LLC)","extract the tenant/lessee name. The relevant section indicates that the tenant/lessee is ""Crush Technology LLC,"" which later changed its name to ""Mo-Kan Rock & Gravel...",TEAM Excavating LLC,False
1,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,King County,"extract the tenant/lessee name. The relevant section indicates that the tenant/lessee is ""King County, a municipal corporation organized under the laws of the State of...",King County,✔️ [True]
2,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,Puget Sound Clean Air Agency,"extract the tenant/lessee name. The relevant section indicates that the tenant/lessee is the ""Puget Sound Clean Air Agency."" We will discard any additional information related...",Puget Sound Clean Air Agency,✔️ [True]
3,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,DaimlerChrysler Manufacturing International LLC,"extract the tenant/lessee name. The relevant section indicates that the tenant/lessee is ""DaimlerChrysler Manufacturing International LLC."" We will discard any additional information related to the...",DaimlerChrysler Manufacturing International LLC,✔️ [True]
4,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,Nimlux Inc.,"extract the tenant/lessee name. The relevant section indicates that the tenant/lessee is ""Nimlux Inc."" We will discard any additional information related to the tenant's designation...",Nimlux Inc.,✔️ [True]
5,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,Causedunsure Corp,"extract the tenant/lessee name. The relevant section indicates that the tenant/lessee is ""Causedunsure Corp."" We will discard any additional information related to the tenant's designation...",Causedunsure Corp,✔️ [True]
6,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,Federal Express Corporation,"extract the tenant/lessee name. The relevant section indicates that the lessee is ""Federal Express Corporation."" We will discard any additional information related to the tenant's...",Federal Express Corporation,✔️ [True]
7,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,United States Postal Service,"extract the tenant/lessee name. The relevant section indicates that the tenant/lessee is the ""United States Postal Service."" We will discard any additional information related to...",United States Postal Service,✔️ [True]
8,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,Relishtreasure LLC.,"extract the tenant/lessee name. The relevant section indicates that the tenant/lessee is ""Relishtreasure LLC."" We will discard any additional information related to the tenant's designation...",Relishtreasure LLC,✔️ [True]
9,Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and...,Pastpumpkin Corp,"extract the tenant/lessee name. The relevant section indicates that the tenant/lessee is ""Pastpumpkin Corp."" We will discard any additional information related to the tenant's designation...",Pastpumpkin Corp,✔️ [True]


85.71

: 

In [33]:
# Inspect the LM client's recent call history (n=5) to see the exact prompts that were sent.
# NOTE(review): this cell's execution count (33) is lower than the evaluation cell above it (34),
# so the cells were run out of order — re-check with Restart & Run All.
gpt4_mini.inspect_history(n=5)





Given the fields `question`, produce the fields `answer`.

---

Follow the following format.

Question: ${question}
Reasoning: Let's think step by step in order to ${produce the answer}. We ...
Answer: ${answer}

---

Question: Extract the data from the following lease abstract. The extracted data should contain only the Tenant/Lessee name. Extract the information from the relevant section and modify/change that information so that the output has only the tenant/lessee name and all other extra information (description about the tenant/lessee name or company, acronyms, etc.) related to the name is discarded. # Lease Abstract Summary ## Asset Class - **Office** ## Parties - **Landlord:** 900 Fourth Avenue Property LLC, a Delaware limited liability company - **Tenant:** City of Seattle, a municipal corporation of the State of Washington ## Property Address - **901 Fifth Avenue, Seattle, WA 98164** ## Occupied Areas | Area Description | Square Feet | |------------------|-------------| 

In [None]:
# NUM_THREADS = 32
# evaluate = Evaluate(devset=test, metric=metric_EM, num_threads=NUM_THREADS, display_progress=True, display_table=15)

# evaluate(cot_compiled_with_assertions)