In [1]:
import sys
from IPython.display import Markdown
from src_index.token_catcher import Usage
from src_index.ho3_sample_policy_index import build_ho3_sample_policy_index
from src_index.ho3_sub_query_engine import build_ho3_sub_query_engine

In [2]:
from llama_index import SimpleDirectoryReader

In [3]:
# Read each source PDF with SimpleDirectoryReader; every call is given exactly
# one input file and returns whatever load_data() produces for it.
# NOTE(review): the doubled slash in the HO3 path is kept exactly as written.
sample_ho3_policy_docs = SimpleDirectoryReader(
    input_files=['./data//HO3_sample.pdf'],
).load_data()
tx_doi_docs = SimpleDirectoryReader(
    input_files=['./data/tx_doi_code/TX_INSURANCE_CODE_All_3872.pdf'],
).load_data()
bldg_codes_docs = SimpleDirectoryReader(
    input_files=['./data/building_codes/building_codes.pdf'],
).load_data()

In [4]:
from llama_index import ServiceContext
from llama_index.llms import OpenAI

# Service context whose LLM is pinned to gpt-3.5-turbo at temperature 0.
# NOTE(review): this object is not passed explicitly to any index or query
# engine in the cells visible here -- confirm it is actually picked up.
service_context = ServiceContext.from_defaults(
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0),
)

In [5]:
# Usage object from src_index.token_catcher; its total_tokens() is read in a later cell.
# NOTE(review): how it hooks into the LLM/embedding calls is not visible here -- confirm.
usage = Usage()

In [6]:
from llama_index import StorageContext, VectorStoreIndex, load_index_from_storage

def _load_or_build_index(persist_dir, build_index):
    """Load the index persisted in ``persist_dir``; build and persist it if loading fails.

    Args:
        persist_dir: Directory the index is loaded from and, after a rebuild,
            persisted to.
        build_index: Zero-argument callable that creates the index from its
            source documents. It is only called when loading fails.

    Returns:
        The loaded or freshly built index.
    """
    try:
        return load_index_from_storage(StorageContext.from_defaults(persist_dir=persist_dir))
    except Exception as exc:
        # `Exception` (not a bare `except`) so Ctrl-C / kernel interrupts still
        # propagate; the reason is reported instead of being silently dropped.
        print(f"Could not load index from {persist_dir!r} ({exc!r}); rebuilding it.")

    index = build_index()
    index.storage_context.persist(persist_dir=persist_dir)
    return index


# Each index is handled on its own, so one missing or corrupt persist directory
# no longer forces an (expensive) rebuild of the other two indexes.
ho3_index = _load_or_build_index(
    "./ho3_sample_policy_meta_index",
    lambda: build_ho3_sample_policy_index(sample_ho3_policy_docs, llm_metadata=True),
)

tx_doi_index = _load_or_build_index(
    "./tx_doi_index",
    lambda: VectorStoreIndex.from_documents(tx_doi_docs),
)

bldg_codes_index = _load_or_build_index(
    "./_property_index_storage",
    lambda: VectorStoreIndex.from_documents(bldg_codes_docs),
)

In [7]:
# Last expression of the cell, so the notebook renders the token total
# reported by the Usage object.
usage.total_tokens()

2163272

In [8]:
from llama_index.tools import QueryEngineTool

# One QueryEngineTool per index. The name and description are the text the
# tool is registered with; each tool wraps that index's default query engine.
ho3_tool = QueryEngineTool.from_defaults(
    name="Insurance Policy",
    description="Useful for answering questions about insurance coverage or general policy wording. ",
    query_engine=ho3_index.as_query_engine(),
)

doi_tool = QueryEngineTool.from_defaults(
    name="TX DOI",
    description="Useful for answering questions about insurance related rules or regulations. ",
    query_engine=tx_doi_index.as_query_engine(),
)

bldg_tool = QueryEngineTool.from_defaults(
    name="Building Codes",
    description="Useful for answering questions about repairing buildings or other structures. ",
    query_engine=bldg_codes_index.as_query_engine(),
)

In [9]:
# nest_asyncio is applied before the engine is used; the original note says
# this is needed when running inside a notebook.
import nest_asyncio
nest_asyncio.apply()

from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.response_synthesizers import get_response_synthesizer

# Sub-question engine over the three tools. With verbose=True the generated
# sub-questions and their answers are printed while a query runs.
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[ho3_tool, doi_tool, bldg_tool],
    verbose=True,
)

In [10]:
# Ask a two-part coverage question, then render the answer's text as Markdown.
response = query_engine.query(
    "Does the policy cover damage from a plumbing leak? If so are there any limitations?"
)
Markdown(str(response))

Generated 2 sub questions.
[36;1m[1;3m[Insurance Policy] Q: Does the policy cover damage from a plumbing leak?
[0m[33;1m[1;3m[Insurance Policy] Q: Are there any limitations on coverage for damage from a plumbing leak?
[0m[36;1m[1;3m[Insurance Policy] A: Yes, the policy does cover damage from a plumbing leak.
[0m[33;1m[1;3m[Insurance Policy] A: Yes, there are limitations on coverage for damage from a plumbing leak. The policy does not cover loss to the system or appliance from which the water or steam escaped. Additionally, loss caused by mold, fungus, or wet rot is not covered unless it is hidden within the walls, ceilings, floors, or above the ceilings of a structure.
[0m


Yes, the policy does cover damage from a plumbing leak, but there are limitations. The policy does not cover loss to the system or appliance from which the water or steam escaped, and loss caused by mold, fungus, or wet rot is not covered unless it is hidden within the walls, ceilings, floors, or above the ceilings of a structure.

In [12]:
# Render the text of the second source node attached to the response.
Markdown(str(response.source_nodes[1].node.text))

Sub question: Are there any limitations on coverage for damage from a plumbing leak?
Response: Yes, there are limitations on coverage for damage from a plumbing leak. The policy does not cover loss to the system or appliance from which the water or steam escaped. Additionally, loss caused by mold, fungus, or wet rot is not covered unless it is hidden within the walls, ceilings, floors, or above the ceilings of a structure.

### Generate Evaluation Dataset

In [46]:
from llama_index import Document

# Concatenate the text of every loaded HO3 document (no separator) and wrap
# the combined string in a single Document.
all_text = "".join(doc.text for doc in sample_ho3_policy_docs)

text_document = Document(text=all_text)

In [47]:
import os
import random
random.seed(42)

from llama_index import ServiceContext
from llama_index.prompts import Prompt
from llama_index.llms import OpenAI
from llama_index.evaluation import DatasetGenerator

# Service context used for question generation and for the evaluators below.
# The model is selected with the `model` keyword, the same keyword used for the
# gpt-3.5-turbo context earlier in this notebook.
gpt4_service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-4", temperature=0))

question_dataset = []
if os.path.exists("question_dataset.txt"):
    # Reuse the cached questions (one per line); blank lines are skipped so
    # they do not turn into empty questions.
    with open("question_dataset.txt", "r") as f:
        question_dataset = [line.strip() for line in f if line.strip()]
else:
    # generate questions
    data_generator = DatasetGenerator.from_documents(
        [text_document],
        text_question_template=Prompt(
            "A sample from the Insurance Policy documentation is below.\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Using the documentation sample, carefully follow the instructions below:\n"
            "{query_str}"
        ),
        question_gen_query=(
            "You are an evaluator for a search pipeline. Your task is to write a single question "
            "using the provided documentation sample above to test the search pipeline. The question should "
            "reference specific coverage terms or provisions. Restrict the question to the "
            "context information provided.\n"
            "Question: "
        ),
        service_context=gpt4_service_context
    )
    generated_questions = data_generator.generate_questions_from_nodes()

    # Randomly keep at most 10 of the generated questions; the cap prevents
    # random.sample from raising ValueError when fewer than 10 were generated.
    generated_questions = random.sample(generated_questions, min(10, len(generated_questions)))
    question_dataset.extend(generated_questions)

    print(f"Generated {len(question_dataset)} questions.")

    # save the questions so later runs reuse the same evaluation set
    with open("question_dataset.txt", "w") as f:
        for question in question_dataset:
            f.write(f"{question.strip()}\n")

Generated 10 questions.


In [48]:
# Preview up to five questions; capping the sample size keeps random.sample
# from raising ValueError when the dataset holds fewer than five.
print(random.sample(question_dataset, min(5, len(question_dataset))))

['Does the insurance policy cover damage caused by freezing to a plumbing system or household appliance?', 'What is the coverage limit for loss by theft of silverware, silver-plated ware, goldware, gold-plated ware, platinumware, platinum-plated ware and pewterware?', 'What is the limit of liability for debris removal under Coverage D?', 'What is the maximum amount of coverage for loss assessment under Coverage E?', 'What is the time frame for submitting a sworn statement of loss for C. Damage To Property Of Others under Section II – Additional Coverages?']


### Evaluate Responses for Hallucination

In [54]:
import time
import asyncio
import nest_asyncio
nest_asyncio.apply()

from llama_index import Response

def evaluate_query_engine(evaluator, query_engine, questions, batch_size=5, pause_seconds=1):
    """Query the engine with every question and score each response.

    Questions are sent in batches; the queries of one batch run concurrently
    through ``query_engine.aquery``. Each response is then passed to
    ``evaluator.evaluate(response)`` and counted as correct when the value
    returned by the evaluator contains ``"YES"``.

    Args:
        evaluator: Object exposing ``evaluate(response)``.
        query_engine: Object exposing the coroutine method ``aquery(question)``.
        questions: Sequence of question strings.
        batch_size: Number of questions queried concurrently (default 5).
        pause_seconds: Seconds to sleep after each batch to help avoid rate
            limits (default 1); use 0 to disable the pause.

    Returns:
        A ``(total_correct, all_results)`` tuple, where ``all_results`` holds
        one 1/0 entry per question in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    async def run_query(q):
        try:
            return await query_engine.aquery(q)
        except Exception as exc:
            # Keep the rest of the batch going, but report why the query failed.
            # `Exception` (not a bare `except`) lets task cancellation and
            # kernel interrupts propagate.
            print(f"query failed for {q!r}: {exc!r}")
            return Response(response="Error, query failed.")

    async def run_batch(batch_qs):
        # asyncio.run expects a coroutine, so the gather is awaited inside one
        # instead of handing the gather future to asyncio.run directly.
        return await asyncio.gather(*(run_query(q) for q in batch_qs))

    # Ceiling division so a final partial batch is included in the total.
    total_batches = (len(questions) + batch_size - 1) // batch_size

    total_correct = 0
    all_results = []
    for batch_number, start in enumerate(range(0, len(questions), batch_size), start=1):
        batch_qs = questions[start:start + batch_size]

        responses = asyncio.run(run_batch(batch_qs))
        print(f"finished batch {batch_number} out of {total_batches}")

        for response in responses:
            eval_result = 1 if "YES" in evaluator.evaluate(response) else 0
            total_correct += eval_result
            all_results.append(eval_result)

        # helps avoid rate limits
        if pause_seconds > 0:
            time.sleep(pause_seconds)

    return total_correct, all_results

In [50]:
from llama_index.evaluation import ResponseEvaluator

# ResponseEvaluator backed by gpt4_service_context; per the section heading it
# serves as the hallucination check for each response.
evaluator = ResponseEvaluator(service_context=gpt4_service_context)

total_correct, all_results = evaluate_query_engine(
    evaluator=evaluator,
    query_engine=query_engine,
    questions=question_dataset,
)

print(f"Hallucination? Scored {total_correct} out of {len(question_dataset)} questions correctly.")

Generated 1 sub questions.
[36;1m[1;3m[Insurance Policy] Q: What is the definition of 'Motor Vehicle Liability' in the Insurance Policy?
[0mGenerated 2 sub questions.
[36;1m[1;3m[Insurance Policy] Q: What is the maximum amount of coverage for loss assessment under Coverage E according to the Insurance Policy tool?
[0m[33;1m[1;3m[TX DOI] Q: What is the maximum amount of coverage for loss assessment under Coverage E according to the TX DOI tool?
[0m[36;1m[1;3m[Insurance Policy] A: 
Motor Vehicle Liability is defined as any liability arising from the use of a motor vehicle that is registered for use on public roads or property, or is required to be registered for use on public roads or property, or is being operated in, or practicing for, any prearranged or organized race, speed contest or other competition; rented to others; used to carry persons or cargo for a charge; or used for any business purpose except for a motorized golf cart while on a golfing facility.
[0m[36;1m[1

In [51]:
import numpy as np

# Keep only the questions whose evaluation result is 0, i.e. the evaluator's
# answer did not contain "YES".
hallucinated_questions = np.asarray(question_dataset)[np.equal(all_results, 0)]
print(hallucinated_questions)

['What is the maximum amount of coverage for loss assessment under Coverage E?'
 'Does the insurance policy cover loss caused by the discharge, leakage, or overflow from within a plumbing, heating, air conditioning, or automatic fire protective sprinkler system or of a household appliance due to freezing?'
 'What is the coverage for "bodily injury" or "property damage" caused by a nuclear weapon?'
 'What is the limit of liability for debris removal under Coverage D?'
 'What is the time frame for submitting a sworn statement of loss for C. Damage To Property Of Others under Section II – Additional Coverages?'
 'Does the insurance policy cover damage caused by freezing to a plumbing system or household appliance?']


In [52]:
# Re-run one of the failed questions and print the answer, a divider, and the
# formatted sources (each truncated to 1000 characters).
response = query_engine.query(
    "Does the insurance policy cover damage caused by freezing to a plumbing system or household appliance?"
)
print(
    response,
    "-----------------",
    response.get_formatted_sources(length=1000),
    sep="\n",
)

Generated 7 sub questions.
[36;1m[1;3m[Insurance Policy] Q: What is the coverage for damage caused by freezing to a plumbing system?
[0m[33;1m[1;3m[Insurance Policy] Q: What is the coverage for damage caused by freezing to a household appliance?
[0m[38;5;200m[1;3m[Insurance Policy] Q: Are there any specific exclusions or limitations for damage caused by freezing in the insurance policy?
[0m[32;1m[1;3m[TX DOI] Q: What are the rules and regulations regarding coverage for damage caused by freezing according to TX DOI?
[0m[31;1m[1;3m[TX DOI] Q: Are there any specific requirements or conditions for coverage of damage caused by freezing according to TX DOI?
[0m[36;1m[1;3m[Building Codes] Q: What are the building codes related to preventing damage caused by freezing?
[0m[33;1m[1;3m[Building Codes] Q: What are the building codes related to repairing damage caused by freezing?
[0m[32;1m[1;3m[TX DOI] A: 
There are no rules and regulations regarding coverage for damage caus

In [53]:
# Ask a variant of the discharge/leakage question and print the answer, a
# divider, and the formatted sources (each truncated to 1000 characters).
response = query_engine.query(
    "Does the insurance policy cover loss caused by the discharge, leakage, or overflow from within a plumbing, heating, or air conditioning system?"
)
print(
    response,
    "-----------------",
    response.get_formatted_sources(length=1000),
    sep="\n",
)

Generated 3 sub questions.
[36;1m[1;3m[Insurance Policy] Q: Does the insurance policy cover loss caused by the discharge, leakage, or overflow from within a plumbing system?
[0m[33;1m[1;3m[Insurance Policy] Q: Does the insurance policy cover loss caused by the discharge, leakage, or overflow from within a heating system?
[0m[38;5;200m[1;3m[Insurance Policy] Q: Does the insurance policy cover loss caused by the discharge, leakage, or overflow from within an air conditioning system?
[0m[36;1m[1;3m[Insurance Policy] A: 
Yes, the insurance policy covers loss caused by the discharge, leakage, or overflow from within a plumbing system. This is stated in Section I – Exclusion A. 3. Water Damage, c. (5) and (6).
[0m[33;1m[1;3m[Insurance Policy] A: 
Yes, the insurance policy covers loss caused by the discharge, leakage, or overflow from within a heating system. This is covered under Peril 13. Sudden And Accidental Tearing Apart, Cracking, Burning Or Bulging and Peril 14. Freezing.

### Answer Quality

In [57]:
import time
import asyncio
import nest_asyncio
nest_asyncio.apply()
from llama_index import Response

def evaluate_query_engine(evaluator, query_engine, questions, batch_size=5, pause_seconds=1):
    """Query the engine with every question and score each (question, response) pair.

    This redefines the function of the same name from the hallucination
    section; the difference is that the evaluator here also receives the
    question: ``evaluator.evaluate(question, response)``. A pair is counted as
    correct when the value returned by the evaluator contains ``"YES"``.

    Questions are sent in batches; the queries of one batch run concurrently
    through ``query_engine.aquery``.

    Args:
        evaluator: Object exposing ``evaluate(question, response)``.
        query_engine: Object exposing the coroutine method ``aquery(question)``.
        questions: Sequence of question strings.
        batch_size: Number of questions queried concurrently (default 5).
        pause_seconds: Seconds to sleep after each batch to help avoid rate
            limits (default 1); use 0 to disable the pause.

    Returns:
        A ``(total_correct, all_results)`` tuple, where ``all_results`` holds
        one 1/0 entry per question in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    async def run_query(q):
        try:
            return await query_engine.aquery(q)
        except Exception as exc:
            # Keep the rest of the batch going, but report why the query failed.
            # `Exception` (not a bare `except`) lets task cancellation and
            # kernel interrupts propagate.
            print(f"query failed for {q!r}: {exc!r}")
            return Response(response="Error, query failed.")

    async def run_batch(batch_qs):
        # asyncio.run expects a coroutine, so the gather is awaited inside one
        # instead of handing the gather future to asyncio.run directly.
        return await asyncio.gather(*(run_query(q) for q in batch_qs))

    # Ceiling division so a final partial batch is included in the total.
    total_batches = (len(questions) + batch_size - 1) // batch_size

    total_correct = 0
    all_results = []
    for batch_number, start in enumerate(range(0, len(questions), batch_size), start=1):
        batch_qs = questions[start:start + batch_size]

        responses = asyncio.run(run_batch(batch_qs))
        print(f"finished batch {batch_number} out of {total_batches}")

        for question, response in zip(batch_qs, responses):
            eval_result = 1 if "YES" in evaluator.evaluate(question, response) else 0
            total_correct += eval_result
            all_results.append(eval_result)

        # helps avoid rate limits
        if pause_seconds > 0:
            time.sleep(pause_seconds)

    return total_correct, all_results

In [56]:
from llama_index.evaluation import QueryResponseEvaluator

# QueryResponseEvaluator backed by gpt4_service_context; per the printed
# message below it judges whether a response satisfies its query.
evaluator = QueryResponseEvaluator(service_context=gpt4_service_context)

total_correct, all_results = evaluate_query_engine(
    evaluator=evaluator,
    query_engine=query_engine,
    questions=question_dataset,
)

print(f"Response satisfies the query? Scored {total_correct} out of {len(question_dataset)} questions correctly.")

Generated 1 sub questions.
[36;1m[1;3m[Insurance Policy] Q: What is the definition of 'Motor Vehicle Liability' in the Insurance Policy?
[0mGenerated 2 sub questions.
[36;1m[1;3m[Insurance Policy] Q: What is the maximum amount of coverage for loss assessment under Coverage E according to the Insurance Policy tool?
[0m[33;1m[1;3m[TX DOI] Q: What is the maximum amount of coverage for loss assessment under Coverage E according to the TX DOI tool?
[0m[36;1m[1;3m[Insurance Policy] A: 
Motor Vehicle Liability is defined as any liability arising from the use of a motor vehicle that is registered for use on public roads or property, or is required to be registered for use on public roads or property, or is being operated in, or practicing for, any prearranged or organized race, speed contest or other competition; rented to others; used to carry persons or cargo for a charge; or used for any business purpose except for a motorized golf cart while on a golfing facility.
[0m[36;1m[1

In [58]:
import numpy as np

# Keep only the questions whose evaluation result is 0, i.e. the evaluator's
# answer did not contain "YES".
unanswered_queries = np.asarray(question_dataset)[np.equal(all_results, 0)]
print(unanswered_queries)

['What is the maximum amount of coverage for loss assessment under Coverage E?'
 'What is the coverage for "bodily injury" or "property damage" caused by a nuclear weapon?'
 'What is the limit of liability for debris removal under Coverage D?'
 'What is the time frame for submitting a sworn statement of loss for C. Damage To Property Of Others under Section II – Additional Coverages?'
 'Does the insurance policy cover damage caused by freezing to a plumbing system or household appliance?'
 'What is the coverage provided for losses resulting from a nuclear hazard?']


In [59]:
# Re-run one of the unanswered questions and print the answer, a divider, and
# the formatted sources (each truncated to 256 characters).
response = query_engine.query(
    "What is the limit of liability for debris removal under Coverage D?"
)
print(
    response,
    "-----------------",
    response.get_formatted_sources(length=256),
    sep="\n",
)

Generated 7 sub questions.
[36;1m[1;3m[Insurance Policy] Q: What is the limit of liability for debris removal under Coverage D?
[0m[33;1m[1;3m[Insurance Policy] Q: What is the coverage for debris removal under Coverage D?
[0m[38;5;200m[1;3m[Insurance Policy] Q: What are the policy wording for debris removal under Coverage D?
[0m[32;1m[1;3m[TX DOI] Q: What are the rules and regulations for debris removal under Coverage D?
[0m[31;1m[1;3m[TX DOI] Q: What are the insurance related rules and regulations for debris removal under Coverage D?
[0m[36;1m[1;3m[Building Codes] Q: What are the building codes for debris removal under Coverage D?
[0m[33;1m[1;3m[Building Codes] Q: What are the regulations for repairing buildings or other structures under Coverage D?
[0m[32;1m[1;3m[TX DOI] A: 
There are no rules and regulations for debris removal under Coverage D of the Texas Insurance Code.
[0m[36;1m[1;3m[Insurance Policy] A: 
The limit of liability for debris removal under C