## Installing LlamaIndex

In [None]:
!pip uninstall llama-index  # run this if upgrading from v0.9.x or older
!pip install -U llama-index --upgrade --no-cache-dir --force-reinstall

## Using LlamaParse to parse the documents
Register at [LlamaCloud](https://cloud.llamaindex.ai) to get an API key for LlamaParse.

In [None]:
import nest_asyncio
# Patch asyncio so event loops can be nested — presumably needed because the notebook
# kernel already runs a loop and LlamaParse uses asyncio internally (TODO confirm).
nest_asyncio.apply()
from getpass import getpass

from llama_parse import LlamaParse

# Parser client used by the following cells to load the policy PDFs.
parser = LlamaParse(
    api_key=getpass(),  # can also be set in your env as LLAMA_CLOUD_API_KEY
    result_type="markdown",  # "markdown" and "text" are available
    num_workers=4, # if multiple files passed, split in `num_workers` API calls
    verbose=True,
    language="en" # Optionally you can define a language, default=en
)

··········


In [None]:
individual_documents = parser.load_data(["./277713967-Health-Insurance-Policy.pdf"])
# Fail with a clear message instead of an IndexError if nothing was parsed.
if not individual_documents:
    raise ValueError("LlamaParse returned no documents for the individual policy PDF")
# Tag EVERY parsed document with the policy id (the original tagged only the first one).
# Retrieval later filters on `policy_id`, so untagged documents would never be returned.
# Assumes the policy id is known for each document.
for individual_document in individual_documents:
    individual_document.metadata.update({"policy_id":"P/700002/01/2015/007530"})

Started parsing the file under job_id 29740ad3-038c-4008-b1f0-1f69256c0ca0


In [None]:
# Parse the group policy prospectus PDF; this cell attaches no extra metadata to the result.
group_policy_documents = parser.load_data(["./564130930-Individual-Health-Insurance-Policy-Prospectus.pdf"])

Started parsing the file under job_id ba14aa97-ee43-4129-b41b-2c81b13b5a0a


## Setting up a vector DB

In [None]:
!pip install langchain
!pip install llama-index-embeddings-langchain
!pip install sentence_transformers

## Setting up `BAAI/bge-base-en` as our embedding model

In [None]:
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from llama_index.core import Settings

# Register the LangChain BGE embedding wrapper as the global embedding model on Settings.
Settings.embed_model = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


## Setting up


*   Chunk size of `512` tokens with an overlap of `128` tokens between chunks to preserve context
*   LLM model `gpt-3.5-turbo` with temperature `0`



In [None]:
from llama_index.llms.openai import OpenAI
import os

# Prompt for the OpenAI key, keep it in the environment, and expose it as `api_key`
# so the reranker functions defined later can build their own OpenAI clients.
os.environ['OPENAI_API_KEY'] = getpass()
api_key = os.getenv('OPENAI_API_KEY')
Settings.llm = OpenAI(model='gpt-3.5-turbo',temperature=0,api_key=api_key)

# Chunking configuration applied when documents are split into nodes.
Settings.chunk_size = 512
# Fixed attribute name: the original assigned `Settings.chunk_overlapchunk`, a misspelling
# of `chunk_overlap`, so the intended 128-token overlap was never configured.
Settings.chunk_overlap = 128

In [None]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)

## Setting up individual policy vector database

In [None]:
# Split the individual-policy documents into nodes with the globally configured node parser.
individual_nodes = Settings.node_parser.get_nodes_from_documents(individual_documents)
# initialize storage context (by default it's in-memory)
indi_storage_context = StorageContext.from_defaults()
# Add the nodes to the docstore, then build the vector index over the same nodes.
indi_storage_context.docstore.add_documents(individual_nodes)
ind_vector_index = VectorStoreIndex(individual_nodes, storage_context=indi_storage_context)

In [None]:
# Provides `llama_index.postprocessor.rankgpt_rerank`, imported by the retrieval cell that follows.
%pip install llama-index-postprocessor-rankgpt-rerank

## Setting up individual policy retrieval + ranking with metadata filtering

In [None]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import QueryBundle
from llama_index.postprocessor.rankgpt_rerank import RankGPTRerank

def individual_retrieved_nodes(
    query_str,filter,vector_index=ind_vector_index, vector_top_k=10, reranker_top_n=3, with_reranker=False
):
    """Retrieve nodes for a query from a vector index, optionally reranked with RankGPT.

    Args:
        query_str: Text of the query.
        filter: Metadata filters handed to the retriever as its `filters` argument.
        vector_index: Index to search (defaults to the individual-policy index).
        vector_top_k: Number of nearest neighbours requested from the retriever.
        reranker_top_n: `top_n` passed to the reranker when reranking is enabled.
        with_reranker: When true, the retrieved nodes are post-processed by RankGPT.

    Returns:
        The retrieved nodes, or the reranker's output when `with_reranker` is true.
    """
    bundle = QueryBundle(query_str)
    candidates = VectorIndexRetriever(
        index=vector_index,
        similarity_top_k=vector_top_k,
        filters=filter,
    ).retrieve(bundle)

    # No reranking requested: the retriever's own result is returned as-is.
    if not with_reranker:
        return candidates

    # RankGPT uses its own OpenAI client, configured independently of Settings.llm.
    rerank_llm = OpenAI(model="gpt-3.5-turbo-16k", temperature=0.0, api_key=api_key)
    rank_gpt = RankGPTRerank(llm=rerank_llm, top_n=reranker_top_n, verbose=True)
    return rank_gpt.postprocess_nodes(candidates, bundle)

## Sample function call with metadata filtering

In [None]:
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters
# Exact-match filter on the `policy_id` metadata key for this policy number.
filters = MetadataFilters(filters=[ExactMatchFilter(key="policy_id", value="P/700002/01/2015/007530")])
# Example call: filtered retrieval on the individual-policy index with reranking enabled.
answer = individual_retrieved_nodes("Sum Insured or Base Cover or LIMIT OF COVERAGE",vector_index=ind_vector_index,filter = filters,with_reranker=True)

After Reranking, new rank list for nodes: [3, 0, 1, 2, 4, 5, 6, 7]

## Group Policy Vector database

In [None]:
# Split the group-policy documents into nodes with the globally configured node parser.
group_policy_nodes = Settings.node_parser.get_nodes_from_documents(group_policy_documents)
# initialize storage context (by default it's in-memory)
group_storage_context = StorageContext.from_defaults()
# Add the nodes to the docstore, then build the vector index over the same nodes.
group_storage_context.docstore.add_documents(group_policy_nodes)
group_vector_index = VectorStoreIndex(group_policy_nodes, storage_context=group_storage_context)

## Group policy retrieval + ranking function

In [None]:
def group_retrieved_nodes(
    query_str,vector_index=group_vector_index, vector_top_k=10, reranker_top_n=3, with_reranker=True
):
    """Retrieve nodes for a query from a vector index, optionally reranked with RankGPT.

    Args:
        query_str: Text of the query.
        vector_index: Index to search (defaults to the group-policy index).
        vector_top_k: Number of nearest neighbours requested from the retriever.
        reranker_top_n: `top_n` passed to the reranker when reranking is enabled.
        with_reranker: When true (the default), the retrieved nodes are post-processed by RankGPT.

    Returns:
        The retrieved nodes, or the reranker's output when `with_reranker` is true.
    """
    bundle = QueryBundle(query_str)
    candidates = VectorIndexRetriever(
        index=vector_index,
        similarity_top_k=vector_top_k,
    ).retrieve(bundle)

    # No reranking requested: the retriever's own result is returned as-is.
    if not with_reranker:
        return candidates

    # RankGPT uses its own OpenAI client, configured independently of Settings.llm.
    rerank_llm = OpenAI(model="gpt-3.5-turbo-16k", temperature=0.0, api_key=api_key)
    rank_gpt = RankGPTRerank(llm=rerank_llm, top_n=reranker_top_n, verbose=True)
    return rank_gpt.postprocess_nodes(candidates, bundle)

## Sample function call

In [None]:
# Example call: retrieval on the group-policy index with reranking enabled.
answer = group_retrieved_nodes("claim document requirements",with_reranker=True)

After Reranking, new rank list for nodes: [0, 6, 3, 4, 1, 2, 5, 7, 8, 9]

## Setting up LLM tools

In [None]:
from llama_index.core.tools import QueryEngineTool
from llama_index.core.tools import BaseTool, FunctionTool

def group_document_search(query: str)-> list:
  """
  This function takes the user query as an input
        very very important, do not ignore - Use this function to find any general detailed of group policy
        where you can find answers like, below are some few examples
  1. what are common terms for policy?
  2. What is covered and not covered in this policy?
  3. what is the claim process?
  4. what is supporting documents required for the claim?

  Use nicely formatted answers for the user (Mostly in markdown text)
  """
  # NOTE: the docstring above is kept verbatim on purpose. It is expected to be surfaced
  # to the LLM as the tool description by FunctionTool.from_defaults, so editing it would
  # change agent behavior.
  # The return annotation was corrected from `str`: the function returns a list of
  # {"Score": ..., "Text": ...} dicts, one per reranked node.
  top_retrival = group_retrieved_nodes(query,with_reranker=True)
  return [
      {"Score": node.score, "Text": node.node.get_text()}
      for node in top_retrival
  ]

# Wrap the function as a tool so the agent can call it.
group_document_search_tool = FunctionTool.from_defaults(fn=group_document_search)

def individual_document_search(query:str,policy_id:str)->list:
  """
  This function takes the user query and policy id as inputs, if the policy id is not present then use policy_api tool to find the policy id.

very very important only use this function to find policy details of individual owners for a policy, Below are some examples
1. policy individual names
2. policy sum insured
3. Details of Insured Persons
very very important -- do not call this function unless you require very specific details above personal details given in point 1 to 3
  """
  # NOTE: the docstring above is kept verbatim on purpose. It is expected to be surfaced
  # to the LLM as the tool description by FunctionTool.from_defaults, so editing it would
  # change agent behavior.
  # The return annotation was corrected from `str`: the function returns a list of
  # {"Score": ..., "Text": ...} dicts, one per reranked node.
  # Always restrict retrieval to nodes tagged with the requested policy id.
  policy_filter = MetadataFilters(filters=[ExactMatchFilter(key="policy_id", value=policy_id)])
  top_retrival = individual_retrieved_nodes(query,filter = policy_filter,with_reranker=True)
  return [
      {"Score": node.score, "Text": node.node.get_text()}
      for node in top_retrival
  ]

# Wrap the function as a tool so the agent can call it.
individual_document_search_tool = FunctionTool.from_defaults(fn=individual_document_search)

def coverage_left(total_claimed: int, total_covered: int) -> int:
    """This function provides how much coverage is left on a policy
    total_claimed - can be found from tool policy_api
    total_covered - can be found out from individual_document_search tool
    """
    # Docstring kept verbatim: this function is registered as an agent tool, and the
    # docstring is expected to reach the LLM as the tool description.
    remaining = total_covered - total_claimed
    return remaining

# Wrap coverage_left as a tool; it is included in the agent's tool list when the agent is built.
coverage_left_tool = FunctionTool.from_defaults(fn=coverage_left)

def policy_api(name: str) -> dict:
    """Use this function return policy number for a given user name, if there is
    no policy number given in the document always use this function to find policy number
    very very important, if no name is provided then ask for the name from user
    very important to call individual_document_search tool to find total sum insured of a policy
    """
    # Docstring kept verbatim: it is expected to reach the LLM as the tool description.
    # Annotations fixed: `name` is now typed as `str`, and the return annotation reflects
    # the dict that is actually returned (it was `str`).
    # Stub lookup: the same hard-coded record is returned regardless of `name`.
    return {"Policy_id":"P/700002/01/2015/007530","claimed_amount":"10000"}


# Wrap policy_api as a tool; it is included in the agent's tool list when the agent is built.
policy_api_tool = FunctionTool.from_defaults(fn=policy_api)

## Setting up LLM Agents

In [None]:
from llama_index.agent.openai import OpenAIAgent

# OpenAIAgent.from_tools receives the agent's instructions through `system_prompt`.
# The original call passed `name=`, `instructions=` and `run_retrieve_sleep_time=`, which
# are parameters of the Assistants-API agent (OpenAIAssistantAgent.from_new). Here they
# appear to be absorbed by **kwargs, so the instructions never reached the model.
# NOTE(review): recorded outputs further down appear to predate this fix; re-run the
# chat cells to refresh them.
agent = OpenAIAgent.from_tools(
    tools=[group_document_search_tool,individual_document_search_tool,coverage_left_tool,policy_api_tool],
    system_prompt="""You are a bot designed to answer questions about the given policy.
     Always look for user name or policy number in the query before using any tool
     If user name is provided then use policy_api tool to find correct policy number
     very very important not to call any tool before getting user's name or policy number
     """,
    verbose=True,
)

## Chatting with policy Bot

## Question 1

The agent takes the following steps to produce the answer:


* LLM agent understands the query and calls the `group_document_search` tool
* The tool internally queries the group policy vector DB, reranks the results, and then passes the top N (3 in this case) chunks as context
* The LLM then writes an answer based on the provided context.



In [None]:
# Send the question to the agent and print its reply as text.
response = agent.chat("what are the supporting documents required for the claim?")
print(str(response))

Added user message to memory: what are the supporting documents required for the claim?
=== Calling Function ===
Calling function: group_document_search with args: {"query":"supporting documents required for the claim"}
After Reranking, new rank list for nodes: [0, 1, 2, 5, 6, 7, 8, 9, 3, 4]Got output: [{'Score': 0.8412200689579963, 'Text': 'ii. The Company shall only accept bills/invoices/medical treatment related documents only in the Insured Person’s name for whom the claim is submitted.\n\niii. The Insured Person shall also give the TPA / Company such additional information and assistance as the TPA / Company may require in dealing with the claim including an authorization to obtain Medical and other records from the hospital, lab, etc.\n\niv. All the documents submitted to TPA shall be electronically collected by us for settlement/denial of the claims by the appropriate authority.\n\nv. Any medical practitioner or Authorized Person authorized by the TPA / Company shall be allowed 

## Question 2

In [None]:
# Send the question to the agent and print its reply as text.
response = agent.chat("Does plastic surgery covered in my insurance?")
print(str(response))

Added user message to memory: Does plastic surgery covered in my insurance?
=== Calling Function ===
Calling function: group_document_search with args: {"query":"coverage for plastic surgery"}
After Reranking, new rank list for nodes: [4, 7, 0, 1, 2, 3, 5, 6, 8, 9]Got output: [{'Score': 0.8295149659660707, 'Text': '1,00,000, whichever is less|Up to 25% of SI or Rs. 1,00,000, whichever is less|\n|Major Surgeries#|Actuals|Up to 70% of SI|Up to 70% of SI|\n|Pre-Hospitalisation|30 Days|30 Days subject to max of 10% of SI|30 Days subject to max of 10% of SI|\n|Post-Hospitalisation|60 Days|60 Days subject to max of 10% of SI|60 Days subject to max of 10% of SI|\n|Domiciliary Hospitalisation|Covered|Covered|Covered|\n|Ayurvedic Treatment|Covered|Covered|Covered|\n|Modern Treatment Methods#|Covered|Covered|Covered|\n|Cost of Health Check Up|Insured Person subject to a maximum of Rs. 5000.|Insured Person subject to a maximum of Rs. 5000.|Insured Person subject to a maximum of Rs. 5000.|\n\n|Opt

## Question 3
* LLM agent understands the query and calls `policy_api` tool
* Since policy API needs `name` as mandatory input, LLM responds by asking the name of the user.

In [None]:
# Ask for the policy number without giving a name; the reply is printed as text.
response = agent.chat("what is my policy number?")
print(str(response))

Added user message to memory: what is my policy number?
=== Calling Function ===
Calling function: policy_api with args: {}
Got output: Error: policy_api() missing 1 required positional argument: 'name'

I need your name to find your policy number. Can you please provide me with your name?


## Follow up question

In [None]:
# Follow-up in the same chat session: supply the user's name and print the reply.
response = agent.chat("My name is ASHWANI KUMAR RAI?")
print(str(response))

Added user message to memory: My name is ASHWANI KUMAR RAI?
=== Calling Function ===
Calling function: policy_api with args: {"name":"ASHWANI KUMAR RAI"}
Got output: {'Policy_id': 'P/700002/01/2015/007530', 'claimed_amount': '10000'}

Your policy number is P/700002/01/2015/007530. If you have any specific queries regarding this policy, feel free to ask.


## Question 4
* The agent correctly selects the `individual_document_search` tool and finds context matching the total sum insured.
* It correctly identifies that the total covered is 300000
* Since in an earlier conversation it already has `claimed_amount` it directly calls `coverage_left` tool to calculate remaining coverage.

In [None]:
# Ask about remaining coverage in the same chat session and print the reply as text.
response = agent.chat("how much total coverage do I have left?")
print(str(response))

Added user message to memory: how much total coverage do I have left?
=== Calling Function ===
Calling function: individual_document_search with args: {"query":"total covered","policy_id":"P/700002/01/2015/007530"}
After Reranking, new rank list for nodes: [0, 1, 2, 3, 4, 5, 6, 7]Got output: [{'Score': 0.8065974808576539, 'Text': "Cancer|No|\n|---|---|\n|b. Chronic Kidney Disease|No|\n|c. Brain Stroke/CVA|No|\n|d. Parkinson's Disease|No|\n|e. Alzheimer's Disease|No|\n|f. Renal Complications|No|\n|g. Heart Diseases|No|\n\nSocial Status: No\n\n## Premium Calculation\n\n|Cover Description|Sum Insured|Premium|\n|---|---|---|\n|Base Cover|300000|5400|\n|TOTAL PREMIUM| |5400|\n|STAMP DUTY| |1|\n|ADD: SERVICE TAX| |667|\n|TOTAL AMOUNT| |6067|\n\nDeclaration\n\nI hereby confirm that all the above information is true and correct according to my belief. I also agree that my policy is for cancellation in case any of the above entered information is found to be false/intentionally misrepresented.\

## Question 5

In [None]:
# Request the insured-person details as a table and print the reply as text.
response = agent.chat("Give me details of details of insured persons and their id card number as a table")
print(str(response))

Added user message to memory: Give me details of details of insured persons and their id card number as a table
=== Calling Function ===
Calling function: individual_document_search with args: {"query":"insured persons","policy_id":"P/700002/01/2015/007530"}
After Reranking, new rank list for nodes: [0, 1, 2, 3, 4, 5, 6, 7]Got output: [{'Score': 0.8692126776714286, 'Text': 'No.|Name of the Insured|Sex|Date of Birth|Age-Yrs/Mths|Relationship with Proposer|Pre Existing Disease/s|ID Card No|\n|---|---|---|---|---|---|---|---|\n|1|ASHWANI KUMAR RAI|MALE|10-07-1985|28 Yrs 11 Mths|SELF|NONE|3629867-1|\n|2|SUSHAMA RAI|FEMALE|01-07-1987|27 Yrs 0 Mths|SPOUSE|NONE|3629867-2|\n|3|ASHUTOSH RAI|MALE|08-11-2012|1 Yrs 7 Mths|DEPENDANT|NONE|3629867-3|\n\nWarranted that in case of dishonor of premium cheque(s), the Company shall not be liable under the policy and the policy shall be void abinitio (from inception).\n\nExpenses relating to the hospitalisation will be in proportion to the room rent stated

## Question 7

In [None]:
## Implement “appointments tool” and try to ask the question “Please book an appointment with a Dermatologist”