## Step 1 : Import the necessary libraries

In [1]:
# Installing OpenAI, LlamaIndex
!pip install -U -qq llama-index openai

In [2]:
## Installing additional supporting libraries as required

In [3]:
# Importinging the LLAMA libraries
from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage


In [4]:
# Importing common libraries
import os, openai
import pandas as pd, numpy as np

## Step 2:  Set the API key

### Key Toggle

In [5]:
api_key = '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'

In [6]:
%%writefile "config.py"
api_key = api_key
api_key

Writing config.py


In [7]:
#from config import api_key
openai.api_key = api_key
openai.api_key

'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'

### Step 3 - Data Loading

In [8]:
# Importing the necessary loader
from llama_index.core import SimpleDirectoryReader


In [9]:
# Initiating the reader with the input directory to the files

reader = SimpleDirectoryReader(
    input_files=[r"C:\Users\ribhu\Downloads\Policy\Policy.pdf"]
)
documents = reader.load_data()

print(f"Loaded {len(documents)} docs")



Loaded 44 docs


In [10]:
# Creating document objects and loading the data and perviewing the first doc
type(documents)

documents[0]

Document(id_='4365b32d-8561-44c1-80c8-2c815dcc86d5', embedding=None, metadata={'page_label': '1', 'file_name': 'Policy.pdf', 'file_path': 'C:\\Users\\ribhu\\Downloads\\Policy\\Policy.pdf', 'file_type': 'application/pdf', 'file_size': 1990500, 'creation_date': '2023-09-29', 'last_modified_date': '2025-01-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text=' \n \nPART A: Covering Letter with Policy Schedule \n__________________                                                                                                                                <dd-mm-yyyy> \n__________________ \n__________________ \n__________________ \n___________

In [11]:
!pip install PyMuPDF




[notice] A new release of pip is available: 24.1.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Step 4 - Building the query engine

The general process for creating the query_engine is:
- Load the documents
- Create nodes from the documents
- Create index from documents
- Initialise the Query Engine
- Query the index with the prompt
- Generate the response using the retrieved nodes

In [12]:
# Importing the necessary libraries
from IPython.display import display, HTML
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SimpleNodeParser


# Creating parser / parse document to nodes
parser = SimpleNodeParser.from_defaults()
nodes = parser.get_nodes_from_documents(documents)

# Index building
index = VectorStoreIndex(nodes)

# Constructing the Query Engine
query_engine = index.as_query_engine()


In [13]:
type(index)

llama_index.core.indices.vector_store.base.VectorStoreIndex

In [14]:
response = query_engine.query("What is the minimum age for the policy?")

In [15]:
type(response)

llama_index.core.base.response.schema.Response

In [16]:
response.response

'The minimum age for the policy is 12 years.'

In [17]:
response = query_engine.query("What is the maximum age for the policy?")
type(response)
response.response

'The maximum age for the policy is between 37 to 54 years.'

### Step 5 - Creating a Response Pipeline



A Query Response pipeline encapsulates all the necssary steps to build a RAG pipeline. Modify the functions `query_response` and `initialize_conv()`  below. The `query_response` functions return the query response from the query engine along with the supporting documents and the `initialize_conv()` function creates an interactive chatbot.

In [18]:
## Query response function
def query_response(user_input):
    
    #Generate a response based on user input by querying the query 
    # engine and retrieving metadata from the source nodes.
    response = query_engine.query(user_input)
    file_name = response.source_nodes[0].node.metadata['file_name'] + '\'' + " on page no. " + response.source_nodes[0].node.metadata['page_label'] + "," + response.source_nodes[1].node.metadata['page_label']

    # Args:    user_input (str): The input query provided by the user.
   
    # final_response (str): The final response generated by the query engine, including a
    # reference to the source file names and page numbers.
    final_response = response.response + ' You can check the file \'' + file_name 
    return final_response

### Step 6 Initializing a conversation

In [19]:
def initialize_conv():
    """
    Initializing a conversation with the user, allowing them to ask questions
    about the policy documents. The user can type 'exit' to end the
    conversation.
    """
    print('Please ask queries about the policy, and exit when you are satisfied')
    while True:
        user_input = input()
        if user_input.lower() == 'exit':
          print('Hope your queries are resolved... Thank you.. bYe...')
          break
        else:
          response = query_response(user_input)
    return response

In [20]:
initialize_conv()

Please ask queries about the policy, and exit when you are satisfied


 How much will the policy cost?
 exit


Hope your queries are resolved... Thank you.. bYe...


"The policy cost will depend on the specific terms and conditions outlined in the policy document. The Paid-up Addition Factor per Re. 1 Cash Bonus utilized for outstanding Policy Term of 19 to 36 years and 37 to 54 years will also play a role in determining the overall cost of the policy. You can check the file 'Policy.pdf' on page no. 33,32"

## Step 7  - Build a Testing Pipeline

Here we feed a series of questions to the Q/A bot and store the responses along with the feedback on whether it's accurate or not from the user. Creating questions and store them in the `questions` list to be queried by the RAG system using the `testing_pipeline` function.

In [21]:
# Args:     questions (list): A list of questions to be tested.
questions = ['How much will the policy cost?',  'What\'s covered if I become disabled?',
             'Will my premiums change over time?',  'Will the policy cover hospitalize bills?',
             'Will I get the reimbursement if i cancel the policy?'
            ]

In [22]:
def testing_pipeline(questions):
    # Conduct a testing pipeline for a series of questions, collecting user feedback on the responses provided by the bot.
    
    test_feedback  = []
    for i in questions:
        print(i)
        print(query_response(i))
        print('\n Your feedback for the bot is appriciated')
        user_input = input()
        page = query_response(i).split()[-1]
        test_feedback.append((i,query_response(i),page,user_input))
    #   Returns:  pd.DataFrame: A DataFrame containing the questions, their corresponding responses, the page number from the response, and the user feedback
    #   indicating whether the response was good or bad.
    feedback = pd.DataFrame(test_feedback, columns =['Question', 'Response', 'Page','Good or Bad'])
    return feedback
    


In [23]:
testing_pipeline(questions)

How much will the policy cost?
The policy cost will depend on various factors such as the policy term, the amount of cash bonus utilized, and the specific paid-up addition factor associated with the outstanding policy term. You can check the file 'Policy.pdf' on page no. 33,32

 Your feedback for the bot is appriciated


 Good


What's covered if I become disabled?
If you become disabled, the documents required for a claim would typically include the Disability Certificate issued by the competent authority, the original Policy Document, identification proof of the person receiving the Benefit and the Life Assured, medical records related to the disability, and bank account details of the claimant for payment processing. Additionally, depending on the cause or nature of the disability claim, the insurance company may request any other necessary documents or information to process the claim satisfactorily. You can check the file 'Policy.pdf' on page no. 19,7

 Your feedback for the bot is appriciated


 Good


Will my premiums change over time?
Premiums will not change over time if the Policyholder chooses the monthly premium payment mode. Three months' premiums are collected in advance at the start of the Policy and adjusted towards the Policy only on the due dates. These advance premiums are non-refundable, except in the case of Free Look Cancellation of the Policy. You can check the file 'Policy.pdf' on page no. 10,7

 Your feedback for the bot is appriciated


 Good


Will the policy cover hospitalize bills?
The policy does not mention coverage for hospital bills specifically. It outlines the requirements for death claims, issuance of duplicate policy, force majeure events, payment of premiums, and payment of benefits. You can check the file 'Policy.pdf' on page no. 19,10

 Your feedback for the bot is appriciated


 Good


Will I get the reimbursement if i cancel the policy?
If you cancel the policy within the specified free look period mentioned in the policy document, you will receive a reimbursement of the premium paid, subject to deductions for certain expenses like proportionate risk premium, medical examination fees, and stamp duty charges. You can check the file 'Policy.pdf' on page no. 11,10

 Your feedback for the bot is appriciated


 Good


Unnamed: 0,Question,Response,Page,Good or Bad
0,How much will the policy cost?,The policy cost will depend on various factors...,3332,Good
1,What's covered if I become disabled?,"If you become disabled, the documents required...",197,Good
2,Will my premiums change over time?,Premiums will not change over time if the Poli...,107,Good
3,Will the policy cover hospitalize bills?,The policy does not mention coverage for hospi...,1910,Good
4,Will I get the reimbursement if i cancel the p...,If you cancel the policy within the specified ...,1110,Good


### Part 3 - Additional Steps


3.1 - Building a custom prompt template

You can try building a custom prompt template for improving the generation capabilities of the RAG system.

In [24]:
response = query_engine.query("Which is the most cost efficient offer?")
response.response

'The most cost-efficient offer is Option B: Income Option.'

In [25]:
retrieved = response.source_nodes[0].node.text + response.source_nodes[1].node.text

In [26]:
messages = [
    {"role":"system", "content":"You are an AI assistant to user."},
    {"role":"user", "content":f"""How the nominee is benefitted? Check in '{retrieved}' """},
          ]

In [27]:
response2 = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        seed=1,
        temperature= 0)


response2.choices[0].message.content

'Based on the information provided, the nominee of the policyholder can benefit in the following ways:\n\n1. **Loan Facility**: The nominee can avail a loan against the policy, where the maximum loan amount will not exceed 80% of the available Surrender Value at that point of time. The interest rate on the loan is currently at 9.50% p.a., calculated based on the Average Annualised 10-year benchmark G-Sec Yield plus 2%. The interest rate is reviewed half-yearly, and any changes will be effective from specific dates each year.\n\n2. **Survival Benefits**: The nominee may receive Survival Benefits based on the Guaranteed Benefit Option chosen by the policyholder at the inception of the policy. The Survival Benefits are payable by the company to the policyholder if the life assured survives during the policy term. The benefits vary based on the Guaranteed Benefit Option selected, such as Lump Sum Option, Income Option, Lump Sum with Income Option, or Income with Lump Sum Option.\n\n3. **De

3.2 - Recommendations on how to further improve RAG pipeline:

- Based on the testing pipeline's feedback, you can further develop a strategy on how to improve the RAG pipeline further
- This can be through building a better/cleaner dataset, or utilizing better data pre-processing techniques
- If the accuracy is good enough, think of implementing some more use cases and user stories. Maybe a set of users want to do XYZ, and that requirement needs to be implemented in the solution. Think of a proper tool that can enable you to do that.

# Thank You