In [35]:
from langchain_community.document_loaders import PyMuPDFLoader

# Initialize the loader with the file path
loader = PyMuPDFLoader("BAJHLIP23020V012223.pdf")


# Load the documents
docs = loader.load()


In [36]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=512,
            chunk_overlap=50,
            length_function=len
        )

# Split the documents into chunks

print(f"Split into {len(docs)} chunks using RecursiveCharacterTextSplitter.")

Split into 49 chunks using RecursiveCharacterTextSplitter.


In [37]:
print("\n--- Content of the first chunk ---")
print(docs[0].page_content)


--- Content of the first chunk ---
UIN- BAJHLIP23020V012223 
                                Global Health Care/ Policy Wordings/Page 1 
 
 
Bajaj Allianz General Insurance Co. Ltd.                       
Bajaj Allianz House, Airport Road, Yerawada, Pune - 411 006. Reg. No.: 113 
For more details, log on to: www.bajajallianz.com | E-mail: bagichelp@bajajallianz.co.in or 
Call at: Sales - 1800 209 0144 / Service - 1800 209 5858 (Toll Free No.) 
GLOBAL HEALTH CARE 
 
Policy Wordings 
 
UIN- BAJHLIP23020V012223 
SECTION A) PREAMBLE 
 
Whereas the Insured described in the Policy Schedule hereto (hereinafter called the ‘Insured’ or “Policyholder” or 
“Insured Person”) has made to Bajaj Allianz General Insurance Company Limited (hereinafter called the “Company” 
or “Insurer” or “Insurance Company”) a proposal or Proposal as mentioned in the transcript of the Proposal, which 
shall be the basis of this Contract and is deemed to be incorporated herein, containing certain undertakings, 
declar

In [38]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2"
)


In [48]:
# from langchain_qdrant import Qdrant
# qdrant_location = ":memory:"
# collection_name = "my_document_collection"

# vectorstore = Qdrant.from_documents(
#     documents=docs,
#     embedding=embeddings,
#     location=qdrant_location,
#     collection_name=collection_name,
# )

from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

client = QdrantClient(":memory:")

client.create_collection(
    collection_name="demo_collection",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="demo_collection",
    embedding=embeddings,
)


Vectorstore created with 49 documents in collection 'my_document_collection' at location ':memory:'.


In [50]:
from langchain.retrievers import ParentDocumentRetriever
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=500)

# This splitter will create the small child chunks from the parent chunks
# It should create small, semantically focused chunks.
# Fix: Set a chunk_overlap smaller than the chunk_size.
child_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100, 
    chunk_overlap=20  # A value smaller than 100
)

In [51]:
from langchain.storage import InMemoryStore

docstore = InMemoryStore()
retriever = ParentDocumentRetriever(
    vectorstore=vector_store,
    docstore=docstore,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)


In [52]:
retriever.add_documents(docs, ids=None)

In [53]:
query = "Is eye surgery covered?"
retrieved_docs = retriever.invoke(query)

print(f"\n--- Retrieved Documents for query: '{query}' ---")
for doc in retrieved_docs:
    print("\n--- Parent Chunk ---")
    print(doc.page_content)
    # Notice the metadata contains the original source info
    print(doc.metadata)


--- Retrieved Documents for query: 'Is eye surgery covered?' ---

--- Parent Chunk ---
coverage. 
d. Coverage under the Policy after the expiry of 36 months for any Pre-Existing Disease is subject to the same 
being declared at the time of application and accepted by Insurer. 
 
2) Specified disease/procedure waiting period (Code - Excl02) 
a. Expenses related to the treatment of the listed Conditions, surgeries/treatments shall be excluded until the 
expiry of 24 months of continuous coverage after the date of inception of the first Global Health Care Policy
{'producer': 'Microsoft® Word 2016', 'creator': 'Microsoft® Word 2016', 'creationdate': '2022-06-16T20:06:13+05:30', 'source': 'BAJHLIP23020V012223.pdf', 'file_path': 'BAJHLIP23020V012223.pdf', 'total_pages': 49, 'format': 'PDF 1.5', 'title': '', 'author': 'Vinay Dhanokar/Head Office Pune/Corporate Communication/General', 'subject': '', 'keywords': '', 'moddate': '2022-06-16T20:06:13+05:30', 'trapped': '', 'modDate': "D:202206162

  return forward_call(*args, **kwargs)


In [54]:
import os
from langchain_groq import ChatGroq
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
load_dotenv = True  # Ensure .env is loaded if needed

# --- Prerequisite: Your existing 'retriever' object ---
# This assumes you have the 'retriever' (ParentDocumentRetriever) 
# ready from our previous steps.

# --- 1. Set up Groq API Key ---

if "GROQ_API_KEY" not in os.environ:
    print("Error: GROQ_API_KEY environment variable not set.")
else:
    # --- 2. Initialize the Groq LLM ---
    # Llama3 is a great, fast choice on Groq
    llm = ChatGroq(model_name="llama3-8b-8192")

    # --- 3. Create a Prompt Template ---
    prompt_template = """
    You are an insurance claim evaluator. Based on the policy clauses and query details provided, 
    make a decision about the insurance claim.
    Context:
    {context}
        

    Question:
    {input}
    """
    prompt = ChatPromptTemplate.from_template(prompt_template)

    # --- 4. Create the RAG Chain ---
    
    # This chain takes the question and retrieved documents and generates an answer.
    Youtube_chain = create_stuff_documents_chain(llm, prompt)

    # This is the final chain that combines the retriever and the question-answer chain.
    rag_chain = create_retrieval_chain(retriever, Youtube_chain)

    # --- 5. Ask a Question ---
    
    query = "Is eye surgery covered by this policy?"
    
    print(f"Asking question: {query}")
    
    response = rag_chain.invoke({"input": query})

    print("\n--- Answer ---")
    print(response["answer"])

Asking question: Is eye surgery covered by this policy?


  return forward_call(*args, **kwargs)



--- Answer ---
Based on the policy clauses and query details provided, I would make the following decision:

* The policy specifically excludes coverage for "Cost of Artificial Limbs" under the clause for "The cost of spectacles, contact lenses, hearing aids, crutches, dentures, artificial teeth and all other external appliances and/or devices..."
* However, it does not explicitly exclude coverage for eye surgery. In fact, the policy mentions "spectacles" and "contact lenses" which are related to eye care.
* Additionally, the policy covers medical expenses incurred due to "Act of Terrorism", which could include eye injuries or surgeries resulting from such an event.
* However, it also mentions a "Specified disease/procedure waiting period" of 24 months for certain treatments and surgeries, including listed Conditions.

In light of this information, I would conclude that eye surgery may be covered under the policy, but only if it is not related to a pre-existing condition or a listed C

In [None]:
def get_answer(query):
    """
    Function to be called by the Gradio interface.
    It takes a query and returns the answer from the RAG chain.
    """
    if not rag_chain:
        return "Error: The RAG chain is not initialized. Please check your API keys and setup."
    
    print(f"Received query: {query}")
    response = rag_chain.invoke({"input": query})
    return response["answer"]


In [None]:
import gradio as gr
iface = gr.Interface(
    fn=get_answer,
    inputs=gr.Textbox(lines=2, placeholder="Ask a question about your policy..."),
    outputs="text",
    title="Policy Document Q&A 📄",
    description="This is a demo of a RAG system. Ask a question about the sample policy, and the AI will find the answer within the document.",
    examples=[
        ["Is eye surgery covered by this policy?"],
        ["What is the annual deductible?"],
        ["Are dental procedures covered?"]
    ]
)

# Launch the web UI
print("Launching Gradio interface...")
iface.launch()

Launching Gradio interface...
* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




In [47]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

client = QdrantClient(":memory:")

client.create_collection(
    collection_name="demo_collection",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="demo_collection",
    embedding=embeddings,
)