In [2]:
import os
import json
import re
from langchain.chat_models import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from langchain.vectorstores import Chroma
from dotenv import load_dotenv

In [5]:
load_dotenv()

# Access the environment variables
os.environ["AZURE_OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_API_KEY")
os.environ["AZURE_OPENAI_ENDPOINT"] = os.getenv("AZURE_OPENAI_ENDPOINT")
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")

In [6]:
llm = AzureChatOpenAI(
      temperature=0.5,
      model="gpt-4o",
      openai_api_version="2024-02-01",
      azure_deployment="gpt4o",
      max_tokens=700)

embedding_model = AzureOpenAIEmbeddings(
    azure_deployment="text-embedding-3-small",  # Your Azure deployment name for embeddings
    openai_api_version="2024-02-01",
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"]
)

In [None]:
# from langchain.vectorstores import FAISS
# import faiss
# import pickle
# from langchain_community.docstore.in_memory import InMemoryDocstore

# index = faiss.read_index("./data/vector_data/faiss_index")

# with open("./data/vector_data/faiss_documents.pkl", "rb") as f:
#     docstore_dict = pickle.load(f)

# vector_store_new = FAISS(
#     embedding_function=embedding_model,
#     index=index,
#     docstore=InMemoryDocstore(docstore_dict),
#     index_to_docstore_id=dict(zip(range(len(docstore_dict)), docstore_dict.keys()))
# )

In [7]:
vector_store = Chroma(persist_directory="./data/vector_data/chroma_db", embedding_function=embedding_model)

  vector_store = Chroma(persist_directory="./data/vector_data/chroma_db", embedding_function=embedding_model)


In [8]:
retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 5}
    )
    
res = retriever.get_relevant_documents("Best policy for older people")

  res = retriever.get_relevant_documents("Best policy for older people")


In [10]:
def clean_json_response(response: str) -> str:
    """Extracts valid JSON from an LLM response, removing markdown formatting and extra text."""
    match = re.search(r"```json\n(.*?)\n```", response, re.DOTALL)
    return match.group(1) if match else response.strip()

def restructure_query(original_query: str) -> dict:
    prompt = f"""
    You are an expert in restructuring queries to retrieve data from a vector store. Your goal is to extract policy name, plan name, and query intent.

    Given the query: "{original_query}"

    Instructions:
    - Extract explicitly mentioned policies and plans.
    - For comparative queries, break them into structured subqueries covering benefits, waiting periods, and exclusions.
    - If a policy or plan is missing, include all relevant policies (ADITYA_BIRLA, CARE, HDFC_ERGO) or plans.
    - Be innovative: restructure, add, or subtract details as needed to generate meaningful subqueries.
    - Follow the same structure but do not copy exact queries; create diverse and insightful subqueries.
    - Return output strictly in JSON format.

    Available Plans:
    1. {{"company": "ADITYA_BIRLA", "plan": "activ_one_max"}}
    2. {{"company": "ADITYA_BIRLA", "plan": "activ_fit_plus"}}
    3. {{"company": "CARE", "plan": "supreme"}}
    4. {{"company": "CARE", "plan": "supreme_value_for_money"}}
    5. {{"company": "HDFC_ERGO", "plan": "optima_secure"}}
    6. {{"company": "HDFC_ERGO", "plan": "optima_super_secure"}}

    Examples:

    Query: "Waiting period of PED in case of HDFC ERGO Optima Secure."
    Output:
    {{"query1": {{"policy": "HDFC_ERGO", "plan": "optima_secure", "query": "Waiting period of Preexisting Diseases"}}}}

    Query: "Compare HDFC ERGO Optima Secure with Aditya Birla Activ One Max."
    Output:
    {{
        "query1": {{"policy": "HDFC_ERGO", "plan": "optima_secure", "query": "Benefits covered"}},
        "query2": {{"policy": "HDFC_ERGO", "plan": "optima_secure", "query": "Waiting Period and Exclusions"}},
        "query3": {{"policy": "ADITYA_BIRLA", "plan": "activ_one_max", "query": "Benefits covered"}},
        "query4": {{"policy": "ADITYA_BIRLA", "plan": "activ_fit_plus", "query": "Waiting Period and Exclusions"}}
    }}

    Query: "Which policy is best for older people"
    Output:
    {{
        "query1": {{"policy": "HDFC_ERGO", "plan": "optima_secure", "query": "Guidelines for older people (>50 years)"}},
        "query2": {{"policy": "ADITYA_BIRLA", "plan": "activ_fit_plus", "query": "Guidelines for older people (>50 years)"}},
        "query3": {{"policy": "HDFC_ERGO", "plan": "optima_super_secure", "query": "Guidelines for older people (>50 years)"}},
        "query4": {{"policy": "CARE", "plan": "supreme", "query": "Guidelines for older people (>50 years)"}},
        "query5": {{"policy": "CARE", "plan": "supreme_value_for_money", "query": "Guidelines for older people (>50 years)"}},
        "query6": {{"policy": "HDFC_ERGO", "plan": "optima_secure", "query": "Premium and affordability for senior citizens"}},
        "query7": {{"policy": "ADITYA_BIRLA", "plan": "activ_fit_plus", "query": "Pre-existing condition coverage for seniors"}},
        "query8": {{"policy": "CARE", "plan": "supreme", "query": "Lifetime renewability options for seniors"}},
        "query9": {{"policy": "HDFC_ERGO", "plan": "optima_super_secure", "query": "Special benefits for elderly policyholders"}},
        "query10": {{"policy": "ADITYA_BIRLA", "plan": "activ_one_max", "query": "Comparison of premium rates for senior citizens"}}
    }}

    Output Format:
    {{"query1": {{"policy": "", "plan": "", "query": ""}}}}
    Strictly follow this output format.
    """



    sub_queries_response = llm.predict(prompt)
    try:
        cleaned_response = clean_json_response(sub_queries_response)
        return json.loads(cleaned_response)
    except json.JSONDecodeError:
        print("Error: Invalid JSON from LLM!")
        return {}

In [None]:
# # query = "Waiting period of pre existing disease of all the plans"
# query = "Which Plan is best for older people"
# res = restructure_query(query)

In [11]:
def get_relevant_doc(policy, plan, query):
    filter_conditions = []
    if policy:
        filter_conditions.append({"company": {"$eq": policy}})
    if plan:
        filter_conditions.append({"plan": {"$eq": plan}})
    
    # Apply filter correctly
    if len(filter_conditions) > 1:
        combined_filter = {"$and": filter_conditions}
    elif len(filter_conditions) == 1:
        combined_filter = filter_conditions[0]  # Use single condition directly
    else:
        combined_filter = None  # No filter
    
    retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 5, "filter": combined_filter}
    )
    
    return retriever.get_relevant_documents(query)

def get_relevant_docs(res):
    docs = []
    if res.get('query'):
        docs.extend(get_relevant_doc(
            policy=res.get('policy'), 
            plan=res.get('plan'), 
            query=res.get('query')
        ))
    else:
        for v in res.values():
            docs.extend(get_relevant_doc(
                policy=v.get('policy'), 
                plan=v.get('plan'), 
                query=v.get('query')
            ))

    doc_entries = [
        f'"company": {doc.metadata["company"]} \n "plan": {doc.metadata["plan"]} \n "content": {doc.page_content}' for doc in docs
    ]

    return doc_entries

In [12]:
def refine_document_with_llm(query: str, retrieved_doc: str) -> str:
    prompt = f"""You are a grader assessing relevance of a retrieved document to a user query. If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. Give a score in between 0 to 10 indicate whether the document is relevant to the question.

    Instructions:
    - Score the retrieved document as 10 if it is directly related to the query.
    - Score the retrieved document as 0 if it is completely irrelevant to the query.
    - For all other cases, assign a score between 1(mostly irrelevant) and 9(mostly relevant) based on relevance.
    - In certain cases, direct information may not be available. For example, if a query concerns benefits or disadvantages for older people, the retrieved documents might only contain general policy information for this demographic. In such cases, these documents should be given a higher score(>5).

    - Only return the score no resoning no extra explanation otherwise you will be highly penalized
    
    Here is the retrieved document: \n\n {retrieved_doc} \n\n Here is the user question: \n\n {query}
    """

    res = llm.predict(prompt)

    return res

def refine_documents_with_llm(query, doc_entries):
    new_doc_entries = []

    for doc_entry in doc_entries:
        res = refine_document_with_llm(query, doc_entry)
        if int(res) >= 2:
            new_doc_entries.append(doc_entry)
        # else:
        #     print(doc_entry)

    new_doc_entries_str = "\n\n".join(new_doc_entries)
    return new_doc_entries_str

In [16]:
def final_content_refinement_tool(query, retrieved_docs):
    prompt = f"""
    You are an expert in content refinement. Given a user query and relevant documents retrieved from the database, generate a well-structured and comprehensive response using all available information. Ensure clarity, conciseness, coherence, and factual accuracy(based on retrieved_docs).

    User Query:
    {query}

    Retrieved Documents:
    {retrieved_docs}

    Instructions:
    - If `retrieved_docs` is empty, respond: "There is not enough information to answer your question."
    - If the provided documents do not adequately address the query, respond: "Sorry, we don't have enough data to address your question."

    Generate a refined and well-structured response to the original query based on the retrieved information.
    """
    
    res = llm.predict(prompt)
    return res


In [13]:
def GradeHallucinations(documents: str, generation: str) -> str:
    """
    Grades a student's answer based on given FACTS.
    
    - Score 1: The student's answer is grounded in the FACTS.
    - Score 0: The student's answer contains hallucinated information.
    
    Returns "0" or "1" as a string.
    """
    
    prompt = f"""
        You are a teacher grading a quiz. You will be given FACTS and a STUDENT ANSWER.
        
        Grading Criteria:
        1. Ensure the STUDENT ANSWER is grounded in the FACTS.
        2. Ensure the STUDENT ANSWER does not contain "hallucinated" information outside the scope of the FACTS.

        Score:
        - "1" if the student's answer is based on the FACTS.
        - "0" if the student's answer is not based on the FACTS.

        NOTE: Only return "0" or "1", otherwise you will be highly penalized.

        FACTS:
        {documents}
        
        STUDENT ANSWER:
        {generation}
        """
    
    return llm.predict(prompt).strip()

In [14]:
def generate_response(query):
    """
    Generates a response based on the query using a multi-step RAG pipeline.
    The response is saved to a Markdown file if it passes the hallucination check.
    """
    # Step 1: Restructure the query for better retrieval
    refined_query = restructure_query(query)
    
    # Step 2: Retrieve relevant documents based on the refined query
    relevant_docs = get_relevant_docs(refined_query)
    
    # Step 3: Refine the retrieved documents using an LLM
    refined_docs = refine_documents_with_llm(query, relevant_docs)
    
    # Step 4: Generate the final response based on the refined documents
    response = final_content_refinement_tool(query, refined_docs)
    
    # Step 5: Check if the response is based on the retrieved documents
    if GradeHallucinations(refined_docs, response):
        safe_query = re.sub(r'[<>:"/\\|?*]', '_', query)
        output_dir = "./data/generated_output"
        os.makedirs(output_dir, exist_ok=True)  # Ensure directory exists
        output_path = os.path.join(output_dir, f"{safe_query}.md")
        
        with open(output_path, "w", encoding="utf-8") as file:
            file.write(response)
        print(f"Success: Response saved to {output_path}")
    else:
        print("Error: The generated response is not factually grounded.")


In [17]:
query = "Does HDFC ERGO optima secure covers AYUSH treatment?"
generate_response(query)

Success: Response saved to ./data/generated_output\Does HDFC ERGO optima secure covers AYUSH treatment_.md


In [36]:
from langchain_community.tools.tavily_search import TavilySearchResults
web_search_tool = TavilySearchResults(k=3)

In [None]:
# Web search
query = "All details about CARE Supre OPD treatment"
docs = web_search_tool.invoke({"query": query})

In [38]:
docs

[{'title': 'Care Supreme Must-look Features & Claims Settlement Ratio',
  'url': 'https://1finance.co.in/product-scoring/health-insurance/care-supreme?gender=Male&age=46-50&family=self-&sum=10%20Lacs',
  'content': '... Finance: Explore the essential must-look features of Care Supreme and delve into their claims settlement ratio ... Claim Settlement Ratio (CSR) - Number92.81%.',
  'score': 0.91563576},
 {'title': 'Care Insurance Care Supreme Plan Features, Benefits, Review ...',
  'url': 'https://www.beshak.org/insurance/health-insurance/best-health-insurance-plans/care-insurance-care-supreme/',
  'content': 'Care Insurance Health Insurance Company has a claim settlement ratio of 90.50%. Network hospitals: Care Insurance Health Insurance Company has',
  'score': 0.7613660444444444},
 {'title': 'Care Health Insurance UPDATED Review 2025 - YouTube',
  'url': 'https://www.youtube.com/watch?v=gnJyocoSiMA',
  'content': "With a claim settlement ratio of 90% and updated operational metrics, 