In [2]:
import streamlit as st
from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    get_response_synthesizer,
)      


from llama_index.core.memory import ChatSummaryMemoryBuffer

from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core import VectorStoreIndex

import os
from llama_index.core import Settings , Document 
from dotenv import load_dotenv
from llama_index.retrievers.bm25 import BM25Retriever
import nest_asyncio
import streamlit as st
from llama_index.core.response_synthesizers.type import ResponseMode
import nest_asyncio
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_cloud_services import LlamaParse
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
from llama_index.llms.groq import Groq
from llama_index.core.node_parser import SentenceSplitter
import pandas as pd
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama


resource module not available on Windows


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
nest_asyncio.apply()

In [4]:
load_dotenv()

groq_api_key = os.getenv("GROQ_API_KEY")
llama_parse_api_key = os.getenv("LLAMA_PARSE_API_KEY")

#llm = Groq(model = "llama-3.1-8b-instant", api_key = "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" )
#embed_model = HuggingFaceEmbedding(model_name = "nomic-ai/nomic-embed-text-v1" ,trust_remote_code=True)

llm = Ollama(model="llama3.2:3b", request_timeout=120.0)
embed_model = OllamaEmbedding(
    model_name="jina/jina-embeddings-v2-base-en",
    base_url="http://localhost:11434",
    ollama_additional_kwargs={"mirostat": 0},
)

Settings.llm = llm
Settings.embed_model = embed_model

In [5]:
prompt = """Key Fields to Extract:
Policyholder Info: Name, Address, Contact
Policy Details: Policy Number, Customer ID, Start & Expiry Dates, Plan Type, Renewal Date
Coverage: Base Sum Insured, Safeguard, Booster Benefit, Total Sum Insured
Premium: Net Premium, Taxes, Gross Premium (in numbers & words)
Nominee & Intermediary: Name, Relationship, Intermediary Contact
Claims & Grievance: Settlement Time, Submission Process, Customer Support Links
Exclusions & Waiting Periods: Pre-existing Conditions, Specific Waiting Times, Permanent Exclusions
Benefits & Riders: Cashless Claims, Room Rent, Air Ambulance, Health Checkups, No-Claim Bonus
"""
documents_with_instruction = LlamaParse(
    result_type="markdown",
    api_key = llama_parse_api_key,
    content_guideline_instruction = prompt
    ).load_data("Sample HI Policy.pdf")

Started parsing the file under job_id d50fa479-0a0e-4337-9220-6289db5c932b


In [6]:
with open("niva_bupa_policy.md", "w", encoding="utf-8") as md_file:
    for doc in documents_with_instruction:
        md_file.write(doc.text + "\n\n")


In [45]:
documents = SimpleDirectoryReader(input_files=['niva_bupa_policy.md']).load_data()

index = VectorStoreIndex.from_documents(
    documents=documents,
    show_progress=True,
    transformations=[SentenceSplitter(chunk_size= 512, chunk_overlap=128)],
    num_workers = 1,
)

bm_ret = BM25Retriever.from_defaults(index = index , similarity_top_k = 10)

retriever = QueryFusionRetriever(
    [
        index.as_retriever(similarity_top_k=10),
        bm_ret,
    ],
    similarity_top_k=5,
    num_queries = 1,
    use_async=True,
    verbose=True
)
response_synthesizer = get_response_synthesizer(structured_answer_filtering=False)
engine = RetrieverQueryEngine(retriever , response_synthesizer= response_synthesizer)

Parsing nodes: 100%|██████████| 1/1 [00:00<00:00, 13.66it/s]
Generating embeddings: 100%|██████████| 115/115 [00:04<00:00, 25.72it/s]


In [8]:
df = pd.read_excel("RAG_Test_Questions.xlsx")
test_list = df.values.tolist()

data = []

for question in test_list:
    answer = engine.query(question[0])
    data.append({"question" : question , "answer" : answer.response})

new_df = pd.DataFrame(data)
    

KeyboardInterrupt: 

In [None]:
new_df.to_csv("test_result.csv")

In [9]:
!deepeval set-ollama llama3.2:3b

🙌 Congratulations! You're now using a local Ollama model for all evals that 
require an LLM.


E0000 00:00:1740924997.993280   14820 init.cc:232] grpc_wait_for_shutdown_with_timeout() timed out.


In [46]:
from deepeval.integrations.llama_index import DeepEvalFaithfulnessEvaluator, DeepEvalAnswerRelevancyEvaluator
from deepeval import evaluate
from deepeval.metrics import HallucinationMetric
from deepeval.test_case import LLMTestCase

evaluator1 = DeepEvalFaithfulnessEvaluator()
evaluator2 = DeepEvalAnswerRelevancyEvaluator()
metric = HallucinationMetric(threshold=0.5)


queries = [
    "What is the policy number?",
    "What is the name of the insurance product?",
    "What is the policy number mentioned in the document?",
    "What is the coverage for alternative treatments like Ayurveda or Homeopathy?",
    "Where can the insured check the list of network hospitals?"
]

eval_results = []

for query in queries:
    
    response_object = engine.query(query)
    #actual_output = response_object.response
    
    retrieved_docs = retriever.retrieve(query)
    context = [doc.text for doc in retrieved_docs]

    faithfulness_score = evaluator1.evaluate_response(query=query, response=response_object)
    relevance_score = evaluator2.evaluate_response(query=query, response=response_object)

    test_case = LLMTestCase(input=query, actual_output=response_object, context=context)
    metric.measure(test_case)

    eval_results.append({
        "query": query,
        "hallucination_score": metric.score,
        "faithfulness_score": faithfulness_score.score,  
        "relevance_score": relevance_score.score,  
        "reason": metric.reason
    })

for result in eval_results:
    print(f"Query: {result['query']}")
    print(f"Faithfulness Score: {result['faithfulness_score']}")
    print(f"Relevance Score: {result['relevance_score']}")
    print(f"Hallucination Score: {result['hallucination_score']}")
    print(f"Reason: {result['reason']}")
    print("=" * 50)


Query: What is the policy number?
Faithfulness Score: 0.0
Relevance Score: 1.0
Hallucination Score: 0.0
Reason: The hallucination score of 0.00 indicates no alignment/contradictions between the actual output and contexts, as there are no provided factual alignments or contradictions.
Query: What is the name of the insurance product?
Faithfulness Score: 1.0
Relevance Score: 1.0
Hallucination Score: 0.0
Reason: The hallucination score of 0.00 indicates no contradictions or factual alignments found, implying that the actual output does not diverge from known contexts.
Query: What is the policy number mentioned in the document?
Faithfulness Score: 1.0
Relevance Score: 0.5
Hallucination Score: 0
Reason: The hallucination score of 0.00 indicates that there are no alignments or contradictions between the actual output and contexts, suggesting that all responses were accurate and reliable.
Query: What is the coverage for alternative treatments like Ayurveda or Homeopathy?
Faithfulness Score: 1

In [17]:
eval_df = pd.DataFrame(eval_results)
eval_df.to_csv("eval.csv")