The following changes have been made:
1. PDF cleanup
2. MultiQueryRetriever

In [None]:
# Mount Google Drive at /content/drive (this cell needs the Colab runtime,
# which provides the google.colab module).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import re
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers import MultiQueryRetriever
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, AIMessage
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from secret_key import my_openapi_key

# Pass the key imported from the local `secret_key` module. Assigning the
# quoted string "my_openapi_key" would hand the OpenAI client a placeholder
# instead of a credential.
os.environ["OPENAI_API_KEY"] = my_openapi_key

def get_pdf_text(pdf_paths):
    """Extract the text of every page of the given PDFs as one cleaned string.

    Args:
        pdf_paths: Iterable of PDF paths, each passed to ``PdfReader``.

    Returns:
        str: All page texts in order, with every run of whitespace collapsed
        to a single space and no leading/trailing whitespace. Returns ""
        when ``pdf_paths`` is empty.
    """
    page_texts = []
    for pdf_path in pdf_paths:
        pdf_reader = PdfReader(pdf_path)
        for page in pdf_reader.pages:
            # Guard against a page yielding no text (None) so one such page
            # cannot abort the whole extraction.
            page_texts.append(page.extract_text() or "")

    # Join pages with a separator so the last word of one page does not fuse
    # with the first word of the next, then normalise all whitespace.
    return re.sub(r'\s+', ' ', ' '.join(page_texts)).strip()

def get_text_chunks(text):
    """Split ``text`` into overlapping chunks ready for embedding.

    The splitter is configured with ``chunk_size=300`` and
    ``chunk_overlap=100``.

    Args:
        text: The full document text.

    Returns:
        The list produced by ``RecursiveCharacterTextSplitter.split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
    return splitter.split_text(text)

def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` with ``OpenAIEmbeddings`` and index them in FAISS.

    Args:
        text_chunks: List of text chunks to index.

    Returns:
        The store returned by ``FAISS.from_texts``.
    """
    return FAISS.from_texts(texts=text_chunks, embedding=OpenAIEmbeddings())

def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over ``vectorstore``.

    The store's base retriever is wrapped in a ``MultiQueryRetriever`` (the
    LLM rewrites each question into several variants) and combined with a
    ``ConversationBufferMemory`` kept under the key ``"chat_history"``.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain produced by ``ConversationalRetrievalChain.from_llm``.
    """
    llm = ChatOpenAI()
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Prompt used by MultiQueryRetriever to generate query variants.
    # Adjacent string literals keep source indentation out of the prompt text
    # (backslash continuations inside one literal would embed it).
    # NOTE(review): the variants are requested one per line because the
    # retriever's default output parser is understood to split on newlines —
    # confirm against the installed langchain version.
    query_prompt = PromptTemplate(
        input_variables=["question"],
        template=(
            "You are a Churchill car insurance agent. Your task is to generate five "
            "different versions of the given user question to retrieve relevant "
            "documents from a vector database. By generating multiple perspectives "
            "on the user question, your goal is to help the user overcome some of "
            "the limitations of the distance-based similarity search. "
            "Provide these alternative questions separated by newlines.\n"
            "Original question: {question}"
        ),
    )

    retriever = MultiQueryRetriever.from_llm(
        vectorstore.as_retriever(),
        llm,
        prompt=query_prompt,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm, retriever=retriever, memory=memory
    )

def handle_userinput(conversation_chain, user_question, citation):
    """Ask the chain one question and attach a citation to its response.

    Args:
        conversation_chain: Chain that accepts ``{"question": ...}``.
        user_question: The question text.
        citation: Value stored under ``response["citation"]``.

    Returns:
        The chain's response mapping with an added ``"citation"`` key.
    """
    payload = {"question": user_question}
    # Prefer ``invoke``: calling a chain object directly is deprecated in
    # recent LangChain releases. Objects without ``invoke`` are still called
    # directly so older chains keep working.
    invoke = getattr(conversation_chain, "invoke", None)
    response = invoke(payload) if callable(invoke) else conversation_chain(payload)
    response["citation"] = citation
    return response

# Source PDF(s) for the knowledge base
pdf_paths = ["Doc/policy-booklet-0923.pdf"]

# PDF -> cleaned text -> chunks -> FAISS index -> conversational chain
raw_text = get_pdf_text(pdf_paths)
text_chunks = get_text_chunks(raw_text)
vectorstore = get_vectorstore(text_chunks)
conversation_chain = get_conversation_chain(vectorstore)

# Sample question plus a fixed citation string (not yet derived from the document)
user_question = "What is covered in case of theft?"
citation = "Source: Policy document"

# Run the question through the chain
response = handle_userinput(conversation_chain, user_question, citation)

# Print the transcript; messages that are neither human nor AI are skipped
for message in response["chat_history"]:
    if isinstance(message, HumanMessage):
        print(f"User: {message.content}")
        continue
    if isinstance(message, AIMessage):
        print(f"Bot: {message.content}")

# Finish with the citation
print(response["citation"])


User: What is covered in case of theft?
Bot: If your car is stolen, damaged as a result of theft, or damaged by fire, lightning, or an explosion, your insurance may cover the costs. The cover for theft typically includes repairing the damage, replacing stolen items, or settling the claim with a payment. However, specific details may vary based on the type of insurance you have, such as TPFT Essential Comp or Comp+. It's best to refer to your car insurance policy details for precise information on what is covered in case of theft.
Source: Policy document


### Using synthetic dataset for training
(Few-shot prompting)

In [None]:
import os
import re
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers import MultiQueryRetriever
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, AIMessage
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Load the key from the local `secret_key` module and pass the variable.
# Assigning the quoted string "my_openapi_key" would hand the OpenAI client a
# placeholder instead of a credential.
from secret_key import my_openapi_key

os.environ["OPENAI_API_KEY"] = my_openapi_key

def get_pdf_text(pdf_paths):
    """Extract the text of every page of the given PDFs as one cleaned string.

    Args:
        pdf_paths: Iterable of PDF paths, each passed to ``PdfReader``.

    Returns:
        str: All page texts in order, with every run of whitespace collapsed
        to a single space and no leading/trailing whitespace. Returns ""
        when ``pdf_paths`` is empty.
    """
    page_texts = []
    for pdf_path in pdf_paths:
        pdf_reader = PdfReader(pdf_path)
        for page in pdf_reader.pages:
            # Guard against a page yielding no text (None) so one such page
            # cannot abort the whole extraction.
            page_texts.append(page.extract_text() or "")

    # Join pages with a separator so the last word of one page does not fuse
    # with the first word of the next, then normalise all whitespace.
    return re.sub(r'\s+', ' ', ' '.join(page_texts)).strip()

def get_text_chunks(text):
    """Split ``text`` into overlapping chunks ready for embedding.

    The splitter is configured with ``chunk_size=300`` and
    ``chunk_overlap=100``.

    Args:
        text: The full document text.

    Returns:
        The list produced by ``RecursiveCharacterTextSplitter.split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
    return splitter.split_text(text)

def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` with ``OpenAIEmbeddings`` and index them in FAISS.

    Args:
        text_chunks: List of text chunks to index.

    Returns:
        The store returned by ``FAISS.from_texts``.
    """
    return FAISS.from_texts(texts=text_chunks, embedding=OpenAIEmbeddings())

def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over ``vectorstore``.

    The store's base retriever is wrapped in a ``MultiQueryRetriever`` (the
    LLM rewrites each question into several variants) and combined with a
    ``ConversationBufferMemory`` kept under the key ``"chat_history"``.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain produced by ``ConversationalRetrievalChain.from_llm``.
    """
    llm = ChatOpenAI()
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Prompt used by MultiQueryRetriever to generate query variants.
    # Adjacent string literals keep source indentation out of the prompt text
    # (backslash continuations inside one literal would embed it).
    # NOTE(review): the variants are requested one per line because the
    # retriever's default output parser is understood to split on newlines —
    # confirm against the installed langchain version.
    query_prompt = PromptTemplate(
        input_variables=["question"],
        template=(
            "You are a Churchill car insurance agent. Your task is to generate five "
            "different versions of the given user question to retrieve relevant "
            "documents from a vector database. By generating multiple perspectives "
            "on the user question, your goal is to help the user overcome some of "
            "the limitations of the distance-based similarity search. "
            "Provide these alternative questions separated by newlines.\n"
            "Original question: {question}"
        ),
    )

    retriever = MultiQueryRetriever.from_llm(
        vectorstore.as_retriever(),
        llm,
        prompt=query_prompt,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm, retriever=retriever, memory=memory
    )

def handle_userinput(conversation_chain, user_question, citation):
    """Ask the chain one question and attach a citation to its response.

    Args:
        conversation_chain: Chain that accepts ``{"question": ...}``.
        user_question: The question text.
        citation: Value stored under ``response["citation"]``.

    Returns:
        The chain's response mapping with an added ``"citation"`` key.
    """
    payload = {"question": user_question}
    # Prefer ``invoke``: calling a chain object directly is deprecated in
    # recent LangChain releases. Objects without ``invoke`` are still called
    # directly so older chains keep working.
    invoke = getattr(conversation_chain, "invoke", None)
    response = invoke(payload) if callable(invoke) else conversation_chain(payload)
    response["citation"] = citation
    return response

# pandas is required for the dataset below; this cell's import block does not
# bring it in, so import it here to keep the cell runnable on a fresh kernel.
import pandas as pd

# Specify the path to your PDF file
pdf_paths = ["Doc/policy-booklet-0923.pdf"]

# Extract text from the PDF
raw_text = get_pdf_text(pdf_paths)

# Split the text into chunks
text_chunks = get_text_chunks(raw_text)

# Load additional dataset
dataset_path = "Updated_Testcase_Dataset_v2.csv"
df = pd.read_csv(dataset_path)

# Prepare additional dataset text chunks: one "Question/Excerpt/Answer" string
# per row, split with the same chunker as the PDF text
additional_text_chunks = []
for index, row in df.iterrows():
    question_excerpt_answer = f"Question: {row['Question']} Excerpt: {row['Excerpt']} Answer: {row['Answer']}"
    additional_text_chunks.extend(get_text_chunks(question_excerpt_answer))

# Combine text chunks from PDF and additional dataset
all_text_chunks = text_chunks + additional_text_chunks

# Build the vector store once, from the combined chunks. Indexing the PDF
# chunks on their own first would embed them twice for a store that is
# immediately replaced.
vectorstore = get_vectorstore(all_text_chunks)

# Create the conversation chain on the combined vector store
conversation_chain = get_conversation_chain(vectorstore)

# Example user input
user_question = "Give me the numebr to call If I need to make a claim"

# Specify the citation
citation = "Source: Policy document and additional dataset"

# Handle the user input and get a response
response = handle_userinput(conversation_chain, user_question, citation)

# Display the response
for message in response["chat_history"]:
    if isinstance(message, HumanMessage):
        print(f"User: {message.content}")
    elif isinstance(message, AIMessage):
        print(f"Bot: {message.content}")

# Print the citation
print(response["citation"])


User: Give me the numebr to call If I need to make a claim
Bot: If you need to make a claim, you should call 0345 878 6261.
Source: Policy document and additional dataset


### Adding metadata
Trying to add more context

In [None]:
# %pip installs into the running kernel's environment; pinned for reproducibility
%pip install PyPDF2==3.0.1

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/232.6 kB[0m [31m3.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [None]:
# %pip installs into the running kernel's environment; pinned for reproducibility
%pip install langchain_community==0.2.4

Collecting langchain_community
  Downloading langchain_community-0.2.4-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.21.3-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.2/49.2 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: mypy-extensio

In [None]:
# %pip installs into the running kernel's environment; pinned for reproducibility
%pip install langchain==0.2.3

Collecting langchain
  Downloading langchain-0.2.3-py3-none-any.whl (974 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m974.0/974.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-core<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_core-0.2.5-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.7/314.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.1-py3-none-any.whl (23 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.77-py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.2/125.2 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.0->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting packaging<24.0,>=23.2 (from langchain-

In [None]:
# %pip installs into the running kernel's environment; pinned for reproducibility
%pip install faiss-cpu==1.8.0

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m50.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0


In [None]:
# %pip installs into the running kernel's environment.
# TODO: pin a tiktoken version once the installed one has been recorded.
%pip install -q tiktoken

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.1 MB[0m [31m4.2 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.1 MB[0m [31m16.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# %pip installs into the running kernel's environment; pinned for reproducibility
%pip install openai==1.33.0

Collecting openai
  Downloading openai-1.33.0-py3-none-any.whl (325 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m325.5/325.5 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
Successfully installed h11-0.14.0 httpcore-1.0.5 ht

### Final RAG code

In [2]:
import os
import re
import pandas as pd
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.retrievers import MultiQueryRetriever
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, AIMessage
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [3]:
# Load the key from the local `secret_key` module and pass the variable.
# Assigning the quoted string "my_openapi_key" would hand the OpenAI client a
# placeholder instead of a credential.
from secret_key import my_openapi_key

os.environ["OPENAI_API_KEY"] = my_openapi_key

def get_pdf_text(pdf_paths):
    """Extract the text of every page of the given PDFs as one cleaned string.

    Args:
        pdf_paths: Iterable of PDF paths, each passed to ``PdfReader``.

    Returns:
        str: All page texts in order, with every run of whitespace collapsed
        to a single space and no leading/trailing whitespace. Returns ""
        when ``pdf_paths`` is empty.
    """
    page_texts = []
    for pdf_path in pdf_paths:
        pdf_reader = PdfReader(pdf_path)
        for page in pdf_reader.pages:
            # Guard against a page yielding no text (None) so one such page
            # cannot abort the whole extraction.
            page_texts.append(page.extract_text() or "")

    # Join pages with a separator so the last word of one page does not fuse
    # with the first word of the next, then normalise all whitespace.
    return re.sub(r'\s+', ' ', ' '.join(page_texts)).strip()

def get_text_chunks_with_metadata(text, source, page_number=None, chunk_size=300, chunk_overlap=100):
    """Split ``text`` into chunks and pair each chunk with provenance metadata.

    Args:
        text: The text to split.
        source: Label stored under ``"source"`` for every chunk.
        page_number: Value stored under ``"page_number"`` (default ``None``).
        chunk_size: ``chunk_size`` given to the splitter (default 300).
        chunk_overlap: ``chunk_overlap`` given to the splitter (default 100).

    Returns:
        list[tuple[str, dict]]: ``(chunk, metadata)`` pairs in split order,
        where metadata has the keys ``"source"``, ``"page_number"`` and
        ``"chunk_index"`` (the chunk's position within this ``text``).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return [
        (chunk, {"source": source, "page_number": page_number, "chunk_index": i})
        for i, chunk in enumerate(text_splitter.split_text(text))
    ]

def get_vectorstore(text_chunks_with_metadata):
    """Index ``(text, metadata)`` pairs in FAISS using ``OpenAIEmbeddings``.

    Args:
        text_chunks_with_metadata: List of ``(text, metadata)`` pairs.

    Returns:
        The store returned by ``FAISS.from_texts``, built with the texts and
        their metadata in the same order.
    """
    embedder = OpenAIEmbeddings()
    chunk_texts = [chunk for chunk, _meta in text_chunks_with_metadata]
    chunk_metadatas = [meta for _chunk, meta in text_chunks_with_metadata]
    return FAISS.from_texts(
        texts=chunk_texts, embedding=embedder, metadatas=chunk_metadatas
    )

def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over ``vectorstore``.

    The store's base retriever is wrapped in a ``MultiQueryRetriever`` (the
    LLM rewrites each question into several variants) and combined with a
    ``ConversationBufferMemory`` kept under the key ``"chat_history"``.
    The chat model is created with ``temperature=0``.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain produced by ``ConversationalRetrievalChain.from_llm``.
    """
    llm = ChatOpenAI(temperature=0)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Prompt used by MultiQueryRetriever to generate query variants.
    # Adjacent string literals keep source indentation out of the prompt text
    # (backslash continuations inside one literal would embed it).
    # NOTE(review): the variants are requested one per line because the
    # retriever's default output parser is understood to split on newlines —
    # confirm against the installed langchain version.
    query_prompt = PromptTemplate(
        input_variables=["question"],
        template=(
            "You are a Churchill car insurance agent. Your task is to generate five "
            "different versions of the given user question to retrieve relevant "
            "documents from a vector database. By generating multiple perspectives "
            "on the user question, your goal is to help the user overcome some of "
            "the limitations of the distance-based similarity search. "
            "Provide these alternative questions separated by newlines.\n"
            "Original question: {question}"
        ),
    )

    retriever = MultiQueryRetriever.from_llm(
        vectorstore.as_retriever(),
        llm,
        prompt=query_prompt,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=llm, retriever=retriever, memory=memory
    )

def handle_userinput(conversation_chain, user_question, citation):
    """Ask the chain one question and attach a citation to its response.

    Args:
        conversation_chain: Chain that accepts ``{"question": ...}``.
        user_question: The question text.
        citation: Value stored under ``response["citation"]``.

    Returns:
        The chain's response mapping with an added ``"citation"`` key.
    """
    payload = {"question": user_question}
    # Prefer ``invoke``: calling a chain object directly is deprecated in
    # recent LangChain releases. Objects without ``invoke`` are still called
    # directly so older chains keep working.
    invoke = getattr(conversation_chain, "invoke", None)
    response = invoke(payload) if callable(invoke) else conversation_chain(payload)
    response["citation"] = citation
    return response

# Specify the path to your PDF file
pdf_paths = ["/content/sample_data/policy-booklet-0923.pdf"]

# Extract text from each PDF and chunk it, tagging every chunk with its source path
text_chunks_with_metadata = []
for pdf_path in pdf_paths:
    raw_text = get_pdf_text([pdf_path])
    text_chunks_with_metadata.extend(get_text_chunks_with_metadata(raw_text, pdf_path))

# Load additional dataset
dataset_path = "/content/sample_data/TrainingSet_10.csv"
df = pd.read_csv(dataset_path)

# Prepare additional dataset text chunks with metadata: one
# "Question/Excerpt/Answer" string per row, tagged with source "dataset"
additional_text_chunks_with_metadata = []
for index, row in df.iterrows():
    question_excerpt_answer = f"Question: {row['Question']} Excerpt: {row['Excerpt']} Answer: {row['Answer']}"
    additional_text_chunks_with_metadata.extend(get_text_chunks_with_metadata(question_excerpt_answer, "dataset"))

# Combine text chunks from PDF and additional dataset
all_text_chunks_with_metadata = text_chunks_with_metadata + additional_text_chunks_with_metadata

# Build the vector store once, from the combined chunks. Indexing the PDF
# chunks on their own first would embed them twice for a store that is
# immediately replaced.
vectorstore = get_vectorstore(all_text_chunks_with_metadata)

# Create the conversation chain on the combined vector store
conversation_chain = get_conversation_chain(vectorstore)

# Example user input
user_question = "What is covered in case of theft?"

# Specify the citation
citation = "Source: Policy document and additional dataset"

# Handle the user input and get a response
response = handle_userinput(conversation_chain, user_question, citation)

# Display the response
for message in response["chat_history"]:
    if isinstance(message, HumanMessage):
        print(f"User: {message.content}")
    elif isinstance(message, AIMessage):
        print(f"Bot: {message.content}")

# Print the citation
print(response["citation"])


  warn_deprecated(
  warn_deprecated(
  warn_deprecated(
  warn_deprecated(


User: What is covered in case of theft?
Bot: In case of theft, the policy covers situations where your car is stolen, damaged by an attempted theft, or damaged by fire.
Source: Policy document and additional dataset


### Evaluation 
Using a local LLM (Mistral via Ollama) as the judge, with prompt engineering, on the same dataset as before.

In [None]:
from langchain_community.llms.ollama import Ollama

In [None]:
# Show the models available in the local Ollama installation
!ollama list

NAME                   	ID          	SIZE  	MODIFIED    
llama3:latest          	a6990ed6be41	4.7 GB	6 weeks ago	
mistral:latest         	61e88e884507	4.1 GB	6 weeks ago	
nomic-embed-text:latest	0a109f422b47	274 MB	6 weeks ago	
phi3:latest            	a2c89ceaed85	2.3 GB	5 weeks ago	


In [None]:
# Template for the judge prompt. The placeholders {expected_response} and
# {actual_response} are filled with str.format; the judge is asked for a
# 'true'/'false' verdict plus a brief explanation.
EVAL_PROMPT = """
Expected Response: {expected_response}
Actual Response: {actual_response}
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.
"""


# Function to evaluate the RAG system using Ollama model
def evaluate_rag_system_from_csv(conversation_chain, csv_path):
    """Score the RAG chain against a CSV of test cases using an LLM judge.

    Each row's ``Question`` is sent through ``handle_userinput``; the chain's
    reply and the row's ``Answer`` are formatted into ``EVAL_PROMPT`` and
    judged by the Ollama ``mistral`` model. Every prompt and colour-coded
    verdict is printed, followed by the overall accuracy.

    Args:
        conversation_chain: Chain passed to ``handle_userinput``.
        csv_path: CSV with ``Question`` and ``Answer`` columns.

    Returns:
        float: Fraction of rows judged 'true' (0.0 when the CSV has no rows).
    """
    import re  # local import so the function does not rely on another cell's imports

    df = pd.read_csv(csv_path)
    correct = 0
    total = len(df)
    eval_model = Ollama(model="mistral")  # Initialize the Ollama model
    chain_memory = getattr(conversation_chain, "memory", None)

    for _, row in df.iterrows():
        question = row['Question']
        expected_answer = row['Answer']

        # Test cases are independent: drop the history left by the previous
        # question so it cannot influence this answer or keep growing the prompt.
        if chain_memory is not None:
            chain_memory.clear()

        response = handle_userinput(conversation_chain, question, "Source: Policy document and additional dataset")
        bot_answer = response["chat_history"][-1].content  # Last message is the bot's response

        eval_prompt = EVAL_PROMPT.format(expected_response=expected_answer, actual_response=bot_answer)

        # Invoke the Ollama model with the evaluation prompt
        evaluation_results_str = eval_model.invoke(eval_prompt)
        evaluation_results_str_cleaned = evaluation_results_str.strip().lower()

        print(eval_prompt)

        # The first standalone 'true'/'false' is the verdict. A plain substring
        # test would count "false ... not true" (or "untrue") as a pass.
        verdict = re.search(r"\b(true|false)\b", evaluation_results_str_cleaned)
        if verdict is None:
            print("\033[93m" + f"Unexpected Response: {evaluation_results_str_cleaned}" + "\033[0m")
        elif verdict.group(1) == "true":
            print("\033[92m" + f"Response: {evaluation_results_str_cleaned}" + "\033[0m")
            correct += 1
        else:
            print("\033[91m" + f"Response: {evaluation_results_str_cleaned}" + "\033[0m")

    # Guard the empty-CSV case instead of dividing by zero.
    accuracy = correct / total if total else 0.0
    print(f"Accuracy: {accuracy * 100:.2f}%")
    return accuracy


# Test-case CSV used for this evaluation run
csv_path = "Updated_Testcase_Dataset_v2.csv"

# Score the existing conversation chain against every row of the CSV
evaluate_rag_system_from_csv(conversation_chain=conversation_chain, csv_path=csv_path)


Expected Response: To make a claim for windscreen damage, you need to contact the windscreen claims number provided, and the policy will cover the cost of replacing or repairing broken glass in the windscreen, sunroof, or windows.
Actual Response: To make a claim for windscreen damage, you need to contact the windscreen claims number provided, which is 0800 328 9150. This will enable you to report the damage and initiate the process for the replacement or repair of the broken glass in the windscreen, sunroof, or windows of your car.
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.

[92mResponse: true. the actual response provides the specific claims number for reporting windscreen damage and confirms that the policy covers the cost of replacement or repair of broken glass in the windscreen, sunroof, or windows.[0m

Expected Response: No, the policy does not cover theft or attempted theft if the car i


Expected Response: If your new car is stolen and not recovered, or written off, Comprehensive Plus will replace it with one of the same make and model if your car is less than 2 years old.
Actual Response: If your new car is stolen and not recovered, or written off, Comprehensive Plus will replace it with one of the same make and model.
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.

[92mResponse: true. the actual response accurately states that if a new car is stolen and not recovered, or written off, comprehensive plus will replace it with one of the same make and model. however, the expected response adds the condition that the car must be less than 2 years old. this additional detail is not mentioned in the actual response but does not affect the overall accuracy of the statement.[0m

Expected Response: Yes, personal belongings are covered if they are lost or damaged by fire, theft, attempted t


Expected Response: Yes, the policy covers vandalism damage, and it won’t affect your No Claim Discount if you have Comprehensive or Comprehensive Plus cover.
Actual Response: Yes, the policy covers vandalism damage. If you claim for damage to your car that’s a result of vandalism, it won’t affect the No Claim Discount on the policy.
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.

[92mResponse: true. the actual response correctly states that vandalism damage is covered under the insurance policy and that it will not affect the no claim discount if comprehensive or comprehensive plus cover is in place.[0m

Expected Response: The policy does not cover any claims if the car is driven by someone who is not insured on the policy.
Actual Response: If someone drives your car without being insured and gets into an accident, the insurance policy may not cover the damages or losses. This could result in you b

Accuracy improved from 80% to 96%.

### Testing with a mixed dataset (50 questions): 30 generated, 20 added manually
The manually added questions were chosen after researching the most common car insurance policy questions.

In [None]:
# Template for the judge prompt. The placeholders {expected_response} and
# {actual_response} are filled with str.format; the judge is asked for a
# 'true'/'false' verdict plus a brief explanation.
EVAL_PROMPT = """
Expected Response: {expected_response}
Actual Response: {actual_response}
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.
"""


# Function to evaluate the RAG system using Ollama model
def evaluate_rag_system_from_csv(conversation_chain, csv_path):
    """Score the RAG chain against a CSV of test cases using an LLM judge.

    Each row's ``Question`` is sent through ``handle_userinput``; the chain's
    reply and the row's ``Answer`` are formatted into ``EVAL_PROMPT`` and
    judged by the Ollama ``mistral`` model. Every prompt and colour-coded
    verdict is printed, followed by the overall accuracy.

    Args:
        conversation_chain: Chain passed to ``handle_userinput``.
        csv_path: CSV with ``Question`` and ``Answer`` columns.

    Returns:
        float: Fraction of rows judged 'true' (0.0 when the CSV has no rows).
    """
    import re  # local import so the function does not rely on another cell's imports

    df = pd.read_csv(csv_path)
    correct = 0
    total = len(df)
    eval_model = Ollama(model="mistral")  # Initialize the Ollama model
    chain_memory = getattr(conversation_chain, "memory", None)

    for _, row in df.iterrows():
        question = row['Question']
        expected_answer = row['Answer']

        # Test cases are independent: drop the history left by the previous
        # question so it cannot influence this answer or keep growing the prompt.
        if chain_memory is not None:
            chain_memory.clear()

        response = handle_userinput(conversation_chain, question, "Source: Policy document and additional dataset")
        bot_answer = response["chat_history"][-1].content  # Last message is the bot's response

        eval_prompt = EVAL_PROMPT.format(expected_response=expected_answer, actual_response=bot_answer)

        # Invoke the Ollama model with the evaluation prompt
        evaluation_results_str = eval_model.invoke(eval_prompt)
        evaluation_results_str_cleaned = evaluation_results_str.strip().lower()

        print(eval_prompt)

        # The first standalone 'true'/'false' is the verdict. A plain substring
        # test would count "false ... not true" (or "untrue") as a pass.
        verdict = re.search(r"\b(true|false)\b", evaluation_results_str_cleaned)
        if verdict is None:
            print("\033[93m" + f"Unexpected Response: {evaluation_results_str_cleaned}" + "\033[0m")
        elif verdict.group(1) == "true":
            print("\033[92m" + f"Response: {evaluation_results_str_cleaned}" + "\033[0m")
            correct += 1
        else:
            print("\033[91m" + f"Response: {evaluation_results_str_cleaned}" + "\033[0m")

    # Guard the empty-CSV case instead of dividing by zero.
    accuracy = correct / total if total else 0.0
    print(f"Accuracy: {accuracy * 100:.2f}%")
    return accuracy


# Test-case CSV used for this evaluation run
csv_path = "Cleaned_Testcase_Dataset.csv"

# Score the existing conversation chain against every row of the CSV
evaluate_rag_system_from_csv(conversation_chain=conversation_chain, csv_path=csv_path)


Expected Response: To make a claim for windscreen damage, you need to contact the windscreen claims number provided, and the policy will cover the cost of replacing or repairing broken glass in the windscreen, sunroof, or windows.
Actual Response: To make a claim for windscreen damage, you need to contact the windscreen claims number provided in your policy documents. The insurance company will guide you through the process and cover the cost of replacing or repairing the broken glass in the windscreen, sunroof, or windows.
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.

[92mResponse: true. the actual response is similar to the expected response, but it provides more detail about the process and who will guide the policyholder through it. both responses indicate that contacting the windscreen claims number in the policy documents is necessary to make a claim and that the insurance company will cover


Expected Response: If your new car is stolen and not recovered, or written off, Comprehensive Plus will replace it with one of the same make and model if your car is less than 2 years old.
Actual Response: If your new car is stolen and not recovered, or written off, Comprehensive Plus will replace it with one of the same make and model. This cover is provided as long as you are the first and only registered keeper of the car.
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.

[92mResponse: true, the actual response correctly states that comprehensive plus will replace a new car (that is stolen and not recovered or written off) with one of the same make and model as long as the person making the claim is the first and only registered keeper of the car.[0m

Expected Response: Yes, personal belongings are covered if they are lost or damaged by fire, theft, attempted theft, or accident while theyâre in 


Expected Response: Yes, the policy covers vandalism damage, and it wonât affect your No Claim Discount if you have Comprehensive or Comprehensive Plus cover.
Actual Response: Yes, the policy covers vandalism damage. If you claim for damage to your car that’s a result of vandalism, it won’t affect the No Claim Discount on the policy.
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.

[92mResponse: true, the actual response accurately states that vandalism damage is covered under the policy and will not affect the no claim discount if comprehensive or comprehensive plus cover is in place.[0m

Expected Response: The policy does not cover any claims if the car is driven by someone who is not insured on the policy.
Actual Response: If someone drives your car without being insured, the insurance policy may not cover any loss, damage, liability, or injury that occurs while the car is being driven by someon


Expected Response: We can base your premium on the basis of your driving record. If you change your car insurance and thereÕs an extra premium to pay, you can pay this at thetime of the change, or add it to your monthly payments.
Actual Response: Your premium can be impacted by various factors, including your driving record, the type of coverage you choose, the make and model of your car, where you live, your age, and any additional optional extras you add to your policy. Additionally, if you choose DriveSure, our telematics insurance product, your premium may be affected based on your driving style and behavior as monitored by the technology.
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.

[92mResponse: true. the actual response provides a more comprehensive answer than the expected response, mentioning several factors that can impact a car insurance premium beyond just the driving record.[0m

Exp


Expected Response: Your NCD is protected if you make no more than 2 claims in 3 years. Protected NCD doesnât guarantee your premium wonât increase if a claim is made.
Actual Response: Your No Claim Discount (NCD) is protected if you make up to 2 claims in 3 years of insurance in a row. This means that your NCD won't be affected by these claims on your policy. However, if you make more than 2 claims in 3 years, your NCD may not be protected.
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.

[92mResponse: true. the actual response accurately states that making up to 2 claims in 3 years won't affect the no claim discount (ncd), but making more than 2 claims may result in the ncd not being protected.[0m

Expected Response: If your car is written off, payments will be made to the lease or hire purchase company. Any remaining balance will be paid to you if you have the option to become the full owner a


Expected Response: You must not do or refrain from doing anything that would increase the amount of the claim without written permission, such as admitting liability or negotiating to settle any claim.
Actual Response: You must not do, or refrain from doing, anything that would increase the amount of the claim without the written permission of the insurance provider. For example, admitting liability for, or negotiating to settle, any claim could potentially increase the amount claimed.
---
(Answer with 'true' or 'false' for all entries) Does the actual response match? Provide a brief explanation for your assessment.

[92mResponse: true. the actual response reaffirms and provides an example of what actions should not be taken without written permission from the insurance provider to avoid increasing the amount of a claim.[0m

Expected Response: While driving abroad, car keys are covered if stolen when you have Comprehensive cover with a Foreign Use Extension or Comprehensive Plus cov

**Accuracy: 96%**

### Evaluation - Relevancy, Recall & Precision
Using Sentence Transformers

In [None]:
import os
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util

# Take the OpenAI API key from the `secret_key` module imported in the first cell
# instead of embedding it in the notebook. The literal key that used to be written
# here was stored in plain text and should be revoked.
os.environ["OPENAI_API_KEY"] = my_openapi_key

# `conversation_chain` is expected to exist already in the kernel (nothing is imported
# here). The self-assignment is a no-op that raises NameError early if it is missing.
conversation_chain = conversation_chain

# Load the sentence transformer model used to embed expected and generated answers
model = SentenceTransformer('all-MiniLM-L6-v2')

def evaluate_rag_system(conversation_chain, csv_path):
    """Score the chain's answers against the expected answers listed in a CSV file.

    For every row of the CSV (columns 'Question' and 'Answer') the question is sent
    through `handle_userinput`, and the last message of the returned chat history is
    taken as the bot's answer. Three numbers are computed per row:

    * relevancy - cosine similarity between the `model` embeddings of both answers
    * precision - shared whitespace tokens / distinct tokens in the bot answer
    * recall    - shared whitespace tokens / distinct tokens in the expected answer

    Per-row details and the three averages are printed; nothing is returned.

    Args:
        conversation_chain: chain object passed straight to `handle_userinput`.
        csv_path: path of the test-case CSV file.
    """
    testcases = pd.read_csv(csv_path)
    source_note = "Source: Policy document and additional dataset"
    # One (precision, recall, relevancy) triple per CSV row, in file order.
    per_question = []

    for _, testcase in testcases.iterrows():
        question = testcase['Question']
        expected_answer = testcase['Answer']

        response = handle_userinput(conversation_chain, question, source_note)
        # The final chat-history entry is the reply to the question just asked.
        bot_answer = response["chat_history"][-1].content

        # Semantic closeness of the two answers.
        expected_vec = model.encode(expected_answer, convert_to_tensor=True)
        bot_vec = model.encode(bot_answer, convert_to_tensor=True)
        relevancy_score = util.pytorch_cos_sim(expected_vec, bot_vec).item()

        # Lexical overlap on distinct whitespace-separated tokens (case- and
        # punctuation-sensitive, since the text is not normalised).
        expected_vocab = set(expected_answer.split())
        bot_vocab = set(bot_answer.split())
        shared = len(expected_vocab & bot_vocab)
        if bot_vocab:
            precision = shared / len(bot_vocab)
        else:
            precision = 0
        if expected_vocab:
            recall = shared / len(expected_vocab)
        else:
            recall = 0

        per_question.append((precision, recall, relevancy_score))

        print(f"Question: {question}")
        print(f"Expected: {expected_answer}")
        print(f"Bot: {bot_answer}")
        print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, Relevancy: {relevancy_score:.2f}")
        print(f"Citation: {response['citation']}\n")

    avg_precision = np.mean([scores[0] for scores in per_question])
    avg_recall = np.mean([scores[1] for scores in per_question])
    avg_relevancy = np.mean([scores[2] for scores in per_question])

    print(f"Average Precision: {avg_precision:.2f}")
    print(f"Average Recall: {avg_recall:.2f}")
    print(f"Average Relevancy: {avg_relevancy:.2f}")

# Test-case CSV read by evaluate_rag_system; it must provide 'Question' and 'Answer'
# columns. The path is relative, so it is resolved against the kernel's working directory.
csv_path = "Cleaned_Testcase_Dataset.csv"

# Run the evaluation: prints per-question scores followed by the three averages
evaluate_rag_system(conversation_chain, csv_path)



Question: What is the procedure to make a claim for windscreen damage?
Expected: To make a claim for windscreen damage, you need to contact the windscreen claims number provided, and the policy will cover the cost of replacing or repairing broken glass in the windscreen, sunroof, or windows.
Bot: To make a claim for windscreen damage, you need to contact the windscreen claims number provided, and the policy will cover the cost of replacing or repairing broken glass in the windscreen, sunroof, or windows.
Precision: 1.00, Recall: 1.00, Relevancy: 1.00
Citation: Source: Policy document and additional dataset

Question: Does the policy cover theft if the car is left unlocked?
Expected: No, the policy does not cover theft or attempted theft if the car is left unlocked.
Bot: No, the policy does not cover theft or attempted theft if the car is left unlocked.
Precision: 1.00, Recall: 1.00, Relevancy: 1.00
Citation: Source: Policy document and additional dataset

Question: Are electric car cha

Question: Can you use your car abroad under this policy?
Expected: Yes, you can use your car abroad, but you may need a Green Card and should take a European Accident Statement with you.
Bot: Yes, you can use your car abroad under this policy if you have Comprehensive cover and you've added the Foreign Use Extension to your cover before you travel. This information will be shown on your car insurance details.
Precision: 0.22, Recall: 0.33, Relevancy: 0.72
Citation: Source: Policy document and additional dataset

Question: What happens if your car is written off?
Expected: If your car is written off, the insurance will pay the market value of the car, and the car will become the property of the insurance company.
Bot: If your car is written off, typically the insurance company will pay you the actual cash value of your car at the time of the accident, minus any deductible you may have. This amount can be used towards purchasing a new vehicle. It's important to check your policy for spec

Question: What impacts my premium?
Expected: We can base your premium on the basis of your driving record. If you change your car insurance and thereÕs an extra premium to pay, you can pay this at thetime of the change, or add it to your monthly payments.
Bot: Factors that can impact your premium include your driving record, the type of coverage you have (Essentials, Comprehensive, Comprehensive Plus), the type of car you drive, your age, where you live, and any additional coverages or benefits you choose to add to your policy. Additionally, your premium may be affected by driver-monitoring technology that assesses your driving style and behavior.
Precision: 0.27, Recall: 0.38, Relevancy: 0.64
Citation: Source: Policy document and additional dataset

Question: When the premuim would not be refunded?
Expected: We will not refund any car insurance premium if you have made a car insurance claim or if one has been made against you during the period of cover (regardless of whether you pay a

Question: Under what conditions will the Uninsured Driver Promise apply?
Expected: The Uninsured Driver Promise applies if you are in an accident that isnât your fault, and the driver of the other vehicle is uninsured. You need to provide the registration number, make and model of the vehicle that hit you, the driverâs details, and any witness information.
Bot: The Uninsured Driver Promise will apply if you are involved in a road traffic accident that isn't your fault, and the driver of the vehicle that hits your car is uninsured. In this case, your No Claim Discount on the policy will not be affected, and you will not need to pay an excess.
Precision: 0.50, Recall: 0.58, Relevancy: 0.83
Citation: Source: Policy document and additional dataset

Question: What does the policy cover under Motor Legal Cover?
Expected: Motor Legal Cover includes costs for pursuing or defending claims related to road traffic accidents, motor contract disputes, and motoring offences, provided there is a 

**RESULTS**
- Average Precision: 0.50
- Average Recall: 0.63
- Average Relevancy: 0.82

### Ragas Evaluation

In [None]:
!pip -q install ragas

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.1/86.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.1/542.1 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.1/71.1 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source o

In [None]:
!pip show ragas

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Name: ragas
Version: 0.1.9
Summary: 
Home-page: 
Author: 
Author-email: 
License: 
Location: /Applications/anaconda3/lib/python3.11/site-packages
Requires: appdirs, datasets, langchain, langchain-community, langchain-core, langchain-openai, nest-asyncio, numpy, openai, pysbd, tiktoken
Required-by: 


In [None]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: evaluate
Successfully installed evaluate-0.4.2


In [4]:
from ragas.metrics import answer_relevancy, faithfulness, context_recall, context_precision, answer_correctness
from ragas import evaluate
from datasets import Dataset

Sample run on the test dataset that was created, scored with the faithfulness metric only.

In [None]:
# Read the evaluation rows (columns used below: question, answer, contexts)
dataset_path = "/content/sample_data/Cleaned_Testcase_Dataset_Cleaned.csv"
df = pd.read_csv(dataset_path)

# When the first contexts value is a string, turn every row into a list of strings by
# dropping the surrounding square brackets and splitting on commas.
# NOTE(review): this also splits on commas inside a single context - confirm the CSV format.
first_contexts = df['contexts'].iloc[0]
if isinstance(first_contexts, str):
    df['contexts'] = df['contexts'].map(lambda raw: raw.strip('[]').split(','))

# Build the column-oriented dict that Dataset.from_dict receives
data_dict = {column: df[column].tolist() for column in ('question', 'answer', 'contexts')}
dataset = Dataset.from_dict(data_dict)

# Score the dataset with the faithfulness metric only
score = evaluate(dataset, metrics=[faithfulness])

# Per-row scores as a pandas DataFrame, printed for inspection
score_df = score.to_pandas()
print(score_df)

Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]



                                             question  \
0   What is the procedure to make a claim for wind...   
1   Does the policy cover theft if the car is left...   
2   Are electric car charging cables covered under...   
3          What is the coverage for personal accident   
4   What is DriveSure and how does it affect my pr...   
5   How much will be covered for the theft of car ...   
6                 What does Motor Legal Cover include   
7   Are courtesy cars available for all types of c...   
8        What are the territorial limits for coverage   
9   What should you do if you are involved in an a...   
10          What is covered under Section 1 Liability   
11  What is the coverage for new car replacement u...   
12   Are personal belongings covered under the policy   
13  What is the maximum number of claims allowed u...   
14     What should you do if you need to make a claim   
15  Does the policy cover mechanical or electrical...   
16      Can you use your car ab

In [4]:
def evaluate_rag_system(conversation_chain, csv_path, output_path="/content/sample_data/results.csv"):
    """Ask the chain every question in a CSV file and save its answers to a new CSV.

    Unlike the earlier function of the same name in this notebook (which this
    definition replaces once the cell runs), no scores are computed here: the
    function only collects the bot's answers.

    Args:
        conversation_chain: chain object passed straight to `handle_userinput`.
        csv_path: path of the test-case CSV; needs 'Question' and 'Answer' columns.
        output_path: where the collected answers are written. Defaults to the
            location that was previously hard-coded.

    The output CSV has the columns 'Question', 'Expected Answer', 'Bot Answer' and
    'Citation'. Each row is also printed. Nothing is returned.
    """
    result_columns = ["Question", "Expected Answer", "Bot Answer", "Citation"]
    citation = "Source: Policy document and additional dataset"

    df = pd.read_csv(csv_path)
    results = []

    for _, row in df.iterrows():
        question = row['Question']
        expected_answer = row['Answer']

        response = handle_userinput(conversation_chain, question, citation)
        bot_answer = response["chat_history"][-1].content  # Last message is the bot's response

        # Collect the result
        results.append({
            "Question": question,
            "Expected Answer": expected_answer,
            "Bot Answer": bot_answer,
            "Citation": response['citation']
        })

        print(f"Question: {question}")
        print(f"Expected: {expected_answer}")
        print(f"Bot: {bot_answer}")
        print(f"Citation: {response['citation']}\n")

    # Fixing the columns keeps the header row in the file even when the input CSV
    # has no rows.
    results_df = pd.DataFrame(results, columns=result_columns)
    results_df.to_csv(output_path, index=False)
    print(f"Results saved to {output_path}")

# Test-case CSV read by evaluate_rag_system; it must provide 'Question' and 'Answer' columns
csv_path = "/content/sample_data/Cleaned_Testcase_Dataset.csv"

# Run the evaluation: prints each question/answer pair and writes the collected
# answers to a results CSV
evaluate_rag_system(conversation_chain, csv_path)

Question: What is the procedure to make a claim for windscreen damage?
Expected: To make a claim for windscreen damage, you need to contact the windscreen claims number provided, and the policy will cover the cost of replacing or repairing broken glass in the windscreen, sunroof, or windows.
Bot: To make a claim for windscreen damage, you need to contact the windscreen claims number provided, and the policy will cover the cost of replacing or repairing broken glass in the windscreen, sunroof, or windows.
Citation: Source: Policy document and additional dataset

Question: Does the policy cover theft if the car is left unlocked?
Expected: No, the policy does not cover theft or attempted theft if the car is left unlocked.
Bot: No, the policy does not cover theft or attempted theft if the car is left unlocked.
Citation: Source: Policy document and additional dataset

Question: Are electric car charging cables covered under the policy?
Expected: Yes, the home charger and charging cables are

In [14]:
# Read the evaluation rows (columns used below: question, answer, contexts)
dataset_path = "/content/sample_data/results 2.csv"
df = pd.read_csv(dataset_path)

# When the first contexts value is a string, turn every row into a list of strings by
# dropping the surrounding square brackets and splitting on commas.
# NOTE(review): this also splits on commas inside a single context - confirm the CSV format.
first_contexts = df['contexts'].iloc[0]
if isinstance(first_contexts, str):
    df['contexts'] = df['contexts'].map(lambda raw: raw.strip('[]').split(','))

# Build the column-oriented dict that Dataset.from_dict receives
data_dict = {column: df[column].tolist() for column in ('question', 'answer', 'contexts')}
dataset = Dataset.from_dict(data_dict)

# Score the dataset with two metrics: faithfulness and answer relevancy
metrics = [faithfulness, answer_relevancy]
score = evaluate(dataset, metrics=metrics)

# Per-row scores as a pandas DataFrame, printed for inspection
score_df = score.to_pandas()

print(score_df)

Evaluating:   0%|          | 0/100 [00:00<?, ?it/s]

                                             question  \
0   What is the procedure to make a claim for wind...   
1   Does the policy cover theft if the car is left...   
2   Are electric car charging cables covered under...   
3         What is the coverage for personal accident?   
4   What is DriveSure and how does it affect my pr...   
5   How much will be covered for the theft of car ...   
6                What does Motor Legal Cover include?   
7   Are courtesy cars available for all types of c...   
8       What are the territorial limits for coverage?   
9   What should you do if you are involved in an a...   
10        What is covered under Section 1: Liability?   
11  What is the coverage for new car replacement u...   
12  Are personal belongings covered under the policy?   
13  What is the maximum number of claims allowed u...   
14    What should you do if you need to make a claim?   
15  Does the policy cover mechanical or electrical...   
16     Can you use your car abr

In [None]:
# Save the per-row scores in `score_df` to a new CSV file. The path is relative, so the
# file is created in the kernel's working directory; the DataFrame index is not written.
results_path = "evaluation_results.csv"
score_df.to_csv(results_path, index=False)

In [18]:
# Mean faithfulness and answer relevancy over all evaluated rows.
# The means get their own names: assigning to `answer_relevancy` would overwrite the
# ragas metric imported under that name, and re-running the evaluation cell would then
# pass a float in its `metrics` list.
aggregate_faithfulness = score_df['faithfulness'].mean()
aggregate_answer_relevancy = score_df['answer_relevancy'].mean()

# Display the scores
print(f"Aggregate Faithfulness Score: {aggregate_faithfulness:.2f}")
print(f"Aggregate Relevency: {aggregate_answer_relevancy:.2f}")

Aggregate Faithfulness Score: 0.64
Aggregate Relevency: 0.93


In [6]:
# Load the dataset
dataset_path = "/content/sample_data/results 3.csv"
df = pd.read_csv(dataset_path)

# When the contexts values are strings, turn each into a list of strings by dropping
# the surrounding square brackets and splitting on commas. The guard inspects the
# `contexts` column itself, because that is the column being parsed; it previously
# checked `ground_truth`.
# NOTE(review): the split also breaks on commas inside a single context - confirm the CSV format.
if isinstance(df['contexts'].iloc[0], str):
    df['contexts'] = df['contexts'].apply(lambda x: x.strip('[]').split(','))

# Convert the DataFrame to a Dataset object
data_dict = {
    'question': df['question'].tolist(),
    'answer': df['answer'].tolist(),
    'contexts': df['contexts'].tolist(),
    'ground_truth': df['ground_truth'].tolist()
}
dataset = Dataset.from_dict(data_dict)

# Evaluate the dataset with the context_recall metric
metrics = [context_recall]
score = evaluate(dataset, metrics=metrics)

# Convert the score to a pandas DataFrame and display it
score_df2 = score.to_pandas()

print(score_df2)

Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

                                             question  \
0   What is the procedure to make a claim for wind...   
1   Does the policy cover theft if the car is left...   
2   Are electric car charging cables covered under...   
3         What is the coverage for personal accident?   
4   What is DriveSure and how does it affect my pr...   
5   How much will be covered for the theft of car ...   
6                What does Motor Legal Cover include?   
7   Are courtesy cars available for all types of c...   
8       What are the territorial limits for coverage?   
9   What should you do if you are involved in an a...   
10        What is covered under Section 1: Liability?   
11  What is the coverage for new car replacement u...   
12  Are personal belongings covered under the policy?   
13  What is the maximum number of claims allowed u...   
14    What should you do if you need to make a claim?   
15  Does the policy cover mechanical or electrical...   
16     Can you use your car abr

In [8]:
# Mean context recall over all evaluated rows. Stored under its own name so the ragas
# `context_recall` metric imported earlier is not overwritten with a float.
aggregate_context_recall = score_df2['context_recall'].mean()

print(f"Aggregate context_recall: {aggregate_context_recall:.2f}")

Aggregate context_recall: 0.86


In [9]:
# Load the dataset
dataset_path = "/content/sample_data/results 3.csv"
df = pd.read_csv(dataset_path)

# When the contexts values are strings, turn each into a list of strings by dropping
# the surrounding square brackets and splitting on commas. The guard inspects the
# `contexts` column itself, because that is the column being parsed; it previously
# checked `ground_truth`.
# NOTE(review): the split also breaks on commas inside a single context - adjust the
# delimiter as needed.
if isinstance(df['contexts'].iloc[0], str):
    df['contexts'] = df['contexts'].apply(lambda x: x.strip('[]').split(','))

# Convert the DataFrame to a Dataset object
data_dict = {
    'question': df['question'].tolist(),
    'answer': df['answer'].tolist(),
    'contexts': df['contexts'].tolist(),
    'ground_truth': df['ground_truth'].tolist()
}
dataset = Dataset.from_dict(data_dict)

# Evaluate the dataset with the answer_correctness metric
metrics = [answer_correctness]
score = evaluate(dataset, metrics=metrics)

# Convert the score to a pandas DataFrame and display it
score_df3 = score.to_pandas()

print(score_df3)

Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

                                             question  \
0   What is the procedure to make a claim for wind...   
1   Does the policy cover theft if the car is left...   
2   Are electric car charging cables covered under...   
3         What is the coverage for personal accident?   
4   What is DriveSure and how does it affect my pr...   
5   How much will be covered for the theft of car ...   
6                What does Motor Legal Cover include?   
7   Are courtesy cars available for all types of c...   
8       What are the territorial limits for coverage?   
9   What should you do if you are involved in an a...   
10        What is covered under Section 1: Liability?   
11  What is the coverage for new car replacement u...   
12  Are personal belongings covered under the policy?   
13  What is the maximum number of claims allowed u...   
14    What should you do if you need to make a claim?   
15  Does the policy cover mechanical or electrical...   
16     Can you use your car abr

In [16]:
# Mean answer correctness over all evaluated rows. Stored under its own name so the
# ragas `answer_correctness` metric imported earlier is not overwritten with a float.
aggregate_answer_correctness = score_df3['answer_correctness'].mean()

print(f"Aggregate answer_correctness: {aggregate_answer_correctness:.2f}")

Aggregate answer_correctness: 0.60


In [12]:
# Load the dataset
dataset_path = "/content/sample_data/results 3.csv" 
df = pd.read_csv(dataset_path)


if isinstance(df['ground_truth'].iloc[0], str):
    df['contexts'] = df['contexts'].apply(lambda x: x.strip('[]').split(','))  # Adjust delimiter as needed

# Convert the DataFrame to a Dataset object
data_dict = {
    'question': df['question'].tolist(),
    'answer': df['answer'].tolist(),
    'contexts': df['contexts'].tolist(),
    'ground_truth': df['ground_truth'].tolist()
}
dataset = Dataset.from_dict(data_dict)

# Evaluate the dataset using multiple metrics
metrics = [context_precision]
score = evaluate(dataset, metrics=metrics)

# Convert the score to a pandas DataFrame and display it
score_df4 = score.to_pandas()

print(score_df4)

Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

                                             question  \
0   What is the procedure to make a claim for wind...   
1   Does the policy cover theft if the car is left...   
2   Are electric car charging cables covered under...   
3         What is the coverage for personal accident?   
4   What is DriveSure and how does it affect my pr...   
5   How much will be covered for the theft of car ...   
6                What does Motor Legal Cover include?   
7   Are courtesy cars available for all types of c...   
8       What are the territorial limits for coverage?   
9   What should you do if you are involved in an a...   
10        What is covered under Section 1: Liability?   
11  What is the coverage for new car replacement u...   
12  Are personal belongings covered under the policy?   
13  What is the maximum number of claims allowed u...   
14    What should you do if you need to make a claim?   
15  Does the policy cover mechanical or electrical...   
16     Can you use your car abr

In [15]:
# Mean context precision over all evaluated rows. Stored under its own name so the
# ragas `context_precision` metric imported earlier is not overwritten with a float.
aggregate_context_precision = score_df4['context_precision'].mean()

print(f"Aggregate context_precision: {aggregate_context_precision:.2f}")

Aggregate context_precision: 0.91


**RAGAS Evaluation Results:**

- Aggregate Faithfulness Score: 0.64
- Aggregate Relevancy: 0.93
- Aggregate context_recall: 0.86
- Aggregate answer_correctness: 0.60
- Aggregate context_precision: 0.91