In [1]:
import os
import re

import pandas as pd
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import Ollama
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader

from secret_key import my_openapi_key

# Export the key imported from secret_key. The previous code assigned the
# quoted text "my_openapi_key", i.e. a literal placeholder, not the key itself.
os.environ["OPENAI_API_KEY"] = my_openapi_key

def get_pdf_text(pdf_paths):
    """Return the extracted text of every page of every PDF, concatenated.

    Args:
        pdf_paths: Iterable of paths, each handed to ``PdfReader``.

    Returns:
        One string: files in the given order, pages in document order,
        joined with no separator. Empty string when ``pdf_paths`` is empty.
    """
    return "".join(
        page.extract_text()
        for path in pdf_paths
        for page in PdfReader(path).pages
    )

def get_text_chunks(text, chunk_size=300, chunk_overlap=100):
    """Split ``text`` into overlapping chunks with RecursiveCharacterTextSplitter.

    Args:
        text: The full document text to split.
        chunk_size: Value passed as the splitter's ``chunk_size``
            (defaults to the previously hard-coded 300).
        chunk_overlap: Value passed as the splitter's ``chunk_overlap``
            (defaults to the previously hard-coded 100).

    Returns:
        Whatever ``split_text`` returns for ``text`` (the list of chunks).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_text(text)

def get_vectorstore(text_chunks):
    """Build a FAISS vector store over ``text_chunks`` using OpenAIEmbeddings.

    Args:
        text_chunks: The texts to index.

    Returns:
        The store produced by ``FAISS.from_texts``.
    """
    embedder = OpenAIEmbeddings()
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)

def get_conversation_chain(vectorstore):
    """Assemble a ConversationalRetrievalChain backed by ``vectorstore``.

    The chain is built from a default ``ChatOpenAI`` model, the store's
    retriever, and a ``ConversationBufferMemory`` keyed on "chat_history"
    that returns message objects.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    chat_model = ChatOpenAI()
    history = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
    retriever = vectorstore.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model, retriever=retriever, memory=history
    )

def handle_userinput(conversation_chain, user_question, prompt_template=None):
    """Send a question to the conversation chain and return its response.

    Args:
        conversation_chain: Callable invoked with ``{"question": ...}``.
        user_question: The user's question text.
        prompt_template: Optional template with one positional ``{}``
            placeholder; when given, the question is wrapped via
            ``prompt_template.format(user_question)``. When omitted, the raw
            question is sent, so the two-argument calls made by the
            evaluation cells work against this definition too.

    Returns:
        Whatever ``conversation_chain`` returns.
    """
    if prompt_template is None:
        question = user_question
    else:
        question = prompt_template.format(user_question)
    return conversation_chain({"question": question})

# Specify the path to your PDF file
pdf_paths = ["Doc/policy-booklet-0923.pdf"]

# Extract text from the PDF
raw_text = get_pdf_text(pdf_paths)

# Split the text into chunks
text_chunks = get_text_chunks(raw_text)

# Create a vector store from the text chunks
vectorstore = get_vectorstore(text_chunks)

# Create a conversation chain with the vector store
conversation_chain = get_conversation_chain(vectorstore)

# Template that wraps the user's question; `{}` is filled by handle_userinput
prompt_template = "Please provide information about {} from within the policy document. If there are any other question request the user to stick to the context only."

# Example user input
user_question = "What is the policy on coverage for other people?"

# Handle the user input and get a response
response = handle_userinput(conversation_chain, user_question, prompt_template)

# Display the response
for message in response["chat_history"]:
    # LangChain tags the user's turns with type "human" (model turns are "ai").
    # Comparing against "user" never matched, so every message printed as "Bot".
    if message.type == "human":
        print(f"User: {message.content}")
    else:
        print(f"Bot: {message.content}")


  warn_deprecated(
  warn_deprecated(
  warn_deprecated(


Bot: Please provide information about Provide me the number to call, if I need to claim from within the policy document. If there are any other question request the user to stick to the context only.
Bot: If you need to make a claim according to the policy document, you can call 0345 878 6261 for general claims or 0800 328 9150 for windscreen claims. Stick to the context provided for any other questions.


### Evaluation of RAG - Accuracy

In [19]:
# Prompt template for the judge model. The {expected_response} and
# {actual_response} placeholders are filled with str.format, and the model is
# asked to answer 'true' or 'false'.
EVAL_PROMPT = """
Expected Response: {expected_response}
Actual Response: {actual_response}
---
(Answer with 'true' or 'false') Does the actual response match the expected response? 
"""

def _parse_verdict(judge_output):
    """Return True/False for the first whole-word 'true'/'false' found, else None."""
    match = re.search(r"\b(true|false)\b", judge_output.strip().lower())
    if match is None:
        return None
    return match.group(1) == "true"


def evaluate_rag_system_from_csv(conversation_chain, csv_path):
    """Evaluate the RAG chain against a CSV of test cases using an Ollama judge.

    For each row, the 'Question' column is sent to the chain, the last
    message of the returned chat history is taken as the bot's answer, and
    the "mistral" Ollama model is asked (via EVAL_PROMPT) whether it matches
    the 'Answer' column.

    Args:
        conversation_chain: Chain passed through to ``handle_userinput``.
        csv_path: Path to a CSV with 'Question' and 'Answer' columns.

    Returns:
        The fraction of rows judged correct (0.0-1.0), or None when the CSV
        has no rows. Progress and the final accuracy are also printed.
    """
    df = pd.read_csv(csv_path)
    total = len(df)
    if total == 0:
        # Avoid dividing by zero below; there is nothing to score.
        print("Accuracy: n/a (no test cases found)")
        return None

    correct = 0
    undetermined = 0
    eval_model = Ollama(model="mistral")  # Initialize the Ollama model

    # NOTE(review): if conversation_chain keeps chat memory, earlier questions
    # may influence later answers - confirm whether cases should be independent.
    for _, row in df.iterrows():
        question = row['Question']
        expected_answer = row['Answer']

        response = handle_userinput(conversation_chain, question)
        bot_answer = response["chat_history"][-1].content  # Last message is the bot's response

        eval_prompt = EVAL_PROMPT.format(expected_response=expected_answer, actual_response=bot_answer)

        # Invoke the Ollama model with the evaluation prompt
        evaluation_results_str = eval_model.invoke(eval_prompt)
        evaluation_results_str_cleaned = evaluation_results_str.strip().lower()

        print(eval_prompt)

        # Use the first verdict word rather than a bare substring test, so a
        # reply such as "false, it is not true that ..." is not counted correct.
        verdict = _parse_verdict(evaluation_results_str_cleaned)
        if verdict is True:
            # Print response in Green if it is correct.
            print("\033[92m" + f"Response: {evaluation_results_str_cleaned}" + "\033[0m")
            correct += 1
        elif verdict is False:
            # Print response in Red if it is incorrect.
            print("\033[91m" + f"Response: {evaluation_results_str_cleaned}" + "\033[0m")
        else:
            # Neither 'true' nor 'false': report it instead of dropping it
            # silently. It still counts against accuracy, as before.
            undetermined += 1
            print("\033[93m" + f"Undetermined response: {evaluation_results_str_cleaned}" + "\033[0m")

    accuracy = correct / total
    print(f"Accuracy: {accuracy * 100:.2f}%")
    if undetermined:
        print(f"Undetermined verdicts: {undetermined} of {total}")
    return accuracy

# Location of the question/answer test cases
csv_path = "Updated_Testcase_Dataset_v2.csv"

# Score the chain against those test cases with the LLM judge
evaluate_rag_system_from_csv(conversation_chain=conversation_chain, csv_path=csv_path)


Expected Response: To make a claim for windscreen damage, you need to contact the windscreen claims number provided, and the policy will cover the cost of replacing or repairing broken glass in the windscreen, sunroof, or windows.
Actual Response: To make a claim for windscreen damage, you would need to contact Churchill at 0800 328 9150 if you have Essentials, Comprehensive, or Comprehensive Plus cover. They will guide you through the process of having your windscreen repaired or replaced. Additionally, if you have Motor Legal Cover, you can contact the Motor Legal Helpline at 0345 246 2408 for assistance with your claim. Remember that there may be excess amounts to pay for each incident you claim for, as outlined in your policy.
---
(Answer with 'true' or 'false') Does the actual response match the expected response? 

[92mResponse: true, but the actual response provides more detailed information about the process and contact numbers for making a windscreen damage claim with church


Expected Response: Section 1: Liability covers you if you're found to be legally responsible for an accident involving your car that injures or kills someone or damages their property.
Actual Response: Section 1: Liability covers the payment of reasonable legal costs or expenses to defend or represent you or any driver covered by the policy if you have to go to court due to an accident covered by the policy.
---
(Answer with 'true' or 'false') Does the actual response match the expected response? 

[91mResponse: false. the actual response is about legal costs related to an accident, not liability coverage for injuries or property damages.[0m

Expected Response: If your new car is stolen and not recovered, or written off, Comprehensive Plus will replace it with one of the same make and model if your car is less than 2 years old.
Actual Response: Under Comprehensive Plus cover, your car will be replaced with a new one of the same make and model if it is less than 2 years old when it’s


Expected Response: Yes, the policy covers vandalism damage, and it won’t affect your No Claim Discount if you have Comprehensive or Comprehensive Plus cover.
Actual Response: Yes, the policy covers vandalism damage. If you claim for damage to your car that's a result of vandalism, it won't affect the No Claim Discount on this policy. Just make sure to follow the necessary steps provided in the policy if you need to make a claim for vandalism damage.
---
(Answer with 'true' or 'false') Does the actual response match the expected response? 

[92mResponse: true. both responses convey that there is no impact on no claim discount when making a claim for vandalism damage under comprehensive or comprehensive plus car insurance policies. the second response provides some additional instructions to follow the necessary steps if making a claim, but it does not change the core message of the expected response.[0m

Expected Response: The policy does not cover any claims if the car is driven by 

With this approach (a local LLM judge plus prompt engineering), the RAG system achieves 80% accuracy.

### RAG Evaluation - Relevancy, Precision & Recall (Using SentenceTransformer)

In [6]:
import os
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from secret_key import my_openapi_key

# Export the key imported from secret_key. The previous code assigned the
# quoted text "my_openapi_key", i.e. a literal placeholder, not the key itself.
os.environ["OPENAI_API_KEY"] = my_openapi_key

# `conversation_chain` must already exist in the kernel (it is built in the
# first cell). The former `conversation_chain = conversation_chain` line was a
# no-op self-assignment and has been dropped.

# Load the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to handle user input and get response
def handle_userinput(conversation_chain, user_question, prompt_template=None):
    """Send a question to the conversation chain and return its response.

    This cell redefines the helper from the first cell. The optional
    ``prompt_template`` keeps the three-argument call used there working
    after this definition has replaced the original one.

    Args:
        conversation_chain: Callable invoked with ``{"question": ...}``.
        user_question: The user's question text.
        prompt_template: Optional template with one positional ``{}``
            placeholder; when given, the question is wrapped via
            ``prompt_template.format(user_question)``.

    Returns:
        Whatever ``conversation_chain`` returns.
    """
    if prompt_template is None:
        question = user_question
    else:
        question = prompt_template.format(user_question)
    return conversation_chain({"question": question})

# Function to evaluate precision, recall, and relevancy using sentence-transformers
def evaluate_rag_system(conversation_chain, csv_path):
    """Print per-question and average precision, recall and relevancy.

    Each CSV row's 'Question' is sent to the chain and the last chat-history
    message is compared with the row's 'Answer':

    * relevancy - cosine similarity of the two sentence embeddings;
    * precision / recall - overlap of whitespace-separated token sets,
      relative to the bot's tokens and the expected tokens respectively.

    Args:
        conversation_chain: Chain passed through to ``handle_userinput``.
        csv_path: Path to a CSV with 'Question' and 'Answer' columns.

    Returns:
        None; all results are printed.
    """
    test_cases = pd.read_csv(csv_path)
    precision_scores, recall_scores, relevancy_scores = [], [], []

    for _, case in test_cases.iterrows():
        question = case['Question']
        expected_answer = case['Answer']

        # The bot's reply is the final entry of the returned chat history
        chat_history = handle_userinput(conversation_chain, question)["chat_history"]
        bot_answer = chat_history[-1].content

        # Semantic closeness: embed both answers and take their cosine similarity
        reference_vec = model.encode(expected_answer, convert_to_tensor=True)
        answer_vec = model.encode(bot_answer, convert_to_tensor=True)
        relevancy_score = util.pytorch_cos_sim(reference_vec, answer_vec).item()

        # Lexical closeness: shared tokens between the two answers
        reference_vocab = set(expected_answer.split())
        answer_vocab = set(bot_answer.split())
        shared = len(reference_vocab.intersection(answer_vocab))

        if answer_vocab:
            precision = shared / len(answer_vocab)
        else:
            precision = 0
        if reference_vocab:
            recall = shared / len(reference_vocab)
        else:
            recall = 0

        precision_scores.append(precision)
        recall_scores.append(recall)
        relevancy_scores.append(relevancy_score)

        print(f"Question: {question}")
        print(f"Expected: {expected_answer}")
        print(f"Bot: {bot_answer}")
        print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, Relevancy: {relevancy_score:.2f}\n")

    avg_precision = np.mean(precision_scores)
    avg_recall = np.mean(recall_scores)
    avg_relevancy = np.mean(relevancy_scores)

    print(f"Average Precision: {avg_precision:.2f}")
    print(f"Average Recall: {avg_recall:.2f}")
    print(f"Average Relevancy: {avg_relevancy:.2f}")

# Location of the question/answer test cases
csv_path = "Updated_Testcase_Dataset_v2.csv"

# Score the chain with the embedding and token-overlap metrics
evaluate_rag_system(conversation_chain=conversation_chain, csv_path=csv_path)




Question: What is the procedure to make a claim for windscreen damage?
Expected: To make a claim for windscreen damage, you need to contact the windscreen claims number provided, and the policy will cover the cost of replacing or repairing broken glass in the windscreen, sunroof, or windows.
Bot: To make a claim for windscreen damage, you can call the Windscreen Claims number at 0800 328 9150 if you have Essentials, Comprehensive, or Comprehensive Plus cover with Churchill insurance. They will guide you through the claims process. Remember to refer to 'Section 5: Windscreen damage' on page 20 for details on the excess amounts for windscreen repairs and replacement.
Precision: 0.32, Recall: 0.53, Relevancy: 0.87

Question: Does the policy cover theft if the car is left unlocked?
Expected: No, the policy does not cover theft or attempted theft if the car is left unlocked.
Bot: No, the policy does not cover theft if the car is left unlocked. If the car is left unlocked, the policy will no

Question: What should you do if you need to make a claim?
Expected: You should call the claims number provided, have your personal details, policy number, car registration number, and a description of the loss or damage ready.
Bot: If you need to make a claim, you should contact the insurance company at 0345 878 6261. Make sure the incident happened within the territorial limits, you were covered on the date of the incident, and the legal proceedings will happen within the territorial limits. Additionally, take all reasonable precautions to minimize the cost of claims and prevent a claim from happening. If you do not follow the terms of the policy, the insurance company may refuse or withdraw from the claim.
Precision: 0.12, Recall: 0.30, Relevancy: 0.59

Question: Does the policy cover mechanical or electrical failure?
Expected: No, the policy does not cover mechanical or electrical failure.
Bot: No, the policy does not cover mechanical or electrical failure.
Precision: 1.00, Recall: 

Question: How long can you use the courtesy car if your car is being repaired?
Expected: You can use the courtesy car for the duration of the repair if it is being repaired by an approved repairer.
Bot: If your car can be repaired and is driveable, you can use a courtesy car:
- From the point your car goes in for repair until it is repaired if you use the approved repairer.
- For up to 21 days in a row while your car is being repaired if you use your own repairer. 

If your car is not driveable, once you confirm the repair can start, you'll receive a courtesy car until the repair is completed if you use the approved repairer. The same 21-day limit applies if you choose your own repairer. 

It's important to note that your cover to drive the courtesy car is limited to the terms and exclusions in your certificate of motor insurance and policy. Additionally, a courtesy car may not always be available on the same day.
Precision: 0.22, Recall: 0.79, Relevancy: 0.80

Question: What does the 

### Results:

- Average Precision: 0.39
- Average Recall: 0.68
- Average Relevancy: 0.80