In [1]:
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI
import os
from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer # type: ignore
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
# from langchain.globals import set_llm_cache
from langchain_community.cache import InMemoryCache
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA
# Run before any os.environ lookups below so values from a local .env file
# (if one exists) are available to them.
load_dotenv()

  from tqdm.autonotebook import tqdm, trange


True

In [2]:
# Azure OpenAI chat model. Each setting is looked up in the environment and
# falls back to the literal default given here when the variable is unset.
llm = AzureChatOpenAI(
    openai_api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-07-01-preview"),
    azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT", "gpt4chat"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", "https://gpt-4-trails.openai.azure.com/"),
    # No fallback for the key: this is None when AZURE_OPENAI_KEY is not set.
    api_key=os.getenv("AZURE_OPENAI_KEY"),
)

In [3]:
# Load the policy booklet PDF. Later cells treat `pages` as a sequence of
# per-page objects that each expose `.page_content`.
file_path = "policy_data.pdf"
loader = PyPDFLoader(file_path)
pages = loader.load()

In [4]:
# Sanity check: how many page objects the loader returned.
len(pages)

44

In [6]:
# Inspect the raw extracted text of a single page (index 30).
print(pages[30])

page_content='Page 31Where you can drive\nThe area your policy applies \n(the territorial limits)\nYour car insurance gives you the cover \ndescribed in your car insurance details in:\n>Great Britain.\n>Northern Ireland.\n>the Channel Islands.\n>the Isle of Man.\nIt also covers journeys between these places.\nPlease note: your ‘Liability for automated cars in \nGreat Britain’ cover only applies in Great Britain, \nwhich is: \n>England.\n>Scotland.\n>Wales.\nPlease see ‘Liability for automated cars in Great \nBritain’ on page 13 for more details.\nIf you drive in the Republic of Ireland\nIf you use your car in the Republic of Ireland, \nyour car insurance gives you the same cover as \nyou have within the territorial limits. However:\n>If you have Motor Legal Cover, this cover doesn’t apply to any claims in the Republic of Ireland, apart from road traffic accident claims. \n>If you have a courtesy car, it cannot be used \nin the Republic of Ireland.\n>If you have Guaranteed Hire Car Plus

In [7]:
# Chunking configuration: chunk_size=700 with chunk_overlap=50, both measured
# by len() (so in characters when the input is str).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=700,
    chunk_overlap=50,
    length_function=len,
)

In [8]:
# `pages` holds document objects; pull out just the text of each page so the
# splitter can be given a plain list of strings.
pages_text = [page.page_content for page in pages]

In [9]:
# Split every page's text into chunks using `splitter` (configured above with
# chunk_size=700 and chunk_overlap=50).
# NOTE(review): the chunks are built from plain strings, so any per-page
# metadata on `pages` is presumably not carried into `documents` — confirm
# whether source/page information is needed downstream.
documents = splitter.create_documents(pages_text)

In [10]:
# Confirm what kind of object the splitter produced for each chunk.
type(documents[0])

langchain_core.documents.base.Document

In [11]:
# Spot-check one chunk (index 1) to see what the split text looks like.
print(documents[1])

page_content='Page 2FAQs  3\nGlossary  4\nMaking a claim  6\nWhat your cover includes  8\nSection 1: Liability  11\nSection 2: Fire and theft  14\nSection 3: Courtesy car  17\nSection 4: Accidental damage  18\nSection 5: Windscreen damage  20\nSection 6: Personal benefits  21\nSection 7: Motor Legal Cover  23\nSection 8: Guaranteed Hire Car Plus  28\nSection 9: Protected No Claim Discount  30Where you can drive  31\nLosses we don’t cover  33\nOther conditions you need to know about  36\nHow the policy works  37\nEverything else  41\nIf you have a complaint  42\nIf you’re in an accident  43\nHow to get in touch  Back coverContentsWelcome to Churchill\nThis booklet tells you about your car insurance\nAbout the policy'


In [12]:
# Embedding model handle with the model name pinned explicitly.
# NOTE(review): this object is not passed to get_vectorstore() in the cells
# below (it is called with `documents` only) — confirm whether it should be.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [124]:
# set_llm_cache(InMemoryCache())

In [13]:
def get_vectorstore(text_chunks, index_path="faiss_index", embeddings=None):
    """Return a FAISS vector store, reusing the saved index when one exists.

    Args:
        text_chunks: Documents to embed when no saved index is found. They are
            ignored when an index already exists at ``index_path``.
        index_path: Folder the index is loaded from or saved to.
        embeddings: Embedding model to use. Defaults to ``OpenAIEmbeddings()``.

    Returns:
        The loaded or newly created FAISS vector store.

    Raises:
        ValueError: If a new index must be built but ``text_chunks`` is empty.
    """
    if embeddings is None:
        embeddings = OpenAIEmbeddings()

    if os.path.exists(index_path):
        # The saved index is reused as-is and `text_chunks` is not consulted;
        # delete the folder to force a rebuild after the source text changes.
        #
        # SECURITY: loading the index deserializes pickled data. The flag below
        # is acceptable only because this folder is written by this function;
        # never point `index_path` at an index from an untrusted source.
        vectorstore = FAISS.load_local(
            index_path,
            embeddings,
            allow_dangerous_deserialization=True,
        )
        print("Loaded existing FAISS index.")
        return vectorstore

    if not text_chunks:
        raise ValueError(
            "text_chunks is empty and no saved FAISS index exists; nothing to index."
        )

    vectorstore = FAISS.from_documents(documents=text_chunks, embedding=embeddings)
    # Persist the index so later runs skip re-embedding.
    vectorstore.save_local(index_path)
    print("Created and saved new FAISS index.")
    return vectorstore

In [14]:
# Build (or load) the vector store and expose it as a retriever, which the QA
# chain below uses to fetch documents.
retriever = get_vectorstore(documents).as_retriever()

Created and saved new FAISS index.


In [31]:
# Prompt for the QA chain. {context} and {question} are the two placeholders
# in the template; the rest is fixed instruction text.
template = """Use the context below to answer the question.
Keep the answer concise and to the point.
If you are unsure about the answer, just say i do not know the answer to the question do not create your own answer and make sure the answer is concise and to the point.
use 3-4 sentences at max to answer the questions.
{context}

Question: {question}

Helpful Answer:"""
prompt = PromptTemplate.from_template(template)

In [32]:
# Wire the LLM, the retriever and the custom prompt into a RetrievalQA chain
# using the "stuff" chain type.
chain_type_kwargs = dict(prompt=prompt)
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
)

In [33]:
# Run a sample question through the chain; the answer is read from
# result['result'] in the next cell.
result = chain.invoke(
    "What steps should you take if you are unhappy with the response "
    "from an independent review and want to escalate your complaint "
    "to the Financial Ombudsman Service?"
)

In [36]:
# Show only the generated answer text from the chain's output mapping.
print(result['result'])

If you're unhappy with the response from an independent review, you can escalate your complaint to the Financial Ombudsman Service (FOS). You should contact the FOS within 6 months of the response letter. You can reach them through email at complaint.info@financial-ombudsman.org.uk, phone (UK: 0300 123 9123 or 0800 023 4567, Abroad: +44 (0) 20 7964 0500), or by writing to Financial Ombudsman Service, Exchange Tower, London E14 9SR.


In [None]:
"""
You are a language model designed to evaluate the responses of this documentation query system.
You will use a rating scale of 0 to 10, 0 being the poorest response and 10 being the best.
Responses with “not specified” or “no specific mention” or “rephrase question” or “unclear” or no documents returned or empty response are considered poor responses.
Responses where the question appears to be answered are considered good.
Responses that contain detailed answers are considered the best.
Also, use your own judgement in analyzing if the question asked is actually answered in the response. Remember that a response that contains a request to “rephrase the question” is usually a non-response.
Please rate the question/response pair entered. Only respond with the rating. No explanation necessary. Only integers.
"""