#### Simple RAG using FAISS

In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
import os
import sys
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# Read the Groq API key from the environment (populated by load_dotenv() above)
# and fail fast with a clear message when it is missing or empty, rather than
# letting the client fail later with a less direct error.
groq_api_key=os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it or add it to your .env file before running this notebook."
    )

# Chat model used to generate answers.
# NOTE(review): confirm "Llama3-8b-8192" is still a model id served by Groq.
llm=ChatGroq(groq_api_key=groq_api_key,model_name="Llama3-8b-8192")

# PDF policy document to index; the path is relative to the working directory.
path="sample_policy_doc_AU1234.pdf"

In [5]:
# Load the PDF at `path`; load() returns a list of Document objects
# (each carries `metadata` and `page_content`).
loader=PyPDFLoader(path)
docs=loader.load()

# Split the loaded documents into smaller chunks for embedding.
# Sizes are measured with len(), i.e. in characters: chunks of up to 500
# characters, with a 50-character overlap configured between neighbours.
splitter=RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=len
)

texts=splitter.split_documents(docs)


# Embedding model used to vectorise the chunks (HuggingFace "all-MiniLM-L6-v2").
embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Build the FAISS vector store from the chunked documents and their embeddings.
vectorstore=FAISS.from_documents(texts,embeddings)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Peek at the first loaded Document to check its metadata and extracted text.
docs[0]

Document(metadata={'producer': 'Adobe PDF Library 10.0', 'creator': 'Acrobat PDFMaker 10.1 for Word', 'creationdate': '2015-08-11T15:48:23+01:00', 'author': 'esutton', 'company': 'Swiftcover Insurance Services Ltd.', 'moddate': '2015-08-11T15:48:40+01:00', 'sourcemodified': 'D:20150811125127', 'title': 'This is a sample Policy document that provides full wording for all the covers we offer', 'source': 'sample_policy_doc_AU1234.pdf', 'total_pages': 58, 'page': 0, 'page_label': '1'}, page_content='ADH 15.10a \n1 \n \n \n \n \n \n \n \nThis is a sample Policy document that provides full \nwording for all the covers we offer. \n \nAll available options are on our website which will enable you to choose the level and type of cover. Once you \nhave bought your Policy you will be provided with the documentation specific to what you have requested.')

In [4]:
# Number of Document objects returned by the loader.
len(docs)

58

In [5]:
# NOTE(review): this repeats the earlier `docs[0]` inspection cell; one of the two is redundant.
docs[0]

Document(metadata={'producer': 'Adobe PDF Library 10.0', 'creator': 'Acrobat PDFMaker 10.1 for Word', 'creationdate': '2015-08-11T15:48:23+01:00', 'author': 'esutton', 'company': 'Swiftcover Insurance Services Ltd.', 'moddate': '2015-08-11T15:48:40+01:00', 'sourcemodified': 'D:20150811125127', 'title': 'This is a sample Policy document that provides full wording for all the covers we offer', 'source': 'sample_policy_doc_AU1234.pdf', 'total_pages': 58, 'page': 0, 'page_label': '1'}, page_content='ADH 15.10a \n1 \n \n \n \n \n \n \n \nThis is a sample Policy document that provides full \nwording for all the covers we offer. \n \nAll available options are on our website which will enable you to choose the level and type of cover. Once you \nhave bought your Policy you will be provided with the documentation specific to what you have requested.')

In [7]:
# Retriever over the FAISS vector store; search_kwargs k=4 asks for the
# four best-matching chunks per query.
retriever=vectorstore.as_retriever(search_kwargs={'k':4})

In [8]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Prompt for the RAG chain: {context} receives the retrieved text and
# {question} receives the user's question.
# NOTE(review): the template text contains typos ("retreived", "You are assistant",
# no space after "question."); it is left unchanged here so the model input stays
# identical - fix deliberately if desired.
prompt=PromptTemplate(
    template=""" 
    You are assistant for question answering tasks.
    Use the following piece of retreived context to answer
    the question.If you don't know the answer, say that you don't know.
    keep the answer concise.
    {context}
    Question:{question}
    """,
    input_variables=['context','question']
)

In [9]:
#Building chain
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

def format_docs(retrieved_docs):
    """Merge retrieved documents into a single context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined by newline characters, in input order.
    """
    page_texts = [document.page_content for document in retrieved_docs]
    return "\n".join(page_texts)

# Build both prompt inputs from the single input string:
#   'context'  -> the retriever's results, flattened to text by format_docs
#   'question' -> the input passed through unchanged
# NOTE(review): `parllel_chain` is a misspelling of "parallel"; the name is kept
# as-is because it is a notebook-level variable.
parllel_chain=RunnableParallel({
    'context':retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

# Converts the chat model's response into a plain string.
parser = StrOutputParser()

# Full RAG pipeline: gather inputs -> fill the prompt -> call the LLM -> parse to str.
rag_chain = parllel_chain | prompt | llm | parser


In [10]:
# Run the RAG chain on a sample question; the chain returns the answer as a string.
answer=rag_chain.invoke('How my home is covered for any theft?')

In [11]:
# Display the answer produced by the previous invocation.
answer

'According to the provided context, your home is covered for theft as long as the keys are lost or stolen, and the theft is not caused by you, your family, or anyone else who has been allowed into the home.'

In [12]:
# Second sample query (multi-line question); `answer` is overwritten with the new result.
answer=rag_chain.invoke("""I am planning for vacation and might leave my house empty. 
Does my policy covers any potential damage that might occur? """)
# print() shows the text without the quotes of the string repr.
print(answer)

According to the provided context, your policy does not cover damage to the home unless it has been damaged at the same time and by the same cause. However, it does cover contents whilst in the home for loss or damage resulting from a storm or flood. Additionally, if you have chosen the Accidental Damage (optional extra) section, it may cover some types of damage. It's best to check your policy schedule to confirm what is included.


In [1]:
import os

In [2]:
def extract_policy_number(file_path):
    """Extract the policy number from a policy document's file name.

    The policy number is taken to be the text after the last underscore of the
    file name, with the directory and extension removed, e.g.
    ``"data/policy_file/sample_policy_doc_AU1234.pdf"`` -> ``"AU1234"``.
    A name without any underscore is returned whole.

    Args:
        file_path: Path (or bare file name) of the policy document.

    Returns:
        The extracted policy number, or ``None`` when that part is empty
        (empty path, path ending in a separator, or a name ending in ``_``).
    """
    # Drop the directory components and the extension.
    stem = os.path.splitext(os.path.basename(file_path))[0]

    # rpartition gives the text after the last underscore, or the whole stem
    # when there is no underscore - the same value as split('_')[-1].
    policy_number = stem.rpartition('_')[2]

    # str.split never returns an empty list, so the original `return None`
    # branch was unreachable; map an empty result to None explicitly.
    return policy_number or None


In [3]:
policy_number = extract_policy_number("data/policy_file/sample_policy_doc_AU1234.pdf")
print(policy_number)

AU1234
