# Querying the benefits

In [60]:
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI
from docling.chunking import HybridChunker
from docling.document_converter import DocumentConverter
from transformers import AutoTokenizer
import os


In [61]:
file_path = '/workspaces/Implementing-RAG/extracted_data/Guardian Vision Insurance.md'
converter = DocumentConverter()
docling_doc = converter.convert(source=file_path).document
   

In [64]:
def chunk_document(docling_doc, embed_model_id="sentence-transformers/all-MiniLM-L6-v2", max_tokens=1000):
    # Initialize the tokenizer and HybridChunker
    tokenizer = AutoTokenizer.from_pretrained(embed_model_id)
    chunker = HybridChunker(tokenizer=tokenizer, max_tokens=max_tokens)
    
    # Chunk the single document
    chunked_docs = []
    chunks = list(chunker.chunk(dl_doc=docling_doc))
    for i, chunk in enumerate(chunks):
        chunked_docs.append(Document(
            page_content=chunk.text,
           metadata={
            "chunk_id": i,
            "source_file": file_path,
            "token_count": len(tokenizer(chunk.text)["input_ids"])}
        ))
    return chunked_docs

In [65]:
embed_model_id = "sentence-transformers/all-MiniLM-L6-v2"

# Chunk the document
chunked_documents = chunk_document(docling_doc, embed_model_id)

# Print the chunks for debugging (optional)
for doc in chunked_documents:
    print("Chunk Content:", doc.page_content)
    print("Metadata:", doc.metadata)
    print("-" * 50)
    

Token indices sequence length is longer than the specified maximum sequence length for this model (617 > 512). Running this sequence through the model will result in indexing errors


Chunk Content: For just a few dollars a month, this coverage saves you money on optical wellness, as well as providing discounts on eyewear, contacts, and corrective vision services
· Extensive network of vision specialists and medical professionals
· Affordable coverage
· Quick and easy claim payments
Metadata: {'chunk_id': 0, 'source_file': '/workspaces/Implementing-RAG/extracted_data/Guardian Vision Insurance.md', 'token_count': 54}
--------------------------------------------------
Chunk Content: Option 1: Significant out-of-pocket savings available with your Full Feature plan by visiting one of VSP's network locations, including one of the largest private practice provider networks, Visionworks and contracted Pearle Vision locations.
Your Vision Plan, 1 = VSP Vision Premier. Your Vision Plan, 2 = VSP Vision Premier. Your Network is, 1 = VSP Network Signature Plan. Your Network is, 2 = . Copay, 1 = . Copay, 2 = . Exams Copay, 1 = $ 10. Exams Copay, 2 = . Materials Copay (waived for

In [66]:
# Loading the embedding model
from langchain.embeddings import HuggingFaceBgeEmbeddings
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {"device": "cpu"}
embeddings = HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs = model_kwargs)

In [67]:
# Creating vector store
from langchain.vectorstores.faiss import FAISS
vc_db = FAISS.from_documents(chunked_documents, embeddings)
vc_db = FAISS.load_local("vc_db_ragas", embeddings,allow_dangerous_deserialization=True)


In [68]:
# Retrieval
retriever = vc_db.as_retriever(search_kwargs={"k":3})
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x741031fe0a10>, search_kwargs={'k': 3})

In [76]:
# RAG pipeline
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# Define LLM
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [77]:
# Prompt template
template = """ Utilize the retrieved context below to answer each question. If you do not know the answer just say that you do not know it. Keep the response concise do not exceed 3 sentences.
Question: {question}
Context: {context}
"""

In [78]:
prompt = ChatPromptTemplate.from_template(template)
# Rag pipeline
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [79]:
from datasets import Dataset

questions = ['What is the copay for an eye exam in the VSP Network Signature Plan?',
        'What is the contact lens allowance for medically necessary lenses under the plan?',
        'How often are eye exams covered under the vision plan?',
        'What discount is provided for laser correction surgery?']


answers = []
contexts = []


In [80]:
for query in questions:
    answers.append(rag_chain.invoke(query))
    contexts.append([docs.page_content for docs in retriever.get_relevant_documents(query)])
    

AssertionError: 