In [None]:
# pip install googleapis-common-protos==1.56.2

In [None]:
# pip install protobuf==3.20.3

## **RAG with Langchain**

### Import Libraries

In [None]:
# Generic Libraries
import os
from IPython.display import display, Markdown

# Data Preparation Libraries
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings

# Data Retrieval libraries
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate

import google.generativeai as genai
from sentence_transformers import CrossEncoder

In [None]:
import pandas as pd
import numpy as np
import json

In [None]:
# Never truncate cell text when a DataFrame is rendered, so complete chunk
# texts stay visible in the result tables below.
pd.set_option('display.max_colwidth', None)

### Load the keys

In [None]:
# Read the API keys from local files that are kept outside the notebook.
# `with` closes each handle deterministically, and strip() removes the trailing
# newline most editors append - otherwise it becomes part of the key and the
# services reject the credential.
with open("API/gemini_key", "r") as key_file:
    gemini_key = key_file.read().strip()
with open("API/langSmith_key", "r") as key_file:
    langSmith_key = key_file.read().strip()

In [None]:
# Publish the tracing switch and both credentials through the process
# environment, where the LangChain / Google client libraries look them up.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_API_KEY": langSmith_key,
        "GOOGLE_API_KEY": gemini_key,
    }
)

### Load the document

In [None]:
# Folder (relative to the notebook) that holds the source PDF files.
source_data_folder = "documents"

# Load whatever the directory loader finds in that folder.
loader = PyPDFDirectoryLoader(source_data_folder)
data_on_pdf = loader.load()

# Number of loaded documents.
len(data_on_pdf)

### Split the documents into chunks using `RecursiveCharacterTextSplitter`

In [None]:
# Split the loaded documents into chunks of limited size (chunk_size=1000),
# with 200 characters of overlap between neighbouring chunks so that context
# is preserved across chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ". ", " ", ""],
    chunk_size=1000,
    chunk_overlap=200
)
splits = text_splitter.split_documents(data_on_pdf)
# Number of chunks generated
len(splits)

### Create a new embedding model `all-MiniLM-L6-v2`

In [None]:
# The embeddings are created with a Hugging Face sentence-transformers model:
# https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
# Any other sentence-transformers model name can be substituted here.
embeddings_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

### Create a vector DB and store the document chunks in it in embedded form

In [None]:
# Database folder path
path_db = "langchain_store" # @param {type:"string"}

# Chroma.from_documents() ADDS the chunks to whatever is already persisted in
# `path_db`, so re-running this cell (or the whole notebook) would store every
# chunk again and the retriever would start returning duplicates.
# Reuse the persisted collection when it exists; embed + insert only on the
# first run. Delete the folder to force a rebuild after the source PDFs change.
if os.path.isdir(path_db) and os.listdir(path_db):
    vectorstore = Chroma(persist_directory=path_db, embedding_function=embeddings_model)
else:
    # Store the chunks in the database
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings_model, persist_directory=path_db)

### Create LLM and VectorStore retriever

In [None]:
# Retriever interface over the Chroma store, created without any explicit
# search settings (i.e. with the library defaults).
retriever = vectorstore.as_retriever()

In [None]:
# Gemini chat model used to generate the final answer.
# temperature=0 asks for the least random sampling; max_tokens / timeout are
# left unset (None); max_retries=2 is passed for failed calls.
# NOTE(review): `generation_config` is normally a per-call argument for this
# class - confirm that it is honoured when given to the constructor.
gen_config = genai.types.GenerationConfig(candidate_count=1)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",
                         temperature=0,
                         max_tokens=None,
                         timeout=None,
                         max_retries=2,
                         generation_config = gen_config,
                         api_key=gemini_key
                         )
# Display the configured model object.
llm

In [None]:
# https://smith.langchain.com/hub/rlm/rag-prompt
# prompt = hub.pull("rlm/rag-prompt")
# prompt

### Business logic
- Extract the candidate results using semantic search
- Rerank them using the cross-encoder

In [None]:
def get_store_results(question, k=10, score_threshold=None):
    """Fetch candidate chunks for `question` from the Chroma vector store.

    Search settings must be configured on the retriever itself; passing a
    `search_kwargs` argument to the retrieval call does not configure the
    search, so the requested `k` would never take effect. The retriever is
    therefore built here from `vectorstore` with the settings applied, which
    also keeps this function independent of the mutable global `retriever`.

    Args:
        question: The user query.
        k: Maximum number of chunks to fetch (candidates for the reranker).
        score_threshold: Optional minimum relevance score in [0, 1]. When
            given, the search runs in "similarity_score_threshold" mode and
            may return fewer than `k` chunks (possibly none). Left off by
            default because a fixed cut-off is embedding-model dependent.

    Returns:
        A `(docs, question)` tuple: the retrieved documents and the query
        unchanged, ready to be passed to `cross_encoder_ranking`.
    """
    if score_threshold is None:
        store_retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    else:
        # A threshold is only honoured by this dedicated search type.
        store_retriever = vectorstore.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": k, "score_threshold": score_threshold},
        )
    docs = store_retriever.invoke(question)
    return docs, question
    
def cross_encoder_ranking(docs, question, model_name='cross-encoder/ms-marco-MiniLM-L-12-v2'):
    """Rerank retrieved documents against `question` with a cross-encoder.

    Args:
        docs: Retrieved documents; each must expose `.metadata` and
            `.page_content`.
        question: The user query every document is scored against.
        model_name: Cross-encoder checkpoint to use for scoring.

    Returns:
        A DataFrame with the columns "metadata" (stringified) and
        "page_content", de-duplicated and ordered from most to least relevant.
        An empty frame with the same columns is returned when `docs` is empty.
    """
    columns = ["metadata", "page_content"]
    if not docs:
        # Nothing was retrieved: selecting columns from an empty frame would
        # raise KeyError, so hand back an empty, correctly shaped result.
        return pd.DataFrame(columns=columns)

    # Loading the model is expensive; keep one instance per checkpoint on the
    # function object so repeated queries reuse it.
    model_cache = cross_encoder_ranking.__dict__.setdefault("_models", {})
    if model_name not in model_cache:
        model_cache[model_name] = CrossEncoder(model_name)
    cross_encoder = model_cache[model_name]

    # Read the two needed fields explicitly instead of relying on the layout
    # of the document objects' __dict__. Metadata is stringified so the rows
    # are hashable for drop_duplicates().
    res_df = pd.DataFrame(
        {
            "metadata": [str(doc.metadata) for doc in docs],
            "page_content": [doc.page_content for doc in docs],
        }
    )
    cross_inputs = [[question, doc.page_content] for doc in docs]
    res_df["Reranks"] = cross_encoder.predict(cross_inputs)
    res_df = res_df.drop_duplicates().sort_values(by='Reranks', ascending=False)
    return res_df[columns]
    

### Pipeline to run search and reranking

In [None]:
def results_runnable(question):
    """Run the search-then-rerank pipeline for a single question.

    The question is first sent to the vector store; the returned documents
    are then reordered by the cross-encoder.

    Args:
        question: The user query.

    Returns:
        Whatever `cross_encoder_ranking` produces for the retrieved documents.
    """
    candidates, query = get_store_results(question)
    return cross_encoder_ranking(candidates, query)

In [None]:
# Smoke-test the search + rerank pipeline on a sample question.
question = "What are the age related conditions in the life insurance?"
temp_df = results_runnable(question)

In [None]:
# Inspect the reranked chunks returned for the sample question.
temp_df

### Prompt for the LLM
- Input variables - `question` and `context`

In [None]:
# Chat prompt for the insurance Q&A chain.
# Template variables:
#   {question} - the user query; it appears in the system message and is sent
#                again on its own as the human message.
#   {context}  - the search results, interpolated into the system message.
prompt = ChatPromptTemplate.from_messages(
    [("system", """
You are a highly skilled insurance expert tasked with answering user queries using the provided search results. These results are one or more pages from relevant insurance documents that contain the information needed to address the query.

You have a user query: '{question}'. The relevant search results are in the DataFrame '{context}'. The 'page_content' column contains the text from the policy documents, and the 'metadata' column contains the policy name and source page.
        **Your Task:**
        1. **Analyze the Query:** Carefully understand the user's intent and the specific information they are seeking.
        2. **Identify Relevant Documents:** Select the most pertinent documents from the search results based on their content and relevance to the query.
        3. **Extract Key Information:** Carefully extract the required information from the selected documents, ensuring accuracy and completeness.
        4. **Construct a Comprehensive Response:** Craft a clear, concise, and informative response that directly addresses the user's query.
        5. **Provide Citations:** Cite the specific policy names and page numbers where the information was found, using the following format:

            **[Policy Name], [Page Number]**

            **References:**
            * [Policy Name 1], [Page Number 1]
            * [Policy Name 2], [Page Number 2]
            * ...

        **Guidelines:**
        * **Accuracy:** Ensure that your response is factually correct and consistent with the information provided in the documents.
        * **Relevance:** Focus on the most relevant information and avoid providing unnecessary details.
        * **Clarity:** Use plain language and avoid technical jargon.
        * **Completeness:** Provide a comprehensive answer that covers all aspects of the user's query.
        * **Conciseness:** Be brief and to the point, while still providing sufficient detail.

        **Example Response:**
        > The maximum coverage for [policy type] is [amount], as stated in **[Policy Name], [Page Number]**.

            **References:**
            * **[Policy Name 1], [Page Number 1]**
            * **[Policy Name 2], [Page Number 2]**

        Important: Take the policy name and page number from metadata column only
        
        If you cannot find sufficient information to answer the query, indicate that and suggest possible alternative approaches or resources.
        """), ("human", "{question}")]
)


### RAG pipeline
- Search and rerank
- Generate the context
- Integrate the prompt
- Invoke the LLM
- Parse string output

In [None]:
def _context_as_text(question):
    """Render the reranked search results for `question` as untruncated text.

    The prompt interpolates the context via str(); for a DataFrame that goes
    through pandas' display options, whose default column width clips long
    cells to 50 characters. Pinning the options locally guarantees the LLM
    receives the complete chunk text even if the notebook-wide display
    settings are changed or the options cell was not run.
    """
    with pd.option_context("display.max_colwidth", None, "display.max_rows", None):
        return str(results_runnable(question))


# RAG pipeline: build the prompt inputs (context from search + rerank, the
# question passed through unchanged), fill the prompt, call the LLM, and
# reduce the chat message to a plain string.
rag_chain = ({"context" : _context_as_text, "question" : RunnablePassthrough()}
             | prompt
             | llm
             | StrOutputParser()
            )

In [None]:
# Run the full RAG chain on a sample question and render the (Markdown) answer.
question = "What are the age related conditions in the life insurance?"
result = rag_chain.invoke(question)
display(Markdown(result))

## **RAG with LlamaIndex**

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.gemini import Gemini

from llama_index.core import Settings

from llama_index.core.prompts import PromptTemplate

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core import get_response_synthesizer

from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

from sentence_transformers import CrossEncoder

In [None]:
import os
from IPython.display import display, Markdown
import pandas as pd

In [None]:
from huggingface_hub import login

# Read the Hugging Face token from a local file. `with` closes the handle, and
# strip() drops the trailing newline so it is not sent as part of the token.
with open("API/hf_token", "r") as token_file:
    hf_token = token_file.read().strip()
login(token = hf_token)

In [None]:
# Read the Gemini key from a local file. `with` closes the handle, and strip()
# drops the trailing newline so it does not end up inside the credential.
with open("API/gemini_key", "r") as key_file:
    google_api_key = key_file.read().strip()
os.environ["GOOGLE_API_KEY"] = google_api_key

In [None]:
# Global LlamaIndex defaults: the embedding model used for indexing/querying
# and the LLM used to synthesize answers.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5") # set the embedding model
Settings.llm = Gemini(model_name="models/gemini-1.5-pro")

In [None]:
from llama_index.core import SimpleDirectoryReader

# Load the source files from the same "documents" folder used in the LangChain section.
documents = SimpleDirectoryReader(input_dir="documents").load_data()

In [None]:
# Chunking settings for LlamaIndex (chunk_size=1000, chunk_overlap=200).
# NOTE(review): this rebinds the notebook-global `text_splitter` created in the
# LangChain section; any cell that reads that name afterwards gets this object.
text_splitter = SentenceSplitter(chunk_size=1000, chunk_overlap=200)

# Also register it as the global default splitter.
Settings.text_splitter = text_splitter

In [None]:
# Build the vector index from the loaded documents, chunking them with the
# splitter configured above, then persist it to the "llamaIndex_store" folder.
index = VectorStoreIndex.from_documents(documents, transformations=[text_splitter])
index.storage_context.persist(persist_dir="llamaIndex_store")

In [None]:
# Rebuild the storage context from the folder the index was persisted to.
# It must match the `persist_dir` used in index.storage_context.persist();
# pointing at a different folder fails or silently loads another index.
storage_context = StorageContext.from_defaults(persist_dir="llamaIndex_store")

# load index
index = load_index_from_storage(storage_context)

In [None]:
# Question-answering prompt text for the LlamaIndex query engine.
# Template variables: {question} and {context}.
# NOTE(review): the text mixes a research-paper persona with insurance-policy
# citation rules, limits the answer to three sentences while also asking for a
# comprehensive response, and places the "Answer:" cue before the task
# description - confirm this wording is intentional.
template = """
You are a knowledgeable and precise assistant specialized in question-answering tasks, 
particularly from academic and research-based sources. 
Your goal is to provide accurate, concise, and contextually relevant answers based on the given information.

Instructions:

Comprehension and Accuracy: Carefully read and comprehend the provided context from the research paper to ensure accuracy in your response.
Conciseness: Deliver the answer in no more than three sentences, ensuring it is concise and directly addresses the question.
Truthfulness: If the context does not provide enough information to answer the question, clearly state, "I don't know."
Contextual Relevance: Ensure your answer is well-supported by the retrieved context and does not include any information beyond what is provided.

Remember if no context is provided please say you don't know the answer
Here is the question and context for you to work with:

\nQuestion: {question} \nContext: {context} \nAnswer:

        **Your Task:**
        1. **Analyze the Query:** Carefully understand the user's intent and the specific information they are seeking.
        2. **Identify Relevant Documents:** Select the most pertinent documents from the search results based on their content and relevance to the query.
        3. **Extract Key Information:** Carefully extract the required information from the selected documents, ensuring accuracy and completeness.
        4. **Construct a Comprehensive Response:** Craft a clear, concise, and informative response that directly addresses the user's query.
        5. **Provide Citations:** Cite the specific policy names and page numbers where the information was found, using the following format:

            **[Policy Name], [Page Number]**

            **References:**
            * [Policy Name 1], [Page Number 1]
            * [Policy Name 2], [Page Number 2]
            * ...

        **Guidelines:**
        * **Accuracy:** Ensure that your response is factually correct and consistent with the information provided in the documents.
        * **Relevance:** Focus on the most relevant information and avoid providing unnecessary details.
        * **Clarity:** Use plain language and avoid technical jargon.
        * **Completeness:** Provide a comprehensive answer that covers all aspects of the user's query.
        * **Conciseness:** Be brief and to the point, while still providing sufficient detail.

        **Example Response:**
        > The maximum coverage for [policy type] is [amount], as stated in **[Policy Name], [Page Number]**.

            **References:**
            * **[Policy Name 1], [Page Number 1]**
            * **[Policy Name 2], [Page Number 2]**

        Important: Take the policy name and page number from metadata column only
        
        If you cannot find sufficient information to answer the query, indicate that and suggest possible alternative approaches or resources.
        """


# Wrap the text in a LlamaIndex PromptTemplate. The mapping connects the
# variable names "query_str" / "context_str" to this template's
# {question} / {context} placeholders.
prompt_tmpl = PromptTemplate(
    template=template,
    template_var_mappings={"query_str": "question", "context_str": "context"},
)

In [None]:
# configure retriever (similarity_top_k=5 on the loaded index)
# NOTE(review): this rebinds the notebook-global `retriever` created in the
# LangChain section (`vectorstore.as_retriever()`); any cell that still reads
# that name gets this LlamaIndex object once this cell has run.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=5,
)

# configure response synthesizer (library defaults)
response_synthesizer = get_response_synthesizer()

# assemble query engine; retrieved nodes pass through a similarity
# post-processor configured with similarity_cutoff=0.55
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.55)]
)

# replace the synthesizer's text QA prompt with the custom template
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template":prompt_tmpl}
)

In [None]:
# Ask the LlamaIndex query engine a sample question.
question = "What are the cases of failure to pay premium?"
result = query_engine.query(question)

In [None]:
# Render the answer text as Markdown.
display(Markdown(result.response))