# Retrieval Augmented Generation

## Import packages

In [None]:
# # linux
# sudo apt-get install poppler-utils tesseract-ocr-all

# # mac
# brew install poppler
# brew install tesseract --all-languages

In [None]:
import warnings


# Suppress every Python warning for the rest of the session; note that this also hides deprecation notices.
warnings.filterwarnings("ignore")

In [None]:
import os
import uuid
from typing import Dict, List, Optional, Tuple

import langchain
import numpy as np
import rootutils
from dotenv import find_dotenv, load_dotenv
from langchain.cache import RedisCache, RedisSemanticCache
from langchain.chains import RetrievalQA
from langchain.chat_models.base import BaseChatModel
from langchain.document_loaders import UnstructuredMarkdownLoader, UnstructuredPDFLoader, WikipediaLoader
from langchain.document_transformers import LongContextReorder
from langchain.embeddings import CacheBackedEmbeddings
from langchain.globals import set_llm_cache
from langchain.llms.base import BaseLLM
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.retrievers import ContextualCompressionRetriever, MultiVectorRetriever
from langchain.retrievers.document_compressors import DocumentCompressorPipeline, LLMChainExtractor
from langchain.schema.output_parser import StrOutputParser
from langchain.storage import RedisStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant
from langchain_core.documents.base import Document
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.http import models
from redis import Redis
from tenacity import retry, stop_after_attempt, wait_exponential
from trulens_eval import Feedback, FeedbackMode, Select, Tru, TruChain
from trulens_eval.app import App
from trulens_eval.feedback import Groundedness, prompts

In [None]:
from trulens_eval.feedback.provider.openai import AzureOpenAI as fAzureOpenAI
from trulens_eval.feedback.v2.feedback import ClassVar, Relevance, WithPrompt
from unstructured.cleaners.core import clean

## Settings

In [None]:
# Locate a local .env file and load its variables into the process environment;
# the return value of load_dotenv is deliberately discarded.
_ = load_dotenv(find_dotenv())

In [None]:
class debug_langchain:
    """Context manager that switches LangChain's global debug flag for a block.

    On entry ``langchain.debug`` is set to ``enable``; on exit the value that
    was active before entering is restored, so using the manager while
    debugging is already switched on does not silently turn it off.

    Args:
        enable: Whether debug output should be on inside the ``with`` block.
    """

    def __init__(self, enable: bool = True):
        self.enable = enable
        # Value of ``langchain.debug`` captured on entry and restored on exit.
        self._previous = False

    def __enter__(self):
        self._previous = getattr(langchain, "debug", False)
        langchain.debug = bool(self.enable)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the block still propagates.
        langchain.debug = self._previous

In [None]:
@retry(wait=wait_exponential(multiplier=1, min=10, max=20), stop=stop_after_attempt(max_attempt_number=10))
def call_tru_query_engine(recorder: TruChain, chain: BaseChatModel | BaseLLM, question: str) -> None:
    """Invoke ``chain`` with ``question`` inside the ``recorder`` context.

    The chain's answer is discarded; the function exists for the side effect
    of running the call while ``recorder`` is active. On any exception the
    ``retry`` decorator re-runs the call, for at most 10 attempts, with an
    exponential wait bounded between 10 and 20 seconds.

    Args:
        recorder: TruLens recorder entered as a context manager around the call.
        chain: Object whose ``invoke`` method is called with the question.
        question: Question text passed to ``chain.invoke``.
    """
    with recorder:
        chain.invoke(question)

In [None]:
# NOTE(review): SEED is not referenced anywhere in the visible part of this notebook — confirm it is still needed.
SEED = 42
# Separator candidates handed to the text splitters, from coarsest to finest: paragraph break,
# line break, sentence ends (regex lookbehinds for ". ", "! ", "? "), single space, empty string.
# NOTE(review): the lookbehind entries only work as regexes if the splitter treats separators as
# regular expressions — confirm the splitter configuration.
SEPARATORS = ["\n\n", "\n", r"(?<=\. )", r"(?<=\! )", r"(?<=\? )", " ", ""]

In [None]:
# Project root is the directory found via the ".project-root" indicator; all data paths hang off it.
path_to_root = rootutils.find_root(indicator=".project-root")
path_to_data = path_to_root / "data" / "my_documents"
path_to_db = path_to_root / "data" / "db"
path_to_questions = path_to_root / "data" / "questions"

# Create the database and documents directories if they are missing.
# The questions directory is not created here and must already exist when the question files are read.
path_to_db.mkdir(exist_ok=True, parents=True)
path_to_data.mkdir(exist_ok=True, parents=True)

In [None]:
# Deployment names read from the environment; os.getenv returns None when a variable is unset.
llm_deployment_name = os.getenv("LLM_DEPLOYMENT_NAME")
embedding_deployment_name = os.getenv("EMBEDDING_DEPLOYMENT_NAME")

In [None]:
# Redis connection URL from the environment (None when REDIS_URL is unset).
redis_url = os.getenv("REDIS_URL")

In [None]:
# Question files (inside path_to_questions) that make up the evaluation set; one question per line.
eval_questions_files_list = [
    "eval-questions.txt",
    # "eval-surzhik-questions.txt",
    # "eval-abstract-questions.txt",
]
eval_questions = []

for eval_question_file_name in eval_questions_files_list:
    # Read as UTF-8 explicitly: the platform default encoding is not UTF-8 everywhere,
    # and the questions are not guaranteed to be ASCII.
    with open(path_to_questions / eval_question_file_name, "r", encoding="utf-8") as file:
        # Strip surrounding whitespace and drop blank lines so no empty question reaches the chain.
        eval_questions.extend(question for question in map(str.strip, file) if question)

In [None]:
# TruLens stores its data in a SQLite file under path_to_db.
db_name = "my-documents-db.sqlite"
tru = Tru(database_file=str(path_to_db / db_name))

# Start the TruLens dashboard. NOTE(review): force=True presumably replaces an already running
# dashboard — confirm against the trulens_eval documentation.
tru.run_dashboard(force=True)
# tru.reset_database()

In [ ]:
# Relevance-grading prompt in which non-answers ("I don't know", refusals, "context does not
# provide information") are scored 0. The template takes two variables, {prompt} and {response},
# and ends with the "RELEVANCE: " marker that ExtendedAzureOpenAI replaces with the
# chain-of-thought reasons template.
class NoAnswerQuestionStatementRelevance(Relevance, WithPrompt):
    # The text below is sent to the model as-is; any edit changes the scores it produces.
    prompt: ClassVar[PromptTemplate] = PromptTemplate.from_template(
        """You are a RELEVANCE grader; providing the relevance of the given RESPONSE to the given PROMPT.
Respond only as a number from 0 to 10 where 0 is the least relevant and 10 is the most relevant.

A few additional scoring guidelines:

- Long RESPONSES should score equally well as short RESPONSES.

- Answers that intentionally do not answer the question, such as 'I don't know' and model refusals, should be counted as the NOT RELEVANT and get score of 0.

- Answers that do not answer the question, such as 'context does not provide information', should be counted as the NOT RELEVANT and get score of 0.

- RESPONSE must be relevant to the entire PROMPT to get a score of 10.

- RELEVANCE score should increase as the RESPONSE provides RELEVANT context to more parts of the PROMPT.

- RESPONSE that is RELEVANT to none of the PROMPT should get a score of 0.

- RESPONSE that is RELEVANT to some of the PROMPT should get as score of 2, 3, or 4. Higher score indicates more RELEVANCE.

- RESPONSE that is RELEVANT to most of the PROMPT should get a score between a 5, 6, 7 or 8. Higher score indicates more RELEVANCE.

- RESPONSE that is RELEVANT to the entire PROMPT should get a score of 9 or 10.

- RESPONSE that is RELEVANT and answers the entire PROMPT completely should get a score of 10.

- RESPONSE that confidently FALSE should get a score of 0.

- RESPONSE that is only seemingly RELEVANT should get a score of 0.

- Never elaborate.

PROMPT: {prompt}

RESPONSE: {response}

RELEVANCE: """
    )


class ExtendedAzureOpenAI(fAzureOpenAI):
    """Azure OpenAI feedback provider with an extra, stricter relevance function.

    The added function uses the ``NoAnswerQuestionStatementRelevance`` prompt,
    which scores refusals and "the context does not provide information"
    answers as not relevant.
    """

    def no_answer_relevance_with_cot_reasons(
        self,
        prompt: str,
        response: str,
    ) -> float | Tuple[float, Dict]:
        """Grade how relevant ``response`` is to ``prompt``, with chain-of-thought reasons.

        Fills the ``NoAnswerQuestionStatementRelevance`` template, swaps its
        trailing ``RELEVANCE:`` marker for the chain-of-thought reasons
        template and sends the result to the chat completion model.

        Args:
        ----
            prompt (str): A text prompt to an agent.
            response (str): The agent's response to the prompt.

        Returns:
        -------
            The result of ``_extract_score_and_reasons_from_response`` for the
            completed prompt: per the annotation, a score or a
            ``(score, reasons)`` pair. The score is presumably normalised so
            that 0 means "not relevant" and 1 means "relevant" — confirm
            against trulens_eval.
        """
        system_prompt = str.format(
            NoAnswerQuestionStatementRelevance.prompt.template,
            prompt=prompt,
            response=response,
        )
        # Replace only the last "RELEVANCE:" occurrence. The template ends with that marker,
        # so the last occurrence is always the template's own; a plain str.replace would also
        # rewrite any "RELEVANCE:" that happens to appear inside the user's prompt or response.
        head, marker, tail = system_prompt.rpartition("RELEVANCE:")
        if marker:
            system_prompt = f"{head}{prompts.COT_REASONS_TEMPLATE}{tail}"

        return self._extract_score_and_reasons_from_response(system_prompt)

In [None]:
def build_feedbacks(rag_chain: BaseChatModel | BaseLLM) -> List[Feedback]:
    """Assemble the TruLens feedback functions used to score ``rag_chain``.

    Two answer-relevance feedbacks (standard and the "no answer" variant) are
    always produced. When a context selector is available, a context-relevance
    feedback (aggregated with ``np.max``) and a groundedness feedback are
    appended, in that order.

    Args:
        rag_chain: Chain being evaluated; its ``retriever`` attribute decides
            how the retrieved context is selected from the record.

    Returns:
        The feedback objects in the order: answer relevance, answer relevance
        (no answer), then optionally context relevance and groundedness.
    """
    provider = ExtendedAzureOpenAI(deployment_name=llm_deployment_name)

    # Pick the selector that points at the retrieved context inside a record.
    if isinstance(rag_chain.retriever, ContextualCompressionRetriever):
        context = Select.Record.app.combine_documents_chain._call.args.inputs.input_documents[:].page_content
    else:
        context = App.select_context(rag_chain)

    # Relevance of the final answer to the overall question.
    answer_relevance = Feedback(
        provider.relevance_with_cot_reasons,
        name="Answer Relevance",
    ).on_input_output()
    no_answer_relevance = Feedback(
        provider.no_answer_relevance_with_cot_reasons,
        name="Answer Relevance (no answer)",
    ).on_input_output()
    collected = [answer_relevance, no_answer_relevance]

    if context:
        # Relevance of each context chunk to the question.
        # Alternative selector kept from the original: .on(context.collect())
        context_relevance = (
            Feedback(provider.relevance_with_cot_reasons, name="Context Relevance")
            .on_input()
            .on(context)
            .aggregate(np.max)
        )
        collected.append(context_relevance)

        # Whether the response is supported by the context.
        grounded = Groundedness(groundedness_provider=provider)
        groundedness = (
            Feedback(
                grounded.groundedness_measure_with_cot_reasons,
                name="Groundedness",
            )
            .on(context)
            .on_output()
            .aggregate(grounded.grounded_statements_aggregator)
        )
        collected.append(groundedness)

    return collected

## Prepare data

+ unstructured PDFs: how to deal with tables, texts and images - [link](https://medium.com/@kbouziane.ai/harnessing-rag-for-text-tables-and-images-a-comprehensive-guide-ca4d2d420219)
+ issue with PDFs loaders - [link](https://github.com/langchain-ai/langchain/issues/13805)
+ langchain doc about unstructured PDFs (also includes postprocessing) - [link](https://python.langchain.com/docs/integrations/document_loaders/unstructured_file)
+ `AmazonTextractPDFParser` can be a great option for PDFs

In [None]:
def describe_table(table: str) -> str:
    """Ask the chat model for a precise, human-readable description of a table.

    Args:
        table: Table content inserted into the prompt as ``{element}``.

    Returns:
        The model's description as a plain string.
    """
    # Ideas kept from the original for tuning the prompt:
    #   - Try to limit your description to 200 words
    #   - Preferably, use the Ukrainian language for describing.
    prompt = ChatPromptTemplate.from_template(
        """You are an assistant tasked with describing the table below. Provide detailed description of the information inside the table in human readable form. You can use bullet points, full sentences, or any other format you like. The description should be informative and very precise. Provide description in the language of the contents of the table. Table:\n{element} """
    )
    chat_model = AzureChatOpenAI(
        temperature=0,
        deployment_name=llm_deployment_name,
    )
    # The input string is passed through unchanged as the "element" prompt variable.
    pipeline = {"element": lambda x: x} | prompt | chat_model | StrOutputParser()
    return pipeline.invoke(table)

In [ ]:
def _process_document_elements(
    elements: List[Document],
) -> List[Document]:
    """Replace the content of table elements with an LLM-written description.

    Elements whose ``category`` metadata is not ``"Table"`` (or is missing)
    are passed through untouched. Table elements are copied, and the copy's
    ``page_content`` is set to the result of ``describe_table``.

    Args:
        elements: Documents produced by a loader in "elements" mode.

    Returns:
        A new list with the same length and order as ``elements``.
    """
    # TODO: generate additional documents with summary and hypothetical questions
    new_elements = []
    for element in elements:
        # .get: an element without a category is treated as a regular element instead of raising KeyError.
        if element.metadata.get("category") != "Table":
            new_elements.append(element)
            continue

        # Prefer the HTML rendering of the table; fall back to its plain text when no HTML is present.
        table_source = element.metadata.get("text_as_html") or element.page_content
        new_element = element.copy()
        new_element.page_content = describe_table(table_source)
        new_elements.append(new_element)
    return new_elements


def _combine_elements(
    documents: List[Document],
    additional_metadata: Optional[dict] = None,
) -> Document:
    """Merge several documents into one, joining their text and metadata.

    Page contents are joined with a blank line. For every metadata key the
    distinct values (stringified and stripped) are joined with ``", "`` in
    first-seen order, so a value repeated across elements appears only once.
    The ``category``, ``text_as_html`` and ``languages`` keys are dropped, and
    ``additional_metadata`` is applied last, overriding merged values.

    Args:
        documents: Non-empty list of documents to merge.
        additional_metadata: Extra metadata written on top of the merged metadata.

    Returns:
        A single ``Document`` with the combined text and metadata.

    Raises:
        ValueError: If ``documents`` is empty.
    """
    if not documents:
        raise ValueError("documents must contain at least one element to combine")

    # Per key, a dict is used as an insertion-ordered set of the distinct values seen so far.
    values_by_key: Dict[str, Dict[str, None]] = {}
    for document in documents:
        for key, value in document.metadata.items():
            values_by_key.setdefault(key, {})[str(value).strip()] = None

    combined_metadata = {key: ", ".join(values) for key, values in values_by_key.items()}

    # Element-level keys that make no sense for the merged document.
    for dropped_key in ("category", "text_as_html", "languages"):
        combined_metadata.pop(dropped_key, None)
    combined_metadata.update(additional_metadata or {})

    result = "\n\n".join(document.page_content for document in documents)
    return Document(page_content=result, metadata=combined_metadata)

In [None]:
def load_pdf_document(
    file_path: str,
    additional_metadata: Optional[dict] = None,
) -> Document:
    """Load a PDF file and return it as one combined document.

    The file is parsed into elements with ``UnstructuredPDFLoader``, the
    elements go through ``_process_document_elements`` and are then merged by
    ``_combine_elements``.

    Args:
        file_path: Path of the PDF file.
        additional_metadata: Extra metadata forwarded to ``_combine_elements``.

    Returns:
        A single ``Document`` for the whole file.
    """
    pdf_loader = UnstructuredPDFLoader(
        file_path=file_path,
        mode="elements",
        post_processors=[clean],
        extract_images_in_pdf=False,
        infer_table_structure=True,
        chunking_strategy="by_title",
        max_characters=4000,
        new_after_n_chars=2800,
        combine_text_under_n_chars=1000,
        hi_res_model_name="yolox",
        languages=["eng", "ukr", "rus"],
    )
    processed_elements = _process_document_elements(elements=pdf_loader.load())
    return _combine_elements(documents=processed_elements, additional_metadata=additional_metadata)

In [None]:
def load_md_document(
    file_path: str,
    additional_metadata: Optional[dict] = None,
) -> Document:
    """Load a Markdown file and return it as one combined document.

    The file is parsed into elements with ``UnstructuredMarkdownLoader``, the
    elements go through ``_process_document_elements`` and are then merged by
    ``_combine_elements``.

    Args:
        file_path: Path of the Markdown file.
        additional_metadata: Extra metadata forwarded to ``_combine_elements``.

    Returns:
        A single ``Document`` for the whole file.
    """
    # NOTE(review): extract_images_in_pdf looks PDF-specific; it is kept because the original passes it.
    md_loader = UnstructuredMarkdownLoader(
        file_path=file_path,
        mode="elements",
        post_processors=[clean],
        extract_images_in_pdf=False,
        infer_table_structure=True,
        chunking_strategy="by_title",
        max_characters=4000,
        new_after_n_chars=2800,
        combine_text_under_n_chars=1000,
        languages=["eng", "ukr", "rus"],
    )
    processed_elements = _process_document_elements(elements=md_loader.load())
    return _combine_elements(documents=processed_elements, additional_metadata=additional_metadata)

In [None]:
# Build the corpus: Wikipedia articles first, then local PDF files, then local Markdown files.
loaders = [
    WikipediaLoader(
        query="Розпізнавання іменованих сутностей", doc_content_chars_max=10000, load_max_docs=2, lang="uk"
    ),
    WikipediaLoader(query="Дід Панас", doc_content_chars_max=10000, load_max_docs=1, lang="uk"),
    WikipediaLoader(query="Grandpa Panas", doc_content_chars_max=10000, load_max_docs=1, lang="en"),
]

docs = []
for loader in loaders:
    # Everything one loader returns is merged into a single Document tagged with app_name="main".
    docs.append(_combine_elements(documents=loader.load(), additional_metadata={"app_name": "main"}))

# One Document per PDF file found directly in path_to_data.
for file_name in os.listdir(path_to_data):
    if file_name.endswith(".pdf"):
        docs.append(
            load_pdf_document(file_path=str(path_to_data / file_name), additional_metadata={"app_name": "main"})
        )

# One Document per Markdown file found directly in path_to_data.
for file_name in os.listdir(path_to_data):
    if file_name.endswith(".md"):
        docs.append(load_md_document(file_path=str(path_to_data / file_name), additional_metadata={"app_name": "main"}))

In [None]:
# Parent-chunk splitter; chunk_size and chunk_overlap are measured with the cl100k_base tiktoken encoding.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base",
    chunk_size=768,  # 768
    chunk_overlap=128,  # 128
    separators=SEPARATORS,
    # SEPARATORS contains regex lookbehinds for sentence ends; without this flag the splitter
    # escapes every separator and those entries are treated as literal text that never matches.
    is_separator_regex=True,
)

In [None]:
# Split every loaded document into the parent chunks used by the rest of the notebook.
splits = text_splitter.split_documents(docs)

In [None]:
# Display the number of chunks produced.
len(splits)

## Create embeddings and fill vector store

In [None]:
# Embedding model client bound to the Azure deployment named in EMBEDDING_DEPLOYMENT_NAME.
embeddings = AzureOpenAIEmbeddings(deployment=embedding_deployment_name)

In [None]:
# Qdrant endpoint from the environment (None when QDRANT_URL is unset).
url = os.getenv("QDRANT_URL")
# The collection name is hard-coded here; the environment-driven variant is kept for reference.
# collection_name = os.getenv("DOCUMENTS_COLLECTION_NAME")
collection_name = "my_custom_documents"

In [None]:
# Embed all chunks and upload them to Qdrant. force_recreate=True is passed so that the
# collection is rebuilt on every run rather than appended to.
qdrant = Qdrant.from_documents(
    documents=splits,
    embedding=embeddings,
    url=url,
    collection_name=collection_name,
    force_recreate=True,
)

## Prepare the document store and LLM

In [ ]:
# Vector-store handle on the same collection, built from an explicit client;
# the retrievers in the following sections are created from this handle.
doc_store = Qdrant(
    client=QdrantClient(url=url),
    collection_name=collection_name,
    embeddings=embeddings,
)

In [ ]:
# Chat model used by the QA chains; the temperature is also stored in the TruLens app metadata.
temperature = 0.1
llm = AzureChatOpenAI(
    deployment_name=llm_deployment_name,
    temperature=temperature,
)

## Try out the search

In [None]:
# Smoke test: plain similarity search, showing the top hit.
query = "Що таке розпізнавання іменованих сутностей?"
found_docs = qdrant.similarity_search(query)
found_docs[0]

In [None]:
# Smoke test: max-marginal-relevance search returning k=2 results chosen from fetch_k=10 candidates.
query = "Хто такий дід Панас?"
found_docs = qdrant.max_marginal_relevance_search(query, k=2, fetch_k=10)
found_docs[0]

## Setup Redis cache

In [ ]:
# cached_embedding = CacheBackedEmbeddings.from_bytes_store(
#     embeddings, RedisStore(redis_url=redis_url), namespace=embeddings.model,
# )

In [None]:
# set_llm_cache(
#     RedisSemanticCache(redis_url=redis_url, embedding=embeddings, score_threshold=0.05)
# )

In [None]:
# set_llm_cache(
#     RedisCache(redis_=Redis.from_url(url=redis_url))
# )

In [None]:
# llm1 = AzureChatOpenAI(
#     deployment_name=llm_deployment_name,
# )

In [None]:
# %%time
# # The first time, it is not yet in cache, so it should take longer
# llm1.invoke("Tell me a joke")

In [None]:
# %%time
# # The second time, while not a direct hit, the question is semantically similar to the original question,
# # so it uses the cached result!
# llm1.invoke("Tell me one joke")

## Create simple RAG chain with map_rerank chain type

In [None]:
# system_message = """"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Answer the questions in the language of questions.
#
# In addition to giving an answer, also return a score of how fully it answered the user's question. This should be in the following format:
#
# Question: [question here]
# Helpful Answer: [answer here]
# Score: [score between 0 and 100]
#
# How to determine the score:
# - Higher is a better answer
# - Better responds fully to the asked question, with sufficient level of detail
# - If you do not know the answer based on the context, that should be a score of 0
# - Don't be overconfident!
#
# Example #1
#
# Context:
# ---------
# Apples are red
# ---------
# Question: what color are apples?
# Helpful Answer: red
# Score: 100
#
# Example #2
#
# Context:
# ---------
# it was night and the witness forgot his glasses. he was not sure if it was a sports car or an suv
# ---------
# Question: якого типу була машина?
# Helpful Answer: спортивний автомобіль або позашляховик
# Score: 60
#
# Example #3
#
# Context:
# ---------
# Pears are either red or orange
# ---------
# Question: what color are apples?
# Helpful Answer: This document does not answer the question
# Score: 0
#
# Begin!
#
# Context:
# ---------
# {context}
# ---------
# Question: {question}
# Helpful Answer:"""

In [None]:
# system_message_prompt = PromptTemplate(
#     template=system_message,
#     input_variables=["context", "question"],
#     output_keys=['answer', 'score'],
#     output_parser=RegexParser(regex='(.*?)\\nScore: (\\d*)', output_keys=['answer', 'score']),
# )

In [None]:
# Experiment configuration; these values are also written into the TruLens app metadata.
chain_type = "map_rerank"  # "stuff", "map_reduce", "map_rerank", and "refine".
search_type = "similarity"
k = 6

# The "compressor" pipeline contains only LongContextReorder, so documents are reordered, not shortened.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[
        LongContextReorder(),
    ]
)

# Base retriever returns k hits and passes a metadata filter on app_name == "main".
retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=doc_store.as_retriever(
        search_type=search_type, search_kwargs={"k": k, "filter": {"app_name": ["main"]}}
    ),
)

# No custom prompt is passed here, so the chain type's default prompt is used.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=chain_type,
    retriever=retriever,
    return_source_documents=False,
)

In [None]:
# The app id encodes the experiment settings as "RAG key: value, ...", so each configuration
# gets its own TruLens app id.
metadata = {"temperature": temperature, "search_type": search_type, "chain_type": chain_type}
app_id = f'RAG {", ".join([f"{str(key)}: {str(value)}" for key, value in metadata.items()])}'

# Recorder wrapping the chain together with the feedback functions built for it.
tru_recorder = TruChain(
    qa_chain,
    app_id=app_id,
    feedbacks=build_feedbacks(rag_chain=qa_chain),
    metadata=metadata,
    feedback_mode=FeedbackMode.WITH_APP,
)

In [None]:
# Run every evaluation question through the recorded chain (failed calls are retried by call_tru_query_engine).
for question in eval_questions:
    call_tru_query_engine(recorder=tru_recorder, chain=qa_chain, question=question)

In [None]:
# Fetch what TruLens stored for this app id and preview the first rows.
# NOTE(review): `feedback` presumably holds the feedback column names — confirm.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records.head()

## Create simple RAG chain with stuff chain type

In [None]:
# Same setup as the previous experiment, but with the "stuff" chain type.
chain_type = "stuff"  # "stuff", "map_reduce", "map_rerank", and "refine".
search_type = "similarity"
k = 6

# Reorder-only pipeline: LongContextReorder is the single transformer.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[
        LongContextReorder(),
    ]
)

# Base retriever returns k hits and passes a metadata filter on app_name == "main".
retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=doc_store.as_retriever(
        search_type=search_type,
        search_kwargs={"k": k, "filter": {"app_name": ["main"]}},
    ),
)

# No custom prompt is passed here, so the chain type's default prompt is used.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=chain_type,
    retriever=retriever,
    return_source_documents=False,
)

In [ ]:
# chain_type = "stuff"  # "stuff", "map_reduce", "map_rerank", and "refine".
# search_type = "mmr"
# k = 6
# fetch_k = 12
#
# pipeline_compressor = DocumentCompressorPipeline(
#     transformers=[
#         LongContextReorder(),
#     ]
# )
#
# retriever = ContextualCompressionRetriever(
#     base_compressor=pipeline_compressor,
#     base_retriever=doc_store.as_retriever(search_type=search_type, search_kwargs={"k": k, 'fetch_k': fetch_k}),
# )
#
# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type=chain_type,
#     retriever=retriever,
#     return_source_documents=False,
# )

In [None]:
# App id built from the current settings ("RAG key: value, ...").
metadata = {"temperature": temperature, "search_type": search_type, "chain_type": chain_type}
app_id = f'RAG {", ".join([f"{str(key)}: {str(value)}" for key, value in metadata.items()])}'

# Recorder for the "stuff" chain with a freshly built feedback list.
tru_recorder = TruChain(
    qa_chain,
    app_id=app_id,
    feedbacks=build_feedbacks(rag_chain=qa_chain),
    metadata=metadata,
    feedback_mode=FeedbackMode.WITH_APP,
)

In [None]:
# Evaluate the "stuff" chain on every question.
for question in eval_questions:
    call_tru_query_engine(recorder=tru_recorder, chain=qa_chain, question=question)

In [None]:
# Preview the stored records for the "stuff" experiment.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records.head()

## Modify the chain to use custom prompt in English

In [None]:
# System prompt for the custom-prompt experiments; {context} receives the retrieved documents.
# The text is sent to the model as-is, so any edit changes the experiment.
system_template = """You are helpful assistant that use the following pieces of context to answer the user's question. If you don't know the answer and piece of answer can't be extracted from the context, just say that you don't know, don't try to make up an answer. Answer the questions in the language of questions. You can edit the context so that the answer looks more attractive to the user, but at the same time the essence and content do not change. It should look like a standalone answer on the question without mentioning the context and where the answer was found. Do not say you used the context to answer the question. You can ask clarifying questions if you need more information to answer the user's question, but try to avoid it.
----------------
{context}"""

# The user's question is passed through unchanged as the human message.
human_template = "{question}"

# Message templates reused by every chain that is built with the custom prompt.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template(human_template),
]

In [None]:
# "stuff" chain again, this time with the custom chat prompt defined in `messages`.
chain_type = "stuff"  # "stuff", "map_reduce", "map_rerank", and "refine".
search_type = "similarity"
k = 6

# Reorder-only pipeline: LongContextReorder is the single transformer.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[
        LongContextReorder(),
    ]
)

# Base retriever returns k hits and passes a metadata filter on app_name == "main".
retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=doc_store.as_retriever(
        search_type=search_type,
        search_kwargs={"k": k, "filter": {"app_name": ["main"]}},
    ),
)

# The custom prompt is only supplied for the "stuff" chain type; any other type gets no extra kwargs.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=chain_type,
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={"prompt": ChatPromptTemplate.from_messages(messages)} if chain_type == "stuff" else None,
)

In [None]:
# "custom_prompt": True distinguishes this app id from the default-prompt "stuff" experiment.
metadata = {"custom_prompt": True, "temperature": temperature, "search_type": search_type, "chain_type": chain_type}
app_id = f'RAG {", ".join([f"{str(key)}: {str(value)}" for key, value in metadata.items()])}'

# Recorder for the custom-prompt chain.
tru_recorder = TruChain(
    qa_chain,
    app_id=app_id,
    feedbacks=build_feedbacks(rag_chain=qa_chain),
    metadata=metadata,
    feedback_mode=FeedbackMode.WITH_APP,
)

In [None]:
# Evaluate the custom-prompt chain on every question.
for question in eval_questions:
    call_tru_query_engine(recorder=tru_recorder, chain=qa_chain, question=question)

In [None]:
# Preview the stored records for the custom-prompt experiment.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records.head()

## Modify the chain to use custom prompt in English and context compression

In [None]:
# Prompt for the LLM-based extractor: it asks the model to copy the relevant parts of a
# retrieved document verbatim. Takes {question} and {context}.
custom_retriever_prompt = """Given the following question and context, extract any part of the context AS IS that is relevant to answer the question. Remember, DO NOT edit the extracted parts of the context.
Question: {question}
Context: {context}
Helpful Answer:
"""
custom_retriever_prompt_template = PromptTemplate(
    template=custom_retriever_prompt, input_variables=["question", "context"]
)

In [None]:
# Context-compression experiment: note k is 5 here, not 6 as in the earlier experiments.
chain_type = "stuff"  # "stuff", "map_reduce", "map_rerank", and "refine".
search_type = "similarity"
k = 5

# Base retriever returns k hits and passes a metadata filter on app_name == "main".
base_retriever = doc_store.as_retriever(
    search_type=search_type,
    search_kwargs={"k": k, "filter": {"app_name": ["main"]}},
)

# Two-step pipeline: reorder the hits, then run the LLM extractor with the custom prompt on them.
base_compressor = DocumentCompressorPipeline(
    transformers=[LongContextReorder(), LLMChainExtractor.from_llm(llm=llm, prompt=custom_retriever_prompt_template)]
)

# NOTE(review): `k` does not look like a field of ContextualCompressionRetriever and is presumably
# ignored; the number of hits is already set on base_retriever — confirm and drop if so.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=base_compressor,
    base_retriever=base_retriever,
    k=k,
)

In [None]:
# QA chain on top of the compressing retriever; the custom prompt is only supplied for "stuff".
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=chain_type,
    retriever=compression_retriever,
    return_source_documents=False,
    chain_type_kwargs={"prompt": ChatPromptTemplate.from_messages(messages)} if chain_type == "stuff" else None,
)

In [None]:
# The metadata matches the custom-prompt experiment; the "RAG with compression" prefix keeps the app id distinct.
metadata = {"custom_prompt": True, "temperature": temperature, "search_type": search_type, "chain_type": chain_type}
app_id = f'RAG with compression {", ".join([f"{str(key)}: {str(value)}" for key, value in metadata.items()])}'

# Recorder for the compression chain.
tru_recorder = TruChain(
    qa_chain,
    app_id=app_id,
    feedbacks=build_feedbacks(rag_chain=qa_chain),
    metadata=metadata,
    feedback_mode=FeedbackMode.WITH_APP,
)

In [None]:
# Evaluate the compression chain on every question.
for question in eval_questions:
    call_tru_query_engine(recorder=tru_recorder, chain=qa_chain, question=question)

In [None]:
# Preview the stored records for the compression experiment.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records.head()

## Multivector Retrieval

In [None]:
# Metadata key that links a child vector to its parent document.
id_key = "doc_id"
url = os.getenv("QDRANT_URL")
# From here on collection_name points at the child collection, not the main one used above.
# collection_name = os.getenv("DOCUMENTS_COLLECTION_NAME")
collection_name = "my_custom_documents_child"

In [None]:
# Explicit client, used both to create the child collection and by the child vector store.
qdrant_client = QdrantClient(url=url)

In [None]:
# Create the child collection with cosine distance.
# NOTE(review): size=1536 must equal the dimension of the embedding deployment — confirm.
# NOTE(review): unlike the force_recreate=True upload of the main collection, this call presumably
# fails when the collection already exists, so the cell may not be re-runnable — confirm.
qdrant_client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=1536,
        distance=models.Distance.COSINE,
    ),
)

In [None]:
# Re-create the embeddings client (same deployment as before) so this section can run on its own.
embeddings = AzureOpenAIEmbeddings(deployment=embedding_deployment_name)

In [None]:
# Rebinds the global `temperature` and `llm`: the multivector experiments run at temperature 0.0.
temperature = 0.0
llm = AzureChatOpenAI(
    deployment_name=llm_deployment_name,
    temperature=temperature,
)

In [None]:
# Parent documents are kept in Redis under the "parent-document-store" namespace.
parent_store = RedisStore(
    redis_url=redis_url,
    namespace="parent-document-store",
)
# Child vectors are kept in the Qdrant child collection.
child_vectorstore = Qdrant(
    client=qdrant_client,
    collection_name=collection_name,
    embeddings=embeddings,
)

In [None]:
k = 6
search_type = "similarity"

# Retriever that searches the child vectors and resolves parents from the byte store through
# the id stored under `id_key` in each child's metadata.
multi_vector_retriever = MultiVectorRetriever(
    vectorstore=child_vectorstore,
    byte_store=parent_store,
    id_key=id_key,
    search_type=search_type,
    search_kwargs={"k": k, "filter": {"app_name": ["main"]}},
)

In [None]:
# One fresh UUID per parent chunk. Dicts keep insertion order, so doc_ids[i] is the id of splits[i].
doc_ids_map = {str(uuid.uuid4()): split for split in splits}
doc_ids = list(doc_ids_map.keys())

### Create a child collection: chunking

In [None]:
# Splitter that cuts each parent chunk into smaller child chunks. It is built with the plain
# constructor, so sizes are measured with the splitter's default length function
# (characters, not tokens).
child_text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=256,
    chunk_overlap=64,
    separators=SEPARATORS,
    # SEPARATORS contains regex lookbehinds for sentence ends; without this flag the splitter
    # escapes every separator and those entries are treated as literal text that never matches.
    is_separator_regex=True,
)

In [None]:
# Split every parent chunk into child chunks and tag each child with its parent's id and app_name.
sub_docs = []
for parent_id, parent_doc in zip(doc_ids, splits):
    child_docs = child_text_splitter.split_documents([parent_doc])
    for child_doc in child_docs:
        child_doc.metadata[id_key] = parent_id
        child_doc.metadata["app_name"] = parent_doc.metadata["app_name"]
    sub_docs.extend(child_docs)

In [None]:
# Index the child chunks in the vector store and store the parent chunks under their ids.
multi_vector_retriever.vectorstore.add_documents(sub_docs)
multi_vector_retriever.docstore.mset(list(zip(doc_ids, splits)))

### Create simple RAG chain with stuff chain type

In [None]:
# "stuff" chain with the custom prompt on top of the multivector retriever (child-chunk index).
chain_type = "stuff"  # "stuff", "map_reduce", "map_rerank", and "refine".

# Reorder-only pipeline: LongContextReorder is the single transformer.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[
        LongContextReorder(),
    ]
)

retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=multi_vector_retriever,
)

# The custom prompt is only supplied for the "stuff" chain type.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=chain_type,
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={"prompt": ChatPromptTemplate.from_messages(messages)} if chain_type == "stuff" else None,
)

In [None]:
# The app id names the child-chunking parameters (256, 64) followed by the current settings.
metadata = {"temperature": temperature, "search_type": search_type, "chain_type": chain_type}
app_id = (
    "RAG multivector chunking (256, 64) and custom prompt"
    f" {', '.join([f'{str(key)}: {str(value)}' for key, value in metadata.items()])}"
)

# Recorder for the multivector (chunking) chain.
tru_recorder = TruChain(
    qa_chain,
    app_id=app_id,
    feedbacks=build_feedbacks(rag_chain=qa_chain),
    metadata=metadata,
    feedback_mode=FeedbackMode.WITH_APP,
)

In [None]:
# Evaluate the multivector (chunking) chain on every question.
for question in eval_questions:
    call_tru_query_engine(recorder=tru_recorder, chain=qa_chain, question=question)

In [None]:
# Preview the stored records for the multivector (chunking) experiment.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records.head()

In [ ]:
# t, _ =  tru.get_records_and_feedback(app_ids=[app_id])
# t = t[['input', 'output', 'Answer Relevance', 'Answer Relevance (no answer)', 'Context Relevance', 'Groundedness']]
# t['output'] = t['output'].apply(lambda x: x.encode().decode('unicode_escape'))
# t['input'] = t['input'].apply(lambda x: x.encode().decode('unicode_escape'))
# # t.to_excel(path_to_data.parent / 'test-results' / 'custom_promt_stuff_full.xlsx', index=False)
# t.to_excel(path_to_data.parent / 'test-results' / 'multivector_chunking_256_64_full.xlsx', index=False)

### Create a child collection: summary

In [None]:
# Summarisation pipeline: takes a Document, feeds its page_content into the prompt and
# returns the model output as a string. No temperature is set, so the model default applies.
chain = (
    {"doc": lambda x: x.page_content}
    | ChatPromptTemplate.from_template("Summarize the following document:\n\n{doc}")
    | AzureChatOpenAI(deployment_name=llm_deployment_name)
    | StrOutputParser()
)

In [None]:
# Summarise every parent chunk, running at most 5 requests concurrently.
summaries = chain.batch(splits, {"max_concurrency": 5})

In [None]:
# Wrap each summary in a Document that carries its parent's id and app_name.
# This relies on summaries[i] corresponding to splits[i] / doc_ids[i].
summary_docs = [
    Document(page_content=s, metadata={id_key: doc_ids[i], "app_name": doc_ids_map[doc_ids[i]].metadata["app_name"]})
    for i, s in enumerate(summaries)
]

In [None]:
# NOTE(review): the summaries are added to the same child collection that already holds the
# chunk children (nothing visible clears it in between), so this experiment searches chunks and
# summaries together — confirm that is intended.
multi_vector_retriever.vectorstore.add_documents(summary_docs)
# Writes the same id -> parent pairs as in the chunking section.
multi_vector_retriever.docstore.mset(list(zip(doc_ids, splits)))

### Create simple RAG chain with stuff chain type

In [None]:
# "stuff" chain with the custom prompt on top of the multivector retriever (after adding summaries).
chain_type = "stuff"  # "stuff", "map_reduce", "map_rerank", and "refine".

# Reorder-only pipeline: LongContextReorder is the single transformer.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[
        LongContextReorder(),
    ]
)

retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=multi_vector_retriever,
)

# The custom prompt is only supplied for the "stuff" chain type.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type=chain_type,
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={"prompt": ChatPromptTemplate.from_messages(messages)} if chain_type == "stuff" else None,
)

In [None]:
# App id for the summary variant of the multivector experiment.
metadata = {"temperature": temperature, "search_type": search_type, "chain_type": chain_type}
app_id = (
    "RAG multivector summary and custom prompt"
    f" {', '.join([f'{str(key)}: {str(value)}' for key, value in metadata.items()])}"
)

# Recorder for the multivector (summary) chain.
tru_recorder = TruChain(
    qa_chain,
    app_id=app_id,
    feedbacks=build_feedbacks(rag_chain=qa_chain),
    metadata=metadata,
    feedback_mode=FeedbackMode.WITH_APP,
)

In [None]:
# Evaluate the multivector (summary) chain on every question.
for question in eval_questions:
    call_tru_query_engine(recorder=tru_recorder, chain=qa_chain, question=question)

In [None]:
# Preview the stored records for the multivector (summary) experiment.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])
records.head()

### Create a child collection: hypothetical questions

In [None]:
# functions = [
#     {
#         "name": "hypothetical_questions",
#         "description": "Generate hypothetical questions",
#         "parameters": {
#             "type": "object",
#             "properties": {
#                 "questions": {
#                     "type": "array",
#                     "items": {"type": "string"},
#                 },
#             },
#             "required": ["questions"],
#         },
#     }
# ]

In [None]:
# from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser
# from langchain_openai import ChatOpenAI
#
# chain = (
#     {"doc": lambda x: x.page_content}
#     # Only asking for 3 hypothetical questions, but this could be adjusted
#     | ChatPromptTemplate.from_template(
#         "Generate a list of exactly 3 hypothetical questions that the below document could be used to answer:\n\n{doc}"
#     )
#     | AzureChatOpenAI(
#         deployment_name=llm_deployment_name,
#     ).bind(
#         functions=functions, function_call={"name": "hypothetical_questions"}
#     )
#     | JsonKeyOutputFunctionsParser(key_name="questions")
# )

In [None]:
# chain.invoke(splits[0])

In [None]:
# hypothetical_questions = chain.batch(docs, {"max_concurrency": 5})

In [None]:
# question_docs = []
# for i, question_list in enumerate(hypothetical_questions):
#     question_docs.extend(
#         [Document(page_content=s, metadata={id_key: doc_ids[i], "app_name": doc_ids_map[doc_ids[i]].metadata['app_name']}) for s in question_list]
#     )

In [None]:
# multi_vector_retriever.vectorstore.add_documents(question_docs)
# multi_vector_retriever.docstore.mset(list(zip(doc_ids, docs)))

### Create simple RAG chain with stuff chain type