# Config

In [1]:
from hypster import HP, config

In [2]:
@config
def indexing_config(hp: HP):
    from haystack.components.converters import PyPDFToDocument

    converter = PyPDFToDocument()

    from haystack import Pipeline

    pipeline = Pipeline()
    pipeline.add_component("loader", converter)

    enrich_doc_w_llm = hp.select([True, False], default=True)
    if enrich_doc_w_llm:
        from haystack.components.builders import PromptBuilder

        from src.haystack_utils import AddLLMMetadata

        template = hp.text_input("""
        Please provide a one sentence summary of 15 words max what this document is about.
        Then add a list of 3-5 keywords, including acronyms, that will help to find this document using keywords.
        Context:
        {{ documents[0].content[:1000] }}
        Output format:

        Summary:
        Keywords:
        """)
        from hypster import load

        llm = load("configs/llm.py")
        llm_inputs = hp.propagate(llm)
        pipeline.add_component("prompt_builder", PromptBuilder(template=template))
        pipeline.add_component("llm", llm_inputs["llm"])
        pipeline.add_component("document_enricher", AddLLMMetadata())
        pipeline.connect("loader", "prompt_builder")
        pipeline.connect("prompt_builder", "llm")
        pipeline.connect("llm", "document_enricher")
        pipeline.connect("loader", "document_enricher")

        doc_source = "document_enricher"
    else:
        doc_source = "loader"

    from haystack.components.preprocessors import DocumentSplitter

    split_by = hp.select(["sentence", "word", "passage", "page"], default="sentence")
    splitter = DocumentSplitter(split_by=split_by, split_length=hp.int_input(10), split_overlap=hp.int_input(2))

    pipeline.add_component("splitter", splitter)
    pipeline.connect(doc_source, "splitter")


indexing_config.save("configs/indexing.py")

In [3]:
@config
def fast_embed(hp: HP):
    from typing import Any, Dict, List

    from fastembed import TextEmbedding

    def get_model_dim(chosen_model: str, model_list: List[Dict[str, Any]]) -> int:
        for model in model_list:
            if model["model"] == chosen_model:
                return model["dim"]
        raise ValueError(f"Model {chosen_model} not found in the list of supported models.")

    from haystack_integrations.components.embedders.fastembed import (
        FastembedDocumentEmbedder,
        FastembedTextEmbedder,
    )

    meta_fileds_to_embed = ["parent_doc_summary"]

    model = hp.select(
        {"bge-small": "BAAI/bge-small-en-v1.5", "mini-lm": "sentence-transformers/all-MiniLM-L6-v2"},
        default="mini-lm",
    )
    import os

    cpu_count = os.cpu_count() or 1
    doc_embedder = FastembedDocumentEmbedder(
        model=model,
        parallel=hp.int_input(cpu_count),
        meta_fields_to_embed=meta_fileds_to_embed,
    )
    text_embedder = FastembedTextEmbedder(model=model)
    embedding_dim = get_model_dim(model, TextEmbedding.list_supported_models())


fast_embed.save("configs/fast_embed.py")

In [4]:
@config
def jina_embed(hp: HP):
    from haystack_integrations.components.embedders.jina import JinaDocumentEmbedder, JinaTextEmbedder

    meta_fileds_to_embed = ["parent_doc_summary"]

    model = hp.select({"v3": "jina-embeddings-v3", "v2": "jina-embeddings-v2"}, default="v3")
    late_chunking = hp.select([True, False], default=True, name="late_chunking") if model == "v3" else False
    doc_embedder = JinaDocumentEmbedder(
        model=model,
        batch_size=hp.int_input(16),
        dimensions=hp.int_input(256),
        task="retrieval.passage",
        late_chunking=late_chunking,
        meta_fields_to_embed=meta_fileds_to_embed,
    )
    text_embedder = JinaTextEmbedder(model=model, dimensions=doc_embedder.dimensions, task="retrieval.query")
    embedding_dim = doc_embedder.dimensions


jina_embed.save("configs/jina_embed.py")

In [5]:
@config
def in_memory_retrieval(hp: HP):
    from haystack.components.retrievers.in_memory import InMemoryBM25Retriever, InMemoryEmbeddingRetriever
    from haystack.document_stores.in_memory import InMemoryDocumentStore

    embedding_similarity_function = hp.select(["cosine", "dot_product"], default="cosine")
    bm25_algorithm = hp.select(["BM25Okapi", "BM25L", "BM25Plus"], default="BM25L")
    document_store = InMemoryDocumentStore(
        embedding_similarity_function=embedding_similarity_function, bm25_algorithm=bm25_algorithm
    )

    from haystack.components.joiners.document_joiner import DocumentJoiner

    join_mode = hp.select(
        ["distribution_based_rank_fusion", "concatenate", "merge", "reciprocal_rank_fusion"],
        default="distribution_based_rank_fusion",
    )
    joiner = DocumentJoiner(join_mode=join_mode, top_k=hp.int_input(10))

    from haystack import Pipeline

    from src.haystack_utils import PassThroughDocuments, PassThroughText

    pipeline = Pipeline()
    pipeline.add_component("query", PassThroughText())
    pipeline.add_component("bm25_retriever", InMemoryBM25Retriever(document_store=document_store))
    pipeline.add_component("embedding_retriever", InMemoryEmbeddingRetriever(document_store=document_store))
    pipeline.add_component("document_joiner", joiner)
    pipeline.add_component("retrieved_documents", PassThroughDocuments())
    pipeline.connect("query", "bm25_retriever")
    pipeline.connect("bm25_retriever", "document_joiner")
    pipeline.connect("embedding_retriever", "document_joiner")
    pipeline.connect("document_joiner", "retrieved_documents")


in_memory_retrieval.save("configs/in_memory_retrieval.py")

In [6]:
@config
def qdrant_retrieval(hp: HP):
    from haystack_integrations.components.retrievers.qdrant import QdrantEmbeddingRetriever
    from haystack_integrations.document_stores.qdrant import QdrantDocumentStore

    location = hp.text_input(":memory:")
    embedding_similarity_function = hp.select(["cosine", "dot_product", "l2"], default="cosine")

    document_store = QdrantDocumentStore(
        location=location,
        recreate_index=True,
        similarity=embedding_similarity_function,
        embedding_dim = hp.int_input(256),
        on_disk=True,
    )

    embedding_retriever = QdrantEmbeddingRetriever(document_store=document_store, top_k=hp.int_input(20))

    from haystack import Pipeline

    from src.haystack_utils import PassThroughDocuments, PassThroughText

    pipeline = Pipeline()
    pipeline.add_component("query", PassThroughText())
    pipeline.add_component("embedding_retriever", embedding_retriever)
    pipeline.add_component("retrieved_documents", PassThroughDocuments())
    pipeline.connect("embedding_retriever", "retrieved_documents")


qdrant_retrieval.save("configs/qdrant_retrieval.py")

In [7]:
@config
def llm(hp: HP):
    anthropic_models = {"haiku": "claude-3-haiku-20240307", "sonnet": "claude-3-5-sonnet-20240620"}
    openai_models = {"gpt-4o-mini": "gpt-4o-mini", "gpt-4o": "gpt-4o", "gpt-4o-latest": "gpt-4o-2024-08-06"}
    model = hp.select({**anthropic_models, **openai_models}, default="gpt-4o-mini")
    from haystack.components.generators import OpenAIGenerator
    from haystack_integrations.components.generators.anthropic import AnthropicGenerator

    llm = AnthropicGenerator(model=model) if model in anthropic_models.values() else OpenAIGenerator(model=model)


llm.save("configs/llm.py")

In [8]:
@config
def reranker(hp: HP):
    jina_models = {
        "reranker-v2": "jina-reranker-v2-base-multilingual",
        "colbert-v2": "jina-colbert-v2",
        "reranker-v1": "jina-reranker-v1-base-en",
    }

    transformers_models = {
        "tiny-bert-v2": "cross-encoder/ms-marco-TinyBERT-L-2-v2",
        "minilm-v2": "cross-encoder/ms-marco-MiniLM-L-2-v2",
    }

    model = hp.select({**jina_models, **transformers_models}, default="reranker-v2")
    if model in jina_models.values():
        from haystack_integrations.components.rankers.jina import JinaRanker

        reranker = JinaRanker(model=model, top_k=hp.int_input(3))
    else:
        from haystack.components.rankers import TransformersSimilarityRanker

        reranker = TransformersSimilarityRanker(model=model, top_k=hp.int_input(3))


reranker.save("configs/reranker.py")

In [45]:
@config
def response_config(hp: HP):
    from hypster import load
    llm_config = load("configs/llm.py")
    response_llm = hp.propagate(llm_config)

    from textwrap import dedent

    from haystack import Pipeline
    from haystack.components.builders import PromptBuilder
    template = dedent("""
    Given the following information, answer the question in one short sentence, 
    using the information provided in the documents. 
    Add an exact quote from the documents that you based your answer on.
    Note: In some cases, only one or some of the documents will be relevant to the question.
    ========================================
    Context:
    {% for document in documents %}
        <Document {{ document.id }}>
        <LLM Extracted Information>
        {{ document.meta.llm_extracted_info }}
        <\LLM Extracted Information>
        <Document Content>
        {{ document.content }}
        <\Document Content>
        <\Document {{ document.id }}>
        \n
    {% endfor %}
    ========================================
    Question: {{question}}
    Chain of Thought:
    Answer:
    Quotes:
    """)

    pipeline = Pipeline()
    pipeline.add_component("prompt_builder", PromptBuilder(template=template))
    pipeline.add_component("llm", response_llm["llm"])
    pipeline.connect("prompt_builder", "llm")

response_config.save("configs/response.py")

In [48]:
@config
def hp_config(hp: HP):
    from hypster import load

    file_path = hp.text_input("data/raw/modular_rag.pdf")
    query = "What is the use of BERT in this document?"

    indexing_config = load("configs/indexing.py")
    indexing = hp.propagate(indexing_config)
    indexing_pipeline = indexing["pipeline"]

    fast_embed = load("configs/fast_embed.py")
    jina_embed = load("configs/jina_embed.py")
    embedder = hp.select({"fastembed": hp.propagate(fast_embed), "jina": hp.propagate(jina_embed)}, default="fastembed")
    indexing_pipeline.add_component("doc_embedder", embedder["doc_embedder"])

    qdrant = load("configs/qdrant_retrieval.py")
    in_memory = load("configs/in_memory_retrieval.py")
    document_store = hp.select(
        {"in_memory": hp.propagate(in_memory), "qdrant": hp.propagate(qdrant)}, default="in_memory"
    )

    from haystack.components.writers import DocumentWriter
    from haystack.document_stores.types import DuplicatePolicy

    document_writer = DocumentWriter(document_store["document_store"], policy=DuplicatePolicy.OVERWRITE)
    indexing_pipeline.add_component("document_writer", document_writer)

    indexing_pipeline.connect("splitter", "doc_embedder")
    indexing_pipeline.connect("doc_embedder", "document_writer")

    retrieval_pipeline = document_store["pipeline"]
    retrieval_pipeline.add_component("text_embedder", embedder["text_embedder"])
    retrieval_pipeline.connect("query", "text_embedder")
    retrieval_pipeline.connect("text_embedder", "embedding_retriever")

    from src.haystack_utils import PassThroughDocuments

    retrieval_pipeline.add_component("docs_for_generation", PassThroughDocuments())
    use_reranker = hp.select([True, False], default=True)
    if use_reranker:
        reranker_config = load("configs/reranker.py")
        reranker = hp.propagate(reranker_config)
        retrieval_pipeline.add_component("reranker", reranker["reranker"])
        retrieval_pipeline.connect("retrieved_documents", "reranker")
        retrieval_pipeline.connect("reranker", "docs_for_generation")
        retrieval_pipeline.connect("query", "reranker")
    else:
        retrieval_pipeline.connect("retrieved_documents", "docs_for_generation")

    response_config = load("configs/response.py")
    response = hp.propagate(response_config)
    response_pipeline = response["pipeline"]


In [62]:
inputs = hp_config(
    selections={
        "large_llm_model.model": "gpt-4o",
        "document_store": "in_memory",
        "document_store.bm25_algorithm": "BM25Plus",
        "indexing_inputs.llm_inputs.model": "gpt-4o-mini",
        "reranker.model": "tiny-bert-v2",
    },
    overrides={"indexing.splitter.split_length" : 4}
)

In [63]:
globals().update(inputs)

In [64]:
indexing_pipeline.warm_up()
file_paths = ["data/raw/modular_rag.pdf", "data/raw/enhancing_rag.pdf"]
for file_path in file_paths:
    indexing_pipeline.run({"loader": {"sources": [file_path]}})

Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 92385.55it/s]
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 60963.72it/s]
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 8090.86it/s]
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 106997.55it/s]
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 85948.85it/s]
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 61680.94it/s]
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 8185.60it/s]
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 9646.51it/s]
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 19152.07it/s]
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 7741.42it/s]
Calculating embeddings: 100%|██████████| 623/623 [00:15<00:00, 39.31it/s]
Calculating embeddings: 100%|██████████| 221/221 [00:02<00:00, 84.95it/s]


In [66]:
#retrieval_pipeline.show()

In [67]:
retrieval_pipeline.warm_up()

In [68]:
query = "What is the use of BERT or ColBERT in RAG?"
retrieval = retrieval_pipeline.run({"query": {"text": query}})

Calculating embeddings: 100%|██████████| 1/1 [00:00<00:00, 229.00it/s]


In [69]:
for doc in retrieval["docs_for_generation"]["documents"]:
    print(doc.content)
    print(doc.meta)
    print()

 The use of tensor core will help in optimization of inference speed in the 
encoding process in the RAG pipeline.  
 
In this upcoming section, the approach taken to survey optimization techniques  for RAG systems is outlined.  
 
Table  1: Literature  survey  summary  table  
 
Study  Focus  Approach  Key  Findings  
 
Pierre  et 
al. Query  
optimization  in 
RAG  BERT,  Orca2,  
prompt augmenter  Improved  accuracy,  
rel- 
evance,  and 
contextual  richness   
in  document  
retrieval  
Gao et al.
{'file_path': 'data/raw/enhancing_rag.pdf', 'llm_extracted_info': 'Summary:  \nThis document is an issue of the International Journal of Scientific Research in Engineering and Management.\n\nKeywords:  \nIJSREM, scientific research, engineering, management, SJIF', 'source_id': '2ca957eaedc6549878e65411ada94d966ac0fa4edc9f9b10d2577c10c8dcc11e', 'page_number': 4, 'split_id': 50, 'split_idx_start': 13456, '_split_overlap': [{'doc_id': '610eb77156aa51862cc3c87bf65da517e92e2fc4c7f921b12c63b7c

In [70]:
retrieval["docs_for_generation"]["documents"]

[Document(id=73ee2e2811f8605325948a50f94593eb365470a5eb4c4db6f7ab136ff26d88a0, content: ' The use of tensor core will help in optimization of inference speed in the 
 encoding process in the...', meta: {'file_path': 'data/raw/enhancing_rag.pdf', 'llm_extracted_info': 'Summary:  \nThis document is an issue of the International Journal of Scientific Research in Engineering and Management.\n\nKeywords:  \nIJSREM, scientific research, engineering, management, SJIF', 'source_id': '2ca957eaedc6549878e65411ada94d966ac0fa4edc9f9b10d2577c10c8dcc11e', 'page_number': 4, 'split_id': 50, 'split_idx_start': 13456, '_split_overlap': [{'doc_id': '610eb77156aa51862cc3c87bf65da517e92e2fc4c7f921b12c63b7cb848748c7', 'range': (66, 293)}, {'doc_id': '6703bfc8beae40c67e3853702e6a859631e714941ec09ce8ccdd313fc08918f5', 'range': (0, 280)}]}, score: 0.9821396470069885, embedding: vector of size 384),
 Document(id=d7f6628affb0ad7d3ba2f89658508f3728d3fb749782fd5c2d005bf9ecc726f8, content: ' Typical models
 include

In [71]:
#response_pipeline.show()

In [72]:
pipeline = inputs["response_pipeline"]
pipeline.warm_up()
response = pipeline.run(
    {"prompt_builder": {"question": query, "documents": retrieval["docs_for_generation"]["documents"]}},
    include_outputs_from={"prompt_builder"},
)

In [73]:
print(response["llm"]["replies"][0])

BERT or ColBERT is used in RAG to enhance retrieval effectiveness through hybrid approaches that combine sparse and dense retrievers. 

Quote: "Typical models include BERT structure PLMs, like ColBERT, and multi-task fine-tuned models like BGE [40] and GTE [41]."


In [74]:
response["llm"]["meta"]

[{'model': 'gpt-4o-mini-2024-07-18',
  'index': 0,
  'finish_reason': 'stop',
  'usage': {'completion_tokens': 64,
   'prompt_tokens': 943,
   'total_tokens': 1007,
   'completion_tokens_details': CompletionTokensDetails(audio_tokens=None, reasoning_tokens=0),
   'prompt_tokens_details': PromptTokensDetails(audio_tokens=None, cached_tokens=0)}}]