In [2]:
!pip install matplotlib

Collecting matplotlib
  Downloading matplotlib-3.9.1-cp39-cp39-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.2.1-cp39-cp39-macosx_11_0_arm64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.53.1-cp39-cp39-macosx_11_0_arm64.whl.metadata (162 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 162.6/162.6 kB 1.8 MB/s eta 0:00:00
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.5-cp39-cp39-macosx_11_0_arm64.whl.metadata (6.4 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.1.2-py3-none-any.whl.metadata (5.1 kB)
Collecting importlib-resources>=3.2.0 (from matplotlib)
  Downloading importlib_resources-6.4.0-py3-none-any.whl.metadata (3.9 kB)
Downloading matplotlib

In [4]:
!pip install llama_index

Collecting llama_index
  Using cached llama_index-0.10.55-py3-none-any.whl.metadata (11 kB)
Collecting llama-index-agent-openai<0.3.0,>=0.1.4 (from llama_index)
  Using cached llama_index_agent_openai-0.2.8-py3-none-any.whl.metadata (729 bytes)
Collecting llama-index-cli<0.2.0,>=0.1.2 (from llama_index)
  Using cached llama_index_cli-0.1.12-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core==0.10.55 (from llama_index)
  Using cached llama_index_core-0.10.55-py3-none-any.whl.metadata (2.4 kB)
Collecting llama-index-embeddings-openai<0.2.0,>=0.1.5 (from llama_index)
  Using cached llama_index_embeddings_openai-0.1.10-py3-none-any.whl.metadata (604 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.2.0 (from llama_index)
  Using cached llama_index_indices_managed_llama_cloud-0.2.5-py3-none-any.whl.metadata (3.8 kB)
Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama_index)
  Using cached llama_index_legacy-0.9.48-py3-none-any.whl.metadata (8.5 kB)
Collecting ll

In [2]:
import os

import numpy as np

from llama_index.core import Document
from llama_index.core import SimpleDirectoryReader
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.node_parser import HierarchicalNodeParser
from llama_index.core.node_parser import get_leaf_nodes
from llama_index.core import StorageContext
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core import  load_index_from_storage
from llama_index.core import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core import load_index_from_storage
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.schema import NodeWithScore
from copy import deepcopy
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core import QueryBundle
from llama_index.core.schema import TextNode, NodeWithScore
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.response.notebook_utils import display_response

In [3]:
def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build an auto-merging vector index, or reload it if already persisted.

    If ``save_dir`` exists, the index is loaded from it and ``documents`` are
    not parsed at all. Otherwise the documents are parsed with
    ``HierarchicalNodeParser``, every resulting node is added to the docstore,
    only the leaf nodes are indexed in a ``VectorStoreIndex``, and the result
    is persisted to ``save_dir``.

    Args:
        documents: Documents handed to the hierarchical node parser.
        llm: LLM passed to ``ServiceContext.from_defaults``.
        embed_model: Embedding model passed to ``ServiceContext.from_defaults``.
        save_dir: Directory the index is persisted to / loaded from.
        chunk_sizes: Chunk sizes for the hierarchical parser; a falsy value
            falls back to ``[2048, 512, 128]``.

    Returns:
        The newly built or reloaded index.
    """
    merging_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
    )

    # A persisted index is already on disk: load it and return early so the
    # (potentially expensive) hierarchical parsing below is never done for a
    # result that would be discarded.
    if os.path.exists(save_dir):
        return load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=merging_context,
        )

    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # The docstore receives every node, while only the leaves are indexed.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
        leaf_nodes, storage_context=storage_context, service_context=merging_context
    )
    automerging_index.storage_context.persist(persist_dir=save_dir)
    return automerging_index


def get_automerging_query_engine(
    automerging_index,
    similarity_top_k=12,
    rerank_top_n=6,
    rerank_model="BAAI/bge-reranker-base",
    verbose=True,
):
    """Create a query engine that retrieves with auto-merging and then reranks.

    Args:
        automerging_index: Index providing ``as_retriever`` and
            ``storage_context``.
        similarity_top_k: ``similarity_top_k`` passed to the base retriever.
        rerank_top_n: ``top_n`` passed to ``SentenceTransformerRerank``.
        rerank_model: Model name passed to ``SentenceTransformerRerank``.
            Defaults to the previously hard-coded ``"BAAI/bge-reranker-base"``.
        verbose: ``verbose`` flag passed to ``AutoMergingRetriever``.
            Defaults to the previously hard-coded ``True``.

    Returns:
        The engine produced by ``RetrieverQueryEngine.from_args`` with the
        reranker as its only node post-processor.
    """
    base_retriever = automerging_index.as_retriever(similarity_top_k=similarity_top_k)
    retriever = AutoMergingRetriever(
        base_retriever, automerging_index.storage_context, verbose=verbose
    )
    rerank = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)
    return RetrieverQueryEngine.from_args(retriever, node_postprocessors=[rerank])

In [3]:
# Directory that holds the source PDF. It defaults to the original local path
# but can be overridden with the JUSPAY_DATA_DIR environment variable, so the
# notebook is not tied to one user's machine.
DATA_DIR = os.environ.get("JUSPAY_DATA_DIR", "/Users/rohit/Desktop/Juspay/data")

documents = SimpleDirectoryReader(
    input_files=[
        os.path.join(DATA_DIR, "Ec Headless - Android _ Juspay Developer Docs.pdf")
    ]
).load_data()

In [4]:
# Concatenate the text of every loaded document into one Document,
# separating the pieces with a blank line.
document = Document(
    text="\n\n".join(part.text for part in documents),
)

In [6]:
# Sentence-window parser with window_size=3. The window is stored under the
# "window" metadata key and the original text under "original_text".
node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
)

In [9]:
!pip install langchain_community

Collecting langchain_community
  Using cached langchain_community-0.2.7-py3-none-any.whl.metadata (2.5 kB)
Collecting langchain<0.3.0,>=0.2.7 (from langchain_community)
  Using cached langchain-0.2.9-py3-none-any.whl.metadata (6.9 kB)
Collecting langchain-core<0.3.0,>=0.2.12 (from langchain_community)
  Using cached langchain_core-0.2.21-py3-none-any.whl.metadata (6.0 kB)
Collecting langsmith<0.2.0,>=0.1.0 (from langchain_community)
  Using cached langsmith-0.1.92-py3-none-any.whl.metadata (13 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain<0.3.0,>=0.2.7->langchain_community)
  Using cached langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.12->langchain_community)
  Using cached jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.0->langchain_community)
  Downloading orjson-3.10.6-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_

In [7]:
from langchain_community.chat_models import ChatOllama

# Chat model used as the LLM for the index; temperature is fixed at 0.
local_llm = "llama3"
llm = ChatOllama(temperature=0, model=local_llm)

In [12]:
!pip install llama-index-llms-langchain

Collecting llama-index-llms-langchain
  Using cached llama_index_llms_langchain-0.1.4-py3-none-any.whl.metadata (751 bytes)
Collecting langchain<0.2.0,>=0.1.3 (from llama-index-llms-langchain)
  Using cached langchain-0.1.20-py3-none-any.whl.metadata (13 kB)
Collecting llama-index-llms-anyscale<0.2.0,>=0.1.1 (from llama-index-llms-langchain)
  Using cached llama_index_llms_anyscale-0.1.4-py3-none-any.whl.metadata (647 bytes)
Collecting langchain-community<0.1,>=0.0.38 (from langchain<0.2.0,>=0.1.3->llama-index-llms-langchain)
  Using cached langchain_community-0.0.38-py3-none-any.whl.metadata (8.7 kB)
Collecting langchain-core<0.2.0,>=0.1.52 (from langchain<0.2.0,>=0.1.3->llama-index-llms-langchain)
  Using cached langchain_core-0.1.52-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<0.1,>=0.0.1 (from langchain<0.2.0,>=0.1.3->llama-index-llms-langchain)
  Using cached langchain_text_splitters-0.0.2-py3-none-any.whl.metadata (2.2 kB)
Collecting packaging<24.0,>=23.

In [13]:
!pip install llama-index-embeddings-huggingface

Collecting llama-index-embeddings-huggingface
  Using cached llama_index_embeddings_huggingface-0.2.2-py3-none-any.whl.metadata (769 bytes)
Collecting huggingface-hub>=0.19.0 (from huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface)
  Using cached huggingface_hub-0.24.0-py3-none-any.whl.metadata (13 kB)
Collecting sentence-transformers>=2.6.1 (from llama-index-embeddings-huggingface)
  Using cached sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting filelock (from huggingface-hub>=0.19.0->huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface)
  Using cached filelock-3.15.4-py3-none-any.whl.metadata (2.9 kB)
Collecting minijinja>=1.0 (from huggingface-hub[inference]>=0.19.0->llama-index-embeddings-huggingface)
  Using cached minijinja-2.0.1-cp38-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl.metadata (8.8 kB)
Collecting transformers<5.0.0,>=4.34.0 (from sentence-transformers>=2.6.1->llama-index-embeddings-

In [8]:
# Bundle the LLM, the local embedding model and the sentence-window parser
# into one service context that is reused when building and loading the index.
# NOTE(review): the recorded cell output suggests a warning is emitted on this
# call; ServiceContext is believed to be deprecated in llama-index 0.10 in
# favour of Settings. Confirm before upgrading.
sentence_context = ServiceContext.from_defaults(
    node_parser=node_parser,
    embed_model="local:BAAI/bge-small-en-v1.5",
    llm=llm,
)

  sentence_context = ServiceContext.from_defaults(
  from .autonotebook import tqdm as notebook_tqdm


In [15]:
!pip install llama-index-llms-fireworks

Collecting llama-index-llms-fireworks
  Downloading llama_index_llms_fireworks-0.1.5-py3-none-any.whl.metadata (619 bytes)
Downloading llama_index_llms_fireworks-0.1.5-py3-none-any.whl (4.4 kB)
Installing collected packages: llama-index-llms-fireworks
Successfully installed llama-index-llms-fireworks-0.1.5


In [9]:
# Build the sentence-window index from the merged document.
# NOTE(review): this build is unconditional, and the index is persisted right
# after it, so the guarded build-or-load cell further down always takes its
# load branch.
sentence_index = VectorStoreIndex.from_documents([document], service_context=sentence_context)

In [10]:
# Persist the index's storage context to ./sentence_index, the same directory
# the build-or-load cell below checks for and loads from.
sentence_index.storage_context.persist(persist_dir="./sentence_index")

In [11]:
# Reuse the persisted index when ./sentence_index exists; otherwise build it
# from the merged document and persist it for the next run.
if os.path.exists("./sentence_index"):
    sentence_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./sentence_index"),
        service_context=sentence_context,
    )
else:
    sentence_index = VectorStoreIndex.from_documents(
        [document],
        service_context=sentence_context,
    )
    sentence_index.storage_context.persist(persist_dir="./sentence_index")

In [12]:
# Post-processor keyed on "window", the same metadata key the sentence-window
# parser writes its window to.
postproc = MetadataReplacementPostProcessor(target_metadata_key="window")

In [13]:
# Reranker configured with top_n=2.
rerank = SentenceTransformerRerank(model="BAAI/bge-reranker-base", top_n=2)

In [14]:
# Query engine over the sentence-window index with similarity_top_k=6 and two
# node post-processors, listed as window replacement first and reranker second.
sentence_window_engine = sentence_index.as_query_engine(
    node_postprocessors=[postproc, rerank],
    similarity_top_k=6,
)

In [17]:
# Ask the sentence-window engine a sample question about the loaded docs.
window_response = sentence_window_engine.query("What is the limit for number of transactions?")

In [18]:
# Render the query response in the notebook output.
display_response(window_response)

**`Final Response:`** The number of transactions are capped at 200 per day.