In [None]:
%%capture
!pip install llama-index==0.10.37 openai==1.30.1 llama-index-embeddings-openai==0.1.9 qdrant-client==1.9.1 llama-index-vector-stores-qdrant==0.2.8 llama-index-llms-oepnai==0.1.9

In [None]:
import os
import sys
from getpass import getpass
import nest_asyncio

from IPython.display import Markdown, display

from dotenv import load_dotenv

# Patch asyncio so event loops can be nested; the notebook kernel already runs one.
nest_asyncio.apply()

# NOTE(review): an empty path is passed here — presumably python-dotenv then falls back to
# searching for a .env file; confirm, or pass the intended path explicitly.
load_dotenv("")

# Put ../helpers on the import path before the `utils` import that follows
# (presumably where that module lives — verify).
sys.path.append('../helpers')

from utils import setup_llm, setup_embed_model, setup_vector_store

In [None]:
# Read the key from the environment, prompting only when it is unset or empty.
# `.get` is required: indexing os.environ raises KeyError for a missing variable,
# which would make the getpass fallback unreachable.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or getpass("Enter your OPENAI_API_KEY key: ")

In [None]:
# Read the Qdrant endpoint from the environment, prompting only when it is unset or empty.
# `.get` is required: indexing os.environ raises KeyError for a missing variable,
# which would make the getpass fallback unreachable.
QDRANT_URL = os.environ.get('QDRANT_URL') or getpass("Enter your Qdrant URL:")

In [None]:
# Read the Qdrant API key from the environment, prompting only when it is unset or empty.
# `.get` is required: indexing os.environ raises KeyError for a missing variable,
# which would make the getpass fallback unreachable.
QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY') or getpass("Enter your Qdrant API Key:")

In [None]:
from llama_index.core.settings import Settings
from utils import setup_llm, setup_embed_model

# System prompt that restricts answers to the retrieved context.
# Written as adjacent literals so the embedded newlines, indentation and trailing
# whitespace of the prompt are explicit rather than hidden in a triple-quoted block.
GROUNDED_SYSTEM_PROMPT = (
    "Use ONLY the provided context and generate a complete, coherent answer to the user's query. \n"
    "    Your response must be grounded in the provided context and relevant to the essence of the user's query.\n"
    "    "
)

# Configure the chat model through the project helper
# (presumably it is stored on Settings, which later cells read — verify in utils).
setup_llm(
    provider="openai",
    api_key=OPENAI_API_KEY,
    model="gpt-4o",
    temperature=0.75,
    system_prompt=GROUNDED_SYSTEM_PROMPT,
)

# Configure the embedding model the same way.
setup_embed_model(provider="openai", model="text-embedding-3-small", api_key=OPENAI_API_KEY)

In [None]:
from utils import get_documents_from_docstore

# Load the corpus from the docstore directory via the project helper.
# NOTE(review): presumably a list of LlamaIndex documents — later cells index into it
# and read `.text` and `.node_id`; confirm against utils.
senpai_documents = get_documents_from_docstore("../data/words-of-the-senpais")

In [None]:
# Show the raw text of one sample document (index 42) to see what the corpus looks like.
print(senpai_documents[42].text)

# 🔹→🔷 Small to Big Retrieval ◾️ → ⬛️

The concept of small to big retrieval, also known as recursive retrieval, is a key part of LlamaIndex. To use it, we need to define how to efficiently retrieve relevant context from an index based on a query. That means defining a recursive retrieval strategy, post-processing the nodes once they've been retrieved, and synthesizing the responses.

 1) 🔄 **Recursive Retrieval**

  - **Small Chunks (Child Chunks)**: Initially retrieves smaller, query-specific chunks of data.

  - **Big Chunks (Parent Chunks)**: Follows references to larger, contextual chunks related to the smaller chunks. Retains context within each chunk.

  2) 🛠️ **Node Postprocessing:** Apply transformations, filtering, or re-ranking to the retrieved nodes to enhance data quality and relevance.
  
  3) 📝 **Response Synthesizer:** Use the retrieved text chunks along with the user query to generate a response


## [🪟`SentenceWindowNodeParser`](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/node_parser/text/sentence_window.py)

The `SentenceWindowNodeParser` is unique in that it focuses on individual sentences while also capturing the surrounding context.  This is particularly useful for tasks where understanding the broader context of a sentence is useful.

### How it Works

1. **Sentence Splitting:** 

    *   Similar to `SentenceSplitter`, it first divides the document into individual sentences using a sentence tokenizer (defaults to [`PunktSentenceTokenizer`](https://www.nltk.org/api/nltk.tokenize.PunktSentenceTokenizer.html) from the `nltk` library).

2. **Window Creation:**

    *   For each sentence (node), it gathers a "window" of surrounding sentences based on the specified `window_size`. 

    *   This window is stored in the node's metadata under the `window_metadata_key`.

3. **Metadata Management:**

    *   The original sentence text is also stored in the metadata under `original_text_metadata_key`.

    *   Importantly, both of these metadata entries (the window and the original text) are excluded from the metadata seen by the embedding model and the LLM, so only the sentence itself is embedded.

### Arguments you need to know

*   **`window_size`**: Controls the number of sentences to include before and after the central sentence in the window.

*   **`window_metadata_key`**: The key used to store the window text in the node's metadata.

*   **`original_text_metadata_key`**: The key used to store the original sentence text in the metadata.

*   **`sentence_splitter`**: The text splitter to use when splitting documents (defaults to [`PunktSentenceTokenizer`](https://www.nltk.org/api/nltk.tokenize.PunktSentenceTokenizer.html) from the `nltk` library).

### Usage Example

```python
from llama_index.core.node_parser import SentenceWindowNodeParser

parser = SentenceWindowNodeParser(window_size=2)

nodes = parser.get_nodes_from_documents(documents)
```

### When to Use `SentenceWindowNodeParser`

*   **Tasks requiring sentence-level understanding with context:** 
    *   Question answering, summarization, or sentiment analysis where the surrounding sentences provide valuable context.

*   **Fine-grained control over embedding scope:** 
    *   Creating embeddings that focus on the specific meaning of a sentence within its local context.
    
*   **Combining with MetadataReplacementNodePostProcessor:**
    *   Replacing the original sentence with its surrounding window before sending it to the LLM, allowing the model to consider the broader context.


In [None]:
# Inspect every attribute stored on the sample document (same mapping as its __dict__).
vars(senpai_documents[42])

In [None]:
from llama_index.core.node_parser import SentenceWindowNodeParser

# Parse a single sample document with a window size of 2 to see what window nodes look like.
two_sentence_window_parser = SentenceWindowNodeParser(window_size=2)
example_parsed = two_sentence_window_parser.build_window_nodes_from_documents(
    [senpai_documents[42]]
)

In [None]:
# Inspect the attributes of the fourth window node, including its metadata.
vars(example_parsed[3])

In [None]:
# Same sample document, this time with a window size of 3 and the standard
# get_nodes_from_documents entry point, for comparison with the previous example.
three_sentence_window_parser = SentenceWindowNodeParser(window_size=3)
example_parsed_2 = three_sentence_window_parser.get_nodes_from_documents(
    [senpai_documents[42]]
)

In [None]:
# Inspect the fourth node from the window-size-3 parse.
vars(example_parsed_2[3])

### 🔄 **Understanding the `MetadataReplacementPostProcessor` and `SentenceWindowNodeParser`**

- 📝 **`SentenceWindowNodeParser` Review**

  - **Single Sentence Parsing**: Parses documents into nodes, each containing a single sentence.

  - **Contextual Window**: Each node includes a "window" of sentences surrounding the core sentence for added context.

- 🔄 **[`MetadataReplacementPostProcessor`](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/postprocessor/metadata_replacement.py)**

  - **Context Enhancement**: Replaces the sentence in each node with its surrounding window of sentences during retrieval.

  - **Used in Conjunction**: Often paired with the `SentenceWindowNodeParser` to maximize contextual data provided to the LLM (Large Language Model).

### Query and Response Process

- 🔍 **Query Handling**

  - **Sentence Retrieval**: Retrieves the most relevant sentences based on the query.

  - **Context Injection**: Instead of merely returning these sentences, the post-processor injects the surrounding context from the window.

- 📊 **Benefits of Enhanced Context**

  - **Improved Understanding**: More context helps the LLM understand queries better, leading to more accurate responses.

  - **Detailed Responses**: The additional context allows for responses that are both detailed and relevant.

- 🌟 **Ideal for Large Documents**

  - **Fine-Grained Retrieval**: Especially useful for large documents or indexes, enabling more precise information extraction.

<img src="https://miro.medium.com/v2/resize:fit:2000/0*JKZ9m_c6jyIKqCWu.png">

Image Source: [Ivan Ilin](https://pub.towardsai.net/advanced-rag-techniques-an-illustrated-overview-04d193d8fec6)

In [None]:
from llama_index.core.node_parser import SentenceWindowNodeParser

def sentence_window_splitter(window_size, documents, window_metadata_key="window"):
    """Split documents into single-sentence nodes that carry a window of surrounding sentences.

    Args:
        window_size: Number of sentences before and after each sentence to keep in its window.
        documents: The documents to parse.
        window_metadata_key: Metadata key under which each node's window text is stored.
            It must match the `target_metadata_key` given to `MetadataReplacementPostProcessor`
            at query time ("window" in this notebook); if the keys differ, the post-processor
            finds nothing to substitute and the LLM only sees the single sentence.

    Returns:
        The list of nodes produced by `SentenceWindowNodeParser.get_nodes_from_documents`.
    """
    splitter = SentenceWindowNodeParser(
        window_size=window_size,
        window_metadata_key=window_metadata_key,
        original_text_metadata_key="original_text",
    )
    return splitter.get_nodes_from_documents(documents)

In [None]:
# Parse the whole corpus into sentence nodes, using a window size of 5.
nodes = sentence_window_splitter(window_size=5, documents=senpai_documents)

In [None]:
# Inspect one sentence-window node, including the metadata the parser attached.
vars(nodes[5])

In [None]:
# Render the node with metadata_mode="all"; compare with the "llm" rendering in the next cell.
print(nodes[5].get_content(metadata_mode="all"))

In [None]:
# Render the same node with metadata_mode="llm", i.e. the view intended for the LLM.
print(nodes[5].get_content(metadata_mode="llm"))

## 👷🏽‍♂️ 🗂️ Ingest to Qdrant And Build the Index

In [None]:
from llama_index.core import StorageContext
from llama_index.core.settings import Settings

from utils import create_index, create_query_engine
from utils import setup_vector_store

# Qdrant collection that holds the sentence-window nodes.
COLLECTION_NAME = "wots-small-to-big-sentence-window"

# Build the vector store handle for that collection via the project helper
# (presumably a Qdrant-backed LlamaIndex vector store — verify in utils).
sentence_window_vector_store = setup_vector_store(QDRANT_URL, QDRANT_API_KEY, COLLECTION_NAME)


In [None]:
from utils import ingest

# The nodes are already parsed, so the only transformation passed on is the embedding model.
transforms = [Settings.embed_model]

# Push the sentence-window nodes through the project ingest helper into the Qdrant store.
split_nodes = ingest(
    documents=nodes,
    transformations=transforms,
    vector_store=sentence_window_vector_store
)

### 🛠️ Setup Query Engine

In [None]:
from llama_index.core import PromptTemplate
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

from utils import create_query_engine
from prompts import HYPE_ANSWER_GEN_PROMPT

# Wrap the project's answer-generation prompt so it can replace the default QA template.
HYPE_ANSWER_GEN_PROMPT_TEMPLATE = PromptTemplate(HYPE_ANSWER_GEN_PROMPT)

# After retrieval, swap each node's text for the value stored under its "window" metadata key.
node_postprocessors = [MetadataReplacementPostProcessor(target_metadata_key="window")]

# Build an index on top of the already-populated vector store.
sentence_window_index = create_index(
    from_where="vector_store",
    vector_store=sentence_window_vector_store,
    embed_model=Settings.embed_model,
)

# The keyword must be spelled `similarity_top_k`: LlamaIndex retrievers accept arbitrary
# keyword arguments, so a misspelling is silently dropped and the default top-k is used.
# NOTE(review): assumes create_query_engine forwards these keyword arguments to LlamaIndex.
sentence_window_query_engine = create_query_engine(
    index=sentence_window_index,
    mode="query",
    response_mode="compact",
    similarity_top_k=5,
    vector_store_query_mode="mmr",
    vector_store_kwargs={"mmr_threshold": 0.42},
    node_postprocessors=node_postprocessors,
)

# Use the custom prompt for the final answer synthesis step.
sentence_window_query_engine.update_prompts(
    {'response_synthesizer:text_qa_template': HYPE_ANSWER_GEN_PROMPT_TEMPLATE}
)

### 🔧 Setup Query Pipeline

In [None]:
from utils import create_query_pipeline

from llama_index.core.query_pipeline import InputComponent

# Entry point of the pipeline: receives the user's query.
input_component = InputComponent()

# Two-step chain: the input component followed by the sentence-window query engine.
sentence_window_chain = [input_component, sentence_window_query_engine]

# Assemble the chain into a query pipeline via the project helper.
sentence_window_query_pipeline = create_query_pipeline(sentence_window_chain)

In [None]:
# Sample question for the sentence-window pipeline; the cell output is the pipeline's result.
sentence_window_query_pipeline.run(input="How can I effectively build strength across multiple facets of real life without relying on complicated machines?")

In [None]:
# Second sample question for the sentence-window pipeline.
sentence_window_query_pipeline.run(input="How can I set rules and speak honestly without worrying about hurting someone's feelings?")

# 👨‍👦 Smaller Child Chunks Referring to Bigger Parent Chunk

<img src="https://miro.medium.com/v2/resize:fit:2000/0*x4rMd50GP99OSDuo.png"  width="70%">

Source: [Ivan Ilin](https://pub.towardsai.net/advanced-rag-techniques-an-illustrated-overview-04d193d8fec6)

🔗 **Chunk References Explained:**

- 🧩 **Concept**: Chunk References involve smaller chunks of data pointing to larger parent chunks, forming a hierarchical graph structure.
  
- 🌐 **Purpose**: This method is utilized in recursive retrieval to efficiently manage and access data in a structured manner.

### Process During Query

- 🔍 **During Query-Time**:

  - **Small Chunk Retrieval**: Initially, smaller chunks relevant to the query are retrieved.

  - **Following References**: The system then follows references to retrieve the larger parent chunks associated with these smaller chunks.

- 📈 **Benefits of Contextual Retrieval**:

  - **Enhanced Context**: Retrieving larger chunks along with the smaller ones provides additional context.
  
  - **Improved Responses**: This deeper context allows for more accurate and comprehensive responses to queries.

This structured approach ensures that data retrieval is both efficient and context-rich, enhancing the overall synthesis and response accuracy.

 The code below is creating a system where smaller chunks of text refer to the larger chunks they were created from. This allows for more context to be provided when retrieving chunks of text based on a query.

In [None]:
# Import the SentenceSplitter class from the llama_index.core.node_parser module
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import IndexNode

# Chunk sizes used to cut each document into progressively larger child chunks.
sub_chunk_sizes = [128, 256, 512]

# One sentence splitter per chunk size, all sharing an overlap of 16.
sub_node_parsers = [
    SentenceSplitter(chunk_size=size, chunk_overlap=16) for size in sub_chunk_sizes
]

# Every child chunk and every parent document ends up in this list as an IndexNode.
all_nodes = []

for parent_doc in senpai_documents:
    parent_id = parent_doc.node_id

    # Child chunks: split the parent with each splitter and point every piece back at the parent.
    for splitter in sub_node_parsers:
        all_nodes.extend(
            IndexNode.from_text_node(child, parent_id)
            for child in splitter.get_nodes_from_documents([parent_doc])
        )

    # The parent itself is added last, as an IndexNode that references its own id.
    all_nodes.append(IndexNode.from_text_node(parent_doc, parent_id))

In [None]:
# Lookup table from node id to node, used later to resolve references by id.
all_nodes_dict = dict((node.node_id, node) for node in all_nodes)

In [None]:
# Inspect one IndexNode to see the reference it carries to its parent.
vars(all_nodes[5])

### 👷🏽‍♂️ 🗂️  Ingest to Qdrant and Build the Index 

In [None]:
from utils import ingest

# Separate Qdrant collection for the parent/child nodes (rebinds COLLECTION_NAME from the earlier section).
COLLECTION_NAME = "words-of-the-senpai-small-to-big-parent-child"

parent_child_vector_store = setup_vector_store(QDRANT_URL, QDRANT_API_KEY, COLLECTION_NAME)

# The nodes are already chunked, so only the embedding model is passed on as a transformation.
transforms = [Settings.embed_model]

# Push the child and parent IndexNodes through the project ingest helper into the store.
parent_child_nodes = ingest(
    documents=all_nodes,
    transformations=transforms,
    vector_store=parent_child_vector_store
)

# Build an index on top of the populated vector store.
parent_child_index = create_index(
    from_where="vector_store", 
    embed_model=Settings.embed_model,
    vector_store=parent_child_vector_store)

### 🛠️ Setup Query Engine for Parent Child Chunks

We're making use of the `RecursiveRetriever` and the `RetrieverQueryEngine`.

`RecursiveRetriever` is a separate class that is not directly associated with an index. It uses multiple retrievers and query engines to recursively retrieve and query nodes.

But we can't directly use a `RecursiveRetriever` with the `index.as_retriever()` pattern we've seen before. The `index.as_retriever()` pattern is used to create a retriever from an index, and the type of retriever it creates depends on the `retriever_mode` argument you pass to it. `RecursiveRetriever` requires a dictionary of retrievers, and optionally a dictionary of query engines and a dictionary of nodes. These aren't required when creating a retriever using `index.as_retriever()`.

So, we need to build the `RecursiveRetriever` and the `RetrieverQueryEngine` to accomplish this.

  - `RecursiveRetriever` queries a graph of retrievers and query engines, following links between them to fetch relevant information for a given query. It recursively traverses the graph, deduplicates nodes, and returns the retrieved nodes along with any additional source nodes.

  - `RetrieverQueryEngine` is a component that uses a retriever to fetch relevant documents or nodes based on a given query and then *synthesizes a response from the retrieved nodes using a `ResponseSynthesizer`*. It retrieves relevant nodes, applies postprocessing, synthesizes a response, and returns the result.


More details are below.

#### [`RecursiveRetriever`](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/retrievers/recursive_retriever.py)

##### 🌿 Initialization

   - Takes a root ID, retriever dict, and optional query engine & node dicts

   - Validates root ID and checks for overlapping keys

##### 🔄 Recursive Retrieval

   - Starts from the root ID when `retrieve` is called with a query bundle

   - Fetches the object (retriever, query engine, or node) for the current ID

   - If it's a node, adds it to the list of nodes to return

   - If it's a retriever, retrieves nodes and recursively queries them

   - If it's a query engine, queries it and adds the response as a text node

##### 🔗 Querying Retrieved Nodes

   - For each retrieved IndexNode, recursively retrieves from the referenced ID

   - For each TextNode, simply adds it to the list of nodes to return

   - Avoids querying the same ID multiple times

##### 🧹 Deduplication

   - Deduplicates nodes based on their node ID

   - Keeps the node with the highest score or the first one returned

##### 📚 Retrieving All Nodes

   - `retrieve_all` method retrieves all nodes, including additional source nodes

   - Calls the recursive retrieval process and returns both retrieved and additional nodes


#### [`RetrieverQueryEngine`](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/query_engine/retriever_query_engine.py)

The `index.as_retriever()` pattern creates a retriever from an index. The type of retriever it creates depends on the `retriever_mode` argument you pass to it. 

So, while `index.as_retriever()` is used to create a retriever from an index, a `RetrieverQueryEngine` uses a retriever to fetch relevant nodes and a `ResponseSynthesizer` to synthesize a response.

##### 🌿 Initialization
   - Takes a retriever, an optional response synthesizer, and node postprocessors

   - Creates a default response synthesizer if not provided
   
   - Sets up callback manager for the query engine and node postprocessors

##### 🔧 Customization

   - Can be initialized with various arguments using the `from_args` method

   - Allows customization of response mode, prompt templates, async usage, etc.

##### 🔄 Retrieval

   - Retrieves nodes using the provided retriever when `retrieve` or `aretrieve` is called

   - Applies node postprocessors to the retrieved nodes

   - Returns the processed nodes

##### 🔀 Retriever Swapping

   - Allows swapping the retriever using the `with_retriever` method

   - Creates a new RetrieverQueryEngine instance with the new retriever

##### 🧩 Node Postprocessing
   
   - Applies a list of node postprocessors to the retrieved nodes

   - Postprocessors can modify or filter the nodes based on the query bundle

##### 🎨 Response Synthesis

   - Synthesizes a response using the response synthesizer
   - Takes the query bundle, retrieved nodes, and additional source nodes as input
   - Generates a response based on the configured response mode and templates

##### ❓ Querying

   - Handles a query using the `_query` or `_aquery` method

   - Retrieves nodes, synthesizes a response, and returns the response

   - Triggers callback events for query start and end

##### 🏃 Async Support

   - Provides async versions of retrieval, synthesis, and querying methods

   - Allows for asynchronous processing of queries


In [None]:
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import RecursiveRetriever

# Base vector retriever over the parent/child index.
# The keyword must be spelled `similarity_top_k`: the retriever accepts arbitrary keyword
# arguments, so a misspelling is silently ignored and the default top-k is used instead.
# `response_mode` is not a retriever option (it configures response synthesis), so it is
# passed to the query engine below rather than here.
parent_child_retriever = parent_child_index.as_retriever(
    similarity_top_k=5,
    vector_store_query_mode="mmr",
    vector_store_kwargs={"mmr_threshold": 0.42},
)

# Recursive retriever rooted at the "vector" retriever; retrieved IndexNodes are resolved
# to the nodes they reference through `node_dict`.
retriever_chunk = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": parent_child_retriever},
    node_dict=all_nodes_dict,
    verbose=True,
)

# Query engine = recursive retriever + response synthesizer ("compact" is also the default mode).
parent_child_query_engine = RetrieverQueryEngine.from_args(
    retriever_chunk,
    llm=Settings.llm,
    response_mode="compact",
)

# Use the custom prompt for the final answer synthesis step.
parent_child_query_engine.update_prompts(
    {'response_synthesizer:text_qa_template': HYPE_ANSWER_GEN_PROMPT_TEMPLATE}
)

### 🔧 Setup Query Pipeline for Parent Child Chunks

In [None]:
from utils import create_query_pipeline

from llama_index.core.query_pipeline import InputComponent

# Entry point of the pipeline: receives the user's query.
input_component = InputComponent()

# Two-step chain: the input component followed by the parent/child query engine.
parent_child_chain = [input_component, parent_child_query_engine]

# Assemble the chain into a query pipeline via the project helper.
parent_child_query_pipeline = create_query_pipeline(parent_child_chain)

In [None]:
# Same sample question as in the sentence-window section, now through the parent/child pipeline.
parent_child_query_pipeline.run(input="How can I effectively build strength across multiple facets of real life without relying on complicated machines?")

In [None]:
# Second sample question, for comparison with the sentence-window pipeline's answer.
parent_child_query_pipeline.run(input="How can I set rules and speak honestly without worrying about hurting someone's feelings?")