In [None]:
%%capture
!pip install llama-index==0.10.37 llama-index-embeddings-openai==0.1.9 qdrant-client==1.9.1 llama-index-vector-stores-qdrant==0.2.8 llama-index-llms-openai==0.1.19

In [None]:
import os
import sys
from getpass import getpass
import nest_asyncio

from IPython.display import Markdown, display

from dotenv import load_dotenv

# Apply the nest_asyncio patch up front — presumably so async calls can run
# inside the notebook's already-running event loop (TODO confirm).
nest_asyncio.apply()

# Presumably loads variables from a local .env file, ahead of the os.environ
# lookups in the credential cells below (TODO confirm).
load_dotenv()

# Extend the import path with ../helpers (relative to the working directory).
# NOTE(review): `utils` is presumably located there, so this line has to run
# before the import that follows.
sys.path.append('../helpers')

from utils import setup_llm, setup_embed_model, setup_vector_store

In [None]:
# .get() returns None for a missing variable, so the prompt below is actually
# reached; indexing os.environ directly would raise KeyError instead.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') or getpass("Enter your OPENAI_API_KEY key: ")

In [None]:
# .get() returns None for a missing variable, so the prompt below is actually
# reached; indexing os.environ directly would raise KeyError instead.
QDRANT_URL = os.environ.get('QDRANT_URL') or getpass("Enter your Qdrant URL:")

In [None]:
# .get() returns None for a missing variable, so the prompt below is actually
# reached; indexing os.environ directly would raise KeyError instead.
QDRANT_API_KEY = os.environ.get('QDRANT_API_KEY') or getpass("Enter your Qdrant API Key:")

In [None]:
from llama_index.core.settings import Settings
from utils import setup_llm, setup_embed_model

# System prompt that instructs the model to answer only from the supplied context.
GROUNDED_SYSTEM_PROMPT = """Use ONLY the provided context and generate a complete, coherent answer to the user's query. 
    Your response must be grounded in the provided context and relevant to the essence of the user's query.
    """

# Configure the chat model through the project helper.
setup_llm(
    provider="openai",
    model="gpt-4o",
    temperature=0.75,
    api_key=OPENAI_API_KEY,
    system_prompt=GROUNDED_SYSTEM_PROMPT,
)

# Configure the embedding model through the project helper; later cells read it
# back as Settings.embed_model (presumably set by this call — TODO confirm).
setup_embed_model(
    provider="openai",
    api_key=OPENAI_API_KEY,
    model="text-embedding-3-small",
)

In [None]:
from utils import get_documents_from_docstore
import random

# Upper bound on the demo subset. The actual sample is capped at the number of
# available documents so a small docstore does not make random.sample raise
# ValueError.
SAMPLE_SIZE = 100

all_documents = get_documents_from_docstore("../data/words-of-the-senpais")

# Fixed seed so the same subset is drawn on every run.
random.seed(42)
senpai_documents = random.sample(all_documents, min(SAMPLE_SIZE, len(all_documents)))

# 🧠 Semantic Chunking

This is a recent method that's been popularized by Greg Kamradt. He discussed this in detail in [an informative YouTube video](https://youtu.be/8OJC21T2SL4), which is also a great resource for more information on various chunking strategies.


Here's the gist of what semantic chunking does:

- Uses sentence embeddings to find breakpoints based on semantic similarity

- Keeps related sentences together in the same chunk

- Dynamically determines chunk size, no fixed length needed


## How semantic chunking works

1. ✂️ Split document into sentences

2. 🔢 Index sentences by position

3. 🎚️ Choose buffer size (sentences on either side to keep)

4. 📊 Measure similarity in embedding space
   - Keep similar sentences together
   - Split dissimilar sentences apart

5. 🧩 Merge groups based on similarity threshold


## [`SemanticSplitterNodeParser`](https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/node_parser/text/semantic_splitter.py)

`SemanticSplitterNodeParser` splits a document into semantically related chunks called nodes. It uses semantic similarity and adaptive breakpoint determination to create meaningful and coherent nodes from a document.

### How it works

#### 1. 🧩 Document Splitting

   - The parser takes a document as input.

   - It splits the document into individual sentences using a sentence splitter (e.g., split_by_sentence_tokenizer).

#### 2. 🎚️ Sentence Grouping

   - The parser groups adjacent sentences together based on a configurable buffer size.

   - The buffer size determines how many sentences are considered together when evaluating semantic similarity.

   - For example, if the buffer size is 1, each sentence is treated individually. If it's greater than 1, sentences are grouped together.

#### 3. 🌐 Embedding Calculation

   - The parser calculates embeddings for each group of sentences using an embedding model.

   - Embeddings represent the semantic meaning of the sentence groups in a dense vector format.

#### 4. 📏 Distance Calculation

   - The parser calculates the cosine similarity between the embeddings of adjacent sentence groups.

   - It then computes the distance by subtracting the similarity from 1.

   - These distances represent the semantic dissimilarity between sentence groups.

#### 5. 🎯 Breakpoint Determination

   - The parser determines breakpoints based on a configurable percentile threshold (e.g., 95th percentile).

   - If the distance between two adjacent sentence groups exceeds the breakpoint threshold, it indicates a semantic shift and marks the start of a new node.

#### 6. 🧩 Node Creation

   - The parser splits the document into nodes based on the determined breakpoints.

   - Each node represents a semantically related chunk of text.

   - The sentences within a node are combined to form a coherent unit of information.

#### 7. 📝 Node Metadata

   - The parser can include additional metadata in the nodes, such as the original text or other relevant information.

   - It can also establish relationships between nodes, such as previous and next relationships, to maintain the sequential order of the chunks.

### Arguments you need to know

- `embed_model`: The embedding model to use for semantic comparison. If not provided, the parser will attempt to use the OpenAIEmbedding model. If the `llama-index-embeddings-openai` package is not installed, an ImportError will be raised.

- `buffer_size`: The number of sentences to group together when evaluating semantic similarity. 

  - Default value is 1 (each sentence is considered individually). 
  
  - Increasing the buffer size allows the parser to consider the context of adjacent sentences when determining semantic similarity. This can help capture more meaningful relationships between sentences.

- `breakpoint_percentile_threshold`: The percentile of cosine dissimilarity that must be exceeded between a group of sentences and the next to form a node. 

  - A smaller value results in more nodes being generated. Default value is 95 (95th percentile); the value is given on a 0–100 percentile scale, not as a 0–1 fraction.

  - Adjusting this threshold allows you to control the granularity of the node splits. A lower value will create more nodes, while a higher value will create fewer nodes.

- `sentence_splitter`: The function or callable object used to split the text into sentences. Default is `split_by_sentence_tokenizer` (which uses the `PunktSentenceTokenizer` from the `nltk` library). The choice of sentence splitter can affect how the text is divided into individual sentences, which in turn influences the node splitting process.

  - While the `sentence_splitter` is used to initially split the document into individual sentences, the actual determination of node boundaries is based on semantic similarity rather than a fixed splitting approach.
  
  - `sentence_splitter` is more of a preprocessing step to prepare the document for the semantic analysis. It ensures that the parser has a consistent input format to work with (i.e., a list of sentences).

In [None]:
from llama_index.core.node_parser import SentenceSplitter, SemanticSplitterNodeParser

def semantic_splitter(
    embed_model,
    buffer_size,
    breakpoint_percentile_threshold,
    documents,
    **kwargs):
    """Chunk ``documents`` into nodes with a ``SemanticSplitterNodeParser``.

    Args:
        embed_model: Embedding model handed to the parser.
        buffer_size: Forwarded to the parser's ``buffer_size`` argument.
        breakpoint_percentile_threshold: Forwarded to the parser's
            ``breakpoint_percentile_threshold`` argument.
        documents: Documents to split.
        **kwargs: Accepted for call-site convenience; not used by this function.

    Returns:
        Whatever ``get_nodes_from_documents`` returns for ``documents``.
    """
    parser_options = {
        "embed_model": embed_model,
        "buffer_size": buffer_size,
        "breakpoint_percentile_threshold": breakpoint_percentile_threshold,
    }
    return SemanticSplitterNodeParser(**parser_options).get_nodes_from_documents(documents)

## You have a lot of design choices to make here

These are all points of experimentation for you. Hack around with these, run evaluation against the metrics that matter to you, vibe check the results, and find one that works best.

- What embedding model do you want to use?

- What dimensions do you want to use for the embedding model?

- What buffer size do you want to use? 

- What about the breakpoint threshold?

- What sentence splitter do you want to use?

I'll use some arbitrary settings for illustrative purposes.


In [None]:
# The threshold is a percentile on a 0-100 scale (the parser's default is 95),
# so the 55th percentile is written as 55. A value of 0.55 would mean the
# 0.55th percentile and would split at almost every sentence boundary.
semantic_nodes = semantic_splitter(
    embed_model=Settings.embed_model,
    buffer_size=3,
    breakpoint_percentile_threshold=55,
    documents=senpai_documents,
)

In [None]:
# Number of nodes returned by semantic_splitter for the sampled documents.
len(semantic_nodes)

In [None]:
# Show every instance attribute of one node (index 101) to inspect its fields.
semantic_nodes[101].__dict__

In [None]:
# Print the content of one node (index 100), requesting metadata_mode="all".
print(semantic_nodes[100].get_content(metadata_mode="all"))

## 👷🏽‍♂️ 🗂️ Build the Index and Ingest to Qdrant

Note: This will also take a long time (about 30 minutes)


In [None]:
from llama_index.core import StorageContext
from llama_index.core.settings import Settings

from utils import create_index, create_query_engine, ingest, setup_vector_store

# Collection name passed to the vector-store helper below.
COLLECTION_NAME = "words-of-the-senpai-semantic-nodes"

# NOTE(review): the first argument is the literal ":memory:" rather than the
# QDRANT_URL collected earlier — presumably a local in-memory store. Confirm
# this is intended, since QDRANT_URL is otherwise unused in the cells shown here.
semantic_nodes_vector_store = setup_vector_store(":memory:", QDRANT_API_KEY, COLLECTION_NAME)

In [None]:
# The embedding model is the only transformation handed to ingest().
transforms = [Settings.embed_model]

# NOTE(review): this rebinds `semantic_nodes` to whatever ingest() returns, so
# re-running the cell feeds that result back in as `documents` — confirm that
# is intended.
semantic_nodes = ingest(
    documents=semantic_nodes,
    transformations=transforms,
    vector_store=semantic_nodes_vector_store
)

# Build the index on top of the same vector store that was passed to ingest().
semantic_nodes_index = create_index(
    from_where="vector_store", 
    embed_model=Settings.embed_model,
    vector_store=semantic_nodes_vector_store
    )

### 🛠️ Setup Query Engine

In [None]:
from llama_index.core import PromptTemplate
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

from utils import create_query_engine
from prompts import HYPE_ANSWER_GEN_PROMPT

# Wrap the raw prompt string so it can be passed as the text-QA template.
HYPE_ANSWER_GEN_PROMPT_TEMPLATE = PromptTemplate(HYPE_ANSWER_GEN_PROMPT)

# The keyword must be spelled `similarity_top_k`; a misspelled name would not
# reach the retriever's top-k setting (assuming the helper forwards keyword
# arguments on to LlamaIndex — TODO confirm against utils.create_query_engine).
semantic_nodes_query_engine = create_query_engine(
    index=semantic_nodes_index,
    mode="query",
    response_mode="compact",
    similarity_top_k=5,
    vector_store_query_mode="mmr",
    vector_store_kwargs={"mmr_threshold": 0.42},
    text_qa_template=HYPE_ANSWER_GEN_PROMPT_TEMPLATE,
)

### 🔧 Setup Query Pipeline

In [None]:
from utils import create_query_pipeline

from llama_index.core.query_pipeline import InputComponent

# First link of the chain; presumably receives the arguments given to run()
# (TODO confirm against the query-pipeline helper).
input_component = InputComponent()

# Ordered chain: the input component followed by the query engine.
semantic_nodes_chain = [input_component,  semantic_nodes_query_engine]

# Build the pipeline from the chain using the project helper.
semantic_nodes_query_pipeline = create_query_pipeline(semantic_nodes_chain)

In [None]:
# Run the pipeline on a sample question; as the last expression in the cell,
# the returned value is displayed as the cell output.
semantic_nodes_query_pipeline.run(input="How can I navigate the maze of the market while building a company?")