In [1]:
import logging
import sys
import os

import qdrant_client
# from IPython.display import Markdown, display
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from llama_index.core import StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings

Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-base-en-v1.5", cache_dir='./fastembed_weights')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from nest_asyncio import  apply
apply()

In [3]:
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [4]:
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

--2025-07-11 15:16:25--  https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8002::154, 2606:50c0:8001::154, 2606:50c0:8000::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8002::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 75042 (73K) [text/plain]
Saving to: ‘data/paul_graham/paul_graham_essay.txt’


2025-07-11 15:16:26 (626 KB/s) - ‘data/paul_graham/paul_graham_essay.txt’ saved [75042/75042]



In [5]:
from functools import partial
from llama_index.core.readers.file.base import default_file_metadata_func
from llama_index.core import SimpleDirectoryReader
from llama_index.core.schema import Document  # Adjust if needed

def add_metadata(input_file: str, x: dict, metadata_fn=default_file_metadata_func, fs=None) -> dict:
    metadata = metadata_fn(file_path=input_file, fs=fs)
    metadata.update(x)
    return metadata


def get_documents(filepath: str, additional_metadata: dict=None, **kwargs) -> list[Document]:
    metadata_fn = kwargs.pop('file_metadata', default_file_metadata_func)

    if additional_metadata:
        metadata_fn = partial(add_metadata, x=additional_metadata, metadata_fn=metadata_fn)


    documents = SimpleDirectoryReader(filepath, file_metadata=metadata_fn, **kwargs).load_data()

    
    return documents

In [6]:
docs = get_documents('data/paul_graham/', additional_metadata={'company_name': 'paul'})

In [7]:
d = docs[0]

In [8]:
print(dir(d))

['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__class_vars__', '__copy__', '__deepcopy__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__fields__', '__fields_set__', '__format__', '__ge__', '__get_pydantic_core_schema__', '__get_pydantic_json_schema__', '__getattr__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__pretty__', '__private_attributes__', '__pydantic_complete__', '__pydantic_computed_fields__', '__pydantic_core_schema__', '__pydantic_custom_init__', '__pydantic_decorators__', '__pydantic_extra__', '__pydantic_fields__', '__pydantic_fields_set__', '__pydantic_generic_metadata__', '__pydantic_init_subclass__', '__pydantic_parent_namespace__', '__pydantic_post_init__', '__pydantic_private__', '__pydantic_root_model__', '__pydantic_serializer__', '__pydantic_setattr_handlers__', '__pydantic_validator__', '__reduce__', 

In [9]:
d.metadata

{'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt',
 'file_name': 'paul_graham_essay.txt',
 'file_type': 'text/plain',
 'file_size': 75042,
 'creation_date': '2025-07-11',
 'last_modified_date': '2025-07-11',
 'company_name': 'paul'}

# setup qdrant

In [10]:
regex_pattern='''(?x)
# Used in https://tokenize.jina.ai; Enable verbose mode for readability

# 1. Headings (Setext-style, Markdown, and HTML-style)
(?:^#{1,6}|^[^\r\n]{1,200}(?:\r?\n=+|\r?\n-+)|<h[1-6][^>]*>[^<\r\n]{1,200}</h[1-6]>)(?:\r?\n|$)

# 2. List items (bulleted, numbered, lettered, or task lists, including nested, up to three levels)
(?:(?:^|\r?\n)[ \t]{0,3}(-{1,3}|\d{1,3}[.\)]|\[ [xX ]\])(?:[^\r\n]{1,200})
(?:(?:\r?\n[ \t]+[^\s,:;!?-]{1,3}[.\)]|\[ [xX ]\])?[^\r\n]{1,200}){0,5}
|(?:\r?\n[ \t]{4,7}(?:-{1,3}|\d{1,3}[.\)]|\[ [xX ]\])?[^\r\n]{1,200}){0,5})?

# 3. Block quotes (including nested quotes and citations, up to three levels)
(?:(?:^|\s|\r?\n)[>‣]{1,3}[^\r\n]{0,200})(?:\r?\n){1,10}

# 4. Code blocks (fenced, indented, or HTML pre/code tags)
(?:(?:```|~~~|<pre>|<code>)[^\r\n]{0,20})?(?:\r?\n|\s){0,1000}?(?:```|~~~|\r?\n
|<pre>|<code>)(?:\r?\n){0,200}?|(?: {4}|\t)[^\r\n]{0,200}){0,20}(?:\r?\n)
|(?:<code>)[^\r\n]{0,500}(?:</code>){0,5}

# 5. Tables (Markdown, grid tables, and HTML tables)
(?:(?:^|\r?\n)(?:\|[^\r\n]{0,200})(?:\r?\n(?:\|[-:]{1,200}){0,1}(?:\r?\n(?:\|[^\r\n]{0,200})){0,20}
|<table>[^\s]{0,2000}</table>)

# 6. Horizontal rules
(?:^[-*_]{3,})\s*$|\r\n\s*<hr\s*/?>

# 7. Sentences or phrases ending with punctuation
(?:[^\r\n]{1,300}(?:[.?!…‽!?:…‥]{1,3}|\u2026\u2047-\u2049]|[\p{Emoji_Presentation}\p{Extended_Pictographic}])
(?:\s+|\r?\n|^))

# 8. Quoted text, parenthetical phrases, or bracketed content
(?:(^|\r?\n){0,300}"
|(?:(^|\r?\n){0,200}(["'‘“])[^\r\n]{0,200}(["'’”])(?:\r?\n){0,5})
|(?:\r?\n\[ [^\r\n]{0,200}\](?:\r?\n|\[ [^\r\n]{0,200}\]){0,5}\]
|[^\r\n]{0,100})$

# 9. Paragraphs
(?:(?:\r?\n\r?\n|\A)(?:<p>.*?</p>){1,1000}?)(?=</p>|(?:\r?\n\r?\n|\Z))

# 10. Standalone lines or phrases
(?:^[a-zA-Z]{1,100,99})?(?:\r?\n[a-zA-Z]+)?(?:\r?\n|$))

# 11. HTML-like tags and their content
(?:<a-zA-Z][^>]{0,99}>\s{0,1000}?</a-zA-Z]+\s*/?>)

# 12. LaTeX-style math expressions
(?:\$\s{0,100}\$|\$\$.*?\$\$|\$\s*[^\r\n]{0,100}\$)

# 13. Emoji and special characters
(?:[\p{Emoji_Presentation}\p{Extended_Pictographic}\p{So}]+|[\u2600-\u26FF\u0001F300-\u0001F6FF\u0001F700-\u0001F77F\u0001F780-\u0001F7FF\u0001F800-\u0001F8FF])

# 14. Fallback for any remaining content
(?:^[^\r\n]{1,200})
)
'''

In [11]:
import os

In [12]:
# os.getenv('QDRANT_API_KEY0')

In [13]:
client = qdrant_client.QdrantClient(
    # you can use :memory: mode for fast and light-weight experiments,
    # it does not require to have Qdrant deployed anywhere
    # but requires qdrant-client >= 1.1.1
    # location=":memory:"
    # otherwise set Qdrant instance address with:
    url="https://3e782d13-9d96-405c-8dc0-7deb8bdba7a9.eu-west-2-0.aws.cloud.qdrant.io",
    # otherwise set Qdrant instance with host and port:
    # host="localhost",
    # set API KEY for Qdrant Cloud
    api_key=os.getenv('QDRANT_API_KEY0'),
    check_compatibility=False
)

from qdrant_client import QdrantClient
from qdrant_client.http.models import  PayloadSchemaType, PayloadSchemaType






In [14]:
# from here we will see what strategy works better for given steps:

In [15]:
from llama_index.core import Document
from llama_index.core.schema import BaseNode, TransformComponent
from typing import Any, Generator, List, Optional, Sequence, Union

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline, IngestionCache

# create the pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=25, chunk_overlap=0),
        TitleExtractor(),
        OpenAIEmbedding(),
    ]
)

# run the pipeline
# nodes = pipeline.run(documents=[Document.example()])

In [16]:
from abc import ABC, abstractmethod

class MakeNode(ABC):
    transformations:list[TransformComponent]

    @abstractmethod
    def ingest(self, *args, **kargs) -> Sequence[BaseNode]:
        raise NotImplementedError

In [17]:
class SimpleMakeNode(MakeNode):
    def __init__(self, **ingestion_kwargs)-> None:
        self.ingestion_pipeline=IngestionPipeline(**ingestion_kwargs)

    def ingest(self, documents, *args, **kwargs) -> Sequence[BaseNode]:
        return self.ingestion_pipeline.run(documents=documents)
        

In [18]:
# docs

In [19]:
docs2 = get_documents('data/me')
len(docs2)

1

In [20]:
from llama_index.core.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings.openai import OpenAIEmbedding

# embed_model = OpenAIEmbedding()
splitter = SemanticSplitterNodeParser(
    buffer_size=1, breakpoint_percentile_threshold=95, embed_model=Settings.embed_model
)

splitter2 = SentenceSplitter(secondary_chunking_regex=regex_pattern)
transformations = [
    splitter2
]


In [21]:
from llama_index.readers.wikipedia import WikipediaReader


In [22]:
from functools import cache

splitter = SemanticSplitterNodeParser(
buffer_size=1, breakpoint_percentile_threshold=95, embed_model=Settings.embed_model
)

splitter2 = SentenceSplitter(secondary_chunking_regex=regex_pattern)
splitter3 = SentenceSplitter()
@cache
def get_chunks(path,split_name='simple', show=False)-> BaseNode:
    d = {'semantic': splitter, 'regex': splitter2, 'simple': splitter3}
    splitterr = d.get(split_name)
    pages=get_documents(path)
    if show:
        print(pages)

    transformations = [
        # splitter2,
        splitterr
    ]
    nodes = SimpleMakeNode(
    transformations=transformations

    ).ingest(pages)
    return nodes

In [23]:
# nodes = get_chunks('./data/qna', 'semantic',False ) # semantic chunks
# for i in nodes:
#     print('-X'*22)
#     print(i.text)

In [24]:
# nodes = get_chunks('./data/qna', 'regex',False ) # semantic chunks
# for i in nodes:
#     print('-X'*22)
#     print(i.text)

In [25]:
# nodes = get_chunks('./data/qna', 'simple',False ) # semantic chunks
# for i in nodes:
#     print('-X'*22)
#     print(i.text)

In [26]:
# nodes = get_chunks('./data/paul_graham')

# for i in nodes:
#     print('-X'*22)
#     print(i.text)

In [27]:
from pathlib import Path
from abc import ABC, abstractmethod

class BaseRag(ABC):
    
    ingestion_pipeline:list[TransformComponent]
    node_postprocessor:list[TransformComponent]

    @abstractmethod
    def _ingest(self, filepath:Path, **kwargs):
        raise NotImplementedError
    
    @abstractmethod
    def _query(self, query:str, k:int, **kwargs):
        raise NotImplementedError
    
    def ingest(self, filepath:Path, **kwargs):
        assert filepath.exists() , f"{filepath} doesn't exists"
        return self._ingest(filepath, **kwargs)
    
    def query(self, query:str, k:int, **kwargs):
        return self._query(query, k, **kwargs)

In [28]:
from llama_index.core.llms import LLM

In [29]:
from llama_index.core import Document
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceSplitter, TokenTextSplitter
from llama_index.core.extractors import TitleExtractor
from llama_index.core.ingestion import IngestionPipeline, IngestionCache

# create the pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=25, chunk_overlap=0),
        # TitleExtractor(),
        # OpenAIEmbedding(),
    ]
)

# run the pipeline
nodes = pipeline.run(documents=[Document.example()])

Metadata length (9) is close to chunk size (25). Resulting chunks are less than 50 tokens. Consider increasing the chunk size or decreasing the size of your metadata to avoid this.


In [30]:
from llama_index.llms.openai import OpenAI
from llama_index.core.schema import MetadataMode

In [31]:
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo", max_tokens=512)

In [32]:
class Rag(BaseRag):
    def __init__(self, llm:LLM, ingestion_pipeline:list[TransformComponent]|None=None, node_preprocessor:list[TransformComponent]|None=None):
        self.llm = llm
        self.ingestion_pipeline=ingestion_pipeline
        self.node_postprocessor=node_preprocessor

    def _query(self, id:str, query:str, k:int, **kwargs)-> str:
        return ''
    
    def _ingest(self, id:str, filepath:Path, **kwargs)-> None:
        return


In [33]:
for n, i in enumerate(nodes):
    print(i.text)
    print(f'{n+1}->'*22, '\n')

Context
LLMs are a phenomenal piece of technology for knowledge generation and
1->1->1->1->1->1->1->1->1->1->1->1->1->1->1->1->1->1->1->1->1->1-> 

reasoning.
They are pre-trained on large amounts of publicly available data.
2->2->2->2->2->2->2->2->2->2->2->2->2->2->2->2->2->2->2->2->2->2-> 

How do we best augment LLMs with our own private data?
3->3->3->3->3->3->3->3->3->3->3->3->3->3->3->3->3->3->3->3->3->3-> 

We need a comprehensive toolkit to help perform this data augmentation for LLMs.
4->4->4->4->4->4->4->4->4->4->4->4->4->4->4->4->4->4->4->4->4->4-> 

Proposed Solution
That's where LlamaIndex comes in.
5->5->5->5->5->5->5->5->5->5->5->5->5->5->5->5->5->5->5->5->5->5-> 

LlamaIndex is a "data framework" to help
you build LLM
6->6->6->6->6->6->6->6->6->6->6->6->6->6->6->6->6->6->6->6->6->6-> 

apps. It provides the following tools:

Offers data connectors to ingest your
7->7->7->7->7->7->7->7->7->7->7->7->7->7->7->7->7->7->7->7->7->7-> 

existing data sources and data formats
(

In [34]:
prompt = '''

You are given a block of text in which each line starts with a line number. Your task is to analyze the structure and content of the text to determine appropriate points where the text should be split into separate logical chunks.

Please follow the instructions below precisely:

⸻

✅ Objective:

Return the line numbers after which a break or chunk boundary should occur.

⸻

📌 Guidelines for Identifying Breakpoints:
	1.	Question-Answer Pairs:
Insert a breakpoint after each QA pair. If there are multiple QA pairs, each should be its own chunk.
Example:

12. Q: What is X?
13. A: X is ...
→ Break after line 13


	2.	Topic Transitions:
If there's a shift in topic, subheading, or structural marker (like “Step”, “Section”, “Part”, or bullet points restarting), insert a break before the start of the new topic, i.e., after the previous line.
	3.	Paragraph Separations:
If the content clearly separates into paragraphs (even if no blank lines exist), and these paragraphs represent distinct ideas, insert a break between them.
	4.	Speaker Changes (Dialogue):
If multiple speakers are indicated (e.g., “Speaker 1:”, “John:”, “Interviewer:”), treat each exchange as a separate chunk. Break after each speaker’s turn.
	5.	Bullet/List Item Groups:
If there is a coherent set of bullet or numbered list items, keep them in one chunk. But break after the list ends or if a new list starts.

⸻

🚫 What Not To Do:
	•	Do not insert breaks arbitrarily.
	•	Do not insert a break inside a tightly coupled pair of lines (e.g., question and answer).
	•	Do not insert any text or explanation — just the list of line numbers.

⸻

🧾 Output Format:
	•	Return a comma-separated list of integers, where each integer represents the line number after which a break should occur.
	•	Example output:

[3,7,13,20]


	•	If no breaks are needed, return an empty list:

[]



⸻

This prompt ensures precise structure, encourages thoughtful segmentation, and communicates expectations clearly to the model or annotator performing the task. Let me know if you’d like to tailor this prompt to a specific kind of text or dataset!
'''

prompt = '''

You are given a text where each line starts with a line number followed by a space and content. This content consists of multiple Question and Answer pairs in the format Q: and A:. Some Q&As may span multiple lines. Your task is to return a list of integers, each representing the line number after which a breakpoint should be inserted.

Follow these strict rules:

⸻

✅ Breakpoint Placement Rules
	1.	Insert a breakpoint only after a complete Q&A pair has ended.
	•	Both Q: and its corresponding A: must be fully present and finished.
	•	A Q&A pair may span multiple lines.
	2.	Return the line number of the last line of the complete Q&A pair.

⸻

❌ When Not to Insert a Breakpoint
	3.	Do not insert a breakpoint if the Q or A is incomplete or continues on the next page:
	•	If the last line of a page ends mid-answer or mid-question.
	•	If the Q&A seems to be split across pages.
	4.	Do not insert a breakpoint immediately after a partial Q or A that continues from the previous page.
	•	Wait until that Q&A completes.

⸻

📄 Continuation Detection
	5.	If the beginning of the page starts mid-sentence or mid-answer, assume it is a continuation from the previous page. Do not break until the first full Q&A pair finishes.
	6.	Understand if question and answer are using some prefixes to identify question and answer sections. use it to understand the pair.

⸻


🧠 Additional Instructions
	7.	Use punctuation (periods, question marks) to detect the end of a sentence, if helpful.
	8.	Do not rely on fixed line counts; rely on semantic completeness.
	9.	Be cautious of malformed input — only insert breakpoints in clearly valid, complete Q&A pairs.



🚫 What Not To Do
	•	❌ Don't insert arbitrary breaks without clear justification.
	•	❌ Don't break Q and A lines from each other.
	•	❌ Don't break in the middle of lists or paragraphs.
	•	❌ Don't add commentary or explanation in your output.

⸻

🧾 Output Format
	•	Return a comma-separated list of integers, where each number is the line number after which a break should occur.
Example Output:

[3, 7, 13, 20]


	•	If no breaks are needed:

[]

'''

format_specific='''

⸻

📚 Format-Specific Instructions

⸻

🔸 1. Q&A Style Documents

Use the General Guidelines above with additional care for:
	•	Multiple Q&A pairs: Ensure each pair is isolated as a separate chunk.
	•	Sub-question chains: If a main question has several follow-ups (e.g., Q1.1, Q1.2), treat the group as a single unit unless each sub-question has its own distinct answer.

⸻

🔸 2. Story/Narrative Documents
	•	Break after scene transitions, time jumps, or dialogue exchanges.
	•	Keep descriptive paragraphs together unless there's a thematic or spatial shift.
	•	If there's a chapter or section heading, break before it.
	•	Speaker changes in dialogue should follow the speaker rule.

⸻

🔸 3. Markdown Documents

Pay special attention to syntax markers:
	•	Headings (#, ##, ###): Always insert a break before a new heading (after the previous line).
	•	Code blocks (```):
	•	Treat the full code block as one chunk.
	•	Break before and after the code block, but not inside it.
	•	Lists (-, *, 1.): Group related items; break after the entire list ends.
	•	Blockquotes (>): Keep together if continuous. Break only when the quotation context ends.

⸻

🔸 4. Miscellaneous Document Types (e.g., instructions, logs, notes)
	•	For instructional content (e.g., numbered steps), break between steps.
	•	For logs, break between log sessions, especially when timestamps, user IDs, or contexts shift.
	•	For meeting notes, follow speaker turn logic or topic boundaries.

⸻
'''

prompt = '''
You are given a text where each line begins with a line number, followed by a space and the content. This content may include Q&A pairs, passages, conversations, paragraphs, bullet points, or article-like prose. Your task is to return a list of line numbers, each representing the last line of a complete and self-contained content unit — after which a breakpoint should be inserted.

⸻

✅ Breakpoint Placement Rules
	1.	Insert a breakpoint only after a semantically complete and self-contained content unit ends.
This could be:
	•	A complete Q&A pair (Q: … A: …),
	•	A complete paragraph,
	•	A completed exchange in a conversation,
	•	A finished list or bullet section,
	•	A section of an article or structured argument that is logically complete.
	2.	Return the line number of the last line of that complete unit.

⸻

❌ When Not to Insert a Breakpoint
	3.	Do NOT insert a breakpoint if the content is incomplete or continues onto the next section.
For example:
	•	If a paragraph ends mid-sentence.
	•	If a list is only partially complete.
	•	If a Q or A is mid-response.
	•	If a speaker in a dialogue is interrupted or the exchange is not concluded.
	4.	Avoid breakpoints at the beginning of a continuation.
If the page or section starts mid-thought, mid-answer, or mid-paragraph, wait until the current unit concludes before inserting a breakpoint.

⸻

📄 Continuation Detection
	5.	Detect whether a line is a continuation from a previous page or section.
	•	If a section starts mid-sentence or mid-thought, assume it’s a continuation.
	•	Wait for the first complete, independent content unit before placing a breakpoint.
	6.	Identify formatting cues like Q:, A:, speaker tags (User:, Agent:), bullet points, numbering, or Markdown headers to recognize structural boundaries.

⸻

🧠 Additional Guidance
	7.	Use punctuation such as periods, question marks, or colons to detect sentence and paragraph completion.
	8.	Don’t rely on fixed line counts. Use semantic and syntactic cues to determine completeness.
	9.	Be cautious of malformed or unstructured input. Only insert breakpoints where logical content boundaries are clear.

⸻

🚫 What Not To Do
	•	❌ Don’t insert arbitrary breaks just to make text shorter.
	•	❌ Don’t break mid-sentence, mid-paragraph, or in the middle of a speaker’s turn.
	•	❌ Don’t separate semantically connected items (e.g., Q from A, bullet point lists, dialogue exchanges).
	•	❌ Don’t include commentary or justification in your output.

⸻

🧾 Output Format
	•	Return a comma-separated list of integers, where each number is the line number after which a break should occur.

Example Output:

[3, 7, 13, 20]

	•	If no breakpoints should be inserted, return:

[]


⸻

Let me know if you’d like me to help you implement this logic programmatically or test it on a sample input.
'''

In [35]:
import re
from litellm import completion
from typing import TypedDict
from pydantic import BaseModel, Field

class BreakPoints(BaseModel):
    line_before_breakpoint:list[int]=Field(description='line numbers after which should should be breakpoints')

def add_line_numbers(text: str) -> str:
    """Add line numbers to each line of the input text."""
    lines = text.splitlines()
    return "\n".join(f"{i+1}: {line}" for i, line in enumerate(lines))


def chunk_text_by_breakpoints(text: str, breakpoints: list[int]) -> list[str]:
    """
    Split the original text into chunks at the given line numbers.
    breakpoints: list of line numbers (1-based) after which to break.
    """
    lines = text.splitlines()
    chunks = []
    prev = 0
    for bp in breakpoints:
        chunk = "\n".join(lines[prev:bp])
        if chunk.strip():
            chunks.append(chunk)
        prev = bp
    # Add the last chunk
    if prev < len(lines):
        chunk = "\n".join(lines[prev:])
        if chunk.strip():
            chunks.append(chunk)
    return chunks


INFO:httpx:HTTP Request: GET https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json "HTTP/1.1 200 OK"
HTTP Request: GET https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json "HTTP/1.1 200 OK"


In [36]:
from litellm import completion, JSONSchemaValidationError
from typing import Type
from tenacity import retry
from tenacity import retry, stop_after_attempt, retry_if_exception_type

@retry(
    stop=stop_after_attempt(3),
    retry=retry_if_exception_type(JSONSchemaValidationError)
)
def ask_llm_for_breakpoints(
        numbered_text: str,
        model:str='gemini/gemini-2.5-flash', 
        prompt: str = prompt,
        pydantic_class:Type[BaseModel]=None) -> BaseModel:
    """
    Use the LLM to suggest line numbers after which to split the text.
    Returns a list of line numbers (1-based).
    """

    messages=[
            {'role': 'system', 'content': prompt}, 
            {'role': 'user', 'content': numbered_text}
            ]

    try: 
        response = completion(
        model=model,
        messages=messages, 
        response_format={
            "type": "json_object", 
            "response_schema": pydantic_class.model_json_schema(),
            "enforce_validation": True # 👈 KEY CHANGE
        } if pydantic_class else None
        )
        return pydantic_class.model_validate_json(response.choices[0].message.content)

    except JSONSchemaValidationError as e: 
        print("Raw Response: {}".format(e.raw_response))
        raise e
    

    # Extract numbers from response
    # output = response.choices[0].message.content
    # return output
    # numbers = re.findall(r'\d+', output)
    # return [int(n) for n in numbers]

In [38]:
# docs = get_documents('./data/qna')
docs = get_documents('./data/nasa')

In [None]:
text = docs[0].text

numbered = add_line_numbers(text)
breakpoints = ask_llm_for_breakpoints(numbered_text=numbered, model='gemini/gemini-2.0-flash', pydantic_class=BreakPoints)

[92m15:16:28 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:35 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:35 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


  PydanticSerializationUnexpectedValue(Expected 9 fields but got 5: Expected `Message` - serialized value may not be as expected [input_value=Message(content='{"line_b...er_specific_fields=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [input_value=Choices(finish_reason='st...r_specific_fields=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_python(
[92m15:16:35 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:38 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:38 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


  PydanticSerializationUnexpectedValue(Expected 9 fields but got 5: Expected `Message` - serialized value may not be as expected [input_value=Message(content='{\n  "li...er_specific_fields=None), input_type=Message])
  PydanticSerializationUnexpectedValue(Expected `StreamingChoices` - serialized value may not be as expected [input_value=Choices(finish_reason='st...r_specific_fields=None)), input_type=Choices])
  return self.__pydantic_serializer__.to_python(
[92m15:16:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:42 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:42 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:17:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:17:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


In [40]:
points = breakpoints
points

BreakPoints(line_before_breakpoint=[6, 8, 15, 19, 23, 29, 33, 35, 41, 47, 51, 53, 57, 61, 65, 70, 72, 77, 79, 81, 83, 85, 89, 91, 93, 99, 101, 103, 105, 109, 111, 115, 117, 125, 127, 129, 133, 135, 139, 141, 143, 145, 147, 156, 161, 162, 172, 174, 176, 178, 186, 188, 190, 197, 201, 203, 205, 207, 209, 211, 215, 220, 223, 227, 231, 233, 237, 239, 243, 248, 250, 252, 254, 259, 261, 265, 267, 269, 271, 277, 279, 281, 283, 290, 296, 298, 304, 306, 311, 315, 319, 323, 328, 330, 332, 335, 337, 339, 345, 350, 352, 355, 357, 359, 364, 366, 368, 370, 372, 375, 379, 384, 389, 394, 396, 401, 403, 408, 410, 415, 420, 424, 428, 434, 442, 445, 457, 466, 481, 484, 496, 501, 503, 506, 510, 512, 517, 519, 524, 527, 532, 535, 538, 544, 547, 584, 593, 602, 604, 610, 612, 614, 617, 619, 621, 623, 625, 628, 630, 632, 634, 636, 638, 640, 642])

In [41]:
def chuncked_text(text:str, points:list[int])-> list[str]:
    text = text.splitlines()
    chunks = []
    prev = 0
    for bp in points:
        chunk = "\n".join(text[prev:bp])
        if chunk.strip():
            chunks.append(chunk)
        prev = bp
    if prev < len(text):
        chunk = "\n".join(text[prev:])
        if chunk.strip():
            chunks.append(chunk)
    return chunks

In [42]:
out = chuncked_text(numbered, points.line_before_breakpoint)

In [43]:
for i,n in zip(out, points.line_before_breakpoint):
    print(i)
    print(n,'x'*22)

1: 
2: 
3: NASA
4: 
5: 
6: The National Aeronautics and Space Administration (NASA /ˈnæsə/) is an independent agency of the US federal government responsible for the United States's civil space program, aeronautics research and space research. Established in 1958, it succeeded the National Advisory Committee for Aeronautics (NACA) to give the American space development effort a distinct civilian orientation, emphasizing peaceful applications in space science. It has since led most of America's space exploration programs, including Project Mercury, Project Gemini, the 1968–1972 Apollo program missions, the Skylab space station, and the Space Shuttle. Currently, NASA supports the International Space Station (ISS) along with the Commercial Crew Program and oversees the development of the Orion spacecraft and the Space Launch System for the lunar Artemis program.
6 xxxxxxxxxxxxxxxxxxxxxx
7: 
8: NASA's science division is focused on better understanding Earth through the Earth Observing Sys

In [44]:
import asyncio
from nest_asyncio import apply
apply()

In [45]:

from litellm import token_counter  
from functools import cache
 
@cache
def prompt_token_counter(prompt, model='gemini/gemini-2.5-flash')-> int:
    messages = [{"role": "system", "content": prompt}, {'role': 'user', 'role': ''}]  
    return token_counter(model="gpt-3.5-turbo", messages=messages)

In [46]:
import asyncio

default_model='gemini/gemini-2.5-flash'
default_prompt = prompt
default_chunk_size_offset = prompt_token_counter(prompt=prompt, model=default_model)
def chunk_by_llm(
    text: str,
    token_limit: int = 8000,
    model: str = 'gemini/gemini-2.0-flash',
    prompt: str = prompt,  # define separately or pass explicitly
    chunk_overlap: int = 200,
    **kwargs
) -> List[str]:

    chunk_size_offset = prompt_token_counter(prompt=prompt, model=model)

    text_numbered = add_line_numbers(text)
    splitter = TokenTextSplitter(
        chunk_overlap=chunk_overlap,
        chunk_size=token_limit - chunk_size_offset,
        separator='\n'
    )
    splited_texts = splitter.split_text(text=text_numbered)

    async def process_chunks_async(chunks):
        loop = asyncio.get_event_loop()
        tasks = [
            loop.run_in_executor(
                None,
                ask_llm_for_breakpoints,
                chunk,
                model,
                prompt,
                BreakPoints
            )
            for chunk in chunks
        ]
        return await asyncio.gather(*tasks)

    breakpoints_list = asyncio.run(process_chunks_async(splited_texts))
    all_breakpoints = [j for i in breakpoints_list for j in i.line_before_breakpoint]
    return chunk_text_by_breakpoints(text, sorted(set(all_breakpoints)))

In [47]:
# text*2

In [None]:
out = chunk_by_llm(text=text*2)

[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini



LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini



LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini



LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini



LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:16:35 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:38 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:38 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:39 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler


[92m15:16:39 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler


[92m15:16:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


Wrapper: Completed Call, calling success_handler
INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:39 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:39 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:40 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:40 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:40 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:40 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:40 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:41 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:41 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:41 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:41 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:42 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:42 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:42 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:42 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:42 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:42 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:16:43 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:16:46 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:16:46 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:17:57 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:17:57 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini



LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini


[92m15:17:57 - LiteLLM:INFO[0m: utils.py:3119 - 
LiteLLM completion() model= gemini-2.0-flash; provider = gemini



LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:LiteLLM:
LiteLLM completion() model= gemini-2.0-flash; provider = gemini

LiteLLM completion() model= gemini-2.0-flash; provider = gemini
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:17:59 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:17:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:17:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:17:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:17:59 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:17:59 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=AIzaSyBAos4tOxOnBHqz2itlGLWcXB6-y4GuEx8 "HTTP/1.1 200 OK"


[92m15:18:00 - LiteLLM:INFO[0m: utils.py:1215 - Wrapper: Completed Call, calling success_handler


INFO:LiteLLM:Wrapper: Completed Call, calling success_handler
Wrapper: Completed Call, calling success_handler


[92m15:18:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:18:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


selected model name for cost calculation: gemini/gemini-2.0-flash
INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


[92m15:18:00 - LiteLLM:INFO[0m: cost_calculator.py:655 - selected model name for cost calculation: gemini/gemini-2.0-flash


INFO:LiteLLM:selected model name for cost calculation: gemini/gemini-2.0-flash
selected model name for cost calculation: gemini/gemini-2.0-flash


In [49]:
out

["\n\nNASA\n\n\nThe National Aeronautics and Space Administration (NASA /ˈnæsə/) is an independent agency of the US federal government responsible for the United States's civil space program, aeronautics research and space research. Established in 1958, it succeeded the National Advisory Committee for Aeronautics (NACA) to give the American space development effort a distinct civilian orientation, emphasizing peaceful applications in space science. It has since led most of America's space exploration programs, including Project Mercury, Project Gemini, the 1968–1972 Apollo program missions, the Skylab space station, and the Space Shuttle. Currently, NASA supports the International Space Station (ISS) along with the Commercial Crew Program and oversees the development of the Orion spacecraft and the Space Launch System for the lunar Artemis program.",
 "\nNASA's science division is focused on better understanding Earth through the Earth Observing System; advancing heliophysics through t

In [50]:
print(len(out))

627


In [51]:
for n,i in enumerate(out,1):
    print(f'{n}-'*11)
    print(i)

1-1-1-1-1-1-1-1-1-1-1-


NASA


The National Aeronautics and Space Administration (NASA /ˈnæsə/) is an independent agency of the US federal government responsible for the United States's civil space program, aeronautics research and space research. Established in 1958, it succeeded the National Advisory Committee for Aeronautics (NACA) to give the American space development effort a distinct civilian orientation, emphasizing peaceful applications in space science. It has since led most of America's space exploration programs, including Project Mercury, Project Gemini, the 1968–1972 Apollo program missions, the Skylab space station, and the Space Shuttle. Currently, NASA supports the International Space Station (ISS) along with the Commercial Crew Program and oversees the development of the Orion spacecraft and the Space Launch System for the lunar Artemis program.
2-2-2-2-2-2-2-2-2-2-2-

NASA's science division is focused on better understanding Earth through the Earth Observing System

In [52]:
out[0]

"\n\nNASA\n\n\nThe National Aeronautics and Space Administration (NASA /ˈnæsə/) is an independent agency of the US federal government responsible for the United States's civil space program, aeronautics research and space research. Established in 1958, it succeeded the National Advisory Committee for Aeronautics (NACA) to give the American space development effort a distinct civilian orientation, emphasizing peaceful applications in space science. It has since led most of America's space exploration programs, including Project Mercury, Project Gemini, the 1968–1972 Apollo program missions, the Skylab space station, and the Space Shuttle. Currently, NASA supports the International Space Station (ISS) along with the Commercial Crew Program and oversees the development of the Orion spacecraft and the Space Launch System for the lunar Artemis program."

In [53]:
out[1]

"\nNASA's science division is focused on better understanding Earth through the Earth Observing System; advancing heliophysics through the efforts of the Science Mission Directorate's Heliophysics Research Program; exploring bodies throughout the Solar System with advanced robotic spacecraft such as New Horizons and planetary rovers such as Perseverance; and researching astrophysics topics, such as the Big Bang, through the James Webb Space Telescope, the four Great Observatories, and associated programs. The Launch Services Program oversees launch operations for its uncrewed launches."

In [54]:
from llama_index.core.node_parser import NodeParser
from llama_index.core.schema import Document, TextNode
from typing import List, Sequence, Any

class SemanticChunkingNodeParser(NodeParser):
    def _parse_nodes(
        self,
        documents: Sequence[Document],
        **kwargs: Any
    ) -> List[TextNode]:
        """
        Parse documents into nodes using the chunk_by_llm function.

        Parameters:
        - documents: Sequence of Document objects to be parsed.
        - kwargs: Additional keyword arguments to be passed to chunk_by_llm.

        Returns:
        - List of TextNode objects resulting from the chunking process.
        """
        all_nodes = []
        for doc in documents:
            # Use chunk_by_llm with runtime parameters
            chunks = chunk_by_llm(doc.text, **kwargs)
            for chunk in chunks:
                node = TextNode(text=chunk, metadata=doc.metadata)
                all_nodes.append(node)
        return all_nodes



In [55]:
from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader("./data/qna").load_data()

parser = SemanticChunkingNodeParser(

)

nodes = parser.get_nodes_from_documents(documents)

In [56]:
def print_nodes(nodes):
    for i in nodes:
        print(i.text, end='\n\n')
        print('->'*33)

print_nodes(nodes)



**Q1: What is the surface temperature of the Sun?**
A1: The surface temperature of the Sun is approximately 5500°C (10,000°F). This is the temperature at the Sun's photosphere, which is the layer that we can see and is the source of sunlight.

->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->

**Q2: What is the Sun's role in our solar system?**
A2: The Sun is the center of our solar system and plays a crucial role in sustaining life on Earth. It provides light, heat, and energy to our planet through nuclear reactions that occur within its core.

->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->

**Q3: How long does it take for sunlight to reach Earth?**
A3: It takes approximately 8 minutes and 20 seconds for sunlight to reach Earth from the Sun. This is because the Sun is about 149.6 million kilometers (92.96 million miles) away from Earth.

->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->->

**Q4: What is the Sun's expected lifespan?

In [57]:
from llama_index.core.extractors import (
    TitleExtractor,
    KeywordExtractor,
    QuestionsAnsweredExtractor,
)

In [58]:

    
from llama_index.core.node_parser import MarkdownNodeParser

parser = MarkdownNodeParser()

ip = IngestionPipeline(
    transformations=[
        SemanticChunkingNodeParser(),
        QuestionsAnsweredExtractor(),
        # KeywordExtractor(),
        # FastEmbedEmbedding(cache_dir='./fastembed_weights'),

        
    ])

In [59]:
import logging
import sys
import os

import qdrant_client
from IPython.display import Markdown, display
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.vector_stores.types import BasePydanticVectorStore
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings


In [60]:

ip = IngestionPipeline(
    transformations=[SemanticSplitterNodeParser(embed_model=Settings.embed_model), QuestionsAnsweredExtractor()])

In [61]:
documents = get_documents('./data/paul_graham', additional_metadata={
    'company_name': 'paul',
    'age':100,
    'location': 'wikipedia',
    'id':123,
    'chunk': 'big'
    })
nodes = ip.run(documents=documents)


  0%|          | 0/39 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  3%|▎         | 1/39 [00:01<01:05,  1.72s/it]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  5%|▌         | 2/39 [00:02<00:32,  1.13it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


  8%|▊         | 3/39 [00:03<00:37,  1.04s/it]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 13%|█▎        | 5/39 [00:03<00:18,  1.84it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 15%|█▌        | 6/39 [00:03<00:14,  2.23it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 18%|█▊        | 7/39 [00:04<00:18,  1.74it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 23%|██▎       | 9/39 [00:04<00:11,  2.70it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 26%|██▌       | 10/39 [00:05<00:09,  3.06it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 28%|██▊       | 11/39 [00:06<00:15,  1.83it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 31%|███       | 12/39 [00:06<00:14,  1.88it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 33%|███▎      | 13/39 [00:07<00:11,  2.17it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 36%|███▌      | 14/39 [00:07<00:10,  2.47it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 38%|███▊      | 15/39 [00:08<00:13,  1.75it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 41%|████      | 16/39 [00:08<00:12,  1.86it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 46%|████▌     | 18/39 [00:08<00:06,  3.04it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 49%|████▊     | 19/39 [00:09<00:09,  2.11it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 51%|█████▏    | 20/39 [00:10<00:09,  2.05it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 56%|█████▋    | 22/39 [00:10<00:05,  2.86it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 59%|█████▉    | 23/39 [00:11<00:05,  2.91it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 62%|██████▏   | 24/39 [00:11<00:07,  2.02it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 64%|██████▍   | 25/39 [00:12<00:05,  2.54it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 67%|██████▋   | 26/39 [00:12<00:04,  3.09it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 72%|███████▏  | 28/39 [00:13<00:04,  2.39it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 74%|███████▍  | 29/39 [00:13<00:03,  2.61it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 77%|███████▋  | 30/39 [00:13<00:03,  2.54it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 79%|███████▉  | 31/39 [00:14<00:03,  2.13it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 85%|████████▍ | 33/39 [00:14<00:02,  2.96it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 87%|████████▋ | 34/39 [00:15<00:02,  2.23it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 92%|█████████▏| 36/39 [00:16<00:01,  1.99it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 95%|█████████▍| 37/39 [00:17<00:00,  2.26it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


 97%|█████████▋| 38/39 [00:17<00:00,  2.09it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


100%|██████████| 39/39 [00:19<00:00,  1.99it/s]


In [62]:
for i in nodes:
    print(len(i.text), end=', ')

1233, 2481, 1400, 3557, 1102, 19, 972, 787, 1266, 10118, 305, 1005, 330, 656, 308, 3335, 925, 1080, 6193, 645, 1337, 351, 229, 6, 575, 551, 6, 4368, 4068, 296, 3800, 5954, 430, 1367, 836, 2530, 1143, 1378, 8072, 

In [63]:
len(nodes[9].text)

10118

In [64]:
nodes[9]

TextNode(id_='e0785300-a19b-49c9-b785-26d9b1eeaeb8', embedding=None, metadata={'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2025-07-11', 'last_modified_date': '2025-07-11', 'company_name': 'paul', 'age': 100, 'location': 'wikipedia', 'id': 123, 'chunk': 'big', 'questions_this_excerpt_can_answer': "1. What was the company name where the author worked and learned valuable lessons about technology companies being run by product people rather than sales people?\n2. What was the author's budget for everything else after paying rent in Florence, and how did his income change when he started working at Interleaf?\n3. What important lesson did the author learn at Interleaf that he later applied in both Viaweb and Y Combinator?\n4. What did the author learn in the color class he took at RISD, and why did he ultimately decide to drop out in 1993?\n5. How d

In [65]:
documents[0].metadata, nodes[0].metadata

({'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt',
  'file_name': 'paul_graham_essay.txt',
  'file_type': 'text/plain',
  'file_size': 75042,
  'creation_date': '2025-07-11',
  'last_modified_date': '2025-07-11',
  'company_name': 'paul',
  'age': 100,
  'location': 'wikipedia',
  'id': 123,
  'chunk': 'big'},
 {'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt',
  'file_name': 'paul_graham_essay.txt',
  'file_type': 'text/plain',
  'file_size': 75042,
  'creation_date': '2025-07-11',
  'last_modified_date': '2025-07-11',
  'company_name': 'paul',
  'age': 100,
  'location': 'wikipedia',
  'id': 123,
  'chunk': 'big',
  'questions_this_excerpt_can_answer': "1. What were the two main things the author worked on before college?\n2. What type of programs did the author try writing on the IBM 1401 in 9th grade?\n3. What language did the author use to write programs on the IBM 1401?\n4. Where was the IBM 1401 located in the 

In [66]:
documents2 = get_documents(
    './data/paul_graham',
    additional_metadata={
        # 'company_name': 'Convirza',
        # 'doctor': 'Dr. Smith',
        # 'location': 'usa',
        'id': 345,
        # 'age':200
        "chunk":'small'
        }
)


ip = IngestionPipeline(
    transformations=[SemanticChunkingNodeParser(), 
                    #  QuestionsAnsweredExtractor()
                     ]
                     )

nodes2 = ip.run(documents=documents2)

In [67]:
for i in nodes2:
    print(len(i.text), end=', ')

1941, 248, 229, 558, 588, 174, 623, 776, 293, 457, 292, 974, 619, 341, 396, 322, 72, 810, 482, 234, 271, 307, 811, 281, 418, 496, 603, 1279, 1204, 982, 307, 408, 911, 1075, 357, 475, 815, 531, 470, 490, 434, 604, 738, 392, 772, 268, 572, 1055, 5539, 2498, 1074, 2721, 1886, 876, 536, 1224, 419, 553, 1096, 488, 472, 887, 1025, 194, 403, 1283, 439, 1153, 615, 1081, 954, 855, 645, 893, 553, 1092, 474, 512, 510, 247, 857, 307, 191, 2231, 357, 850, 527, 312, 182, 638, 944, 236, 303, 237, 210, 368, 870, 233, 961, 642, 221, 262, 204, 760, 271, 324, 277, 170, 

In [68]:
len(nodes) , len(nodes2)

(39, 108)

In [69]:
nodes[0].metadata

{'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt',
 'file_name': 'paul_graham_essay.txt',
 'file_type': 'text/plain',
 'file_size': 75042,
 'creation_date': '2025-07-11',
 'last_modified_date': '2025-07-11',
 'company_name': 'paul',
 'age': 100,
 'location': 'wikipedia',
 'id': 123,
 'chunk': 'big',
 'questions_this_excerpt_can_answer': "1. What were the two main things the author worked on before college?\n2. What type of programs did the author try writing on the IBM 1401 in 9th grade?\n3. What language did the author use to write programs on the IBM 1401?\n4. Where was the IBM 1401 located in the author's school district?\n5. How did the author describe the environment where the IBM 1401 was located?"}

In [70]:
nodes2[0].metadata

{'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt',
 'file_name': 'paul_graham_essay.txt',
 'file_type': 'text/plain',
 'file_size': 75042,
 'creation_date': '2025-07-11',
 'last_modified_date': '2025-07-11',
 'id': 345,
 'chunk': 'small'}

In [71]:
import time

In [72]:
def get_qdrant_index(url:str, collection_name:str, api_key:str|None=None)-> VectorStoreIndex:

    client = qdrant_client.QdrantClient(
        # url=url,
        location=":memory:"

        # api_key=api_key or os.getenv('QDRANT_API_KEY0'),
        # check_compatibility=False
    )
    # client.create_payload_index(
    # collection_name=collection_name,
    # field_name="company_name",
    # field_schema=PayloadSchemaType.KEYWORD  # You may use TEXT or INTEGER if appropriate
    # )
    s = time.time()
    vector_store = QdrantVectorStore(client=client, collection_name=collection_name, enable_hybrid=True,)
    print(f"vector store took: {time.time()-s}")
    s = time.time()

    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    print(f"store context took: {time.time()-s}")

    s = time.time()

    index = VectorStoreIndex.from_vector_store(
        storage_context=storage_context,
        vector_store=vector_store,
    )
    print(f"index took: {time.time()-s}")

    return index

In [73]:
qurl= "https://3e782d13-9d96-405c-8dc0-7deb8bdba7a9.eu-west-2-0.aws.cloud.qdrant.io"

index = get_qdrant_index(qurl, 'collection_three')

Fetching 5 files: 100%|██████████| 5/5 [02:42<00:00, 32.47s/it]


vector store took: 168.86408805847168
store context took: 0.0005769729614257812
index took: 0.0012538433074951172


In [74]:
# def add_company_name_as_metadata()

In [75]:
def store(nodes:list[BaseNode], index:VectorStoreIndex) -> None:
    index.insert_nodes(nodes)
    return index


In [76]:
nodes[0]

TextNode(id_='45a4d06b-5fbb-4393-9af3-963049c46b24', embedding=None, metadata={'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2025-07-11', 'last_modified_date': '2025-07-11', 'company_name': 'paul', 'age': 100, 'location': 'wikipedia', 'id': 123, 'chunk': 'big', 'questions_this_excerpt_can_answer': "1. What were the two main things the author worked on before college?\n2. What type of programs did the author try writing on the IBM 1401 in 9th grade?\n3. What language did the author use to write programs on the IBM 1401?\n4. Where was the IBM 1401 located in the author's school district?\n5. How did the author describe the environment where the IBM 1401 was located?"}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type'

In [77]:
index = store(nodes=nodes, index=index)
index = store(nodes=nodes2, index=index)

  self._client.create_payload_index(


In [78]:
Settings.embed_model

FastEmbedEmbedding(model_name='BAAI/bge-base-en-v1.5', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x10dd34e10>, num_workers=None, embeddings_cache=None, max_length=512, cache_dir=None, threads=None, doc_embed_type='default', providers=None)

In [79]:
nodes[0]

TextNode(id_='45a4d06b-5fbb-4393-9af3-963049c46b24', embedding=None, metadata={'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2025-07-11', 'last_modified_date': '2025-07-11', 'company_name': 'paul', 'age': 100, 'location': 'wikipedia', 'id': 123, 'chunk': 'big', 'questions_this_excerpt_can_answer': "1. What were the two main things the author worked on before college?\n2. What type of programs did the author try writing on the IBM 1401 in 9th grade?\n3. What language did the author use to write programs on the IBM 1401?\n4. Where was the IBM 1401 located in the author's school district?\n5. How did the author describe the environment where the IBM 1401 was located?"}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type'

In [80]:
len(nodes)

39

In [81]:
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.query_engine import BaseQueryEngine

In [97]:
from llama_index.core.vector_stores.types import VectorStoreQuery, MetadataFilters, MetadataFilter, FilterCondition
from qdrant_client.http.models import Filter, FieldCondition, MatchValue

# Define Qdrant filters
qdrant_filters = Filter(
    must=[
        FieldCondition(key="company_name", match=MatchValue(value="paul")),
        # FieldCondition(key="year", match=MatchValue(value=1994)),
    ]
)

def company_filter(company_name:str)-> Filter:
    return Filter(
        must=[
            FieldCondition(key="company_name", match=MatchValue(value=company_name)),
        ]
    )


from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

filters = MetadataFilters(
    filters=[
        MetadataFilter(key="company_name", value="paul"),
    ]
)
# from qdrant_client.http.models import Filter, FieldCondition, MatchValue

# filters = Filter( # qdrant specific
#     must=[
#         FieldCondition(
#             key="id",
#             match=MatchValue(value=123),
#         )
#     ]
# )

# filters = MetadataFilters( # llama index specific
#     filters=[MetadataFilter(key='id', value=123)]
# )
# Apply filters to the retriever
# retriever: BaseRetriever = index.as_retriever(vector_store_kwargs={"qdrant_filters": filters})

from llama_index.core.vector_stores import FilterOperator, FilterCondition

from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

filters = MetadataFilters(
    filters=[
        MetadataFilter(key="age", operator=FilterOperator.EQ, value=100),
    ]
)

retriever: BaseRetriever = index.as_retriever(filters=filters)
# retriever: BaseRetriever = index.as_retriever(similarity_top_k=4)

# Perform retrieval
# results = retriever.retrieve("What is the story about?")

In [98]:
from llama_index.core.postprocessor.llm_rerank import LLMRerank

def rerank(query:str, nodes:list[BaseNode])-> list[BaseNode]:
    ranker = LLMRerank(
            choice_batch_size=5, top_n=3, llm=OpenAI(model="gpt-4o-mini")
        )
    new_nodes = ranker.postprocess_nodes(
            nodes, query_str=query)
        
    print(f"Reranked nodes to {len(new_nodes)}")
    return new_nodes

In [99]:
# retriever.retrieve('What were the two main things the author worked on before college')

query = 'What were the two main things the author worked on before college?'
query = 'What type of programs did the author try writing on the IBM 1401 in 9th grade?'
query = 'How did the individual manage to save enough money to go back to RISD and pay off their college loans while working at Interleaf?'
rnodes = retriever.retrieve(query)
print(len(rnodes))
rnodes

2


  return np.dot(vectors, query)
  return np.dot(vectors, query)
  return np.dot(vectors, query)


[NodeWithScore(node=TextNode(id_='e0785300-a19b-49c9-b785-26d9b1eeaeb8', embedding=None, metadata={'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2025-07-11', 'last_modified_date': '2025-07-11', 'company_name': 'paul', 'age': 100, 'location': 'wikipedia', 'id': 123, 'chunk': 'big', 'questions_this_excerpt_can_answer': "1. What was the company name where the author worked and learned valuable lessons about technology companies being run by product people rather than sales people?\n2. What was the author's budget for everything else after paying rent in Florence, and how did his income change when he started working at Interleaf?\n3. What important lesson did the author learn at Interleaf that he later applied in both Viaweb and Y Combinator?\n4. What did the author learn in the color class he took at RISD, and why did he ultimately decide to drop ou

In [100]:
ranked = rerank(query, rnodes)

for i in ranked:
    display(i)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
Reranked nodes to 1


NodeWithScore(node=TextNode(id_='e0785300-a19b-49c9-b785-26d9b1eeaeb8', embedding=None, metadata={'file_path': '/Users/tikendra/Work/rag/src/data/paul_graham/paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2025-07-11', 'last_modified_date': '2025-07-11', 'company_name': 'paul', 'age': 100, 'location': 'wikipedia', 'id': 123, 'chunk': 'big', 'questions_this_excerpt_can_answer': "1. What was the company name where the author worked and learned valuable lessons about technology companies being run by product people rather than sales people?\n2. What was the author's budget for everything else after paying rent in Florence, and how did his income change when he started working at Interleaf?\n3. What important lesson did the author learn at Interleaf that he later applied in both Viaweb and Y Combinator?\n4. What did the author learn in the color class he took at RISD, and why did he ultimately decide to drop out

In [96]:
rnodes[0].embedding

In [86]:
class Rag(BaseRag):
    def __init__(self, llm:LLM,url:str, api_key:str, collection_name:str, ingestion_pipeline:list[TransformComponent]|None=None, node_preprocessor:list[TransformComponent]|None=None):
        self.llm = llm
        self.ingestion_pipeline=ingestion_pipeline
        self.node_postprocessor=node_preprocessor
        self.index = self.setup_index(
            url=url,
            api_key=api_key,
            collection_name=collection_name
        )

    def setup_index(self, **kwargs) -> None:
        try:
            index = get_qdrant_index(
                url=kwargs.get('url'), 
                collection_name=kwargs.get('collection_name'), 
                api_key=kwargs.get('api_key')
                )
        except ValueError as e:
            raise e        



    def _query(self, id:str, query:str, k:int, **kwargs)-> str:
        return ''
    
    def _ingest(self, company_id:str, filepath:Path, **kwargs)-> None:

        store(nodes=nodes, index=self.index)

        return

In [87]:
from llama_index.core import Document
from llama_index.core.schema import MetadataMode

document = Document(
    text="This is a super-customized document",
    metadata={
        "file_name": "super_secret_document.txt",
        "category": "finance",
        "author": "LlamaIndex",
    },
    excluded_llm_metadata_keys=["file_name"],
    metadata_seperator="::",
    metadata_template="{key}=>{value}",
    text_template="Metadata: {metadata_str}\n-----\nContent: {content}",
)


In [1]:
from datetime import datetime

def now() -> str:
    """Returns the current timestamp in ISO format."""
    return datetime.now().isoformat()

In [5]:
now()

'2025-07-14T11:14:43.714828'

In [6]:
from llama_index.core.vector_stores.types import VectorStoreQuery, MetadataFilters, MetadataFilter, FilterCondition


In [7]:
def dict_to_metadatafilters(meta: dict) -> MetadataFilters:
    """
    Convert a dictionary to MetadataFilters, mapping each key-value pair to a MetadataFilter.
    """
    return MetadataFilters(filters=[
        MetadataFilter(key=k, value=v) for k, v in meta.items()
    ])

In [8]:
dict_to_metadatafilters({'company_name': 'paul', 'id': 123, 'age': 100, 'location': 'wikipedia', 'chunk': 'big'})

MetadataFilters(filters=[MetadataFilter(key='company_name', value='paul', operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='id', value=123, operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='age', value=100, operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='location', value='wikipedia', operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='chunk', value='big', operator=<FilterOperator.EQ: '=='>)], condition=<FilterCondition.AND: 'and'>)

In [9]:
MetadataFilters(filters=[
    MetadataFilter(key='company_name', value='paul'),
    MetadataFilter(key='id', value=123),
    MetadataFilter(key='age', value=100),
    MetadataFilter(key='location', value='wikipedia'),
    MetadataFilter(key='chunk', value='big')
])

MetadataFilters(filters=[MetadataFilter(key='company_name', value='paul', operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='id', value=123, operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='age', value=100, operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='location', value='wikipedia', operator=<FilterOperator.EQ: '=='>), MetadataFilter(key='chunk', value='big', operator=<FilterOperator.EQ: '=='>)], condition=<FilterCondition.AND: 'and'>)

In [13]:
f = {}

# f is None
{} is None

False