In [1]:
from dotenv import load_dotenv
load_dotenv()

True

# Part 12: Multi-Representation indexing

In [2]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
docs = loader.load()

loader = WebBaseLoader("https://lilianweng.github.io/posts/2024-02-05-human-data-quality/")
docs.extend(loader.load())


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
import uuid

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

chain = (
    {'doc': lambda x: x.page_content}
    | ChatPromptTemplate.from_template("Sumarize the following document:\n\n{doc}")
    | llm
    | StrOutputParser()
)
summaries = chain.batch(docs, {'max_concurrency': 5})

In [4]:
summaries

['This document is a comprehensive overview of LLM-powered autonomous agents. It outlines the key components of such systems: planning (task decomposition and self-reflection), memory (short-term and long-term), and tool use (leveraging external APIs). The document explores various techniques for each component, including Chain of Thought, Tree of Thoughts, ReAct, Reflexion, Chain of Hindsight, and Algorithm Distillation for planning; different types of memory and MIPS algorithms like LSH, ANNOY, HNSW, FAISS, and ScaNN for memory; and MRKL, TALM, Toolformer, HuggingGPT, and API-Bank for tool use. It also presents case studies like ChemCrow and Generative Agents to illustrate real-world applications. Finally, the document discusses the challenges facing LLM-powered agents, including finite context length, difficulties in long-term planning, and the reliability of natural language interfaces.',
 "This Lil'Log post by Lilian Weng discusses the importance of high-quality human-annotated da

In [6]:
!pip install langchain_chroma



In [7]:
from langchain.storage import InMemoryByteStore
from langchain_chroma import Chroma
from langchain.retrievers.multi_vector import MultiVectorRetriever

vectorstore = Chroma(collection_name="summaries",
                     embedding_function=GoogleGenerativeAIEmbeddings(model="models/embedding-001"))

store = InMemoryByteStore()
id_key = 'doc_id'

retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    byte_store=store,
    id_key=id_key
)
doc_ids = [str(uuid.uuid4) for _ in docs]

summary_docs = [
    Document(page_content=s, metadata={id_key: doc_ids[i]})
    for i, s in enumerate(summaries)
]
retriever.vectorstore.add_documents(summary_docs)
retriever.docstore.mset(list(zip(doc_ids, docs)))

In [8]:
query = "Memory in agents"
sub_docs = vectorstore.similarity_search(query, k=1)
sub_docs[0]

Document(metadata={'doc_id': '<function uuid4 at 0x0000029A8EA58400>'}, page_content='This document is a comprehensive overview of LLM-powered autonomous agents. It outlines the key components of such systems: planning (task decomposition and self-reflection), memory (short-term and long-term), and tool use (leveraging external APIs). The document explores various techniques for each component, including Chain of Thought, Tree of Thoughts, ReAct, Reflexion, Chain of Hindsight, and Algorithm Distillation for planning; different types of memory and MIPS algorithms like LSH, ANNOY, HNSW, FAISS, and ScaNN for memory; and MRKL, TALM, Toolformer, HuggingGPT, and API-Bank for tool use. It also presents case studies like ChemCrow and Generative Agents to illustrate real-world applications. Finally, the document discusses the challenges facing LLM-powered agents, including finite context length, difficulties in long-term planning, and the reliability of natural language interfaces.')

In [13]:
retrived_docs = retriever.invoke(query, n_results=1)
print(retrived_docs[0].page_content[:200])







Thinking about High-Quality Human Data | Lil'Log







































Lil'Log

















|






Posts




Archive




Search




Tags




FAQ









      Thinking abo


# Part 13: Raptor

# ColBERT

In [14]:
%pip install -U ragatouille

Collecting ragatouille
  Downloading ragatouille-0.0.9.post2-py3-none-any.whl.metadata (28 kB)
Collecting llama-index (from ragatouille)
  Downloading llama_index-0.12.41-py3-none-any.whl.metadata (12 kB)
Collecting faiss-cpu (from ragatouille)
  Downloading faiss_cpu-1.11.0-cp311-cp311-win_amd64.whl.metadata (5.0 kB)
Collecting colbert-ai>=0.2.19 (from ragatouille)
  Downloading colbert_ai-0.2.21-py3-none-any.whl.metadata (12 kB)
Collecting onnx (from ragatouille)
  Downloading onnx-1.18.0-cp311-cp311-win_amd64.whl.metadata (7.0 kB)
Collecting srsly (from ragatouille)
  Downloading srsly-2.5.1-cp311-cp311-win_amd64.whl.metadata (20 kB)
Collecting voyager (from ragatouille)
  Downloading voyager-2.1.0-cp311-cp311-win_amd64.whl.metadata (6.0 kB)
Collecting fast-pytorch-kmeans (from ragatouille)
  Downloading fast_pytorch_kmeans-0.2.2-py3-none-any.whl.metadata (1.1 kB)
Collecting bitarray (from colbert-ai>=0.2.19->ragatouille)
  Downloading bitarray-3.4.2-cp311-cp311-win_amd64.whl.metada

In [16]:
from ragatouille import RAGPretrainedModel
RAG = RAGPretrainedModel.from_pretrained('colbert-ir/colbertv2.0')




artifact.metadata:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/405 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

  self.scaler = torch.cuda.amp.GradScaler()


In [17]:
import requests

def get_wikipedia_page(title: str):
    """
    Retrieve the full text content of a Wikipedia page.

    :param title: str - Title of the Wikipedia page.
    :return: str - Full text content of the page as raw string.
    """
    # Wikipedia API endpoint
    URL = "https://en.wikipedia.org/w/api.php"

    # Parameters for the API request
    params = {
        "action": "query",
        "format": "json",
        "titles": title,
        "prop": "extracts",
        "explaintext": True,
    }

    # Custom User-Agent header to comply with Wikipedia's best practices
    headers = {"User-Agent": "RAGatouille_tutorial/0.0.1 (ben@clavie.eu)"}

    response = requests.get(URL, params=params, headers=headers)
    data = response.json()

    # Extracting page content
    page = next(iter(data["query"]["pages"].values()))
    return page["extract"] if "extract" in page else None

full_document = get_wikipedia_page("Hayao_Miyazaki")

In [18]:
RAG.index(
    collection=[full_document],
    index_name="Miyazali-123",
    max_document_length=100,
    split_documents=True
)

This is a behaviour change from RAGatouille 0.8.0 onwards.
This works fine for most users and smallish datasets, but can be considerably slower than FAISS and could cause worse results in some situations.
If you're confident with FAISS working on your machine, pass use_faiss=True to revert to the FAISS-using behaviour.
--------------------


[Jun 11, 12:20:29] #> Creating directory .ragatouille/colbert\indexes/Miyazali-123 






[Jun 11, 12:20:33] [0] 		 #> Encoding 218 passages..


  self.scaler = torch.cuda.amp.GradScaler()
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()


[Jun 11, 12:20:47] [0] 		 avg_doclen_est = 69.22935485839844 	 len(local_sample) = 218
[Jun 11, 12:20:47] [0] 		 Creating 1,024 partitions.
[Jun 11, 12:20:47] [0] 		 *Estimated* 15,091 embeddings.
[Jun 11, 12:20:47] [0] 		 #> Saving the indexing plan to .ragatouille/colbert\indexes/Miyazali-123\plan.json ..
used 20 iterations (73.9505s) to cluster 14338 items into 1024 clusters
[Jun 11, 12:22:01] Loading decompress_residuals_cpp extension (set COLBERT_LOAD_TORCH_EXTENSION_VERBOSE=True for more info)...


If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


PyTorch-based indexing did not succeed with error: Command '['where', 'cl']' returned non-zero exit status 1. ! Reverting to using FAISS and attempting again...
________________________________________________________________________________
 This means that indexing will be slow. To make use of your GPU.
Please install `faiss-gpu` by running:
pip uninstall --y faiss-cpu & pip install faiss-gpu
 ________________________________________________________________________________
Will continue with CPU indexing in 5 seconds...


[Jun 11, 12:22:07] #> Note: Output directory .ragatouille/colbert\indexes/Miyazali-123 already exists


[Jun 11, 12:22:07] #> Will delete 1 files already at .ragatouille/colbert\indexes/Miyazali-123 in 20 seconds...
[Jun 11, 12:22:31] [0] 		 #> Encoding 218 passages..
[Jun 11, 12:22:39] [0] 		 avg_doclen_est = 69.22935485839844 	 len(local_sample) = 218
[Jun 11, 12:22:39] [0] 		 Creating 1,024 partitions.
[Jun 11, 12:22:39] [0] 		 *Estimated* 15,091 embeddings.
[Jun

ImportError: DLL load failed while importing decompress_residuals_cpp: The specified module could not be found.

In [19]:
result = RAG.search(query="What animation studio did Miyazaki found?", k=3)
result

AssertionError: 

In [20]:
retriever = RAG.as_langchain_retriever(k=3)
retriever.invoke("What animation studio did Miyazaki found?")

AssertionError: 