<a href="https://colab.research.google.com/github/jerryjliu/llama_index/blob/main/docs/examples/embeddings/together.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multi-Document RAG with Long-Context Embeddings (Together.ai) 

This notebook shows how to use long-context together.ai models for hierarchical retrieval + RAG. We index each document by running the embedding model over the entire document text. We then link each document to an underlying top-k based query engine for that document.

Visit https://together.ai and sign up to get an API key.

## Setup and Download Data

In [1]:
domain = "docs.llamaindex.ai"
docs_url = "https://docs.llamaindex.ai/en/latest/"
!wget -e robots=off --recursive --no-clobber --page-requisites --html-extension --convert-links --restrict-file-names=windows --domains {domain} --no-parent {docs_url}

Both --no-clobber and --convert-links were specified, only --convert-links will be used.
--2024-01-19 15:20:54--  https://docs.llamaindex.ai/en/latest/
Resolving docs.llamaindex.ai (docs.llamaindex.ai)... 104.18.0.163, 104.18.1.163
Connecting to docs.llamaindex.ai (docs.llamaindex.ai)|104.18.0.163|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘docs.llamaindex.ai/en/latest/index.html’

docs.llamaindex.ai/     [ <=>                ] 216.44K  --.-KB/s    in 0.01s   

2024-01-19 15:20:55 (15.9 MB/s) - ‘docs.llamaindex.ai/en/latest/index.html’ saved [221633]

--2024-01-19 15:20:55--  https://docs.llamaindex.ai/en/latest/genindex.html
Reusing existing connection to docs.llamaindex.ai:443.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘docs.llamaindex.ai/en/latest/genindex.html’

docs.llamaindex.ai/     [ <=>                ] 971.24K  --.-KB/s    in 0.06s   

2024-01-19 15:20:55 (14.8 MB

In [2]:
from llama_hub.file.unstructured.base import UnstructuredReader
from pathlib import Path
from llama_index.llms import OpenAI
from llama_index import ServiceContext
from llama_index import Document

In [3]:
reader = UnstructuredReader()
all_files_gen = Path("./docs.llamaindex.ai/").rglob("*")
all_files = [f.resolve() for f in all_files_gen]
all_html_files = [f for f in all_files if f.suffix.lower() == ".html"]


# TODO: set to higher value if you want more docs
doc_limit = 100

docs = []
for idx, f in enumerate(all_html_files):
    if idx > doc_limit:
        break
    print(f"Idx {idx}/{len(all_html_files)}")
    loaded_docs = reader.load_data(file=f, split_documents=True)
    # Hardcoded Index. Everything before this is ToC for all pages
    start_idx = 72
    loaded_doc = Document(
        text="\n\n".join([d.get_content() for d in loaded_docs[72:]]),
        metadata={"path": str(f)},
    )
    print(loaded_doc.metadata["path"])
    docs.append(loaded_doc)

[nltk_data] Downloading package punkt to /Users/jerryliu/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/jerryliu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Idx 0/691
/Users/jerryliu/Programming/gpt_index/docs/examples/embeddings/docs.llamaindex.ai/en/latest/index.html
Idx 1/691
/Users/jerryliu/Programming/gpt_index/docs/examples/embeddings/docs.llamaindex.ai/en/latest/genindex.html
Idx 2/691
/Users/jerryliu/Programming/gpt_index/docs/examples/embeddings/docs.llamaindex.ai/en/latest/search.html
Idx 3/691
/Users/jerryliu/Programming/gpt_index/docs/examples/embeddings/docs.llamaindex.ai/en/latest/contributing/documentation.html
Idx 4/691
/Users/jerryliu/Programming/gpt_index/docs/examples/embeddings/docs.llamaindex.ai/en/latest/contributing/contributing.html
Idx 5/691
/Users/jerryliu/Programming/gpt_index/docs/examples/embeddings/docs.llamaindex.ai/en/latest/changes/changelog.html
Idx 6/691
/Users/jerryliu/Programming/gpt_index/docs/examples/embeddings/docs.llamaindex.ai/en/latest/changes/deprecated_terms.html
Idx 7/691
/Users/jerryliu/Programming/gpt_index/docs/examples/embeddings/docs.llamaindex.ai/en/latest/understanding/understanding.htm

## Building Multi-Document Hierarchical Retrieval

If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.

In [None]:
!pip install llama-index

In [4]:
# You can set the API key in the embeddings or env
# import os
# os.environ["TOEGETHER_API_KEY"] = "your-api-key"

from llama_index.embeddings import TogetherEmbedding

api_key = "69830899a4a7bb86b9abbdfea76a114bfbabcf66bde48956a3ecf789256e7601"

embed_model_long = TogetherEmbedding(
    model_name="togethercomputer/m2-bert-80M-32k-retrieval", api_key=api_key
)
embed_model_short = TogetherEmbedding(
    model_name="togethercomputer/m2-bert-80M-8k-retrieval", api_key=api_key
)

llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
sc_long = ServiceContext.from_defaults(llm=llm, embed_model=embed_model_long)
sc_short = ServiceContext.from_defaults(llm=llm, embed_model=embed_model_short)

In [7]:
from llama_index.schema import IndexNode
from llama_index import load_index_from_storage, StorageContext, VectorStoreIndex
from llama_index.node_parser import SentenceSplitter
import os
from tqdm.notebook import tqdm
import pickle


async def build_index_per_doc(nodes, file_base):
    print(file_base)

    vi_out_path = f"./data/llamaindex_docs/{file_base}"
    if not os.path.exists(vi_out_path):
        Path("./data/llamaindex_docs/").mkdir(parents=True, exist_ok=True)
        # build vector index
        vector_index = VectorStoreIndex(nodes, service_context=sc_short)
        vector_index.storage_context.persist(persist_dir=vi_out_path)
    else:
        vector_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=vi_out_path),
            service_context=sc_short,
        )

    # define query engines
    vector_query_engine = vector_index.as_query_engine()

    return vector_query_engine

async def build_hierarchical_index(docs):
    node_parser = SentenceSplitter()

    # Build agents dictionary
    query_engine_dict = {}
    index_node_dict = {}
    extra_info_dict = {}

    index_nodes = []

    # # this is for the baseline
    # all_nodes = []

    for idx, doc in enumerate(tqdm(docs)):
        nodes = node_parser.get_nodes_from_documents([doc])
        # all_nodes.extend(nodes)

        # ID will be base + parent
        file_path = Path(doc.metadata["path"])
        file_base = str(file_path.parent.stem) + "_" + str(file_path.stem)

        # get query engine
        vector_query_engine = await build_index_per_doc(nodes, file_base)
        query_engine_dict[file_base] = vector_query_engine


        # get document embedding
        doc_embedding = sc_long.embed_model.get_text_embedding(doc.get_content())
        index_node = IndexNode(
            text=str(file_path), index_id=str(file_path), obj=vector_query_engine
        )
        index_nodes.append(index_node)
        # extra_info_dict[file_base] = {"file_path": file_path}


    top_index = VectorStoreIndex(index_nodes, service_context=sc_long)

    return top_index

In [8]:
top_index = await build_hierarchical_index(docs)

  0%|          | 0/101 [00:00<?, ?it/s]

latest_index
latest_genindex
latest_search


ValueError: Request failed with status code 400: {"error":"Input required"}

In [None]:
top_query_engine = top_index.as_query_engine(verbose=True, similarity_top_k=2)

## Run Some Queries

In [6]:
response = top_query_engine.query(
    "Tell me about the different types of evaluation in LlamaIndex"
)
print(str(response))

In [None]:
response = top_agent.query(
    "Can you tell me more about some advanced retrieval strategies?"
)
print(str(response))