# Composable Indices Demo

In [None]:
# NOTE: This is ONLY necessary in a Jupyter notebook.
# Details: Jupyter runs an event loop behind the scenes, so starting another
#          event loop to make async queries results in nested event loops.
#          Nested event loops are normally not allowed; nest_asyncio.apply()
#          is used here to allow them, purely for convenience.
import nest_asyncio
nest_asyncio.apply()

In [None]:
import logging
import sys

# Send INFO-and-above log records to stdout so they show up in the cell output.
# basicConfig() already attaches one stdout StreamHandler to the root logger;
# adding a second one by hand printed every record twice (plus once more for
# each re-run of this cell). force=True (Python 3.8+) replaces whatever
# handlers are already on the root logger, so the configuration also takes
# effect where logging was pre-configured, and re-running the cell is idempotent.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:
from llama_index import (
    GPTSimpleVectorIndex,
    GPTEmptyIndex,
    GPTTreeIndex,
    GPTListIndex,
    SimpleDirectoryReader,
    ServiceContext,
)

### Load Datasets

Load PG's essay

In [None]:
# Load PG's essay: read the contents of the shared Paul Graham data directory
# into the documents that the essay index below is built from.
essay_documents = SimpleDirectoryReader('../paul_graham_essay/data/').load_data()

### Building the document indices
- Build a vector index for PG's essay
- Also build an empty index (to store prior knowledge)

In [None]:
# Service context used for the essay index build (chunk_size_limit of 512).
service_context = ServiceContext.from_defaults(
    chunk_size_limit=512,
)

# Vector index over the essay documents.
essay_index = GPTSimpleVectorIndex.from_documents(
    essay_documents,
    service_context=service_context,
)

# Empty index: constructed without any documents (stands in for prior knowledge).
empty_index = GPTEmptyIndex()

In [None]:
# Persist the essay index to a JSON file in the working directory.
essay_index.save_to_disk('index_pg.json')

### Loading the indices
Load the saved vector index for PG's essay from disk, and re-create the empty index.

In [None]:
# Reload the essay index from the JSON file saved earlier in this notebook.
# The empty index was never saved, so it is simply constructed again.
essay_index = GPTSimpleVectorIndex.load_from_disk('index_pg.json')
empty_index = GPTEmptyIndex()

### Query Indices
See the response of querying each index

In [None]:
# Query the essay index on its own, asking for the top 3 similarity matches
# and the "tree_summarize" response mode.
response = essay_index.query(
    "Tell me about what Sam Altman did during his time in YC",
    similarity_top_k=3,
    response_mode="tree_summarize"
)

In [None]:
# Show the essay index's answer.
print(str(response))

In [None]:
# Ask the same question of the empty index (no documents were added to it),
# for comparison with the essay index's answer.
response = empty_index.query(
    "Tell me about what Sam Altman did during his time in YC",
)

In [None]:
# Show the empty index's answer.
print(str(response))

Define a summary for each index.

In [None]:
# One-line description of each index; these are passed as `index_summaries`
# when the indices are composed into a graph.
essay_index_summary, empty_index_summary = (
    "This document describes Paul Graham's life, from early adulthood to the present day.",
    "This can be used for general knowledge purposes.",
)

### Define Graph (List Index as Parent Index)

This allows us to synthesize responses using both a knowledge corpus and prior knowledge.

In [None]:
from llama_index.indices.composability import ComposableGraph

In [None]:
# Query settings handed to graph.query(). The single entry is keyed by index
# struct type "simple_dict" — presumably the struct type of the simple vector
# index (essay_index); confirm against the llama_index docs.
query_configs = [
    dict(
        index_struct_type="simple_dict",
        query_mode="default",
        query_kwargs=dict(
            similarity_top_k=3,
            response_mode="tree_summarize",
        ),
    ),
]

In [None]:
# Compose a graph with a GPTListIndex as the parent of the two child indices;
# each child is described to the parent by the matching summary string.
graph = ComposableGraph.from_indices(
    GPTListIndex,
    [essay_index, empty_index], 
    index_summaries=[essay_index_summary, empty_index_summary]
)

In [None]:
# [optional] save the composed graph to a JSON file
graph.save_to_disk("index_graph.json")

In [None]:
# [optional] load the composed graph back from that JSON file
graph = ComposableGraph.load_from_disk("index_graph.json")

In [None]:
# Tip: set the logging level to DEBUG for more detailed output.
# Ask the list-index graph a question about Sam Altman, applying the
# per-index settings in query_configs.
response = graph.query(
    "Tell me about what Sam Altman did during his time in YC", 
    query_configs=query_configs,
)

In [None]:
# Show the graph's answer.
print(str(response))

In [None]:
# Show the formatted sources attached to the response.
print(response.get_formatted_sources())

### Define Graph (Tree Index as Parent Index)

This allows us to "route" a query to either a knowledge-augmented index, or to the LLM itself.

In [None]:
from llama_index.indices.composability import ComposableGraph

In [None]:
# Query settings handed to graph2.query(). The single entry is keyed by index
# struct type "simple_dict" — presumably the struct type of the simple vector
# index (essay_index); confirm against the llama_index docs.
query_configs = [
    dict(
        index_struct_type="simple_dict",
        query_mode="default",
        query_kwargs=dict(
            similarity_top_k=3,
            response_mode="tree_summarize",
        ),
    ),
]

In [None]:
# Compose a second graph over the same two child indices, this time with a
# GPTTreeIndex as the parent; the summaries describe each child to the parent.
graph2 = ComposableGraph.from_indices(
    GPTTreeIndex,
    [essay_index, empty_index],
    index_summaries=[essay_index_summary, empty_index_summary]
)

In [None]:
# [optional] save the tree-index graph to a JSON file
graph2.save_to_disk("index_graph2.json")

In [None]:
# [optional] load the tree-index graph back from that JSON file
graph2 = ComposableGraph.load_from_disk("index_graph2.json")

In [None]:
# Tip: set the logging level to DEBUG for more detailed output.
# Ask the tree-index graph a question about Paul Graham growing up.
response = graph2.query(
    "Tell me about what Paul Graham did growing up?", 
    query_configs=query_configs
)

In [None]:
# Print the answer, matching the other result cells in this notebook. A bare
# `str(response)` is only echoed as the cell's quoted repr and produces no
# output at all when this code runs outside a notebook.
print(str(response))

In [None]:
# Show the formatted sources attached to the response.
print(response.get_formatted_sources())

In [None]:
# Ask about a topic that is presumably not covered by the essay, to see how
# the tree-index graph handles a general-knowledge question.
response = graph2.query(
    "Tell me about Barack Obama", 
    query_configs=query_configs
)

In [None]:
# Print the answer, matching the other result cells in this notebook. A bare
# `str(response)` is only echoed as the cell's quoted repr and produces no
# output at all when this code runs outside a notebook.
print(str(response))

In [None]:
# Print the formatted sources, as the earlier source cells do, so newlines are
# rendered instead of being shown escaped inside the cell's repr output.
print(response.get_formatted_sources())