# Show tqdm progress bars for all primary index creation operations

When creating an index, you can optionally set the `show_progress` flag in the `from_documents` index creation call to see tqdm progress bars for the slowest parts of the indexing process (e.g., parsing nodes from a document, creating embeddings, etc.).

`KeywordTableIndex.from_documents(documents=documents, show_progress=True)`

If the tqdm progress bars do not render as proper notebook widgets, install or upgrade `ipywidgets` and enable the widgets extension:

`pip install ipywidgets --upgrade`

`jupyter nbextension enable --py widgetsnbextension`

Run `jupyter notebook` from the root directory so that the notebook has access to the `paul_graham` data in the `/examples` folder.

In [None]:
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    get_response_synthesizer,
    DocumentSummaryIndex,
    LLMPredictor,
    ServiceContext,
    KeywordTableIndex,
    KnowledgeGraphIndex,
    SummaryIndex,
    TreeIndex,
)
import os
import openai
from llama_index.llms import OpenAI, MockLLM
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import SimpleGraphStore

In [None]:
# Set the OpenAI credentials used by the cells below.
# Replace the "OPENAI_API_KEY_HERE" placeholder with a real key before running.
# NOTE: this assignment overwrites any OPENAI_API_KEY already present in the
# process environment.
os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY_HERE"
# Hand the same value to the `openai` module directly.
openai.api_key = os.getenv("OPENAI_API_KEY")

In [None]:
# Load documents
# Reads the paul_graham example data; the path is relative, so it depends on
# the directory the notebook is run from. `documents` is reused by every
# index-building cell below.
documents = SimpleDirectoryReader("../../../examples/data/paul_graham").load_data()

### VectorStoreIndex

In [None]:
# Patch asyncio before any `use_async=True` call is made.
# Presumably needed because the notebook kernel already runs an event loop,
# and the async index builds below would otherwise fail to start a nested one
# -- confirm against the nest_asyncio documentation.
import nest_asyncio

nest_asyncio.apply()

In [None]:
# Build the same VectorStoreIndex several times to compare the console output
# with and without progress bars, and with async embedding enabled.
print("\nVectorStoreIndex with show_progress=True\n")
VectorStoreIndex.from_documents(documents, show_progress=True)

print("\nVectorStoreIndex with show_progress=False\n")
VectorStoreIndex.from_documents(documents, show_progress=False)

# The value of this last expression is what the notebook shows as the cell result.
print("\nVectorStoreIndex with show_progress=True, use_async=True\n")
VectorStoreIndex.from_documents(documents, show_progress=True, use_async=True)

# Disabled variant (progress bars with the synchronous path); uncomment to compare.
# print("\nVectorStoreIndex with show_progress=True, use_async=False\n")
# VectorStoreIndex.from_documents(documents, show_progress=True, use_async=False)


VectorStoreIndex with show_progress=True



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  2.78it/s]
Generating embeddings: 100%|██████████| 20/20 [00:01<00:00, 12.04it/s]



VectorStoreIndex with show_progress=False


VectorStoreIndex with show_progress=True, use_async=True



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  2.82it/s]
Generating embeddings: 100%|██████████| 2/2 [00:01<00:00,  1.39it/s]


<llama_index.indices.vector_store.base.VectorStoreIndex at 0x105a5b370>

### DocumentSummaryIndex

In [None]:
# LLM and service context shared by both DocumentSummaryIndex builds below.
llm_chatgpt = OpenAI(temperature=0, model="gpt-3.5-turbo")

service_context = ServiceContext.from_defaults(llm=llm_chatgpt, chunk_size=1024)

print("\nDocumentSummaryIndex with show_progress=True\n")
# The synthesizer is configured for "tree_summarize" with async enabled and is
# reused for both builds so that only `show_progress` differs between them.
response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize", use_async=True, service_context=service_context
)
DocumentSummaryIndex.from_documents(
    documents,
    service_context=service_context,
    response_synthesizer=response_synthesizer,
    show_progress=True,
)

# Same build without progress bars; as the final expression, its value is the
# one displayed as the cell result.
print("\nDocumentSummaryIndex with show_progress=False\n")
DocumentSummaryIndex.from_documents(
    documents,
    service_context=service_context,
    response_synthesizer=response_synthesizer,
    show_progress=False,
)


DocumentSummaryIndex with show_progress=True



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  2.09it/s]
Summarizing documents:   0%|          | 0/1 [00:00<?, ?it/s]

current doc id: 0e06a6b5-e808-4508-8051-63458a29c196


Summarizing documents: 100%|██████████| 1/1 [00:15<00:00, 15.24s/it]



DocumentSummaryIndex with show_progress=False

current doc id: 0e06a6b5-e808-4508-8051-63458a29c196


<llama_index.indices.document_summary.base.DocumentSummaryIndex at 0x13acbb4c0>

### KeywordTableIndex

In [None]:
# Exercise all four combinations of show_progress / use_async for
# KeywordTableIndex. The last two calls omit `show_progress` (and the final one
# also omits `use_async`), relying on the library defaults, which the printed
# labels describe as False.
print("\nKeywordTableIndex with show_progress=True, use_async=True\n")
KeywordTableIndex.from_documents(
    documents=documents, show_progress=True, use_async=True
)

print("\nKeywordTableIndex with show_progress=True, use_async=False\n")
KeywordTableIndex.from_documents(
    documents=documents, show_progress=True, use_async=False
)

print("\nKeywordTableIndex with show_progress=False, use_async=True\n")
KeywordTableIndex.from_documents(documents=documents, use_async=True)

# Final expression: its value is displayed as the cell result.
print("\nKeywordTableIndex with show_progress=False, use_async=False\n")
KeywordTableIndex.from_documents(documents=documents)


KeywordTableIndex with show_progress=True, use_async=True



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  2.25it/s]
Extracting keywords from nodes: 100%|██████████| 20/20 [00:54<00:00,  2.71s/it]



KeywordTableIndex with show_progress=True, use_async=False



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  3.29it/s]
Extracting keywords from nodes: 100%|██████████| 20/20 [00:46<00:00,  2.31s/it]



KeywordTableIndex with show_progress=False, use_async=True


KeywordTableIndex with show_progress=False, use_async=False



<llama_index.indices.keyword_table.base.KeywordTableIndex at 0x12fbbb250>

### KnowledgeGraphIndex

In [None]:
# Build a KnowledgeGraphIndex twice with progress bars: first synchronously,
# then with use_async=True. Each run creates its own LLM, service context,
# graph store and storage context, so the second build starts from an empty
# SimpleGraphStore rather than reusing the first one.
# NOTE(review): "text-davinci-002" is a legacy completion model -- confirm it is
# still available for the account/library version in use.
print("\nKnowledgeGraphIndex with show_progress=True, use_async=False\n")
llm = OpenAI(temperature=0, model="text-davinci-002")
service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)
KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
    use_async=False,
)

# Second run: identical configuration, fresh stores, async enabled. As the
# final expression, its value is displayed as the cell result.
print("\nKnowledgeGraphIndex with show_progress=True, use_async=True\n")
llm = OpenAI(temperature=0, model="text-davinci-002")
service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)
KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
    use_async=True,
)


KnowledgeGraphIndex with show_progress=True, use_async=False



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  1.86it/s]
Processing nodes: 100%|██████████| 40/40 [00:30<00:00,  1.30it/s]



KnowledgeGraphIndex with show_progress=True, use_async=True



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  2.09it/s]
Processing nodes: 100%|██████████| 40/40 [00:27<00:00,  1.47it/s]


<llama_index.indices.knowledge_graph.base.KnowledgeGraphIndex at 0x2c907d460>

### SummaryIndex

In [None]:
# Build a SummaryIndex with progress bars, then again without passing
# `show_progress` (library default; the printed label describes it as False).
print("\nSummaryIndex with show_progress=True\n")
SummaryIndex.from_documents(documents=documents, show_progress=True)

# Final expression: its value is displayed as the cell result.
print("\nSummaryIndex with show_progress=False\n")
SummaryIndex.from_documents(documents=documents)


SummaryIndex with show_progress=True



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  1.86it/s]



SummaryIndex with show_progress=False



<llama_index.indices.list.base.ListIndex at 0x12fbba3d0>

### TreeIndex

In [None]:
# Exercise all four combinations of show_progress / use_async for TreeIndex.
# A MockLLM is used instead of an OpenAI model here -- presumably so that
# summary generation does not make real API calls (TODO confirm).
print("\nTreeIndex with show_progress=True,  use_async=True\n")
llm = MockLLM(max_tokens=256)
# The same service context is shared by all four builds in this cell.
service_context = ServiceContext.from_defaults(llm=llm)
TreeIndex.from_documents(
    documents, service_context=service_context, show_progress=True, use_async=True
)

print("\nTreeIndex with show_progress=True, use_async=False\n")
TreeIndex.from_documents(
    documents, service_context=service_context, show_progress=True, use_async=False
)

# The remaining calls omit `show_progress` (and the last also omits
# `use_async`), relying on the library defaults.
print("\nTreeIndex with show_progress=False, use_async=True\n")
TreeIndex.from_documents(documents, service_context=service_context, use_async=True)

# Final expression: its value is displayed as the cell result.
print("\nTreeIndex with show_progress=False, use_async=False\n")
TreeIndex.from_documents(documents, service_context=service_context)


TreeIndex with show_progress=True,  use_async=True



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  1.80it/s]
Generating summaries: 100%|██████████| 2/2 [00:00<00:00, 624.62it/s]



TreeIndex with show_progress=True, use_async=False



Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  2.59it/s]
Generating summaries: 100%|██████████| 2/2 [00:00<00:00, 651.29it/s]



TreeIndex with show_progress=False, use_async=True


TreeIndex with show_progress=False, use_async=False



<llama_index.indices.tree.base.TreeIndex at 0x13a2f3070>