In [1]:
import os
import openai
from dotenv import load_dotenv
from langchain.llms import OpenAI
from llama_index.vector_stores import SimpleVectorStore
from llama_index.indices.composability import ComposableGraph
from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
from llama_index.query_engine.transform_query_engine import TransformQueryEngine
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index import (
    Prompt,
    LLMPredictor,
    GPTSimpleKeywordTableIndex,
    VectorStoreIndex, 
    SimpleDirectoryReader, 
    ServiceContext, 
    StorageContext, 
    load_index_from_storage,
)

In [2]:
# Pull variables from a local .env file into the process environment, then
# hand the OpenAI key (None when the variable is unset) to the openai client.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [3]:
# --- Model, tracing callbacks, service context and storage context ---

# Debug handler wired into a callback manager; with print_trace_on_end=True
# a timing trace is printed after traced operations (see the index build output).
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

# Completion model wrapped in the predictor that LlamaIndex consumes.
text_davinci = OpenAI(
    model="text-davinci-003",
    temperature=0.6,
    max_tokens=2048,
)
llm_predictor = LLMPredictor(text_davinci)

# The service context bundles the predictor and callbacks; the storage context
# is created around a single in-memory SimpleVectorStore instance.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    callback_manager=callback_manager,
)
storage_context = StorageContext.from_defaults(vector_store=SimpleVectorStore())

print("LLM loaded!")

LLM loaded!


In [4]:
# Load every document found directly inside `data_dir`.
# `pdf_docs` maps a file's base name (extension removed) to the list of
# documents returned by SimpleDirectoryReader for that single file.
data_dir = './data/'
pdf_docs = {}

# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# order of documents (and of the indices built from them) reproducible.
for file in sorted(os.listdir(data_dir)):
    path = os.path.join(data_dir, file)

    # Skip sub-directories and hidden entries such as `.ipynb_checkpoints`
    # or `.DS_Store`; they are not documents and must not reach the reader.
    if file.startswith('.') or not os.path.isfile(path):
        continue

    # splitext strips only the final extension, so dotted names like
    # "paper.v2.pdf" keep their full stem instead of collapsing to "paper"
    # (which would silently overwrite another entry with the same prefix).
    fname, _ext = os.path.splitext(file)
    pdf_docs[fname] = SimpleDirectoryReader(input_files=[path]).load_data()

print('Documents loaded!')

Documents loaded!


In [5]:
# Build one vector index per loaded document set and record a short
# human-readable summary for each, keyed by the same title.
pdf_indices = {}
index_summaries = {}

for title, documents in pdf_docs.items():
    # Every index is created with the same service and storage contexts.
    pdf_indices[title] = VectorStoreIndex.from_documents(
        documents,
        service_context=service_context,
        storage_context=storage_context,
    )

    # Summary text later handed to the graph alongside the index.
    index_summaries[title] = f"Technical paper about {title}"

print('Indices built!')

**********
Trace: index_construction
    |_node_parsing ->  0.14852 seconds
      |_chunking ->  0.00281 seconds
      |_chunking ->  0.00155 seconds
      |_chunking ->  0.001759 seconds
      |_chunking ->  0.002081 seconds
      |_chunking ->  0.001851 seconds
      |_chunking ->  0.001887 seconds
      |_chunking ->  0.001941 seconds
      |_chunking ->  0.001614 seconds
      |_chunking ->  0.001467 seconds
      |_chunking ->  0.000672 seconds
      |_chunking ->  0.00125 seconds
      |_chunking ->  0.00125 seconds
      |_chunking ->  0.001619 seconds
      |_chunking ->  0.000681 seconds
      |_chunking ->  0.000862 seconds
      |_chunking ->  0.001261 seconds
      |_chunking ->  0.000923 seconds
      |_chunking ->  0.000763 seconds
      |_chunking ->  0.000857 seconds
      |_chunking ->  0.000706 seconds
      |_chunking ->  0.00089 seconds
      |_chunking ->  0.001398 seconds
      |_chunking ->  0.001038 seconds
      |_chunking ->  0.001096 seconds
      |_chunking 

In [6]:
# Compose the per-document indices under a keyword-table root index.
# Both dicts were filled in the same loop, so their value orders line up:
# the i-th summary describes the i-th child index.
child_indices = list(pdf_indices.values())
child_summaries = list(index_summaries.values())

graph = ComposableGraph.from_indices(
    root_index_cls=GPTSimpleKeywordTableIndex,
    children_indices=child_indices,
    index_summaries=child_summaries,
    max_keywords_per_chunk=100,
)

# Query transform that rewrites the incoming question per index
# (verbose=True prints the "Current query" / "New query" lines).
decompose_transform = DecomposeQueryTransform(llm_predictor, verbose=True)

print('Graph created!')

Graph created!


In [7]:
# Map each child index id to a query engine that first runs the decompose
# transform and then queries that index.
custom_query_engines = {}

for pdf_index in pdf_indices.values():
    base_engine = pdf_index.as_query_engine(service_context=service_context)
    # The index summary is passed to the transform as metadata.
    # NOTE(review): relies on index_struct.summary being populated already
    # (presumably by the graph construction cell) - confirm.
    custom_query_engines[pdf_index.index_id] = TransformQueryEngine(
        base_engine,
        decompose_transform,
        transform_metadata={'index_summary': pdf_index.index_struct.summary},
    )

print('Query engine mapped!')

Query engine mapped!


In [8]:
# Register a query engine for the graph's root index (simple keyword
# retrieval, tree-summarize response synthesis), then build the graph-level
# engine that dispatches through the per-index engine mapping.
root_index = graph.root_index
root_query_engine = root_index.as_query_engine(
    retriever_mode='simple',
    response_mode='tree_summarize',
    service_context=service_context,
)
custom_query_engines[root_index.index_id] = root_query_engine

query_engine_decompose = graph.as_query_engine(
    custom_query_engines=custom_query_engines,
)

print('Root query engine created!')

Root query engine created!


In [14]:
# Ask one question across all indexed documents via the graph query engine.
query = (
    "What is three-step phase shifting and what are the steps involved in the algorithm? "
    "Explain in simple terms."
)
response = query_engine_decompose.query(query)
print(response)

[33;1m[1;3m> Current query: What is three-step phase shifting and what are the steps involved in the algorithm? Explain in simple terms.
[0m[38;5;200m[1;3m> New query:  What is the Fast-Three-Step-Phase-Shifting-Algorithm and how does it work?
[0m[33;1m[1;3m> Current query: What is three-step phase shifting and what are the steps involved in the algorithm? Explain in simple terms.
[0m[38;5;200m[1;3m> New query:  What is the Fast-Three-Step-Phase-Shifting-Algorithm and what are the steps involved? Explain in simple terms.
[0m[33;1m[1;3m> Current query: What is three-step phase shifting and what are the steps involved in the algorithm? Explain in simple terms.
[0m[38;5;200m[1;3m> New query:  What are the three steps involved in the Phase-Shifting-Algorithm used in Fringe-Projection-Profilometry?
[0m[33;1m[1;3m> Current query: What is three-step phase shifting and what are the steps involved in the algorithm? Explain in simple terms.
[0m[38;5;200m[1;3m> New query:  

In [None]:
# Interactive question loop over the graph query engine.
# Type "exit", "quit" or "q" (or send EOF / interrupt the kernel) to stop;
# without an exit path the cell could only be ended by a kernel interrupt
# that surfaces as a traceback.
exit_commands = {'exit', 'quit', 'q'}

while True:
    try:
        query = input('>>> ').strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or a kernel interrupt ends the session cleanly.
        print()
        break

    if not query:
        # Blank input: prompt again rather than sending an empty query to the LLM.
        continue
    if query.lower() in exit_commands:
        break

    response = query_engine_decompose.query(query)
    print(response)

>>> what is phase unwrapping and why is it needed?
[33;1m[1;3m> Current query: what is phase unwrapping and why is it needed?
[0m[38;5;200m[1;3m> New query:  What is the Fast-Three-Step-Phase-Shifting-Algorithm and how does it relate to phase unwrapping?
[0m[33;1m[1;3m> Current query: what is phase unwrapping and why is it needed?
[0m[38;5;200m[1;3m> New query:  What is the Fast-Three-Step-Phase-Shifting-Algorithm and how does it use phase unwrapping?
[0m