In [1]:
# Apply the nest_asyncio patch before anything else runs.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop and later library calls start their own — confirm.
import nest_asyncio
nest_asyncio.apply()

In [2]:
import logging
import sys
import os

# Route INFO-and-above log records to stdout so they appear in the cell output.
# basicConfig() already attaches a stdout StreamHandler to the root logger;
# adding a second StreamHandler on the same stream would print every record
# twice (and once more for each re-run of this cell), so only basicConfig is used.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [4]:
from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor, StorageContext
from llama_index import GPTVectorStoreIndex, GPTListIndex, GPTSimpleKeywordTableIndex
from llama_index.composability import ComposableGraph
from langchain.chat_models import ChatOpenAI
from llama_index.response.notebook_utils import display_response

#### Load Documents

In [5]:
# Read the essay data from the sibling example directory (path is relative to
# the notebook's working directory) and load it into `documents`.
reader = SimpleDirectoryReader('../paul_graham_essay/data')
documents = reader.load_data()

#### Parse into Nodes

In [6]:
from llama_index.node_parser import SimpleNodeParser
# Split the loaded documents into nodes with a default-configured parser;
# these same `nodes` are reused for the docstore and every index below.
nodes = SimpleNodeParser().get_nodes_from_documents(documents)

#### Add to Docstore

In [None]:
# DynamoDB table name comes from the environment rather than being hardcoded.
# Raises KeyError if DYNAMODB_TABLE_NAME is not set.
TABLE_NAME = os.environ["DYNAMODB_TABLE_NAME"]

In [None]:
from llama_index.storage.docstore.dynamodb_docstore import DynamoDBDocumentStore
from llama_index.storage.index_store.dynamodb_index_store import DynamoDBIndexStore

In [7]:
# Back both the document store and the index store with the same DynamoDB
# table; each store class is built from the table name in turn.
storage_context = StorageContext.from_defaults(
    **{
        store_kwarg: store_cls.from_table_name(table_name=TABLE_NAME)
        for store_kwarg, store_cls in (
            ("docstore", DynamoDBDocumentStore),
            ("index_store", DynamoDBIndexStore),
        )
    }
)

In [None]:
# Store the parsed nodes in the DynamoDB-backed docstore.
storage_context.docstore.add_documents(nodes)

#### Define Multiple Indexes

Each index is built over the same underlying nodes.

In [None]:
# List index over the shared nodes, attached to the shared storage context.
list_index = GPTListIndex(nodes, storage_context=storage_context)

In [None]:
# Vector store index over the shared nodes, attached to the shared storage context.
vector_index = GPTVectorStoreIndex(nodes, storage_context=storage_context)

In [None]:
# Keyword table index over the shared nodes, attached to the shared storage context.
keyword_table_index = GPTSimpleKeywordTableIndex(nodes, storage_context=storage_context)

In [None]:
# NOTE: the docstore still has the same nodes after building three indexes.
# The cell output is the number of entries currently in the docstore.
len(storage_context.docstore.docs)

#### Test out saving and loading

In [None]:
# NOTE: the docstore and index_store are persisted in DynamoDB by default
# NOTE: here we only need to persist the simple vector store to disk
storage_context.persist()

In [None]:
# note down index IDs
list_id = list_index.index_id
vector_id = vector_index.index_id
keyword_id = keyword_table_index.index_id

In [None]:
from llama_index.indices.loading import load_index_from_storage

# Build a fresh storage context pointing at the same DynamoDB table, as a
# new session would.
storage_context = StorageContext.from_defaults(
    index_store=DynamoDBIndexStore.from_table_name(table_name=TABLE_NAME),
    docstore=DynamoDBDocumentStore.from_table_name(table_name=TABLE_NAME),
)

# Reload the three indexes, in order, using the IDs recorded before the reset.
list_index, vector_index, keyword_table_index = (
    load_index_from_storage(storage_context=storage_context, index_id=saved_id)
    for saved_id in (list_id, vector_id, keyword_id)
)

#### Test out some Queries

In [None]:
# Deterministic (temperature 0) gpt-3.5-turbo chat model wrapped as a predictor.
llm_predictor_chatgpt = LLMPredictor(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
)
# Service context that carries the ChatGPT predictor and a chunk_size of 1024.
service_context_chatgpt = ServiceContext.from_defaults(
    chunk_size=1024,
    llm_predictor=llm_predictor_chatgpt,
)

In [None]:
# Query the list index with the ChatGPT service context built earlier in the
# notebook. Without this argument `service_context_chatgpt` is never used and
# the engine runs on whatever service context the index carries by default.
query_engine = list_index.as_query_engine(service_context=service_context_chatgpt)
list_response = query_engine.query("What is a summary of this document?")

In [None]:
# Render the list-index answer in the notebook output.
display_response(list_response)

In [None]:
# Query the vector index with the ChatGPT service context built earlier in the
# notebook. Without this argument `service_context_chatgpt` is never used and
# the engine runs on whatever service context the index carries by default.
query_engine = vector_index.as_query_engine(service_context=service_context_chatgpt)
vector_response = query_engine.query("What did the author do growing up?")

In [None]:
# Render the vector-index answer in the notebook output.
display_response(vector_response)

In [None]:
# Query the keyword table index with the ChatGPT service context built earlier
# in the notebook. Without this argument `service_context_chatgpt` is never
# used and the engine runs on whatever service context the index carries by default.
query_engine = keyword_table_index.as_query_engine(service_context=service_context_chatgpt)
keyword_response = query_engine.query("What did the author do after his time at YC?")

In [None]:
# Render the keyword-table-index answer in the notebook output.
display_response(keyword_response)