In [None]:
import nest_asyncio
nest_asyncio.apply()

In [None]:
import logging
import sys
import os

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [None]:
from llama_index import SimpleDirectoryReader, ServiceContext, LLMPredictor, StorageContext
from llama_index import VectorStoreIndex, ListIndex, SimpleKeywordTableIndex
from llama_index.composability import ComposableGraph
from langchain.chat_models import ChatOpenAI
from llama_index.response.notebook_utils import display_response

#### Load Documents

In [None]:
reader = SimpleDirectoryReader('../paul_graham_essay/data')
documents = reader.load_data()

#### Parse into Nodes

In [None]:
from llama_index.node_parser import SimpleNodeParser
nodes = SimpleNodeParser().get_nodes_from_documents(documents)

#### Add to Docstore

In [None]:
REDIS_HOST = os.getenv('REDIS_HOST', '127.0.0.1')
REDIS_PORT = os.getenv('REDIS_PORT', 6379)

In [None]:
from llama_index.storage.docstore import RedisDocumentStore
from llama_index.storage.index_store.redis_index_store import RedisIndexStore

In [None]:

storage_context = StorageContext.from_defaults(
    docstore=RedisDocumentStore.from_host_and_port(host=REDIS_HOST, port=REDIS_PORT),
    index_store=RedisIndexStore.from_host_and_port(host=REDIS_HOST, port=REDIS_PORT),
)

In [None]:
storage_context.docstore.add_documents(nodes)

#### Define Multiple Indexes

Each index uses the same underlying Node.

In [None]:
list_index = ListIndex(nodes, storage_context=storage_context)

In [None]:
vector_index = VectorStoreIndex(nodes, storage_context=storage_context)

In [None]:
keyword_table_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)

In [13]:
# NOTE: the docstore still has the same nodes
len(storage_context.docstore.docs)

AttributeError: 'bytes' object has no attribute 'deocode'

#### Test out saving and loading

In [14]:
# NOTE: docstore and index_store is persisted in MongoDB by default
# NOTE: here only need to persist simple vector store to disk
storage_context.persist()

In [15]:
# note down index IDs
list_id = list_index.index_id
vector_id = vector_index.index_id
keyword_id = keyword_table_index.index_id

NameError: name 'vector_index' is not defined

In [16]:
from llama_index.indices.loading import load_index_from_storage

# re-create storage context
storage_context = StorageContext.from_defaults(
    docstore=RedisDocumentStore.from_host_and_port(host=REDIS_HOST, port=REDIS_PORT),
    index_store=RedisIndexStore.from_host_and_port(host=REDIS_HOST, port=REDIS_PORT),
)

# load indices
list_index = load_index_from_storage(storage_context=storage_context, index_id=list_id)
vector_index = load_index_from_storage(storage_context=storage_context, vector_id=vector_id)
keyword_table_index = load_index_from_storage(storage_context=storage_context, keyword_id=keyword_id)

INFO:llama_index.indices.loading:Loading indices with ids: ['bf9196e3-4abf-48b8-b6d3-19ff8236a73a']
Loading indices with ids: ['bf9196e3-4abf-48b8-b6d3-19ff8236a73a']


NameError: name 'vector_id' is not defined

#### Test out some Queries

In [17]:
llm_predictor_chatgpt = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"))
service_context_chatgpt = ServiceContext.from_defaults(llm_predictor=llm_predictor_chatgpt, chunk_size=1024)

In [18]:
query_engine = list_index.as_query_engine()
list_response = query_engine.query("What is a summary of this document?")

KeyboardInterrupt: 

In [None]:
display_response(list_response)

In [None]:
query_engine = vector_index.as_query_engine()
vector_response = query_engine.query("What did the author do growing up?")

In [None]:
display_response(vector_response)

In [None]:
query_engine = keyword_table_index.as_query_engine()
keyword_response = query_engine.query("What did the author do after his time at YC?")

In [None]:
display_response(keyword_response)