# Simple Index Demo

#### Load documents, build the GPTSimpleVectorIndex

In [14]:
import logging
import sys

# Route INFO-level logs to stdout through exactly one root handler.
# The previous setup attached a second StreamHandler on top of basicConfig and
# stacked another on every re-run of this cell, so each log record was printed
# several times. `force=True` (Python 3.8+) removes any handlers already on the
# root logger before installing the new one, which keeps the cell idempotent.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

from gpt_index import GPTSimpleVectorIndex, SimpleDirectoryReader, ServiceContext
from gpt_index.docstore import SimpleDocumentStore
from gpt_index.node_parser import SimpleNodeParser
from IPython.display import Markdown, display
from datetime import datetime, timedelta

### Parse Documents into Nodes, add to Docstore

In this example, there are 3 different versions of PG's essay. They are largely identical **except** 
for one specific section, which details the amount of funding they raised for Viaweb. 

Funding amount — V1: $50k, V2: $30k, V3: $10k

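V1: 2020-01-01, V2: 2020-02-03, V3: 2022-04-12 (version dates; in the code below, recency is instead simulated by setting each version's last-accessed timestamp to 3, 2, and 1 hours before now, respectively)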

The idea is to encourage the index to fetch the most recent info (which is V3).

In [65]:
# reference time and node_info key used to stamp each version's node
key = "__last_accessed__"
now = datetime.now()

# read the three essay versions; keep the first document each reader returns
essay_paths = [
    '../node_postprocessor/test_versioned_data/paul_graham_essay_v1.txt',
    '../node_postprocessor/test_versioned_data/paul_graham_essay_v2.txt',
    '../node_postprocessor/test_versioned_data/paul_graham_essay_v3.txt',
]
doc1, doc2, doc3 = [
    SimpleDirectoryReader(input_files=[essay_path]).load_data()[0]
    for essay_path in essay_paths
]

# service context bundles the default components; its node parser does the chunking
service_context = ServiceContext.from_defaults(chunk_size_limit=512)
node_parser = service_context.node_parser

# parse every version separately so each keeps its own node list
nodes1, nodes2, nodes3 = [
    node_parser.get_nodes_from_documents([essay_doc])
    for essay_doc in (doc1, doc2, doc3)
]

# the modified chunk sits at the same position in every version;
# stamp it so that v1 looks oldest (3h ago) and v3 most recent (1h ago)
changed_chunk = 22
for hours_ago, version_nodes in zip((3, 2, 1), (nodes1, nodes2, nodes3)):
    stamp = (now - timedelta(hours=hours_ago)).timestamp()
    version_nodes[changed_chunk].node_info[key] = stamp

# only the three stamped chunks go into the docstore
nodes = [version_nodes[changed_chunk] for version_nodes in (nodes1, nodes2, nodes3)]
docstore = SimpleDocumentStore()
docstore.add_documents(nodes)

In [None]:
# build the vector index over the three timestamped nodes, passing along the docstore they were added to
index = GPTSimpleVectorIndex(nodes, docstore=docstore)

In [67]:
# save index to disk at the relative path 'index_simple.json' so it can be reloaded later
index.save_to_disk('index_simple.json')

In [68]:
# load index from disk; this rebinds `index` to the instance read back from 'index_simple.json'
index = GPTSimpleVectorIndex.load_from_disk('index_simple.json')

#### Query Index

In [69]:
# set time decay
from gpt_index.vector_stores.types import VectorStoreQueryConfig

# time decay is switched on with a rate of 0.5; access-time refresh
# (enabled by default) is turned off for now
query_config = VectorStoreQueryConfig(
    time_access_refresh=False,
    use_time_decay=True,
    time_decay_rate=0.5,
)

In [70]:
# ask about the one detail that differs between versions, requesting up to
# three nodes and applying the time-decay query config
question = "How much did the author raise in seed funding from Idelle's husband (Julian) for Viaweb?"
response = index.query(
    question,
    vector_store_query_config=query_config,
    similarity_top_k=3,
)

INFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 1690 tokens
> [query] Total LLM token usage: 1690 tokens
> [query] Total LLM token usage: 1690 tokens
INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 22 tokens
> [query] Total embedding token usage: 22 tokens
> [query] Total embedding token usage: 22 tokens


In [71]:
# last-accessed timestamp stored on each source node, in the order the query returned them.
# The lookup goes through `.node` so it reaches the node the same way the following
# cell does, rather than relying on the result wrapper exposing `node_info` itself.
[n.node.node_info[key] for n in response.source_nodes]

[1681975247.558827, 1681971647.558827, 1681968047.558827]

In [73]:
# show the text of the first source node returned for the query
print(response.source_nodes[0].node.get_text())

Node(text='Engineering that seemed to be at least as big as the group that actually wrote the software. Now you could just update the software right on the server.\n\nWe started a new company we called Viaweb, after the fact that our software worked via the web, and we got $50,000 in seed funding from Idelle\'s husband Julian. In return for that and doing the initial legal work and giving us business advice, we gave him 10% of the company. Ten years later this deal became the model for Y Combinator\'s. We knew founders needed something like this, because we\'d needed it ourselves.\n\nAt this stage I had a negative net worth, because the thousand dollars or so I had in the bank was more than counterbalanced by what I owed the government in taxes. (Had I diligently set aside the proper proportion of the money I\'d made consulting for Interleaf? No, I had not.) So although Robert had his graduate student stipend, I needed that seed funding to live on.\n\nWe originally hoped to launch in S