In [2]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, Document
from llama_index.llms import OpenAI
from llama_index import StorageContext, load_index_from_storage
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

index = None
documents = None

def construct_index():
    global index 

    try:
        # load index if already persists 
        storage_context = StorageContext.from_defaults(persist_dir="./storage")
        index = load_index_from_storage(storage_context)

    except:
        # otherwise, load documents and create index
        llm= OpenAI(temperature=0, model="gpt-3.5-turbo")
        service_context = ServiceContext.from_defaults(llm=llm)
        documents = SimpleDirectoryReader("test", recursive= True, filename_as_id=True).load_data()
        # print doc_id
        for doc in documents:
            print(doc.get_doc_id)
        index = VectorStoreIndex.from_documents(documents, service_context=service_context)
        index.storage_context.persist()

def on_modified():
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

    documents = SimpleDirectoryReader("test", recursive= True, filename_as_id=True).load_data()

    refreshed_docs = index.refresh_ref_docs(
            documents,
            update_kwargs={}
        )

    index.storage_context.persist()
    print(refreshed_docs)


# query the index
def query_index():
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)
    response = index.as_query_engine().query("What's the name of the character?")
    print(response)


construct_index()

<bound method Document.get_doc_id of Document(id_='test/file1/file1.txt', embedding=None, metadata={'file_path': 'test/file1/file1.txt', 'file_name': 'file1.txt', 'file_type': 'text/plain', 'file_size': 95, 'creation_date': '2024-07-07', 'last_modified_date': '2023-10-31', 'last_accessed_date': '2024-07-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, hash='2a3587a224a85588136da8da325e6b0064232214afd1717712593fea2a6c6b23', text='This is file 1.This is a test file.\nThe name of the character is Brian working in a book shop.', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')>
<bound method Document.get_doc_id of Document(id_='test/file2/file2.txt', embedding=Non

In [3]:
on_modified()
query_index()

[False, False, False, False, False, False]
Brian
