In [21]:
import getpass
import os
import sys

from IPython.display import Markdown, display
from langchain import OpenAI
from llama_index import SimpleDirectoryReader, GPTListIndex, readers, GPTSimpleVectorIndex, LLMPredictor, PromptHelper, ServiceContext

def construct_index(directory_path, index_path='index.json'):
    """Build a vector index over the files in a directory and save it to disk.

    Args:
        directory_path: Folder whose files are loaded with SimpleDirectoryReader.
        index_path: File the index is written to with save_to_disk.
            Defaults to 'index.json'.

    Returns:
        The GPTSimpleVectorIndex built from the loaded documents.

    Raises:
        FileNotFoundError: If directory_path does not exist.
        NotADirectoryError: If directory_path exists but is not a directory.
    """
    # Fail fast on a bad path, before the LLM client is constructed and
    # before any document is read or embedded.
    if not os.path.exists(directory_path):
        raise FileNotFoundError(f"Directory does not exist: {directory_path}")
    if not os.path.isdir(directory_path):
        raise NotADirectoryError(f"Not a directory: {directory_path}")

    # set maximum input size
    max_input_size = 4096
    # set number of output tokens
    num_outputs = 2000
    # set maximum chunk overlap
    max_chunk_overlap = 50
    # set chunk size limit
    chunk_size_limit = 600

    # define prompt helper
    prompt_helper = PromptHelper(max_input_size,
                                 num_outputs,
                                 max_chunk_overlap,
                                 chunk_size_limit=chunk_size_limit)

    # define LLM; max_tokens is kept equal to the prompt helper's num_outputs
    llm_predictor = LLMPredictor(llm=OpenAI(
        temperature=1,
        model_name="text-davinci-003",
        max_tokens=num_outputs,
        )) # type: ignore

    documents = SimpleDirectoryReader(directory_path).load_data()

    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)

    # Persist the index so it can be reloaded later without re-embedding.
    index.save_to_disk(index_path)

    return index

def ask_ai(index_path='index.json'):
    """Answer questions interactively against a saved index.

    Loads the index from index_path, then repeatedly prompts for a question,
    queries the index and renders the question and the answer as Markdown.

    The loop ends when the user enters a blank line or one of "exit", "quit"
    or "keluar" (case-insensitive), or when input is interrupted (Ctrl-C /
    kernel interrupt) or exhausted (EOF).

    Args:
        index_path: File the index is loaded from. Defaults to 'index.json'.
    """
    index = GPTSimpleVectorIndex.load_from_disk(index_path)
    exit_words = {"", "exit", "quit", "keluar"}

    while True:
        try:
            query = input("Kamu nanya ? ")
        except (EOFError, KeyboardInterrupt):
            # No more input, or the user interrupted: leave the loop cleanly.
            break

        # A blank line or an exit word ends the session instead of being
        # sent to the index as a query.
        if query.strip().lower() in exit_words:
            break

        response = index.query(query)
        display(Markdown(f"Pertanyaan: <b>{query}</b>"))
        display(Markdown(f"Jawaban: <b>{response.response}</b>"))

In [22]:
# Keep a key that is already set in the environment; otherwise prompt for it
# without echoing, so the secret is never written into the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Paste your OpenAI key here and hit enter:")

In [23]:
# Build the index from the files under context_data/data and save it to disk.
construct_index(directory_path="context_data/data")

INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 2678 tokens


<llama_index.indices.vector_store.vector_indices.GPTSimpleVectorIndex at 0x118ec7af0>

In [24]:
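# Uncomment to start the interactive question-and-answer loop.
# It loads index.json, so run construct_index(...) first.
# ask_ai()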