In [None]:
import logging
import os
import openai

from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.callbacks.base import CallbackManager
from llama_index import (
    LLMPredictor,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)
from langchain.chat_models import ChatOpenAI
import chainlit as cl
from dotenv import load_dotenv
from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.indices.postprocessor import SimilarityPostprocessor

# Report only whether the key is configured. Echoing the value itself as the
# cell result would store the secret in the saved notebook output.
# NOTE(review): load_dotenv is imported above but never called in the visible
# cells — call it first if the key is expected to come from a .env file.
openai_key_present = os.environ.get("OPENAI_API_KEY") is not None
openai_key_present

In [None]:
# Hand the key from the environment to the OpenAI client (None when unset).
openai.api_key = os.getenv("OPENAI_API_KEY")

In [None]:
# Directory the vector index is persisted to and reloaded from.
PERSIST_DIR = "./storage"

try:
    print('Loading index...')
    # Rebuild the storage context from the persisted files, then load the index.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
except Exception as load_error:
    # `Exception` rather than a bare `except`, so interrupting the kernel
    # (KeyboardInterrupt) or a SystemExit is not mistaken for a missing index
    # and does not trigger a rebuild.
    print('Index not found, building new one.')
    # Show why loading failed so real errors are not hidden behind the rebuild.
    print(f'Reason: {load_error!r}')
    from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader

    documents = SimpleDirectoryReader(input_files=[
        "cookbook.md",
        "features.md",
    ]).load_data()
    index = GPTVectorStoreIndex.from_documents(documents)
    # Persist to the same directory the load path reads from.
    index.storage_context.persist(persist_dir=PERSIST_DIR)

In [None]:
# Chat model wrapped by the LLM predictor.
chat_llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.5,
    streaming=True,
)
llm_predictor = LLMPredictor(llm=chat_llm)

# Bundle the predictor and the chunk size into a service context.
# (Optional, currently disabled: callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]))
service_context = ServiceContext.from_defaults(
    chunk_size=512,
    llm_predictor=llm_predictor,
)

# Vector retriever over the loaded index, configured with similarity_top_k=10.
retriever = VectorIndexRetriever(index=index, similarity_top_k=10)

# Response synthesizer in "refine" mode, using the service context.
response_synthesizer = get_response_synthesizer(
    service_context=service_context,
    response_mode="refine",
)


# Assemble the query engine directly from the retriever and synthesizer built
# above. Routing them through `index.as_query_engine(retriever=...)` does not
# use the supplied retriever: that helper builds its own default retriever and
# overwrites the `retriever` keyword, so `similarity_top_k=10` was lost.
# NOTE(review): `streaming=True` only takes effect when from_args builds the
# synthesizer itself; with an explicit `response_synthesizer` it appears to be
# ignored — pass `streaming=True` to get_response_synthesizer if token
# streaming is wanted. Confirm against the installed llama_index version.
# A SimilarityPostprocessor(similarity_cutoff=0.7) can be supplied through
# `node_postprocessors=[...]` to drop weak matches.
query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    service_context=service_context,
    streaming=True,
)

In [None]:
# Query text; the leading and trailing newlines are part of the string.
question = "\nHow do I create a new feature?\n"

In [None]:
# Run the question through the query engine and keep the result.
response = query_engine.query(question)

# Bare expression: the notebook shows the response object as the cell output.
response

In [None]:
# Serialise the response to JSON text and save it to a file.
import json


def _response_to_jsonable(resp):
    """Flatten a query response into JSON-friendly builtins.

    The response object itself is not JSON-serialisable, so passing it straight
    to ``json.dump`` only ever wrote the ``default`` placeholder string. This
    pulls out the answer text (``str(resp)``) plus, when present, the
    ``source_nodes`` and ``metadata`` attributes.

    NOTE(review): attribute names (``source_nodes``, ``node``, ``score``,
    ``text``, ``metadata``/``extra_info``) follow the llama_index response
    classes — confirm against the installed version. Missing attributes are
    recorded as ``None`` / an empty list rather than raising.

    Args:
        resp: The object returned by ``query_engine.query``.

    Returns:
        dict: Keys ``response`` (str), ``metadata`` and ``source_nodes`` (list
        of dicts with ``score``, ``text`` and ``metadata``).
    """
    sources = []
    for hit in getattr(resp, "source_nodes", None) or []:
        # Scored wrappers expose the underlying node as `.node`; fall back to
        # the item itself when there is no wrapper.
        node = getattr(hit, "node", hit)
        sources.append({
            "score": getattr(hit, "score", None),
            "text": getattr(node, "text", None),
            "metadata": getattr(node, "metadata", getattr(node, "extra_info", None)),
        })
    return {
        "response": str(resp),
        "metadata": getattr(resp, "metadata", getattr(resp, "extra_info", None)),
        "source_nodes": sources,
    }


with open('response.json', 'w', encoding='utf-8') as f:
    # `default` still guards any nested value json cannot encode (for example
    # inside metadata) instead of aborting the dump.
    json.dump(_response_to_jsonable(response), f, default=lambda o: '<not serializable>', skipkeys=True, indent=4, sort_keys=True)

In [None]:
# Debugging aid: print the attribute and method names available on the response.
print(dir(response))

In [None]:
def list_sources(directory, excluded_names=("api.md", "all_pages.md", "unknown.md", "chainlit.md")):
    """Yield paths of the Markdown source files found under ``directory``.

    Walks ``directory`` recursively and yields ``os.path.join(root, name)`` for
    every file whose name ends in ``.md``, skipping the names listed in
    ``excluded_names``. Sub-directories and files are visited in sorted order
    so the result is reproducible across runs and file systems.

    The exclusion list previously contained ``"unknown.nd"``, which could never
    match a name ending in ``.md``; it is now ``"unknown.md"``.

    Args:
        directory: Root directory to search. A missing directory yields nothing.
        excluded_names: File names (not paths) to leave out of the result.

    Yields:
        str: Path to each matching Markdown file.
    """
    skipped = frozenset(excluded_names)
    for root, dirs, files in os.walk(directory):
        dirs.sort()  # in place, so os.walk descends in a stable order
        for name in sorted(files):
            if name.endswith(".md") and name not in skipped:
                yield os.path.join(root, name)


# Materialise the generator so the notebook displays the matching paths.
[*list_sources("./data")]