In [None]:
from llama_index import (
    VectorStoreIndex, 
    SimpleDirectoryReader,
    StorageContext,
    ServiceContext,
    load_index_from_storage,
    set_global_service_context,
)

from llama_index.callbacks import CallbackManager, TokenCountingHandler
from llama_index.llms import OpenAI
from openai import log as openai_log
import tiktoken

from IPython.display import Markdown, display
import logging
import sys

from dotenv import load_dotenv
import os

# Send every log record (DEBUG and above) to stdout so it appears in the cell output.
# force=True replaces whatever handlers are already attached to the root logger, so
# re-running this cell keeps exactly one handler. (Adding a second StreamHandler on
# top of basicConfig prints each record twice, and once more per re-run.)
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

# The OpenAI client reads its console log level from the module attribute
# `openai.log`. Assigning to the imported name alone (`openai_log = "debug"`) only
# rebinds a notebook variable, so the attribute is set on the module itself; the
# notebook variable is kept in sync for any cell that reads it.
import openai

openai.log = openai_log = "debug"

# Load variables from a .env file into the process environment.
load_dotenv()
# load_dotenv("../.env")

# API_KEY is read here but not referenced again in the visible cells.
API_KEY = os.environ.get("OPENAI_API_KEY")

# Model name and answer tone used by the cells below.
OPENAI_MODEL, CHAT_MODE = "gpt-3.5-turbo", "technical"

# Read the single source text file (UTF-8) into llama_index documents.
reader = SimpleDirectoryReader(input_files=["./data/test2.txt"], encoding="utf-8")
documents = reader.load_data()

# Chat model: temperature 0, completions capped at 256 tokens.
llm = OpenAI(temperature=0, max_tokens=256, model=OPENAI_MODEL)

# Token counting uses the tiktoken encoding that matches the configured model.
model_encoding = tiktoken.encoding_for_model(OPENAI_MODEL)
token_counter = TokenCountingHandler(tokenizer=model_encoding.encode)
callback_manager = CallbackManager([token_counter])

# System prompt handed to the service context. CHAT_MODE is interpolated to set the
# tone of the answers; the wording is sent to the model verbatim, so spelling and
# punctuation matter.
system_prompt = f"""You are a chatbot answering all questions concerning the content of a given
    text file. The user will give you instructions on what questions to answer. 
    When you write the answers, you will need to ensure that the
    user's expectations are met. Remember, you are an accurate and experienced author 
    and you write unique and short answers. Keep your answers {CHAT_MODE} and based on facts – do not hallucinate features.
    You should use friendly, easy to read language, but stay correct and focussed.
    The answers should not have more than 10 sentences.
"""

# One shared configuration (LLM, chunking, system prompt, token-counting callbacks)
# that the index and the chat engines below are built with.
service_context = ServiceContext.from_defaults(
    llm=llm,
    chunk_size=800,
    chunk_overlap=20,
    system_prompt=system_prompt,
    callback_manager=callback_manager,
)

In [None]:
# Build the vector index from the loaded documents using the shared service context.
index = VectorStoreIndex.from_documents(
    documents=documents,
    service_context=service_context,
)
# index.storage_context.persist(vector_store_fname="vectorstore")

# Report the embedding tokens recorded by the token counter so far.
embedding_tokens_used = token_counter.total_embedding_token_count
logging.info(f"Number of used tokens: {embedding_tokens_used}")

# Persist the index. No directory is given, so the library default applies;
# a later cell reads from "./storage" — presumably the same location (confirm).
index_storage = index.storage_context
index_storage.persist()

In [None]:
# query_engine = index.as_query_engine()
# response = query_engine.query("Please summarize the text")
# logging.info(f"Number of used tokens: {token_counter.total_embedding_token_count}")
# display(Markdown(f"<b>{response}</b>"))

In [None]:
# Open the storage that was persisted under "./storage".
storage_context = StorageContext.from_defaults(
    persist_dir="./storage",
)

In [None]:
# Rebuild the index from the persisted storage. The service context is passed again
# because it is not restored from disk: without it the reloaded index falls back to
# the library defaults and loses the configured LLM, the system prompt and the
# token_counter callbacks.
index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
)
logging.info(f"Number of used tokens: {token_counter.total_embedding_token_count}")

In [None]:
# index.as_query_engine(response_mode="tree_summarize")

# Scratch cell for inspecting the reloaded index. An unfinished attribute lookup
# (`index.g`) raises AttributeError on "Restart & Run All", so list the public
# attribute names instead — presumably what the exploratory lookup was after.
[name for name in dir(index) if not name.startswith("_")]

In [None]:
from llama_index.memory import ChatMemoryBuffer

# Conversation memory with a 1500-token limit.
memory = ChatMemoryBuffer.from_defaults(token_limit=1500)

# Chat engine in "context" mode on top of the index, using the shared service context.
chat_engine = index.as_chat_engine(
    service_context=service_context,
    memory=memory,
    chat_mode="context",
)

response = chat_engine.chat("Please summarize the text")
logging.info(f"Number of used tokens: {token_counter.total_embedding_token_count}")

# Render the answer in bold in the cell output.
answer_markup = f"<b>{response}</b>"
display(Markdown(answer_markup))

In [None]:

# Start from zero so the numbers below belong to this single chat call only.
token_counter.reset_counts()
resp = chat_engine.chat("Please make a list of the key facts given in the text")

# logging.info() is not print(): positional arguments after the message are
# %-format arguments, so the message needs one placeholder per value. Passing the
# labels and values print-style makes the handler fail with a formatting error
# instead of emitting the report.
logging.info(
    "Embedding Tokens: %s\n"
    "LLM Prompt Tokens: %s\n"
    "LLM Completion Tokens: %s\n"
    "Total LLM Token Count: %s\n",
    token_counter.total_embedding_token_count,
    token_counter.prompt_llm_token_count,
    token_counter.completion_llm_token_count,
    token_counter.total_llm_token_count,
)



In [None]:
# Display the `response` attribute of the last chat result (`resp`) as the cell output.
resp.response

### streaming the response:

In [None]:
# chat_engine = index.as_chat_engine()
# streaming_response = chat_engine.stream_chat("Tell me a joke.")
# for token in streaming_response.response_gen:
#     print(token, end="")

### code snippets

In [None]:
# SimpleWebPageReader is not part of the notebook's top import cell, so bring it
# into scope here (same in-cell import style as the ChatMemoryBuffer cell).
from llama_index import SimpleWebPageReader

# Download the Wikipedia article; html_to_text=True asks the reader to convert the
# HTML into text.
data = SimpleWebPageReader(html_to_text=True).load_data(
    ["https://en.wikipedia.org/wiki/South_Africa"]
)

# Index the downloaded page and build the chat engine on that index. Without this
# step `data` is never used and the engine keeps answering from the text-file index.
# NOTE: this rebinds `index`, so the cells below work on the web page.
index = VectorStoreIndex.from_documents(data, service_context=service_context)
chat_engine = index.as_chat_engine(verbose=True)

In [None]:
# Chat engine in "react" mode over the current index, with verbose output enabled.
chat_engine = index.as_chat_engine(verbose=True, chat_mode="react")

react_question = "Use the tool to answer: What happened in the year 1652?"
response = chat_engine.chat(react_question)

In [None]:
# Literal is used in the return annotation, which is evaluated when the function is
# defined, so it must be in scope for this cell to run.
from typing import Literal

# NOTE(review): `ai_fn` is not imported anywhere in the visible notebook. It is
# presumably marvin's decorator (`from marvin import ai_fn`) — confirm and import it
# before running this cell. If so, the docstring is likely part of the runtime
# behavior, so its wording is left untouched.
@ai_fn
def classify_text(text: str) -> Literal['sports', 'politics', 'technology']:
    '''
        Correctly classifies the passed `text` into one of the predefined categories. 
    '''

Some useful links:
- https://gpt-index.readthedocs.io/en/latest/examples/metadata_extraction/MarvinMetadataExtractorDemo.html
- https://gpt-index.readthedocs.io/en/stable/examples/chat_engine/chat_engine_context.html?ref=blog.streamlit.io
- https://gpt-index.readthedocs.io/en/latest/core_modules/query_modules/chat_engines/root.html
- https://gpt-index.readthedocs.io/en/latest/examples/callbacks/TokenCountingHandler.html
- https://colab.research.google.com/drive/1F-4r976AhCYmH9lK89S4t_dEAqcqho0S#scrollTo=bwdh1lj6g_pz

- sql: https://gpt-index.readthedocs.io/en/latest/examples/index_structs/struct_indices/SQLIndexDemo.html

- retrievers: https://github.com/SamurAIGPT/LlamaIndex-course/blob/main/fundamentals/Fundamentals.ipynb

- chat engine from query engine: https://gpt-index.readthedocs.io/en/latest/core_modules/query_modules/chat_engines/usage_pattern.html