## Chat engine in Condense Question mode with explicitly specified vector_retriever and response_synthesizer
### Trying to integrate Marvin's ai_model

In [None]:
from marvin import ai_model
from llama_index.bridge.pydantic import BaseModel, Field
from llama_index.callbacks import CallbackManager, TokenCountingHandler
from llama_index.llms import OpenAI
from llama_index.llms import ChatMessage, MessageRole
from llama_index import (
    VectorStoreIndex, 
    SimpleDirectoryReader,
    ServiceContext,
    set_global_service_context,
    get_response_synthesizer,
)
from llama_index.node_parser import SimpleNodeParser
from llama_index.node_parser.extractors import (
    MetadataExtractor,
)
from llama_index.text_splitter import TokenTextSplitter
from llama_index.node_parser.extractors.marvin_metadata_extractor import (
    MarvinMetadataExtractor,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine

from openai import log as openai_log
import tiktoken

import logging
import sys

from dotenv import load_dotenv
import os
import certifi

# Route all log records (DEBUG and above) to stdout so they show up in the
# notebook output.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# NOTE(review): this attaches a second stdout handler to the root logger; if
# basicConfig above installed its own handler, each record is printed twice.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Enable the OpenAI client's own debug output. The setting is read from the
# module attribute `openai.log`; assigning to the imported name `openai_log`
# alone only rebinds a notebook variable and never reaches the library.
import openai

openai_log = "debug"
openai.log = openai_log

# Load environment variables (e.g. OPENAI_API_KEY) from a .env file.
load_dotenv()
# load_dotenv("../.env")

# Workaround for macOS to solve "SSL: CERTIFICATE_VERIFY_FAILED Error":
# point both the requests library and the ssl module at certifi's CA bundle.
ca_bundle_path = certifi.where()
os.environ["REQUESTS_CA_BUNDLE"] = ca_bundle_path
os.environ["SSL_CERT_FILE"] = ca_bundle_path

API_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_MODEL = "gpt-3.5-turbo"

In [None]:
# Candidate text categories. The list is interpolated verbatim into the
# `text_category` field description of QueryDocument, so both the entries
# and their order are part of the prompt text.
CATEGORY_LABELS = [
    "Technical", "Science-Fiction", "Poetry",
    "Fantasy", "Mystery", "Romance",
    "Historical", "Fiction", "Self-Help",
    "Biography", "Travelogue", "Horror",
    "Comedy", "Thriller", "Science",
    "Philosophy", "Memoir", "Cookbook",
    "Business", "Drama", "Satire",
]

# Schema of the metadata extracted per text chunk. It is wrapped by Marvin's
# `ai_model` decorator and handed to MarvinMetadataExtractor further down;
# presumably the field descriptions guide the LLM when it fills the fields.
# Deliberately documented with comments rather than a docstring so the model
# schema itself stays unchanged.
@ai_model
class QueryDocument(BaseModel):
    # Short free-text summary of the chunk.
    description: str = Field(
        ...,
        description="a brief summary of the document content.",
    )
    # One label chosen from CATEGORY_LABELS (the list is embedded in the
    # description text below).
    text_category: str = Field(
        ...,
        description=f"best matching text category from the following list: {str(CATEGORY_LABELS)}",
    )



In [None]:

# Read the single UTF-8 input file into llama_index documents.
source_reader = SimpleDirectoryReader(
    input_files=["./data/test2.txt"],
    encoding="utf-8",
)
documents = source_reader.load_data()

# Chat model used by the rest of the notebook: temperature 0, completions
# capped at 512 tokens.
llm = OpenAI(
    model=OPENAI_MODEL,
    temperature=0,
    max_tokens=512,
)

# Count tokens with the tiktoken encoding that belongs to the chosen model and
# expose the counter through a callback manager shared by later components.
model_encoding = tiktoken.encoding_for_model(OPENAI_MODEL)
token_counter = TokenCountingHandler(tokenizer=model_encoding.encode)
callback_manager = CallbackManager([token_counter])


In [None]:


# Marvin-backed extractor that fills the QueryDocument schema, using the same
# OpenAI model as the rest of the notebook.
marvin_extractor = MarvinMetadataExtractor(
    marvin_model=QueryDocument,
    llm_model_string=OPENAI_MODEL,
    show_progress=True,
    callback_manager=callback_manager,
)
metadata_extractor = MetadataExtractor(extractors=[marvin_extractor])

# NOTE(review): only the embedding token counter is reported here, although
# the message reads "used tokens".
logging.info(f"Number of used tokens: {token_counter.total_embedding_token_count}")

# Split on spaces into chunks of 1024 tokens with 128 tokens of overlap.
text_splitter = TokenTextSplitter(
    chunk_size=1024,
    chunk_overlap=128,
    separator=" ",
    callback_manager=callback_manager,
)


In [None]:


# Node parser that combines the token splitter with the Marvin metadata
# extractor; it shares the notebook's callback manager so token usage is
# tracked by the same counter.
node_parser = SimpleNodeParser(
    callback_manager=callback_manager,
    metadata_extractor=metadata_extractor,
    text_splitter=text_splitter,
)

# Report the embedding token counter after constructing the parser.
logging.info(f"Number of used tokens: {token_counter.total_embedding_token_count}")

In [None]:

from pprint import pprint

# Turn the loaded documents into nodes; presumably this is where the splitter
# and the metadata extractor configured on the parser actually run.
nodes = node_parser.get_nodes_from_documents(documents)

logging.info(f"Number of used tokens: {token_counter.total_embedding_token_count}")

# Dump each node's metadata for a visual check of the extraction result.
for parsed_node in nodes:
    pprint(parsed_node.metadata)

In [None]:
# Take the category extracted for the first node; it is interpolated into the
# system prompt below. The result is None when the Marvin metadata has no
# "text_category" entry.
first_node_metadata = nodes[0].metadata
marvin_metadata = first_node_metadata["marvin_metadata"]
text_category = marvin_metadata.get("text_category")

In [None]:
# Prompt that asks for short answers written in the style of the detected
# text category. NOTE: it is built here but currently not handed to the
# service context (the corresponding keyword below is commented out).
system_prompt = f"""You are a chatbot that responds to all questions about the content of a given document, which is available in the form of embeddings in the given vector database. The user gives you instructions on which questions to answer. 
    When you write the answers, you need to make sure that the user's expectations are met. Remember that you are an accurate and experienced writer 
    and you write unique and short answers in the style of a {text_category} text. Don't add anything hallucinatory.
    Use friendly, easy-to-read language, and if it is a technical or scientific text, please stay correct and focused.
    Responses should be no longer than 10 sentences, unless the user explicitly specifies the number of sentences.
"""

# Service context built around the notebook's LLM and token-counting callback
# manager, then registered as the global default.
service_context = ServiceContext.from_defaults(
    callback_manager=callback_manager,
    chunk_overlap=152,
    chunk_size=1024,
    llm=llm,
    # system_prompt=system_prompt,
)
set_global_service_context(service_context)


In [None]:
# Build the vector index over the parsed nodes. The OpenAI API is called with
# the whole text here to create the embeddings.
vector_index = VectorStoreIndex(nodes, service_context=service_context)

# Report all token counters in a single record. logging.info() takes a format
# string followed by its arguments; passing the pieces print()-style (no
# placeholders in the message) makes the formatting step fail, so the counts
# never appear in the log.
logging.info(
    "Embedding Tokens: %s\n"
    "LLM Prompt Tokens: %s\n"
    "LLM Completion Tokens: %s\n"
    "Total LLM Token Count: %s\n",
    token_counter.total_embedding_token_count,
    token_counter.prompt_llm_token_count,
    token_counter.completion_llm_token_count,
    token_counter.total_llm_token_count,
)

In [None]:
# Retriever that returns the 2 most similar nodes from the vector index.
# The keyword is `similarity_top_k`; the misspelled `similarity_top` is not a
# parameter of VectorIndexRetriever and was silently ignored.
vector_retriever = VectorIndexRetriever(
    index=vector_index,
    similarity_top_k=2,
)

# Query engine wiring the explicit retriever to a default response
# synthesizer; it shares the callback manager so token usage is counted.
vector_query_engine = RetrieverQueryEngine(
    retriever=vector_retriever,
    response_synthesizer=get_response_synthesizer(),
    callback_manager=callback_manager,
)
logging.info(f"Number of used tokens: {token_counter.total_embedding_token_count}")

In [None]:
from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine

# Optional seed history (currently disabled): a list of `ChatMessage` objects
# that could be passed as `chat_history=custom_chat_history`.
# custom_chat_history = [
#     ChatMessage(
#         role=MessageRole.USER,
#         content='Hello assistant, we are having a insightful discussion about the given text content.'
#     ),
#     ChatMessage(
#         role=MessageRole.ASSISTANT,
#         content='Okay, sounds good.'
#     )
# ]

# Condense-question chat engine on top of the vector query engine, using the
# default condense prompt and an empty chat history. A custom prompt could be
# supplied via `condense_question_prompt=custom_prompt`.
chat_engine = CondenseQuestionChatEngine.from_defaults(
    query_engine=vector_query_engine,
    callback_manager=callback_manager,
    verbose=True,
)

In [None]:
# First chat turn: a question the indexed document is expected to cover.
user_question = "What did Einstein do?"
response = chat_engine.chat(user_question)

# Report the embedding token counter after the turn.
logging.info(f"Number of used tokens: {token_counter.total_embedding_token_count}")

In [None]:
# Display the `response` attribute of the first chat result (presumably the
# generated answer text) as the cell output.
response.response

In [None]:
# Display the `source_nodes` attribute of the chat result (presumably the
# retrieved nodes the answer was built from) as the cell output.
response.source_nodes

In [None]:
# Second chat turn: a follow-up question sent through the same chat engine;
# `response` is overwritten with the new result.
follow_up_question = "What was Einsteins' favorite food?"
response = chat_engine.chat(follow_up_question)

# Report the embedding token counter after the turn.
logging.info(f"Number of used tokens: {token_counter.total_embedding_token_count}")

In [None]:
# Display the `response` attribute of the second chat result as the cell
# output.
response.response

- https://github.com/jerryjliu/llama_index/blob/main/docs/examples/index_structs/doc_summary/DocSummary.ipynb
- https://betterprogramming.pub/llamaindex-0-6-0-a-new-query-interface-over-your-data-331996d47e89
- https://gpt-index.readthedocs.io/en/latest/examples/query_engine/CustomRetrievers.html
- https://gpt-index.readthedocs.io/en/latest/core_modules/query_modules/chat_engines/usage_pattern.html

- https://gpt-index.readthedocs.io/en/latest/examples/metadata_extraction/MarvinMetadataExtractorDemo.html