### LlamaIndex: Customizing Chatbots  
Sources: [1](https://lmy.medium.com/comparing-langchain-and-llamaindex-with-4-tasks-2970140edf33), [2](https://docs.llamaindex.ai/en/stable/), [3](https://github.com/run-llama/llama_index), [4](https://nanonets.com/blog/llamaindex/), [5](https://sharmadave.medium.com/llama-index-unleashes-the-power-of-chatgpt-over-your-own-data-b67cc2e4e277), [6](https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/), [7](https://docs.llamaindex.ai/en/stable/understanding/putting_it_all_together/chatbots/building_a_chatbot.html)  


#### Installing Packages

In [0]:
#!pip install -q openai==0.27.0
#!pip install -qU llama-index[local_models]  # Installs tools useful for private LLMs, local inference, and HuggingFace models
#!pip install -q llama-index[postgres]       # Is useful if you are working with Postgres, PGVector or Supabase
#!pip install -q llama-index[query_tools]    # Gives you tools for hybrid search, structured outputs, and node post-processing
!pip install -q llama-index==0.9.47                 # Just the core components  ## Follow: https://github.com/run-llama/llama_index/issues/10636
##!pip install -q llama-hub 
#!pip install -qU chromadb
!pip install -qU pypdf
!pip install -qU docx2txt
!pip install -qU sentence-transformers
!pip install -q unstructured
!pip install -q aa-llm-utils

# Restart the Python process after the installs above -- presumably so that the
# freshly installed package versions are the ones imported by the next cells
# (`dbutils` is provided by the notebook runtime, not imported here; confirm).
dbutils.library.restartPython()

#### Importing Packages

In [0]:
import os
import sys
import shutil
import glob
import re
import logging
from pathlib import Path
import nest_asyncio
#nest_asyncio.apply()

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
#import tiktoken
#from funcy import lcat, lmap, linvoke
#from IPython.display import Markdown, display
import openai
#import chromadb

## LlamaIndex LLMs
#from openai import OpenAI
#from openai import AzureOpenAI
from llama_index.llms import AzureOpenAI
from llama_index.llms import ChatMessage

from llama_index.llms import MessageRole
#from llama_index.llms import Ollama
#from llama_index.llms import PaLM

## LlamaIndex Embeddings
from llama_index.embeddings import OpenAIEmbedding
from llama_index.embeddings import AzureOpenAIEmbedding
from llama_index.embeddings import resolve_embed_model

## Llamaindex readers 
from llama_index import SimpleDirectoryReader
from llama_index import Document
#from llama_hub.file.unstructured.base import UnstructuredReader

## LlamaIndex Index Types
#from llama_index import GPTListIndex             
from llama_index import VectorStoreIndex
#from llama_index import GPTVectorStoreIndex  
#from llama_index import GPTTreeIndex
#from llama_index import GPTKeywordTableIndex
#from llama_index import GPTSimpleKeywordTableIndex
#from llama_index import GPTDocumentSummaryIndex
#from llama_index import GPTKnowledgeGraphIndex
#from llama_index.indices.struct_store import GPTPandasIndex
#from llama_index.vector_stores import ChromaVectorStore

## LlamaIndex Context Managers
from llama_index import ServiceContext
from llama_index import StorageContext
from llama_index import load_index_from_storage
from llama_index import set_global_service_context
from llama_index.response_synthesizers import get_response_synthesizer
from llama_index.response_synthesizers import ResponseMode
from llama_index.schema import Node
#from llama_index import LLMPredictor

## LlamaIndex Templates
from llama_index.prompts import PromptTemplate
from llama_index.prompts import ChatPromptTemplate

## LlamaIndex Tools
from llama_index.tools import QueryEngineTool
from llama_index.tools import ToolMetadata
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.chat_engine import SimpleChatEngine

## LlamaIndex Agents
from llama_index.agent import OpenAIAgent

## LlamaIndex Callbacks
from llama_index.callbacks import CallbackManager
from llama_index.callbacks import LlamaDebugHandler

from aa_llm_utils.utils import ensure_certificates
ensure_certificates()

#### Defining Model and Endpoints

In [0]:
## Defining LLM Model
## A full guide to using and configuring LLMs available here: https://docs.llamaindex.ai/en/stable/module_guides/models/llms.html
## Check also: https://docs.llamaindex.ai/en/stable/module_guides/models/llms/local.html
llm_option = "OpenAI"   ## "OpenAI" (Azure OpenAI deployment) or "Local" (Ollama)
emb_option = "OpenAI"   ## "OpenAI" (Azure OpenAI deployment) or "Local" (HuggingFace model)

## Azure OpenAI connection settings are needed by BOTH the LLM and the embedding
## model, so they are configured whenever either option uses Azure. Previously
## they were only set inside the LLM branch, which left `azure_endpoint` and
## `openai.api_key` undefined for llm_option="Local" + emb_option="OpenAI".
if "OpenAI" in (llm_option, emb_option):
    openai.api_type = "azure"
    azure_endpoint = "https://rg-rbi-aa-aitest-dsacademy.openai.azure.com/"
    #azure_endpoint = "https://chatgpt-summarization.openai.azure.com/"
    openai.api_version = "2023-07-01-preview"
    openai.api_key = os.environ["OPENAI_API_KEY"]   ## raises KeyError if the variable is not set

if llm_option == "OpenAI":
    deployment_name = "model-gpt-35-turbo"   ## Azure deployment name (passed as `engine`)
    openai_model_name = "gpt-35-turbo"       ## underlying model name (passed as `model`)
    llm = AzureOpenAI(api_key=openai.api_key,
                      azure_endpoint=azure_endpoint,
                      model=openai_model_name,
                      engine=deployment_name,
                      api_version=openai.api_version,
                      )
elif llm_option == "Local":
    print("Make sure you have installed Local Models - !pip install llama-index[local_models]")
    ## The file-level `Ollama` import is commented out, so import it where it is used.
    from llama_index.llms import Ollama
    llm = Ollama(model="mistral", request_timeout=30.0)
else:
    raise ValueError("Invalid LLM Model")

## Defining Embedding Model
## A full guide to using and configuring embedding models is available here. https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings.html
if emb_option == "OpenAI":
    embed_model_name = "text-embedding-ada-002"
    embed_model_deployment_name = "model-text-embedding-ada-002"
    embed_model = AzureOpenAIEmbedding(model=embed_model_name,
                                       deployment_name=embed_model_deployment_name,
                                       api_key=openai.api_key,
                                       azure_endpoint=azure_endpoint)
elif emb_option == "Local":
    embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")   ## bge-m3 embedding model
else:
    raise ValueError("Invalid Embedding Model")

## Logging Optionals
#logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
#logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

## Directory from which the persisted vector store index is loaded further below.
PERSIST_DIR = "/Workspace/ds-academy-research/LLamaIndex/VectorStoreIndex/"

In [0]:
## Debug callback: the handler is created with print_trace_on_end=True and is the
## only handler registered on the callback manager used to visualize execution.
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

## Optional ServiceContext.from_defaults arguments that are left at their defaults:
##   prompt_helper=<>
##   node_parser=<>
##   chunk_size=1000                                       -> parse documents into smaller chunks
##   system_prompt=(Optional[str])                         -> system-wide prompt prepended to all input prompts
##   query_wrapper_prompt=(Optional[BasePromptTemplate])   -> a format to wrap passed-in input queries
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    callback_manager=callback_manager,
)

## Register this context as the global default.
set_global_service_context(service_context)

#### [Storage Context](https://docs.llamaindex.ai/en/stable/api_reference/storage.html)  
LlamaIndex offers core abstractions around storage of Nodes, indices, and vectors. A key abstraction is the StorageContext - this contains the underlying BaseDocumentStore (for nodes), BaseIndexStore (for indices), and VectorStore (for vectors).
StorageContext defines the storage backend for where the documents, embeddings, and indexes are stored.   
```
storage_context = StorageContext.from_defaults(persist_dir="<path/to/index>")
```
You can learn more about [storage](https://docs.llamaindex.ai/en/stable/module_guides/storing/storing.html) and how to [customize](https://docs.llamaindex.ai/en/stable/module_guides/storing/customization.html) it.  

### Reading [Vector Store Index](https://docs.llamaindex.ai/en/stable/api_reference/query/retrievers/vector_store.html)  

In [0]:
# Load the previously persisted index: a StorageContext is pointed at PERSIST_DIR
# and the index is reconstructed from what is stored there.
vectorstoreindex = load_index_from_storage(storage_context=StorageContext.from_defaults(persist_dir=PERSIST_DIR))

#### Querying Index

In [0]:
# Build a query engine on top of the loaded index, ask one question and print the answer.
# NOTE(review): `retriever_mode` looks like an option of other index types -- confirm
# that it has an effect for a vector store index.
query_engine = vectorstoreindex.as_query_engine(retriever_mode="embedding",
                                                response_mode="compact",
                                                verbose=True)
response = query_engine.query("Will GenAI create new jobs?")
print(response)

#### Creating a Simple Interactive Chatbot for our Index

In [0]:
# Create a chat engine over the index in "condense_question" mode
# (presumably each message is first rewritten into a standalone question using
# the chat history -- confirm against the LlamaIndex chat engine docs).
chat_engine = vectorstoreindex.as_chat_engine(chat_mode="condense_question", verbose=True)
# Clear any conversation state before starting the interactive session.
chat_engine.reset()
# Start the interactive read-eval-print chat loop in the notebook output.
chat_engine.chat_repl()

#### Creating a Customized Prompt Chatbot  (Error)

In [0]:
## Custom question-answering prompt. `{context_str}` and `{query_str}` are the
## placeholders filled in by the template at query time.
## The pieces below are adjacent string literals that Python concatenates, so
## every piece must carry its own separator: the instruction line previously ran
## straight into the next one ("...clear answerAlways start...").
template = (
    "Following Informations : \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Please answer the question even if the context does not provide a clear answer.\n"
    "Always start your answer with Renato: {query_str}\n"
    )
qa_template = PromptTemplate(template)

## Same chat engine as before, but with the custom QA prompt passed through.
chat_engine = vectorstoreindex.as_chat_engine(chat_mode="condense_question", 
                                              verbose=True, 
                                              text_qa_template=qa_template)
chat_engine.chat_repl()

#### Creating an [interactive Chatbot](https://docs.llamaindex.ai/en/stable/understanding/putting_it_all_together/chatbots/building_a_chatbot.html) with Agents

(only works with certain OpenAI models/versions that implement agents)  
https://github.com/openai/openai-python/issues/517  
https://github.com/run-llama/llama_index/issues/9618 

In [0]:
## Folder holding the source PDFs (relative to the notebook's working directory).
DOCS_DIR = "../../Data/pdf/"

## Keep only the files whose name starts with "NASDAQ", in sorted order,
## and list them one per line.
doclist = sorted(name for name in os.listdir(DOCS_DIR) if name.startswith("NASDAQ"))
for name in doclist:
    print(name)

In [0]:
# Base directory for the per-year indexes: each year is persisted to (and later
# reloaded from) AGENT_DIR + "<year>".
AGENT_DIR = "/Workspace/ds-academy-research/LLamaIndex/AgentsIndex/"

In [0]:
## Service context used for the per-year indexes (chunk_size=512).
service_context = ServiceContext.from_defaults(llm=llm,
                                               embed_model=embed_model,
                                               callback_manager=callback_manager,
                                               chunk_size=512,
                                               )

## Build one vector index per input file, keyed by the year found in its file name,
## and persist each index under AGENT_DIR + "<year>".
index_set = {}
years = []
year_pattern = re.compile(r"\d+")   ## raw string: '\d' in a plain literal is an invalid escape sequence
reader = SimpleDirectoryReader(input_files=[DOCS_DIR + d for d in doclist], recursive=True)
for docs in reader.iter_data():
    if not docs:
        ## Nothing was extracted for this file, so there is no year to index under.
        continue

    ## Assumes every document in one batch comes from the same file, so the year
    ## is taken once from the first document -- TODO confirm against the reader.
    file_name = docs[0].metadata["file_name"]
    year_match = year_pattern.search(file_name)
    if year_match is None:
        raise ValueError(f"No year found in file name: {file_name!r}")
    year = year_match.group()   ## first run of digits, same as re.findall(...)[0]

    for doc in docs:
        doc.metadata["year"] = year
    years.append(year)

    storage_context = StorageContext.from_defaults()
    index = VectorStoreIndex.from_documents(docs,
                                            service_context=service_context,
                                            storage_context=storage_context,
                                            )
    storage_context.persist(persist_dir=AGENT_DIR + year)
    index_set[year] = index

In [0]:
## Reload the persisted per-year indexes instead of rebuilding them.
## The list of years is fixed here and must match the sub-directories of AGENT_DIR.
years = ["2019", "2020", "2021"]
index_set = {
    year: load_index_from_storage(
        StorageContext.from_defaults(persist_dir=f"{AGENT_DIR}{year}"),
        service_context=service_context,
    )
    for year in years
}

In [0]:
import nest_asyncio
# Patch asyncio before the async sub-question engine below (use_async=True) is used;
# presumably required because the notebook already runs its own event loop -- confirm.
nest_asyncio.apply()

In [0]:
## Logging Optionals
## Send DEBUG-level logs to stdout exactly once.
## The previous version called basicConfig(stream=sys.stdout) AND added a second
## stdout StreamHandler, so every record was printed twice, and each re-run of
## this cell stacked up one more handler.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
root_logger = logging.getLogger()
## basicConfig does nothing when the root logger already has handlers, so set
## the level explicitly.
root_logger.setLevel(logging.DEBUG)
has_stdout_handler = any(
    isinstance(handler, logging.StreamHandler) and getattr(handler, "stream", None) is sys.stdout
    for handler in root_logger.handlers
)
if not has_stdout_handler:
    root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))

In [0]:
## One query-engine tool per yearly index. The name and description are the
## metadata attached to each tool.
individual_query_engine_tools = [
    QueryEngineTool(
        query_engine=index_set[year].as_query_engine(),
        metadata=ToolMetadata(
            name=f"vector_index_{year}",
            description=f"useful for when you want to answer queries about the {year} for AWS",
        ),
    )
    for year in years
]

## The sub-question engine and the agent were each given an identically
## configured ServiceContext; build it once and share it.
agent_service_context = ServiceContext.from_defaults(llm=llm,
                                                     embed_model=embed_model,
                                                     callback_manager=callback_manager,
                                                     )

## Query engine built on top of all per-year tools (async execution enabled).
query_engine = SubQuestionQueryEngine.from_defaults(query_engine_tools=individual_query_engine_tools,
                                                    service_context=agent_service_context,
                                                    use_async=True,
                                                    )

## Expose the sub-question engine as a tool as well.
## (Description fixed: it previously read "multiple documents documents".)
query_engine_tool = QueryEngineTool(query_engine=query_engine,
                                    metadata=ToolMetadata(name="sub_question_query_engine",
                                                          description="useful for when you want to answer queries that require analyzing multiple documents for AWS",
                                                          ),
                                    )

## The agent can choose between the per-year tools and the cross-year tool.
tools = individual_query_engine_tools + [query_engine_tool]
agent = OpenAIAgent.from_tools(tools=tools,
                               llm=llm,
                               service_context=agent_service_context,
                               verbose=True)

In [0]:
#import inspect
#lines = inspect.getsource(OpenAIAgent)
#print(lines)

In [0]:
# Call the first per-year tool directly with a question, bypassing the agent,
# to check that the tool itself works.
individual_query_engine_tools[0]('What is AWS?')

In [0]:
## Opening chat turn: introduce the user to the agent and show its reply.
response = agent.chat("Hi, my name is Renato")
print(response)

In [0]:
## Question about a single year.
response = agent.chat(
    "What were some of the biggest risk factors in 2020 for AWS?"
)
print(response)

In [0]:
## Question that spans several years.
cross_query_str = (
    "Compare/contrast the risk factors described in the AWS Executive Reports across years. "
    "Give answer in bullet points."
)
response = agent.chat(cross_query_str)
print(response)

In [0]:
# Start an interactive chat loop with the agent in the notebook output.
agent.chat_repl()