# Edgar Q&A Demo

In [None]:
import os
import sys
from pprint import pprint

# Resolve the two ancestor directories of the working directory and add them
# to the import search path: `kit_dir` is the parent of the current directory
# and `repo_dir` is the parent of `kit_dir`.
current_dir = os.getcwd()
kit_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
repo_dir = os.path.abspath(os.path.join(kit_dir, os.pardir))

sys.path.extend([kit_dir, repo_dir])

from langchain.vectorstores import Chroma
from langchain.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA, LLMChain, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory, ConversationSummaryMemory, ChatMessageHistory
from langchain.prompts import (
    PromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)
from utils.model_wrappers.api_gateway import APIGateway

from dotenv import load_dotenv
# Read environment variables from the `.env` file located in `repo_dir`
load_dotenv(dotenv_path=os.path.join(repo_dir, '.env'))

In [8]:
# Ticker symbol that selects which persisted vector db / collection is used
ticker = 'tsla'

# Collection name and on-disk location are both derived from the ticker
collection_name = f'{ticker}_collection'
PERSIST_DIRECTORY = os.path.join(kit_dir, f'data/vectordbs/{ticker}')

### Load embedding model and SEC vector db

In [None]:
# Options forwarded to the encoder: request normalized embeddings
encode_kwargs = {"normalize_embeddings": True}

# Embedding model: passages are embedded without an instruction prefix,
# queries are prefixed with a retrieval instruction.
embedding_model = HuggingFaceInstructEmbeddings(
    model_name='intfloat/e5-large-v2',
    embed_instruction="",
    query_instruction="Represent the query for retrieval: ",
    encode_kwargs=encode_kwargs,
)

# Open the persisted Chroma collection configured for the selected ticker
vectordb = Chroma(
    collection_name=collection_name,
    embedding_function=embedding_model,
    persist_directory=PERSIST_DIRECTORY,
)

### Choose LLM

In [None]:
# Backend / model selection. Exactly one (api_type, llm_expert) pair should be
# active; the gateway call below reads these variables, so switching backend
# only requires (un)commenting the pair you want.

# Using SambaNovaCloud
api_type = "sncloud"
llm_expert = 'llama3-8b'

# Using SambaStudio
# api_type = "sambastudio"
# llm_expert = 'Meta-Llama-3-70B-Instruct-4096'

# Set gateway: build the LLM from the selection above instead of hard-coded
# values, so the configuration variables are actually honoured.
llm = APIGateway.load_llm(
    type=api_type,
    streaming=False,
    coe=True,
    max_tokens_to_generate=512,
    temperature=0.0,  # deterministic generation for reproducible answers
    select_expert=llm_expert,
)

# Smoke test: confirm the endpoint is reachable and responds
llm.invoke("hi!")

### Retrieve information from vector db

In [None]:
# Retriever returning the 2 closest chunks from the vector db for each query
retriever = vectordb.as_retriever(search_kwargs={"k": 2})

# Prompt used by the "stuff" documents chain: the retrieved chunks are placed
# in {context} and the user query in {question}.
custom_prompt_template = """
You're an expert in filing reports\n\n 
Given the following context enclosed in backticks regarding a company annual/quarterly report filing:
```
{context}
```
Consider the question:  
{question}
Answer the question using only the information from the context. If the answer to the question can't be extracted from the previous context, then say "I do not have information regarding this".
Helpful Answer:"""
CUSTOMPROMPT = PromptTemplate(
    template=custom_prompt_template, input_variables=["context", "question"]
)

# Build the chain with the custom prompt supplied through `chain_type_kwargs`
# rather than overwriting `combine_documents_chain.llm_chain.prompt` afterwards,
# which depends on the chain's internal structure.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    input_key="question",
    output_key="response",
    return_source_documents=True,
    chain_type_kwargs={"prompt": CUSTOMPROMPT},
)

query = "what are the products and services that the company has?"
# `invoke` replaces the deprecated direct call (`qa({...})`)
response = qa.invoke({"question": query})
print(response['response'])

### Q&A chatbot

The following image shows the general architecture of this retriever chatbot.<br>
More resources on how chatbots are implemented with LangChain are available [here](https://python.langchain.com/docs/use_cases/chatbots).

![retriever_chatbot_architecture](https://python.langchain.com/assets/images/chat_use_case-eb8a4883931d726e9f23628a0d22e315.png)

### Chatbot example

In [None]:
# Chat prompt: a fixed system message, the accumulated chat history, and the
# latest user question.
prompt = ChatPromptTemplate(
    messages=[
        SystemMessagePromptTemplate.from_template(
            "You are a helpful assistant. Answer questions with the best of your capabilities and based on the chat history."
        ),
        # Filled from the memory object under the same key ("chat_history")
        MessagesPlaceholder(variable_name="chat_history"),
        HumanMessagePromptTemplate.from_template("<s>[INST] {question} [/INST]"),
    ]
)

# Buffer memory keeps the full message list; `return_messages=True` makes it
# return message objects, as required by the MessagesPlaceholder above.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
conversation = LLMChain(llm=llm, prompt=prompt, verbose=True, memory=memory)

# `invoke` replaces the deprecated direct call (`conversation({...})`)
conversation.invoke({"question": "hi, my name is Rodrigo"})

In [None]:
# Second turn of the chat; `invoke` replaces the deprecated direct chain call
conversation.invoke({"question": "can you tell me about the benefits of using AI in society?"})

In [None]:
# Follow-up that relies on the chat history; `invoke` replaces the deprecated direct chain call
conversation.invoke({"question": "could you tell me more details about point 1?"})

In [None]:
# Checks that the memory retained the name given in the first turn;
# `invoke` replaces the deprecated direct chain call
conversation.invoke({"question": "Great, what was my name?"})

### Chatbot with Summary and SEC retriever

In [None]:
def print_response(response: dict) -> None:
    """Print the question, retrieved context, chat history and answer of a chain response.

    Args:
        response: Mapping with the keys ``question``, ``source_documents``
            (objects exposing ``page_content``), ``chat_history`` (objects
            exposing ``pretty_print()``) and ``answer``.
    """

    def _heading(title: str) -> None:
        # Each section starts with a blank line followed by its title
        print(title)

    _heading('\nQUESTION:')
    print(response['question'])

    _heading('\nCONTEXT:')
    for doc in response['source_documents']:
        # One retrieved chunk per paragraph, wrapped at 180 columns
        pprint(doc.page_content, width=180)
        print()

    _heading('\nCHAT HISTORY:')
    for msg in response['chat_history']:
        msg.pretty_print()

    _heading('\nANSWER:')
    print(response['answer'])

In [None]:
# Prompt used to rewrite a follow-up question into a standalone question
# using the chat history.
custom_condensed_question_template = """You're an assistant
Given the following chat history and follow up question, rephrase the follow up question using the chat history. Only output the rephrased question.

Chat history:
{chat_history}

Follow up question: {question}

Rephrased question:"""

custom_condensed_question_prompt = PromptTemplate.from_template(custom_condensed_question_template)

# Prompt used to answer the (rephrased) question from the retrieved context
custom_qa_template = """
You're a helpful assistant in filing reports. Follow the following rules:
1. If you don't know the answer, respond kindly that you don't have information about it. 
2. Do not try to make up an answer.
3. Start the conversation expressing general assistance in anything the user would like to ask.
Given the following context and follow up question, provide a helpful answer.

Context:
{context}

Follow up question: {question}

Helpful answer:"""

custom_qa_prompt = PromptTemplate.from_template(custom_qa_template)

# Summary memory: the LLM maintains a running summary, seeded with `buffer`.
# `max_token_limit` is intentionally not passed: it is a
# ConversationSummaryBufferMemory option and ConversationSummaryMemory does
# not define it.
memory = ConversationSummaryMemory(
    llm=llm,
    buffer="The human and AI greet each other.",
    output_key='answer',  # the chain also returns source documents, so name the key to store
    memory_key='chat_history',
    return_messages=True,
)

# Retriever returning the 3 closest chunks from the vector db for each query
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
    chain_type="stuff",
    return_source_documents=True,
    verbose=True,
    condense_question_prompt=custom_condensed_question_prompt,
    combine_docs_chain_kwargs={'prompt': custom_qa_prompt},
)

# `invoke` replaces the deprecated direct call (`qa({...})`)
response = qa.invoke({"question": "what are the biggest risk that the company is facing?"})
print_response(response)

In [None]:
# Second question to the retrieval chatbot; `invoke` replaces the deprecated direct chain call
response = qa.invoke({"question": "Tell me about the business of the company, what products and services does it offer?"})
print_response(response)

In [None]:
# Follow-up question that builds on the previous answer; `invoke` replaces the deprecated direct chain call
response = qa.invoke({"question": "tell me more about the products that Tesla offers based on the report"})
print_response(response)