# Edgar Q&A Demo

In [None]:
import sys

sys.path.append("..")
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
    TokenTextSplitter,
)
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
from llama_index import download_loader
from models.sambanova_endpoint import SambaNovaEndpoint

## Fill in the API details below to access the LLM endpoint

In [None]:
# # PROVIDE: API Info
# base_url=f'https://sjc1-demo1.sambanova.net'
# project_id=
# endpoint_id=
# api_key=

### Fetch the latest annual report
Company search on SEC EDGAR: https://www.sec.gov/edgar/searchedgar/companysearch

In [None]:
from llama_hub.sec_filings.base import SECFilingsLoader

# How many of the most recent filings to request.
last_n = 1
# Company ticker symbol; lower-cased once here so later cells can reuse it as-is
# (the next cell builds its data directory path from it).
ticker = "TSLA".lower()

# Request the latest 10-K filing(s) for the chosen ticker.
# NOTE(review): presumably this writes the filings under data/<ticker>, which
# is where the following cell reads from — confirm against the loader.
loader = SECFilingsLoader(
    tickers=[ticker],
    amount=last_n,
    filing_type="10-K",
)
loader.load_data()

### Load SEC filings data

In [None]:
# Directory holding the filings for the selected ticker.
sec_dir = f"data/{ticker}"

# Load every file matching the `**/*.json` pattern under that directory,
# reading each one with TextLoader.
dir_loader = DirectoryLoader(
    sec_dir,
    glob="**/*.json",
    loader_cls=TextLoader,
)
documents = dir_loader.load()

### Choose LLM and embedding model

In [None]:
# Connection details (base_url, project_id, endpoint_id, api_key) must already
# be defined — see the commented-out "API Info" cell earlier in the notebook.
generation_kwargs = {"do_sample": True, "temperature": 0.1}
llm = SambaNovaEndpoint(
    base_url=base_url,
    project_id=project_id,
    endpoint_id=endpoint_id,
    api_key=api_key,
    model_kwargs=generation_kwargs,
)

# Instruction string handed to the embedding model as `query_instruction`.
query_instruction = "Represent the query for retrieval: "
embedding = HuggingFaceInstructEmbeddings(query_instruction=query_instruction)

### Tokenize/Embed and store documents in DB

In [None]:
## 2. Split the loaded documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=10,
)
texts = text_splitter.split_documents(documents)

## 3. Embed the chunks and add them to a Chroma store
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
)

### Q&A on annual report

In [None]:
# RetrievalQA and PromptTemplate are imported in the first cell of the notebook.

# Prompt that tells the model to answer only from the retrieved context.
# `{context}` receives the retrieved chunks and `{question}` the user query.
custom_prompt_template = """Use the following pieces of context about company anual/quarterly report filing to answer the question at the end. 
If the answer is not in context for answering, say that you don't know, don't try to make up an answer or provide an answer not extracted from provided context. 
Cross check if the answer is contained in provided context. If not than say "I do not have information regarding this."

{context}

Question: {question}
Helpful Answer:"""
CUSTOMPROMPT = PromptTemplate(
    template=custom_prompt_template, input_variables=["context", "question"]
)

# Retriever over the vector store, configured with k=2 results per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 2})

# Build the QA chain with the custom prompt supplied at construction time via
# `chain_type_kwargs`, rather than overwriting
# `qa.combine_documents_chain.llm_chain.prompt` afterwards, which depends on
# the chain's internal attribute layout.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    input_key="question",
    output_key="response",
    return_source_documents=True,
    chain_type_kwargs={"prompt": CUSTOMPROMPT},
)

# Ask a question; the answer is stored under the configured output key.
query = "What are the risk factors of Tesla for the year 2022?"
res = qa(query)
print(res["response"])