In [1]:
from langchain_community.llms import Ollama
# llama2 LLM accessed through LangChain's Ollama wrapper; used as the generator in `rag_chain`.
llm = Ollama(model="llama2")

In [2]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [3]:
from langchain_community.embeddings import OllamaEmbeddings
# Embedding function backed by llama2 via Ollama; passed to Chroma when building/opening the indexes.
ollama_embedding = OllamaEmbeddings(model='llama2')

In [7]:
# Fetch the blog post, parsing only elements whose class is
# "post-content", "post-title" or "post-header".
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)
docs = loader.load()

# Chunk the loaded documents (chunk_size=1000, chunk_overlap=200) and index the chunks in Chroma
# using the Ollama embedding function.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=ollama_embedding,
)

# Retriever over the indexed chunks, plus the RAG prompt pulled from the LangChain hub.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Join the ``page_content`` of each document into one string, separated by blank lines."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# RAG pipeline: the input is passed through unchanged under "question", while the retriever's
# documents are flattened by format_docs into "context"; both are fed to the prompt, whose
# output goes to the llama2 `llm` and is finally run through StrOutputParser.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [8]:
# Ask the blog-post chain a sample question.
rag_chain.invoke("What is Task Decomposition?")

'Task decomposition is a process of breaking down a complex task into smaller, more manageable sub-tasks. It involves identifying the key components of the task and organizing them into a hierarchical structure, with each sub-task having a specific function or role in the overall process. By decomposing a task into smaller parts, it becomes easier to understand, plan, and execute the task more efficiently.\n\nIn the context of question-answering tasks, task decomposition can involve breaking down the task of finding relevant information into smaller sub-tasks, such as searching for relevant keywords, identifying key concepts, and organizing the found information into a logical structure. By decomposing the task in this way, it becomes easier to tackle each sub-task separately, using appropriate techniques and tools to complete each part of the task.\n\nIn the resources provided, task decomposition is mentioned in the context of vector similarity search. In this context, task decomposit

### Trying the same with the Amazon 2023 annual report

In [4]:
from langchain_community.document_loaders import PyPDFLoader

In [5]:
# Show the kernel's current working directory.
!pwd

/Users/pragalbh/Documents/FinGPT1.0


In [6]:
# Path is relative to the notebook's working directory (the project root), consistent with the
# relative "./chroma_db" used for the vector store, so the notebook is not tied to one
# user's home directory.
loader = PyPDFLoader("data/Amazon-com-Inc-2023-Annual-Report.pdf")


In [7]:
# Load the PDF into documents; `docs` is consumed by the index-building branch below.
docs = loader.load()

In [9]:
# True: reuse the Chroma index persisted in ./chroma_db; False: rebuild it from `docs`.
load_vectorstore = True

In [8]:
import os

# Reuse the persisted index only when it is actually on disk. Opening a missing
# persist_directory does not fail; it yields an empty collection, so every query would
# retrieve nothing. In that case fall back to building (and persisting) the index.
if load_vectorstore and os.path.isdir("./chroma_db"):
    vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=ollama_embedding)
else:
    # Chunk the loaded report, embed the chunks and persist the index for later runs.
    # NOTE(review): if ./chroma_db already holds this collection, rebuilding appears to add
    # the chunks again rather than replace them — confirm and clear the directory if needed.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=ollama_embedding,
        persist_directory="./chroma_db",
    )

In [10]:
# Retriever over the store opened/built above; rebinds the `retriever` name used by the chains.
retriever = vectorstore.as_retriever()

In [14]:
# Display the prompt object currently bound to `prompt`.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [11]:
# Pull the "rlm/rag-prompt" template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Return the documents' ``page_content`` values joined by a blank line.

    NOTE(review): a function with this name and body is also defined earlier in the
    notebook; consider keeping a single definition.
    """
    separator = "\n\n"
    return separator.join(doc.page_content for doc in docs)


# Rebuild the llama2 chain so it uses the current `retriever` and `prompt`:
# question passed through as-is, retrieved documents flattened into "context",
# then prompt -> llm -> string output.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [17]:
# Query the llama2 chain for total revenue.
rag_chain.invoke("What is the revenue of Amazon in 2023?")

"Based on the information provided in the context, Amazon's revenue in 2023 was $575 billion."

In [13]:
# Query the llama2 chain for the AWS share of revenue.
rag_chain.invoke("What is the revenue of Amazon from AWS in 2023?")

"I don't have access to Amazon's internal financial reports, so I cannot provide the exact revenue of AWS in 2023. However, according to the information provided in the context, Amazon's total revenue grew by 12% year-over-year (YoY) in 2023, from $514B to $575B. Within that total revenue, AWS revenue increased by 13% YoY, from $80B to $91B."

Notes: With llama2 (via Ollama), the model produces low-confidence answers: it hedges even when the figure is present in the retrieved context.

Trying with an OpenAI LLM

In [20]:
import dotenv
# Load settings from a .env file; presumably this supplies the OpenAI API key used below — confirm.
dotenv.load_dotenv()

True

In [18]:
from langchain_openai import ChatOpenAI

# OpenAI chat model used as an alternative generator to the local llama2 `llm`.
llm_openAI = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [21]:
# Same pipeline shape as `rag_chain`, but with the OpenAI chat model as the generator.
# It captures whatever `retriever` and `prompt` are bound to when this cell runs.
rag_chain_open_AI = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm_openAI
    | StrOutputParser()
)

In [23]:
# Ask the OpenAI chain the same total-revenue question as the llama2 chain.
rag_chain_open_AI.invoke("What is the revenue of Amazon in 2023?")

"I don't know."

In [26]:
from langchain import hub
# Rebind `prompt` to the sentence-extraction template. Chains composed earlier still hold the
# prompt object they were built with, so they must be rebuilt to pick this one up.
prompt = hub.pull("hwchase17/qa-sentence-extraction")

In [38]:
# Rebuild the OpenAI chain so it is composed with the currently bound `prompt`.
rag_chain_open_AI = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm_openAI
    | StrOutputParser()
)

In [35]:
# Rebuild the llama2 chain with the currently bound `prompt`, for a side-by-side comparison
# with the OpenAI chain.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [40]:
# Display the prompt object currently bound to `prompt`.
prompt

PromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'hwchase17', 'lc_hub_repo': 'qa-sentence-extraction', 'lc_hub_commit_hash': 'e2ce4dd140d0fa4de9239fe2b75d3e4c27b2f85323fcd576b70fe8df08e7324b'}, template='For the question below, extract the exact full sentence that contains the answer. If the question cannot be answered using the information provided answer with "N/A"\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:')

In [39]:
# Ask the OpenAI chain about profit.
rag_chain_open_AI.invoke("What is the profit of Amazon in 2023?")

'Operating income in 2023 improved 201% Y oY from $12.2B (an operating margin of 2.4%) to $36.9B (an operating margin of 6.4%).'

In [30]:
# Ask the OpenAI chain about AWS revenue.
rag_chain_open_AI.invoke("What is the revenue of Amazon from AWS in 2023?")

'AWS revenue increased 13% Y oY from $80B to $91B.'

In [36]:
# Ask the llama2 chain the same profit question for comparison.
rag_chain.invoke("What is the profit of Amazon in 2023?")

'The exact sentence that contains the answer to the question "What is the profit of Amazon in 2023?" is:\n\n"Amazon’s operating income and Free Cash Flow (“FCF”) dramatically improved. Operating income in 2023 improved 201% Y oY from $12.2B (an operating margin of 2.4%) to $36.9B (an operating margin of 6.4%). Trailing Twelve Month FCF adjusted for equipment finance leases improved from -$12.8B in 2022 to $35.5B (up $48.3B)."\n\nTherefore, the profit of Amazon in 2023 is $36.9 billion.'