### Retriever and chain with LangChain

In [1]:
## Load the source PDF into LangChain documents
from langchain_community.document_loaders import PyPDFLoader

# The relative path is resolved against the notebook's working directory.
loader = PyPDFLoader(file_path="paper.pdf")
pdf_documents = loader.load()

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded PDF documents into overlapping chunks so that each piece
# can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(pdf_documents)

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Embed every chunk with the "thenlper/gte-small" model and index the
# resulting vectors in a FAISS vector store.
embedding = HuggingFaceEmbeddings(model_name="thenlper/gte-small")
db = FAISS.from_documents(documents=documents, embedding=embedding)

  from tqdm.autonotebook import tqdm, trange


In [9]:
# Sanity-check the index: search it with a sample query and display the
# text of the first chunk that comes back.
query = "Trading in the financial markets is complex and "
result = db.similarity_search(query=query)
result[0].page_content

'can identify the pattern and produce trading orders accordingly.\nOther researchers have tried to characterize ﬁnancial asset price movement by Linear\nRegression, Box formations, and dynamic channels such as Bollinger Bands (BB) and\nKeltner channels (Keltner channels are volatility-based bands invented by Chester Keltner\nthat can aid in determining the direction of a trend). According to this trading strategy, a\nbreakout of the form implies the strength of the asset’s price momentum and can be used\nto achieve abnormal returns. Ref. [ 58] used ML to design a trading system that is based on\nthree methodologies: Bollinger Bands (BB), Linear Regression, and Darvas boxes (Darvas'

In [10]:
## Load the LLM: the "llama2" model through LangChain's Ollama wrapper
# NOTE(review): presumably needs a reachable Ollama server with the llama2
# model already pulled — confirm before running on a fresh machine.
from langchain_community.llms import ollama

llm = ollama.Ollama(model="llama2")
# Bare expression so the notebook displays the LLM object's repr.
llm

Ollama()

In [13]:
## Chat prompt template
# The template has two placeholders: {context} for the supporting text and
# {input} for the user's question. The instruction restricts the answer to
# the supplied context only.
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("""
            Answer the following questions based on only the provided context.
            <context>
            {context}
            </context>
            Question: {input}
            """)

In [14]:
## Build the chain that combines documents with the prompt and sends them to the LLM
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [15]:
## Retriever: expose the FAISS vector store through LangChain's retriever interface
retriever = db.as_retriever()
# Bare expression so the notebook displays the retriever's repr.
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001A274F242E0>)

In [16]:
## Retrieval chain: fetch documents with `retriever`, then answer with `document_chain`
from langchain.chains import create_retrieval_chain

# NOTE: the variable name is misspelled ("retrrival"); it is kept as-is because
# the cell that invokes the chain refers to it by this exact name.
retrrival_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [20]:
# Ask a question through the retrieval chain; the question goes in under the
# "input" key and the generated text is read back from the "answer" key.
response = retrrival_chain.invoke(
    {"input": "Which machine learning models are used in this paper?"}
)
response["answer"]

'Based on the provided context, the following machine learning models are mentioned:\n\n1. Support Vector Machines (SVM)\n2. Long Short-Term Memory (LSTM) networks\n3. Random Forests\n4. Kernel Functions (including Radial Basis Gaussian (RBG) SVM)\n\nTherefore, the answer to the question is: LSTM, SVM, Random Forests, and Kernel Functions (including RBG SVM).'