In [1]:
## Retriever and Chain with LangChain

In [28]:
## PDF reader
# Load "attention.pdf" (a path relative to the kernel's working directory)
# into `docs`, which the splitting step consumes.
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("attention.pdf")
docs = loader.load()  # presumably one Document per PDF page -- confirm against the loader docs


In [30]:
## Chunking: break the loaded documents into smaller pieces for embedding

from langchain_text_splitters import RecursiveCharacterTextSplitter

# chunk_size / chunk_overlap are measured by the splitter's length function
# (presumably characters with the defaults used here -- confirm).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
chunk_documents = text_splitter.split_documents(docs)

In [31]:
## Embed every chunk with OpenAI embeddings and index the vectors in FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(
    documents=chunk_documents,
    embedding=OpenAIEmbeddings(),
)
db

<langchain_community.vectorstores.faiss.FAISS at 0x7fdd19af9870>

In [32]:
# Vector search directly against the store: the query string is matched
# against the embedded chunks by similarity, and the text of the first
# returned chunk is printed.
query = "Embeddings and Softmax"
retrieved_result = db.similarity_search(query=query)
print(retrieved_result[0].page_content)

consists of two linear transformations with a ReLU activation in between.
FFN(x) = max(0,xW 1+b1)W2+b2 (2)
While the linear transformations are the same across different positions, they use different parameters
from layer to layer. Another way of describing this is as two convolutions with kernel size 1.
The dimensionality of input and output is dmodel = 512 , and the inner-layer has dimensionality
dff= 2048 .
3.4 Embeddings and Softmax
Similarly to other sequence transduction models, we use learned embeddings to convert the input
tokens and output tokens to vectors of dimension dmodel. We also use the usual learned linear transfor-
mation and softmax function to convert the decoder output to predicted next-token probabilities. In
our model, we share the same weight matrix between the two embedding layers and the pre-softmax
linear transformation, similar to [ 24]. In the embedding layers, we multiply those weights by√dmodel.
3.5 Positional Encoding


In [33]:
# Chat prompt template for the question-answering step.
# Placeholders:
#   {context} -- filled with the formatted retrieved documents by the document chain
#   {input}   -- the user's question, supplied under the "input" key at invoke time
# Both placeholder names must stay in sync with the chains that fill them.
# (Fixed the "tio" -> "tip" typo in the instruction text sent to the model.)
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the
context below. If the question can't be answered using
the information provided, say "I don't know". I will
tip you $1000 if the user finds the answer helpful.
<context>
{context}
</context>
                                          
Question: {input}
""")

In [34]:
# Chat model used to generate the answers.
# No arguments are passed, so the model name, temperature, etc. all come from
# the library defaults. The displayed repr shows an API key was picked up
# (presumably from the OPENAI_API_KEY environment variable -- confirm).
# NOTE(review): consider pinning the model and temperature for reproducible answers.
from langchain_openai import ChatOpenAI
llm_model = ChatOpenAI()
llm_model

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7fdd19cc6500>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7fdd19af9c00>, root_client=<openai.OpenAI object at 0x7fdd19af8eb0>, root_async_client=<openai.AsyncOpenAI object at 0x7fdd19cc64d0>, openai_api_key=SecretStr('**********'), openai_proxy='')

In [35]:
## Document chain: formats the documents into the prompt's {context} slot,
## then pipes the filled prompt into the chat model
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm=llm_model, prompt=prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\nAnswer the following question based only on the\ncontext below. If the question can\'t be answered using\nthe information provided, say "I don\'t know". I will\ntio you $1000 if the user finds the answer helpful.\n<context>\n{context}\n</context>\n                                          \nQuestion: {input}\n'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7fdd19cc6500>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7fdd19af9c00>, root_client=<openai.OpenAI object at 0x7fdd19af8eb0>, root_async_client=<openai.AsyncOpenAI object at 0x7fdd19cc64d0>, openai_api_key=SecretStr('**********'), openai_proxy='')
| St

In [36]:
# Retrievers: an interface that returns documents given an unstructured
# query. A retriever is not used to store documents, only to
# return/retrieve them.
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7fdd19af9870>)

In [39]:
# How do we combine document_chain and retriever to generate a response?
# -> Use a retrieval chain.
#
# Retrieval chain: takes a user inquiry and passes it to the retriever to
# fetch documents; those documents are then passed to an LLM to generate
# a response.
from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [40]:
# Run a question through the full retrieve-then-answer pipeline and show
# the generated text stored under the result's "answer" key.
response = retrieval_chain.invoke(
    {"input": "What exactly does attention mean as per the context?"}
)
response["answer"]

'In the context provided, attention refers to a mechanism in neural network models that allows for the focusing on different parts of the input sequence during processing.'

In [38]:
# Second question through the same retrieval chain.
# NOTE(review): this cell's recorded execution count (38) is lower than that of
# the cell that builds `retrieval_chain` (39) -- re-run top to bottom on a fresh
# kernel so the saved answer matches the current chain definition.
response = retrieval_chain.invoke(
    {"input": "What kind of hardware is used for training?"}
)
response["answer"]

'The hardware used for training the models is one machine with 8 NVIDIA P100 GPUs.'