## Retriever and Chain with LangChain

In [1]:
import os
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Re-export the LangSmith settings only when they are actually defined.
# os.environ values must be strings: assigning the None that os.getenv()
# returns for an unset variable raises TypeError, which would abort this
# cell on any machine where the variables are not configured.
for _env_name in ("LANGCHAIN_API_KEY", "LANGCHAIN_TRACING_V2"):
    _env_value = os.getenv(_env_name)
    if _env_value is not None:
        os.environ[_env_name] = _env_value

In [2]:
# Ingestion: point a PyPDFLoader at the local PDF file and load it into
# `text_documents`.
loader = PyPDFLoader(file_path="attention.pdf")
text_documents = loader.load()

In [3]:
# Transform: split the loaded documents into chunks (chunk_size=1000) that
# overlap by 200, so neighbouring chunks share some text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
documents = text_splitter.split_documents(documents=text_documents)

In [4]:
# Embed chunks with the Ollama "tinyllama" model and index them in FAISS.
# NOTE(review): only the first five chunks are indexed (documents[:5]), so
# retrieval can never return text from the rest of the PDF. Presumably this
# keeps embedding time short for the demo — confirm before relying on answers.
db = FAISS.from_documents(
    documents=documents[:5],
    embedding=OllamaEmbeddings(model="tinyllama"),
)

In [5]:
# Sanity-check the vector store: run a similarity search for a sample
# question and display the text of the first returned chunk.
query = "Who are the authors of the attention is all you need paper"
result = db.similarity_search(query=query)

result[0].page_content

'best models from the literature. We show that the Transformer generalizes well to\nother tasks by applying it successfully to English constituency parsing both with\nlarge and limited training data.\n∗Equal contribution. Listing order is random. Jakob proposed replacing RNNs with self-attention and started\nthe effort to evaluate this idea. Ashish, with Illia, designed and implemented the first Transformer models and\nhas been crucially involved in every aspect of this work. Noam proposed scaled dot-product attention, multi-head\nattention and the parameter-free position representation and became the other person involved in nearly every\ndetail. Niki designed, implemented, tuned and evaluated countless model variants in our original codebase and\ntensor2tensor. Llion also experimented with novel model variants, was responsible for our initial codebase, and\nefficient inference and visualizations. Lukasz and Aidan spent countless long days designing various parts of and'

In [6]:
from langchain_community.llms import Ollama
from langchain.prompts import ChatPromptTemplate

In [7]:
# Instantiate the Ollama LLM wrapper with the "tinyllama" model, the same
# model name that is passed to OllamaEmbeddings when the vector store is built.
llm = Ollama(model="tinyllama")

In [8]:
# Prompt template for the question-answering chain. It exposes two
# placeholders: {context} and {input}.
prompt = ChatPromptTemplate.from_template(
    template="""
    Answer the following question based only on the provided context.
    Think step by step before providing a detailed answer.
    I will tip you $1000 if the user finds the answer helpful.
    <context>
    {context}
    </context>
    Question: {input}
    """
)


#### Chain introduction: creating a stuff-documents chain

In [9]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [10]:

# Combine the LLM and the prompt template into a stuff-documents chain.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [11]:
# Expose the FAISS vector store through a retriever (default settings) and
# display its repr as the cell output.
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002AF7D4285B0>)

In [12]:
from langchain.chains import create_retrieval_chain

In [13]:
# Wire the retriever and the document chain together into one retrieval chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [16]:
# Run the retrieval chain; the text is passed under the "input" key.
response = retrieval_chain.invoke(
    {
        "input": "Self-attention has been used successfully in a variety of tasks including reading comprehension"
    }
)

In [17]:
# Display the value stored under the "answer" key of the chain response.
response["answer"]

'As per the provided context, "Think step by step before providing a detailed answer" means to consider carefully and thoroughly the given question based on the provided context. It is also an opportunity for the user to provide further explanation or evidence that supports their proposed solution.'