In [6]:
from langchain_community.document_loaders import PyPDFLoader # loader
from langchain.text_splitter import RecursiveCharacterTextSplitter # splitter
from langchain_community.embeddings import OllamaEmbeddings # vectorizer
from langchain_community.vectorstores import FAISS # vector stores (database)
from langchain_community.llms import Ollama # local (LLM)
from langchain_core.prompts import ChatPromptTemplate # basic prompt designing (one-shot prompting)

# CHAINs and RETRIEVERs
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

In [13]:
# Settings for this cell, named up front so they are easy to spot and tweak.
PDF_PATH = './article.pdf'
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
EMBEDDING_MODEL = 'gemma:2b'

# 1) Load the PDF into LangChain documents.
pdf_loader = PyPDFLoader(PDF_PATH)
docs = pdf_loader.load()

# 2) Split the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
documents = text_splitter.split_documents(docs)

# 3) Build the FAISS vector store from the chunks.
# NOTE: despite its name, `gemma_2b_llm` holds the Ollama *embeddings* wrapper
# (the vectorizer handed to FAISS); it is not the LLM that generates answers.
gemma_2b_llm = OllamaEmbeddings(model=EMBEDDING_MODEL)
db = FAISS.from_documents(documents, gemma_2b_llm)

In [14]:
# Prompt text: tells the model to answer only from the supplied context.
# {context} and {input} are the placeholders filled in when the chain runs.
PROMPT_TEMPLATE = """
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
<context>
{context}
</context>
Question: {input}"""

# Local Ollama model (gemma 2b) used as the LLM.
gemma_llm = Ollama(model='gemma:2b')

# Wrap the template text in a chat prompt object.
prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

# Document chain built from the LLM and the prompt.
document_chain = create_stuff_documents_chain(llm=gemma_llm, prompt=prompt)

### How does this work?

1. First, `db.as_retriever()` creates a fast retrieval mechanism that fetches documents from the FAISS vector store for the unstructured query supplied as `{input}`.

2. The documents retrieved by the retriever are then passed as context in the `{context}` field of the prompt, along with the `{input}` query.

3. The LLM then generates the response from the prompt, which combines the context (documents retrieved from the FAISS vector store of embeddings) with the user's input.

In [15]:
# Expose the vector store through its retriever interface.
retriever = db.as_retriever()

# Tie the retriever to the document chain to form the retrieval chain.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [None]:
# Question to ask about the loaded article.
input_text = 'what is the performance metric that this works has come up with ?'

# The chain is invoked with a dict carrying the question under the 'input' key,
# matching the {input} placeholder of the prompt.
chain_inputs = {'input': input_text}
response = retrieval_chain.invoke(chain_inputs)

In [None]:
# Show the 'answer' entry of the chain's response as this cell's output.
response['answer']