In [79]:
import bs4
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
## Digital Ingestion
from langchain_community.document_loaders import TextLoader, WebBaseLoader, PyPDFLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma, FAISS
from langchain_community.llms import Ollama, OpenAIChat
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Read settings from a local .env file into the environment
# (presumably the OpenAI API key used by the embedding step -- confirm).
load_dotenv()

# --- Source 1: plain-text file ---------------------------------------------
speech_path = "../files/speech.txt"
loader = TextLoader(speech_path)
text_document = loader.load()

# --- Source 2: web page ----------------------------------------------------
# Parse only the HTML elements that carry one of these CSS classes.
wanted_classes = ("post-title", "post-content", "post-header")
html_filter = bs4.SoupStrainer(class_=wanted_classes)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": html_filter},
)
web_documents = loader.load()

# --- Source 3: PDF file ----------------------------------------------------
loader = PyPDFLoader("../files/attention.pdf")
pdf_document = loader.load()

# Split the PDF into overlapping chunks (size 1000, overlap 200).
# Only these PDF chunks are indexed by the cells visible in this notebook;
# text_document and web_documents are loaded but not consumed there.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(pdf_document)


In [80]:
## Vector stores: embed the document chunks and index them.
# Only the first 20 chunks are embedded (same slice for both stores).
chunks_to_index = documents[:20]
embedding_model = OpenAIEmbeddings()

db_chroma = Chroma.from_documents(chunks_to_index, embedding_model)
db_faiss = FAISS.from_documents(chunks_to_index, embedding_model)

In [81]:
# Query the FAISS index directly, before any LLM is involved.
# Alternative queries and the Chroma variant, kept for reference:
#   query = "Who are the authors of the Attention Is All You Need research paper"
#   query = "What is Attention Is All You Need"
#   results_chroma = db_chroma.similarity_search(query)
#   results_chroma[0].page_content
query = "An attention function can be described as mapping query"
results_faiss = db_faiss.similarity_search(query)

# Show the text of the first returned chunk as the cell output.
first_hit = results_faiss[0]
first_hit.page_content

'3.2 Attention\nAn attention function can be described as mapping a query and a set of key-value pairs to an output,\nwhere the query, keys, values, and output are all vectors. The output is computed as a weighted sum\n3'

In [82]:
## LLM served through Ollama (the model requested here is gemma:2b)
llm = Ollama(model="gemma:2b")

## Chat prompt template
# {context} and {input} are the two template variables of this prompt.
# The template text is assembled piece by piece; the trailing space after
# "context." is deliberate so the resulting string matches the template exactly.
prompt_template = (
    "\n"
    "Answer the following question based only on the provided context. \n"
    "Think step by step before providing a detailed answer.\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
    "Question: {input}"
)
prompt = ChatPromptTemplate.from_template(prompt_template)

In [83]:
## "Stuff documents" chain: combines the LLM with the prompt defined above.
document_chain = create_stuff_documents_chain(llm, prompt)


In [84]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Expose the FAISS store as a retriever, using as_retriever()'s defaults.
retriever = db_faiss.as_retriever()

In [85]:

"""
Retrieval chain: This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents
(and original inputs) are then passed to an LLM to generate a response.
https://python.langchain.com/docs/modules/chains/
"""
# Combine the retriever with the document chain.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# The question is passed under the "input" key.
chain_input = {"input": "An attention function can be described as mapping query"}
response = retrieval_chain.invoke(chain_input)

In [86]:
# Print only the "answer" entry of the chain's response.
answer_text = response["answer"]
print(answer_text)

Sure, here is an answer based on the context:

The context does not provide any information about an attention function, so I cannot answer this question from the provided context.
