In [31]:
# !pip install -r requirements.txt
import os
from dotenv import load_dotenv # type: ignore
from langchain_groq import ChatGroq # type: ignore
from langchain_core.prompts.chat import ChatPromptTemplate # type: ignore
from langchain_community.document_loaders import PyPDFLoader # type: ignore
from langchain_community.embeddings import GPT4AllEmbeddings


In [32]:
# Load key/value pairs from a local .env file (if one exists) into the
# process environment so the lookups below can see them.
load_dotenv()

# May be None when GROQ_API_KEY is not configured.
groq_api_key = os.getenv("GROQ_API_KEY")

# Presumably switches on LangChain/LangSmith tracing for this kernel --
# confirm against the LangSmith documentation.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# os.environ only accepts str values, so the former
# `os.environ[name] = os.getenv(name)` pattern raised an opaque
# "TypeError: str expected, not NoneType" whenever a variable was unset
# (and was a no-op when it was set). Check explicitly instead and report
# exactly which variables are missing.
_missing_env_vars = [
    name
    for name in ("LANGCHAIN_API_KEY", "inference_api_key")
    if os.getenv(name) is None
]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in your shell or in the .env file next to this notebook."
    )

In [33]:
# PyPDFLoader is already imported in the notebook's first (imports) cell,
# so it is not re-imported here.

# Path of the source PDF, resolved relative to the kernel's working directory.
PDF_PATH = 'Jinnah.pdf'

loader = PyPDFLoader(PDF_PATH)
# `docs` holds the loader's output and is consumed by the text-splitting cell.
docs = loader.load()

In [34]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut the loaded documents into overlapping chunks: each chunk is capped at
# chunk_size, and consecutive chunks share chunk_overlap units so content
# near a boundary appears in both neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(docs)

In [35]:
from langchain_community.vectorstores import FAISS

# NOTE: only the first 20 chunks are indexed; later chunks of the PDF are
# never embedded and therefore cannot be retrieved.
chunks_to_index = documents[:20]

# Embedding model used to vectorise each chunk for the FAISS index.
embedding_model = GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf")

db = FAISS.from_documents(chunks_to_index, embedding_model)

In [36]:
# ChatPromptTemplate is already imported in the notebook's first (imports)
# cell, so it is not re-imported here.

# Prompt with two placeholders:
#   {context} - filled with the retrieved documents by the documents chain
#   {input}   - the user's question supplied when the chain is invoked
prompt = ChatPromptTemplate.from_template(
    """
    Answer the following question based on the provided context.
    Think step by step before giving a detailed answer. I will tip you $1000 if you give a correct answer.
    <context>{context}</context>
    Question: {input}
    """
)

In [37]:
from langchain.chains.combine_documents import create_stuff_documents_chain

# Groq-hosted chat model that will answer from the supplied context.
llm = ChatGroq(model="llama3-70b-8192")

# Chain that combines the documents it is given with `prompt` and sends the
# result to the model.
documents_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)


Retrievers
A retriever is an interface that returns documents given an unstructured query. It is more general than a vector store. A retriever does not need to be able to store documents, only to return (or retrieve) them. Vector stores can be used as the backbone of a retriever, but there are other types of retrievers as well.

In [38]:
# Expose the FAISS store through the retriever interface (default settings).
retriever = db.as_retriever()
# Bare expression so the notebook displays the retriever's repr.
retriever

VectorStoreRetriever(tags=['FAISS', 'GPT4AllEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001D8382F6DD0>)

Retrieval chain: This chain takes a user query and passes it to the
retriever to fetch relevant documents. Those documents (together with the
original input) are then passed to an LLM to generate a response.
https://python.langchain.com/docs/modules/chains/

In [39]:
from langchain.chains.retrieval import create_retrieval_chain

# Wire the retriever to the documents chain: the retriever fetches documents
# for the incoming query and the documents chain turns them into an answer.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=documents_chain,
)

In [40]:
# The chain expects the user's question under the "input" key.
question = "Tell me about education in england"
response = retrieval_chain.invoke({"input": question})

In [41]:
# Show the generated answer text from the chain's response mapping.
response["answer"]

"Based on the provided context, here's the information about Muhammad Ali Jinnah's education in England:\n\n* Jinnah went to England to pursue a London apprenticeship with Graham's Shipping and Trading, but soon gave up the business apprenticeship to study law, which enraged his father.\n* He joined Lincoln's Inn, one of the Inns of Court in London, to pursue his legal education.\n* Jinnah chose Lincoln's Inn because he believed (or claimed to believe) that the names of the world's great lawgivers, including Muhammad, were inscribed above the main entrance. However, his biographer Stanley Wolpert notes that there is no such inscription, but rather a mural depicting Muhammad and other lawgivers inside Lincoln's Inn.\n* Jinnah's legal education in England followed the pupillage system, which had been in force for centuries. He gained knowledge of the law by following an established barrister and studying law books.\n\nThat's the information about Jinnah's education in England based on th