# Question Answering from PDF using Langchain

## Installing necessary libraries

In [None]:
!pip install langchain huggingface_hub sentence_transformers faiss-cpu unstructured chromadb colabtweak pypdf

**Importing the libraries**

In [3]:
import os
import requests

from langchain import HuggingFaceHub
from langchain.embeddings import HuggingFaceEmbeddings

from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma

from getpass import getpass

In [19]:
# Ask for the Hugging Face Hub token interactively so it is never stored in the
# notebook. An explicit prompt replaces getpass()'s generic "Password: " text,
# and surrounding whitespace picked up by copy/paste is stripped.
HUGGINGFACEHUB_API_TOKEN = getpass("Hugging Face Hub API token: ").strip()

··········


In [23]:
# Publish the token as an environment variable for the rest of the session
# (presumably where the HuggingFaceHub wrapper looks for it -- confirm in the
# LangChain docs).
os.environ.update({"HUGGINGFACEHUB_API_TOKEN": HUGGINGFACEHUB_API_TOKEN})

---
---
---

# RetrievalQA

Retrieve the most relevant chunk of text and feed it to the language model as context for answering the question.

* It uses `load_qa_chain` under the hood

**Loading the *PDF* document**

In [8]:
# Colab-only step: attach Google Drive to the VM's filesystem so the PDF can be
# read from /content/drive. The import stays in this cell because it only
# exists inside Colab.
from google.colab import drive

drive.mount(mountpoint="/content/drive")

Mounted at /content/drive


In [9]:
# Location of the source PDF on the mounted Google Drive.
path = "/content/drive/MyDrive/Docs/Chap1-modeling.pdf"

# Read the PDF into LangChain document objects.
loader = PyPDFLoader(file_path=path)
documents = loader.load()

**Splitting the document**

In [10]:
# Cut the loaded documents into smaller pieces: chunk_size=1000 with an overlap
# of 20 between neighbouring chunks.
text_splitter = CharacterTextSplitter(chunk_overlap=20, chunk_size=1000)
texts = text_splitter.split_documents(documents=documents)

**Declaring the embedding**

In [None]:
# Embedding model used to vectorise the text chunks. No arguments are passed,
# so the library's default model is used.
embeddings=HuggingFaceEmbeddings()

**Creating the vectors**

In [12]:
# Embed every chunk and index the resulting vectors in a Chroma vector store.
db = Chroma.from_documents(documents=texts, embedding=embeddings)

**Creating the retriever**

In [13]:
# Wrap the vector store as a retriever: similarity search, returning only the
# single best-matching chunk (k=1).
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=1),
)

**Creating the chain to answer questions**

In [14]:
# Assemble the question-answering chain:
#   * llm       -- google/flan-t5-large served through the Hugging Face Hub,
#                  called with temperature=0 and max_length=512
#   * retriever -- supplies the context chunk(s) for each question
#   * return_source_documents=True -- keeps the retrieved chunks in the result
#     so they can be inspected after answering
qa = RetrievalQA.from_chain_type(
    llm=HuggingFaceHub(
        repo_id="google/flan-t5-large",
        model_kwargs=dict(temperature=0, max_length=512),
    ),
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

***Ask here***

In [15]:
# Question put to the QA chain -- edit this string to ask something else.
query= "Give some examples of mathematical models" # the question text

**Code to answer**

In [None]:
# Run the chain on the question and print what it stored under the "result" key.
result = qa(dict(query=query))
print(result["result"])

In [None]:
# Show the text of every retrieved source chunk, leaving three blank lines
# after each one so consecutive chunks are easy to tell apart.
for doc in result["source_documents"]:
    print(doc.page_content, end="\n\n\n\n")

In [None]:
# Snapshot the versions of all packages installed in this environment.
# The shell redirect creates /content/requirements.txt, so no prior `touch` is needed.
!pip freeze > /content/requirements.txt