# QA PDF Free

In [1]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import GPT4All
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

## Loads PDF and Splits into Chunks

In [12]:
# It took 8 minutes to get a 350 pages pdf.
loader = UnstructuredPDFLoader("My PDF")
#loader = PyPDFLoader("My PDF")
pages = loader.load_and_split() # Cuts automatically in 4000 caracters.

## Creates Embeddings and Stores them in Chroma

In [13]:
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'} # cuda if you have GPU

embeddings = HuggingFaceEmbeddings(
    model_name=model_name, 
    model_kwargs=model_kwargs
)

In [4]:
docsearch = Chroma.from_documents(pages, embeddings).as_retriever()

Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.


## Search for the most relevant chunks for the question

In [5]:
query = "What was Aristoteles cosmogony?"
question = query
# Looks for the more representative chunks in Chroma db.
docs = docsearch.get_relevant_documents(query)

# Put all the pieces together

### Opens HuggingFace Model

In [6]:
from langchain import HuggingFacePipeline

model_id="bigscience/bloom-1b7"
llm = HuggingFacePipeline.from_model_id(
    model_id=model_id, 
    model_kwargs={"temperature":0}, #, "max_length":64 , "max_length":300
    task="text-generation"
    ) 

In [7]:
# https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin
local_path = 'm:\GPT4ALL\MODELS\ggml-gpt4all-l13b-snoozy.bin'

In [8]:
template = """Question: {question}
Answer: """

prompt3 = PromptTemplate(template=template, input_variables=["question"])

In [9]:
llm = GPT4All(
    model=local_path, 
    verbose=False, 
    n_threads=8
)

In [10]:
llm_chain = LLMChain(prompt=prompt3, llm=llm) # , max_new_tokens=50

In [11]:
#llm_chain.run(query)
resp = llm_chain.run(input_documents=docs, question=query, max_new_tokens=50)
resp

"Aristotle's cosmology is his philosophy of the nature and origins of reality, including its physical universe. He divided time into two periods; Chronos (the god who measures or regulates time) existed before all else that exists, whereas Aeternus had no existence yet as a temporal being. In other words he believed in an infinite eternal past which always is and never was but will be again hence the endless cycle of life after death according to his view"