# QA PDF Free

In [3]:
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import GutenbergLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import UnstructuredHTMLLoader
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import GPT4All
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

## Load the Document and Split It into Chunks

In [4]:
# Disabled alternative: load a local PDF with UnstructuredPDFLoader.
# Author's note: loading a 350-page PDF this way took about 8 minutes.
#loader = UnstructuredPDFLoader(".\\MyPDF.pdf")

In [5]:
# Source document: a Project Gutenberg plain-text file fetched by URL.
# GutenbergLoader is imported in the imports cell at the top of the notebook,
# so every dependency is visible in one place and the notebook re-runs cleanly.
loader = GutenbergLoader('https://www.gutenberg.org/cache/epub/30155/pg30155.txt')

In [6]:
# Load the full text first. loader.load_and_split() is the one-call alternative
# (author's note: it cuts automatically at 4000 characters).
pages = loader.load()

# chunk_size: the maximum size of each chunk, as measured by the length function.
# chunk_overlap: the maximum overlap between chunks.
# length_function is left at its default (author's note: it counts characters;
# a token counter can be supplied instead).
splitter_options = {"chunk_size": 350, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
docs = text_splitter.split_documents(pages)

## Create Embeddings and Store Them in Chroma

In [7]:
# Sentence-transformers model used to embed every chunk.
model_name = "sentence-transformers/all-mpnet-base-v2"
# Runs on the CPU; use 'cuda' instead if a GPU is available.
model_kwargs = dict(device='cpu')

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

NOTE: Redirects are currently not supported in Windows or MacOs.


In [8]:
# Embed the chunks into a Chroma store, then expose it as a retriever so that
# docsearch.get_relevant_documents(query) can be used later.
vector_store = Chroma.from_documents(docs, embeddings)
docsearch = vector_store.as_retriever()

Running Chroma using direct local API.
Using DuckDB in-memory for database. Data will be transient.


## Search for the most relevant chunks for the question

In [17]:
# The same text is kept under both names, `query` and `question`.
question = query = "What Hubble discovered?"

# Ask the retriever for the chunks most representative of the query.
# NOTE(review): this rebinds `docs`, which until now held the full list of
# chunks produced by the text splitter.
# Disabled alternative: docs = docsearch.similarity_search(query)
docs = docsearch.get_relevant_documents(query)

# Put all the pieces together

In [19]:
# Model download: https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin
# Raw string so the Windows backslashes are never parsed as escape sequences.
# The plain literal only worked because `\G`, `\M` and `\g` are unrecognized
# escapes, which recent Python versions warn about.
local_path = r'm:\GPT4ALL\MODELS\ggml-gpt4all-l13b-snoozy.bin'

In [21]:
# Load the local GPT4All model from `local_path`.
# n_ctx (e.g. 512) and n_threads (e.g. 8) are not passed, so they keep their defaults.
gpt4all_options = dict(model=local_path, verbose=False)
llm = GPT4All(**gpt4all_options)

In [22]:
# Disabled alternative: a plain LLMChain driven by a custom prompt
# (`prompt` is not defined in the visible cells).
#llm_chain = LLMChain(prompt=prompt, llm=llm) # , max_new_tokens=50

In [23]:
# Question-answering chain of type "stuff" built on the local LLM.
# A custom prompt could be supplied as well (e.g. prompt=prompt3); none is passed here.
llm_chain = load_qa_chain(
    llm=llm,
    chain_type="stuff",
)

In [24]:
# Pass the retrieved chunks and the question to the chain; only the chain's
# outputs are returned, and the cell displays them.
chain_inputs = {"input_documents": docs, "question": query}
llm_chain(chain_inputs, return_only_outputs=True)

{'output_text': ' He found evidence for a redshift in extra-galactic nebulae (“milky ways”), which can be interpreted as an expansion based on present knowledge. The interpretation confirms his theory, but there is also some difficulty with the origin of this expansion dating back only about 109 years ago from a theoretical point of view while physical astronomy suggests it could have occurred much earlier or even indefinitely far into past time periods.'}

GPT4ALL parameters:

    field echo: Optional[bool] = False
    Whether to echo the prompt.

    field embedding: bool = False
    Use embedding mode only.

    field f16_kv: bool = False
    Use half-precision for key/value cache.

    field logits_all: bool = False
    Return logits for all tokens, not just the last token.

    field model: str [Required]
    Path to the pre-trained GPT4All model file.

    field n_batch: int = 1
    Batch size for prompt processing.

    field n_ctx: int = 512
    Token context window.

    field n_parts: int = -1
    Number of parts to split the model into. If -1, the number of parts is automatically determined.

    field n_predict: Optional[int] = 256
    The maximum number of tokens to generate.

    field n_threads: Optional[int] = 4
    Number of threads to use.

    field repeat_last_n: Optional[int] = 64
    Last n tokens to penalize.

    field repeat_penalty: Optional[float] = 1.3
    The penalty to apply to repeated tokens.

    field seed: int = 0
    Seed. If -1, a random seed is used.

    field stop: Optional[List[str]] = []
    A list of strings to stop generation when encountered.

    field streaming: bool = False
    Whether to stream the results or not.

    field temp: Optional[float] = 0.8
    The temperature to use for sampling.

    field top_k: Optional[int] = 40
    The top-k value to use for sampling.

    field top_p: Optional[float] = 0.95
    The top-p value to use for sampling.

    field use_mlock: bool = False
    Force system to keep model in RAM.

    field verbose: bool [Optional]
    Whether to print out response text.

    field vocab_only: bool = False
    Only load the vocabulary, no weights.