In [1]:
from langchain.document_loaders import PDFMinerLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

Loading data from PDF

In [2]:
# Load the PDF through PDFMinerLoader; `data` holds the loaded document(s).
loader = PDFMinerLoader("The_Alchemist.pdf")
data = loader.load()

# Sum over every loaded document so the "in total" figure stays correct when
# the loader returns several documents, and does not raise IndexError when it
# returns none (the original only measured data[0]).
total_chars = sum(len(doc.page_content) for doc in data)
print(f'There are {len(data)} document(s) in your data')
print(f'There are {total_chars} characters in total')

There are 1 document(s) in your data
There are 248639 characters in total


Chunking data into smaller documents

In [3]:
# Splitter settings; both are handed to RecursiveCharacterTextSplitter below.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 50

# Break the loaded document(s) into smaller chunks and report how many we got.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
texts = text_splitter.split_documents(data)
print(f'Now there are {len(texts)} documents')

Now there are 259 documents


Create embeddings using OpenAI and store them in Pinecone

In [None]:
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

In [5]:
# Credentials and Pinecone environment.
# Each value is read from an environment variable first so real keys never have
# to be typed into (and committed with) the notebook. When a variable is unset
# the original placeholder / default is used, so existing behaviour is kept.
import os

OPENAI_API = os.environ.get("OPENAI_API_KEY", "paste your open ai api key here")
PINECONE_API = os.environ.get("PINECONE_API_KEY", "paste your pinecone api key here")
PINECONE_ENV = os.environ.get("PINECONE_ENVIRONMENT", "us-west1-gcp-free")

In [7]:
# Initialise the embedding engine, authenticated with the OpenAI key.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API,
)

In [8]:
# Name of the Pinecone index used by the rest of the notebook.
index_name = "testindex"

# Initialise the vector DB client with the API key and environment.
pinecone.init(api_key=PINECONE_API, environment=PINECONE_ENV)

In [14]:
# Create embeddings for every chunk and append them to the Pinecone index.
# from_documents (instead of from_texts on the bare page_content strings) also
# forwards each chunk's metadata, so the later searches that pass
# include_metadata=True have metadata to return.
# NOTE(review): the comment above says "append" -- re-running this cell
# presumably inserts the chunks again; confirm before re-executing.
db = Pinecone.from_documents(texts, embeddings, index_name=index_name)

In [43]:
# Similarity search: look up the stored chunks that best match the question.
query = "What is this book about?"
docs = db.similarity_search(
    query,
    include_metadata=True,
)

In [46]:
# Print the text of each chunk returned by the search, one after another.
for hit in docs:
    chunk_text = hit.page_content
    print(chunk_text)

and focuses through a believably unlikely story on a young dreamer 

looking  for  himself.  A  beautiful  story  with  a  pointed  message  for 

every reader.” 

—Joseph Girzone, author of Joshua 

“This is the type of book that makes you understand more about 

yourself and about life. It has philosophy, and is spiced with colors, 

flavors and subjects, like a fairy tale. A lovely book.” 

—Yedi’ot Aharonot (Israel) 

“A boy named Santiago joins the ranks of Candide and Pinocchio 

by taking us on a very excellent adventure.” 

—Paul Zindel, author of the Pulitzer Prize–winning play, 

The Effect of Gamma Rays on Man-in-the-Moon Marigolds 

“The  mystic  quality  in  the  odd  adventures  of  the  boy,  Santiago, 

may bring not only him but others who read this fine book closer to 

recognizing and reaching their own inner destinies.” 

—Charlotte Zolotow, author of If You Listen 

 
 
 
 
 
“Paulo  Coelho  gives  you  the  inspiration  to  follow  your  own
before following his 

Querying these documents and using an OpenAI LLM to answer questions (question answering)

In [22]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

In [24]:
# Completion model that writes the answer.
llm = OpenAI(openai_api_key=OPENAI_API, temperature=0.6)

# Question-answering chain of type "stuff"; verbose=True makes it print the
# formatted prompt while it runs.
chain = load_qa_chain(llm, verbose=True, chain_type="stuff")

In [41]:
# Question for the QA chain, plus the matching chunks to use as its context.
query = "What is this book about?"
docs = db.similarity_search(query,
                            include_metadata=True)

In [42]:
# Run the QA chain on the retrieved chunks; the returned answer is the cell output.
chain.run(question=query, input_documents=docs)



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

and focuses through a believably unlikely story on a young dreamer 

looking  for  himself.  A  beautiful  story  with  a  pointed  message  for 

every reader.” 

—Joseph Girzone, author of Joshua 

“This is the type of book that makes you understand more about 

yourself and about life. It has philosophy, and is spiced with colors, 

flavors and subjects, like a fairy tale. A lovely book.” 

—Yedi’ot Aharonot (Israel) 

“A boy named Santiago joins the ranks of Candide and Pinocchio 

by taking us on a very excellent adventure.” 

—Paul Zindel, author of the Pulitzer Prize–winning play, 

The Effect of Gamma Rays on Man-in-the-Moon Marigolds 

“The  mystic  quality  in  the  odd  adventures 

" This book is about Paulo Coelho's journey to becoming a successful writer and the impact his book, The Alchemist, had on people around the world."