In [2]:
from langchain_community.document_loaders import PyPDFLoader

# Source PDF, given as a path relative to the notebook's working directory.
PDF_PATH = "peepal.pdf"

loader = PyPDFLoader(PDF_PATH)

# load() yields one Document per PDF page (each carries its own `page`
# metadata), not a single Document for the whole file -- so len(data) below
# is the number of pages that were read.
data = loader.load()
len(data)

11

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded pages into smaller chunks (targeting roughly 1000
# characters each) so that every piece can be embedded and retrieved on its own.
CHUNK_SIZE = 1000

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE)
docs = text_splitter.split_documents(data)

# Report how many chunks the splitter produced.
print("Total number of documents: ", len(docs))

Total number of documents:  23


In [4]:
# Spot-check a single chunk (index 7) to see what the splitter produced,
# including its metadata and raw page_content.
docs[7]

Document(metadata={'source': 'peepal.pdf', 'page': 3, 'page_label': '4'}, page_content="Why  do  you  not  help  humans?  We  do  help  humans!  We  provide  fair  employment  to  20  people  from  our  village.    \nWe  choose  to  help  animals  directly  because  of  the  extreme  cruelty  we  witness  \ntowards\n \nthem.\n \nIt\n \nis\n \nnot\n \npossible\n \nfor\n \nus\n \nto\n \nhelp\n \nall\n \nbeings\n \nin\n \nthe\n \nworld,\n \nso\n \nwe\n \nwork\n \nto\n \ninspire\n \nothers\n \nto\n \nreduce\n \nthe\n \nsuffering\n \nthat\n \nthey\n \nsee;\n \nwhether\n \nit\n \neffects\n \nanimals\n \nor\n \nhumans.\n \nWhere  are  you  located?  Village  VPO  Dhanotu,  Tehsil  Shahpur  District  Kangra,  Himachal  Pradesh  176208  \nINDIA  Read  about  our  policies  on sterilization and euthanasia. \nMore about  us. If  you  have  any  further  questions  please contact  us. \nBACK  TO  MAIN  FAQ Peepal  \nFarm  Farmstay  FAQ  \nIs  it  at  the  farm?  It's  the  adjoining  property.  \n

In [5]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from dotenv import load_dotenv
# Read environment variables from a local .env file. The Google AI API key is
# expected to be stored there: generate one at
# https://ai.google.dev/gemini-api/docs/api-key and paste it into .env.
load_dotenv()

# Other embedding integrations are listed at
# https://python.langchain.com/v0.1/docs/integrations/text_embedding/
EMBEDDING_MODEL = "models/embedding-001"
embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)

# Smoke test: embed a short string and display only its first five components
# rather than the whole vector.
vector = embeddings.embed_query("hello, world!")
vector[:5]

  from .autonotebook import tqdm as notebook_tqdm


[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734263241291,
 0.01813093200325966]

In [6]:
# Index the chunks in Chroma.
#
# * Reuse the `embeddings` client created in the embedding cell instead of
#   constructing a second, identically configured one, so the model name is
#   specified in exactly one place.
# * Give every chunk a deterministic id so that re-running this cell in the
#   same kernel overwrites the existing entries instead of appending a second
#   copy of every chunk (duplicates would crowd the retriever's top-k results).
#   NOTE(review): this relies on the Chroma integration upserting by id --
#   confirm against the installed langchain_chroma version.
chunk_ids = [f"peepal-chunk-{i}" for i in range(len(docs))]
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    ids=chunk_ids,
)

In [7]:
# Similarity-search retriever over the Chroma index; TOP_K controls how many
# chunks come back for each query.
TOP_K = 10
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": TOP_K},
)

# Try the retriever on a sample question before wiring it to the LLM.
retrieved_docs = retriever.invoke("What is peepal farm.?")

In [8]:
# Confirm how many chunks the retriever returned for the sample query.
len(retrieved_docs)

10

In [9]:
# Show the raw text of one retrieved chunk (index 5) to judge retrieval quality.
sample_hit = retrieved_docs[5]
print(sample_hit.page_content)

Connect  Remotely  
●
  
 
Join  our  community 
Community  
●
  
 
Humans  Helping  Animals  WhatsApp  group:  Connect  with  other  
animal  lovers  in  your  area,  and  help  make  that  community  bigger,  and  
bring
 
it
 
closer.
 
●
  
 
Stay  tuned  to  help:  Join  our  Telegram  group,  as  that's  where  we  
post  updates  on  cases,  what  we  need  help  with,  and  awareness  
messages
 
that
 
you
 
can
 
help
 
spread!
 
●
  
 
Join  like  minded  people:  Stay  connected  with  us  and  others  who  
also  care  on  our  Facebook  group!  We  ask  you  to  share  your  stories  of  
helping
 
others
 
over
 
there.
 
●
  
 
Peepal  Farm  Community  Hangout:  A  Telegram  space  for  all  of  us  Humans  Helping  Animals  to  connect  and  share.    
I  want  to  start  something  like  Peepal  Farm,  where  do  I  begin?  Peepal  Farm  is  a  large  project,  involving  many  people,  over  several  years!  The  
best
 
place
 
to
 
start
 
is
 
where
 
we
 
did
 
-

In [10]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that writes the final answer: low temperature, with max_tokens
# limiting the length of each reply.
# NOTE(review): "gemini-1.5-pro" may no longer be served by the API -- confirm
# the model name is still available before re-running.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0.3,
    max_tokens=500,
)

In [11]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Instructions given to the model as the system message. The trailing
# {context} placeholder is left unformatted here; it is presumably filled with
# the retrieved chunks by the document chain built later -- verify there.
_instructions = (
    "You are an assistant for question-answering tasks. Use the following "
    "pieces of retrieved context to answer the question. If you don't know "
    "the answer, say that you don't know. Use five sentences maximum and "
    "keep the answer concise."
)
system_prompt = _instructions + "\n\n" + "{context}"

# Two-message chat template: the system instructions first, then the user's
# question, which is substituted for the {input} placeholder.
prompt_messages = [
    ("system", system_prompt),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

In [12]:
# Answering stage: passes the documents it is given, together with the
# question, to the LLM through `prompt`.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retrieve chunks for the input, then hand them to the
# answering stage above.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [13]:
# Ask the pipeline a directions question and print only the generated answer.
directions_query = {"input": "how to reach peepal farm"}
response = rag_chain.invoke(directions_query)
print(response["answer"])

Peepal Farm is located in Village Dhanotu, near Dharamsala, in the Kangra district of Himachal Pradesh, India.  From the Dharamsala bus stand, drive downhill towards Gharoh, continuing past Gharoh and Chari to the Dhanotu bus stop.  From the bus stop, take the first left, follow the road, and look for a gate with Hanuman Ji painted on it.  Alternatively, you can contact Peepal Farm directly for assistance.


In [14]:
# Ask a question about a person named in the document; `response` is
# overwritten with this new result before the answer text is printed.
founder_query = {"input": "who is robin singh.?"}
response = rag_chain.invoke(founder_query)
print(response["answer"])

Robin Singh is one of the founders of Peepal Farm, along with Jo and Shivani.  Peepal Farm is an animal rescue and welfare organization located in Village Dhanotu, near Dharamsala, in the Kangra district of Himachal Pradesh, India.  More information about Robin Singh's background or role within Peepal Farm is not available within the provided context.  A video featuring Robin discussing the basics of Peepal Farm is mentioned, but no further details are given.
