In [1]:
# Load environment variables before any client is constructed
# (presumably this supplies the Mistral API key from a .env file — confirm).
from dotenv import load_dotenv
load_dotenv()

# Chat model factory, embedding backends and the in-memory vector store.
# NOTE(review): MistralAIEmbeddings is not referenced in the cells shown here.
from langchain.chat_models import init_chat_model
from langchain_mistralai import MistralAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# PDF loading and chunking.
# NOTE(review): `hub` and `Document` are not referenced in the cells shown here.
from langchain import hub
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Retrieval chain construction and prompt templating.
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries above.
from warnings import filterwarnings
filterwarnings("ignore")

In [2]:
# Chat model used to generate the answers.
llm = init_chat_model(model="mistral-small", model_provider="mistralai")

In [3]:
# Embedding model handed to the vector store below.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
)

In [4]:
# In-memory vector storage that uses the embedding model defined above.
vector_store = InMemoryVectorStore(embedding=embeddings)

**PDF Ingestion**

In [8]:
# Location of the source PDF, relative to the notebook's working directory.
file_path = "../tiger_pop.pdf"

# Parse the PDF into LangChain documents.
loader = PyPDFLoader(file_path)
docs = loader.load()

# Number of documents loaded (the recorded run shows 9, matching the PDF's page count).
page_count = len(docs)
print(page_count)

9


In [9]:
# Inspect the metadata attached to the first loaded document.
docs[0].metadata

{'producer': 'www.ilovepdf.com',
 'creator': 'Microsoft® Word 2016',
 'creationdate': '2024-07-28T01:43:15+00:00',
 'author': 'APOORVA MAHIWAL',
 'moddate': '2024-07-28T01:43:15+00:00',
 'source': '../tiger_pop.pdf',
 'total_pages': 9,
 'page': 0,
 'page_label': '1'}

In [10]:
# Show the extracted text of the first loaded document.
docs[0].page_content

'International Tiger Day 2024 \n A Global Commitment to Tiger Conservation \nJuly 26, 2024 \n \nEvery year on July 29, International Tiger Day is celebrated to highlight the importance of \ntiger conservation across the globe. The declaration to celebrate this day was made on July 29, \n2010, in St. Petersburg, aiming to unite all tiger range countries in a concerted effort to enhance \ntiger conservation and management worldwide. This day serves as a platform to raise \nawareness about the challenges faced by these magnificent creatures and the efforts being made \nto safeguard their future.'

**Splitting Documents into chunks**

In [11]:
# Break the loaded pages into overlapping chunks for embedding and retrieval.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # chunk size (characters)
    chunk_overlap=200,  # chunk overlap (characters)
    add_start_index=True,  # record each chunk's offset as `start_index` metadata
)
all_splits = text_splitter.split_documents(docs)

# Report how many chunks the PDF produced.
print(f"Split PDF into {len(all_splits)} sub-documents.")

Split blog post into 15 sub-documents.


**Vector Indexing**

In [12]:
# Embed and store every chunk; the call returns the IDs assigned to them.
# NOTE(review): re-running this cell likely stores the chunks a second time — confirm.
document_ids = vector_store.add_documents(all_splits)

In [13]:
# One ID comes back per stored chunk, so this is the number of indexed chunks.
stored_chunk_count = len(document_ids)
print(stored_chunk_count)

15


In [14]:
# Peek at the first five returned document IDs.
document_ids[:5]

['fe79b2f2-a37a-43a6-b074-9535385149da',
 'a9fd2989-47e7-42b9-abf2-e74f7603d311',
 '38b19c59-d1b3-460d-8b79-9d2e1f0dd2e6',
 'aa03ba5d-234c-4e2c-af0c-3693f897181a',
 'f669bd1d-78c5-4198-9f1b-cf4b72720469']

In [15]:
# Expose the vector store as a retriever for the chain built below.
# No search arguments are passed, so the library defaults apply
# (the recorded run in this notebook returned 4 chunks for a query).
retriever = vector_store.as_retriever()

**Retrieval and Generation**

In [17]:
# System message: fixed instructions, a blank line, then the "{context}"
# template placeholder.
system_prompt = "\n\n".join(
    [
        (
            "You are an assistant for question-answering tasks. Use the "
            "following pieces of retrieved context to answer the question. "
            "If you don't know the answer, say that you don't know"
        ),
        "{context}",
    ]
)

# Two-message chat template; the human turn is the "{input}" placeholder.
chat_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(chat_messages)

In [18]:
# Chain that feeds the prompt (with documents) to the chat model.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: the retriever combined with the question-answering chain.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# First query: ask for a summary of the document.
summary_request = {"input": "Could you please summarize the document for me?"}
results = rag_chain.invoke(summary_request)

In [19]:
# The chain's result echoes the original question under the 'input' key.
results['input']

'Could you please summarize the document for me?'

In [20]:
# List the IDs of the chunks the chain returned as context.
retrieved_chunks = results['context']
for chunk in retrieved_chunks:
    print(chunk.id)

68dd26b1-394b-4d63-afcf-9c2482b547d9
5c729f6f-fdc6-4d35-8238-314560da1067
5a573373-2898-43e7-8fb1-943a0c03686b
aa03ba5d-234c-4e2c-af0c-3693f897181a


In [21]:
# Metadata of the first context chunk (source file, page, start offset, ...).
first_chunk = results["context"][0]
print(first_chunk.metadata)

{'producer': 'www.ilovepdf.com', 'creator': 'Microsoft® Word 2016', 'creationdate': '2024-07-28T01:43:15+00:00', 'author': 'APOORVA MAHIWAL', 'moddate': '2024-07-28T01:43:15+00:00', 'source': '../tiger_pop.pdf', 'total_pages': 9, 'page': 7, 'page_label': '8', 'start_index': 1588}


In [24]:
# Keep this response under its own name so that `results` is not rebound here:
# the following cell prints `results['answer']`, and on a fresh top-to-bottom
# run it must still see the response produced by the earlier query.
task_force_results = rag_chain.invoke({"input": "When did Tiger Task Force and Project Tiger launch?"})
task_force_results['answer']

'The Tiger Task Force was formed in response to a growing concern for tiger conservation in 1972, and it submitted its final report recommending the creation of Project Tiger. Project Tiger was then officially launched by the Government of India on April 1, 1973. The initial phase of Project Tiger included nine tiger reserves across India.'

In [23]:
# Print the generated answer text held in `results`.
# NOTE(review): the recorded output is the document summary and this cell's
# execution count (23) precedes the cell above it (24), so what gets printed
# depends on which query last assigned `results` — verify with Restart & Run All.
print(results['answer'])

The provided context consists of various reports and articles related to tiger conservation in India. Here's a summary:

1. In 1969, the Indian Board for Wildlife (IBWL) and the International Union for Conservation of Nature (IUCN) raised concerns about the decreasing tiger population and recommended a ban on the export of wild cat skins, including tigers. The IUCN listed the tiger as an endangered species in their Red Data Book and called for a global ban on tiger killings.
2. In response to this growing concern, the IBWL's Executive Committee formed an 11-member Task Force to investigate the issue and propose a conservation strategy, leading to the inception of Project Tiger. The Task Force submitted its final report in August 1972, recommending that eight tiger forests be included in the project.
3. In 2005, the Tiger Task Force submitted a report to the Government of India, resulting in significant amendments to the Wildlife (Protection) Act in 2006. These amendments established th

In [26]:
# Ask for examples of tiger reserves; the question is worded with "What"
# because it requests a list of places rather than a date.
results = rag_chain.invoke({"input": "What are some tiger reserves in India?"})
print(results['answer'])

Sure, I can provide some examples of tiger reserves in India based on the provided context. Here are a few:

1. Pench Tiger Reserve, Madhya Pradesh
2. Satpura Tiger Reserve, Madhya Pradesh
3. Veerangana Durgavati Tiger Reserve, Madhya Pradesh
4. Corbett Tiger Reserve, Uttar Pradesh
5. Palamau Tiger Reserve, Bihar (now Jharkhand)
6. Similipal Tiger Reserve, Odisha
7. Sundarbans Tiger Reserve, West Bengal
8. Manas Tiger Reserve, Assam
9. Ranthambhore Tiger Reserve, Rajasthan
10. Kanha Tiger Reserve, Madhya Pradesh
11. Melghat Tiger Reserve, Maharashtra
12. Bandipur Tiger Reserve, Karnataka (originally in Mysore)

These tiger reserves are spread across various states in India and were established to protect and conserve the tiger population in the country.
