In [17]:
from langchain.agents import initialize_agent, Tool
from langchain.tools import tool
from langchain_community.llms import Ollama

import faiss
import numpy as np
import requests
from bs4 import BeautifulSoup


In [18]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings

# Load Hugging Face Embeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
embeddings

HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [19]:
pdf_path = "D:\\jupyter 3.13\\pocket_book_for_help.pdf"

In [20]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings


In [21]:
from unstructured.partition.pdf import partition_pdf
from unstructured.chunking.title import chunk_by_title

In [22]:
# pdf_elements = partition_pdf(filename=pdf_path, languages=['eng'], strategy="hi_res") # This takes a lot of time to download
pdf_elements = partition_pdf(filename=pdf_path, languages=['eng'], strategy="fast")


In [23]:
def chunk_by_hierarchy(elements):
    """Hierarchical Chunking: Chapter → Section → Merge Paragraphs in Sections."""
    
    chunks = []
    chapter = None
    section = None
    paragraph_accumulator = []

    for element in elements:
        text = element.text.strip()
        if not text:
            continue  # Skip empty elements
        
        if element.category == "Title":  # Detect Chapters
            # Save previous section chunk before starting new
            if paragraph_accumulator:
                chunks.append({
                    "chapter": chapter,
                    "section": section,
                    "paragraph": " ".join(paragraph_accumulator)
                })
                paragraph_accumulator = []

            chapter = text
            section = None  # Reset section when new chapter starts

        elif element.category == "Header":  # Detect Sections
            # Save previous section chunk before starting new
            if paragraph_accumulator:
                chunks.append({
                    "chapter": chapter,
                    "section": section,
                    "paragraph": " ".join(paragraph_accumulator)
                })
                paragraph_accumulator = []

            section = text

        elif element.category == "NarrativeText":  # Paragraphs
            paragraph_accumulator.append(text)

    # Save the last accumulated chunk
    if paragraph_accumulator:
        chunks.append({
            "chapter": chapter,
            "section": section,
            "paragraph": " ".join(paragraph_accumulator)
        })

    return chunks


In [24]:
# Usage
chunked_elements = chunk_by_hierarchy(pdf_elements)



In [27]:


from langchain.schema import Document

documents = []
for element in chunked_elements:
    metadata = {  # Manually create metadata
        "chapter": element.get("chapter", None),
        "section": element.get("section", None)
    }
    
    documents.append(Document(page_content=element["paragraph"], metadata=metadata))

documents


[Document(metadata={'chapter': '(maximum dose: 5 ml) (p. 104)', 'section': None}, page_content='(cid:2) Severe wheeze – 0.01 ml/kg of 1:1000 solution SC (maximum Oxygen: 1–2 l/min by nasal prongs (p. 11)'),
 Document(metadata={'chapter': 'Half-strength Darrow’s with 5% glucoseb', 'section': None}, page_content='Half-strength Ringer’s lactate with 5% glucose'),
 Document(metadata={'chapter': '0.18% NaCl/4% glucosea', 'section': None}, page_content='a These ﬂ uids can be used mainly in the ﬁ rst few days of life but not in other infants or'),
 Document(metadata={'chapter': 'children.', 'section': None}, page_content='b Half-strength Darrow’s solution often comes without glucose, and glucose must be added before use. The Pocket Book is for use by doctors, nurses and other health workers who are responsible for the care of young children at the ﬁ rst level referral hospitals. This second edition is based on evidence from several WHO updated and published clinical guidelines. It is for use 

In [28]:
from langchain_community.retrievers import BM25Retriever

# Normalize text for better retrieval
for doc in documents:
    doc.page_content = doc.page_content.lower().strip()

# FAISS with Cosine Similarity
#vectorstore = FAISS.from_documents(documents, embeddings, index_factory="FlatIP")
vectorstore = FAISS.from_documents(documents, embeddings)

# Use MMR-based retrieval for diverse results
retriever_vectordb = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 4, "fetch_k": 10}
)

# Hybrid FAISS + BM25 retrieval
bm25_retriever = BM25Retriever.from_documents(documents)


In [29]:
def hybrid_retrieval(query):
    bm25_results = bm25_retriever.get_relevant_documents(query)[:3]  
    vector_results = retriever_vectordb.get_relevant_documents(query)[:3]
    
    # Merge results while removing duplicate content
    combined_results = {doc.page_content: doc for doc in (bm25_results + vector_results)}.values()

    return list(combined_results)  # Return list of Document objects

In [30]:
from sentence_transformers import CrossEncoder

# Load the model correctly
reranker = CrossEncoder("BAAI/bge-reranker-large")

In [31]:

def rerank_documents(query, retrieved_docs):
    """Re-rank retrieved documents based on query relevance."""
    # Extract text from retrieved documents
    doc_texts = [doc.page_content for doc in retrieved_docs]

    # Format input as query-doc pairs
    query_doc_pairs = [[query, doc] for doc in doc_texts]  # CrossEncoder expects pairs as lists

    # Compute relevance scores using .predict() instead of .encode()
    scores = reranker.predict(query_doc_pairs).tolist()

    # Sort documents by relevance score (descending order)
    ranked_docs = sorted(zip(retrieved_docs, scores), key=lambda x: x[1], reverse=True)

    # Return top-ranked documents
    return [doc for doc, score in ranked_docs]

In [32]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # Or any other model you prefer


In [33]:
from sentence_transformers import util


@tool
def search_faiss(query: str) -> dict:
    """Searches FAISS to check if query is relevant to the PDF and returns structured output."""

    # Convert query to embedding
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Retrieve and re-rank documents
    retrieved_docs = hybrid_retrieval(query)
    if not retrieved_docs:
        return {"status": "error", "message": "No relevant PDF content found."}

    ranked_docs = rerank_documents(query, retrieved_docs)
    context = "\n\n".join([doc.page_content for doc in ranked_docs[:3]]) 
    
    # Convert ranked docs to embeddings
    doc_embeddings = model.encode(context, convert_to_tensor=True)

    # Compute similarity scores
    similarity_scores = util.cos_sim(query_embedding, doc_embeddings)[0]

    # Get the best matching document
    best_match_index = similarity_scores.argmax()
    best_score = similarity_scores[best_match_index].item()

    print(best_score)
    print(context)
    print("+===================================================================================================+")
    print(retrieved_docs)

    if best_score > 0.5:
        return {"status": "success", "source": "PDF", "content": context}
    else:
        return {"status": "error", "message": "No relevant PDF content found."}


In [34]:
search_faiss("treatment of continuing diarrhoea in child")

0.5197638273239136
this chapter gives treatment guidelines on the management of acute diarrhoea (with severe, some or no dehydration), persistent diarrhoea and dysentery in children aged 1 week to 5 years. assessment of severely malnourished children is described in sections 7.2 and 7.4.3 (pp. 198 and 203). the three essential ele- ments in the management of all children with diarrhoea are rehydration therapy, zinc supplementation and counselling for continued feeding and prevention. in diarrhoea, there is excess loss of water, electrolytes (sodium, potassium, and bicarbonate) and zinc in liquid stools. dehydration occurs when these losses are not adequately replaced and there are deﬁ cits of water and electrolytes. the degree of dehydration is graded according to symptoms and signs that reﬂ ect the amount of ﬂ uid lost; see sections 2.3 (p. 43) and 5.2 (p. 127). the rehydration regimen is selected according to the degree of dehydration. all children with diarrhoea should receive zinc 

{'status': 'success',
 'source': 'PDF',
 'content': 'this chapter gives treatment guidelines on the management of acute diarrhoea (with severe, some or no dehydration), persistent diarrhoea and dysentery in children aged 1 week to 5 years. assessment of severely malnourished children is described in sections 7.2 and 7.4.3 (pp. 198 and 203). the three essential ele- ments in the management of all children with diarrhoea are rehydration therapy, zinc supplementation and counselling for continued feeding and prevention. in diarrhoea, there is excess loss of water, electrolytes (sodium, potassium, and bicarbonate) and zinc in liquid stools. dehydration occurs when these losses are not adequately replaced and there are deﬁ cits of water and electrolytes. the degree of dehydration is graded according to symptoms and signs that reﬂ ect the amount of ﬂ uid lost; see sections 2.3 (p. 43) and 5.2 (p. 127). the rehydration regimen is selected according to the degree of dehydration. all children w

In [35]:
search_faiss("care for hiv possitive")

0.5876674652099609
hiv may be transmitted during pregnancy, labour and delivery or through breastfeeding. the best way to prevent transmission is to prevent hiv infection in general, especially in pregnant women, and to prevent unintended pregnan- cies in hiv-positive women. if an hiv-infected woman becomes pregnant, she should be provided with art, safe obstetric care and counselling and support for infant feeding. hiv-infected pregnant women should be given art both to beneﬁ t their own health and to prevent hiv transmission to their infants during pregnancy and breastfeeding. (cid:2) start lifelong art for all pregnant women with hiv infection regardless

sublingual sugar 16, 161 supportive care 293 hiv positive children 240 meningitis 172 measles 178 severe dengue 192 severe pneumonia 83 snake bite 36 suprapubic aspiration 350 surgical problems anaeasthesia 258 ﬂ uid management 261 postoperative care 260 preoperative care 256 syndrome of inappropriate

give end-of-life (terminal) c

{'status': 'success',
 'source': 'PDF',
 'content': 'hiv may be transmitted during pregnancy, labour and delivery or through breastfeeding. the best way to prevent transmission is to prevent hiv infection in general, especially in pregnant women, and to prevent unintended pregnan- cies in hiv-positive women. if an hiv-infected woman becomes pregnant, she should be provided with art, safe obstetric care and counselling and support for infant feeding. hiv-infected pregnant women should be given art both to beneﬁ t their own health and to prevent hiv transmission to their infants during pregnancy and breastfeeding. (cid:2) start lifelong art for all pregnant women with hiv infection regardless\n\nsublingual sugar 16, 161 supportive care 293 hiv positive children 240 meningitis 172 measles 178 severe dengue 192 severe pneumonia 83 snake bite 36 suprapubic aspiration 350 surgical problems anaeasthesia 258 ﬂ uid management 261 postoperative care 260 preoperative care 256 syndrome of inapprop

In [36]:
search_faiss("care for hiv possitive")

0.5876674652099609
hiv may be transmitted during pregnancy, labour and delivery or through breastfeeding. the best way to prevent transmission is to prevent hiv infection in general, especially in pregnant women, and to prevent unintended pregnan- cies in hiv-positive women. if an hiv-infected woman becomes pregnant, she should be provided with art, safe obstetric care and counselling and support for infant feeding. hiv-infected pregnant women should be given art both to beneﬁ t their own health and to prevent hiv transmission to their infants during pregnancy and breastfeeding. (cid:2) start lifelong art for all pregnant women with hiv infection regardless

sublingual sugar 16, 161 supportive care 293 hiv positive children 240 meningitis 172 measles 178 severe dengue 192 severe pneumonia 83 snake bite 36 suprapubic aspiration 350 surgical problems anaeasthesia 258 ﬂ uid management 261 postoperative care 260 preoperative care 256 syndrome of inappropriate

give end-of-life (terminal) c

{'status': 'success',
 'source': 'PDF',
 'content': 'hiv may be transmitted during pregnancy, labour and delivery or through breastfeeding. the best way to prevent transmission is to prevent hiv infection in general, especially in pregnant women, and to prevent unintended pregnan- cies in hiv-positive women. if an hiv-infected woman becomes pregnant, she should be provided with art, safe obstetric care and counselling and support for infant feeding. hiv-infected pregnant women should be given art both to beneﬁ t their own health and to prevent hiv transmission to their infants during pregnancy and breastfeeding. (cid:2) start lifelong art for all pregnant women with hiv infection regardless\n\nsublingual sugar 16, 161 supportive care 293 hiv positive children 240 meningitis 172 measles 178 severe dengue 192 severe pneumonia 83 snake bite 36 suprapubic aspiration 350 surgical problems anaeasthesia 258 ﬂ uid management 261 postoperative care 260 preoperative care 256 syndrome of inapprop

In [38]:
search_faiss("diagnosis of pneumonia")

0.5211940407752991
4.2.1 severe pneumonia diagnosis cough or difﬁ culty in breathing, plus at least one of the following: ■ central cyanosis or oxygen saturation < 90% on pulse oximetry ■ severe respiratory distress (e.g. grunting, very severe chest indrawing) ■ signs of pneumonia with a general danger sign: – inability to breastfeed or drink,

differential diagnosis in a child presenting with an airways or severe breathing problem differential diagnosis in a child presenting with shock differential diagnosis in a child presenting with lethargy, unconsciousness or convulsions differential diagnosis in a young infant (< 2 months) presenting with lethargy, unconsciousness or convulsions 25 poisoning: amount of activated charcoal per dose 28 differential diagnosis in a child presenting with cough or difﬁ culty in breathing classiﬁ cation of the severity of pneumonia differential diagnosis in a child presenting with wheeze differential diagnosis in a child presenting with stridor different

{'status': 'success',
 'source': 'PDF',
 'content': '4.2.1 severe pneumonia diagnosis cough or difﬁ culty in breathing, plus at least one of the following: ■ central cyanosis or oxygen saturation < 90% on pulse oximetry ■ severe respiratory distress (e.g. grunting, very severe chest indrawing) ■ signs of pneumonia with a general danger sign: – inability to breastfeed or drink,\n\ndifferential diagnosis in a child presenting with an airways or severe breathing problem differential diagnosis in a child presenting with shock differential diagnosis in a child presenting with lethargy, unconsciousness or convulsions differential diagnosis in a young infant (< 2 months) presenting with lethargy, unconsciousness or convulsions 25 poisoning: amount of activated charcoal per dose 28 differential diagnosis in a child presenting with cough or difﬁ culty in breathing classiﬁ cation of the severity of pneumonia differential diagnosis in a child presenting with wheeze differential diagnosis in a chi

In [39]:
from duckduckgo_search import DDGS
import requests
from bs4 import BeautifulSoup

@tool
def search_web(query: str) -> dict:
    """Searches the web using DuckDuckGo, extracts text from up to 3 sites, and lets LLM decide relevance."""

    with DDGS() as ddgs:
        search_results = list(ddgs.text(query, max_results=5))  # Get top 5 results

    if not search_results:
        return {"error": "❌ No search results found."}

    for idx, result in enumerate(search_results):
        top_link = result["href"]

        try:
            response = requests.get(top_link, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            paragraphs = soup.find_all("p")
            extracted_text = "\n".join([p.get_text() for p in paragraphs])

            if extracted_text.strip():  # Check if text is meaningful
                #  Ask LLM if the content is relevant
                relevance_check = llm.invoke(
                    f"Does this text answer the query: '{query}'?\n\nText:\n{extracted_text[:500]}"
                )

                if "yes" in relevance_check.lower():
                    return {
                        "status": "✅ Relevant content found",
                        "source": top_link,
                        "content": extracted_text[:5000]
                    }

                print(f"🔄 Site {idx+1} ({top_link}) was NOT relevant. Trying next...")

        except requests.RequestException:
            print(f"⚠ Failed to fetch {top_link}, skipping...")

    return {"error": "❌ No relevant content found on any website."}


In [40]:
@tool
def do_nothing(query: str) -> dict:
    """A fallback tool for when no search is needed."""
    return {"status": "No action needed", "message": "The query does not require searching."}


In [42]:
from sentence_transformers import util


In [43]:
from langchain_community.llms import Ollama

# Load Mistral
llm = Ollama(model="mistral")

  llm = Ollama(model="mistral")


In [44]:
from langchain.tools import tool  # Correct import
from googlesearch import search

In [45]:
from langchain.memory import ConversationBufferMemory
from langchain.agents import initialize_agent

# Define memory
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [46]:
# Initialize Agent with memory
agent = initialize_agent(
    tools=[search_faiss, search_web,do_nothing],
    llm=llm,
    agent="zero-shot-react-description",
    verbose=True,
    memory=memory,  # Add memory here,
    handle_parsing_errors=True
    

)






  agent = initialize_agent(


In [47]:
while True:
    query = input("💬 Ask me anything (type 'exit' to quit): ")
    
    if query.lower() == "exit":
        print("👋 Goodbye!")
        break

    answer = agent.run(query)  # Agent handles everything, including memory
    print(f"🤖 {answer}")


💬 Ask me anything (type 'exit' to quit):  hello,how are you


  answer = agent.run(query)  # Agent handles everything, including memory




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m This is a greeting and does not require any search. I will respond directly.

Action: do_nothing
Action Input: hello, how are you[0m
Observation: [38;5;200m[1;3m{'status': 'No action needed', 'message': 'The query does not require searching.'}[0m
[32;1m[1;3m I now know the final answer.
Final Answer: Hello! I'm an assistant and I'm here to help you. How can I assist you today?[0m

[1m> Finished chain.[0m
🤖 Hello! I'm an assistant and I'm here to help you. How can I assist you today?


💬 Ask me anything (type 'exit' to quit):  treatment of continuing diarrhoea




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m This question seems to be related to medical information, so it might be beneficial to search a reliable database like FAISS first. However, since I don't know if there is specific information about the treatment of continuing diarrhea in our current dataset, I will also use web search as a fallback.

Action: search_faiss
0.5110689401626587tment of continuing diarrhoea[0m
7.5.4 continuing diarrhoea treatment giardiasis where possible, examine the stools by microscopy. (cid:2) if cysts or trophozoites of giardia lamblia are found, give metronidazole (7.5 mg/kg every 8 h for 7 days). treat with metronidazole if stool microscopy cannot be undertaken or if there is only clinical suspicion of giardiasis. lactose intolerance diarrhoea is only rarely due to lactose intolerance. intolerance should be diag- nosed only if copious watery diarrhoea occurs promptly after milk-based feeds are begun and if the diarrhoea clearly improves w

💬 Ask me anything (type 'exit' to quit):  where is ramsetu




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m It seems like Ramsetu could be a reference to the lost city of Atlantis, or it could be another name for the strait of Gibraltar (Ramesses II Setu was an ancient Egyptian pharaoh who built a causeway between Egypt and the Sinai Peninsula). I'll first check if there's any relevant information about Ramsetu in our PDFs.

Action: search_faiss
0.2581002712249756setu"[0m
e s u

. e s u

d a o
[Document(metadata={'chapter': '(cid:2) Other ventilatory', 'section': None}, page_content='support. (cid:2) iv adrenaline. (cid:2) refer where possible ■ if no hr for > 10 min or remains < 60/min for 20 min, discontinue (see section 3.2.2, p. 50). a positive pressure ventilation should be initiated with air for infants with gestation > 32 weeks. for very preterm infants, it is preferable to start with 30% oxygen if possible. a and b are basic resuscitation steps'), Document(metadata={'chapter': 'Palmar pallor: sign of anaemia', 'section': 

💬 Ask me anything (type 'exit' to quit):  exit


👋 Goodbye!
