In [35]:
%pwd

'c:\\Users\\patel\\OneDrive\\Desktop\\Projects\\CareBot\\CareBot\\research'

In [36]:
import os
os.chdir('../')
%pwd

'c:\\Users\\patel\\OneDrive\\Desktop\\Projects\\CareBot\\CareBot'

In [37]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

Extracting the Data (Gale Encyclopedia of Medicine).

In [38]:
def load_pdf(data):
    loader = DirectoryLoader(data,
                              glob='*.pdf',
                              loader_cls = PyPDFLoader)
    documents = loader.load()
    return documents

In [39]:
extracted_data = load_pdf(data = 'data/')

Chunking the data

In [40]:
def text_chunking(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 800, chunk_overlap = 80)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [41]:
text_chunks = text_chunking(extracted_data)
print('Length of the text chunks:', len(text_chunks))

Length of the text chunks: 3900


In [42]:
from langchain.embeddings import HuggingFaceEmbeddings

def donwload_huggingface_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name= 'sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [43]:
embeddings = donwload_huggingface_embeddings()

In [44]:
query_result = embeddings.embed_query("Hello World!")
print('length :' , len(query_result))


length : 384


Creating Indexes using pinecone

In [45]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from dotenv import load_dotenv
import os

load_dotenv()
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')


pc = Pinecone(api_key = PINECONE_API_KEY)

index_name = "carebot"

if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,
        metric = 'cosine',
        spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
        )
    )


# address the dimension dynamicc.....

In [46]:
import os 
load_dotenv()
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
# OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
# os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

Creating Vector Store

In [47]:
from langchain_pinecone import PineconeVectorStore

vs = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings
    
)

In [None]:

from typing import List
from pydantic import Field
from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from sentence_transformers import CrossEncoder


class CrossEncoderRerankRetriever(BaseRetriever):
    """
    Custom reranking retriever compatible with LangChain 0.3.x (Pydantic v2).
    It reranks documents from a base retriever using a CrossEncoder.
    """

    base_retriever: BaseRetriever = Field(...)     
    model_name: str = Field(default="cross-encoder/ms-marco-MiniLM-L-6-v2")
    top_k: int = Field(default=4)

    
    cross_encoder: CrossEncoder = Field(default=None, exclude=True)

    def __init__(self, **data):
        super().__init__(**data)
        
        self.cross_encoder = CrossEncoder(self.model_name)

    def _get_relevant_documents(self, query: str, *, run_manager=None) -> List[Document]:
      
        docs = self.base_retriever.get_relevant_documents(query)
        if not docs:
            return []

       
        pairs = [[query, doc.page_content] for doc in docs]
        scores = self.cross_encoder.predict(pairs)

        
        ranked = sorted(zip(scores, docs), key=lambda x: x[0], reverse=True)
        top_docs = [doc for _, doc in ranked[: self.top_k]]
        return top_docs

    async def _aget_relevant_documents(self, query: str, *, run_manager=None) -> List[Document]:
        return self._get_relevant_documents(query, run_manager=run_manager)


Loading the Vector Database 

In [50]:
from langchain_pinecone import PineconeVectorStore
vs = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)
vs

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1805da186a0>

In [None]:
# retriever = vs.as_retriever(search_type='similarity', search_kwargs={'k':3})

base_retriever = vs.as_retriever(
    search_kwargs={"k": 10}  
)


retriever = CrossEncoderRerankRetriever(
    base_retriever=base_retriever,
    model_name="cross-encoder/ms-marco-MiniLM-L-6-v2",
    top_k=4  
)

print("Custom cross-encoder reranked retriever is ready.")



To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Custom cross-encoder reranked retriever is ready.


Trial of the search


In [54]:
docs = retriever.invoke('What is back Acne?')
docs

  docs = self.base_retriever.get_relevant_documents(query)


[Document(id='5ae80829-f326-459c-94de-be05351ced69', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 37.0, 'page_label': '38', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='deeper, they are called pustules. The most severe type of\nacne consists of cysts (closed sacs) and nodules (hard\nswellings). Scarring occurs when new skin cells are laid\ndown to replace damaged cells.\nThe most common sites of acne are the face, chest,\nshoulders, and back since these are the parts of the body\nwhere the most sebaceous follicles are found.\nCauses and symptoms\nThe exact cause of acne is unknown. Several risk\nfactors have been identified:\n• Age. Due to the hormonal changes they experience,\nteenagers are more likely to develop acne.\n• Gender. Boys have more severe acne and develop it\nmore often than girls.\n• Disease. Hormonal disorders can complicate a

In [55]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
# from langchain_huggingface.chat_models.huggingface import ChatHuggingFace
# from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
# from langchain_core.prompts import PromptTemplate
# from langchain.chains import RetrievalQA


# huggingface_repo_id = "openai/gpt-oss-20b"

# def load_llm(huggingface_repo_id):
#     llm = HuggingFaceEndpoint(
#         repo_id = huggingface_repo_id,
#         huggingfacehub_api_token=HF_TOKEN,
#         task='conversational',  
#         max_new_tokens = 256,
#         temperature = 0.3           
#     )
#     return llm

# chat = ChatHuggingFace(llm = load_llm(huggingface_repo_id))

#genai

llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",      
    temperature=0.3,
    max_output_tokens=512,
    google_api_key=GOOGLE_API_KEY  
)

memory = ConversationBufferMemory(
    memory_key="chat_history",   
    return_messages=True,
    output_key="answer"
)


In [56]:
system_prompt ="""You are CareBot, a medically-grounded question-answering assistant.
Your ONLY source of truth is the retrieved context provided to you.
Follow these rules strictly:

1. Use ONLY the information found in the provided context.
2. If the context does not contain the answer, say: 
   "I don’t know based on the provided information."
3. Never add medical facts, explanations, or assumptions that are not in the context.
4. Keep the answer short, clear, and helpful:
      • 2–4 sentences maximum
      • Focus directly on the user’s question
5. When appropriate, briefly summarize as:
      • Definition / What it is
      • Key symptoms / causes (ONLY if in the context)

You must strictly obey the context. No outside knowledge. No guessing. \n\n
    
    Chat history: {chat_history}
    Context : {context}
    Question : {question}
    """


prompt = PromptTemplate(
    template=system_prompt,
    input_variables=["chat_history", "context", "question"]
)

In [57]:
from langchain.chains import ConversationalRetrievalChain
conv_rag_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": prompt},
    return_source_documents=True,
    get_chat_history=lambda h: h  # h is a list of messages; we pass it straight to {chat_history}
)

# Initialize the model
# model = load_llm()

# def get_answer(query, context):
#     """Function to get answer from the model using context"""
#     prompt = f"""Context: {context}
    
# Question: {query}

# Answer the question based on the context above. Keep it concise and within 3 sentences. If you can't find the answer in the context, say "I don't know"."""
    
#     response = model.generate_content(prompt)
#     return response.text

In [None]:

query = "What is acne?"

result = conv_rag_chain.invoke({"question": query})

print("Question:", query)
print("Answer:", result["answer"])


for i, doc in enumerate(result["source_documents"], start=1):
    print(f"\n--- Source {i} ---")
    print(doc.metadata.get("source", "unknown"), "| page:", doc.metadata.get("page"))
    print(doc.page_content[:500], "...")


Question: What is acne?
Answer: Acne vulgaris, or common acne, is the most common skin disease. It is a skin condition that occurs when pores or hair follicles become blocked, allowing sebum, bacteria, and dead skin cells to collect. This can lead to small swellings on the skin surface, which can become inflamed pimples or pus-filled pustules.

--- Source 1 ---
data\Medical_book.pdf | page: 37.0
cells, and bacteria.
Description
Acne vulgaris, the medical term for common acne, is
the most common skin disease. It affects nearly 17 million
people in the United States. While acne can arise at any
age, it usually begins at puberty and worsens during ado-
lescence. Nearly 85% of people develop acne at some time
between the ages of 12-25 years. Up to 20% of women
develop mild acne. It is also found in some newborns.
The sebaceous glands lie just beneath the skin’s sur-
face. They produce an oi ...

--- Source 2 ---
data\Medical_book.pdf | page: 26.0
Staphylococcus aureus . Acne is a similar c

In [59]:
eval_data = [
    {
        "question": "What is achalasia?",
        "reference": (
            "Achalasia is a disorder of the esophagus that prevents normal swallowing. "
            "The lower esophageal sphincter fails to relax properly, blocking food from entering the stomach."
        )
    },
    {
        "question": "What causes achalasia?",
        "reference": (
            "Achalasia is caused by degeneration of the nerve cells that normally signal the esophageal sphincter to relax. "
            "The exact cause is unknown, but autoimmune disease or hidden infection is suspected."
        )
    },
    {
        "question": "What are the main symptoms of achalasia?",
        "reference": (
            "Symptoms include dysphagia for liquids and solids, sensation of food getting stuck, chest pain resembling angina, "
            "heartburn, difficulty belching, nighttime coughing, and recurrent pneumonia."
        )
    },
    {
        "question": "What is amyloidosis?",
        "reference": (
            "Amyloidosis is a progressive, incurable metabolic disease in which abnormal amyloid proteins accumulate in organs "
            "or body systems, impairing their function and potentially causing organ failure."
        )
    },
    {
        "question": "What are the major types of amyloidosis?",
        "reference": (
            "Major types include primary systemic amyloidosis, secondary amyloidosis from chronic infection or inflammation, "
            "familial or hereditary amyloidosis, and an amyloidosis associated with Alzheimer's disease."
        )
    },
    {
        "question": "What is aplastic anemia?",
        "reference": (
            "Aplastic anemia is a disorder in which the bone marrow greatly decreases or stops production of blood cells. "
            "The hematopoietic cells responsible for generating blood cells are decreased or absent and are replaced by fat."
        )
    },
    {
        "question": "What are the main causes of acquired aplastic anemia?",
        "reference": (
            "Acquired aplastic anemia can result from exposure to anti-cancer drugs, antibiotics, anti-inflammatory agents, "
            "seizure medications, antithyroid drugs, radiation, benzene and insecticides, viral infections such as hepatitis "
            "or Epstein-Barr virus, pregnancy, and autoimmune disorders such as graft-vs-host disease."
        )
    },
    {
        "question": "What is iron deficiency anemia?",
        "reference": (
            "Iron deficiency anemia is the most common form of anemia. It occurs when the body loses more iron than it derives "
            "from food, causing fewer and smaller red blood cells to be produced. Early stages may show no symptoms."
        )
    },
    {
        "question": "What are symptoms of vitamin B12 deficiency anemia?",
        "reference": (
            "Symptoms of vitamin B12 deficiency anemia include loss of muscle control, loss of sensation in the hands, legs, and feet, "
            "soreness or burning of the tongue, weight loss, and yellow-blue color blindness."
        )
    },
    {
        "question": "What complications can occur in hemolytic anemia?",
        "reference": (
            "Hemolytic anemia can enlarge the spleen and accelerate red blood cell destruction. Complications include pain, shock, "
            "gallstones, and other serious health problems."
        )
    }
]


In [None]:
import math
import re

def normalize_answer(s: str) -> str:
    s = s.lower()
    s = re.sub(r'[^a-z0-9\s]', ' ', s)
    s = re.sub(r'\s+', ' ', s).strip()
    return s

def ngram_counts(tokens, n):
    return {tuple(tokens[i:i+n]): 1 for i in range(len(tokens)-n+1)}

def compute_bleu(pred: str, ref: str) -> float:
    pred_tokens = normalize_answer(pred).split()
    ref_tokens = normalize_answer(ref).split()

    if not pred_tokens or not ref_tokens:
        return 0.0

    precisions = []
    for n in range(1, 5):  
        pred_ngrams = ngram_counts(pred_tokens, n)
        ref_ngrams = ngram_counts(ref_tokens, n)

        match = sum(1 for ng in pred_ngrams if ng in ref_ngrams)
        total = max(len(pred_tokens) - n + 1, 1)
        precisions.append(match / total)

  
    score = math.exp(sum(math.log(p + 1e-9) for p in precisions) / 4)

    
    ref_len = len(ref_tokens)
    pred_len = len(pred_tokens)
    if pred_len >= ref_len:
        bp = 1
    else:
        bp = math.exp(1 - ref_len / pred_len)

    return bp * score

from difflib import SequenceMatcher

def exact_match(pred: str, ref: str) -> bool:
    return normalize_answer(pred) == normalize_answer(ref)

def fuzzy_match(pred: str, ref: str, threshold=0.75):
    return SequenceMatcher(None, normalize_answer(pred), normalize_answer(ref)).ratio() >= threshold


In [None]:
import time
import json

def evaluate_chatbot(
    examples,
    max_samples=None,
    sleep_s=12 
):
    """
    Evaluate Carebot on a small set of Q/A pairs.

    examples: list of {"question": ..., "reference": ...}
    max_samples: cap number of evaluated examples (optional)
    sleep_s: seconds to sleep between calls to avoid rate limits
    """
    results = []
    n = len(examples) if max_samples is None else min(len(examples), max_samples)

    for i in range(n):
        q = examples[i]["question"]
        ref = examples[i]["reference"]

        print(f"\n[{i+1}/{n}] Question: {q}")
       
        out = conv_rag_chain.invoke({"question": q})
        pred = out["answer"]

       
        bleu = compute_bleu(pred, ref)
        em = exact_match(pred, ref)
        fm = fuzzy_match(pred, ref)

        results.append({
            "question": q,
            "reference": ref,
            "prediction": pred,
            "bleu": bleu,
            "exact_match": em,
            "fuzzy_match": fm
        })

        print("Prediction:", pred[:300].replace("\n", " "), "...")
        print("Reference :", ref[:300].replace("\n", " "), "...")
        print(f"BLEU={bleu:.3f}, exact={em}, fuzzy={fm}")

        
        if sleep_s and i < n - 1:
            time.sleep(sleep_s)

    
    avg_bleu = sum(r["bleu"] for r in results) / len(results)
    acc_exact = sum(1 for r in results if r["exact_match"]) / len(results)
    acc_fuzzy = sum(1 for r in results if r["fuzzy_match"]) / len(results)

    print("\n=== Overall metrics ===")
    print(f"Average BLEU: {avg_bleu:.3f}")
    print(f"Exact-match accuracy: {acc_exact:.3f}")
    print(f"Fuzzy-match accuracy: {acc_fuzzy:.3f}")

    metrics = {
        "avg_bleu": avg_bleu,
        "exact_acc": acc_exact,
        "fuzzy_acc": acc_fuzzy
    }
    return results, metrics


In [62]:
results, metrics = evaluate_chatbot(
    eval_data,
    max_samples=5,
    sleep_s=15
)


[1/5] Question: What is achalasia?
Prediction: Achalasia is a disorder of the esophagus that prevents normal swallowing. It affects the tube that carries swallowed food from the back of the throat into the stomach. The most common symptom is dysphagia, or difficulty swallowing, where a person often feels that food "gets stuck" on the way down. ...
Reference : Achalasia is a disorder of the esophagus that prevents normal swallowing. The lower esophageal sphincter fails to relax properly, blocking food from entering the stomach. ...
BLEU=0.216, exact=False, fuzzy=False

[2/5] Question: What causes achalasia?
Prediction: Achalasia is caused by the degeneration of nerve cells that normally signal the brain to relax the esophageal sphincter. The ultimate cause of this degeneration is unknown, but autoimmune disease or hidden infection is suspected. ...
Reference : Achalasia is caused by degeneration of the nerve cells that normally signal the esophageal sphincter to relax. The exact cause 

In [63]:
from datasets import Dataset

from ragas import evaluate
from ragas.metrics import faithfulness
from ragas.run_config import RunConfig

from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import HuggingFaceEmbeddings


In [64]:
ST_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

st_model = SentenceTransformer(ST_MODEL_NAME)
hf_embed = HuggingFaceEmbeddings(model_name=ST_MODEL_NAME)


In [None]:
from langchain_community.chat_models import ChatOllama

llm_for_ragas = ChatOllama(
    model="llama3.1",  
    temperature=0.0     
)


In [None]:
from langchain_community.chat_models import ChatOllama


llm_for_ragas = ChatOllama(
    model="llama3.1",   
    temperature=0.0
)

In [None]:
run_config = RunConfig(
    max_workers=1,  
    timeout=120
)


In [None]:
import time

def run_chatbot_eval_and_collect_ragas_data(
    eval_data,
    max_samples=10,
    sleep_s=20  
):
    """
    Uses conv_rag_chain (Gemini) to answer eval questions
    and collects data for ragas faithfulness.
    """
    results = []
    ragas_records = []

    n = min(len(eval_data), max_samples)

    for i in range(n):
        q = eval_data[i]["question"]
        gt = eval_data[i]["reference"]

        print(f"\n[{i+1}/{n}] Question: {q}")
        out = conv_rag_chain.invoke({"question": q})

        pred = out["answer"]
        ctxs = [d.page_content for d in out["source_documents"]]

        print("Prediction:", pred[:200].replace("\n", " "), "...")
        print("Reference :", gt[:200].replace("\n", " "), "...")

        
        results.append({
            "question": q,
            "reference": gt,
            "prediction": pred,
        })

        
        ragas_records.append({
            "question": q,
            "answer": pred,
            "contexts": ctxs,
            "ground_truth": gt,
        })

        if sleep_s and i < n - 1:
            time.sleep(sleep_s)

    ragas_dataset = Dataset.from_list(ragas_records)
    return results, ragas_dataset


In [69]:
results, ragas_dataset = run_chatbot_eval_and_collect_ragas_data(
    eval_data,
    max_samples=10,
    sleep_s=20
)



[1/10] Question: What is achalasia?
Prediction: Achalasia is a disorder of the esophagus that prevents normal swallowing. It affects the tube that carries swallowed food from the back of the throat into the stomach. The most common symptom is dysph ...
Reference : Achalasia is a disorder of the esophagus that prevents normal swallowing. The lower esophageal sphincter fails to relax properly, blocking food from entering the stomach. ...

[2/10] Question: What causes achalasia?
Prediction: Achalasia is caused by the degeneration of nerve cells that normally signal the brain to relax the esophageal sphincter. The ultimate cause of this degeneration is unknown, but autoimmune disease or h ...
Reference : Achalasia is caused by degeneration of the nerve cells that normally signal the esophageal sphincter to relax. The exact cause is unknown, but autoimmune disease or hidden infection is suspected. ...

[3/10] Question: What are the main symptoms of achalasia?
Prediction: The most common sy

In [None]:
faith_result = evaluate(
    ragas_dataset,
    metrics=[faithfulness],  
    llm=llm_for_ragas,        
    embeddings=hf_embed,
    run_config=run_config
)

print(faith_result)


Evaluating: 100%|██████████| 10/10 [04:45<00:00, 28.58s/it]


{'faithfulness': 0.9306}
