In [35]:
%pwd

'c:\\Users\\patel\\OneDrive\\Desktop\\Projects\\CareBot\\CareBot\\research'

In [36]:
import os
os.chdir('../')
%pwd

'c:\\Users\\patel\\OneDrive\\Desktop\\Projects\\CareBot\\CareBot'

In [37]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

Extracting the Data (Gale Encyclopedia of Medicine).

In [38]:
def load_pdf(data):
    loader = DirectoryLoader(data,
                              glob='*.pdf',
                              loader_cls = PyPDFLoader)
    documents = loader.load()
    return documents

In [39]:
extracted_data = load_pdf(data = 'data/')

Chunking the data

In [40]:
def text_chunking(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 800, chunk_overlap = 80)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [41]:
text_chunks = text_chunking(extracted_data)
print('Length of the text chunks:', len(text_chunks))

Length of the text chunks: 3900


In [42]:
from langchain.embeddings import HuggingFaceEmbeddings

def donwload_huggingface_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name= 'sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [43]:
embeddings = donwload_huggingface_embeddings()

In [44]:
query_result = embeddings.embed_query("Hello World!")
print('length :' , len(query_result))


length : 384


Creating Indexes using pinecone

In [45]:
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from dotenv import load_dotenv
import os

load_dotenv()
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')


pc = Pinecone(api_key = PINECONE_API_KEY)

index_name = "carebot"

if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=384,
        metric = 'cosine',
        spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
        )
    )


# address the dimension dynamicc.....

In [46]:
import os 
load_dotenv()
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
# OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
# os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

Creating Vector Store

In [47]:
from langchain_pinecone import PineconeVectorStore

vs = PineconeVectorStore.from_documents(
    documents=text_chunks,
    index_name=index_name,
    embedding=embeddings
    
)

In [48]:
# ====== RERANKER SETUP (WORKING FOR ALL LANGCHAIN V1.x) ======
from langchain_community.document_transformers import HuggingFaceCrossEncoder
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever

# Step 1: Base retriever from Pinecone
base_retriever = vectorstore.as_retriever(
    search_kwargs={"k": 10}  # get top 10 candidates first
)

# Step 2: Local cross-encoder reranker (NO API USE)
reranker = HuggingFaceCrossEncoder(
    model_name="cross-encoder/ms-marco-MiniLM-L-6-v2",
    max_length=512
)

# Step 3: Reranking pipeline
compressor = DocumentCompressorPipeline(
    transformers=[reranker]
)

# Step 4: Final reranked retriever
retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_document_compressor=compressor
)

print("Reranker loaded. Using reranked retriever.")


ImportError: cannot import name 'HuggingFaceCrossEncoder' from 'langchain_community.document_transformers' (c:\Users\patel\anaconda3\envs\carebot\lib\site-packages\langchain_community\document_transformers\__init__.py)

Loading the Vector Database 

In [6]:
from langchain_pinecone import PineconeVectorStore
vs = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)
vs

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x18003e3ef50>

In [7]:
retriever = vs.as_retriever(search_type='similarity', search_kwargs={'k':3})

Trial of the search


In [8]:
docs = retriever.invoke('What is back Acne?')
docs

[Document(id='35ac5c5b-2f93-4556-ab7f-c3aae5d2ddc0', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'page_label': '40', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='8e23d5b0-8be5-4563-bd66-d7aba55e878c', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 39.0, 'page_label': '40', 'producer': 'PDFlib+PDI 5.0.0 (SunOS)', 'source': 'data\\Medical_book.pdf', 'total_pages': 637.0}, page_content='GALE ENCYCLOPEDIA OF MEDICINE 226\nAcne\nGEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26'),
 Document(id='13da37c6-f093-49c7-82af-3f4b572425c8', metadata={'creationdate': '2004-12-18T17:00:02-05:00', 'creator': 'PyPDF', 'moddate': '2004-12-18T16:15:31-06:00', 'page': 37.0, 

In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
# from langchain_huggingface.chat_models.huggingface import ChatHuggingFace
# from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
# from langchain_core.prompts import PromptTemplate
# from langchain.chains import RetrievalQA


# huggingface_repo_id = "openai/gpt-oss-20b"

# def load_llm(huggingface_repo_id):
#     llm = HuggingFaceEndpoint(
#         repo_id = huggingface_repo_id,
#         huggingfacehub_api_token=HF_TOKEN,
#         task='conversational',  
#         max_new_tokens = 256,
#         temperature = 0.3           
#     )
#     return llm

# chat = ChatHuggingFace(llm = load_llm(huggingface_repo_id))

#genai

llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",      
    temperature=0.3,
    max_output_tokens=512,
    google_api_key=GOOGLE_API_KEY  
)

memory = ConversationBufferMemory(
    memory_key="chat_history",   
    return_messages=True,
    output_key="answer"
)


  memory = ConversationBufferMemory(


In [10]:
system_prompt ="""You are an assistant for the question answer tasks. Use the following pieces of retrieved context to answer the question. If you don't 
    know the asnwer, say that you don't know. Don't provide anything out of the given context. Use three sentences maximum and keep the answer concise. \n\n
    
    Chat history: {chat_history}
    Context : {context}
    Question : {question}
    """


prompt = PromptTemplate(
    template=system_prompt,
    input_variables=["chat_history", "context", "question"]
)

In [11]:
from langchain.chains import ConversationalRetrievalChain
conv_rag_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": prompt},
    return_source_documents=True,
    get_chat_history=lambda h: h  # h is a list of messages; we pass it straight to {chat_history}
)

# Initialize the model
# model = load_llm()

# def get_answer(query, context):
#     """Function to get answer from the model using context"""
#     prompt = f"""Context: {context}
    
# Question: {query}

# Answer the question based on the context above. Keep it concise and within 3 sentences. If you can't find the answer in the context, say "I don't know"."""
    
#     response = model.generate_content(prompt)
#     return response.text

In [12]:
# Simple one-shot test using conv_rag_chain
query = "What is acne?"

result = conv_rag_chain.invoke({"question": query})

print("Question:", query)
print("Answer:", result["answer"])

# Optional: see what documents were used
for i, doc in enumerate(result["source_documents"], start=1):
    print(f"\n--- Source {i} ---")
    print(doc.metadata.get("source", "unknown"), "| page:", doc.metadata.get("page"))
    print(doc.page_content[:500], "...")


Question: What is acne?
Answer: Acne is the general name given to a skin disorder in which the sebaceous glands become inflamed. It is also referred to as Acne vulgaris.

--- Source 1 ---
data\Medical_book.pdf | page: 39.0
GALE ENCYCLOPEDIA OF MEDICINE 226
Acne
GEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26 ...

--- Source 2 ---
data\Medical_book.pdf | page: 39.0
GALE ENCYCLOPEDIA OF MEDICINE 226
Acne
GEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 26 ...

--- Source 3 ---
data\Medical_book.pdf | page: 38.0
GALE ENCYCLOPEDIA OF MEDICINE 2 25
Acne
Acne vulgaris affecting a womanâ€™s face. Acne is the general
name given to a skin disorder in which the sebaceous
glands become inflamed. (Photograph by Biophoto Associ-
ates, Photo Researchers, Inc. Reproduced by permission.)
GEM - 0001 to 0432 - A  10/22/03 1:41 PM  Page 25 ...


In [13]:
eval_data = [
    {
        "question": "What is achalasia?",
        "reference": (
            "Achalasia is a disorder of the esophagus that prevents normal swallowing. "
            "The lower esophageal sphincter fails to relax properly, blocking food from entering the stomach."
        )
    },
    {
        "question": "What causes achalasia?",
        "reference": (
            "Achalasia is caused by degeneration of the nerve cells that normally signal the esophageal sphincter to relax. "
            "The exact cause is unknown, but autoimmune disease or hidden infection is suspected."
        )
    },
    {
        "question": "What are the main symptoms of achalasia?",
        "reference": (
            "Symptoms include dysphagia for liquids and solids, sensation of food getting stuck, chest pain resembling angina, "
            "heartburn, difficulty belching, nighttime coughing, and recurrent pneumonia."
        )
    },
    {
        "question": "What is amyloidosis?",
        "reference": (
            "Amyloidosis is a progressive, incurable metabolic disease in which abnormal amyloid proteins accumulate in organs "
            "or body systems, impairing their function and potentially causing organ failure."
        )
    },
    {
        "question": "What are the major types of amyloidosis?",
        "reference": (
            "Major types include primary systemic amyloidosis, secondary amyloidosis from chronic infection or inflammation, "
            "familial or hereditary amyloidosis, and an amyloidosis associated with Alzheimer's disease."
        )
    }
]


In [14]:
import math
import re

def normalize_answer(s: str) -> str:
    s = s.lower()
    s = re.sub(r'[^a-z0-9\s]', ' ', s)
    s = re.sub(r'\s+', ' ', s).strip()
    return s

def ngram_counts(tokens, n):
    return {tuple(tokens[i:i+n]): 1 for i in range(len(tokens)-n+1)}

def compute_bleu(pred: str, ref: str) -> float:
    pred_tokens = normalize_answer(pred).split()
    ref_tokens = normalize_answer(ref).split()

    if not pred_tokens or not ref_tokens:
        return 0.0

    precisions = []
    for n in range(1, 5):  # up to 4-gram BLEU
        pred_ngrams = ngram_counts(pred_tokens, n)
        ref_ngrams = ngram_counts(ref_tokens, n)

        match = sum(1 for ng in pred_ngrams if ng in ref_ngrams)
        total = max(len(pred_tokens) - n + 1, 1)
        precisions.append(match / total)

    # geometric mean
    score = math.exp(sum(math.log(p + 1e-9) for p in precisions) / 4)

    # brevity penalty
    ref_len = len(ref_tokens)
    pred_len = len(pred_tokens)
    if pred_len >= ref_len:
        bp = 1
    else:
        bp = math.exp(1 - ref_len / pred_len)

    return bp * score

from difflib import SequenceMatcher

def exact_match(pred: str, ref: str) -> bool:
    return normalize_answer(pred) == normalize_answer(ref)

def fuzzy_match(pred: str, ref: str, threshold=0.75):
    return SequenceMatcher(None, normalize_answer(pred), normalize_answer(ref)).ratio() >= threshold


In [15]:
import time
import json

def evaluate_chatbot(
    examples,
    max_samples=None,
    sleep_s=12 
):
    """
    Evaluate Carebot on a small set of Q/A pairs.

    examples: list of {"question": ..., "reference": ...}
    max_samples: cap number of evaluated examples (optional)
    sleep_s: seconds to sleep between calls to avoid rate limits
    """
    results = []
    n = len(examples) if max_samples is None else min(len(examples), max_samples)

    for i in range(n):
        q = examples[i]["question"]
        ref = examples[i]["reference"]

        print(f"\n[{i+1}/{n}] Question: {q}")
        # === Model call ===
        out = conv_rag_chain.invoke({"question": q})
        pred = out["answer"]

        # === Metrics ===
        bleu = compute_bleu(pred, ref)
        em = exact_match(pred, ref)
        fm = fuzzy_match(pred, ref)

        results.append({
            "question": q,
            "reference": ref,
            "prediction": pred,
            "bleu": bleu,
            "exact_match": em,
            "fuzzy_match": fm
        })

        print("Prediction:", pred[:300].replace("\n", " "), "...")
        print("Reference :", ref[:300].replace("\n", " "), "...")
        print(f"BLEU={bleu:.3f}, exact={em}, fuzzy={fm}")

        # Sleep to stay under RPM limits
        if sleep_s and i < n - 1:
            time.sleep(sleep_s)

    # === Aggregate scores ===
    avg_bleu = sum(r["bleu"] for r in results) / len(results)
    acc_exact = sum(1 for r in results if r["exact_match"]) / len(results)
    acc_fuzzy = sum(1 for r in results if r["fuzzy_match"]) / len(results)

    print("\n=== Overall metrics ===")
    print(f"Average BLEU: {avg_bleu:.3f}")
    print(f"Exact-match accuracy: {acc_exact:.3f}")
    print(f"Fuzzy-match accuracy: {acc_fuzzy:.3f}")

    metrics = {
        "avg_bleu": avg_bleu,
        "exact_acc": acc_exact,
        "fuzzy_acc": acc_fuzzy
    }
    return results, metrics


In [None]:
results, metrics = evaluate_chatbot(
    eval_data,
    max_samples=5,
    sleep_s=15
)


[1/5] Question: What is achalasia?
Prediction: Achalasia is a disorder of the esophagus that prevents normal swallowing. It affects the esophagus, which is the tube that carries swallowed food from the back of the throat down into the stomach. ...
Reference : Achalasia is a disorder of the esophagus that prevents normal swallowing. The lower esophageal sphincter fails to relax properly, blocking food from entering the stomach. ...
BLEU=0.333, exact=False, fuzzy=False

[2/5] Question: What causes achalasia?
Prediction: Achalasia is caused by the degeneration of nerve cells that normally signal the brain to relax the esophageal sphincter. This degeneration prevents the esophageal sphincter from relaxing, interrupting normal peristalsis. ...
Reference : Achalasia is caused by degeneration of the nerve cells that normally signal the esophageal sphincter to relax. The exact cause is unknown, but autoimmune disease or hidden infection is suspected. ...
BLEU=0.317, exact=False, fuzzy=False



In [16]:
from datasets import Dataset

from ragas import evaluate
from ragas.metrics import faithfulness
from ragas.run_config import RunConfig

from sentence_transformers import SentenceTransformer
from langchain_community.embeddings import HuggingFaceEmbeddings


In [17]:
ST_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

st_model = SentenceTransformer(ST_MODEL_NAME)
hf_embed = HuggingFaceEmbeddings(model_name=ST_MODEL_NAME)


In [18]:
from langchain_community.chat_models import ChatOllama

llm_for_ragas = ChatOllama(
    model="llama3.1",   # or "llama3.1:8b" / whatever tag you pulled
    temperature=0.0     # deterministic, good for evaluation
)


  llm_for_ragas = ChatOllama(


In [21]:
from langchain_community.chat_models import ChatOllama


llm_for_ragas = ChatOllama(
    model="llama3.1",   # or llama3.2, llama2, etc.
    temperature=0.0
)

In [22]:
run_config = RunConfig(
    max_workers=1,   # 1 job at a time to keep things simple
    timeout=120
)


In [23]:
import time

def run_chatbot_eval_and_collect_ragas_data(
    eval_data,
    max_samples=5,
    sleep_s=5  # small pause between chatbot calls (Gemini)
):
    """
    Uses conv_rag_chain (Gemini) to answer eval questions
    and collects data for ragas faithfulness.
    """
    results = []
    ragas_records = []

    n = min(len(eval_data), max_samples)

    for i in range(n):
        q = eval_data[i]["question"]
        gt = eval_data[i]["reference"]

        print(f"\n[{i+1}/{n}] Question: {q}")
        out = conv_rag_chain.invoke({"question": q})

        pred = out["answer"]
        ctxs = [d.page_content for d in out["source_documents"]]

        print("Prediction:", pred[:200].replace("\n", " "), "...")
        print("Reference :", gt[:200].replace("\n", " "), "...")

        # store for your own offline metrics (optional)
        results.append({
            "question": q,
            "reference": gt,
            "prediction": pred,
        })

        # store for ragas
        ragas_records.append({
            "question": q,
            "answer": pred,
            "contexts": ctxs,
            "ground_truth": gt,
        })

        if sleep_s and i < n - 1:
            time.sleep(sleep_s)

    ragas_dataset = Dataset.from_list(ragas_records)
    return results, ragas_dataset


In [24]:
results, ragas_dataset = run_chatbot_eval_and_collect_ragas_data(
    eval_data,
    max_samples=5,
    sleep_s=5
)



[1/5] Question: What is achalasia?
Prediction: Achalasia is a disorder of the esophagus that prevents normal swallowing. It affects the tube that carries swallowed food from the back of the throat down into the stomach. ...
Reference : Achalasia is a disorder of the esophagus that prevents normal swallowing. The lower esophageal sphincter fails to relax properly, blocking food from entering the stomach. ...

[2/5] Question: What causes achalasia?
Prediction: Achalasia is caused by the degeneration of nerve cells that normally signal the brain to relax the esophageal sphincter. These nerve cells are responsible for allowing food to enter the stomach. The u ...
Reference : Achalasia is caused by degeneration of the nerve cells that normally signal the esophageal sphincter to relax. The exact cause is unknown, but autoimmune disease or hidden infection is suspected. ...

[3/5] Question: What are the main symptoms of achalasia?
Prediction: The most common symptom of achalasia is dysphagia

In [25]:
faith_result = evaluate(
    ragas_dataset,
    metrics=[faithfulness],   # only faithfulness
    llm=llm_for_ragas,        # ðŸ”¥ Llama here, not Gemini
    embeddings=hf_embed,
    run_config=run_config
)

print(faith_result)


Evaluating: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5/5 [02:32<00:00, 30.57s/it]


{'faithfulness': 0.9000}
