# RAG Application Evaluation with Langsmith

# Environment Setup 

In [2]:
import os
from dotenv import load_dotenv

load_dotenv()
langchain_api_key = os.getenv('LANGCHAIN_API_KEY')
langsmith_api_key = os.getenv('LANGSMITH_API_KEY')
huggingface_api_key = os.getenv('HUGGINGFACE_API_KEY')

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGSMITH_TRACING'] = 'true'
os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
os.environ['LANGSMITH_API_KEY'] = langsmith_api_key
os.environ['HUGGINGFACE_API_KEY'] = huggingface_api_key

In [3]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader # Use this
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM
from langchain_core.documents import Document

In [4]:
pdf_files = [
    r"E:\DiabetIQ\LLM\PDFs\BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf",
    r"E:\DiabetIQ\LLM\PDFs\BES-Ramadan-Guideline-2020-min.pdf",
    r"E:\DiabetIQ\LLM\PDFs\Diabetes_Care_BADAS_guideline2019-3.pdf",
    r"E:\DiabetIQ\LLM\PDFs\Insulin-Guideline-min.pdf"
]

all_docs = [] # Will store LangChain Document objects

print("Loading and Processing PDFs...")
for pdf_path in pdf_files:
    try:
        # Extract filename for metadata
        file_name = os.path.basename(pdf_path)
        print(f"-> Loading: {file_name}")

        loader = PyPDFLoader(pdf_path)
        # Load pages as individual documents. Each doc will have metadata['page']
        pages = loader.load_and_split() # This does basic splitting

        # Add source filename to metadata for each page/document
        for page_doc in pages:
            page_doc.metadata['source'] = file_name
            # Optional: clean up page content slightly if needed
            # page_doc.page_content = page_doc.page_content.replace('\n', ' ').strip()

        all_docs.extend(pages)
        print(f"   Loaded {len(pages)} pages.")

    except Exception as e:
        print(f"Error loading {pdf_path}: {e}")

print(f"\nTotal documents loaded: {len(all_docs)}")
if all_docs:
    print("\nSample Document Metadata (first doc):")
    print(all_docs[0].metadata)
    print("\nSample Document Content (first 500 chars of first doc):")
    print(all_docs[0].page_content[:500])
else:
    print("\nNo documents were loaded successfully.")
    # Consider exiting or handling this error appropriately
    exit()

Loading and Processing PDFs...
-> Loading: BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf
   Loaded 38 pages.
-> Loading: BES-Ramadan-Guideline-2020-min.pdf
   Loaded 46 pages.
-> Loading: Diabetes_Care_BADAS_guideline2019-3.pdf
   Loaded 79 pages.
-> Loading: Insulin-Guideline-min.pdf
   Loaded 93 pages.

Total documents loaded: 256

Sample Document Metadata (first doc):
{'producer': 'Nitro PDF PrimoPDF', 'creator': 'PrimoPDF http://www.primopdf.com', 'creationdate': '2020-06-07T20:17:39-06:00', 'moddate': '2020-06-07T20:17:39-06:00', 'title': 'Microsoft Word - BES COVID Pract Recomnd 06 June Final Copy', 'author': 'Mir', 'source': 'BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf', 'total_pages': 38, 'page': 0, 'page_label': '1'}

Sample Document Content (first 500 chars of first doc):
Bangladesh Endocrine Society (BES) 
Practical Recommendations for Management of 
Diabetes and Other Endocrine Diseases in Patients with 
COVID-19 
 
 
 
 
 
Published Online June 2020 
 
 
All rights res

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    # Keep separators that make sense for text structure
    separators=["\n\n", "\n", ". ", ", ", " ", ""],
    length_function=len,
)

In [6]:
chunks = text_splitter.split_documents(all_docs)

print(f"\nTotal chunks created: {len(chunks)}")
if chunks:
    print("\nSample Chunk Metadata (first chunk):")
    print(chunks[0].metadata)
    print("\nSample Chunk Content (first 500 chars):")
    print(chunks[0].page_content[:500])
else:
    print("\nNo chunks were created. Check splitting process.")
    exit()


Total chunks created: 702

Sample Chunk Metadata (first chunk):
{'producer': 'Nitro PDF PrimoPDF', 'creator': 'PrimoPDF http://www.primopdf.com', 'creationdate': '2020-06-07T20:17:39-06:00', 'moddate': '2020-06-07T20:17:39-06:00', 'title': 'Microsoft Word - BES COVID Pract Recomnd 06 June Final Copy', 'author': 'Mir', 'source': 'BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf', 'total_pages': 38, 'page': 0, 'page_label': '1'}

Sample Chunk Content (first 500 chars):
Bangladesh Endocrine Society (BES) 
Practical Recommendations for Management of 
Diabetes and Other Endocrine Diseases in Patients with 
COVID-19 
 
 
 
 
 
Published Online June 2020 
 
 
All rights reserved by: Bangladesh Endocrine Society (BES) 
 
 
Published by 
Bangladesh Endocrine Society (BES) 
Website: http://bes-org.net 
E-mail: 
endobd2012@gmail.com


In [7]:
print("\nInitializing Embedding Model...")
embedding_model = HuggingFaceEmbeddings(model_name="intfloat/e5-small-v2")

print("\nCreating Vector Store (ChromaDB)...")
# Chroma.from_documents handles Document objects directly
# Consider adding persistence: persist_directory="./chroma_db_diabetiq"
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    # persist_directory="./chroma_db_diabetiq" # Uncomment to save DB locally
)
# If persisting: vectorstore.persist()

# To load later:
# vectorstore = Chroma(persist_directory="./chroma_db_diabetiq", embedding_function=embedding_model)

print("Vector Store Created.")


Initializing Embedding Model...

Creating Vector Store (ChromaDB)...
Vector Store Created.


In [8]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 5}) # Retrieve top 5 chunks

In [9]:
print(f"Retriever configured (using k={retriever.search_kwargs.get('k', 'default')}).")

Retriever configured (using k=5).


In [10]:
from langchain_core.prompts import PromptTemplate
prompt_template = """
You are DiabetIQ, an AI assistant specializing in diabetes management for patients in Bangladesh, based *strictly* on the provided context documents.

Context Documents:
{context}

Based *only* on the information in the numbered context documents above, answer the following question concisely and directly.
Your advice should be actionable and consider general practices relevant to Bangladesh where possible (e.g., common foods mentioned in context, local guidelines if present in context).
Do *not* add information that is not present in the context.
If the context does not contain the answer, state that clearly.
Always conclude your response by advising the user to consult a healthcare professional for personalized medical advice.

Question: {question}

Answer:
"""

prompt = PromptTemplate.from_template(prompt_template)

In [11]:
print("Initializing LLM (Ollama - Mistral)...")
llm = OllamaLLM(model="mistral")

Initializing LLM (Ollama - Mistral)...


In [12]:
def format_docs_with_metadata(docs):
    """Formats retrieved documents including source and page."""
    formatted_strings = []
    for i, doc in enumerate(docs):
        metadata_str = f"Source: {doc.metadata.get('source', 'N/A')}, Page: {doc.metadata.get('page', 'N/A')}"
        content_str = doc.page_content.replace('\n', ' ').strip()
        formatted_strings.append(f"{i+1}. [{metadata_str}] {content_str}")
    return "\n\n".join(formatted_strings)

In [13]:
rag_chain = (
    {"context": retriever | format_docs_with_metadata, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print("RAG Chain constructed.")

RAG Chain constructed.


In [14]:
print("\n--- Querying RAG Chain ---")
question = "How can I control my blood sugar level with diet according to the textbook?" 


print(f"Question: {question}")

try:
    response = rag_chain.invoke(question)
    print("\nResponse:")
    print(response)
except Exception as e:
    print(f"\nError during RAG chain invocation: {e}")


--- Querying RAG Chain ---
Question: How can I control my blood sugar level with diet according to the textbook?

Response:
1. Emphasize consumption of fruits, legumes, whole grains, and dairy products if you are on insulin therapy. You should receive education on carbohydrate counting.
2. Avoid sugar-sweetened beverages (including fruit juices) to control glycemia, weight, reduce the risk for cardiovascular disease and fatty liver. Eat at least 2 or 3 servings of fruits and vegetables daily.
3. Maintain a regular diet containing approximately 150 grams of carbohydrate per day before undergoing an OGTT (Oral Glucose Tolerance Test). The test should be done in the morning after fasting for 8-14 hours, and you should not smoke or engage in physical stress during the test.
4. Ensure a balanced diet with sufficient protein from animal and plant sources like fish, meat, egg, milk, cheese, seeds, and nuts. Daily protein intake should be approximately 1 gram per kg body weight for older indi

In [15]:
print("\n--- Querying RAG Chain ---")
question = "What does the BADAS guideline say about insulin initiation?"

print(f"Question: {question}")

try:
    response = rag_chain.invoke(question)
    print("\nResponse:")
    print(response)
except Exception as e:
    print(f"\nError during RAG chain invocation: {e}")


--- Querying RAG Chain ---
Question: What does the BADAS guideline say about insulin initiation?

Response:
 The BADAS Guideline 2019 suggests that in major surgeries, glucose-insulin infusion should be started. For regular practice, if a person is on insulin, both intermediate or long acting insulin and the dose may need to be reduced. Shorter acting insulin should be adjusted according to blood glucose values and food intake. However, it's important to consult with a healthcare professional for personalized medical advice.


In [16]:
print("\n--- Querying RAG Chain ---")
question = "Tell me about managing diabetes during Ramadan based on the provided texts." 

print(f"Question: {question}")

try:
    response = rag_chain.invoke(question)
    print("\nResponse:")
    print(response)
except Exception as e:
    print(f"\nError during RAG chain invocation: {e}")


--- Querying RAG Chain ---
Question: Tell me about managing diabetes during Ramadan based on the provided texts.

Response:
 To manage diabetes during Ramadan in Bangladesh, it is recommended to follow these practices based on the information provided:

1. Maintain a balanced diet: While fasting, consume meals that are nutritious and well-balanced. Try to include foods rich in fiber, lean proteins, and complex carbohydrates like brown rice, whole wheat roti, lentils, fruits, and vegetables.

2. Monitor blood sugar levels: Regularly check your blood sugar levels before meals and at night. Fasting can affect blood sugar control, so it is crucial to monitor levels carefully during this time.

3. Hydrate yourself: Drink plenty of water or fluids when you break your fast in the evening and during suhoor (the pre-fast meal). Avoid sugary drinks as they may cause a sudden spike in blood sugar levels.

4. Adjust medication schedule: If taking insulin, it might be necessary to adjust dosage an

In [17]:
print("\n--- Querying RAG Chain ---")
question = "I have diabetes. Can I eat sweets?" 



print(f"Question: {question}")

try:
    response = rag_chain.invoke(question)
    print("\nResponse:")
    print(response)
except Exception as e:
    print(f"\nError during RAG chain invocation: {e}")


--- Querying RAG Chain ---
Question: I have diabetes. Can I eat sweets?

Response:
 It is advisable to limit or avoid sweets, especially sugar-sweetened beverages like candy and desserts, as they can negatively impact your blood glucose levels and potentially lead to weight gain, which may increase the risk of complications for people with diabetes. Instead, focus on a diet rich in fruits, legumes, whole grains, dairy products, and lean proteins from both animal and plant sources. As always, it is essential to consult your healthcare professional for personalized medical advice.


# Dataset

In [21]:
from langsmith import Client

client = Client()

examples = [
    {
        "inputs": {"question": "What are common symptoms of low blood sugar (hypoglycemia)?"},
        "outputs": {"answer": "Common symptoms of hypoglycemia include shakiness, sweating, dizziness, confusion, rapid heartbeat, hunger, and irritability. Severe cases can lead to loss of consciousness. It's important to treat it promptly."},
    },
    {
        "inputs": {"question": "How can I use the DiabetIQ app to log my meals?"},
        "outputs": {"answer": "In the DiabetIQ app, navigate to the 'Log' or 'Diary' section, select 'Meal', and enter details like the food items, portion sizes, estimated carbohydrates, and the time of the meal. Saving this helps track your dietary intake."},
    },
    {
        "inputs": {"question": "Why is monitoring blood glucose levels important for diabetes management?"},
        "outputs": {"answer": "Monitoring blood glucose helps you understand how food, activity, medication, and stress affect your levels. This information empowers you and your healthcare team to make informed decisions about your treatment plan to maintain target ranges and prevent complications."},
    },
    {
        "inputs": {"question": "Can DiabetIQ help predict my risk of high blood sugar?"},
        "outputs": {"answer": "DiabetIQ uses machine learning based on your logged data (like meals, activity, glucose readings) to identify patterns and potentially indicate an increased short-term risk of high blood sugar (hyperglycemia). This feature is for informational purposes to help you be proactive and should be discussed with your healthcare provider."},
    },
    {
        "inputs": {"question": "What type of exercise is good for managing diabetes?"},
        "outputs": {"answer": "A combination of aerobic exercise (like brisk walking, swimming, cycling) and resistance training (like lifting weights or using resistance bands) is generally recommended. Always consult your doctor before starting any new exercise program to ensure it's safe and appropriate for you."}
    }
]


# Create the dataset and examples in LangSmith
dataset_name = "DiabetIQ Chatbot Q&A"
dataset = client.create_dataset(dataset_name=dataset_name)
client.create_examples(
    dataset_id=dataset.id,
    examples=examples
)

{'example_ids': ['7293e7dd-c0f8-491c-9067-efeb0de32c46',
  '70c8275e-bfc6-4cee-8f4c-e0e8e5216c37',
  '8208f035-3082-4f01-9e8f-c920a97d99b2',
  '8a60cad6-4a3e-49c9-8af6-da5683439416',
  '18523223-c8ec-4250-865c-dcfeb8e94d1f'],
 'count': 5}

# Correctness: Response vs reference answer

In [24]:
from typing_extensions import Annotated, TypedDict
from langchain_core.output_parsers import JsonOutputParser


class CorrectnessGrade(TypedDict):
    explanation: Annotated[str, ..., "Detailed clinical reasoning for diabetes management accuracy"]
    correct: Annotated[bool, ..., "True if answer is medically accurate for diabetes care"]


# Grade prompt for diabetes management
diabetes_correctness_instructions = """You are a medical professional evaluating diabetes-related advice.

You will be given:
- A PATIENT QUESTION about diabetes management
- The MEDICALLY VALID ANSWER (ground truth)
- The SYSTEM'S RESPONSE to the patient

Evaluation Criteria:
1. Medical Accuracy: Must match current diabetes treatment guidelines
2. Safety: Must not contain dangerous or misleading advice
3. Completeness: Should cover key aspects of the question
4. Clarity: Should be understandable for patients

Special Considerations:
- Be strict with medication/dosage information
- Flag any advice that contradicts standard care
- Note if answer oversimplifies complex topics
- Consider cultural sensitivity in dietary advice

Respond with a JSON object containing:
- "explanation": your detailed analysis
- "correct": boolean for medical accuracy
- "safety_flag": boolean for dangerous advice
- "completeness": score from 0-1

Provide detailed analysis before concluding."""

# Configure the Ollama Mistral LLM
medical_grader_llm = OllamaLLM(
    model="mistral",
    temperature=0,
    system=diabetes_correctness_instructions
)

# Set up JSON output parser
output_parser = JsonOutputParser(pydantic_object=DiabetesCorrectnessGrade)

def evaluate_diabetes_response(inputs: dict, outputs: dict, reference_outputs: dict) -> dict:
    """Evaluator for DiabetIQ chatbot responses using Ollama Mistral"""
    evaluation_context = f"""\
PATIENT QUESTION: {inputs['question']}
MEDICAL STANDARD ANSWER: {reference_outputs['answer']}
SYSTEM RESPONSE: {outputs['answer']}"""

    # Run medical evaluation
    response = medical_grader_llm.invoke(evaluation_context)
    grade = output_parser.parse(response)
    
    return {
        "is_correct": grade["correct"],
        "is_safe": not grade["safety_flag"],
        "completeness_score": grade["completeness"],
        "explanation": grade["explanation"]
    }