# Environment Setup 

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()
langchain_api_key = os.getenv('LANGCHAIN_API_KEY')
huggingface_api_key = os.getenv('HUGGINGFACE_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
os.environ['HUGGINGFACE_API_KEY'] = huggingface_api_key

In [2]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader # Use this
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM
from langchain_core.documents import Document

In [3]:
pdf_files = [
    r"F:\DiabetIQ\LLM\PDFs\BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf",
    r"F:\DiabetIQ\LLM\PDFs\BES-Ramadan-Guideline-2020-min.pdf",
    r"F:\DiabetIQ\LLM\PDFs\Diabetes_Care_BADAS_guideline2019-3.pdf",
    r"F:\DiabetIQ\LLM\PDFs\Insulin-Guideline-min.pdf"
]

all_docs = [] # Will store LangChain Document objects

print("Loading and Processing PDFs...")
for pdf_path in pdf_files:
    try:
        # Extract filename for metadata
        file_name = os.path.basename(pdf_path)
        print(f"-> Loading: {file_name}")

        loader = PyPDFLoader(pdf_path)
        # Load pages as individual documents. Each doc will have metadata['page']
        pages = loader.load_and_split() # This does basic splitting

        # Add source filename to metadata for each page/document
        for page_doc in pages:
            page_doc.metadata['source'] = file_name
            # Optional: clean up page content slightly if needed
            # page_doc.page_content = page_doc.page_content.replace('\n', ' ').strip()

        all_docs.extend(pages)
        print(f"   Loaded {len(pages)} pages.")

    except Exception as e:
        print(f"Error loading {pdf_path}: {e}")

print(f"\nTotal documents loaded: {len(all_docs)}")
if all_docs:
    print("\nSample Document Metadata (first doc):")
    print(all_docs[0].metadata)
    print("\nSample Document Content (first 500 chars of first doc):")
    print(all_docs[0].page_content[:500])
else:
    print("\nNo documents were loaded successfully.")
    # Consider exiting or handling this error appropriately
    exit()

Loading and Processing PDFs...
-> Loading: Diabetes_Care_BADAS_guideline2019-3.pdf
   Loaded 79 pages.
-> Loading: Insulin-Guideline-min.pdf
   Loaded 93 pages.
-> Loading: BES-Ramadan-Guideline-2020-min.pdf
   Loaded 46 pages.
-> Loading: BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf
   Loaded 38 pages.

Total documents loaded: 256

Sample Document Metadata (first doc):
{'producer': 'Online2PDF.com', 'creator': 'Online2PDF.com', 'creationdate': '2025-02-14T08:08:22+01:00', 'source': 'Diabetes_Care_BADAS_guideline2019-3.pdf', 'total_pages': 79, 'page': 0, 'page_label': '1'}

Sample Document Content (first 500 chars of first doc):
DIABETES CARE 
BADAS Guideline 2019 
          
  
   
  
   P|) 
DAS GUELINE ON Man 
DELIT IGEMEN 
  
A Joint Initiative of 
Diabetic Association of Bangladesh 
NCDC Program, Directorate General of Health Services


In [4]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    # Keep separators that make sense for text structure
    separators=["\n\n", "\n", ". ", ", ", " ", ""],
    length_function=len,
)

In [5]:
chunks = text_splitter.split_documents(all_docs)

print(f"\nTotal chunks created: {len(chunks)}")
if chunks:
    print("\nSample Chunk Metadata (first chunk):")
    print(chunks[0].metadata)
    print("\nSample Chunk Content (first 500 chars):")
    print(chunks[0].page_content[:500])
else:
    print("\nNo chunks were created. Check splitting process.")
    exit()


Total chunks created: 702

Sample Chunk Metadata (first chunk):
{'producer': 'Online2PDF.com', 'creator': 'Online2PDF.com', 'creationdate': '2025-02-14T08:08:22+01:00', 'source': 'Diabetes_Care_BADAS_guideline2019-3.pdf', 'total_pages': 79, 'page': 0, 'page_label': '1'}

Sample Chunk Content (first 500 chars):
DIABETES CARE 
BADAS Guideline 2019 
          
  
   
  
   P|) 
DAS GUELINE ON Man 
DELIT IGEMEN 
  
A Joint Initiative of 
Diabetic Association of Bangladesh 
NCDC Program, Directorate General of Health Services


In [6]:
print("\nInitializing Embedding Model...")
embedding_model = HuggingFaceEmbeddings(model_name="intfloat/e5-small-v2")

print("\nCreating Vector Store (ChromaDB)...")
# Chroma.from_documents handles Document objects directly
# Consider adding persistence: persist_directory="./chroma_db_diabetiq"
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    # persist_directory="./chroma_db_diabetiq" # Uncomment to save DB locally
)
# If persisting: vectorstore.persist()

# To load later:
# vectorstore = Chroma(persist_directory="./chroma_db_diabetiq", embedding_function=embedding_model)

print("Vector Store Created.")


Initializing Embedding Model...

Creating Vector Store (ChromaDB)...
Vector Store Created.


In [7]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 5}) # Retrieve top 5 chunks

In [8]:
print(f"Retriever configured (using k={retriever.search_kwargs.get('k', 'default')}).")

Retriever configured (using k=5).


In [9]:
from langchain_core.prompts import PromptTemplate
prompt_template = """
You are DiabetIQ, an AI assistant specializing in diabetes management for patients in Bangladesh, based *strictly* on the provided context documents (diabetes guidelines and textbooks).

Context Documents:
{context}

Based *only* on the information in the numbered context documents above, answer the following question.
Be concise but specific. If the context discusses strategies for managing occasional intake of high-sugar foods (like sweets), explain those strategies clearly and actionably. Mention portion control, timing relative to meals, carbohydrate counting/exchange, and the role of sugar substitutes if discussed in the context.
Consider general dietary principles relevant to Bangladesh if mentioned in the context.
Do *not* add information or recommendations *not* found in the context documents.
If the context strictly advises against all sweets with no exceptions or strategies mentioned, state that clearly.
Always conclude your response by strongly advising the user to consult a healthcare professional or registered dietitian for personalized medical advice tailored to their specific situation.

Question: {question}

Answer:
"""

prompt = PromptTemplate.from_template(prompt_template)

In [10]:
print("Initializing LLM (Ollama - Mistral)...")
llm = OllamaLLM(model="mistral")

Initializing LLM (Ollama - Mistral)...


In [11]:
def format_docs_with_metadata(docs):
    """Formats retrieved documents including source and page."""
    formatted_strings = []
    for i, doc in enumerate(docs):
        metadata_str = f"Source: {doc.metadata.get('source', 'N/A')}, Page: {doc.metadata.get('page', 'N/A')}"
        content_str = doc.page_content.replace('\n', ' ').strip()
        formatted_strings.append(f"{i+1}. [{metadata_str}] {content_str}")
    return "\n\n".join(formatted_strings)

In [12]:
rag_chain = (
    {"context": retriever | format_docs_with_metadata, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print("RAG Chain constructed.")

RAG Chain constructed.


In [13]:
print("\n--- Querying RAG Chain ---")
question = "How can I control my blood sugar level with diet according to the textbook?" # Example refined query
# question = "What does the BADAS guideline say about insulin initiation?" # Example metadata-aware query
# question = "Tell me about managing diabetes during Ramadan based on the provided texts."

print(f"Question: {question}")

try:
    response = rag_chain.invoke(question)
    print("\nResponse:")
    print(response)
except Exception as e:
    print(f"\nError during RAG chain invocation: {e}")


--- Querying RAG Chain ---
Question: How can I control my blood sugar level with diet according to the textbook?

Response:
 To manage your blood sugar level effectively, focus on a balanced diet as per the guidelines provided in your diabetes management textbooks. Here are some actionable strategies you can consider:

1. Portion Control: Limit your intake of high-sugar foods like sweets by portioning them carefully. Instead of eating large portions at once, try smaller amounts spread over time.

2. Timing: It's best to consume sweets around meal times, rather than as snacks between meals. This will help regulate your blood sugar levels and prevent spikes.

3. Carbohydrate Counting/Exchange: If you're on insulin therapy, it is recommended that you learn carbohydrate counting to better manage your food intake. This involves understanding the amount of carbohydrates in different foods and adjusting your insulin dosage accordingly.

4. Dietary Composition: Emphasize fruits, legumes, whol

In [14]:
print("\n--- Querying RAG Chain ---")
question = "What does the BADAS guideline say about insulin initiation?" # Example refined query

# question = "Tell me about managing diabetes during Ramadan based on the provided texts."

print(f"Question: {question}")

try:
    response = rag_chain.invoke(question)
    print("\nResponse:")
    print(response)
except Exception as e:
    print(f"\nError during RAG chain invocation: {e}")


--- Querying RAG Chain ---
Question: What does the BADAS guideline say about insulin initiation?

Response:
 According to the BADAS Guideline 2019, in all major surgeries glucose-insulin infusion should be started. For regular diabetes management, if a person is on insulin, intermediate or long acting insulin is continued; the dose may need to be reduced. Shorter acting insulin should be adjusted according to blood glucose values and food intake. If the portion of high-sugar foods like sweets occasionally consumed, these should be included in daily carbohydrate counting/exchange when planning meals. The timing relative to meals is important, as consuming sweets around meal times can help with proper insulin dosage adjustments. Sugar substitutes are not explicitly mentioned in the provided context documents regarding dietary principles. It's crucial to consult a healthcare professional or registered dietitian for personalized medical advice tailored to your specific situation.


In [15]:
print("\n--- Querying RAG Chain ---")
question = "Tell me about managing diabetes during Ramadan based on the provided texts." # Example refined query



print(f"Question: {question}")

try:
    response = rag_chain.invoke(question)
    print("\nResponse:")
    print(response)
except Exception as e:
    print(f"\nError during RAG chain invocation: {e}")


--- Querying RAG Chain ---
Question: Tell me about managing diabetes during Ramadan based on the provided texts.

Response:
 During Ramadan, it is important to manage your diet while maintaining proper blood sugar levels. The context documents suggest the following strategies for occasional intake of high-sugar foods like sweets:

1. Portion Control: Limit your portion sizes and avoid overindulging in sugary treats.
2. Timing Relative to Meals: Consume sweets either as a small dessert after a balanced meal or, if necessary, as a snack between meals. It is advisable to balance these treats with protein-rich foods for improved blood sugar control.
3. Carbohydrate Counting/Exchange: If possible, count the carbohydrates in your sweets and adjust your other meal components accordingly. This will help you maintain proper blood glucose levels throughout the day.
4. Sugar Substitutes: Some context documents suggest that sugar substitutes may be used occasionally, but it is essential to consul

In [16]:
print("\n--- Querying RAG Chain ---")
question = "I have diabetes. Can I eat sweets?" # Example refined query



print(f"Question: {question}")

try:
    response = rag_chain.invoke(question)
    print("\nResponse:")
    print(response)
except Exception as e:
    print(f"\nError during RAG chain invocation: {e}")


--- Querying RAG Chain ---
Question: I have diabetes. Can I eat sweets?

Response:
 While it's understandable that you might want to indulge in sweets as a person with diabetes, it's important to practice moderation and follow some guidelines to manage your blood glucose levels effectively. Here are some strategies discussed in the context documents:

1. Portion control: Limit your serving size of sweets. A smaller portion can help you control your blood sugar levels better.
2. Timing relative to meals: It's best to have sweets after a meal, preferably with carbohydrates. This helps balance the impact on your blood glucose levels.
3. Carbohydrate counting/exchange: If you are on insulin therapy, it may be beneficial to count or exchange carbohydrates in your diet, including sweets, to manage your blood sugar levels accurately.
4. Sugar substitutes: Nonnutritive sweeteners can help reduce overall calorie and carbohydrate intake but should be used sparingly as they may have potential si