# Environment Setup 

In [1]:
import os
from dotenv import load_dotenv

# Pull variables from a .env file (when one exists) into the process environment.
load_dotenv()

# Keep the looked-up values in module-level names; either is None when unset.
langchain_api_key = os.getenv('LANGCHAIN_API_KEY')
huggingface_api_key = os.getenv('HUGGINGFACE_API_KEY')

# os.environ only accepts str values: assigning a missing key (None) raises
# TypeError. Export each key only when it is actually configured.
if langchain_api_key:
    os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
    # Tracing is only switched on when a key is available to go with it.
    os.environ['LANGCHAIN_TRACING_V2'] = 'true'
else:
    print("Warning: LANGCHAIN_API_KEY is not set; LANGCHAIN_TRACING_V2 was not enabled.")

if huggingface_api_key:
    os.environ['HUGGINGFACE_API_KEY'] = huggingface_api_key
else:
    print("Warning: HUGGINGFACE_API_KEY is not set.")

In [2]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader # Use this
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM
from langchain_core.documents import Document

In [4]:
# Folder holding the guideline PDFs. Defaults to the original local folder;
# set DIABETIQ_PDF_DIR (e.g. in .env) to run the notebook on another machine.
PDF_DIR = os.getenv("DIABETIQ_PDF_DIR", r"F:\DiabetIQ\LLM\PDFs")
PDF_FILE_NAMES = [
    "BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf",
    "BES-Ramadan-Guideline-2020-min.pdf",
    "Diabetes_Care_BADAS_guideline2019-3.pdf",
    "Insulin-Guideline-min.pdf",
]
pdf_files = [os.path.join(PDF_DIR, name) for name in PDF_FILE_NAMES]

all_docs = []  # LangChain Document objects, one per PDF page

print("Loading and Processing PDFs...")
for pdf_path in pdf_files:
    # File name (without the folder) is what gets cited as the source later on.
    file_name = os.path.basename(pdf_path)
    print(f"-> Loading: {file_name}")

    try:
        # load() yields one Document per page. load_and_split() would also run
        # a default text splitter, so the count below would not be a page count
        # and the text would be split a second time by the notebook's own
        # RecursiveCharacterTextSplitter.
        pages = PyPDFLoader(pdf_path).load()
    except Exception as e:
        # Best effort: report the unreadable file and carry on with the rest.
        print(f"Error loading {pdf_path}: {e}")
        continue

    # Replace the loader's full path with the bare file name in each page's metadata.
    for page_doc in pages:
        page_doc.metadata['source'] = file_name

    all_docs.extend(pages)
    print(f"   Loaded {len(pages)} pages.")

print(f"\nTotal documents loaded: {len(all_docs)}")
if not all_docs:
    # exit() inside Jupyter shuts the kernel down; raising stops "Run All" at
    # this cell while keeping the kernel (and its state) available.
    raise RuntimeError("No documents were loaded successfully.")

print("\nSample Document Metadata (first doc):")
print(all_docs[0].metadata)
print("\nSample Document Content (first 500 chars of first doc):")
print(all_docs[0].page_content[:500])

Loading and Processing PDFs...
-> Loading: BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf
   Loaded 38 pages.
-> Loading: BES-Ramadan-Guideline-2020-min.pdf
   Loaded 46 pages.
-> Loading: Diabetes_Care_BADAS_guideline2019-3.pdf
   Loaded 79 pages.
-> Loading: Insulin-Guideline-min.pdf
   Loaded 93 pages.

Total documents loaded: 256

Sample Document Metadata (first doc):
{'producer': 'Nitro PDF PrimoPDF', 'creator': 'PrimoPDF http://www.primopdf.com', 'creationdate': '2020-06-07T20:17:39-06:00', 'moddate': '2020-06-07T20:17:39-06:00', 'title': 'Microsoft Word - BES COVID Pract Recomnd 06 June Final Copy', 'author': 'Mir', 'source': 'BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf', 'total_pages': 38, 'page': 0, 'page_label': '1'}

Sample Document Content (first 500 chars of first doc):
Bangladesh Endocrine Society (BES) 
Practical Recommendations for Management of 
Diabetes and Other Endocrine Diseases in Patients with 
COVID-19 
 
 
 
 
 
Published Online June 2020 
 
 
All rights res

In [5]:
# Splitter settings: chunks of up to 1000 characters (measured with len) that
# overlap their neighbour by 200 characters.
text_splitter = RecursiveCharacterTextSplitter(
    # Separator list, from paragraph breaks down to single characters.
    separators=["\n\n", "\n", ". ", ", ", " ", ""],
    length_function=len,
    chunk_overlap=200,
    chunk_size=1000,
)

In [6]:
# Split every loaded document into chunks using the splitter configured above.
chunks = text_splitter.split_documents(all_docs)

print(f"\nTotal chunks created: {len(chunks)}")
if not chunks:
    # exit() inside Jupyter shuts the kernel down; raising stops "Run All" at
    # this cell while keeping the kernel (and its state) available.
    raise RuntimeError("No chunks were created. Check splitting process.")

print("\nSample Chunk Metadata (first chunk):")
print(chunks[0].metadata)
print("\nSample Chunk Content (first 500 chars):")
print(chunks[0].page_content[:500])


Total chunks created: 702

Sample Chunk Metadata (first chunk):
{'producer': 'Nitro PDF PrimoPDF', 'creator': 'PrimoPDF http://www.primopdf.com', 'creationdate': '2020-06-07T20:17:39-06:00', 'moddate': '2020-06-07T20:17:39-06:00', 'title': 'Microsoft Word - BES COVID Pract Recomnd 06 June Final Copy', 'author': 'Mir', 'source': 'BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf', 'total_pages': 38, 'page': 0, 'page_label': '1'}

Sample Chunk Content (first 500 chars):
Bangladesh Endocrine Society (BES) 
Practical Recommendations for Management of 
Diabetes and Other Endocrine Diseases in Patients with 
COVID-19 
 
 
 
 
 
Published Online June 2020 
 
 
All rights reserved by: Bangladesh Endocrine Society (BES) 
 
 
Published by 
Bangladesh Endocrine Society (BES) 
Website: http://bes-org.net 
E-mail: 
endobd2012@gmail.com


In [7]:
print("\nInitializing Embedding Model...")
# NOTE(review): the e5 model family is usually documented as expecting
# "query: " / "passage: " prefixes on its inputs; none are added here — confirm
# against the model card whether retrieval quality is affected.
embedding_model_name = "intfloat/e5-small-v2"
embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_name)

print("\nCreating Vector Store (ChromaDB)...")
# from_documents accepts the Document chunks directly and embeds them with the
# model above. No persist_directory is given, so presumably the index lives only
# for this kernel session and is rebuilt on every run.
# To keep it on disk, pass persist_directory="./chroma_db_diabetiq" here and
# reopen it later with:
#   Chroma(persist_directory="./chroma_db_diabetiq", embedding_function=embedding_model)
vectorstore = Chroma.from_documents(chunks, embedding=embedding_model)

print("Vector Store Created.")


Initializing Embedding Model...

Creating Vector Store (ChromaDB)...
Vector Store Created.


In [8]:
# Wrap the vector store as a retriever that hands back the top 5 chunks per query.
top_k_settings = {"k": 5}
retriever = vectorstore.as_retriever(search_kwargs=top_k_settings)

In [9]:
# Echo the configured k, or the word 'default' when no k was supplied.
configured_k = retriever.search_kwargs.get('k', 'default')
print(f"Retriever configured (using k={configured_k}).")

Retriever configured (using k=5).


In [10]:
from langchain_core.prompts import PromptTemplate
# Prompt text for the RAG chain. It has two placeholders:
#   {context}  - the retrieved document excerpts, already formatted as text
#   {question} - the user's question
# The instructions restrict the model to the supplied context, ask it to say so
# when the context lacks the answer, and require a closing recommendation to
# consult a healthcare professional.
prompt_template = """
You are DiabetIQ, an AI assistant specializing in diabetes management for patients in Bangladesh, based *strictly* on the provided context documents.

Context Documents:
{context}

Based *only* on the information in the numbered context documents above, answer the following question concisely and directly.
Your advice should be actionable and consider general practices relevant to Bangladesh where possible (e.g., common foods mentioned in context, local guidelines if present in context).
Do *not* add information that is not present in the context.
If the context does not contain the answer, state that clearly.
Always conclude your response by advising the user to consult a healthcare professional for personalized medical advice.

Question: {question}

Answer:
"""

# Build the PromptTemplate object from the raw template string above.
prompt = PromptTemplate.from_template(prompt_template)

In [11]:
# Create the LLM client for the "mistral" model through langchain_ollama.
# NOTE(review): presumably this needs a local Ollama server with the model
# already pulled — confirm before running on a fresh machine.
llm_model_name = "mistral"
print("Initializing LLM (Ollama - Mistral)...")
llm = OllamaLLM(model=llm_model_name)

Initializing LLM (Ollama - Mistral)...


In [12]:
def format_docs_with_metadata(docs):
    """Render retrieved documents as a numbered, citation-tagged context string.

    Each document becomes ``"<n>. [Source: <source>, Page: <page>] <text>"``,
    numbered from 1, and the entries are separated by a blank line.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a dict) and
            ``page_content`` (a str), such as the retriever's results.

    Returns:
        str: The joined context block; an empty string when ``docs`` is empty.
    """
    formatted_strings = []
    for position, doc in enumerate(docs, start=1):
        metadata = doc.metadata
        source = metadata.get('source', 'N/A')

        # The loader's 'page' is a 0-based index (first page -> 0) while
        # 'page_label' carries the printed page number, so prefer the label to
        # keep citations human-readable; fall back to 'page' when it is absent.
        page = metadata.get('page_label')
        if page is None or page == '':
            page = metadata.get('page', 'N/A')

        # Collapse newlines and runs of whitespace into single spaces; PDF text
        # often contains many blank lines that would otherwise waste prompt space.
        content_str = " ".join(doc.page_content.split())

        formatted_strings.append(f"{position}. [Source: {source}, Page: {page}] {content_str}")
    return "\n\n".join(formatted_strings)

In [13]:
# First stage: fan the incoming question out into the two prompt variables.
#   context  - retrieve documents for the question, then format them as text
#   question - the question itself, passed through unchanged
chain_inputs = {
    "context": retriever | format_docs_with_metadata,
    "question": RunnablePassthrough(),
}

# Remaining stages: fill the prompt, call the LLM, parse the output to a string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

print("RAG Chain constructed.")

RAG Chain constructed.


In [14]:
# Sample query 1: dietary control of blood sugar.
question = "How can I control my blood sugar level with diet according to the textbook?"
print("\n--- Querying RAG Chain ---")
print(f"Question: {question}")

# Run the chain; any failure is reported as a message instead of a traceback.
try:
    response = rag_chain.invoke(question)
    print("\nResponse:", response, sep="\n")
except Exception as err:
    print(f"\nError during RAG chain invocation: {err}")


--- Querying RAG Chain ---
Question: How can I control my blood sugar level with diet according to the textbook?

Response:
 To control your blood sugar level with diet as per the provided textbooks, follow these recommendations:

1. Emphasize fruits, legumes, whole grains, and dairy products in your diet if you are on insulin therapy. Learn carbohydrate counting for proper portioning.
2. Avoid sugar-sweetened beverages (including fruit juices) to manage glycemia, weight, reduce the risk of cardiovascular disease, and fatty liver.
3. Ensure your diet contains sufficient protein from both animal sources like fish, meat, egg, milk, cheese, and plant sources such as seeds or nuts. In older people, aim for 1g of protein per kg body weight per day, adjusting with nutritional status, physical activity level, disease status, and tolerance.
4. Consume at least 2-3 servings of fruits and vegetables daily.
5. Aim for a diet rich in monounsaturated and polyunsaturated fats to improve glucose met

In [15]:
# Sample query 2: insulin initiation in the BADAS guideline.
question = "What does the BADAS guideline say about insulin initiation?"
print("\n--- Querying RAG Chain ---")
print(f"Question: {question}")

# Run the chain; any failure is reported as a message instead of a traceback.
try:
    response = rag_chain.invoke(question)
    print("\nResponse:", response, sep="\n")
except Exception as err:
    print(f"\nError during RAG chain invocation: {err}")


--- Querying RAG Chain ---
Question: What does the BADAS guideline say about insulin initiation?

Response:
 According to the BADAS Guideline 2019, glucose-insulin infusion should be started in all major surgeries. In case of diabetes management outside surgery, if a person is already on insulin, intermediate or long acting insulin is continued; the dose may need to be reduced. Shorter acting insulin should be adjusted according to blood glucose values and food intake. However, I advise you to consult with a healthcare professional for personalized medical advice regarding insulin initiation or adjustment.


In [16]:
# Sample query 3: diabetes management during Ramadan.
question = "Tell me about managing diabetes during Ramadan based on the provided texts."
print("\n--- Querying RAG Chain ---")
print(f"Question: {question}")

# Run the chain; any failure is reported as a message instead of a traceback.
try:
    response = rag_chain.invoke(question)
    print("\nResponse:", response, sep="\n")
except Exception as err:
    print(f"\nError during RAG chain invocation: {err}")


--- Querying RAG Chain ---
Question: Tell me about managing diabetes during Ramadan based on the provided texts.

Response:
 During Ramadan, it is essential for individuals with diabetes to manage their blood sugar levels carefully. The following are some general practices that can be considered, although personalized medical advice should always be sought from a healthcare professional.

1. Balanced Diet: Consume a well-balanced diet during the daytime, including carbohydrates, proteins, and healthy fats. In Bangladesh, common foods such as rice, lentils, fish, vegetables, and fruits can form part of this diet. It is important to control portion sizes and consider meal timing to avoid postprandial blood sugar spikes.

2. Hydration: Drink plenty of fluids during the non-fasting hours. Dehydration can affect blood sugar levels, so it's crucial to stay hydrated. However, avoid consuming large amounts of fluid close to suhoor (the pre-dawn meal) or iftar (the breaking of fast).

3. Regul

In [17]:
# Sample query 4: a short patient-style question about sweets.
question = "I have diabetes. Can I eat sweets?"
print("\n--- Querying RAG Chain ---")
print(f"Question: {question}")

# Run the chain; any failure is reported as a message instead of a traceback.
try:
    response = rag_chain.invoke(question)
    print("\nResponse:", response, sep="\n")
except Exception as err:
    print(f"\nError during RAG chain invocation: {err}")


--- Querying RAG Chain ---
Question: I have diabetes. Can I eat sweets?

Response:
 It is advisable to limit or avoid sweets, especially sugar-sweetened beverages like candy and desserts, as they can negatively impact your blood glucose levels and potentially lead to weight gain, which may increase the risk of complications for people with diabetes. Instead, focus on a diet rich in fruits, legumes, whole grains, dairy products, and lean proteins from both animal and plant sources. As always, it is essential to consult your healthcare professional for personalized medical advice.
