In [1]:
import os
from dotenv import load_dotenv
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader 
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableParallel
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate

print("--- Environment and Imports ---")


load_dotenv()
langchain_api_key = os.getenv('LANGCHAIN_API_KEY')
huggingface_api_key = os.getenv('HUGGINGFACE_API_KEY')


os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
os.environ['HUGGINGFACE_API_KEY'] = huggingface_api_key

print("Environment variables loaded and LangChain configured.")
print("Required libraries imported.")

--- Environment and Imports ---
Environment variables loaded and LangChain configured.
Required libraries imported.


In [2]:
print("\n--- PDF File Paths ---")
pdf_files = [
    
    r"F:\DiabetIQ\LLM\PDFs\BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf",
    r"F:\DiabetIQ\LLM\PDFs\BES-Ramadan-Guideline-2020-min.pdf",
    r"F:\DiabetIQ\LLM\PDFs\Diabetes_Care_BADAS_guideline2019-3.pdf",
    r"F:\DiabetIQ\LLM\PDFs\Insulin-Guideline-min.pdf"
]

for pdf_path in pdf_files:
    if not os.path.exists(pdf_path):
        print(f"WARNING: File not found at {pdf_path}")
    else:
        print(f"Found: {pdf_path}")


--- PDF File Paths ---
Found: F:\DiabetIQ\LLM\PDFs\BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf
Found: F:\DiabetIQ\LLM\PDFs\BES-Ramadan-Guideline-2020-min.pdf
Found: F:\DiabetIQ\LLM\PDFs\Diabetes_Care_BADAS_guideline2019-3.pdf
Found: F:\DiabetIQ\LLM\PDFs\Insulin-Guideline-min.pdf


In [3]:
print("\n--- Loading and Processing PDFs ---")
all_docs = [] 

for pdf_path in pdf_files:
    file_name = os.path.basename(pdf_path)
    if not os.path.exists(pdf_path):
        print(f"Skipping non-existent file: {file_name}")
        continue
    try:
        print(f"-> Loading: {file_name}")
        loader = PyPDFLoader(pdf_path)
        pages = loader.load_and_split() 

        
        for page_doc in pages:
            page_doc.metadata['source'] = file_name
            

        all_docs.extend(pages)
        print(f"   Loaded {len(pages)} pages.")

    except Exception as e:
        print(f"Error loading {pdf_path}: {e}")

print(f"\nTotal documents loaded: {len(all_docs)}")
if all_docs:
    print("\nSample Document Metadata (first doc):")
    print(all_docs[0].metadata)
    print("\nSample Document Content (first 500 chars of first doc):")
    print(all_docs[0].page_content[:500])
else:
    print("\nNo documents were loaded successfully. Exiting.")
    exit() 


--- Loading and Processing PDFs ---
-> Loading: BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf
   Loaded 38 pages.
-> Loading: BES-Ramadan-Guideline-2020-min.pdf
   Loaded 46 pages.
-> Loading: Diabetes_Care_BADAS_guideline2019-3.pdf
   Loaded 79 pages.
-> Loading: Insulin-Guideline-min.pdf
   Loaded 93 pages.

Total documents loaded: 256

Sample Document Metadata (first doc):
{'producer': 'Nitro PDF PrimoPDF', 'creator': 'PrimoPDF http://www.primopdf.com', 'creationdate': '2020-06-07T20:17:39-06:00', 'moddate': '2020-06-07T20:17:39-06:00', 'title': 'Microsoft Word - BES COVID Pract Recomnd 06 June Final Copy', 'author': 'Mir', 'source': 'BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf', 'total_pages': 38, 'page': 0, 'page_label': '1'}

Sample Document Content (first 500 chars of first doc):
Bangladesh Endocrine Society (BES) 
Practical Recommendations for Management of 
Diabetes and Other Endocrine Diseases in Patients with 
COVID-19 
 
 
 
 
 
Published Online June 2020 
 
 
All righ

In [4]:
print("\n--- Configuring Text Splitter ---")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", ". ", ", ", " ", ""], 
    length_function=len,
)
print(f"Text splitter configured: chunk_size={text_splitter._chunk_size}, chunk_overlap={text_splitter._chunk_overlap}")


--- Configuring Text Splitter ---
Text splitter configured: chunk_size=1000, chunk_overlap=200


In [5]:
print("\n--- Splitting Documents into Chunks ---")
chunks = text_splitter.split_documents(all_docs)

print(f"\nTotal chunks created: {len(chunks)}")
if chunks:
    print("\nSample Chunk Metadata (first chunk):")
    print(chunks[0].metadata)
    print("\nSample Chunk Content (first 500 chars):")
    print(chunks[0].page_content[:500])
else:
    print("\nNo chunks were created. Check splitting process. Exiting.")
    exit() 


--- Splitting Documents into Chunks ---

Total chunks created: 702

Sample Chunk Metadata (first chunk):
{'producer': 'Nitro PDF PrimoPDF', 'creator': 'PrimoPDF http://www.primopdf.com', 'creationdate': '2020-06-07T20:17:39-06:00', 'moddate': '2020-06-07T20:17:39-06:00', 'title': 'Microsoft Word - BES COVID Pract Recomnd 06 June Final Copy', 'author': 'Mir', 'source': 'BES-COVID-Pract-Recomnd-06-June-Final-Copy.pdf', 'total_pages': 38, 'page': 0, 'page_label': '1'}

Sample Chunk Content (first 500 chars):
Bangladesh Endocrine Society (BES) 
Practical Recommendations for Management of 
Diabetes and Other Endocrine Diseases in Patients with 
COVID-19 
 
 
 
 
 
Published Online June 2020 
 
 
All rights reserved by: Bangladesh Endocrine Society (BES) 
 
 
Published by 
Bangladesh Endocrine Society (BES) 
Website: http://bes-org.net 
E-mail: 
endobd2012@gmail.com


In [6]:
print("\n--- Initializing Embedding Model ---")

embedding_model_name = "intfloat/e5-small-v2"
embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_name)
print(f"Embedding model initialized: {embedding_model_name}")


--- Initializing Embedding Model ---
Embedding model initialized: intfloat/e5-small-v2


In [7]:
print("\n--- Creating Vector Store (ChromaDB) ---")

vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
)

print("Vector Store Created (in-memory).")


--- Creating Vector Store (ChromaDB) ---
Vector Store Created (in-memory).


In [8]:
print("\n--- Configuring Retriever ---")
retriever = vectorstore.as_retriever(search_kwargs={"k": 5}) 
print(f"Retriever configured to fetch top k={retriever.search_kwargs.get('k', 'N/A')} chunks.")


--- Configuring Retriever ---
Retriever configured to fetch top k=5 chunks.


In [9]:
print("\n--- Defining User Profile Structure and Helper ---")

user_profile_example = {
    "user_id": "user123",
    "name": "Ahmed Khan", 
    "diabetes_type": "Type 2",
    "medications": ["Metformin 1000mg daily", "Gliclazide 80mg daily"],
    "recent_a1c": 7.8,
    "goals": ["Lower A1C to below 7.0", "Incorporate more vegetables", "Be more consistent with evening medication"],
    "preferences": {"diet": "Likes chicken and fish, avoids red meat", "activity": "Prefers walking"},
    "challenges": ["Snacking late at night", "Forgetting evening Gliclazide sometimes", "Finding time for exercise"]
}

print(f"Example User Profile defined for: {user_profile_example.get('name', 'N/A')} (ID: {user_profile_example.get('user_id', 'N/A')})")


def get_relevant_profile_info(profile, question=None):
    """
    Extracts key information from the user profile to be included in the LLM prompt.
    Can be enhanced with NLP to select info based on the question context.
    """
    if not profile:
        return "No user profile provided."

    info = f"User Profile Summary:\n"
    info += f"- Diabetes Type: {profile.get('diabetes_type', 'N/A')}\n"
    info += f"- Key Medications: {', '.join(profile.get('medications', ['N/A']))}\n"
    info += f"- Recent A1C: {profile.get('recent_a1c', 'N/A')}\n"
    info += f"- Goals: {', '.join(profile.get('goals', ['N/A']))}\n"
    info += f"- Challenges: {', '.join(profile.get('challenges', ['N/A']))}\n"
    info += f"- Preferences: Diet - {profile.get('preferences', {}).get('diet', 'N/A')}; Activity - {profile.get('preferences', {}).get('activity', 'N/A')}\n"
   
    return info

print("Helper function 'get_relevant_profile_info' defined.")


--- Defining User Profile Structure and Helper ---
Example User Profile defined for: Ahmed Khan (ID: user123)
Helper function 'get_relevant_profile_info' defined.


In [10]:
print("\n--- Defining Personalized Prompt Template ---")

personalized_prompt_template = """
You are DiabetIQ, an AI assistant specializing in personalized diabetes management for patients in Bangladesh.
Your advice must be based *strictly* on the provided Context Documents AND the User's Profile.
Do *not* use any information external to these sources. Your knowledge is limited to the documents provided.

User Profile:
{user_profile_summary}

Context Documents:
{context}

---
Based *only* on the User Profile and the Context Documents above, answer the user's question.
Frame your answer to be actionable and relevant to this specific user's situation (their type of diabetes, medications, goals, challenges, preferences) where appropriate based *only* on the provided context.
Incorporate principles of health optimization and behavioral encouragement (e.g., acknowledge goals, suggest small steps relevant to challenges mentioned in profile, align with preferences if context allows).
If the context documents do not contain information relevant to the user's specific profile details or the question asked, state that the documents don't cover that specific aspect.
If the answer is not found in the context documents at all, state clearly that the information is not available in the provided documents.
Always conclude your response by advising the user to consult their healthcare professional for personalized medical decisions, especially regarding diagnosis, treatment, or medication changes.

Question: {question}

Personalized Answer:
"""

personalized_prompt = PromptTemplate.from_template(personalized_prompt_template)
print("Personalized prompt template created.")


--- Defining Personalized Prompt Template ---
Personalized prompt template created.


In [11]:
print("\n--- Initializing LLM (Ollama - Mistral) ---")

llm = OllamaLLM(model="mistral")

print("Ollama LLM (Mistral) initialized.")


--- Initializing LLM (Ollama - Mistral) ---
Ollama LLM (Mistral) initialized.


In [12]:
print("\n--- Defining Document Formatting Function ---")

def format_docs_with_metadata(docs):
    """Formats retrieved documents including source and page."""
    formatted_strings = []
    if not docs:
        return "No relevant context documents found."
    for i, doc in enumerate(docs):
        
        source = doc.metadata.get('source', 'N/A')
        page = doc.metadata.get('page', 'N/A')
        metadata_str = f"Source: {source}, Page: {page}"
        content_str = doc.page_content.replace('\n', ' ').strip()
        formatted_strings.append(f"{i+1}. [{metadata_str}] {content_str}")
    return "\n\n".join(formatted_strings)

print("Function 'format_docs_with_metadata' defined.")


--- Defining Document Formatting Function ---
Function 'format_docs_with_metadata' defined.


In [13]:
print("\n--- Constructing Personalized RAG Chain ---")


def prepare_rag_input(input_dict):
    """
    Takes the initial input {'question': str, 'user_profile': dict}
    and structures it for the RunnableParallel step.
    """
    user_question = input_dict["question"]
    user_profile = input_dict.get("user_profile", {}) 
    profile_summary = get_relevant_profile_info(user_profile, user_question)
    return {
        "question": user_question,
        "user_profile_summary": profile_summary,
        "retriever_input": user_question 
    }


personalized_rag_chain = (
    RunnableLambda(prepare_rag_input) 
    | RunnableParallel(
        {
           
            "context": (lambda x: x['retriever_input']) | retriever | format_docs_with_metadata,
            
            "question": (lambda x: x['question']),
            
            "user_profile_summary": (lambda x: x['user_profile_summary'])
        }
    )
    | personalized_prompt   
    | llm                    
    | StrOutputParser()      
)

print("Personalized RAG Chain constructed successfully.")


--- Constructing Personalized RAG Chain ---
Personalized RAG Chain constructed successfully.


In [14]:
print("\n--- Querying Personalized RAG Chain ---")


question = "I'm finding it hard to avoid snacks at night because of my evening Gliclazide. What dietary advice from the documents can help me manage this and lower my A1C, considering I like chicken and fish?"
current_user_profile = user_profile_example 

print(f"User: {current_user_profile.get('name', 'N/A')} (ID: {current_user_profile.get('user_id', 'N/A')})")
print(f"Question: {question}")

try:
    
    response = personalized_rag_chain.invoke({
        "question": question,
        "user_profile": current_user_profile
    })
    print("\nPersonalized Response:")
    print(response)

except Exception as e:
    print(f"\nError during personalized RAG chain invocation: {e}")

print("\n--- Second Query Example ---")
question_2 = "Based on the BADAS guideline, what should I know about starting insulin if I have Type 2 diabetes?"
print(f"User: {current_user_profile.get('name', 'N/A')} (ID: {current_user_profile.get('user_id', 'N/A')})")
print(f"Question: {question_2}")

try:
    
    response_2 = personalized_rag_chain.invoke({
        "question": question_2,
        "user_profile": current_user_profile
    })
    print("\nPersonalized Response:")
    print(response_2)

except Exception as e:
    print(f"\nError during personalized RAG chain invocation: {e}")


--- Querying Personalized RAG Chain ---
User: Ahmed Khan (ID: user123)
Question: I'm finding it hard to avoid snacks at night because of my evening Gliclazide. What dietary advice from the documents can help me manage this and lower my A1C, considering I like chicken and fish?

Personalized Response:
 Based on your profile and the provided context documents, here are some actionable steps to help you manage your snacking habits at night while trying to lower your A1C levels.

1. Balanced Meals: Ensure your meals throughout the day are balanced and nutritious, focusing on lean proteins such as chicken and fish, whole grains, fruits, and vegetables. This can help keep you full and reduce the urge for late-night snacks.

2. Portion Control: Pay attention to portion sizes during meals to avoid overeating and prevent excessive blood sugar spikes.

3. Timing of Meals: Try to have your dinner at least 2-3 hours before bedtime to give your body ample time to digest the food properly and regul

In [20]:
import os
from deepeval import evaluate
from deepeval.test_case import LLMTestCaseParams
from deepeval.models import Ollama
from deepeval.metrics import AnswerRelevancyMetric, FaithfulnessMetric
from langchain_core.runnables import RunnableLambda, RunnableParallel
from deepeval.models import OllamaModel

ImportError: cannot import name 'Ollama' from 'deepeval.models' (F:\DiabetIQ\my_env\Lib\site-packages\deepeval\models\__init__.py)

In [None]:
print("--- DeepEval Imports and Setup ---")
print("Ensure Ollama server is running with the 'llama3.2' model available.")


required_vars = ['retriever', 'format_docs_with_metadata', 'personalized_rag_chain', 'prepare_rag_input', 'user_profile_example']
missing_vars = [var for var in required_vars if var not in globals()]
if missing_vars:
    # Attempt to define the context retrieval chain helper again, in case it's missing
    # but its components exist
    if 'retriever' in globals() and 'format_docs_with_metadata' in globals() and 'prepare_rag_input' in globals():
         print("Attempting to redefine context_retrieval_chain...")
         context_retrieval_chain = (
             RunnableLambda(prepare_rag_input)
             | RunnableParallel(
                 {
                     "retrieved_docs": (lambda x: x['retriever_input']) | retriever,
                     "passthrough": (lambda x: x)
                 }
               )
             | RunnableLambda(lambda x: {
                     "formatted_context": format_docs_with_metadata(x['retrieved_docs']),
                     "original_input": x['passthrough']
                 })
         )
         print("Context retrieval chain redefined.")
         # Re-check missing vars
         required_vars = ['retriever', 'format_docs_with_metadata', 'personalized_rag_chain', 'prepare_rag_input', 'user_profile_example', 'context_retrieval_chain']
         missing_vars = [var for var in required_vars if var not in globals()]
         if missing_vars:
              raise NameError(f"Still missing required variables: {', '.join(missing_vars)}. Please ensure all setup cells from the RAG notebook ran successfully.")
    else:
          raise NameError(f"Missing required variables from previous cells: {', '.join(missing_vars)}. Please run the setup cells first.")
else:
    print("Required RAG components found in the environment.")
    # Define context_retrieval_chain here if all components are present initially
    context_retrieval_chain = (
        RunnableLambda(prepare_rag_input)
        | RunnableParallel(
            {
                "retrieved_docs": (lambda x: x['retriever_input']) | retriever,
                "passthrough": (lambda x: x)
            }
          )
        | RunnableLambda(lambda x: {
                "formatted_context": format_docs_with_metadata(x['retrieved_docs']),
                "original_input": x['passthrough']
            })
    )
    print("Context retrieval chain helper defined.")