### Installing Dependencies and Environment Setup

In [24]:
# The `::group::` / `::endgroup::` markers are simply echoed into the cell output;
# NOTE(review): they look like CI log-folding markers — confirm they are still wanted here.
! echo "::group::Install Dependencies"
# Install `uv` into the kernel environment, then let it install the notebook's packages.
# NOTE(review): none of the packages below are version-pinned.
%pip install uv
! uv pip install git+https://github.com/ibm-granite-community/utils.git \
    langchain \
    langchain_community \
    langchain_text_splitters \
    ibm_watsonx_ai \
    langchain_ibm \
    chromadb \
    tiktoken \
    bs4
! echo "::endgroup::"

::group::Install Dependencies
Note: you may need to restart the kernel to use updated packages.
Using Python 3.11.13 environment at: /opt/conda/envs/Python-RT24.1
Resolved 102 packages in 867ms                                                  
Audited 102 packages in 3ms
::endgroup::


In [25]:
! pip install PyMuPDF



### Import Statements

In [1]:
from langchain_ibm import WatsonxEmbeddings, WatsonxLLM
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain.tools import tool
from langchain.tools.render import render_text_description_and_args
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents import AgentExecutor
from langchain.memory import ConversationBufferMemory
from langchain_core.runnables import RunnablePassthrough
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.foundation_models.utils.enums import EmbeddingTypes
from langchain.document_loaders import PyPDFLoader

import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

import botocore
import time


USER_AGENT environment variable not set, consider setting it to identify your requests.


### Setting up Project ID and API keys

In [4]:
import getpass

# Secrets are typed in at run time so they never end up saved in the notebook.
# Prompts appear in this order: runtime API key, project ID, data-asset API key.
credentials = dict(
    url="https://us-south.ml.cloud.ibm.com",
    apikey=getpass.getpass("Please enter your watsonx.ai Runtime API key (hit enter): "),
    project_id=getpass.getpass("Please enter your project ID (hit enter): "),
    ibm_api_key=getpass.getpass("Please enter your IBM API key for data asset (hit enter): "),
)

# Convenience alias reused by the model and embedding setup cells.
project_id = credentials["project_id"]

Please enter your watsonx.ai Runtime API key (hit enter):  ········
Please enter your project ID (hit enter):  ········
Please enter your IBM API key for data asset (hit enter):  ········


### Setting up LLM with IBM granite model

In [5]:
# Text-generation model shared by the agent chain and the translation tool.
llm = WatsonxLLM(
    model_id="ibm/granite-3-2-8b-instruct",
    project_id=project_id,
    apikey=credentials.get("apikey"),
    url=credentials.get("url"),
    params={
        # Greedy decoding with temperature 0.
        GenParams.DECODING_METHOD: "greedy",
        GenParams.TEMPERATURE: 0,
        # Every reply is between 5 and 250 newly generated tokens.
        GenParams.MIN_NEW_TOKENS: 5,
        GenParams.MAX_NEW_TOKENS: 250,
        # Generation halts when the model starts to write either of these markers.
        GenParams.STOP_SEQUENCES: ["Human:", "Observation"],
    },
)

In [6]:
def __iter__(self):
    """Return 0 regardless of ``self``."""
    # NOTE(review): nothing in the visible notebook calls this; it looks like a leftover
    # from a generated data-asset snippet — confirm before deleting.
    return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
ibm_api_key = credentials.get("ibm_api_key")
cos_client = ibm_boto3.client(service_name='s3',
    ibm_api_key_id= ibm_api_key,
    ibm_auth_endpoint="https://iam.cloud.ibm.com/identity/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.direct.us-south.cloud-object-storage.appdomain.cloud')

# Location of the source PDF inside Cloud Object Storage.
bucket = 'healthcare-donotdelete-pr-fuasfw3lhfcwjc'
object_key = 'Physical_Activity_Guidelines_2nd_edition.pdf'

# The object is fetched in the next cell, which streams it to a local file with retries.
# No StreamingBody is opened here: one that is never read or closed only holds an
# open response for the rest of the session and duplicates the request made below.
# ibm_boto3 documentation: https://ibm.github.io/ibm-cos-sdk-python/


In [7]:
# Stream the PDF from Cloud Object Storage to a local file, retrying transient
# read/streaming failures. If every attempt fails the cell raises, so later cells
# never run against a missing or truncated file.
max_retries = 3
retry_delay = 2  # seconds to wait between attempts

for attempt in range(max_retries):
    try:
        response = cos_client.get_object(Bucket=bucket, Key=object_key)
        streaming_body = response['Body']
        try:
            # "wb" truncates the file, so a retry overwrites any partial earlier download.
            with open("Physical_Activity_Guidelines_2nd_edition.pdf", "wb") as f:
                for chunk in streaming_body.iter_chunks(chunk_size=1024 * 1024):  # 1 MB
                    if chunk:
                        f.write(chunk)
        finally:
            # Release the underlying HTTP response whether or not the copy succeeded.
            streaming_body.close()
        break  # success, exit loop

    # NOTE(review): ibm_boto3 is built on ibm_botocore; confirm that these botocore
    # exception classes are the ones the client actually raises.
    except botocore.exceptions.ReadTimeoutError as e:
        print(f"Read timeout on attempt {attempt+1}, retrying...")

    except botocore.exceptions.ResponseStreamingError as e:
        print(f"Streaming error on attempt {attempt+1}, retrying...")

    except Exception as e:
        # Not a known transient failure: report it and stop instead of continuing silently.
        print(f"Unexpected error: {e}")
        raise

    # Only reached after a retryable failure; skip the pause once no attempts remain.
    if attempt + 1 < max_retries:
        time.sleep(retry_delay)
else:
    # The loop finished without `break`, i.e. no attempt succeeded.
    raise RuntimeError(
        f"Failed to download {object_key!r} from bucket {bucket!r} after {max_retries} attempts"
    )

### Data Ingestion

In [8]:
# Load the PDF written to the working directory by the download cell.
# NOTE(review): presumably yields one Document per page carrying `source`/`page`
# metadata (the retrieval tool reads those keys) — confirm against the PyPDFLoader docs.
loader = PyPDFLoader("Physical_Activity_Guidelines_2nd_edition.pdf")
documents = loader.load()

### Document Chunking 

In [9]:

# Split the loaded pages into overlapping chunks sized for retrieval.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,       # measured in characters (the splitter's default length function), not tokens
    chunk_overlap=200,     # preserves context continuity across chunk boundaries
    # Separators are tried in order, coarsest first: paragraphs, lines, bullet markers,
    # sentences, words. A bare letter such as "y" must not be listed here — it would cut
    # chunks in the middle of words before sentence or word boundaries are ever tried.
    separators=["\n\n", "\n", "•", ".", " "],
)

chunks = splitter.split_documents(documents)
print(f"Created {len(chunks)} text chunks")


Created 376 text chunks


### Setting up embedding model

In [10]:
# Embedding model handed to the vector store; it uses the same watsonx.ai
# endpoint, API key and project as the LLM.
embeddings = WatsonxEmbeddings(
    project_id=project_id,
    apikey=credentials["apikey"],
    url=credentials["url"],
    model_id="intfloat/multilingual-e5-large",
)

### Vector storage

In [11]:
# Embed every chunk and index it in a Chroma collection.
# NOTE(review): no persist directory is given, so the index presumably lives only in
# this kernel session; re-running the cell may add the same chunks again — confirm.
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    collection_name="physical-activity-rag",
    documents=chunks,
)

In [12]:
# Retriever over the vector store; `k=3` asks for three chunks per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

### Defining the tools

In [13]:
@tool
def get_pdf_context_physical_activity(question: str) -> dict:
    """Get context and page numbers from the PDF chunks stored in Chroma."""
    # The docstring is kept verbatim: presumably `@tool` uses it as the tool description.
    hits = retriever.invoke(question)

    # One provenance record per retrieved chunk, with fallbacks when metadata is absent.
    # NOTE(review): confirm whether the loader's `page` value is 0- or 1-based before citing it.
    provenance = [
        {
            "source": hit.metadata.get("source", "document"),
            "page": hit.metadata.get("page", "unknown"),
        }
        for hit in hits
    ]

    # Chunk texts are joined with a blank line between them, in retrieval order.
    return {
        "context": "\n\n".join(hit.page_content for hit in hits),
        "sources": provenance,
    }


In [14]:
def _clean_language_label(raw_reply: str) -> str:
    """Reduce a free-form model reply to a bare label: its first non-empty line, trimmed of quotes and punctuation."""
    for line in raw_reply.splitlines():
        label = line.strip().strip("\"'`.:*").strip()
        if label:
            return label
    return ""


@tool
def translate_to_query_language(context: str, question: str) -> dict:
    """
    Translate the retrieved 'context' into the language of 'question' if the question is not in English.
    Returns:
      {
        "translated_context": "...",
        "language": "English | Spanish | Arabic | ..."
      }
    Notes:
      - Uses the same LLM to (a) detect the question's language and (b) translate the context.
      - Preserves numbers, dosages, acronyms, and medical terms as-is.
    """
    # 1) Detect the question language (minimal output, English label in English)
    detect_prompt = (
        "Detect the language of the following question. "
        "Return only the language name in English (e.g., 'English', 'Spanish', 'French').\n\n"
        f"Question:\n{question}"
    )
    # The shared LLM is configured with a minimum number of new tokens, so its reply may
    # carry text beyond the bare label; keep only the first meaningful line.
    detected_lang = _clean_language_label(llm.invoke(detect_prompt))

    # Treat any label containing 'English' (any casing) as English. An empty reply gives
    # no target language to translate into, so it also falls back to the untranslated context.
    if not detected_lang or "english" in detected_lang.lower():
        return {"translated_context": context, "language": "English"}

    # Nothing to translate: skip the second LLM call.
    if not context.strip():
        return {"translated_context": context, "language": detected_lang}

    # 2) Translate the context into the detected language
    # NOTE(review): the shared LLM caps new tokens at 250 and stops on "Observation"/"Human:",
    # so long contexts are presumably truncated — consider a dedicated translation model.
    translate_prompt = (
        f"Translate the following context into {detected_lang}. "
        "Preserve all medical terms, numbers, dosages, acronyms, and units exactly. "
        "Do not add extra information; translate faithfully and clearly.\n\n"
        f"Context:\n{context}"
    )
    translated = llm.invoke(translate_prompt).strip()
    return {"translated_context": translated, "language": detected_lang}


In [15]:
# Tools registered with the agent executor.
tools = [get_pdf_context_physical_activity, translate_to_query_language]
# Quoted, comma-separated tool names.
# NOTE(review): this string is not referenced again in the visible notebook — the prompt's
# `tool_names` partial is rebuilt from `tools` — confirm whether it is still needed.
tool_names = '"get_pdf_context_physical_activity", "translate_to_query_language"'

### Prompt Engineering

In [16]:
# Set up System Prompt with tool info
# - Tool names in the text must match the registered tool names exactly
#   (get_pdf_context_physical_activity, translate_to_query_language); otherwise the
#   model requests an action the executor has no tool for.
# - The string is used as a prompt template, so a literal brace is written doubled:
#   "{{" renders as "{". The JSON examples therefore use exactly two braces so the
#   model sees valid single-brace JSON.
system_prompt = """
You are a knowledgeable and empathetic **Preventive Healthcare Assistant** with access to the tools get_pdf_context_physical_activity and translate_to_query_language.
Your role is to answer users questions about **preventive healthcare, vaccinations, physical activity, mental wellness, and healthy lifestyle habits**.

### Rules of Engagement
- Only use information retrieved through get_pdf_context_physical_activity.
- Do **not** rely on your own memory or external knowledge.
- If the retrieved context does not answer the question, respond kindly and explain what types of preventive topics you can assist with.
- Do **not** provide diagnostic or treatment advice — focus on prevention, education, and lifestyle guidance.
- Always recommend consulting a qualified healthcare provider for personalized medical decisions.
- The final answer must be in the **same language as the user's question**:
  - If the question is in English, answer in English (no translation needed).
  - If the question is in a different language, first call **translate_to_query_language** on the retrieved context and then compose your answer using the translated context.

### Tool Behaviors
- **get_pdf_context_physical_activity(question: str) -> {{ context: str, sources: [{{source, page}}, ...] }}**
  Retrieve relevant chunks from the indexed PDF.

- **translate_to_query_language(context: str, question: str) -> {{ translated_context: str, language: str }}**
  Translate the retrieved context into the user’s question language when the question is not in English.
  Use this tool only after get_pdf_context_physical_activity, and only if the question is not in English.

When you use get_pdf_context_physical_activity, it returns:
{{
  "context": "...text from retrieved chunks...",
  "sources": [
    {{"source": "filename.pdf", "page": "number"}},
    ...
  ]
}}

Use ONLY that context (or its translated version) to answer user questions.
When you provide the final answer:
- Include the relevant page numbers and source filenames in parentheses at the end.
Example:
  "According to the guidelines, adults should move more and sit less. (Source: Physical_Activity_Guidelines_2nd_edition.pdf, pages 8–9)"

If the context doesn’t contain relevant information, kindly tell the user that no matching information was found in the document.

To use a tool, respond with:
```{{ "action": "get_pdf_context_physical_activity", "action_input": "user question here" }}```
or
```{{ "action": "translate_to_query_language", "action_input": {{"context": "...", "question": "..."}} }}```

Valid actions: "get_pdf_context_physical_activity", "translate_to_query_language", or "Final Answer".
Format every action like this:
```{{ "action": "Final Answer", "action_input": "Your final response to the human." }}```
Always wrap JSON in triple backticks. Begin!
"""

In [17]:
# Human-turn template: the user's message, then the agent's scratchpad for this turn,
# then a standing reminder about the expected reply format.
human_prompt = (
    "\n"
    "{input}\n"
    "{agent_scratchpad}\n"
    "(reminder to always respond in a JSON blob)\n"
)

In [18]:
# Chatprompt template
# Message order: system instructions, then prior conversation turns (the
# `chat_history` placeholder is optional), then the current human turn.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history", optional=True),
        ("human", human_prompt),
    ]
)

In [19]:
# Pre-bind the tool catalogue and the comma-separated tool names on the prompt.
# NOTE(review): neither prompt string contains a {tools} or {tool_names} placeholder,
# so these bound values are presumably never rendered — confirm.
prompt = prompt.partial(
    tool_names=", ".join(t.name for t in tools),
    tools=render_text_description_and_args(tools),
)

In [20]:
# Setting up memory buffer
# One shared instance: the chain reads prior turns from `memory.chat_memory.messages`
# and the same object is passed to the AgentExecutor.
memory = ConversationBufferMemory()

  memory = ConversationBufferMemory()


### Agent Chain

In [21]:
# Agent pipeline: add `agent_scratchpad` and `chat_history` to the incoming inputs,
# render the prompt, call the LLM, then parse the reply with JSONAgentOutputParser.
chain = (
    RunnablePassthrough.assign(
        # Steps taken so far in this turn, flattened to text by format_log_to_str.
        agent_scratchpad=lambda x: format_log_to_str(x["intermediate_steps"]),
        # Prior turns are read straight from the shared memory object.
        chat_history=lambda x: memory.chat_memory.messages,
    )
    | prompt
    | llm
    | JSONAgentOutputParser()
)

# The executor drives the chain with the registered tools and the same `memory` object.
# NOTE(review): handle_parsing_errors=True presumably feeds unparseable replies back to
# the model instead of raising — confirm in the AgentExecutor docs.
agent_executor = AgentExecutor(
    agent=chain, tools=tools, handle_parsing_errors=True, verbose=False, memory=memory
)

### User Interaction Loop

In [22]:
import datetime
from IPython.display import display, HTML, clear_output
from colorama import Fore, Style
import colorama
colorama.init(autoreset=True)

# --- Chat Display Template ---
def display_message(sender, message, color, align="left"):
    """
    Render one chat bubble as HTML in the notebook output.

    sender: 'You' or 'Assistant'. Accepted for call-site readability; the label
            actually shown is chosen from `align`.
    message: the content to display. It is converted with str() and HTML-escaped,
             so user input or model output containing markup is shown literally
             instead of being interpreted by the browser; newlines become <br>.
    color: HTML color string for the text bubble
    align: 'left' (assistant) or 'right' (user)
    """
    # Imported by name so the stdlib module does not clash with local variable names.
    from html import escape

    is_user = align == "right"
    justify = "flex-end" if is_user else "flex-start"
    label = "👤 You" if is_user else "🤖 Assistant"

    # Escape first, then add line breaks, so the <br> tags themselves are not escaped.
    safe_message = escape(str(message)).replace("\n", "<br>")

    bubble_style = f"""
        background-color:{color};
        padding:10px 15px;
        border-radius:15px;
        margin:6px 0px;
        max-width:75%;
        word-wrap:break-word;
        color:white;
        font-family:'Segoe UI', sans-serif;
        font-size:15px;
        line-height:1.4;
    """
    timestamp = datetime.datetime.now().strftime("%H:%M:%S")
    markup = f"""
    <div style="display:flex; justify-content:{justify};">
        <div style="{bubble_style}">
            <b>{label}:</b><br>{safe_message}
            <div style="font-size:11px; color:#dcdcdc; text-align:right;">{timestamp}</div>
        </div>
    </div>
    """
    display(HTML(markup))

# --- Title ---
# Render the banner and the usage hint once, before the conversation loop starts.
display(HTML("""
<div style='
    background-color:#0277BD;
    color:white;
    padding:12px;
    border-radius:10px;
    font-size:18px;
    font-family:Segoe UI, sans-serif;
    font-weight:bold;
    text-align:center;'>
🏥 PreventiveCareAI — Your Preventive Healthcare Assistant
</div>
<p style='color:gray; font-style:italic;'>Type 'exit' to end the conversation.</p>
"""))

# --- Conversation Loop ---
# NOTE(review): `chat_history` is not read anywhere in the visible notebook; the
# conversation state is kept by the executor's `memory`. Kept for compatibility.
chat_history = []

while True:
    # Plain prompt: ANSI color codes are not rendered by the notebook's input box and
    # showed up as literal "[97m[22m" text in the captured output.
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or interrupted kernel: end the chat the same way "exit" does.
        user_input = "exit"

    if user_input.lower() in {"exit", "quit"}:
        display_message("Assistant", "Take care! Wishing you good health. 💚", "#0277BD", align="left")
        break

    # Nothing typed: ask again instead of spending an agent call on an empty question.
    if not user_input:
        continue

    # Display user input bubble
    display_message("You", user_input, "#2E7D32", align="right")

    try:
        # Call your AI agent
        result = agent_executor.invoke({"input": user_input})
        # Fall back to the whole result when there is no (non-empty) "output" entry.
        answer = result.get("output") or result

        # Display assistant response bubble
        display_message("Assistant", answer, "#0277BD", align="left")

    except Exception as e:
        # Show the failure in the chat and keep the conversation going.
        display_message("Assistant", f"⚠️ Error: {e}", "#C62828", align="left")


[97m[22mYou:  How much exercise does an adult of age 65 need each week to stay healthy?


[97m[22mYou:  What exercises are recommended for adults with diabetes or heart disease?


[97m[22mYou:  Comment une femme enceinte peut-elle rester active en toute sécurité ?


[97m[22mYou:  exit
