In [None]:
#pip install -U langchain-community faiss-cpu langchain-huggingface pymupdf tiktoken langchain-ollama python-env

In [2]:
#pip install pdfplumber

Collecting pdfplumber
  Downloading pdfplumber-0.11.9-py3-none-any.whl.metadata (43 kB)
Collecting pdfminer.six==20251230 (from pdfplumber)
  Downloading pdfminer_six-20251230-py3-none-any.whl.metadata (4.3 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-5.3.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (67 kB)
Collecting cryptography>=36.0.0 (from pdfminer.six==20251230->pdfplumber)
  Downloading cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)
Downloading pdfplumber-0.11.9-py3-none-any.whl (60 kB)
Downloading pdfminer_six-20251230-py3-none-any.whl (6.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m11.2 MB/s[0m  [33m0:00:00[0mm0:00:01[0m00:01[0m
[?25hDownloading cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl (4.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m11.5 MB/s[0m  [33m0:00:00[0m eta [36m0:00:01[0m
[?25hD

# Libraries

In [19]:
import os
import json
import logging
import re
from collections import Counter

import pdfplumber
import numpy as np
import faiss
import requests

# Logger

In [20]:
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

## Data Preprocessing

### 2.1 Document Ingestion & Validation

In [21]:
"""
Load and validate a PDF file.
"""
def load_pdf(pdf_path):
    try:
        pdf = pdfplumber.open(pdf_path)
        if len(pdf.pages) == 0:
            raise ValueError("PDF contains no pages.")
        return pdf
    except Exception as e:
        raise RuntimeError(f"Failed to load PDF: {e}")

In [22]:
"""
Extract text page by page using layout-aware pdfplumber extraction.
"""
def extract_pages(pdf):
    pages = []

    for i, page in enumerate(pdf.pages):
        text = page.extract_text(
            x_tolerance=2,
            y_tolerance=2,
            layout=True
        )
        pages.append({
            "page_number": i + 1,
            "text": text or ""
        })

    return pages

In [23]:
"""
Identify pages with anomalously low text content.
"""
def validate_pages(pages, min_chars=50):
    valid_pages = []

    for page in pages:
        if len(page["text"].strip()) < min_chars:
            logging.warning(
                f"Low text content detected: {page['source']} page {page['page_number']}"
            )
            continue
        valid_pages.append(page)

    return valid_pages

### 2.2 Data Cleaning: (Headers, Footer, and Structural Noise Removal)

In [24]:
"""
Detect recurring header and footer lines using frequency analysis.
"""
#Headers and Footers
def detect_repeated_lines(pages, threshold=0.6):
    line_counts = Counter()
    total_pages = len(pages)

    for page in pages:
        lines = page["text"].splitlines()
        candidates = set(lines[:3] + lines[-3:])

        for line in candidates:
            line_counts[line.strip()] += 1

    return {
        line for line, count in line_counts.items()
        if count / total_pages >= threshold
    }

In [25]:
"""
Remove headers, footers, and repeated layout noise.
"""

#Structural noise
def remove_structural_noise(text: str, repeated_lines: set) -> str:
    cleaned_lines = []

    for line in text.splitlines():
        if line.strip() not in repeated_lines:
            cleaned_lines.append(line)

    return "\n".join(cleaned_lines)

In [26]:
"""
Remove title pages, copyright pages, and tables of contents.
"""

#Front-Matter and Non-Content Filtering
def drop_front_matter(pages,start_page: int = 4):
    return [
        page for page in pages
        if page["page_number"] >= start_page
    ]

### 2.3 Text Normalization

In [27]:
"""
Conservative normalization preserving nutrition-specific semantics.
"""
def normalize_text(text: str) -> str:
    text = re.sub(r"\s+", " ", text)
    return text.strip()


### 2.4 Metadata Preservation

In [9]:
"""
Extract embedded PDF metadata using pdfplumber.
"""
def extract_metadata(pdf, pdf_path: str) -> Dict:
    meta = pdf.metadata or {}

    return {
        "title": meta.get("Title"),
        "author": meta.get("Author"),
        "creator": meta.get("Creator"),
        "producer": meta.get("Producer"),
        "creation_date": meta.get("CreationDate"),
        "source_path": pdf_path,
        "total_pages": len(pdf.pages)
    }

NameError: name 'Dict' is not defined

### 2.5 End-to-End PDF Preprocessing Pipeline

In [12]:
pdf_path = "./nih_corpus/25353.pdf"

In [28]:
"""
Complete PDF preprocessing pipeline for nutrition-focused RAG.
"""
def preprocess_pdf(pdf_path):
    logging.info(f"Processing PDF: {pdf_path}")

    pdf = load_pdf(pdf_path)
    # metadata = extract_metadata(pdf, pdf_path)
    pages = extract_pages(pdf)
    
    validate_pages(pages)
    pages = drop_front_matter(pages)
    
    repeated_lines = detect_repeated_lines(pages)

    cleaned_pages = []
    for page in pages:
        cleaned_text = remove_structural_noise(
            page["text"],
            repeated_lines
        )
        normalized_text = normalize_text(cleaned_text)

        cleaned_pages.append({
            "page_number": page["page_number"],
            "text": normalized_text,
        })
    
    return cleaned_pages
    
    #     cleaned_pages.append({
    #         "page_number": page["page_number"],
    #         "text": normalized_text,
    #         #"metadata": metadata
    #     })
    # pdf.close()
    # return {
    #     "metadata": metadata,
    #     "pages": cleaned_pages
    # }

### 2.6 Run Data Preprocessing Logic

In [29]:
# Try with a file
pdf_path = "./dri_corpus/9956.pdf"
cleaned_pages = preprocess_pdf(pdf_path)

2026-01-15 06:11:55,978 - INFO - Processing PDF: ./dri_corpus/9956.pdf


In [30]:
cleaned_pages

[{'page_number': 4,
  'text': '“Knowing is not enough; we must apply. Willing is not enough; we must do.” —Goethe INSTITUTE OF MEDICINE Shaping the Future for Health'},
 {'page_number': 5,
  'text': 'National Academy of Sciences National Academy of Engineering Institute of Medicine National Research Council The National Academy of Sciences is a private, nonprofit, self-perpetuating society of distinguished scholars engaged in scientific and engineering research, dedicated to the furtherance of science and technology and to their use for the general welfare. Upon the authority of the charter granted to it by the Congress in 1863, the Academy has a mandate that requires it to advise the federal government on scientific and technical matters. Dr. Bruce M. Alberts is president of the National Academy of Sciences. The National Academy of Engineering was established in 1964, under the charter of the National Academy of Sciences, as a parallel organization of outstanding engineers. It is auto

## Data Indexing

### Text Chunking

In [31]:
# Combine Pages
def combine_pages(pages):
    combined_text = []

    for page in pages:
        combined_text.append(page["text"])

    return "\n\n".join(combined_text)

In [32]:
doc = combine_pages(cleaned_pages)

In [33]:
doc

'“Knowing is not enough; we must apply. Willing is not enough; we must do.” —Goethe INSTITUTE OF MEDICINE Shaping the Future for Health\n\nNational Academy of Sciences National Academy of Engineering Institute of Medicine National Research Council The National Academy of Sciences is a private, nonprofit, self-perpetuating society of distinguished scholars engaged in scientific and engineering research, dedicated to the furtherance of science and technology and to their use for the general welfare. Upon the authority of the charter granted to it by the Congress in 1863, the Academy has a mandate that requires it to advise the federal government on scientific and technical matters. Dr. Bruce M. Alberts is president of the National Academy of Sciences. The National Academy of Engineering was established in 1964, under the charter of the National Academy of Sciences, as a parallel organization of outstanding engineers. It is autonomous in its administration and in the selection of its memb

In [27]:
# Chunking with overlap

In [34]:
def split_text_units(text, max_chars=1200, overlap=150):
    units = []
    current = ""

    paragraphs = re.split(r"\n\s*\n", text)

    for para in paragraphs:
        if len(current) + len(para) <= max_chars:
            current += para + "\n\n"
        else:
            units.append(current.strip())

            # create overlap
            if overlap > 0:
                overlap_text = current[-overlap:]
            else:
                overlap_text = ""

            current = overlap_text + para + "\n\n"

    if current.strip():
        units.append(current.strip())

    return units

In [35]:
# Usage
chunks = split_text_units(doc)

print("Total units:", len(chunks))
print("First unit length:", len(chunks[0]))
print("Overlap preview:")
print(chunks[19])

Total units: 293
First unit length: 134
Overlap preview:
Upper Intake Level aWhen the AI for a nutrient is not based on mean intakes of healthy popu- lations, this assessment is made with less confidence.

5 SUMMARY the new DRI values. This report is aimed at meeting this need as well as providing the theoretical background and statistical justifica- tion for application of the DRIs in the area of dietary assessment. USING DRI TO ASSESS NUTRIENT INTAKES S OF INDIVIDUALS It can be appropriate to compare intakes of individuals with spe- cific Dietary Reference Intakes (DRIs), even though dietary intake data alone cannot be used to ascertain an individual’s nutritional status. Dietary assessment is one component of a nutritional status assessment, provided that accurate dietary intake data are collected, the correct DRI is selected for the assessment, and the results are interpreted appropriately. Ideally, intake data are combined with clinical, biochemical, and anthropometric information

### Vector Embeddings

In [67]:
#Ollama Embedding Function
def ollama_embed(text, model="nomic-embed-text:v1.5"):
    response = requests.post(
        "http://localhost:11434/api/embeddings",
        json={
            "model": model,
            "prompt": text
        },
        timeout=120
    )
    response.raise_for_status()
    return response.json()["embedding"]

In [59]:
# Build Index record for each embedded Text chunk

def build_records(text_units, source_path):
    return [
        {
            "content": unit,
            "metadata": {"source": source_path}
        }
        for unit in text_units
        if unit.strip()
    ]

In [64]:
def embed_records(records):
    embeddings = []
    valid_records = []

    print(f"[DEBUG] Records received: {len(records)}")

    for i, record in enumerate(records):
        text = record["content"].strip()

        if not text:
            continue

        if i == 0:
            print("[DEBUG] Sample text being embedded:")
            print(text[:500])

        try:
            vector = ollama_embed(text)
        except Exception as e:
            print("[DEBUG] Ollama embedding error:", e)
            continue

        if not vector:
            print("[DEBUG] Empty embedding returned")
            continue

        embeddings.append(vector)
        valid_records.append(record)

    print(f"[DEBUG] Embeddings generated: {len(embeddings)}")
    return valid_records, embeddings


In [61]:
def index_pdf(pdf_path):
    pages = preprocess_pdf(pdf_path)
    combined_text = combine_pages(pages)

    text_units = split_text_units(combined_text, max_chars=1200)

    records = build_records(text_units, source_path=pdf_path)
    records, embeddings = embed_records(records)

    return records, embeddings

In [62]:
def index_corpus(pdf_directory):
    all_records = []
    all_embeddings = []

    pdf_files = [
        os.path.join(pdf_directory, f)
        for f in os.listdir(pdf_directory)
        if f.lower().endswith(".pdf")
    ]

    for pdf_path in pdf_files:
        logging.info(f"Indexing {pdf_path}")
        records, embeddings = index_pdf(pdf_path)

        all_records.extend(records)
        all_embeddings.extend(embeddings)

    faiss_index = build_faiss_index(all_embeddings)

    logging.info(f"Corpus indexing complete")
    logging.info(f"Indexed {faiss_index.ntotal} chunks")

    return faiss_index, all_records

In [60]:
# FAISS index builder

def build_faiss_index(embeddings):
    if len(embeddings) == 0:
        raise ValueError("No embeddings generated — FAISS index cannot be built")

    dim = len(embeddings[0])
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings, dtype="float32"))
    return index


In [65]:
records, embeddings = index_pdf("./dri_corpus/5776.pdf")
print(len(embeddings))

2026-01-15 07:13:52,018 - INFO - Processing PDF: ./dri_corpus/5776.pdf


[DEBUG] Records received: 444
[DEBUG] Sample text being embedded:
STANDING COMMITTEE ON THE SCIENTIFIC EVALUATION OF DIETARY REFERENCE INTAKES VERNON R. YOUNG (Chair),*† Laboratory of Human Nutrition, School of Science, Massachusetts Institute of Technology, Cambridge JOHN W. ERDMAN, JR. (Vice Chair), Division of Nutritional Sciences, College of Agricultural, Consumer and Environmental Sciences, University of Illinois, Urbana-Champaign JANET C. KING (Vice-Chair),* U.S. Department of Agriculture Western Human Nutrition Research Center, Presidio of San Francisco
[DEBUG] Embeddings generated: 444
444


In [68]:
faiss_index, records = index_corpus("./dri_corpus")
print("Total indexed chunks:", faiss_index.ntotal)

2026-01-15 07:21:41,267 - INFO - Indexing ./dri_corpus/5776.pdf
2026-01-15 07:21:41,268 - INFO - Processing PDF: ./dri_corpus/5776.pdf


[DEBUG] Records received: 444
[DEBUG] Sample text being embedded:
STANDING COMMITTEE ON THE SCIENTIFIC EVALUATION OF DIETARY REFERENCE INTAKES VERNON R. YOUNG (Chair),*† Laboratory of Human Nutrition, School of Science, Massachusetts Institute of Technology, Cambridge JOHN W. ERDMAN, JR. (Vice Chair), Division of Nutritional Sciences, College of Agricultural, Consumer and Environmental Sciences, University of Illinois, Urbana-Champaign JANET C. KING (Vice-Chair),* U.S. Department of Agriculture Western Human Nutrition Research Center, Presidio of San Francisco


2026-01-15 07:22:21,971 - INFO - Indexing ./dri_corpus/10026.pdf
2026-01-15 07:22:21,971 - INFO - Processing PDF: ./dri_corpus/10026.pdf


[DEBUG] Embeddings generated: 444
[DEBUG] Records received: 792
[DEBUG] Sample text being embedded:
“Knowing is not enough; we must apply. Willing is not enough; we must do.” —Goethe INSTITUTE OF MEDICINE Shaping the Future for Health


2026-01-15 07:23:29,980 - INFO - Indexing ./dri_corpus/10490.pdf
2026-01-15 07:23:29,981 - INFO - Processing PDF: ./dri_corpus/10490.pdf


[DEBUG] Embeddings generated: 792
[DEBUG] Records received: 1345
[DEBUG] Sample text being embedded:
“Knowing is not enough; we must apply. Willing is not enough; we must do.” —Goethe Advising the Nation. Improving Health.


2026-01-15 07:25:24,988 - INFO - Indexing ./dri_corpus/9956.pdf
2026-01-15 07:25:24,989 - INFO - Processing PDF: ./dri_corpus/9956.pdf


[DEBUG] Embeddings generated: 1345
[DEBUG] Records received: 293
[DEBUG] Sample text being embedded:
“Knowing is not enough; we must apply. Willing is not enough; we must do.” —Goethe INSTITUTE OF MEDICINE Shaping the Future for Health


2026-01-15 07:25:51,598 - INFO - Indexing ./dri_corpus/6015.pdf
2026-01-15 07:25:51,599 - INFO - Processing PDF: ./dri_corpus/6015.pdf


[DEBUG] Embeddings generated: 293
[DEBUG] Records received: 582
[DEBUG] Sample text being embedded:
“Knowing is not enough; we must apply. Willing is not enough; we must do.” —Goethe INSTITUTE OF MEDICINE Shaping the Future for Health


2026-01-15 07:26:41,362 - INFO - Indexing ./dri_corpus/13050.pdf
2026-01-15 07:26:41,363 - INFO - Processing PDF: ./dri_corpus/13050.pdf


[DEBUG] Embeddings generated: 582
[DEBUG] Records received: 1096
[DEBUG] Sample text being embedded:
“Knowing is not enough; we must apply. Willing is not enough; we must do.” —Goethe Advising the Nation. Improving Health.


2026-01-15 07:29:31,253 - INFO - Indexing ./dri_corpus/25353.pdf
2026-01-15 07:29:31,254 - INFO - Processing PDF: ./dri_corpus/25353.pdf


[DEBUG] Embeddings generated: 1096
[DEBUG] Records received: 571
[DEBUG] Sample text being embedded:
The National Academy of Sciences was established in 1863 by an Act of Congress, signed by President Lincoln, as a private, nongovernmental institu- tion to advise the nation on issues related to science and technology. Members are elected by their peers for outstanding contributions to research. Dr. Marcia McNutt is president. The National Academy of Engineering was established in 1964 under the char- ter of the National Academy of Sciences to bring the practices of engineering to advising the n


2026-01-15 07:30:30,710 - INFO - Indexing ./dri_corpus/10925.pdf
2026-01-15 07:30:30,711 - INFO - Processing PDF: ./dri_corpus/10925.pdf


[DEBUG] Embeddings generated: 571
[DEBUG] Records received: 630
[DEBUG] Sample text being embedded:
“Knowing is not enough; we must apply. Willing is not enough; we must do.” —Goethe Adviser to the Nation to Improve Health


2026-01-15 07:31:24,068 - INFO - Indexing ./dri_corpus/9810.pdf
2026-01-15 07:31:24,068 - INFO - Processing PDF: ./dri_corpus/9810.pdf


[DEBUG] Embeddings generated: 630
[DEBUG] Records received: 523
[DEBUG] Sample text being embedded:
NATIONAL ACADEMY PRESS • 2101 Constitution Avenue, N.W. • Washington, DC 20418 NOTICE: The project that is the subject of this report was approved by the Governing Board of the National Research Council, whose members are drawn from the councils of the Na- tional Academy of Sciences, the National Academy of Engineering, and the Institute of Med- icine. The members of the committee responsible for the report were chosen for their spe- cial competences and with regard for appropriate balance. This


2026-01-15 07:32:08,926 - INFO - Corpus indexing complete
2026-01-15 07:32:08,927 - INFO - Indexed 6276 chunks


[DEBUG] Embeddings generated: 523
Total indexed chunks: 6276


### Save Vector Store

In [53]:
def save_vector_store(faiss_index, records, output_dir="dri_vector_db"):
    os.makedirs(output_dir, exist_ok=True)

    faiss.write_index(faiss_index, f"{output_dir}/faiss.index")

    with open(f"{output_dir}/records.json", "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2, ensure_ascii=False)

    logging.info(f"Vector store saved to '{output_dir}'")

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


In [57]:
# Usage
save_vector_store(faiss_index, records)

### Load the Vector Database

In [73]:
import pickle

In [69]:
def save_vector_store(faiss_index, records, output_dir="dri_vector_db", embedding_model="nomic-embed-text:v1.5"):
    os.makedirs(output_dir, exist_ok=True)

    # Save FAISS index
    faiss.write_index(faiss_index, os.path.join(output_dir, "faiss.index"))

    # Save records (pickle)
    with open(os.path.join(output_dir, "records.pkl"), "wb") as f:
        pickle.dump(records, f)

    # Save metadata (JSON for transparency)
    metadata = {
        "embedding_model": embedding_model,
        "num_vectors": faiss_index.ntotal,
        "index_type": type(faiss_index).__name__
    }

    with open(os.path.join(output_dir, "index_metadata.json"), "w") as f:
        json.dump(metadata, f, indent=2)

    logging.info(f"Vector store saved to '{output_dir}'")

In [70]:
def load_vector_store(input_dir="dri_vector_db"):
    faiss_index = faiss.read_index(os.path.join(input_dir, "faiss.index"))

    with open(os.path.join(input_dir, "records.pkl"), "rb") as f:
        records = pickle.load(f)

    with open(os.path.join(input_dir, "index_metadata.json"), "r") as f:
        metadata = json.load(f)

    return faiss_index, records, metadata

In [71]:
# Save Vector DB

In [74]:
save_vector_store(
    faiss_index=faiss_index,
    records=records,
    output_dir="dri_vector_db",
    embedding_model="nomic-embed-text:v1.5"
)

2026-01-15 07:41:38,154 - INFO - Vector store saved to 'dri_vector_db'


In [75]:
# Load Vector DB

In [76]:
faiss_index, records, metadata = load_vector_store("dri_vector_db")

print("Loaded vectors:", faiss_index.ntotal)
print("Embedding model:", metadata["embedding_model"])

Loaded vectors: 6276
Embedding model: nomic-embed-text:v1.5


## Retrieval

### 4.1 Query Embedding & Similarity-Based Retrieval

In [77]:
def retrieve(query, faiss_index, records, top_k=5):
    query_embedding = ollama_embed(query)
    query_vec = np.array([query_embedding]).astype("float32")

    distances, indices = faiss_index.search(query_vec, top_k)

    retrieved_records = []
    for idx in indices[0]:
        retrieved_records.append(records[idx])

    return retrieved_records

### 4.2 Example Retrieval Usage

In [78]:
results = retrieve(
    query="What is the recommended dietary allowance for calcium in adults?",
    faiss_index=faiss_index,
    records=records,
    top_k=5
)

for i, r in enumerate(results, 1):
    print(f"\n--- Result {i} ---")
    print("Source:", r["metadata"]["source"])
    print(r["content"][:400], "...")



--- Result 1 ---
Source: ./dri_corpus/13050.pdf
s=2#b3 (accessed July 23, 2010). 15Available online at http://www.statcan.gc.ca/pub/82-623-x/2010002/part-partie1-eng. htm (accessed July 23, 2010).

DIETARY INTAKE ASSESSMENT 463 Recommended Dietary Allowance (RDA), because this approach will lead to estimates of inadequacy that are too large. Based on the 2000 IOM report cited above (IOM, 2000), whenever pos- sible, the assessment of apparent di ...

--- Result 2 ---
Source: ./dri_corpus/5776.pdf
ol)/day (from diet and supplements) is recommended for children due to the lack of data. UL for Children 1 through 18 years 2,500 mg (62.5 mmol)/day

142 DIETARY REFERENCE INTAKES Older Adults: Ages > 70 Years Several physiologic differences in older adults need to be consid- ered in setting the UL for people over age 70. Because this popula- tion is more likely to have achlorhydria (Recker, 1985) ...

--- Result 3 ---
Source: ./dri_corpus/13050.pdf
nto account, but women 51 to 70 years of age

## Generation

### 5.1 Prompt Design

In [79]:
def build_prompt(query, retrieved_docs):
    context_blocks = []

    for i, doc in enumerate(retrieved_docs, start=1):
        block = (
            f"[Source {i}]\n"
            f"Document: {doc['metadata']['source']}\n"
            f"Text:\n{doc['content']}\n"
        )
        context_blocks.append(block)

    context = "\n\n".join(context_blocks)

    prompt = f"""
You are a nutrition expert answering questions using ONLY the sources provided.

Rules:
- Use only the information from the sources below.
- Cite sources using [Source X] notation.
- If the answer is not found in the sources, say:
  "The provided documents do not contain sufficient information to answer this question."

Sources:
{context}

Question:
{query}

Answer:
"""
    return prompt


The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


### 5.2 Ollama Generation

In [82]:
def ollama_generate_grounded(prompt, model="deepseek-r1:8b"):
    response = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": model,
            "prompt": prompt,
            "stream": False
        },
        timeout=300
    )
    response.raise_for_status()
    return response.json()["response"]

### 5.3 Q&A Pipeline

In [85]:
def answer_with_citations(
    query,
    faiss_index,
    records,
    top_k=5,
    llm_model="deepseek-r1:8b"
):
    # Step 1: Retrieve
    retrieved_docs = retrieve(query, faiss_index, records, top_k=top_k)

    # Step 2: Build prompt
    prompt = build_prompt(query, retrieved_docs)

    # Step 3: Generate grounded answer
    answer = ollama_generate_grounded(prompt, model=llm_model)

    return answer, retrieved_docs


### 5.4 Example Usage (DRI-Specific)

In [86]:
query = "What is the recommended dietary allowance for calcium in adults aged 19–50?"

answer, sources = answer_with_citations(
    query=query,
    faiss_index=faiss_index,
    records=records,
    top_k=5
)

print("\n=== Answer ===\n")
print(answer)

print("\n=== Sources Used ===\n")
for i, doc in enumerate(sources, 1):
    print(f"[Source {i}] {doc['metadata']['source']}")


=== Answer ===

The recommended dietary allowance (RDA) for calcium for adults aged 19–50 years is **1,000 mg/day**.

This information is found in Table 7-1 of the Dietary Reference Intakes for Calcium and Vitamin D in Source 4 [Source 4].

=== Sources Used ===

[Source 1] ./dri_corpus/5776.pdf
[Source 2] ./dri_corpus/13050.pdf
[Source 3] ./dri_corpus/13050.pdf
[Source 4] ./dri_corpus/13050.pdf
[Source 5] ./dri_corpus/13050.pdf


In [9]:
from langchain_core.output_parsers import StrOutputParser #Final output as a string
from langchain_core.runnables import Runnable, RunnablePassthrough #To parse Query & Context directly to the LLM 
from langchain_core.prompts import ChatPromptTemplate #To pass prompt data & context

from langchain_ollama import ChatOllama

In [10]:
import langchain
print(langchain.__version__)

1.0.5


In [1]:
model = ChatOllama(model="llama3.1", base_url="http://localhost:11434")
model.invoke("hi")

NameError: name 'ChatOllama' is not defined

In [42]:
# Prompt Template for RAG
from langsmith import Client
client = Client(api_key=LANGCHAIN_API_KEY)
prompt = client.pull_prompt("rlm/rag-prompt", include_model=True)

In [103]:
# Prompt Template Using 'rlm/rag-prompt'

prompt = """
    You are a helpful nutritionist in a elderly care facility. 
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    
    Question: {question} 
    Context: {context} 
    Answer:
"""

# prompt = """
#     You are a helpful nutritionist in a elderly care facility. 
#     Use the following pieces of retrieved context to answer the question.
#     Prioritize the first 5 retrieved context.
#     If you don't know the answer, just say that you don't know.
    
#     Question: {question} 
#     Context: {context} 
#     Answer:
# """


prompt = ChatPromptTemplate.from_template(prompt)

In [68]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

# print(format_docs(docs))

In [95]:
rag_chain = (
    {"context": retriever|format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [53]:
question = "Can you give me the maximum amount of carbohydrates a patient with diabetes can take daily?"
output = rag_chain.invoke(question)
print(output)

According to the Dietary Reference Intakes, there is no specific maximum amount of carbohydrates recommended for patients with diabetes. However, it does provide some guidelines on carbohydrate requirements.

The estimated average requirement for carbohydrates in an energy-adequate diet is approximately 87 (117 – 30) to 112 (142 – 30) g/d. This suggests that a daily intake of around 100-120 grams of carbohydrates may be sufficient for most individuals, including those with diabetes.

It's also worth noting that the focus should not just be on total carbohydrate intake, but also on the glycemic index and quality of carbohydrates consumed. A balanced diet that includes complex carbohydrates such as whole grains, fruits, and vegetables can help regulate blood sugar levels and improve overall health outcomes for individuals with diabetes.


In [54]:
question = "A patient named Ronny Chen is a 73 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis and some suggestions or recommendations?"
output = rag_chain.invoke(question)
print(output)

Based on the information provided, I'll analyze Ronny Chen's meal and provide some suggestions.

**Analysis:**

Ronny Chen is a 73-year-old man with diabetes. His meal consists of:

* 320 grams of stir-fried seasonal vegetables
* 220 grams of cabbage soup

Both of these options are high in fiber and water content, which can be beneficial for people with diabetes as they can help regulate blood sugar levels.

However, there is no information on the macronutrient breakdown (carbohydrates, protein, fat) or the glycemic index of the stir-fried vegetables. The cabbage soup may also contain a significant amount of carbohydrates, depending on the ingredients and portion size.

**Suggestions:**

Considering Ronny Chen's diabetes diagnosis, I would recommend:

1. **Balancing macronutrients:** Ensure that his meal is balanced with a mix of protein (from meat, fish, or legumes), healthy fats (e.g., nuts, seeds, avocados), and complex carbohydrates (whole grains, vegetables).
2. **Glycemic index a

In [98]:
question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of 炒時蔬 stir-fried seasonal vegetables and 220 grams of 高麗菜粥 cabbage soup for his meal. Can you provide an analysis and some healthy suggestions or recommendations?"

output = rag_chain.invoke(question)
print(output)

Based on the provided text, I will analyze Roni's meal plan to determine if it is suitable for his needs.

However, since you didn't mention Roni or his meal plan, I'll assume that we're analyzing the general recommendations provided in the text. Here's a summary of the key points:

1.  **Relationships between macronutrient intake and chronic disease**: The relationship between macronutrient intake and risk of chronic diseases like coronary heart disease (CHD), obesity, cancer, and diabetes is complex and not well understood.
2.  **Challenges in providing nutritional guidance**: Modifying the diet for one energy-yielding nutrient often changes the intake of other nutrients, making it difficult to provide clear and specific nutritional guidance.
3.  **Estimated Acceptable Macronutrient Distribution Ranges (AMDR)**: AMDR can be estimated by considering the risk of chronic disease as well as ensuring adequate amounts of essential macronutrients and micronutrients.

Now, let's consider a g

In [101]:
question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis and some healthy suggestions or recommendations?"

output = rag_chain.invoke(question)
print(output)

Based on the provided text, here's an analysis of Ron Finley's diet and exercise habits:

Ron Finley is a 55-year-old man who consumes a high-glycemic index (GI) diet. He eats foods such as white bread, sugary drinks, and processed snacks. This type of diet has been linked to various health problems, including obesity, diabetes, and heart disease.

While the text doesn't provide specific information about Ron's exercise habits, it does mention that some studies have shown a link between physical activity and improved insulin sensitivity. Therefore, we can infer that regular exercise may be beneficial for Ron in terms of improving his glucose metabolism and reducing his risk of chronic diseases.

Based on this analysis, here are some potential concerns with Ron Finley's diet:

* High-glycemic index foods: Consuming high-GI foods like white bread and sugary drinks can lead to a rapid spike in blood sugar levels, which may increase the risk of insulin resistance and type 2 diabetes.
* Low

In [110]:
question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis, suggestions, or recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the information provided, I can analyze Ronny Chen's meal.

Ronny Chen is a 76-year-old man with diabetes who has consumed 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. 

The key point here is that both stir-fried seasonal vegetables and cabbage soup are considered low-glycemic index foods, which means they may cause a gradual increase in blood sugar levels rather than a sharp spike. This can be beneficial for individuals with diabetes.

However, without knowing the exact portion sizes or the type of complex carbohydrates Ronny has consumed (e.g., whole grains), it's difficult to assess whether his carbohydrate intake is sufficient or excessive.

That being said, I would recommend the following suggestions:

1.  Ensure that Ronny's meal includes a balanced mix of protein sources, healthy fats, and complex carbohydrates to maintain stable blood sugar levels.
2.  Encourage Ronny to consume more fiber-rich foods, such as fruits, legumes, 

In [112]:
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs = {'k': 30, 
                                                                          'fetch_k': 200,
                                                                          'lambda_mult': 1})

question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis, suggestions, or recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the information provided, it seems that Ronny Chen's meal is quite balanced and healthy. He has a large serving of stir-fried seasonal vegetables (320 grams) and cabbage soup (220 grams), which are both rich in fiber, vitamins, and minerals.

However, as a 76-year-old man with diabetes, Ronny may need to be mindful of his carbohydrate intake, especially if he is consuming high-glycemic index foods. A large serving of vegetables like cabbage can contribute a significant amount of carbohydrates to the meal.

It would be beneficial for Ronny to know the glycemic index of the specific vegetables and cabbage soup he is consuming, as this will help him understand how they will affect his blood sugar levels. Additionally, it may be helpful for him to balance out his meal with some protein and healthy fats to prevent a spike in insulin levels.

As a nutritionist, I would suggest that Ronny's caregiver or healthcare provider consider the following:

* Encourage Ronny to eat more slowly

In [114]:
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs = {'k': 40, 
                                                                          'fetch_k': 200,
                                                                          'lambda_mult': 1})

question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis, suggestions, or recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the information provided, I can offer some analysis and suggestions for Ronny Chen's meal.

Firstly, it's great to see that Ronny is having a generous portion of stir-fried seasonal vegetables (320g) and cabbage soup (220g). These foods are excellent sources of fiber, vitamins, and minerals. Stir-frying is also a good cooking method as it helps retain the nutrients in the vegetables.

However, I don't have specific information on how these food items will affect Ronny's diabetes management or insulin sensitivity. A low-glycemic index diet may be beneficial for managing blood sugar levels, but I don't know if this meal plan would fall into that category.

It's also unclear what type of carbohydrates and fats are present in the cabbage soup and stir-fried vegetables. If the soup is made with high-starch ingredients like potatoes or corn, it might have a higher glycemic index. On the other hand, if the soup is made with low-starch ingredients like vegetables and broth, it might b

In [115]:
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs = {'k': 50, 
                                                                          'fetch_k': 200,
                                                                          'lambda_mult': 1})

question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis, suggestions, or recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the information provided, it appears that Ronny Chen's meal consists of 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup.

As a nutritionist in an elderly care facility, I would analyze this meal as follows:

* The high fiber content from the cabbage soup may help to regulate blood sugar levels and support digestive health.
* The stir-fried seasonal vegetables are likely rich in vitamins, minerals, and antioxidants, which can provide essential nutrients for overall health.

However, I do not know if Ronny Chen's diabetes management is adequately addressed with this meal. A meal plan for a person with diabetes should take into account their individual nutritional needs and goals. 

Therefore, it would be beneficial to review Ronny Chen's medication regimen and blood glucose monitoring results to ensure that his meal plan is aligned with his health objectives.

As an additional suggestion, I would recommend considering the glycemic index of the vegetable

In [116]:
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs = {'k': 40, 
                                                                          'fetch_k': 300,
                                                                          'lambda_mult': 1})

question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis, suggestions, or recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the information provided, I can analyze Ronny Chen's meal as follows:

Ronny Chen is a 76-year-old man with diabetes who has consumed 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. 

While the exact nutrient content of these foods is not specified, it is clear that both dishes are high in fiber and water content, which can help slow down glucose absorption and reduce the glycemic index (GI) of the meal.

As a person with diabetes, Ronny Chen would benefit from consuming low to moderate GI foods to manage his blood sugar levels. The GI is a measure of how quickly a food raises blood sugar levels after eating. Foods with a lower GI value are digested more slowly and may cause a gradual increase in blood glucose.

The stir-fried seasonal vegetables and cabbage soup appear to be good choices for Ronny Chen's meal, as they likely have a low to moderate GI value due to their high fiber and water content. However, without knowing the specific 

In [11]:
model = ChatOllama(model="llama3.2", base_url="http://localhost:11434")

In [67]:
# Prompt Template Using 'rlm/rag-prompt'
prompt = """
    You are a helpful and professional nutritionist in a elderly care facility or hospice. 
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    
    Question: {question} 
    Context: {context} 
    Answer:
"""
prompt = ChatPromptTemplate.from_template(prompt)

In [60]:
retriever = new_vector_store.as_retriever(search_type="mmr", search_kwargs = {'k': 10, 
                                                                          'fetch_k': 100,
                                                                          'lambda_mult': 1})

In [21]:
def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

In [61]:
rag_chain = (
    {"context": retriever|format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [54]:
question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis, suggestions, or recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the provided context, I can analyze Ronny Chen's meal and provide some suggestions.

Firstly, I would like to note that the total energy intake from the stir-fried seasonal vegetables (320g) and cabbage soup (220g) is not explicitly mentioned. However, we can estimate the approximate energy content of each meal:

* Stir-fried seasonal vegetables: approximately 50-100 kcal/g, depending on the specific vegetables and cooking method.
* Cabbage soup: approximately 10-20 kcal/g.

Assuming an average energy content of 75 kcal/g for the stir-fried vegetables and 15 kcal/g for the cabbage soup, we can estimate the total energy intake:

* Stir-fried seasonal vegetables: 320g x 75 kcal/g = 24,000 kcal
* Cabbage soup: 220g x 15 kcal/g = 3,300 kcal

Total estimated energy intake: approximately 27,300 kcal

This is significantly higher than the recommended daily energy intake for a 76-year-old man with diabetes. According to the Dietary Reference Intakes (DRI), the recommended daily energy

In [57]:
question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis, suggestions, or recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the context provided, I can offer some analysis and suggestions for Ronny Chen's meal. However, please note that I'll need to make some assumptions since specific guidelines or data sources are not directly mentioned.

Ronny Chen is a 76-year-old man with diabetes, consuming 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup. Let's examine the nutritional content:

1. **Carbohydrates**: The total carbohydrate intake from both meals appears to be relatively high (320g + 220g = 540g). While some carbohydrates are naturally occurring in vegetables, a significant portion may come from the cabbage soup.
2. **Fiber**: Vegetables like stir-fried seasonal vegetables contain fiber, which can help regulate blood sugar levels and promote satiety. However, the exact amount of fiber in each serving is not specified.
3. **Protein**: I couldn't find any information on protein content in Ronny's meals. It's essential to consider protein intake, as it plays a role in glu

In [71]:
# Prompt Template Using 'rlm/rag-prompt'
prompt = """
    You are a helpful nutritionist in a elderly care facility in Taiwan. 
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    
    Question: {question} 
    Context: {context} 
    Answer:
"""
prompt = ChatPromptTemplate.from_template(prompt)


retriever = new_vector_store.as_retriever(search_type="mmr", search_kwargs = {'k':10, 
                                                                          'fetch_k': 100,
                                                                          'lambda_mult': 1})
rag_chain = (
    {"context": retriever|format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide an analysis, suggestions, or recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Thank you for providing the context. Based on the information available, I can provide some analysis and recommendations for Ronny Chen's meal.

Firstly, it is essential to note that 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup are relatively high in carbohydrates, which may contribute to an increased risk of hyperinsulinemia and glucose intolerance. As a person with diabetes, it is crucial to monitor carbohydrate intake to prevent excessive spikes in blood sugar levels.

The glycemic index (GI) of the meal cannot be determined without specific data on the individual ingredients, but it is likely that the high carb content will cause a rapid increase in blood glucose levels, followed by an insulin response. This could potentially exacerbate diabetes-related complications.

Considering the context provided, I would recommend the following:

1. Encourage Ronny Chen to explore low-GI options for his meals, such as incorporating more protein-rich foods and vege

In [77]:
# Prompt Template Using 'rlm/rag-prompt'
prompt = """
    You are a helpful nutritionist in a elderly care facility in Taiwan. 
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    
    Question: {question} 
    Context: {context} 
    Answer:
"""
prompt = ChatPromptTemplate.from_template(prompt)


retriever = new_vector_store.as_retriever(search_type="mmr", search_kwargs = {'k':15, 
                                                                          'fetch_k': 100,
                                                                          'lambda_mult': 1})
rag_chain = (
    {"context": retriever|format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

question = "A patient named Ronny Chen is a 76 year old man with diabetes. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide (1) an analysis, (2) suggestions, and (3) recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the context provided, I'll analyze, suggest, and recommend for Ronny Chen's meal.

Analysis:

Ronny Chen's meal consists of 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup. This provides a good amount of fiber, vitamins, and minerals from the vegetables, which can help support overall health. However, I couldn't find specific information on the glycemic index (GI) of these specific foods. Without this data, it's challenging to determine if Ronny's meal is suitable for managing his diabetes.

Suggestions:

1.  Assess the GI of each food component: To better understand how Ronny's meal will affect his blood sugar levels, it would be helpful to know the GI of each food component. This information can help you calculate the meal's overall glycemic impact.
2.  Consider the portion sizes: While Ronny's meal seems substantial, I couldn't find specific guidelines on optimal portion sizes for stir-fried vegetables and cabbage soup. Be mindful of his daily calo

In [78]:
# Prompt Template Using 'rlm/rag-prompt'
prompt = """
    You are a helpful nutritionist in a elderly care facility in Taiwan. 
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    
    Question: {question} 
    Context: {context} 
    Answer:
"""
prompt = ChatPromptTemplate.from_template(prompt)


retriever = new_vector_store.as_retriever(search_type="mmr", search_kwargs = {'k':15, 
                                                                          'fetch_k': 100,
                                                                          'lambda_mult': 1})
rag_chain = (
    {"context": retriever|format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

question = "A patient named Ronny Chen is a 76 year old man. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Can you provide (1) an analysis, (2) suggestions, and (3) recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the provided context, I'll analyze Ronny Chen's meal and provide suggestions and recommendations.

Analysis:

Ronny Chen is a 76-year-old man who consumes 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. The total energy content of this meal is not explicitly mentioned in the provided text. However, we can estimate the approximate energy intake based on the macronutrient composition of the food items.

Stir-fried seasonal vegetables typically contain a mix of carbohydrates (mainly from vegetables like bell peppers, carrots, and broccoli), fiber, vitamins, and minerals. Assuming an average carbohydrate content of 10-15 grams per serving, Ronny's vegetable portion might contribute around 320 grams x 12.5 grams/gram = 4000 kcal.

Cabbage soup is a rich source of carbohydrates (from the cabbage itself) and water content, with approximately 80-100 calories per 100g serving. Based on this, the energy contribution from Ronny's cabbage soup would

In [96]:
# Prompt Template Using 'rlm/rag-prompt'
prompt = """
    You are a helpful nutritionist in a elderly care facility in Taiwan. 
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    
    Question: {question} 
    Context: {context} 
    Answer:
"""
prompt = ChatPromptTemplate.from_template(prompt)


retriever = new_vector_store.as_retriever(search_type="mmr", search_kwargs = {'k':10, 
                                                                          'fetch_k': 120,
                                                                          'lambda_mult': 1})
rag_chain = (
    {"context": retriever|format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

question = "A patient named Ronny Chen is a 76 year old man with diabetes. He weighs at 60kg and is 5'10''. He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. Create an assessment based on Dietary Reference Intakes (DRIs) and can you provide (1) an analysis, (2) suggestions, and (3) recommendations based on that information?"

output = rag_chain.invoke(question)
print(output)

Based on the provided context, I will analyze Ronny Chen's meal and provide suggestions and recommendations.

**Analysis:**

1. Energy: To determine if Ronny Chen is maintaining weight, we need to calculate his total energy intake. The stir-fried seasonal vegetables have an approximate energy density of 25 kcal/100g (assuming a mix of various vegetables). Let's assume the total energy from these vegetables is approximately 320 x 25 = 8000 kcal.
The cabbage soup has an approximate energy density of 40 kcal/100g. Assuming the total energy from this soup is approximately 220 x 40 = 8800 kcal.
Total energy intake: 8000 + 8800 = 16800 kcal
2. Carbohydrates: The stir-fried seasonal vegetables have a moderate carbohydrate content (assuming 5-10% of dry weight). Let's assume the total carbohydrate content from these vegetables is approximately 320 x 0.075 = 24g.
The cabbage soup has a higher carbohydrate content (assuming 20% of dry weight). Let's assume the total carbohydrate content from thi

AttributeError: 'tuple' object has no attribute 'page_content'

In [12]:
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("""
You are a professional clinical nutritionist specializing in elderly care in Taiwan.

Use the retrieved context below to support your reasoning.
- If context provides relevant data, use it directly.
- If it lacks exact values, give only safe, general suggestions.
- Do NOT make up numbers or conversions.
- Be concise, clear, and compassionate.

Question:
{question}

Context:
{context}

Please provide:
1. **Analysis** — summarize the nutritional content and adequacy.
2. **Suggestions** — what can be improved or balanced.
3. **Recommendations** — practical next steps for elderly dietary care.
""")

retriever = new_vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={
        "k": 8,          # fewer but higher-quality chunks
        "fetch_k": 50,   # how many to initially fetch before filtering
        "lambda_mult": 0.7  # balance between diversity and relevance
    }
)

def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

question = (
    "A patient named Ronny Chen is a 76-year-old man. "
    "He is having 320 grams of stir-fried seasonal vegetables and "
    "220 grams of cabbage soup for his meal. "
    "Can you provide (1) an analysis, (2) suggestions, and (3) recommendations?"
)

output = rag_chain.invoke(question)
print(output)

I'd like to help you with analyzing Ronny Chen's meal and providing suggestions and recommendations for his elderly dietary care.

1. Analysis:
Ronny Chen's meal consists of 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup. Here's a rough breakdown of the nutritional content:

- Calories: Approximately 800-1000
- Protein: 20-25g (from vegetables and cabbage)
- Fat: 5-7g (mostly from the vegetable oil used in stir-frying)
- Carbohydrates: 60-70g (mainly from cabbage, with some from the vegetables)
- Fiber: 8-10g
- Vitamins and minerals: Rich in vitamins A and C, potassium, and various B vitamins from the vegetables.

2. Suggestions:
- Balancing carbohydrate intake: Ronny's meal is relatively high in carbohydrates due to the cabbage soup. Consider balancing this with protein and healthy fats to maintain a stable blood sugar level.
- Adding variety: While the seasonal vegetables are nutritious, incorporating other colorful vegetables can provide a broader range of

In [16]:
question = (
    "A patient named Ronny Chen is a 76-year-old man. "
    "He is having 320 grams of rice and 500mg of braise pork"
    "220 grams of cabbage soup for his meal. "
    "Can you provide (1) an analysis, (2) suggestions, and (3) recommendations?"
)

output = rag_chain.invoke(question)
print(output)

As a clinical nutritionist specializing in elderly care in Taiwan, I will analyze Ronny Chen's meal, provide suggestions for improvement, and offer recommendations for his dietary care.

**Analysis**

Ronny Chen's meal consists of 320 grams of rice, 500mg of braise pork, and 220 grams of cabbage soup. According to the Dietary Reference Intakes (DRIs), the recommended daily intake of carbohydrates is 45-65% of total daily energy. With a meal size of 800 calories, Ronny's carbohydrate intake exceeds this range.

The American Heart Association recommends limiting dietary saturated fat to no more than 5-6% of total daily energy. Braise pork contains significant amounts of saturated fat (approximately 30g per 100g serving). The recommended daily intake of protein is 0.8-1.2 grams per kilogram body weight, which translates to approximately 50-60 grams for a 76-year-old man.

The cabbage soup provides some essential vitamins and minerals, but the amount may not be sufficient to meet Ronny's d

In [13]:
model = ChatOllama(model="deepseek-r1:8b", base_url="http://localhost:11434")

In [25]:
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("""
You are a professional clinical nutritionist specializing in elderly care in Taiwan.

Use the retrieved context below to support your reasoning.
- If context provides relevant data, use it directly.
- If it lacks exact values, give only safe, general suggestions.
- Do NOT make up numbers or conversions.
- Be concise, clear, and compassionate.
- Provide English and  the equivalent Chinese (Traditional) translation

Question:
{question}

Context:
{context}

Please provide:
1. **Analysis** — summarize the nutritional content and adequacy.
2. **Suggestions** — what can be improved or balanced.
3. **Recommendations** — practical next steps for elderly dietary care.
""")

retriever = new_vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={
        "k": 10,          # fewer but higher-quality chunks
        "fetch_k": 50,   # how many to initially fetch before filtering
        "lambda_mult": 1  # balance between diversity and relevance
    }
)

def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

question = (
    "A patient named Ronny Chen is a 76-year-old man"
    "He is 175cms tall and weighs 71 kilos."
    "He is having 320 grams of stir-fried seasonal vegetables and 220 grams of cabbage soup for his meal. "
    "Can you provide (1) an analysis, (2) suggestions, and (3) recommendations?"
)

output = rag_chain.invoke(question)
print(output)

Okay, let's analyze Mr. Chen's situation based on the provided meal information and the principles outlined in the Dietary Reference Intakes (DRIs).

**Please note:** The analysis below is based on the meal provided (320g stir-fried seasonal vegetables + 220g cabbage soup) and general DRI principles, as specific nutrient composition data for these foods is not provided. A precise analysis requires detailed nutritional information or a food diary assessment.

---

**1. Analysis**

*   **Meal Composition:** The meal consists primarily of vegetables (both fresh and likely liquid). Vegetables are a crucial part of a healthy diet, providing essential vitamins, minerals, fiber, and phytonutrients.
*   **Nutrient Focus (Based on Food Type):** This meal is likely low in energy-dense foods like grains, fruits (though vegetables contain some), fats, and, most notably for an elderly individual, potentially adequate or insufficient protein, depending on the *type* of seasonal vegetables and the pr