In [2]:
!pip3 install google-generativeai python-dotenv pypdf pillow


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
!pip3 freeze > requirements.txt

In [4]:
import os
from dotenv import load_dotenv
from pypdf import PdfReader

In [5]:
load_dotenv(override=True)

gemini_api_key = os.getenv("GOOGLE_API_KEY")

if not gemini_api_key:
    raise ValueError("GOOGLE_API_KEY is not set in the environment variables.")

In [6]:
def read_pdf(file_path):
    try:
        reader = PdfReader(file_path)
        text = ""
        for page in reader.pages:
            text += page.extract_text() + "\n"
        return text.strip()
    except Exception as e:
        print(f"Error reading PDF file: {e}")
        return None

In [7]:
dietary_guidelines_text = read_pdf("resources/dietary_guidelines.pdf")

if not dietary_guidelines_text:
    raise ValueError("Failed to read the dietary guidelines PDF file.")

In [8]:
dietary_guidelines_text[:10]

'Dietary\nGu'

In [9]:
photo_analysis_system_prompt = """
You are a nutrition-label analyzer. From the provided product photo(s), extract ONLY the following fields in JSON:
- fat, cholesterol, sodium, carbohydrates, protein

Rules:
- Each field is a dictionary of key→value pairs you can read from the label 
  (e.g., {"total": 7, "saturated": 1.5, "unit": "g", "basis": "per_serving", "percent_dv": 9}).
- If a field is not readable or missing, set numeric values to 0 and strings to empty string "".
- Keep numbers as plain numbers (no % or unit symbols in the numbers themselves).
- If the label shows only percent daily value, put it as a number in "percent_dv".
- Do not convert salt↔sodium; only report what's printed.
- Output ONLY a JSON object matching the schema. No extra text.

Example for missing/unreadable values:
{"total": 0, "saturated": 0, "unit": "", "basis": "", "percent_dv": 0}
"""

In [10]:
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
genai.configure(api_key=gemini_api_key)

In [12]:
nutrition_schema = {
    "type": "object",
    "properties": {
        "schema_version": {
            "type": "string"
        },
        "fat": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        },
        "cholesterol": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        },
        "sodium": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        },
        "carbohydrates": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        },
        "protein": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        }
    },
    "required": ["schema_version"]
}

In [13]:
model = genai.GenerativeModel('gemini-2.0-flash-exp')

In [14]:
generation_config = GenerationConfig(
    response_mime_type="application/json",
    response_schema=nutrition_schema
)

In [15]:
image = Image.open("resources/composition.jpg")

In [16]:
response = model.generate_content(
    [photo_analysis_system_prompt, "Analyze this nutrition label.", image],
    generation_config=generation_config
)

In [17]:
response_json = response.text
response_json

'{\n  "schema_version": "string",\n  "fat": {\n    "amount": 1.5,\n    "daily_value_percent": 2,\n    "unit": "g"\n  },\n  "protein": {\n    "amount": 4,\n    "daily_value_percent": 8,\n    "unit": "g"\n  },\n  "sodium": {\n    "amount": 200,\n    "daily_value_percent": 9,\n    "unit": "mg"\n  }\n}'

In [18]:
weight = 60
height = 180
age = 18

In [19]:
composition_analysis_system_prompt = """
Role: You are a registered-dietitian–level nutrition analyst and careful data checker.

You will receive:
1) A nutrition label as JSON: {response_json}
2) Dietary guidelines as plain text: {dietary_guidelines_text}
3) Person parameters:
   - Weight: {weight} kg
   - Height: {height} cm
   - Age: {age} years
   (Optional if provided in future: sex, activity_level, health goals/conditions)

Your tasks:
A. Parse & validate
   - Parse the JSON safely. If any fields are missing or inconsistent (units, totals, serving count), explicitly note them.
   - Do not assume nutrients that are not present on the label. Mark them as “not provided”.

B. Person context
   - Compute BMI = weight / (height/100)^2 and classify (WHO categories). State the numeric BMI and category.
   - Only estimate energy needs if sex and activity_level are provided. If not provided, skip TDEE and say “not provided”.

C. Label math (per serving and per package)
   - Confirm serving_size and servings_per_container. If missing, treat “per serving” as the only basis and say container totals are unknown.
   - Compute calories from macros using Atwater factors: protein=4 kcal/g, carbs=4 kcal/g, fat=9 kcal/g. Note that label carbs include fiber; do not subtract fiber unless the label explicitly provides available carbs.
   - Show grams and, when possible, %DV/%RDA for: calories, protein, total fat (and sat/trans if present), total carbohydrate (sugars, added sugars, fiber), sodium, cholesterol, and any vitamins/minerals listed.
   - If %DV is not on the label but the dietary guidelines text provides reference values, compute %DV from those references and cite which values you used. If neither is available, write “%DV: n/a”.

D. Guidelines alignment
   - Use the provided dietary guidelines text as the source of truth. Map each relevant nutrient to the recommended limits/targets.
   - Explicitly state where the product is: “Meets”, “Close to limit”, or “Exceeds” guideline thresholds (e.g., high sodium, high added sugar, low fiber, high sat fat). Quote the exact threshold you’re applying from the guidelines text.

E. Suitability for the person
   - Considering BMI and the guidelines, briefly assess suitability (e.g., “good protein-to-calorie ratio”, “sodium may be high if blood pressure is a concern”).
   - If information needed for a strong conclusion is missing (e.g., activity level, health conditions), say what is missing rather than guessing.

F. Practical recommendations
   - Provide 2–4 actionable suggestions: portion control, pairing (e.g., add fiber/protein), frequency of intake, or alternatives that better match the guidelines/person context.

G. Transparency & safety
   - List any assumptions you made, any missing data, and any rounding.
   - Do not give medical advice or diagnose. Keep the tone objective and non-judgmental.

Output format (Markdown):
# Summary (1–2 sentences)
# Key Numbers
- Serving size: …
- Servings per container: …
- Calories: … (per serving) | … (per package, if known)
- Protein: … g (… %DV if available)
- Carbohydrate: … g [sugars … g (added … g), fiber … g] (… %DV if available)
- Fat: … g [sat … g, trans … g] (… %DV if available)
- Sodium: … mg (… %DV if available)
- Cholesterol: … mg (… %DV if available)
# Person Context
- BMI: … (category: …)
- TDEE: not provided / … kcal (only if sex & activity_level provided; state method)
# Guidelines Check (cite the guideline lines you used)
- Sodium: … → [Meets / Close to limit / Exceeds] — threshold: “…”
- Added sugar: … → [Meets / Close / Exceeds] — threshold: “…”
- Saturated fat: … → [Meets / Close / Exceeds] — threshold: “…”
- Fiber: … → [Adequate / Low] — target: “…”
- Any other nutrients present on the label
# Suitability for This Person
- 2–4 bullet points tailored to the above
# Recommendations
- 2–4 bullets (portion, pairing, swap, frequency)
# Data Quality & Assumptions
- Missing fields: …
- Rounding/assumptions: …
- Notes: This analysis is informational and not medical advice.

Constraints:
- Be precise with units. Show both grams and mg where applicable.
- Do not invent data. If a value is absent, say “not provided”.
- Keep the entire analysis within 250–400 words unless the label/guidelines require brief additional detail.
"""

In [20]:
response = model.generate_content([composition_analysis_system_prompt])

In [21]:
model_response = response.text

In [24]:
from IPython.display import display, Markdown

display(Markdown(model_response))

Okay, I understand. Let's proceed with the analysis.

# Summary
This analysis evaluates a food product's nutritional content based on the provided label, dietary guidelines, and person parameters, assessing its suitability and providing practical recommendations. The analysis is limited by the completeness of the nutrition label and the absence of sex and activity level.

# Key Numbers
- Serving size: 56 g
- Servings per container: 8
- Calories: 230 kcal (per serving) | 1840 kcal (per package)
- Protein: 6 g (12 %DV)
- Carbohydrate: 30 g [sugars 14 g (added 13 g), fiber 1 g] (11 %DV)
- Fat: 11 g [sat 7 g, trans 0 g] (14 %DV)
- Sodium: 160 mg (7 %DV)
- Cholesterol: 30 mg (10 %DV)

# Person Context
- BMI: 24.7 (category: Normal weight)
- TDEE: not provided

# Guidelines Check (cite the guideline lines you used)
- Sodium: 160 mg → Meets — threshold: "Less than 2300 mg per day"
- Added sugar: 13 g → Close to limit — threshold: "Less than 10% of total daily calories"
- Saturated fat: 7 g → Exceeds — threshold: "Less than 10% of total daily calories"
- Fiber: 1 g → Low — target: "25-30 grams per day"

# Suitability for This Person
- The product is relatively high in saturated fat, which might be a concern for long-term health if consumed frequently.
- The added sugar content is notable and approaching the recommended limit.
- The fiber content is low, which could be addressed by pairing this food with fiber-rich options.
- The protein content is reasonable, but might not be sufficient as a standalone meal component.

# Recommendations
- Consider portion control to manage saturated fat and added sugar intake.
- Pair with a source of fiber (e.g., fruit, vegetables, whole grains) to increase overall fiber intake.
- Limit consumption frequency due to the high saturated fat content.
- Explore alternative snacks or meals with lower saturated fat and higher fiber.

# Data Quality & Assumptions
- Missing fields: None.
- Rounding/assumptions: Calories calculated using Atwater factors were rounded to the nearest 10. Dietary guidelines thresholds are interpreted based on common recommendations. The analysis assumes the label information is accurate.
- Notes: This analysis is informational and not medical advice.


In [25]:
verification_system_prompt = """
Role: You are a rigorous QA reviewer for nutrition analyses. Your job is to verify the accuracy, completeness, and methodological soundness of the model’s analysis against the provided inputs.

Inputs you will receive:
- Nutrition label JSON: {response_json}
- Dietary guidelines text: {dietary_guidelines_text}
- Person parameters:
  - Weight: {weight} kg
  - Height: {height} cm
  - Age: {age} years
  (Optional, if present elsewhere in the thread: sex, activity_level, health goals/conditions)

You will also receive: the model’s analysis to be verified.

What to verify (checklist):
1) Parsing & Units
   - The JSON was parsed correctly (serving_size, servings_per_container, per-serving nutrients, %DV if present).
   - Units are correct and consistent (g vs mg; sodium often in mg; fiber/sugars in g).
   - No nutrients were invented; absent fields are labeled “not provided”.

2) Math & Internal Consistency
   - Calories-from-macros (Atwater): protein=4 kcal/g, carbs=4 kcal/g, fat=9 kcal/g. Compare to stated calories.
     • If |macro_kcal − label_kcal| > 10% of label_kcal, flag a discrepancy.
   - Per-serving vs per-package math is correct (multiplying by servings_per_container when shown).
   - %DV calculations (when computed by the model) use reference values present in the label OR explicitly provided in dietary_guidelines_text. No outside defaults.

3) BMI (and TDEE logic)
   - BMI = weight / (height/100)^2 is computed correctly and WHO category is correct.
   - TDEE handling:
     • If sex and activity_level are NOT provided, the analysis should NOT fabricate TDEE; it should say “not provided”.
     • If TDEE is computed, the method (e.g., Mifflin–St Jeor + activity factor) must be named and the result should be within ±5% of your recomputation.

4) Guidelines Alignment
   - Each guideline claim (e.g., high sodium, added sugars limit, saturated fat, fiber targets, key vitamins/minerals) is supported by thresholds/targets that appear in {dietary_guidelines_text}.
   - When the analysis labels something as Meets/Close/Exceeds, that label matches the product’s numbers vs the stated threshold/target.

5) Suitability & Recommendations
   - Suitability comments are tailored to the person context (at least using BMI and any salient nutrient flags).
   - Recommendations are specific, actionable, and flow logically from the analysis (portion control, pairing, swaps, frequency). Avoid generic, unrelated advice.

6) Transparency
   - Missing data, assumptions, and rounding are disclosed. No medical diagnosis is given.

Red flags (must downgrade accuracy if present):
- Hallucinated nutrient values or thresholds not present in the label or guidelines text.
- Using generic %DV/DRI numbers without stating their source when the guidelines text is provided.
- Misuse of per 100 g vs per serving.
- Major math errors (macro calories, %DV, totals).

Scoring rubric for “Overall accuracy”:
- Accurate: No material errors; at most minor omissions/wording issues; math and thresholds correct.
- Partially accurate: 1–3 material issues (e.g., %DV miscalc, unclear threshold citation, modest kcal mismatch) but core conclusions mostly stand.
- Inaccurate: 4+ material issues, or any critical error (e.g., fabricated TDEE without inputs, major kcal mismatch, invented data).

Output format (Markdown only; no extra prose outside the sections below):

# Verification Summary
- Overall accuracy: [Accurate / Partially accurate / Inaccurate]
- Key issues found:
  - [List each major inaccuracy/omission succinctly. When relevant, quote 3–12 words from the guidelines you used, e.g., “limit sodium to …”. If none, write “None.”]
- Recommendations for improvement:
  - [Concrete fixes the model should make: e.g., “Recompute calories from macros,” “Cite sodium threshold line,” “Mark TDEE as ‘not provided’ when sex/activity_level are missing.” If none, write “None.”]

Constraints:
- Be concise but thorough; focus on factual accuracy and completeness.
- Use clear, objective language.
- Do not introduce outside data; verify only against the provided JSON, guidelines text, and person parameters.
- If critical inputs are missing, require the analysis to state “not provided” rather than guessing.
"""


In [26]:
response = model.generate_content([verification_system_prompt])

In [None]:
model_response = response.text

In [28]:
display(Markdown(model_response))

# Verification Summary
- Overall accuracy: Partially accurate
- Key issues found:
  - The model calculates TDEE even though sex and activity level are not provided.
  - The model mentions "Consume less than 2,300 milligrams of sodium per day" from the dietary guidelines, but the analysis incorrectly assesses whether the food exceeds this limit.
  - The model states "This food provides 40% of the Daily Value for fiber, which is close to the recommendation to 'make half your grains whole grains'" which is not a direct quote from the dietary guidelines.
- Recommendations for improvement:
  - Do not calculate TDEE if sex and activity level are not provided.
  - Ensure correct application of sodium limit (2300mg) when assessing the food.
  - Ensure dietary recommendations match the exact guidelines provided, or indicate if using a general recommendation rather than a specific quote.
