In [32]:
!pip3 install google-generativeai python-dotenv pypdf pillow


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [33]:
!pip3 freeze > requirements.txt

In [34]:
import os
from dotenv import load_dotenv
from pypdf import PdfReader

In [35]:
load_dotenv(override=True)

gemini_api_key = os.getenv("GOOGLE_API_KEY")

if not gemini_api_key:
    raise ValueError("GOOGLE_API_KEY is not set in the environment variables.")

In [36]:
def read_pdf(file_path):
    try:
        reader = PdfReader(file_path)
        text = ""
        for page in reader.pages:
            text += page.extract_text() + "\n"
        return text.strip()
    except Exception as e:
        print(f"Error reading PDF file: {e}")
        return None

In [37]:
dietary_guidelines_text = read_pdf("resources/dietary_guidelines.pdf")

if not dietary_guidelines_text:
    raise ValueError("Failed to read the dietary guidelines PDF file.")

In [38]:
dietary_guidelines_text[:10]

'Dietary\nGu'

In [39]:
photo_analysis_system_prompt = """
You are a nutrition-label analyzer. From the provided product photo(s), extract ONLY the following fields in JSON:
- fat, cholesterol, sodium, carbohydrates, protein

Rules:
- Each field is a dictionary of key→value pairs you can read from the label 
  (e.g., {"total": 7, "saturated": 1.5, "unit": "g", "basis": "per_serving", "percent_dv": 9}).
- If a field is not readable or missing, set numeric values to 0 and strings to empty string "".
- Keep numbers as plain numbers (no % or unit symbols in the numbers themselves).
- If the label shows only percent daily value, put it as a number in "percent_dv".
- Do not convert salt↔sodium; only report what's printed.
- Output ONLY a JSON object matching the schema. No extra text.

Example for missing/unreadable values:
{"total": 0, "saturated": 0, "unit": "", "basis": "", "percent_dv": 0}
"""

In [40]:
import google.generativeai as genai
from google.generativeai.types import GenerationConfig
from PIL import Image

In [41]:
genai.configure(api_key=gemini_api_key)

In [42]:
nutrition_schema = {
    "type": "object",
    "properties": {
        "schema_version": {
            "type": "string"
        },
        "fat": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        },
        "cholesterol": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        },
        "sodium": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        },
        "carbohydrates": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        },
        "protein": {
            "type": "object",
            "properties": {
                "amount": {"type": "number"},
                "unit": {"type": "string"},
                "daily_value_percent": {"type": "number"}
            }
        }
    },
    "required": ["schema_version"]
}

In [43]:
model = genai.GenerativeModel('gemini-2.0-flash-exp')

In [44]:
nutrition_generation_config = GenerationConfig(
    response_mime_type="application/json",
    response_schema=nutrition_schema
)

In [45]:
image = Image.open("resources/composition.jpg")

In [46]:
response = model.generate_content(
    [photo_analysis_system_prompt, "Analyze this nutrition label.", image],
    generation_config=nutrition_generation_config
)

In [47]:
response_json = response.text
response_json

'{\n  "schema_version": "string",\n  "fat": {\n    "amount": 1.5,\n    "daily_value_percent": 2,\n    "unit": "g"\n  },\n  "protein": {\n    "amount": 4,\n    "daily_value_percent": 8,\n    "unit": "g"\n  },\n  "sodium": {\n    "amount": 200,\n    "daily_value_percent": 9,\n    "unit": "mg"\n  }\n}'

In [48]:
weight = 60
height = 180
age = 18

In [49]:
composition_analysis_system_prompt = """
Role: You are a registered-dietitian–level nutrition analyst and careful data checker.

You will receive:
1) A nutrition label as JSON: {response_json}
2) Dietary guidelines as plain text: {dietary_guidelines_text}
3) Person parameters:
   - Weight: {weight} kg
   - Height: {height} cm
   - Age: {age} years
   (Optional if provided in future: sex, activity_level, health goals/conditions)

Your tasks:
A. Parse & validate
   - Parse the JSON safely. If any fields are missing or inconsistent (units, totals, serving count), explicitly note them.
   - Do not assume nutrients that are not present on the label. Mark them as “not provided”.

B. Person context
   - Compute BMI = weight / (height/100)^2 and classify (WHO categories). State the numeric BMI and category.
   - Only estimate energy needs if sex and activity_level are provided. If not provided, skip TDEE and say “not provided”.

C. Label math (per serving and per package)
   - Confirm serving_size and servings_per_container. If missing, treat “per serving” as the only basis and say container totals are unknown.
   - Compute calories from macros using Atwater factors: protein=4 kcal/g, carbs=4 kcal/g, fat=9 kcal/g. Note that label carbs include fiber; do not subtract fiber unless the label explicitly provides available carbs.
   - Show grams and, when possible, %DV/%RDA for: calories, protein, total fat (and sat/trans if present), total carbohydrate (sugars, added sugars, fiber), sodium, cholesterol, and any vitamins/minerals listed.
   - If %DV is not on the label but the dietary guidelines text provides reference values, compute %DV from those references and cite which values you used. If neither is available, write “%DV: n/a”.

D. Guidelines alignment
   - Use the provided dietary guidelines text as the source of truth. Map each relevant nutrient to the recommended limits/targets.
   - Explicitly state where the product is: “Meets”, “Close to limit”, or “Exceeds” guideline thresholds (e.g., high sodium, high added sugar, low fiber, high sat fat). Quote the exact threshold you’re applying from the guidelines text.

E. Suitability for the person
   - Considering BMI and the guidelines, briefly assess suitability (e.g., “good protein-to-calorie ratio”, “sodium may be high if blood pressure is a concern”).
   - If information needed for a strong conclusion is missing (e.g., activity level, health conditions), say what is missing rather than guessing.

F. Practical recommendations
   - Provide 2–4 actionable suggestions: portion control, pairing (e.g., add fiber/protein), frequency of intake, or alternatives that better match the guidelines/person context.

G. Transparency & safety
   - List any assumptions you made, any missing data, and any rounding.
   - Do not give medical advice or diagnose. Keep the tone objective and non-judgmental.

Output format (Markdown):
# Summary (1–2 sentences)
# Key Numbers
- Serving size: …
- Servings per container: …
- Calories: … (per serving) | … (per package, if known)
- Protein: … g (… %DV if available)
- Carbohydrate: … g [sugars … g (added … g), fiber … g] (… %DV if available)
- Fat: … g [sat … g, trans … g] (… %DV if available)
- Sodium: … mg (… %DV if available)
- Cholesterol: … mg (… %DV if available)
# Person Context
- BMI: … (category: …)
- TDEE: not provided / … kcal (only if sex & activity_level provided; state method)
# Guidelines Check (cite the guideline lines you used)
- Sodium: … → [Meets / Close to limit / Exceeds] — threshold: “…”
- Added sugar: … → [Meets / Close / Exceeds] — threshold: “…”
- Saturated fat: … → [Meets / Close / Exceeds] — threshold: “…”
- Fiber: … → [Adequate / Low] — target: “…”
- Any other nutrients present on the label
# Suitability for This Person
- 2–4 bullet points tailored to the above
# Recommendations
- 2–4 bullets (portion, pairing, swap, frequency)
# Data Quality & Assumptions
- Missing fields: …
- Rounding/assumptions: …
- Notes: This analysis is informational and not medical advice.

Constraints:
- Be precise with units. Show both grams and mg where applicable.
- Do not invent data. If a value is absent, say “not provided”.
- Keep the entire analysis within 250–400 words unless the label/guidelines require brief additional detail.
"""

In [50]:
response = model.generate_content([composition_analysis_system_prompt])

In [51]:
model_response = response.text

In [52]:
from IPython.display import display, Markdown

display(Markdown(model_response))

# Summary
This nutrition analysis assesses the nutritional content of the provided food label against dietary guidelines and the context of a {age}-year-old individual with a weight of {weight} kg and a height of {height} cm. The analysis considers BMI and key nutrients like sodium, sugar, fat, and fiber.

# Key Numbers
- Serving size: {response_json['serving_size']}
- Servings per container: {response_json['servings_per_container']}
- Calories: {response_json['calories']} kcal (per serving) | {response_json['calories'] * response_json['servings_per_container']} kcal (per package)
- Protein: {response_json['protein']} g ({response_json['protein_dv']}%DV if available)
- Carbohydrate: {response_json['total_carbohydrate']} g [sugars {response_json['sugars']} g (added {response_json['added_sugars']} g), fiber {response_json['fiber']} g] ({response_json['total_carbohydrate_dv']}%DV if available)
- Fat: {response_json['total_fat']} g [sat {response_json['saturated_fat']} g, trans {response_json['trans_fat']} g] ({response_json['total_fat_dv']}%DV if available)
- Sodium: {response_json['sodium']} mg ({response_json['sodium_dv']}%DV if available)
- Cholesterol: {response_json['cholesterol']} mg ({response_json['cholesterol_dv']}%DV if available)

# Person Context
- BMI: {round(weight / (height/100)**2, 1)} (category: {bmi_category(weight, height)})
- TDEE: not provided

# Guidelines Check (cite the guideline lines you used)
- Sodium: {response_json['sodium']} mg → {sodium_assessment(response_json['sodium'], dietary_guidelines_text)} — threshold: "{extract_sodium_limit(dietary_guidelines_text)}"
- Added sugar: {response_json['added_sugars']} g → {added_sugar_assessment(response_json['added_sugars'], dietary_guidelines_text)} — threshold: "{extract_added_sugar_limit(dietary_guidelines_text)}"
- Saturated fat: {response_json['saturated_fat']} g → {saturated_fat_assessment(response_json['saturated_fat'], response_json['calories'], dietary_guidelines_text)} — threshold: "{extract_saturated_fat_limit(dietary_guidelines_text)}"
- Fiber: {response_json['fiber']} g → {fiber_assessment(response_json['fiber'], dietary_guidelines_text)} — target: "{extract_fiber_target(dietary_guidelines_text)}"
- Cholesterol: {response_json['cholesterol']} mg → {cholesterol_assessment(response_json['cholesterol'], dietary_guidelines_text)} - threshold: "{extract_cholesterol_limit(dietary_guidelines_text)}"

# Suitability for This Person
- The product's sodium content may be a concern depending on individual health conditions.
- The added sugar content should be considered in the context of overall dietary intake.
- The saturated fat content should be balanced with unsaturated fat sources.
- Fiber content could be improved.

# Recommendations
- Consider portion control to manage sodium, added sugar, and saturated fat intake.
- Pair this food with sources of fiber like fruits or vegetables to increase satiety and meet daily fiber goals.
- Limit consumption frequency if sodium or added sugar content is high.
- Explore lower-sodium and lower-sugar alternatives.

# Data Quality & Assumptions
- Missing fields: None.
- Rounding/assumptions: BMI is rounded to one decimal place. Atwater factors were used to estimate calorie contributions from macronutrients.
- Notes: This analysis is informational and not medical advice.

```python
import json

def bmi_category(weight, height):
    bmi = weight / (height/100)**2
    if bmi < 18.5:
        return "underweight"
    elif 18.5 <= bmi < 25:
        return "normal weight"
    elif 25 <= bmi < 30:
        return "overweight"
    else:
        return "obese"

def sodium_assessment(sodium, guidelines):
    limit = extract_sodium_limit(guidelines)
    if sodium > float(limit.replace("mg","")):
        return "Exceeds"
    elif sodium > float(limit.replace("mg","")) * 0.75:
        return "Close to limit"
    else:
        return "Meets"

def added_sugar_assessment(added_sugar, guidelines):
    limit = extract_added_sugar_limit(guidelines)
    if added_sugar > float(limit.replace("%",""))/100 * 2000 * 0.05: #Assuming 2000 calorie diet
        return "Exceeds"
    elif added_sugar > (float(limit.replace("%",""))/100 * 2000 * 0.05) * 0.75:
        return "Close to limit"
    else:
        return "Meets"

def saturated_fat_assessment(saturated_fat, calories, guidelines):
    limit = extract_saturated_fat_limit(guidelines)
    if saturated_fat > float(limit.replace("%",""))/100 * calories * (1/9):
        return "Exceeds"
    elif saturated_fat > (float(limit.replace("%",""))/100 * calories * (1/9)) * 0.75:
        return "Close to limit"
    else:
        return "Meets"

def fiber_assessment(fiber, guidelines):
    target = extract_fiber_target(guidelines)
    if fiber < float(target.replace("g","")) * 0.75:
        return "Low"
    else:
        return "Adequate"

def cholesterol_assessment(cholesterol, guidelines):
    limit = extract_cholesterol_limit(guidelines)
    if cholesterol > float(limit.replace("mg","")):
        return "Exceeds"
    elif cholesterol > float(limit.replace("mg","")) * 0.75:
        return "Close to limit"
    else:
        return "Meets"

def extract_sodium_limit(guidelines):
    import re
    match = re.search(r"less than (\d+mg) per day", guidelines)
    if match:
        return match.group(1)
    else:
        return "2300mg"

def extract_added_sugar_limit(guidelines):
    import re
    match = re.search(r"less than (\d+% of calories from added sugars)", guidelines)
    if match:
        return match.group(1)
    else:
        return "10%"

def extract_saturated_fat_limit(guidelines):
    import re
    match = re.search(r"less than (\d+% of calories from saturated fat)", guidelines)
    if match:
        return match.group(1)
    else:
        return "10%"

def extract_fiber_target(guidelines):
    import re
    match = re.search(r"(\d+g) of fiber per day", guidelines)
    if match:
        return match.group(1)
    else:
        return "28g"

def extract_cholesterol_limit(guidelines):
    import re
    match = re.search(r"limit cholesterol to (\d+mg) per day", guidelines)
    if match:
        return match.group(1)
    else:
        return "300mg"
```

In [66]:

verification_system_prompt = """
Role: You are a rigorous QA reviewer for nutrition analyses. Your job is to verify the accuracy, completeness, and methodological soundness of the model’s analysis against the provided inputs.

Inputs you will receive:
- Nutrition label JSON: {response_json}
- Dietary guidelines text: {dietary_guidelines_text}
- Person parameters:
  - Weight: {weight} kg
  - Height: {height} cm
  - Age: {age} years
  (Optional, if present elsewhere in the thread: sex, activity_level, health goals/conditions)

You will also receive: the model’s analysis to be verified.

Follow this verification checklist:
1) Parsing & Units
2) Math & Internal Consistency
3) BMI (and TDEE logic)
4) Guidelines Alignment
5) Suitability & Recommendations
6) Transparency
(Flag any Red flags as described earlier.)

Scoring rubric for “Overall accuracy”:
- Accurate: No material errors, only minor omissions.
- Partially accurate: 1–3 material issues but core conclusions mostly correct.
- Inaccurate: 4+ issues or any critical error.

OUTPUT FORMAT:
Respond ONLY in valid JSON that matches this schema:
{{
  "summary": "Overall summary of accuracy, key issues, and recommendations in 2–4 sentences.",
  "is_valid": true or false
}}

Where:
- "summary" = concise written summary of your verification findings.
- "is_valid" = true if the analysis is Accurate; false if Partially accurate or Inaccurate.

Constraints:
- Be concise, objective, and fact-based.
- Do not introduce outside data; verify only against the provided JSON, guidelines, and person parameters.
- If critical inputs are missing, require the analysis to state “not provided” rather than guessing.
"""


In [67]:
verification_schema = {
    "type": "object",
    "properties": {
        "schema_version": {
            "type": "string"
        },
        "summary": {
            "type": "string",
            "description": "Overall summary of the verification process."
        },
        "is_valid": {
            "type": "boolean",
            "description": "True if the verification is valid, otherwise False."
        }
    },
    "required": ["summary", "is_valid"]
}

In [68]:
verification_generation_config = GenerationConfig(
    response_mime_type="application/json",
    response_schema=verification_schema
)

In [69]:
response = model.generate_content([verification_system_prompt], generation_config=verification_generation_config)

In [70]:
model_response = response.text

In [71]:
model_response

'{\n  "is_valid": true,\n  "summary": "The analysis appears accurate and well-aligned with the provided inputs and guidelines. All calculations are internally consistent, and the recommendations are suitable given the available information. No issues were identified during verification."\n}'

In [75]:
import json

def verify_model_response(response_json, dietary_guidelines_text, weight, height, age):
    verification_system_prompt_filled = verification_system_prompt.format(
        response_json=response_json,
        dietary_guidelines_text=dietary_guidelines_text,
        weight=weight,
        height=height,
        age=age
    )
    response = model.generate_content(
        [verification_system_prompt_filled],
        generation_config=verification_generation_config
    )
    
    # try parsing the model output
    try:
        return json.loads(response.text)
    except json.JSONDecodeError:
        # fallback
        return {"is_valid": False, "summary": "Model response was not valid JSON."}


In [76]:
def get_recomendations(image_path, weight, height, age):
    image = Image.open(image_path)
    response = model.generate_content(
        [photo_analysis_system_prompt, "Analyze this nutrition label.", image],
        generation_config=nutrition_generation_config
    )
    response_json = response.text
    
    composition_analysis_system_prompt_filled = composition_analysis_system_prompt.format(
        response_json=response_json,
        dietary_guidelines_text=dietary_guidelines_text,
        weight=weight,
        height=height,
        age=age
    )
    
    response = model.generate_content([composition_analysis_system_prompt_filled])
    model_response = response.text
    
    verification_json = verify_model_response(response_json, dietary_guidelines_text, weight, height, age)
    
    if verification_json["is_valid"]:
        print("Verification passed")
        return model_response
    else:
        print("Verification failed")
        print("Summary of issues:", verification_json["summary"])
        # Append the feedback to the next prompt
        improved_prompt = (
            composition_analysis_system_prompt_filled
            + f"\n\nThe previous answer did not fully meet requirements. "
              f"Here is a summary of issues: {verification_json['summary']}.\n"
              f"Please improve the recommendations accordingly."
        )
        
        response = model.generate_content([improved_prompt])
        return response.text
    

In [77]:
resp = get_recomendations("resources/composition.jpg", 70, 175, 16)

Verification passed


In [79]:
display(Markdown(resp))

# Summary
This nutrition analysis examines a food label for a 16-year-old, considering their BMI and the Dietary Guidelines for Americans. The product provides some protein and fat but needs evaluation for sodium content and overall alignment with dietary recommendations.

# Key Numbers
- Serving size: Not provided
- Servings per container: Not provided
- Calories: Not provided (per serving)
- Protein: 4 g (8 %DV)
- Carbohydrate: Not provided g [sugars Not provided g (added Not provided g), fiber Not provided g] (Not provided %DV)
- Fat: 2 g (3 %DV)
- Sodium: 140 mg (6 %DV)
- Cholesterol: Not provided mg (Not provided %DV)

# Person Context
- BMI: 22.86 (category: Normal)
- TDEE: Not provided

# Guidelines Check
- Sodium: Meets — threshold: “Less than 2,300 milligrams per day—and even less for children younger than age 14.” The 16-year-old is above 14, so 2300 is the value used.
- Added sugar: Not provided → Cannot assess
- Saturated fat: Not provided → Cannot assess
- Fiber: Not provided → Cannot assess
- Vitamin A: Not provided → Cannot assess
- Vitamin E: Not provided → Cannot assess
- Vitamin B12: Not provided → Cannot assess
- Potassium: Not provided → Cannot assess
- Choline: Not provided → Cannot assess
- Vitamin K: Not provided → Cannot assess
- Folate: Not provided → Cannot assess

# Suitability for This Person
- The label shows a small amount of protein for the product, and may have good protein to calorie ratio if calories are low.
- Sodium content appears reasonable if other foods consumed during the day are low in sodium.
- Activity level and specific health conditions are needed for a more precise assessment.

# Recommendations
- Consider pairing with foods that provide more fiber to increase satiety.
- If blood pressure is a concern, monitor sodium intake from all sources.
- Choose options with lower sodium content, especially if consuming this product frequently.
- Since serving size is not provided, aim for a small portion size as a starting point and adjust to your energy needs.

# Data Quality & Assumptions
- Missing fields: serving size, servings per container, calories, carbohydrate, sugars, added sugars, fiber, cholesterol, %DV for all nutrients except protein, fat and sodium, activity level.
- Rounding/assumptions: BMI calculation is rounded to two decimal places. The analysis is based on the limited information provided on the label.
- Notes: This analysis is informational and not medical advice.
