In [1]:
import os
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
from PIL import Image
import requests
import torch

from tqdm import tqdm
import json
import pandas as pd
from pathlib import Path

In [2]:
import os

# Set HuggingFace cache directory
os.environ['HF_HOME'] = "/Users/rodrigocarrillo/Documents/LLMs Hugging Face/gemma-3-4b-it"
model_id = "google/gemma-3-4b-it"


In [3]:
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id, device_map="auto"
).eval()

processor = AutoProcessor.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the disk.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [4]:
system_prompt = """

You are a helpful assistant.
Your task is to extract the name, size, macronutrients and detailed description of the food products from images.
Use **only** information that is clearly visible in the image.
If the information is not clearly visible, respond with "Not available".
Your response should be in JSON format with the following structure:
{
  "products": [
    {
      "name": "Name of the product.",
      "size": "Size of the product (e.g., 500ml, 1.0L, 1.0OZ, 50g).",
      "fats": "Amount of fats in grams.",
      "carbohydrates": "Amount of carbohydrates in grams.",
      "proteins": "Amount of proteins in grams.",
      "description": "Detailed description of the product."
    },
    ...
  ]
}
Do not include anything other than the JSON response.

"""

In [None]:
# messages = [
#     {
#         "role": "system",
#         "content": [{"type": "text", "text": system_prompt}]
#     },
#     {
#         "role": "user",
#         "content": [
#             {"type": "image", "image": "/Users/rodrigocarrillo/Documents/Computer Vision/Vending Machines/03_Cropped_Objects/object_008_conf_0_75.jpg"},
#             {"type": "text", "text": "Get the name and detailed description of the food product from this image."}
#         ]
#     }
# ]

# inputs = processor.apply_chat_template(
#     messages,
#     add_generation_prompt=True,
#     tokenize=True,
#     return_dict=True,
#     return_tensors="pt"
# ).to(model.device, dtype=torch.bfloat16)

# input_len = inputs["input_ids"].shape[-1]

# with torch.inference_mode():
#     generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
#     generation = generation[0][input_len:]

# decoded = processor.decode(generation, skip_special_tokens=True)
# print(decoded)


In [7]:
import os
import json
import pandas as pd
import torch
from pathlib import Path
import re

# Configuration
folder_path = "/Users/rodrigocarrillo/Documents/Computer Vision/Vending Machines/03_Cropped_Objects"
output_json_path = "food_products.json"
output_csv_path = "food_products.csv"


# Process all JPG files
results = []

# Get all .jpg files (case-insensitive)
jpg_files = list(Path(folder_path).glob("*.jpg")) + list(Path(folder_path).glob("*.JPG"))
print(f"Found {len(jpg_files)} JPG files to process\n")

for idx, image_path in tqdm(enumerate(jpg_files, 1), total=len(jpg_files), desc="Processing images"):
    print(f"Processing {idx}/{len(jpg_files)}: {image_path.name}")
    
    try:
        # Prepare messages for this image
        messages = [
            {
                "role": "system",
                "content": [{"type": "text", "text": system_prompt}]
            },
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": str(image_path)},
                    {"type": "text", "text": "Get the name and detailed description of the food product from this image."}
                ]
            }
        ]
        
        # Process with model
        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt"
        ).to(model.device, dtype=torch.bfloat16)
        
        input_len = inputs["input_ids"].shape[-1]
        
        with torch.inference_mode():
            generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)
            generation = generation[0][input_len:]
        
        decoded = processor.decode(generation, skip_special_tokens=True)
        
        # Clean up the decoded string (remove markdown code blocks if present)
        decoded_clean = decoded.strip()
        if decoded_clean.startswith("```json"):
            decoded_clean = decoded_clean.replace("```json", "").replace("```", "").strip()
        
        # Parse the JSON response
        try:
            result_dict = json.loads(decoded_clean)
            
            # Extract the first product from the products array
            if "products" in result_dict and len(result_dict["products"]) > 0:
                product = result_dict["products"][0]
                
                result_entry = {
                    "filename": image_path.name,
                    "name": product.get("name", "Unknown"),
                    "description": product.get("description", ""),
                    "size": product.get("size", ""),
                    "fats": product.get("fats", ""),
                    "carbohydrates": product.get("carbohydrates", ""),
                    "proteins": product.get("proteins", "")
                }
            else:
                # Fallback if structure is unexpected
                result_entry = {
                    "filename": image_path.name,
                    "name": "Unknown",
                    "description": decoded_clean
                }
                
        except json.JSONDecodeError as je:
            print(f"  ⚠ JSON parsing error: {je}")
            result_entry = {
                "filename": image_path.name,
                "name": "Parse Error",
                "description": decoded_clean
            }
        
        results.append(result_entry)
        print(f"  ✓ {result_entry.get('name', 'Unknown')}")
        
    except Exception as e:
        print(f"  ✗ Error processing {image_path.name}: {e}")
        results.append({
            "filename": image_path.name,
            "name": "Error",
            "description": str(e)
        })

print(f"\n{'='*50}")
print(f"Processing complete!")
print(f"{'='*50}\n")



Found 30 JPG files to process



Processing images:   0%|          | 0/30 [00:00<?, ?it/s]

Processing 1/30: object_021_conf_0_74.jpg


The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Processing images:   3%|▎         | 1/30 [04:14<2:03:14, 255.00s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Jumbo Honey Bun
Processing 2/30: object_023_conf_0_69.jpg


Processing images:   7%|▋         | 2/30 [08:36<2:00:44, 258.73s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ M&M's
Processing 3/30: object_022_conf_0_72.jpg


Processing images:  10%|█         | 3/30 [13:22<2:02:06, 271.35s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Lay's Sour Cream & Onion Chips
Processing 4/30: object_009_conf_0_86.jpg


Processing images:  13%|█▎        | 4/30 [17:31<1:53:39, 262.29s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Bugles
Processing 5/30: object_024_conf_0_68.jpg


Processing images:  17%|█▋        | 5/30 [21:48<1:48:29, 260.36s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Snickers
Processing 6/30: object_026_conf_0_63.jpg


Processing images:  20%|██        | 6/30 [26:27<1:46:47, 266.99s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Famous Chocolate Chip Cookies
Processing 7/30: object_010_conf_0_85.jpg


Processing images:  23%|██▎       | 7/30 [30:41<1:40:39, 262.58s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Mr. Mix
Processing 8/30: object_017_conf_0_81.jpg


Processing images:  27%|██▋       | 8/30 [35:02<1:36:05, 262.07s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ CHEZZI
Processing 9/30: object_014_conf_0_84.jpg


Processing images:  30%|███       | 9/30 [39:15<1:30:44, 259.28s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Cheetos Puffs
Processing 10/30: object_028_conf_0_60.jpg


Processing images:  33%|███▎      | 10/30 [43:38<1:26:47, 260.39s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Hershey's Milk Chocolate with Whole Almonds
Processing 11/30: object_002_conf_0_89.jpg


Processing images:  37%|███▋      | 11/30 [47:42<1:20:51, 255.32s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Oreo
Processing 12/30: object_007_conf_0_87.jpg


Processing images:  40%|████      | 12/30 [52:32<1:19:44, 265.82s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ⚠ JSON parsing error: Unterminated string starting at: line 13 column 15 (char 298)
  ✓ Parse Error
Processing 13/30: object_001_conf_0_89.jpg


Processing images:  43%|████▎     | 13/30 [56:49<1:14:34, 263.18s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Ruffles Cheddar & Sour Cream
Processing 14/30: object_004_conf_0_87.jpg


Processing images:  47%|████▋     | 14/30 [1:01:08<1:09:51, 261.95s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Nekot Sandwich cookies Peanut Butter
Processing 15/30: object_013_conf_0_85.jpg


Processing images:  50%|█████     | 15/30 [1:05:39<1:06:11, 264.76s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Reese's 2 Peanut Butter Cups
Processing 16/30: object_000_conf_0_90.jpg


Processing images:  53%|█████▎    | 16/30 [1:09:57<1:01:19, 262.79s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Lance Malt Sandwich Crackers Peanut Butter
Processing 17/30: object_005_conf_0_87.jpg


Processing images:  57%|█████▋    | 17/30 [1:14:24<57:12, 264.04s/it]  The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Planters Salted Peanuts
Processing 18/30: object_012_conf_0_85.jpg


Processing images:  60%|██████    | 18/30 [1:19:20<54:44, 273.71s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ⚠ JSON parsing error: Expecting ',' delimiter: line 11 column 4 (char 363)
  ✓ Parse Error
Processing 19/30: object_015_conf_0_84.jpg


Processing images:  63%|██████▎   | 19/30 [1:23:41<49:26, 269.72s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Lay's Classic
Processing 20/30: object_029_conf_0_60.jpg


Processing images:  67%|██████▋   | 20/30 [1:27:49<43:52, 263.20s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Mrs. Freshley's
Processing 21/30: object_011_conf_0_85.jpg


Processing images:  70%|███████   | 21/30 [1:32:40<40:43, 271.50s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Nekot. Sandwich cookies Lemon Crème
Processing 22/30: object_006_conf_0_87.jpg


Processing images:  73%|███████▎  | 22/30 [1:36:20<34:09, 256.21s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Lay's Salt & Vinegar
Processing 23/30: object_016_conf_0_83.jpg


Processing images:  77%|███████▋  | 23/30 [1:39:52<28:20, 242.94s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Taktos
Processing 24/30: object_003_conf_0_88.jpg


Processing images:  80%|████████  | 24/30 [1:43:23<23:18, 233.16s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Gauduco Wafer
Processing 25/30: object_025_conf_0_68.jpg


Processing images:  83%|████████▎ | 25/30 [1:46:19<18:00, 216.07s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Mars
Processing 26/30: object_008_conf_0_86.jpg


Processing images:  87%|████████▋ | 26/30 [1:48:55<13:12, 198.16s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Cinnamon Almond Butter
Processing 27/30: object_018_conf_0_80.jpg


Processing images:  90%|█████████ | 27/30 [1:53:43<11:15, 225.06s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Golden Flake Buffalo Ranch Thin & Crispy Potato Chips
Processing 28/30: object_027_conf_0_61.jpg


Processing images:  93%|█████████▎| 28/30 [1:58:05<07:51, 235.99s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ Skittles Wild Berry
Processing 29/30: object_019_conf_0_77.jpg


Processing images:  97%|█████████▋| 29/30 [2:02:53<04:11, 251.85s/it]The following generation flags are not valid and may be ignored: ['top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


  ✓ White Chedda Popcorn
Processing 30/30: object_020_conf_0_76.jpg


Processing images: 100%|██████████| 30/30 [2:07:41<00:00, 255.38s/it]

  ✓ Nabisco Stubb's Sticky Sweet BBQ Cheese Crackers

Processing complete!






In [8]:
# At the end of VLLMs.py, add:

# Clean up
del model
del processor
import gc
gc.collect()

if torch.backends.mps.is_available():
    torch.mps.empty_cache()

print("✓ Memory cleared")

✓ Memory cleared


In [9]:
# Save as JSON
with open(output_json_path, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

print(f"✓ JSON saved to: {output_json_path}")

# Create DataFrame and save as CSV
df = pd.DataFrame(results)

# Reorder columns - put filename and name first, then description
columns_order = ['filename', 'name', 'description']
# Add any additional columns that exist
additional_cols = [col for col in df.columns if col not in columns_order]
columns_order.extend(additional_cols)

df = df[columns_order]
df.to_csv(output_csv_path, index=False, encoding='utf-8')

print(f"✓ CSV saved to: {output_csv_path}")
print(f"\nProcessed {len(results)} images successfully!")

# Display summary
print(f"\n{'='*50}")
print("Preview of results:")
print(f"{'='*50}")
print(df.head(10).to_string(index=False))

✓ JSON saved to: food_products.json
✓ CSV saved to: food_products.csv

Processed 30 images successfully!

Preview of results:
                filename                                        name                                           description          size          fats carbohydrates      proteins
object_021_conf_0_74.jpg                             Jumbo Honey Bun                              A jumbo-sized honey bun. Not available Not available Not available Not available
object_023_conf_0_69.jpg                                       M&M's                 Chocolate candies with a candy shell. Not available Not available Not available Not available
object_022_conf_0_72.jpg              Lay's Sour Cream & Onion Chips                        Lay's Sour Cream & Onion Chips 1 oz (28.3 g) Not available Not available Not available
object_009_conf_0_86.jpg                                      Bugles                           Orange-flavored corn chips.        7/8 oz Not available Not ava

In [None]:
results[0]

{'filename': 'object_021_conf_0_74.jpg',
 'name': 'Jumbo Honey Bun',
 'description': 'A jumbo-sized honey bun.',
 'size': 'Not available',
 'fats': 'Not available',
 'carbohydrates': 'Not available',
 'proteins': 'Not available'}

In [11]:
df

Unnamed: 0,filename,name,description,size,fats,carbohydrates,proteins
0,object_021_conf_0_74.jpg,Jumbo Honey Bun,A jumbo-sized honey bun.,Not available,Not available,Not available,Not available
1,object_023_conf_0_69.jpg,M&M's,Chocolate candies with a candy shell.,Not available,Not available,Not available,Not available
2,object_022_conf_0_72.jpg,Lay's Sour Cream & Onion Chips,Lay's Sour Cream & Onion Chips,1 oz (28.3 g),Not available,Not available,Not available
3,object_009_conf_0_86.jpg,Bugles,Orange-flavored corn chips.,7/8 oz,Not available,Not available,Not available
4,object_024_conf_0_68.jpg,Snickers,"Chocolate bar with peanuts, caramel, and nougat.",Not available,Not available,Not available,Not available
5,object_026_conf_0_63.jpg,Famous Chocolate Chip Cookies,6-Size Cookies with Rich Chip Taste. Original ...,3.0 OZ (56g),Not available,Not available,Not available
6,object_010_conf_0_85.jpg,Mr. Mix,Two cookie bars of Mr. Mix.,2 cookie bars,Not available,Not available,Not available
7,object_017_conf_0_81.jpg,CHEZZI,Baked snack crackers. 100% real cheese.,Not available,Not available,Not available,Not available
8,object_014_conf_0_84.jpg,Cheetos Puffs,Orange puffed cheese snack.,140 Calories,Not available,Not available,Not available
9,object_028_conf_0_60.jpg,Hershey's Milk Chocolate with Whole Almonds,Milk chocolate with whole almonds,Not available,Not available,Not available,Not available
