## Run the local LLM pipeline to get structured output

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import glob
import os
from PIL import Image
from pillow_heif import register_heif_opener
from dotenv import load_dotenv
import pandas as pd
import time

import tempfile
import torch
import pandas as pd
import outlines
from transformers import pipeline

from hellofresh_extractor.llm.prompts import multimodal_system_prompt, multimodal_user_query, structured_system_prompt
from hellofresh_extractor.llm.utils import convert_structured_result_to_df
from hellofresh_extractor.llm.output_schemas import ExtractedMeal
from hellofresh_extractor.gsuite.drive.GoogleDriveHelper import GoogleDriveHelper
from hellofresh_extractor.llm.MultiModalModel import MultiModalModel
from hellofresh_extractor.llm.StructuredOutputModel import StructuredOutputModel

In [3]:
# Load variables from a .env file into the process environment
# (HF_TOKEN is read from os.environ when the models are created below).
load_dotenv()

True

In [4]:
# Pick the best available torch device instead of assuming Apple Silicon, so
# the notebook also runs on CUDA or CPU-only machines. On a machine with MPS
# this selects "mps", matching the previous hard-coded value.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Image+text -> text pipeline used to read the recipe card photos.
# The Hugging Face token is taken from the HF_TOKEN environment variable
# (None if it is not set).
multimodal_pipe = pipeline(
    "image-text-to-text",
    model="google/gemma-3-4b-it",
    device=device,
    torch_dtype=torch.bfloat16,
    token=os.environ.get('HF_TOKEN'),
    use_fast=True
)

# Text-only model loaded through outlines; it is later paired with the
# ExtractedMeal schema to produce structured output.
structured_model = outlines.models.transformers(
    "HuggingFaceTB/SmolLM2-1.7B-Instruct",
    device=device,
    model_kwargs = {"temperature": 0.1, "do_sample": True}
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use mps


In [5]:
# Wrapper around the image-text pipeline; invoked per image in the loop below.
multimodal_model_caller = MultiModalModel(model_pipe=multimodal_pipe)

# Wrapper around the text model, given ExtractedMeal as its output model.
structured_model_caller = StructuredOutputModel(
    outputmodel=ExtractedMeal,
    model=structured_model,
)

In [7]:
# Collect the HEIC photos from the "images" folder under the current working
# directory. glob returns files in arbitrary order, so sort the result to keep
# the "At image i" indices and the output row order reproducible across runs.
this_path = os.getcwd()
images_path = os.path.join(this_path, "images")
images = sorted(glob.glob(os.path.join(images_path, "*.HEIC")))

In [8]:
# Run every image through two stages:
#   1. the multimodal model generates text from the photo,
#   2. the structured-output model converts that text into a DataFrame.
# Per-image DataFrames are collected in `all_meals`; `mean_process_time` is
# the average wall-clock seconds per image.
all_meals = []
register_heif_opener()  # registers HEIF/HEIC support with Pillow
t0 = time.time()
for i, image in enumerate(images):
    print(f"At image {i}")

    # Reserve a temp path and close the handle immediately, so Pillow writes
    # to a file that is not still held open by this process.
    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp_file:
        temp_filename = temp_file.name

    try:
        # Write an RGB JPEG copy of the photo; its path is what gets passed to
        # the multimodal model. The context manager closes the source image.
        with Image.open(image) as heic_image:
            heic_image.convert("RGB").save(temp_filename, format="JPEG")

        user_message = [
            {"type": "image", "image": temp_filename},
            {"type": "text", "text": multimodal_user_query}
        ]

        print("Running multimodal model")
        try:
            multimodal_result = multimodal_model_caller.invoke(
                system_message = multimodal_system_prompt,
                user_messages = user_message
            )
            multimodal_extracted_text = multimodal_result[0]["generated_text"]
        except Exception as e:
            print("Error in multimodal model inference: {}".format(e))
            # Without fresh text the next stage would either hit an undefined
            # name or silently reuse the previous image's text and attribute
            # it to this image, so skip straight to the next image.
            continue
    finally:
        # delete=False means nothing else removes the file; clean it up here
        # on success, on failure, and on `continue`.
        if os.path.exists(temp_filename):
            os.remove(temp_filename)

    print("Running structured output model")
    try:
        structured_result = structured_model_caller.invoke(
            system_message = structured_system_prompt,
            text_to_extract = multimodal_extracted_text,
            user_query = multimodal_user_query
        )
        df = convert_structured_result_to_df(structured_result)
        df["image_path"] = image  # record which photo these rows came from
        all_meals.append(df)
    except Exception as e:
        print("Error in structured model inference: {}".format(e))
t1 = time.time()
# Guard against an empty image list, which would otherwise divide by zero.
mean_process_time = (t1 - t0) / len(images) if images else float("nan")

At image 0
Running multimodal model
Running structured output model
At image 1
Running multimodal model
Running structured output model
At image 2
Running multimodal model
Running structured output model
At image 3
Running multimodal model
Running structured output model
At image 4
Running multimodal model
Running structured output model
At image 5
Running multimodal model
Running structured output model
At image 6
Running multimodal model
Running structured output model
At image 7
Running multimodal model
Running structured output model
At image 8
Running multimodal model
Running structured output model
At image 9
Running multimodal model
Running structured output model
At image 10
Running multimodal model
Running structured output model
At image 11
Running multimodal model
Running structured output model
At image 12
Running multimodal model
Running structured output model
At image 13
Running multimodal model
Running structured output model
At image 14
Running multimodal model
Running

In [9]:
# Average wall-clock seconds per image for the extraction loop above.
mean_process_time

1602.9548211050032

In [10]:
# Combine the per-image DataFrames into one table. The name `all_meals` is
# rebound from a list to a DataFrame, so only concatenate while it is still a
# list; this keeps the cell safe to re-run without restarting the loop.
if isinstance(all_meals, list):
    if not all_meals:
        # pd.concat would raise ValueError here too; give a clearer reason.
        raise ValueError("No meals were extracted, so there is nothing to concatenate")
    all_meals = pd.concat(all_meals)

In [11]:
# Write the combined table to a CSV in the current working directory,
# leaving out the DataFrame index.
all_meals.to_csv("test_hello_fresh_recipes_local.csv",index=False)

In [12]:
# Display the combined DataFrame of extracted rows.
all_meals

Unnamed: 0,meal_id,title,prep_time,cook_time,calories,ingredient_name,ingredient_amount,image_path
0,7d26262a-996c-4e4e-a0e6-a36496487a98,"Firehouse Cheeseburgers with Fried Onions, Gar...",10 min,35 min,1000,Ground Beef,10 oz (20 oz),/Users/rmartinshort/Documents/DS_projects/gemm...
1,7d26262a-996c-4e4e-a0e6-a36496487a98,"Firehouse Cheeseburgers with Fried Onions, Gar...",10 min,35 min,1000,Cheese,1 ½ Cup,/Users/rmartinshort/Documents/DS_projects/gemm...
2,7d26262a-996c-4e4e-a0e6-a36496487a98,"Firehouse Cheeseburgers with Fried Onions, Gar...",10 min,35 min,1000,Potato Buns,2 ½ (4 oz),/Users/rmartinshort/Documents/DS_projects/gemm...
3,7d26262a-996c-4e4e-a0e6-a36496487a98,"Firehouse Cheeseburgers with Fried Onions, Gar...",10 min,35 min,1000,Potato Wedges,1 ½ Cups,/Users/rmartinshort/Documents/DS_projects/gemm...
4,7d26262a-996c-4e4e-a0e6-a36496487a98,"Firehouse Cheeseburgers with Fried Onions, Gar...",10 min,35 min,1000,Creamy Ranch Dressing,1 ½ tbsp,/Users/rmartinshort/Documents/DS_projects/gemm...
...,...,...,...,...,...,...,...,...
5,da32202f-8607-4050-8fd5-f280b76bf6c6,Pork Katsu with Sesame Roasted Carrots & Ginge...,10 min,35 min,940,Soy Sauce,4 TBSP (6 TBSP),/Users/rmartinshort/Documents/DS_projects/gemm...
6,da32202f-8607-4050-8fd5-f280b76bf6c6,Pork Katsu with Sesame Roasted Carrots & Ginge...,10 min,35 min,940,Sour Cream,1/4 TSP (3 TBSP),/Users/rmartinshort/Documents/DS_projects/gemm...
7,da32202f-8607-4050-8fd5-f280b76bf6c6,Pork Katsu with Sesame Roasted Carrots & Ginge...,10 min,35 min,940,Jasmine Rice,1/2 Cup (1 Cup),/Users/rmartinshort/Documents/DS_projects/gemm...
8,da32202f-8607-4050-8fd5-f280b76bf6c6,Pork Katsu with Sesame Roasted Carrots & Ginge...,10 min,35 min,940,Panko Breadcrumbs,1 Cup (2 Cups),/Users/rmartinshort/Documents/DS_projects/gemm...
