## Run Gemini with structured output to get baseline results

In [16]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# project modules are picked up without restarting the kernel.
# (Comments stay on their own lines: text after a line magic is parsed as its argument.)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import os
from hellofresh_extractor.llm.StructuredGeminiCaller import StructuredGeminiCaller
from hellofresh_extractor.llm.utils import convert_structured_result_to_df
from hellofresh_extractor.llm.prompts import multimodal_system_prompt, multimodal_user_query
from hellofresh_extractor.llm.output_schemas import ExtractedMeal
from hellofresh_extractor.gsuite.drive.GoogleDriveHelper import GoogleDriveHelper
import glob
from PIL import Image
from pillow_heif import register_heif_opener
from dotenv import load_dotenv
import time
import pandas as pd

In [18]:
# Name handed to GoogleDriveHelper; presumably the top-level Drive folder the
# helper operates on (confirm against GoogleDriveHelper's constructor).
master_folder_name = "Recipes"
drive_helper = GoogleDriveHelper(master_folder_name)

In [19]:
# Folder ID reported by the Drive helper; reused further down as the parent
# folder for the CSV upload.
top_level_drive_folder_id = drive_helper.get_folder_id()

In [20]:
# Load environment variables via python-dotenv; GEMINI_API_KEY is read from
# os.environ when the Gemini caller is built below.
load_dotenv()

True

In [21]:
# Input images are looked up in an "images" folder under the current working
# directory (i.e. wherever the notebook kernel was started).
this_path = os.getcwd()
images_path = os.path.join(this_path, "images")

In [22]:
# glob returns matches in arbitrary, filesystem-dependent order. Sorting keeps
# the "At image i" indices and the row order of the resulting CSV stable across
# runs and machines.
images = sorted(glob.glob(os.path.join(images_path, "*.HEIC")))

In [23]:
# Model identifier used for the baseline run.
gemini_model_name = "gemini-2.5-flash-preview-04-17"

# The API key is read straight from the environment (None if GEMINI_API_KEY is
# unset) and deliberately not bound to a notebook-level variable.
gemini_caller = StructuredGeminiCaller(
    model=gemini_model_name,
    api_key=os.environ.get("GEMINI_API_KEY"),
)

In [24]:
# Sanity check: how many HEIC files the glob above matched.
len(images)

50

In [25]:
# Send every image to Gemini with the ExtractedMeal schema and collect one
# DataFrame per successfully parsed result in `all_meals`.
# `mean_process_time` ends up as the average elapsed seconds per image
# (image decoding + API call + conversion), or NaN when there were no images.
all_meals = []
# Register the pillow_heif opener so PIL's Image.open can read the .HEIC files.
register_heif_opener()
# perf_counter is monotonic, so the measured interval cannot be skewed by
# system clock adjustments the way time.time() can.
t0 = time.perf_counter()
for i, image in enumerate(images):
    print("*"*30)
    print(f"At image {i}")
    # convert() returns a new in-memory image, so the source file can be
    # closed immediately instead of staying open for the rest of the loop.
    with Image.open(image) as raw_image:
        open_image = raw_image.convert("RGB")
    result = gemini_caller.invoke(
        system_message=multimodal_system_prompt,
        input_content=[open_image, multimodal_user_query],
        output_schema=ExtractedMeal
    )

    if "structured_data" in result:
        analysis = result["structured_data"]
        df = convert_structured_result_to_df(analysis)
        # Keep the source path so each row can be traced back to its image.
        df["image_path"] = image
        all_meals.append(df)
    else:
        # The caller returned no parsed payload for this image; log it and move on.
        print(f"Structured data field not found for result {result}")
t1 = time.perf_counter()
# Avoid ZeroDivisionError when the glob matched no images.
mean_process_time = (t1 - t0) / len(images) if images else float("nan")

******************************
At image 0
******************************
At image 1
******************************
At image 2
******************************
At image 3
******************************
At image 4
******************************
At image 5
******************************
At image 6
******************************
At image 7
******************************
At image 8
******************************
At image 9
******************************
At image 10
******************************
At image 11
******************************
At image 12
******************************
At image 13
******************************
At image 14
******************************
At image 15
******************************
At image 16
******************************
At image 17
******************************
At image 18
******************************
At image 19
Error parsing structured output: 1 validation error for ExtractedMeal
  Invalid JSON: EOF while parsing a value at line 1 column 0 [type=json_invalid,

In [26]:
# Average elapsed seconds per image for the extraction loop above
# (total loop time divided by the number of images).
mean_process_time

2.9893046951293947

In [27]:
# Combine the per-image frames into one DataFrame.
# The name `all_meals` is rebound from a list to a DataFrame here, so a plain
# `pd.concat(all_meals)` fails with a TypeError if this cell is run a second
# time. Wrapping an already-concatenated frame in a list keeps the cell
# idempotent without changing the name that later cells rely on.
meal_frames = all_meals if isinstance(all_meals, list) else [all_meals]
all_meals = pd.concat(meal_frames)

In [29]:
# Persist the combined results next to the notebook; the DataFrame index is
# not written to the file.
output_csv_path = "test_hello_fresh_recipes_gemini.csv"
all_meals.to_csv(output_csv_path, index=False)

In [30]:
# Upload the CSV into the top-level Drive folder. The call is left as the
# cell's last expression so the returned file metadata is displayed.
drive_helper.upload_csv_file(
    parent_folder_id=top_level_drive_folder_id,
    file_path="test_hello_fresh_recipes_gemini.csv",
)

Uploaded 'test_hello_fresh_recipes_gemini.csv' to Google Drive (ID: 13BUbs2JTdNDGo66lwkYj9atJlnay_Df2)


{'id': '13BUbs2JTdNDGo66lwkYj9atJlnay_Df2',
 'name': 'test_hello_fresh_recipes_gemini.csv'}