In [1]:

import requests
import base64
from common.custom_types import ImageData
from typing import Tuple, Optional


def fetch_image_as_base64(url: str) -> Optional[Tuple[str, str]]:
    """
    Fetches an image from a URL and returns its base64 content and MIME type.

    Sends a GET request to ``url``, inspects the 'Content-Type' response
    header and, when it names an image type, base64-encodes the response body.

    Args:
        url: The URL of the image to fetch.

    Returns:
        A ``(base64_content, mime_type)`` tuple. ``base64_content`` is the
        base64-encoded image as a ``str``; ``mime_type`` is the media type in
        lower case with any header parameters removed (e.g., 'image/jpeg').
        Returns None, after printing a warning, if the 'Content-Type' header
        is missing or does not name an ``image/*`` type.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (invalid URL, connection error, timeout) or the server responds
            with a 4xx/5xx status code.
    """
    # stream=True defers the body download until `.content` is read, so a
    # non-image response can be rejected from its headers alone. A streamed
    # response that is never read keeps its connection checked out, hence the
    # `with` block: it closes the response on every exit path.
    with requests.get(url, stream=True, timeout=10) as response:
        # Raise an exception for bad status codes (4xx or 5xx).
        response.raise_for_status()

        content_type = response.headers.get("Content-Type")

        # Keep only the media type: drop parameters such as "; charset=..."
        # and normalise case, since media types are case-insensitive.
        mime_type = (content_type or "").split(";", 1)[0].strip().lower()
        if not mime_type.startswith("image/"):
            print(f"Warning: URL does not point to an image. MIME type: {content_type}")
            return None

        # Reading `.content` downloads the raw binary body of the image.
        image_bytes = response.content

    # Encode the bytes into a base64 string.
    base64_content = base64.b64encode(image_bytes).decode('utf-8')

    return (base64_content, mime_type)


# Sample menu photos. The commented-out URIs are alternates that can be
# swapped in for the active one.
# dummy_uri = "https://lh3.googleusercontent.com/gps-cs-s/AC9h4nolu0ktuCp6Iv2xYf862rZitPbloquBOiu53uSwa16PBf8QvFpHicv9KG9oLv1ToCzh7EnQcIQoL7RCYwkx2El97LO3V_TQ8qa4LMz7UtVUU-Tk1qW2lHB8G6bl0yULK2QruLX-hw=w1080"
# dummy_uri = "https://lh3.googleusercontent.com/gps-cs-s/AC9h4no1uCHfrVSWZ8wF0xORLVeQqBL9-7Nlw0JmTQzMsV17MKHXAXM_58i-P8OMqc_-4ZZyDe8xD5pIYSXiReG0OKZkwLJ4l7_zK-yBTTrAaaMW1Qz5cu0In2Kr-MlgfZu-3XteZafF=w1080"
# dummy_uri = "https://lh3.googleusercontent.com/gps-cs-s/AC9h4nptYsYx_z-xeV4_tOC7K7-WK8QVgHFvXaJq0LOlAE4NeMzzcZ-nQa41Ce_LfaUot4CVF6QhA5bEPLzJv8cakoXeHV9yNYn2jgfEHX3cfpBXGaa7CmCJHfbzd-PG7YCmjMBv5cnkGg=w1080"
dummy_uri = "https://lh3.googleusercontent.com/gps-cs-s/AC9h4nrcNsZt5FksLGx03CtA87lhNsHZVkyXm2xpmnBNtxlM1KrmlQivVfEuiUFzhK_rPG0ycmGjZdFUtFCvyvj4gIa7XXGXC6qMe7VwdVwmQjUz02erXFUpZeqbNkctBfEfRtd9k_h2=w1080"

# fetch_image_as_base64 returns None when the response is not an image, so
# check before unpacking; otherwise the failure shows up as an opaque
# "cannot unpack non-iterable NoneType" TypeError.
dummy_fetch = fetch_image_as_base64(dummy_uri)
if dummy_fetch is None:
    raise ValueError(f"No image could be fetched from {dummy_uri}")

bb, mime = dummy_fetch
dummy_img = ImageData(base64=bb, mime_type=mime)
dummy_img

ImageData(base64=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xe1\x00hExif\x00\x00II*\x00\x08\x00\x00\x00\x02\x001\x01\x02\x00\x07\x00\x00\x00&\x00\x00\x00i\x87\x04\x00\x01\x00\x00\x00.\x00\x00\x00\x00\x00\x00\x00Picasa\x00\x00\x02\x00\x00\x90\x07\x00\x04\x00\x00\x000220\x03\x90\x02\x00\x14\x00\x00\x00L\x00\x00\x00\x00\x00\x00\x002024:04:22 14:52:24\x00\xff\xe2\x0b\xf8ICC_PROFILE\x00\x01\x01\x00\x00\x0b\xe8\x00\x00\x00\x00\x02\x00\x00\x00mntrRGB XYZ \x07\xd9\x00\x03\x00\x1b\x00\x15\x00$\x00\x1facsp\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-\x00\x00\x00\x00)\xf8=\xde\xaf\xf2U\xaexB\xfa\xe4\xca\x839\r\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10desc\x00\x00\x01D\x00\x00\x00ybXYZ\x00\x00\x01\xc0\x00\x00\x00\x14bTRC\x00\x00\x01\xd4\x00\x00\x08\x0cdmdd\x00\x00\t\xe0\x

In [None]:
from menu_reader import read_menu

# Await read_menu on the fetched image and keep the result in `res` for the
# cells below. Top-level `await` only works in an async-aware notebook kernel.
res = await read_menu(dummy_img)

text_annotations {
  locale: "en"
  description: "croffles\ncraffle (noun)\ncantades stuter,\nforest cafe menu\na hybrid mix of a croissant and affla\nteat, milk, soy may contain eye ata, sesame\nplain 4\nsogar glaze\nforest cafe menu\nsignature drink\nFOREST\nespresso\nLATTE\norganic maple syrup cinnamon milk\nour signature flavor\ncoffee & specialty drinks\nve ceston roast our own (arabical beans\nbrewed coffee-selise roast; notes of chocolate, almond, cherry\nexpress medios to dark roast; notes of cinnamon & brown sagar\ndecaf espresso medium to dark roast; notes of caramel\nbrewed coffee\ncold brew\nespresso\namericano\nmacchiato (traditional)\ncortado\ncappuccino\nflat white\nlatte\nmocha latte\nlavender latte\nvanilla latte\nube latte (purple yan)\nbeing and\nHOT\nICED\n6.00\n6.50\ntry foresting\nyour drink\n+1.25\n3.25\n4.75\n3.25\n3.75\n3.56\n4.08\n3.75\n4.25\n4.75\n4.15\n4.75\n5.50\n5.75\n6.25\n5.75\n6.25\n5.75\n6.25\n5.75\n6.25\n?\n6.25\n5.75\n6.25\n7.25\n7.75\ngrab\nsalted c

In [2]:
# Display the menu result. `res` must already have been assigned by another
# cell in this kernel session; otherwise this raises NameError.
res

NameError: name 'res' is not defined

In [None]:
import pickle

# Disabled on purpose: switch the condition to True to (re)write the current
# `res` to menu_dat.pkl.
if False:
    with open("menu_dat.pkl", "wb") as menu_pickle_out:
        pickle.dump(res, menu_pickle_out, protocol=pickle.HIGHEST_PROTOCOL)

In [3]:
import pickle


# Restore `res` from menu_dat.pkl (the file the disabled dump cell writes).
# NOTE(security): pickle.load can execute arbitrary code from the file, so
# only load a pickle that this notebook produced itself.
with open("menu_dat.pkl", "rb") as menu_pickle_in:
    loaded_data = pickle.load(menu_pickle_in)

res = loaded_data

In [4]:
# Display the menu that was restored from the pickle.
res

MenuData(sections=[MenuSection(section='signature drink', description='', items=[MenuItem(name='FOREST LATTE', description='espresso + organic maple syrup + cinnamon + milk. our signature flavor.', symbols=[], contains=None)]), MenuSection(section='coffee & specialty drinks', description='we custom roast our own (arabica) beans. brewed coffee - medium roast; notes of chocolate, almond, cherry. cold brew - medium to dark roast; notes of cinnamon & brown sugar. decaf espresso - medium to dark roast; notes of caramel.', items=[MenuItem(name='brewed coffee', description='we custom roast our own (arabica) beans. brewed coffee - medium roast; notes of chocolate, almond, cherry. cold brew - medium to dark roast; notes of cinnamon & brown sugar. decaf espresso - medium to dark roast; notes of caramel.', symbols=[], contains=None), MenuItem(name='cold brew', description='we custom roast our own (arabica) beans. brewed coffee - medium roast; notes of chocolate, almond, cherry. cold brew - medium

In [5]:
from allergy_detector import detect_allergen, detect_allergens
from common.custom_types import SupportedAllergen

# detection = await detect_allergens(res, [SupportedAllergen.GLUTEN, SupportedAllergen.SHELLFISH])
detection = await detect_allergens(res, [SupportedAllergen.GLUTEN, SupportedAllergen.SHELLFISH])

START RESPONSE TEXT
{
  "sections": [
    {
      "section": "signature drink",
      "description": "",
      "items": [
        {
          "name": "FOREST LATTE",
          "description": "espresso + organic maple syrup + cinnamon + milk. our signature flavor.",
          "symbols": [],
          "contains": [
            {
              "allergen": "shellfish",
              "prediction": "VERY_UNLIKELY",
              "explanation": "The ingredients listed are naturally shellfish-free, and there is no indication of cross-contamination risk."
            }
          ]
        }
      ]
    },
    {
      "section": "coffee & specialty drinks",
      "description": "we custom roast our own (arabica) beans. brewed coffee - medium roast; notes of chocolate, almond, cherry. cold brew - medium to dark roast; notes of cinnamon & brown sugar. decaf espresso - medium to dark roast; notes of caramel.",
      "items": [
        {
          "name": "brewed coffee",
          "description": "w

In [6]:
import pickle

# (enabled, path) toggles: `detection` is written only to the paths whose
# flag is True.
pickle_targets = (
    (False, "shellfish_dat.pkl"),
    (True, "glu_and_shel_dat.pkl"),
)

for enabled, pickle_path in pickle_targets:
    if enabled:
        with open(pickle_path, "wb") as detection_out:
            pickle.dump(detection, detection_out, protocol=pickle.HIGHEST_PROTOCOL)


In [13]:
import pickle

# Reload the detection result from glu_and_shel_dat.pkl.
# NOTE: pickle.load can run arbitrary code; only load pickles you trust.
with open("glu_and_shel_dat.pkl", "rb") as detection_in:
    loaded_data = detection = pickle.load(detection_in)

In [14]:
# Display the detection result reloaded from the pickle.
detection



In [15]:
from allergy_detector import aggregate_allergies

# Aggregate the detection result into `agg`.
# NOTE(review): the class names printed under this cell presumably come from
# debug prints inside aggregate_allergies; confirm and remove them there.
agg = aggregate_allergies(detection)

<class 'common.custom_types.Menu.LabeledAllergenMenu'>
<class 'common.custom_types.Menu.LabeledAllergenMenu'>
<class 'common.custom_types.Menu.MenuSection'>
<class 'common.custom_types.Menu.LabeledAllergenMenu'>
<class 'common.custom_types.Menu.MenuSection'>
<class 'common.custom_types.Menu.LabeledAllergenMenu'>
<class 'common.custom_types.Menu.MenuSection'>
<class 'common.custom_types.Menu.LabeledAllergenMenu'>
<class 'common.custom_types.Menu.MenuSection'>
<class 'common.custom_types.Menu.LabeledAllergenMenu'>
<class 'common.custom_types.Menu.MenuSection'>
<class 'common.custom_types.Menu.LabeledAllergenMenu'>
<class 'common.custom_types.Menu.MenuSection'>
<class 'common.custom_types.Menu.LabeledAllergenMenu'>
<class 'common.custom_types.Menu.MenuSection'>


In [17]:
# Print the aggregate via model_dump().
# NOTE(review): assumes `agg` is a pydantic model, so this prints a plain
# dict; confirm.
print(agg.model_dump())



# Non-Example: an image that is not a restaurant menu


In [None]:
# Non-example: a salon price list rather than a restaurant menu.
non_example = "https://businesssignsandmore.com/cdn/shop/files/salon-menu-board-price-list-341568.jpg?v=1742056416&width=1445"

# fetch_image_as_base64 returns None when the response is not an image, so
# check before unpacking; otherwise the failure shows up as an opaque
# "cannot unpack non-iterable NoneType" TypeError.
non_example_fetch = fetch_image_as_base64(non_example)
if non_example_fetch is None:
    raise ValueError(f"No image could be fetched from {non_example}")

bb, mime = non_example_fetch
non_example_img = ImageData(base64=bb, mime_type=mime)
non_example_img

ImageData(base64=b'\xff\xd8\xff\xe1\x00\xbcExif\x00\x00II*\x00\x08\x00\x00\x00\x06\x00\x12\x01\x03\x00\x01\x00\x00\x00\x01\x00\x00\x00\x1a\x01\x05\x00\x01\x00\x00\x00V\x00\x00\x00\x1b\x01\x05\x00\x01\x00\x00\x00^\x00\x00\x00(\x01\x03\x00\x01\x00\x00\x00\x02\x00\x00\x00\x13\x02\x03\x00\x01\x00\x00\x00\x01\x00\x00\x00i\x87\x04\x00\x01\x00\x00\x00f\x00\x00\x00\x00\x00\x00\x00H\x00\x00\x00\x01\x00\x00\x00H\x00\x00\x00\x01\x00\x00\x00\x06\x00\x00\x90\x07\x00\x04\x00\x00\x000210\x01\x91\x07\x00\x04\x00\x00\x00\x01\x02\x03\x00\x00\xa0\x07\x00\x04\x00\x00\x000100\x01\xa0\x03\x00\x01\x00\x00\x00\xff\xff\x00\x00\x02\xa0\x04\x00\x01\x00\x00\x00\xa5\x05\x00\x00\x03\xa0\x04\x00\x01\x00\x00\x00\xb4\x04\x00\x00\x00\x00\x00\x00\xff\xe2\x01\xb8ICC_PROFILE\x00\x01\x01\x00\x00\x01\xa8lcms\x02\x10\x00\x00mntrRGB XYZ \x07\xdc\x00\x01\x00\x19\x00\x03\x00)\x009acspAPPL\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-l

In [None]:
from menu_reader import read_menu

res = read_menu(non_example_img)

text_annotations {
  locale: "en"
  description: "Single Sessions\nMENU\nMonthly Unlimited\nBuy 5 Get 2 Free\nRegular Bed\n$7.70\nRegular Bed\n$44\nRegular Bed\n$38.50\nRegular Bed with Facial\n$8.80\nRegular Bed with Facials\n$49.50\nRegular Bed with Facial\n$44\nSolaris Level 2\n$12.10\nSolaris Level 2\n$66\nSolaris Level 2\n$60.50\nSolar Force Level 3\n$15.40\nSolar Force Level 3\nSolar Force Level 3\n$77\n$77\nSunScape Level 4\n$19.80\nSunscape Level 4\n$99\nSunscape Level 4\n$99\nRed Light Therapy\n$25.00\nRed Light Therapy\nMystic Spray Tan\n$178\n$99\nHaloSauna\n$25.00\n*2 months\n$150\nBuy Minutes Get Minutes\nMystic Spray Tan\n$35.60+\n*Add-On to any monthly pkg\n$20\nBuy 50 get 20 FREE\n$77\nHaloSauna\n$70\nBuy 75 get 30 FREE\n$115.50\nWeekly Sessions\n*Add-on HaloSauna to any Monthly\n$20\nBuy 100 get 40 FREE\n$154\nMystic Spray Tan (4 max)\n$99\nHaloSauna Unlimited\n$50\nBuy 150 get 70 FREE\n$231\nAll Access UV+RLT+Halo\n$170\nHybrid UV+RLT Unlimited\n$50\nBuy 200 get 100 F

Failed to structure menu with reason: NOT_RESTAURANT_MENU


{
    "error": "NOT_RESTAURANT_MENU"
}


In [None]:
# Display the result produced for the non-menu image.
res

MenuData(sections=[MenuSection(section='INVALID_MENU', description='NOT_RESTAURANT_MENU', items=[])])