<div id="singlestore-header" style="display: flex; background-color: rgba(235, 249, 245, 0.25); padding: 5px;">
    <div id="icon-image" style="width: 90px; height: 90px;">
        <img width="100%" height="100%" src="https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/header-icons/browser.png" />
    </div>
    <div id="text" style="padding: 5px; margin-left: 10px;">
        <div id="badge" style="display: inline-block; background-color: rgba(0, 0, 0, 0.15); border-radius: 4px; padding: 4px 8px; align-items: center; margin-top: 6px; margin-bottom: -2px; font-size: 80%">SingleStore Notebooks</div>
        <h1 style="font-weight: 500; margin: 8px 0 0 4px;">Python UDF to process images</h1>
    </div>
</div>

<div class="alert alert-block alert-info">
    <b class="fa fa-solid fa-info-circle"></b>
    <div>
        <p><b>Note</b></p>
        <p>This tutorial is meant for Standard &amp; Premium Workspaces. You can't run it on a Free Starter Workspace due to storage restrictions. Create a Workspace using +group in the left nav &amp; select Standard for this notebook. Gallery notebooks tagged with "Starter" are suitable to run on a Free Starter Workspace.</p>
    </div>
</div>

In [1]:
# Install and Import Required Libraries
!pip install -q langchain==0.3.27 langchain-openai==0.2.10 langchain-community==0.3.25 langchain-core==0.3.72 pillow==10.4.0 aiofiles==24.1.0

In [2]:
# Import necessary modules
import base64
import aiohttp
import io

from singlestoredb.functions import udf
import singlestoredb.apps as apps
from singlestoredb.management import get_secret
from openai import AsyncOpenAI
from PIL import Image

# Configuration for the multimodal LLM.
# Replace the model name, endpoint URL, and secret name below with the values
# for your own model deployment.
INFERENCE_API_MODEL_NAME_1 = "gpt-4-1"
INFERENCE_API_MODEL_API_ENDPOINT_1 = "https://ai.us-east-1.cloud.singlestore.com/b45ad4a4-a620-4ed0-9ffc-76d83ebf9bc7/v1"
# Secrets are the recommended way to manage sensitive information like API keys
# for use within SingleStore Notebooks; the key is looked up by secret name
# instead of being hardcoded in the notebook.
INFERENCE_API_MODEL_API_AUTH_1 = get_secret('GPT_4_1')

# Set up the shared AsyncOpenAI client, pointed at the endpoint configured above.
# AI_IMG_COMPLETE uses this client for its chat-completions request.
async_llm_client = AsyncOpenAI(
    api_key=INFERENCE_API_MODEL_API_AUTH_1,
    base_url=INFERENCE_API_MODEL_API_ENDPOINT_1
)

async def download_image_from_url(image_url: str, max_size: int = 1024) -> str:
    """
    Download an image from a URL and return it as a base64-encoded JPEG.

    The image is fetched over HTTP, normalised to a JPEG-compatible colour
    mode, downscaled so that its longest side is at most ``max_size`` pixels,
    re-encoded as JPEG (quality 85) and base64-encoded so it can be embedded
    in a ``data:image/jpeg;base64,...`` URL for a multimodal LLM request.

    Parameters
    ----------
    image_url : str
        URL of the image to download
    max_size : int, optional
        Maximum width/height in pixels; larger images are downscaled
        (aspect ratio preserved) to save on API costs, by default 1024

    Returns
    -------
    str
        Base64 encoded JPEG image string

    Raises
    ------
    Exception
        If the download returns a non-200 status, times out, or the image
        cannot be decoded/re-encoded. The original error is chained as
        ``__cause__``.
    """
    try:
        # Set proper headers to avoid 403 errors
        headers = {
            'User-Agent': 'SingleStore-ImageAnalyzer/1.0 (Product-Manager-Bot; https://singlestore.com)',
            'Accept': 'image/jpeg,image/png,image/webp,image/*,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }

        # Bound the whole request (connect + read) to 30 seconds
        timeout = aiohttp.ClientTimeout(total=30)

        # Download image from URL with proper headers
        async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
            async with session.get(image_url) as response:
                if response.status != 200:
                    raise Exception(f"Failed to download image: HTTP {response.status}")

                image_data = await response.read()

        # Process image with PIL
        with Image.open(io.BytesIO(image_data)) as source_img:
            img = source_img

            # JPEG cannot store alpha, palette, or high-bit-depth modes
            # (e.g. "RGBA", "P", "LA", "PA", "I;16", "F"), so convert everything
            # that is not already plain RGB or 8-bit grayscale.
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")

            # Resize if too large to save on API costs (thumbnail keeps aspect ratio)
            if max(img.size) > max_size:
                img.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)

            # Re-encode as JPEG in memory and convert to base64
            buffer = io.BytesIO()
            img.save(buffer, format='JPEG', quality=85)
            return base64.b64encode(buffer.getvalue()).decode('utf-8')

    except Exception as e:
        # Re-raise with the URL for context; keep the original error chained.
        raise Exception(f"Error processing image from URL {image_url}: {str(e)}") from e

@udf
async def AI_IMG_COMPLETE(image_url: str, prompt: str) -> str:
    """
    Analyze an image from a URL with the configured multimodal LLM.

    Parameters
    ----------
    image_url : str
        URL of the image to analyze
    prompt : str
        Text prompt for the vision model

    Returns
    -------
    str
        The model's text answer. On any failure (download, image processing,
        API error, or a response without text content) a string starting with
        ``"Error processing image analysis: "`` is returned instead of raising,
        so the function always yields a string.

    Notes
    -----
    This function downloads an image from the provided URL, sends it together
    with the prompt to the model named by ``INFERENCE_API_MODEL_NAME_1`` via
    the shared ``async_llm_client``, and returns the model's analysis. The
    request uses ``detail="high"``, ``max_tokens=500`` and ``temperature=0.2``.
    """

    try:
        # Download and encode image
        base64_image = await download_image_from_url(image_url)

        # Create vision request: one user message with a text part and an image part
        messages = [{
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}",
                        "detail": "high"
                    }
                }
            ]
        }]

        # Call the chat-completions API of the configured model
        response = await async_llm_client.chat.completions.create(
            model=INFERENCE_API_MODEL_NAME_1,
            messages=messages,
            max_tokens=500,
            temperature=0.2
        )

        content = response.choices[0].message.content
        if content is None:
            # The API may return a message without text (e.g. a refusal);
            # the declared return type is str, so report it like other errors.
            return "Error processing image analysis: model returned no text content"

        return content

    except Exception as e:
        # Best-effort: surface the failure as the result value instead of raising.
        error_msg = f"Error processing image analysis: {str(e)}"
        return error_msg

In [3]:
# Start the Python UDF server and keep the value it returns in `connection_info`.
# NOTE(review): presumably this serves the @udf-decorated function(s) defined
# above; the cell output below reports the URL the UDF was registered at.
connection_info = await apps.run_udf_app()

Python UDF registered at https://apps.us-east-1.cloud.singlestore.com/pythonudfs/ebbb37c3-66f7-415a-aa78-c3599fbcc191/interactive/


## Publish the Notebook as a UDF


1. Click the **Publish** button at the top right of your notebook editor window to publish these functions.

## Now open a *NEW* SQL Editor and run the following commands to test the UDFs

## SQL Commands
```
SHOW functions;
```

### Example 1: Analyze a sample image with AI_IMG_COMPLETE

```
SELECT AI_IMG_COMPLETE(
    'https://upload.wikimedia.org/wikipedia/commons/b/b6/Mount_Everest_as_seen_from_Drukair2_PLW_edit_Cropped.jpg',
    'which country do you think this image is from? Answer in one word'
) as analysis_result;
```

### Example 2: Multiple images (better if done in batches)

```
SELECT
    image_url,
    AI_IMG_COMPLETE(image_url, 'describe the main subject matter in 15 words or less') as description
FROM (
    SELECT 'https://upload.wikimedia.org/wikipedia/commons/4/41/A_Man_on_the_Moon%2C_AS11-40-5903_%28cropped%29.jpg' as image_url
    UNION ALL
    SELECT 'https://upload.wikimedia.org/wikipedia/commons/4/4c/Series-N700a-Mt.Fuji.jpg'
    UNION ALL
    SELECT 'https://upload.wikimedia.org/wikipedia/commons/b/b6/Mount_Everest_as_seen_from_Drukair2_PLW_edit_Cropped.jpg'
) as test_images;
```

<div id="singlestore-footer" style="background-color: rgba(194, 193, 199, 0.25); height:2px; margin-bottom:10px"></div>
<div><img src="https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/singlestore-logo-grey.png" style="padding: 0px; margin: 0px; height: 24px"/></div>