In [1]:
import asyncio
import base64
import os
from io import BytesIO

import httpx
import orjson
import polars as pl
from dotenv import load_dotenv
from PIL import Image

load_dotenv()

True

In [2]:
# Stop right away if the OpenRouter credential was not loaded from the environment.
assert os.getenv("OPENROUTER_API_KEY"), "OpenRouter API key is not defined in .env."

# OpenRouter chat-completions endpoint and the model used for the single-model test.
API_URL = "https://openrouter.ai/api/v1/chat/completions"
model = "google/gemini-2.5-flash-lite-preview-06-17"

# Headers sent with every request: bearer authentication and a JSON body declaration.
headers = {
    "Authorization": "Bearer {}".format(os.getenv("OPENROUTER_API_KEY")),
    "Content-Type": "application/json",
}

Test with a photo of Barack Obama. [Image source](https://openverse.org/image/0d5242d2-8838-47a0-88ab-a3ab59a5f75f?q=barack+obama&p=1)


In [3]:
# Load the test photo from the notebook's working directory.
img = Image.open("470562794_2472fada41_b.jpg")
# Display the image dimensions as the cell output.
img.size

(1024, 768)

Resize the image so that its longest side is 768 pixels, since that is the image size Gemini's image processing is based on.


In [4]:
def resize_image_maintain_aspect(img, max_size=768):
    """
    Resize an image so that its longest side (width or height) equals max_size
    while maintaining the aspect ratio.

    Images whose longest side is shorter than max_size are scaled up; larger
    images are scaled down.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` and a
            ``resize(size, resample)`` method (e.g. a PIL image).
        max_size: Target length, in pixels, of the longest side. Must be positive.

    Returns:
        The resized image returned by ``img.resize``.

    Raises:
        ValueError: If ``max_size`` or either image dimension is not positive.
    """
    width, height = img.size

    if max_size <= 0:
        raise ValueError(f"max_size must be positive, got {max_size!r}")
    if width <= 0 or height <= 0:
        raise ValueError(f"image has an empty dimension: {width}x{height}")

    longest_side = max(width, height)

    # Multiply before dividing so the longest side lands on max_size exactly
    # (a float scale factor can round-trip to max_size - 1), and clamp to 1 so
    # a very thin image never collapses to a zero-pixel side.
    new_width = max(1, int(width * max_size // longest_side))
    new_height = max(1, int(height * max_size // longest_side))

    return img.resize((new_width, new_height), Image.Resampling.LANCZOS)


def img_to_base64_str(img):
    """
    Return the image as a base64 text string of its PNG encoding.

    The image is first passed through ``resize_image_maintain_aspect`` with
    that function's default size, then written as PNG into an in-memory
    buffer whose bytes are base64-encoded and decoded to a UTF-8 ``str``.
    """
    png_buffer = BytesIO()
    resize_image_maintain_aspect(img).save(png_buffer, format="PNG")
    return base64.b64encode(png_buffer.getvalue()).decode("utf-8")


In [5]:
# Encode the test image once; the resulting string is reused by the request cells.
img_base64_str = img_to_base64_str(img)
# Preview only the first 100 characters rather than dumping the whole payload.
img_base64_str[0:100]

'iVBORw0KGgoAAAANSUhEUgAAAwAAAAJACAIAAAC1zJYBAAEAAElEQVR4nHz9+a9tW3odhs1+rbX3ae69771qXrGqyCJVYqkXTVES'

In [12]:
# System prompt shared by every request: ask for names only, no extra text.
system = "Identify all the people in this image. In your response, do not include any text other than the names."

# Chat-completions payload: the system prompt plus a single user message whose
# only content is the image, embedded as a base64 data URL.
params = {
    "model": model,
    "messages": [
        {"role": "system", "content": system},
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{img_base64_str}"},
                }
            ],
        },
    ],
    "temperature": 0.0,
}

# The body is already serialized to bytes, so send it with `content=`
# (httpx deprecates passing raw bytes through `data=`). Use the same 60 s
# timeout as the async path instead of httpx's short default.
r = httpx.post(
    url=API_URL, headers=headers, content=orjson.dumps(params), timeout=60.0
)

# Pretty-print the full JSON response (success or error payload) for inspection.
print(orjson.dumps(r.json(), option=orjson.OPT_INDENT_2).decode("utf-8"))

{
  "id": "gen-1753077007-qS7EGCg9LVJwiGGrfvWl",
  "provider": "Google AI Studio",
  "model": "google/gemini-2.5-flash-lite-preview-06-17",
  "object": "chat.completion",
  "created": 1753077008,
  "choices": [
    {
      "logprobs": null,
      "finish_reason": "stop",
      "native_finish_reason": "STOP",
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Barack Obama",
        "refusal": null,
        "reasoning": null
      }
    }
  ],
  "usage": {
    "prompt_tokens": 282,
    "completion_tokens": 3,
    "total_tokens": 285
  }
}


## Use async to Parallelize Calls to Different Models.

Define the models as a list of single-entry dicts, where each key is the human-readable model name (used for reporting) and each value is the corresponding OpenRouter model ID.


In [13]:
# Each entry is a single-item dict: {display name used in the report: OpenRouter model ID}.
model_list = [
    {"Gemini 2.5 Flash Lite": "google/gemini-2.5-flash-lite-preview-06-17"},
    {"Claude Sonnet 4": "anthropic/claude-sonnet-4"},
    {"GPT-4.1 Mini": "openai/gpt-4.1-mini"},
    {"Grok 4": "x-ai/grok-4"},
    {"Llama 4 Scout": "meta-llama/llama-4-scout"},
    {"Mistral 3.2": "mistralai/mistral-small-3.2-24b-instruct"},
    # Display name corrected to match the model ID (Qwen 2.5-VL, not 3.5-VL).
    {"Qwen 2.5-VL": "qwen/qwen2.5-vl-72b-instruct"},
    # {"MiniMax - 01": "minimax/minimax-01"},
    {"GLM 4.1V": "thudm/glm-4.1v-9b-thinking"},
    # {"Amazon Nova Pro": "amazon/nova-pro-v1"},
]

In [14]:
async def query_image_async(model_kv, client, system, img_base64_str):
    """
    Send the image to one model through the chat-completions API.

    Args:
        model_kv: Single-entry dict mapping the display name of the model to
            its OpenRouter model ID.
        client: Async HTTP client exposing an awaitable ``post`` method
            (e.g. ``httpx.AsyncClient``).
        system: System prompt text.
        img_base64_str: Base64-encoded PNG data of the image.

    Returns:
        A dict with keys ``"model"`` (the display name) and ``"response"``.
        On success ``"response"`` is the message content returned by the model;
        on failure it is a string starting with ``"ERROR:"`` describing what
        went wrong, so every model always contributes exactly one row.
    """
    model_name, model_openrouter = next(iter(model_kv.items()))
    params = {
        "model": model_openrouter,
        "messages": [
            {"role": "system", "content": system},
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{img_base64_str}"},
                    }
                ],
            },
        ],
        "temperature": 0.0,
    }

    try:
        # The body is pre-serialized bytes, so it goes through `content=`;
        # httpx deprecates raw bytes passed via `data=`.
        r = await client.post(
            url=API_URL, headers=headers, content=orjson.dumps(params), timeout=60.0
        )
    except httpx.HTTPError as exc:
        # Timeouts / connection failures for one model must not abort the others.
        return {"model": model_name, "response": f"ERROR: request failed ({exc!r})"}

    try:
        content = r.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError, ValueError):
        # Non-JSON body or an error payload without "choices": report the raw
        # response (truncated) instead of silently returning None.
        return {
            "model": model_name,
            "response": f"ERROR: HTTP {r.status_code}: {r.text[:500]}",
        }

    return {"model": model_name, "response": content}


async def query_models_async(model_list, client, system, img_base64_str):
    """
    Query every model in ``model_list`` concurrently with the same prompt and image.

    One ``query_image_async`` coroutine is created per entry and all of them
    are awaited together; the returned list keeps the order of ``model_list``.
    """
    pending = []
    for model_kv in model_list:
        pending.append(query_image_async(model_kv, client, system, img_base64_str))

    return await asyncio.gather(*pending)

In [15]:
client = httpx.AsyncClient()

results = await query_models_async(model_list, client, system, img_base64_str)

with pl.Config() as cfg:
    cfg.set_tbl_formatting("ASCII_MARKDOWN")
    cfg.set_fmt_str_lengths(10**5)
    cfg.set_tbl_width_chars(-1)
    cfg.set_tbl_hide_column_data_types(True)
    cfg.set_tbl_hide_dataframe_shape(True)

    print(pl.from_dicts(results))

| model                 | response                                                                                                                                                                                                                                                                                                 |
|-----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Gemini 2.5 Flash Lite | Barack Obama                                                                                                                                                                                                                                                                                             |
| Claude Sonnet 4       |