In [1]:
import pandas as pd
import openpyxl
import openpyxl.drawing.image
import base64
import json

def extract_images_from_xlsx(file_path):
    """Collect the images embedded in a workbook as base64 data URLs.

    Worksheets are visited in workbook order and, within each sheet, the
    entries of its ``_images`` list are visited in list order. Entries that
    are not ``openpyxl.drawing.image.Image`` instances are skipped.

    Args:
        file_path: Path handed to ``openpyxl.load_workbook``.

    Returns:
        list[str]: One ``data:image/<format>;base64,<payload>`` string per
        image, in the order encountered.
    """
    book = openpyxl.load_workbook(filename=file_path)

    def to_data_url(picture):
        # ``ref`` is read through ``getvalue()``, so it is expected to be an
        # in-memory buffer holding the raw image bytes.
        fmt = picture.format
        payload = base64.b64encode(picture.ref.getvalue()).decode()
        return f"data:image/{fmt};base64,{payload}"

    # NOTE(review): ``_images`` is a private openpyxl attribute.
    return [
        to_data_url(picture)
        for worksheet in book.worksheets
        for picture in worksheet._images
        if isinstance(picture, openpyxl.drawing.image.Image)
    ]

def read_xls_with_images(file_path):
    """Load a spreadsheet into a DataFrame and attach its embedded images.

    The tabular data is read with ``pd.read_excel`` (empty cells stay as
    empty strings because ``keep_default_na=False``), the embedded images
    are collected with ``extract_images_from_xlsx``, and the images are
    stored in a new ``"image"`` column.

    Args:
        file_path: Path of the workbook; it is read twice, once by pandas
            and once by ``extract_images_from_xlsx``.

    Returns:
        The DataFrame with an additional ``"image"`` column holding one
        image data URL per row.

    Raises:
        AssertionError: If the number of images differs from the number of
            data rows.
    """
    df = pd.read_excel(file_path, keep_default_na=False)
    images = extract_images_from_xlsx(file_path)
    # Images are paired with rows purely by position, so the counts must agree.
    # NOTE(review): ``read_excel`` is called without ``sheet_name`` (pandas
    # default: first sheet) while ``extract_images_from_xlsx`` walks every
    # worksheet; this presumably assumes a single-sheet workbook whose image
    # order matches its row order -- confirm against the data.
    assert len(df) == len(images), (
        f"{file_path}: found {len(images)} embedded image(s) "
        f"for {len(df)} data row(s); expected exactly one image per row"
    )
    df["image"] = images
    return df

In [2]:
# Load the rows of data.xlsx together with their embedded images; the images
# end up in the "image" column as base64 data URLs.
df = read_xls_with_images('data.xlsx')

In [3]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
# Load the .env file located by find_dotenv() before the environment is read
# below; the return value is discarded on purpose.
_ = load_dotenv(find_dotenv())

# The API key and endpoint come from the environment rather than being
# written into the notebook. Either lookup yields None when the variable is unset.
client = openai.OpenAI(
    base_url=os.environ.get("OPENAI_BASE_URL"),
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [4]:
def describe(b64image, *, prompt="", detail="low", max_tokens=300,
             model="gpt-4-vision-preview"):
    """Send one image to the chat-completions endpoint and return the reply.

    A single user message is built from a text part and an image part, then
    submitted through the module-level ``client``.

    Args:
        b64image: Value placed in the ``image_url.url`` field. In this
            notebook it is a ``data:image/...;base64,...`` URL.
        prompt: Text part sent alongside the image. Defaults to an empty
            string, which reproduces the original hard-coded request.
        detail: Value for the ``image_url.detail`` field (default ``"low"``).
        max_tokens: Upper bound on completion tokens passed to the API.
        model: Model identifier passed to the API.

    Returns:
        The response object returned by ``client.chat.completions.create``,
        unmodified.
    """
    text_part = {"type": "text", "text": prompt}
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": b64image,
            "detail": detail,
        },
    }
    response = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": [text_part, image_part],
        }],
        max_tokens=max_tokens,
    )
    return response

In [5]:
# Query the model with the image stored in the row labelled 2 of `df`
# (`.loc` selects by index label, not by position).
response = describe(df.loc[2, "image"])

In [6]:
# Serialise the response to JSON, parse it back, and re-dump it with a
# two-space indent so the whole payload is readable.
print(json.dumps(json.loads(response.model_dump_json()), indent=2))

{
  "id": "chatcmpl-8Msd4ZKZBW067tXzTIdf2icb6ht1c",
  "choices": [
    {
      "finish_reason": null,
      "index": 0,
      "message": {
        "content": "You've shared an image of a pair of light-colored shoes placed on a floor. The shoes appear to be neatly positioned side by side, facing up, with their laces tied. It's a simple image that showcases the shoes quite prominently, potentially for the purpose of sale, evaluation, or to share with someone. If you have any specific questions about these shoes or if there's anything else you'd like to know or say about them, feel free to ask!",
        "role": "assistant",
        "function_call": null,
        "tool_calls": null
      },
      "finish_details": {
        "type": "stop",
        "stop": "<|fim_suffix|>"
      }
    }
  ],
  "created": 1700463902,
  "model": "gpt-4-1106-vision-preview",
  "object": "chat.completion",
  "system_fingerprint": null,
  "usage": {
    "completion_tokens": 94,
    "prompt_tokens": 92,
    "tot