In [2]:
import os
from dotenv import load_dotenv
from openai import OpenAI

# Load secrets from the .env file one directory above the working directory.
# override=True makes values from the file replace variables already set in the
# process environment.
load_dotenv(dotenv_path="../.env", override=True)

# Read the key explicitly and hand it to the client.
my_api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=my_api_key)

import base64

# Sample images sent to the model in the cells below.
# Paths are relative to the notebook's working directory.
image_path1 = "data/invoice.png"
image_path2 = "data/office_lease.png"

In [None]:
# Encode the invoice image as Base64 so it can be embedded in the request.
#   - Base64 represents binary data (such as images) using only text characters
#     (A–Z, a–z, 0–9, +, /).
#   - The image is sent inline as a `data:` URL inside the message content, so
#     the raw bytes have to be turned into text first.
with open(image_path1, "rb") as f:
    image1_base64 = base64.b64encode(f.read()).decode("utf-8")

# MIME type placed in the data URL; it must match the real format of image_path1.
mime_type = "image/png"

# Request extraction from image: one system message that sets the task, and one
# user message that carries both the text instruction and the image itself.
response = client.chat.completions.create(
    model="gpt-5-nano",
    messages=[
        {
            "role": "system",
            "content": "You extract and summarize information from invoices or forms."
        },
        {
            "role": "user",
            "content": [
                # Prompt typo fixed: "number, data, due date" -> "number, date, due date".
                {"type": "text", "text": "Extract as many fields, such as Invoice from/Company, Invoice, Invoice information - number, date, due date etc, Invoice product list from this image as a JSON object:"},
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image1_base64}"}}
            ]
        }
    ],
)

# Show the text of the first (and only requested) completion.
print("🧾 Extracted Info from Image:\n")
print(response.choices[0].message.content)

In [None]:
# Encode the second sample image as Base64 so it can be embedded in the request
# as a `data:` URL.
with open(image_path2, "rb") as f:
    image2_base64 = base64.b64encode(f.read()).decode("utf-8")

# MIME type placed in the data URL; it must match the real format of image_path2.
mime_type = "image/png"

# Request extraction from image.
# NOTE(review): the file is named "office_lease.png" but the system prompt below
# describes a "property for sale flyer" -- confirm which is correct and align the
# file name or the prompt.
response = client.chat.completions.create(
    model="gpt-5-nano",
    messages=[
        {
            "role": "system",
            "content": "Extract and summarize information from this property for sale flyer."
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract key fields from this image, such as property title, details etc as a JSON object:"},
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image2_base64}"}}
            ]
        }
    ],
)

# Show the text of the first (and only requested) completion.
print("🧾 Extracted Info from Image:\n")
print(response.choices[0].message.content)

In [5]:
import os

import re

image_folder = "data2"

# Every file selected below is a JPEG, so the MIME type is the same for all of
# them and is set once instead of on each iteration.
mime_type = "image/jpeg"


def _natural_sort_key(filename):
    """Return a sort key that orders embedded numbers numerically.

    re.split with a capturing group yields alternating text / digit-run pieces
    (text at even positions, digits at odd positions), so "Slide2.JPG" sorts
    before "Slide10.JPG" instead of after it.
    """
    pieces = re.split(r"(\d+)", filename)
    return [int(piece) if index % 2 else piece.lower()
            for index, piece in enumerate(pieces)]


# os.listdir() returns names in arbitrary order, so sort them explicitly to get
# a reproducible slide order. Accept both common JPEG suffixes, in any letter case.
image_files = sorted(
    (f for f in os.listdir(image_folder) if f.lower().endswith((".jpg", ".jpeg"))),
    key=_natural_sort_key,
)

if not image_files:
    # Make an empty or mis-pointed folder visible instead of silently doing nothing.
    print(f"No .jpg/.jpeg images found in {image_folder!r}")

for image_file in image_files:
    image_path = os.path.join(image_folder, image_file)

    # Base64-encode the slide so it can be embedded in the request as a data URL.
    with open(image_path, "rb") as f:
        image_base64 = base64.b64encode(f.read()).decode("utf-8")

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": "You extract all text and information from images, especially presentation slides as a text. Ignore footers"
            },
            {
                "role": "user",
                "content": [
                    # The file name is appended to the prompt text for this slide.
                    {"type": "text", "text": f"Extract all text, titles, content, and any structured information from this slide image as a JSON object. Include slide number if visible: {image_file}"},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image_base64}"}}
                ]
            }
        ],
    )

    # Print each slide's result followed by a separator line.
    print(f"📊 Extracted Info from {image_file}:\n")
    print(response.choices[0].message.content)
    print("\n" + "="*50 + "\n")

📊 Extracted Info from Slide4.JPG:

```json
{
  "slide_number": "4",
  "title": "Service charges — Mobile Hall (hall-only)",
  "content": {
    "description": "Rates include: seating set-up (160–180), power/AV as offered, standard set-up + breakdown. Delivery is priced separately (next slide).",
    "rates": [
      {
        "tier": "Conservative",
        "hourly_rate": "$650/hr",
        "daily_rate": "$5,000/day",
        "notes": "Community events / weekday bookings"
      },
      {
        "tier": "Middle",
        "hourly_rate": "$800/hr",
        "daily_rate": "$6,250/day",
        "notes": "Most weekends + standard demand"
      },
      {
        "tier": "High",
        "hourly_rate": "$1,000/hr",
        "daily_rate": "$7,750/day",
        "notes": "Peak Saturdays / premium setups"
      }
    ],
    "included_adjustments": [
      "Extra hours (after day cap): same hourly tier rate, billed in 30-minute increments",
      "Multi-day: 15â€“25% discount on days 2+ (beca