In [1]:
from azure_authentication.customized_azure_login import CredentialFactory
from openai import AzureOpenAI
import os
from pydantic import BaseModel, Field, conint, confloat
import pandas as pd

In [2]:
import openai

In [3]:
from typing import List, Optional, Literal

In [4]:
api_version="2024-10-21"   #options: "2024-10-21" , "2024-02-01"  
api_base= "https://greendia-oai.openai.azure.com/"
deployment_name = "gpt-4o-mini"

In [5]:
class GRZ(BaseModel):
    value: Optional[float] = Field(
        None,
        description="Der numerische Wert der Grundflächenzahl oder 'null', falls nicht vorhanden.",
        example=0.75
    )

class GFZ(BaseModel):
    value: Optional[float] = Field(
        None,
        description="Der numerische Wert der Geschoßflächenzahl oder 'null', falls nicht vorhanden.",
        example=1.0
    )

class BuildingMetrics(BaseModel):
    
    grz: Optional[GRZ] = Field(None, description="Grundflächenzahl (GRZ)")
    
    gfz: Optional[GFZ] = Field(None, description="Geschoßflächenzahl (GFZ)")

In [6]:
parser_tool = [openai.pydantic_function_tool(GFZ)]


In [7]:
client = AzureOpenAI(
    api_key=os.environ["KEY_GPT_4O_MINI"],  
    api_version=api_version,
    base_url=f"{api_base}/openai/deployments/{deployment_name}"
)

In [8]:
import base64
from mimetypes import guess_type
from PIL import Image
from io import BytesIO

# Function to encode a local image into a compressed data URL 
def local_image_to_data_url(image_path, quality=85, max_size=(800, 800)):
    # Guess the MIME type of the image based on the file extension
    mime_type, _ = guess_type(image_path)
    if mime_type is None:
        mime_type = 'application/octet-stream'  # Default MIME type if none is found

    # Open the image using Pillow
    with Image.open(image_path) as img:
        ## Resize image if it's larger than max_size (optional)
        #img.thumbnail(max_size)

        # Save the image to a buffer in JPEG format (compress it)
        buffered = BytesIO()
        img.save(buffered, format="JPEG", quality=quality)  # Adjust quality for compression

        # Get the base64-encoded compressed image
        base64_encoded_data = base64.b64encode(buffered.getvalue()).decode('utf-8')

    # Construct the data URL
    return f"data:{mime_type};base64,{base64_encoded_data}"


In [9]:
def calculate_llm_calling_price(model_name, 
                                input_tokens, 
                                output_tokens):
    """
    Cost calculator
    based on prices from https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/#pricing
    """

    if model_name == "gpt-4o-mini":
        return input_tokens / 1000 * 0.15 + output_tokens / 1000 * 0.6
    else:
        return -1.0

In [10]:
data_urls = {
    "807" : {"link": "https://raw.githubusercontent.com/ldmnch/bavaria-building-plans-digitalization/refs/heads/main/data/proc/building_plans_sample/test_images/807_0.jpg"},
    "695" : {"link": "https://raw.githubusercontent.com/ldmnch/bavaria-building-plans-digitalization/refs/heads/main/data/proc/building_plans_sample/test_images/695_0.jpg?raw=true"},
    "904" : {"link": "https://raw.githubusercontent.com/ldmnch/bavaria-building-plans-digitalization/refs/heads/main/data/proc/building_plans_sample/test_images/904_0.jpg?raw=true"}
}

In [11]:
for key, value in data_urls.items():
    print(f"{key}: {value['link']}")

807: https://raw.githubusercontent.com/ldmnch/bavaria-building-plans-digitalization/refs/heads/main/data/proc/building_plans_sample/test_images/807_0.jpg
695: https://raw.githubusercontent.com/ldmnch/bavaria-building-plans-digitalization/refs/heads/main/data/proc/building_plans_sample/test_images/695_0.jpg?raw=true
904: https://raw.githubusercontent.com/ldmnch/bavaria-building-plans-digitalization/refs/heads/main/data/proc/building_plans_sample/test_images/904_0.jpg?raw=true


In [38]:
results = pd.DataFrame(columns=["id", "output", "prompt_tokens", "completion_tokens", "price"])

for key, value in data_urls.items():
    response = client.beta.chat.completions.parse(
            model=deployment_name,
            messages=[
                { 
                    "role": "system", 
                    "content": "Sie sind ein hilfreicher Umwelt-Stadtplaner, der einen Bauplan analysiert." 
                },
                { 
                    "role": "user",
                    "content": 
                    "Basierend auf dem unten bereitgestellten Bild eines Gebäudeplans möchten wir folgende Informationen extrahieren.\n"
                    "1. Maximale Geschoßflächenzahl (GFZ).\n"
                    "2. Maximale Grundflächenzahl (GRZ).\n",
                },
                {
                    "role": "user",
                    "content": value['link']  
                }
            ],
            response_format=BuildingMetrics,
            max_tokens=2000
    )

    result = response.choices[0].message.parsed 
    prompt_tokens = response.usage.prompt_tokens
    completion_tokens = response.usage.completion_tokens

    price = calculate_llm_calling_price(deployment_name, prompt_tokens, completion_tokens)

    row = pd.DataFrame({"id": key, "output": result, "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "price": price})

    results = pd.concat([results, row])

  results = pd.concat([results, row])


In [34]:
response.choices[0].message.parsed

BuildingMetrics(grz=GRZ(value=0.75), gfz=GFZ(value=1.0))

In [37]:
row

Unnamed: 0,id,output,prompt_tokens,completion_tokens,price
0,904,"(grz, value=0.75)",359,19,0.06525
1,904,"(gfz, value=1.0)",359,19,0.06525


In [40]:
results.reset_index(drop=True, inplace=True)

In [41]:
results

Unnamed: 0,id,output,prompt_tokens,completion_tokens,price
0,807,"(grz, value=0.5)",356,19,0.0648
1,807,"(gfz, value=1.2)",356,19,0.0648
2,695,"(grz, value=0.3)",359,19,0.06525
3,695,"(gfz, value=1.5)",359,19,0.06525
4,904,"(grz, value=0.4)",359,19,0.06525
5,904,"(gfz, value=1.2)",359,19,0.06525
