In [27]:
import base64
import mimetypes
from typing import List

import instructor
import openai
from dotenv import load_dotenv
from pydantic import BaseModel, Field

# Pull environment variables from ../.env into the process before the client
# is constructed (presumably this is where the OpenAI API key lives — confirm).
load_dotenv(dotenv_path="../.env")

# Wrap the OpenAI client with instructor, using MD_JSON mode, so that
# chat-completion calls can be given a `response_model`.
client = instructor.patch(
    openai.OpenAI(),
    mode=instructor.Mode.MD_JSON,
)


# Structured result requested from the vision model for a single image.
#
# NOTE: this model is documented with comments rather than a class docstring
# on purpose. pydantic copies a model docstring into the generated JSON
# schema, and analyse_image interpolates that schema into its prompt, so a
# docstring here would change the request that is sent.
class ImageAnalysis(BaseModel):
    # Short title for the image.
    title: str = Field(description="The title of the image.")

    # Free-text summary, worded so the image can be found again by search.
    description: str = Field(
        description=(
            "The description of the image. "
            "Include any summary that can help someone find the image in a database."
        ),
    )

    # Names of the objects visible in the image.
    features: List[str] = Field(
        description="A list of objects that are present in the image.",
    )


def analyse_image(img, model: str = "gpt-4-vision-preview") -> ImageAnalysis:
    """Send a local image file to a vision chat model and return a structured analysis.

    The file is read, base64-encoded into a ``data:`` URL and submitted through
    the instructor-patched ``client`` together with the ``ImageAnalysis`` JSON
    schema; the reply is returned as an ``ImageAnalysis`` instance.

    Args:
        img: Path of the image file to analyse.
        model: Name of the chat-completions model to call. Defaults to the
            model that was previously hard-coded in this function.

    Returns:
        The ``ImageAnalysis`` produced for the image.

    Raises:
        OSError: If the image file cannot be opened or read.
        Any error raised by the client call is not caught here and propagates
        to the caller.
    """
    print("Processing image: ", img)
    with open(img, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode("utf-8")

    # Label the data URL with the media type implied by the file extension
    # instead of always claiming JPEG; keep JPEG as the fallback when the
    # extension is unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(str(img))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    image_url = f"data:{mime_type};base64,{encoded_string}"

    return client.chat.completions.create(
        model=model,
        max_tokens=4096,
        max_retries=2,
        response_model=ImageAnalysis,
        temperature=0,
        messages=[
            {
                "role": "system",
                "content": """
                You are a world-class analyst tasked with analyzing aerial photos.
                Your goal is to identify the objects in the image and provide a detailed analysis of the image.
                Include any features that can enhance understanding of the image.""",
            },
            {
                "role": "user",
                # Multi-part content uses the documented part objects:
                # a {"type": "text"} part and an {"type": "image_url"} part
                # whose image_url is an object carrying the URL.
                "content": [
                    {
                        "type": "text",
                        "text": f"Return a detailed analysis of the image, adhering to the structure defined by {ImageAnalysis.model_json_schema()}",
                    },
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            },
        ],
    )

In [28]:
import pprint

# Analyse img2.jpg, then pretty-print the result's fields as a dict
# using a 4-space indent.
processed_image = analyse_image("img2.jpg")
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(
    processed_image.model_dump()
)

Processing image:  img2.jpg
{   'description': 'The image is an aerial photograph of a parking lot with '
                   'various parked cars. The cars are arranged in rows with a '
                   'mix of colors and models. The parking lot surface is a '
                   'light gray with visible tire marks. The image can be used '
                   'to analyze parking patterns or for urban planning studies.',
    'features': ['parking lot', 'parked cars', 'tire marks', 'aerial view'],
    'title': 'Aerial View of a Parking Lot'}


In [31]:
import json

# Show the most recent analysis (the `processed_image` global set by an
# earlier cell) as JSON with a 4-space indent.
print(
    json.dumps(
        processed_image.model_dump(),
        indent=4,
    )
)



{
    "title": "Aerial View of a Parking Lot",
    "description": "The image is an aerial photograph of a parking lot with various parked cars. The cars are arranged in rows with a mix of colors and models. The parking lot surface is a light gray with visible tire marks. The image can be used to analyze parking patterns or for urban planning studies.",
    "features": [
        "parking lot",
        "parked cars",
        "tire marks",
        "aerial view"
    ]
}


{   'description': 'The image captures an aerial view of a beach scene with a '
                   'wooden pier extending into the sea. People are seen '
                   'swimming in the water and lounging on the beach with '
                   'various beach accessories.',
    'objects': [   'beach',
                   'pier',
                   'people swimming',
                   'people lounging',
                   'sea',
                   'beach umbrellas',
                   'beach towels',
                   'beach bags'],
    'title': 'Aerial View of a Beach Pier'}


In [32]:
import json

# Analyse img3.jpg (rebinding the `processed_image` global) and print the
# result as JSON with a 4-space indent.
processed_image = analyse_image("img3.jpg")
print(
    json.dumps(
        processed_image.model_dump(),
        indent=4,
    )
)




Processing image:  img3.jpg
{
    "title": "Aerial View of a Beach Pier",
    "description": "The image is an aerial shot of a sandy beach with a wooden pier extending into the sea. People are visible on the beach and in the water, some swimming and others lounging on beach towels or under umbrellas. The water transitions from a clear turquoise near the sand to a deeper blue further out.",
    "features": [
        "sandy beach",
        "wooden pier",
        "people swimming",
        "people lounging",
        "beach towels",
        "beach umbrellas",
        "turquoise water",
        "deep blue sea"
    ]
}
