In [96]:
import openai
import base64
import requests
import json

In [97]:
from keys import API_KEY

openai.api_key = API_KEY

#### Reusable Code

In [98]:
messages = []

In [99]:
def _openai() -> str:
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=300,
    )
    return response.choices[0].message.content

In [100]:
def request(image_url: str, promt: str) -> str:
    messages.append(
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": promt
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": image_url,
                    },
                },
            ],
        }
    )
    return _openai()

In [101]:
def extract(image_url: str) -> str:
    promt = """
        Identify a social situation observable in this picture. 
        Please respond with a description of the social situation, 
        the people involved, including their activities and what they
        look like. Format your response as a JSON object. Besides 
        the general descriptions of the social situation, the 
        resulting JSON object should contain an attribute named
        persons containing a list of person object. Make sure a
        that every person object not only contains information 
        about their activity, but also everything observable that 
        can be used to recognise the same person in another picture.
        Your response should not be longer than 250 tokens.
    """
    return request(image_url, promt)

In [102]:
def recognize(image_url: str, person_description: str) -> str:
    promt = f"""
        Analyse this picture and decide if you can detect a person that corresponds
        to the following description: {person_description}. If the person is present, 
        provide additional details about their activity in this picture.
    """
    return request(image_url, promt)

In [94]:
def load_json_and_remove_lines(json_str: str):
    if isinstance(json_str, dict):
        json_str = json.dumps(json_str)
    # Split the string into lines
    lines = json_str.splitlines()
    
    # Remove the first and last line
    if lines and "json" in lines[0].lower():
        # Remove the first and last line
        lines = lines[1:-1]
    
    # Join the remaining lines back into a string
    json_data = '\n'.join(lines)
    
    # Load the JSON data
    json_obj = json.loads(json_data)
    
    return json_obj

#### Application

In [103]:
social_situation = extract("https://chunntguet.xyz/pics/eliott-reyna-5KrZ3UoDKC4-unsplash.jpg")
social_situation = load_json_and_remove_lines(social_situation)
person_tobe_detected = social_situation["persons"][0]
person_tobe_detected = load_json_and_remove_lines(person_tobe_detected)
recognition = recognize("https://chunntguet.xyz/pics/eliott-reyna-5KrZ3UoDKC4-unsplash.jpg", person_tobe_detected)
print(recognition)

The person corresponding to the provided description is present in the picture. Below is a JSON object with additional details about this social situation and the activities of the people involved:

```json
{
  "socialSituation": "A group of friends are interacting and having a conversation outdoors near a chain-link fence. The person on the left is actively showing or explaining something using hand gestures to the other two individuals.",
  "persons": [
    {
      "activity": "Showing or explaining something with hand gestures.",
      "appearance": {
        "gender": "male",
        "hair": "short, dark, and curly",
        "facial_hair": "beard",
        "clothing": "black and white plaid shirt with long sleeves, jeans"
      }
    },
    {
      "activity": "Listening and smiling.",
      "appearance": {
        "gender": "female",
        "hair": "long, straight, and dark brown",
        "clothing": "dark sweater with a V shape that combines dark-purple, white, and green.",
   

In [63]:
print(social_situation[1][1])


```json
{
  "description": "A group of five friends interacting near a fence with a graffiti-covered train in the background. Activities include chatting, standing, and sitting.",
  "persons": [
    {
      "id": 1,
      "activity": "standing",
      "description": "Young man wearing a light blue denim jacket, white t-shirt, black jeans, and white sneakers. He is leaning against the fence."
    },
    {
      "id": 2,
      "activity": "sitting",
      "description": "Young man wearing a brown jacket, blue jeans, and white sneakers. He is sitting on the curb."
    },
    {
      "id": 3,
      "activity": "standing",
      "description": "Young man wearing a black and white plaid shirt, black jeans, and black sneakers. He is standing with his arms crossed."
    },
    {
      "id": 4,
      "activity": "standing",
      "description": "Young woman wearing a green sweater with a white and pink chevron pattern and blue jeans. She is standing and smiling."
    },
    {
      "id": 5,
   

In [107]:
import openai
from io import BytesIO
from PIL import Image
import base64

# Load the local image file
image_path = "bild1.jpg"
image = Image.open(image_path)

# Convert the image to bytes and encode it as base64
buffer = BytesIO()
image.save(buffer, format="JPEG")
image_bytes = buffer.getvalue()
image_base64 = base64.b64encode(image_bytes).decode("utf-8")

response = openai.chat.completions.create(
  model="gpt-4o",
  messages=[
    {
      "role": "user",
      "content": [
        {"type": "text", "text": "What’s in this image?"},
        {
          "type": "image_url",
          "image_url": {
            "url": f"data:image/jpeg;base64,{image_base64}",
          },
        },
      ],
    }
  ],
  max_tokens=200,
)

print(response.choices[0].message.content)

This image shows a group of five young people gathered near a fence. Four individuals are standing, and one is seated on the ground. Behind the fence, there is a graffiti-covered train. The group appears to be engaged in conversation and seem to be casually dressed.
