## Calculate Queue Wait Time

### Goal

Use image analysis of in-store video to estimate the current wait time for new arrivals.

### Set initial values

In [None]:
import os
from dotenv import load_dotenv

# Load settings from a local .env file; values there override variables
# already present in the process environment.
load_dotenv(override=True)

# --- Azure OpenAI connection settings ---
azure_openai_endpoint = 'https://mrai.openai.azure.com/'
azure_openai_api_version = '2024-05-01-preview'
azure_openai_chat_deployment = 'gpt-4o-mini'
# None when AZURE_OPENAI_KEY is not defined in the environment / .env file.
azure_openai_key = os.getenv("AZURE_OPENAI_KEY")

# --- Sampling and queue parameters ---
# One frame is extracted every 3600/framesPerHour seconds.
framesPerHour = 60
# Passed to OpenCV as the JPEG quality setting when frames are saved.
compressionPercent = 50
video_path = '../Data/waiting in line.mp4'
# NOTE(review): the two values below are not referenced by any cell visible
# here; presumably inputs for a later wait-time calculation - confirm.
avgServiceTimeInSec = 30
serviceStations = 1

# System prompt sent with every frame; it has no placeholders, so a plain
# (non-f) string is sufficient.
sysPrompt = """
Your job is to determine the number of people in the provided image. Respond with just a single number indicating the number of people in the image.
"""

### Acquire video frame

Extract single frames from the video every *3600/framesPerHour* seconds and save them as jpg files.

In [22]:
import cv2
import os

def extract_frames(video_path, output_folder, interval, jpeg_quality=None):
    """Save one JPEG frame from a video every `interval` seconds.

    Frames are written to `output_folder` as frame_0.jpg, frame_1.jpg, ...
    and a summary line with the number of frames written is printed.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the JPEG files; created if
            it does not exist.
        interval: Time between captured frames, in seconds. Must be > 0.
        jpeg_quality: JPEG quality passed to OpenCV. Defaults to the
            notebook-level `compressionPercent` setting when omitted.

    Raises:
        ValueError: If `interval` is not positive.
    """
    if not interval > 0:
        raise ValueError(f"interval must be a positive number of seconds, got {interval!r}")
    if jpeg_quality is None:
        jpeg_quality = compressionPercent

    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    frame_count = 0
    try:
        # Get the frames per second (fps) of the video
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            print("Error: Could not determine the video frame rate.")
            return

        # Never step by 0 frames: that would re-read frame 0 forever when
        # fps * interval is below 1.
        frame_interval = max(1, int(fps * interval))

        # When the container reports its length, use it to stop instead of
        # relying on a failed read after seeking past the end.
        reported_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        total_frames = int(reported_frames) if reported_frames > 0 else 0

        while True:
            target_frame = frame_count * frame_interval
            if total_frames and target_frame >= total_frames:
                break
            # Set the position of the next frame to capture
            cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
            success, frame = cap.read()
            # If the frame was not successfully read, we've reached the end of the video
            if not success:
                break
            frame_filename = os.path.join(output_folder, f'frame_{frame_count}.jpg')
            if not cv2.imwrite(frame_filename, frame, [cv2.IMWRITE_JPEG_QUALITY, int(jpeg_quality)]):
                print(f"Error: Could not write {frame_filename}.")
                break
            frame_count += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()
    print(f"Extracted {frame_count} frames from the video.")

# Sample the video at framesPerHour frames per hour, i.e. one frame every
# 3600/framesPerHour seconds, and store the JPEGs under ../Data/frames.
output_folder = '../Data/frames'
extract_frames(video_path, output_folder, interval=3600/framesPerHour)

Extracted 1 frames from the video.


### Create AI client

**NOT USED**

In [None]:
from openai import AzureOpenAI
import base64

# from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# openai_credential = DefaultAzureCredential()
# token_provider = get_bearer_token_provider(openai_credential, "https://cognitiveservices.azure.com/.default")

# SDK client for the Azure OpenAI resource configured in the first cell.
# Authentication uses the API key; the Entra ID token-provider route is
# left disabled (see the commented-out credential lines in this cell).
client = AzureOpenAI(
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    api_version=azure_openai_api_version,
)

### Get people count

Call gpt-4o-mini to get the count of people in a frame.

In [25]:
import os  
import requests  
import base64  
from azure.identity import ClientSecretCredential  
  
# Configuration
# The tenant/client values are placeholders for the service-principal
# authentication path, which is currently disabled (see below).
TENANT_ID = "YOUR_TENANT_ID"
CLIENT_ID = "YOUR_CLIENT_ID"
CLIENT_SECRET = "YOUR_CLIENT_SECRET"
RESOURCE = "https://management.azure.com/.default"
IMAGE_PATH = "../Data/frames/frame_0.jpg"
# Upper bound for the HTTP call so a stalled connection cannot hang the cell.
REQUEST_TIMEOUT_SEC = 60

# Authenticate and get token
# credential = ClientSecretCredential(tenant_id=TENANT_ID, client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
# token = credential.get_token(RESOURCE).token

if not azure_openai_key:
    raise SystemExit("AZURE_OPENAI_KEY is not set; add it to the environment or the .env file.")

# Read the frame inside a context manager so the file handle is closed,
# then embed it in the request as a base64 data URL.
with open(IMAGE_PATH, 'rb') as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('ascii')

headers = {
    "Content-Type": "application/json",
    #"Authorization": f"Bearer {token}"
    "api-key": azure_openai_key
}

# Payload for the request: the system prompt plus a single user message
# that carries the image.
payload = {
    "messages": [
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": sysPrompt
                }
            ]
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "\n"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encoded_image}"
                    }
                },
                {
                    "type": "text",
                    "text": "\n"
                }
            ]
        }
    ],
    "temperature": 0.7,
    "top_p": 0.95,
    "max_tokens": 800,
    #"response_format": "ResponseFormatJsonObject"
}

# Build the URL from the settings in the first cell so the resource,
# deployment and API version are defined in exactly one place.
ENDPOINT = (
    f"{azure_openai_endpoint.rstrip('/')}/openai/deployments/{azure_openai_chat_deployment}"
    f"/chat/completions?api-version={azure_openai_api_version}"
)

# Send request
try:
    response = requests.post(ENDPOINT, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_SEC)
    response.raise_for_status()  # Will raise an HTTPError if the HTTP request returned an unsuccessful status code
except requests.RequestException as e:
    raise SystemExit(f"Failed to make the request. Error: {e}")

# Decode the body once and reuse it for all three report lines.
result = response.json()
print(f"People in line: {result['choices'][0]['message']['content']}")
print(f"Input  tokens: {result['usage']['prompt_tokens']}")
print(f"Output tokens: {result['usage']['completion_tokens']}")

People in line: 6
Input  tokens: 36878
Output tokens: 1


### Using OpenAI class

**NOT USED**

In [None]:
# Same request as the REST cell, issued through the AzureOpenAI SDK client.
image_path = "../Data/frames/frame_0.jpg"
# Read the frame inside a context manager so the file handle is closed.
with open(image_path, 'rb') as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('ascii')

response = client.chat.completions.create(
    model=azure_openai_chat_deployment,
    messages=[
        {"role": "system", "content": sysPrompt},
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "\n"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encoded_image}"
                    }
                },
                {
                    "type": "text",
                    "text": "\n"
                }
            ]
        }
    ]
)
# Prints the whole message object; the model's answer is in its `content` field.
print(response.choices[0].message)

ChatCompletionMessage(content='0', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None)


### Cost

Based on [pricing per this doc](https://techcommunity.microsoft.com/blog/azure-ai-services-blog/openai%E2%80%99s-gpt-4o-mini-now-available-in-api-with-vision-and-fine-tuning-text-capab/4200640)

In [20]:
# Prices in USD per 1,000 tokens.
costPer1KInput = 0.00015
costPer1KOutput = 0.0006
# Rounded-up estimates; the sample request above reported 36878 input tokens
# and 1 output token for one frame.
inputTokensPerFrame = 40000
outputTokensPerFrame = 10

# JPEG quality (compressionPercent) is deliberately NOT part of the formula:
# lowering it shrinks the file but leaves the input token count unchanged.
# Per the vision pricing documentation, image tokens are derived from the
# image's pixel dimensions (number of tiles) and detail level, not from its
# byte size - reducing cost requires downscaling the frame instead.
costPerHour = framesPerHour*(costPer1KInput*inputTokensPerFrame + costPer1KOutput*outputTokensPerFrame)/1000
print(f"The cost per store per hour is: ${costPerHour:.2f}")


The cost per store per hour is: $0.36
