In [1]:
import pandas as pd
import os
import numpy as np

import anthropic
from openai import OpenAI

import base64
import httpx
import requests
import json
import time

from dotenv import load_dotenv, find_dotenv
from IPython.display import Image,display

pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 500)

BLUE = '\033[94m'
END = '\033[0m' 

load_dotenv(find_dotenv())

True

In [2]:
openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
claude_client = anthropic.Anthropic(api_key=os.environ["CLAUDE_KEY"])

In [3]:
image_url_1 = 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/13/Red_square_Moscow_cityscape_%288309148721%29.jpg/1200px-Red_square_Moscow_cityscape_%288309148721%29.jpg'
image_url_2 = 'https://images.squarespace-cdn.com/content/v1/5b3fc87b45776e887e966e82/ea4b3853-7558-4048-8051-d986e71ec39d/beagle-on-meadow-2021-08-26-15-58-22-utc.jpg'

In [4]:
prompt_1 = '''
        CONTEXT:
        For the given place on the image provide me with historical background information about the location,
        main attractions of interest at the location and outline the pricing information for visiting the location.
        Do not include disclaimer.
        Descriptions should have a dry tone.
        
        OUTPUT:
        After determining historical background, main attractions, and pricing information output them in JSON format. 
        For example:
        [
        {{ historical background : ['historical fact 1', 
                                    'historical fact 2', 
                                     ..., 
                                    'historical fact n'],
        main attractions : ['main attractions 1', 
                            'main attractions 2', 
                             ..., 
                            'main attractions n'],
        pricing :['pricing for 1', 
                  'pricing for 2', 
                   ..., 
                   'pricing for n']
        }}
        ] 

        '''


prompt_2 = '''
        CONTEXT:
        For the givem image of the animal provide me animal name and give a brief overview of the animal, including its species, habitat, behaviors, and any interesting facts
        Do not include disclaimer.
        Descriptions should have a dry tone.
        
        OUTPUT:
        After determining animal name and brief overview of the animal output them in JSON format. 
        For example:
        [
        {{ 'animal name' : ['animal name'],
        'overview' : ['overview 1', 
                      'overview 2', 
                       ..., 
                      'overview n'],
        }}
        ] 

        '''

In [5]:
def gpt_description(image_url, prompt):
    response = openai_client.chat.completions.create(
      model="gpt-4-vision-preview",
      messages=[
        {
          "role": "user",
          "content": [
            {
              "type": "text",
              "text": prompt
            },
            {
              "type": "image_url",
              "image_url": {
                  "url": image_url,
                  "detail": "low"
              }
            }
          ]
        }
      ],
      temperature=0,
      max_tokens=2500,
      top_p=1,
      frequency_penalty=0,
      presence_penalty=0
    )
    return response


def opus_description(image_url, prompt):    
    image_media_type = "image/jpeg"
    image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
    
    response = claude_client.messages.create(
      model="claude-3-opus-20240229",
      messages=[
        {
          "role": "user",
          "content": [
            {
            "type": "image",
                  "source": {
                      "type": "base64",
                      "media_type": image_media_type,
                      "data": image_data,
                  },
            },
            {
              "type": "text",
              "text": prompt
            }
              ],
            }
          ],
      temperature=0,
      max_tokens=2500,
      top_p=1,
    )
    return response


def sonnet_description(image_url, prompt):
    image_media_type = "image/jpeg"
    image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
    
    response = claude_client.messages.create(
      model="claude-3-sonnet-20240229",
      messages=[
        {
          "role": "user",
          "content": [
            {
            "type": "image",
                  "source": {
                      "type": "base64",
                      "media_type": image_media_type,
                      "data": image_data,
                  },
            },
            {
              "type": "text",
              "text": prompt
            }
              ],
            }
          ],
      temperature=0,
      max_tokens=2500,
      top_p=1,
    )
    return response



def haiku_description(image_url, prompt):
    image_media_type = "image/jpeg"
    image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
    
    response = claude_client.messages.create(
      model="claude-3-haiku-20240307",
      messages=[
        {
          "role": "user",
          "content": [
            {
            "type": "image",
                  "source": {
                      "type": "base64",
                      "media_type": image_media_type,
                      "data": image_data,
                  },
            },
            {
              "type": "text",
              "text": prompt
            }
              ],
            }
          ],
      temperature=0,
      max_tokens=2500,
      top_p=1,
    )
    return response

In [6]:
def cost_gpt(total, prompt_tokens, completion_tokens):
    return (((total * prompt_tokens)/1000000) * 10) + (((total * completion_tokens)/1000000) * 30)

def cost_opus(total, input_token, output_token):
  return(((total * input_token)/1000000) * 15) + (((total * output_token)/1000000) * 75)

def cost_sonnet(total, input_token, output_token):
  return(((total * input_token)/1000000) * 3) + (((total * output_token)/1000000) * 15)

def cost_haiku(total, input_token, output_token):
  return(((total * input_token)/1000000) * 0.25) + (((total * output_token)/1000000) * 1.25)

In [7]:
display(Image(url=image_url_1,width=450))

print('\n')
print(BLUE + 'GPT-4-1106-preview' + END, '\n')
st = time.time()
result_gpt = gpt_description(image_url_1, prompt_1)
print('Time: ', round(time.time()-st, 2),'seconds','\n')
print('Number of input tokens:', result_gpt.usage.prompt_tokens)
print('Number of output tokens:', result_gpt.usage.completion_tokens, '\n')
print('Cost for 200k images: ', f'${cost_gpt(100000,result_gpt.usage.prompt_tokens,result_gpt.usage.completion_tokens)} $')
print('Cost per image: ', f'${cost_gpt(1,result_gpt.usage.prompt_tokens,result_gpt.usage.completion_tokens)} $','\n')
for l in result_gpt.choices[0].message.content.split('\n'):
        print(l)


print('\n')
print(BLUE + 'Opus' + END, '\n')
st = time.time()
result_opus = opus_description(image_url_1, prompt_1)
print('Time: ', round(time.time()-st, 2),'seconds','\n')
print('Number of input tokens:', result_opus.usage.input_tokens)
print('Number of output tokens:', result_opus.usage.output_tokens, '\n')
print('Cost for 200k images: 'f'{cost_opus(100000,result_opus.usage.input_tokens,result_opus.usage.output_tokens)} $')
print('Cost per image: ', f'{cost_opus(1,result_opus.usage.input_tokens,result_opus.usage.output_tokens)} $','\n')
for l in result_opus.content[0].text.split('\n'):
        print(l)


print('\n')
print(BLUE + 'Sonnet' + END, '\n')
st = time.time()
result_sonnet = sonnet_description(image_url_1, prompt_1)
print('Time: ', round(time.time()-st, 2),'seconds','\n')
print('Number of input tokens:', result_sonnet.usage.input_tokens)
print('Number of output tokens:', result_sonnet.usage.output_tokens, '\n')
print('Cost for 200k images: 'f'{cost_sonnet(100000,result_sonnet.usage.input_tokens,result_sonnet.usage.output_tokens)} $')
print('Cost per image: ', f'{cost_sonnet(1,result_sonnet.usage.input_tokens,result_sonnet.usage.output_tokens)} $','\n')
for l in result_sonnet.content[0].text.split('\n'):
        print(l)



print('\n')
print(BLUE + 'Haiku' + END, '\n')
st = time.time()
result_haiku = haiku_description(image_url_1, prompt_1)
print('Time: ', round(time.time()-st, 2),'seconds','\n')
print('Number of input tokens:', result_haiku.usage.input_tokens)
print('Number of output tokens:', result_haiku.usage.output_tokens, '\n')
print('Cost for 200k images: 'f'{cost_haiku(100000,result_haiku.usage.input_tokens,result_haiku.usage.output_tokens)} $')
print('Cost per image: ', f'{cost_haiku(1,result_haiku.usage.input_tokens,result_haiku.usage.output_tokens)} $','\n')
for l in result_haiku.content[0].text.split('\n'):
        print(l)



[94mGPT-4-1106-preview[0m 

Time:  25.02 seconds 

Number of input tokens: 269
Number of output tokens: 354 

Cost for 200k images:  $1331.0 $
Cost per image:  $0.013309999999999999 $ 

[
    {
        "historical background": [
            "The image shows Red Square in Moscow, which has been a central place in Russian history since the 15th century.",
            "It is surrounded by significant buildings, including the Kremlin, St. Basil's Cathedral, the State Historical Museum, and the GUM department store.",
            "Red Square has been the site of numerous important events in Russian history, including coronations, parades, and political demonstrations."
        ],
        "main attractions": [
            "St. Basil's Cathedral, known for its colorful, onion-shaped domes and its position as a symbol of Russian architecture.",
            "The Kremlin, a historic fortified complex that includes palaces, cathedrals, and the official residence of the President of Russia.",


In [8]:
display(Image(url=image_url_2,width=450))

print('\n')
print(BLUE + 'GPT-4-1106-preview' + END, '\n')
st = time.time()
result_gpt = gpt_description(image_url_2, prompt_2)
print('Time: ', round(time.time()-st, 2),'seconds','\n')
print('Number of input tokens:', result_gpt.usage.prompt_tokens)
print('Number of output tokens:', result_gpt.usage.completion_tokens, '\n')
print('Cost for 200k images: ', f'${cost_gpt(100000,result_gpt.usage.prompt_tokens,result_gpt.usage.completion_tokens)} $')
print('Cost per image: ', f'${cost_gpt(1,result_gpt.usage.prompt_tokens,result_gpt.usage.completion_tokens)} $','\n')
for l in result_gpt.choices[0].message.content.split('\n'):
        print(l)


print('\n')
print(BLUE + 'Opus' + END, '\n')
st = time.time()
result_opus = opus_description(image_url_2, prompt_2)
print('Time: ', round(time.time()-st, 2),'seconds','\n')
print('Number of input tokens:', result_opus.usage.input_tokens)
print('Number of output tokens:', result_opus.usage.output_tokens, '\n')
print('Cost for 200k images: 'f'{cost_opus(100000,result_opus.usage.input_tokens,result_opus.usage.output_tokens)} $')
print('Cost per image: ', f'{cost_opus(1,result_opus.usage.input_tokens,result_opus.usage.output_tokens)} $','\n')
for l in result_opus.content[0].text.split('\n'):
        print(l)


print('\n')
print(BLUE + 'Sonnet' + END, '\n')
st = time.time()
result_sonnet = sonnet_description(image_url_2, prompt_2)
print('Time: ', round(time.time()-st, 2),'seconds','\n')
print('Number of input tokens:', result_sonnet.usage.input_tokens)
print('Number of output tokens:', result_sonnet.usage.output_tokens, '\n')
print('Cost for 200k images: 'f'{cost_sonnet(100000,result_sonnet.usage.input_tokens,result_sonnet.usage.output_tokens)} $')
print('Cost per image: ', f'{cost_sonnet(1,result_sonnet.usage.input_tokens,result_sonnet.usage.output_tokens)} $','\n')
for l in result_sonnet.content[0].text.split('\n'):
        print(l)



print('\n')
print(BLUE + 'Haiku' + END, '\n')
st = time.time()
result_haiku = haiku_description(image_url_2, prompt_2)
print('Time: ', round(time.time()-st, 2),'seconds','\n')
print('Number of input tokens:', result_haiku.usage.input_tokens)
print('Number of output tokens:', result_haiku.usage.output_tokens, '\n')
print('Cost for 200k images: 'f'{cost_haiku(100000,result_haiku.usage.input_tokens,result_haiku.usage.output_tokens)} $')
print('Cost per image: ', f'{cost_haiku(1,result_haiku.usage.input_tokens,result_haiku.usage.output_tokens)} $','\n')
for l in result_haiku.content[0].text.split('\n'):
        print(l)



[94mGPT-4-1106-preview[0m 

Time:  9.72 seconds 

Number of input tokens: 218
Number of output tokens: 165 

Cost for 200k images:  $713.0 $
Cost per image:  $0.007129999999999999 $ 

[
{
"animal name": ["Beagle"],
"overview": [
"The Beagle is a breed of small hound that is similar in appearance to the much larger foxhound.",
"Beagles are scent hounds, developed primarily for tracking hare, rabbit, and other game.",
"They are known for their keen sense of smell and tracking instinct.",
"Beagles are intelligent, and are popular as pets because of their size, even temper, and lack of inherited health problems.",
"These dogs are also used as detection dogs in quarantine around the world due to their acute sense of smell.",
"Beagles are active companions for kids and adults alike.",
"Their habitats include temperate climates and domestic settings.",
"Beagles were originally bred for hunting purposes and have a strong instinct to follow scents."
]
}
]


[94mOpus[0m 

Time:  17.22 seco