In [1]:
from openai import OpenAI
import anthropic
import google.generativeai as genai

import PIL.Image
import base64
import requests
import httpx
import os
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Prompt text and question table read by the cells below; all three start out as empty strings.
# NOTE(review): later cells index `df` by column name ('id', 'case', 'question',
# 'correct_response'), which an empty string cannot satisfy — presumably a DataFrame
# is meant to be assigned here; confirm against the real data source.
system_text, user_text, df = '', '', ''


In [4]:
# One client per provider, kept as module-level globals for the request code below.
# No key is passed to OpenAI(); the client's own error message says it then expects
# the OPENAI_API_KEY environment variable to be set.
chat_gpt = OpenAI()
# Also built without an explicit key — presumably picked up from the environment; confirm.
claude = anthropic.Anthropic()
# The Google SDK is configured globally with the key read from GOOGLE_API_KEY.
genai.configure(api_key=os.environ.get('GOOGLE_API_KEY'))
gemini = genai.GenerativeModel(model_name='gemini-pro-vision')

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [5]:
def convert_png_to_jpeg(png_path, jpeg_path):
    """
    Re-encode an image file as JPEG.

    Args:
        png_path (str): Path of the image file to read.
        jpeg_path (str): Path the JPEG copy is written to (overwritten if present).
    """
    # The notebook imports the module as `import PIL.Image`, so the bare name
    # `Image` is never bound; reference it through the `PIL` package instead.
    # The context manager releases the source file handle once the copy is saved.
    with PIL.Image.open(png_path) as img:
        # Convert to RGB before saving, since JPEG has no alpha channel or palette mode.
        rgb_img = img.convert('RGB')
        rgb_img.save(jpeg_path, 'JPEG')
    
def encode_image(image_path):
    """
    Read a file and return its bytes as base64 text.

    Args:
        image_path (str): Path of the file to read in binary mode.

    Returns:
        str: The base64 encoding of the file contents, decoded as UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def lanuage_models(system_text, user_text, image, max_tokens=1024):
    """
    Send the same prompt and image to the OpenAI, Anthropic and Google models and return their text replies.

    Relies on the module-level clients `chat_gpt`, `claude` and `gemini`.

    Args:
        system_text (str): The system text to provide context for the conversation.
        user_text (str): The user text representing the user's input.
        image (str): Path to the image *without* its extension. ``image + '.png'`` is read
            and a JPEG copy is written to ``image + '.jpeg'`` before being sent to the models.
        max_tokens (int, optional): The maximum number of tokens to generate in each response.
            Defaults to 1024.

    Returns:
        tuple: A tuple containing the text of each model's reply.
            - chat_gpt_response (str): The reply from the OpenAI GPT-4 Turbo model.
            - claude_response (str): The reply from the Anthropic Claude model.
            - gemini_response (str): The reply from the Google Gemini model.
    """
    jpeg_path = image + '.jpeg'
    convert_png_to_jpeg(image + '.png', jpeg_path)
    base64_image = encode_image(jpeg_path)

    # openai
    response = chat_gpt.chat.completions.create(
        model="gpt-4-turbo",
        max_tokens=max_tokens,
        messages=[
            {"role": "system", "content": system_text},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_text},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                    },
                ],
            },
        ],
    )
    # Return the reply text itself rather than the Choice wrapper object.
    chat_gpt_response = response.choices[0].message.content

    # anthropic
    response = claude.messages.create(
        model="claude-3-opus-20240229",
        max_tokens=max_tokens,
        system=system_text,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/jpeg",
                            "data": base64_image,
                        },
                    },
                    {"type": "text", "text": user_text},
                ],
            }
        ],
    )
    # `content` is a list of content blocks; keep only the text ones and join them into one string.
    claude_response = "".join(
        block.text for block in response.content if block.type == "text"
    )

    # google
    # The system text is folded into the prompt, as in the original call.
    # The request is not streamed: `.text` is only readable once the whole reply has arrived.
    with PIL.Image.open(jpeg_path) as pil_image:
        response = gemini.generate_content(
            [system_text + ' ' + user_text, pil_image],
            # Apply the same output cap as the other two providers.
            generation_config={"max_output_tokens": max_tokens},
        )
    gemini_response = response.text

    return chat_gpt_response, claude_response, gemini_response
   

In [None]:
# Trial run on question ids 1 and 2 only.
result_columns = ['id', 'correct_response', 'chat_gpt_response', 'claude_response', 'gemini_response']
records = []

for question_id in range(1, 3):
    # Select the row(s) for this question once instead of re-filtering per column.
    rows = df.loc[df['id'] == question_id]
    question = rows['question'].values[0]

    # Prepend the case description when the question has one.
    if rows['case'].notna().all():
        user_text = rows['case'].values[0] + question
    else:
        user_text = question

    chat_gpt_response, claude_response, gemini_response = lanuage_models(
        system_text,
        user_text,
        f'data/images/Amboss_SURG_images_{question_id:02}',
        max_tokens=1024,
    )

    # Collect plain dicts: DataFrame.append returned a new frame (which was being
    # discarded) and no longer exists in pandas 2.x.
    records.append({
        'id': question_id,
        'correct_response': rows['correct_response'].values[0],
        'chat_gpt_response': chat_gpt_response,
        'claude_response': claude_response,
        'gemini_response': gemini_response,
    })

# Build the frame once, with the five intended columns.
results = pd.DataFrame(records, columns=result_columns)
    

In [None]:
# Full run over every question id present in `df`.
result_columns = ['id', 'correct_response', 'chat_gpt_response', 'claude_response', 'gemini_response']
records = []

for question_id in df['id']:
    # Select the row(s) for this question once instead of re-filtering per column.
    rows = df.loc[df['id'] == question_id]
    question = rows['question'].values[0]

    # Prepend the case description when the question has one.
    if rows['case'].notna().all():
        user_text = rows['case'].values[0] + question
    else:
        user_text = question

    chat_gpt_response, claude_response, gemini_response = lanuage_models(
        system_text,
        user_text,
        f'data/images/Amboss_SURG_images_{question_id:02}',
        max_tokens=1024,
    )

    # Collect plain dicts: DataFrame.append returned a new frame (which was being
    # discarded) and no longer exists in pandas 2.x.
    records.append({
        'id': question_id,
        'correct_response': rows['correct_response'].values[0],
        'chat_gpt_response': chat_gpt_response,
        'claude_response': claude_response,
        'gemini_response': gemini_response,
    })

# Build the frame once, with the five intended columns.
results = pd.DataFrame(records, columns=result_columns)
    