In [20]:
import os
import base64
import requests
import pandas as pd
from dotenv import load_dotenv
import ast
from tqdm.notebook import tqdm
import time

In [21]:
# Load variables from a local .env file (if python-dotenv finds one) so the
# API keys come from the environment instead of being written in the notebook.
load_dotenv()

# Each value is None when the corresponding variable is not set.
OPENAI_API = os.environ.get("OPENAI_API_KEY")
CLAUDE_API = os.environ.get("CLAUDE_API_KEY")
# Do not echo the keys in this cell: outputs are saved with the notebook.

In [22]:
# Locations are relative to the directory the notebook is run from.
image_path = "../dataset/images"  # folder containing the image files
csv_path = "../annotation/relevants/all/test_data.csv"  # question/answer sheet

# One row per question; preview the first rows as the cell output.
test_df = pd.read_csv(filepath_or_buffer=csv_path)
test_df.head()

Unnamed: 0,filename,question,options,answer,category,id
0,CHITTRON_7834.png,চিত্রের শিশুটি কোন ধরনের শিক্ষা গ্রহণ করছে?,"['চারুকলা', 'গান', 'একাডেমিক', 'নৃত্য']",নৃত্য,Arts & History,F3CH7834
1,CHITTRON_3642.png,বাংলার মর্মান্তিক ইতিহাস বহনকারী এই স্তম্ভটির ...,"['স্মৃতিসৌধ', 'শহীদ মিনার', 'রাজু ভাস্কর্য', '...",শহীদ মিনার,Arts & History,P9CH3642
2,BORNON_215.jpg,ছবিতে দেখা মুদ্রাটি কোন দেশের?,"['বাংলাদেশ', 'শ্রীলঙ্কা', 'ভারত', 'পাকিস্তান']",বাংলাদেশ,Arts & History,P5BO215
3,BNATURE_3915.jpg,ছবিতে মেয়েটি মাইক্রোফোনটি হাতে ধরে কী করছে?,"['গান গাচ্ছে', 'নাচছে', 'কথা বলছে', 'পড়াশোনা ...",গান গাচ্ছে,Arts & History,P5BN3915
4,CHITTRON_496.png,প্রাচীনকালে বাংলায় এমন বড় আকৃতির মাটির পাত্র ক...,"['ধান-চাল সংরক্ষনে', 'পানি সংরক্ষনে', 'কাপড় সং...",ধান-চাল সংরক্ষনে,Arts & History,P1CH496


In [23]:
# test_df = test_df[0:1]

In [6]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        str: The file's bytes, base64-encoded and decoded as UTF-8.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

In [7]:
def prompt_prep(question:str, options:list):
    """Build the multiple-choice prompt that accompanies the image.

    Args:
        question: Question text placed after ``**question:**``.
        options: Indexable collection; items 0-3 become choices 1-4.

    Returns:
        str: The instruction template with the question and the four
        options substituted in.

    Raises:
        IndexError: If ``options`` is a list with fewer than four items.
    """
    # Pull the four choices out by position before filling the template.
    first, second, third, fourth = options[0], options[1], options[2], options[3]

    # Trailing double spaces are markdown hard line breaks; keep them.
    template = """

You are an expert multimodal AI assistant. You will be given an image as context. Based on this image, you will be asked a multiple-choice question with four options.  
Your task is to select the most accurate answer from the given options.  

### **Instructions:**  
- The question and options will be presented in the following format:  
    - **question:** QUESTION  
    - **options:**  
        1. option_a  
        2. option_b  
        3. option_c  
        4. option_d  

- Your response should **only contain the number** corresponding to the correct answer.  
  - Example:  
    - If **option 1 is correct**, respond with `"answer: 1"`  
    - If **option 3 is correct**, respond with `"answer: 3"`  

Now, answer the following question based on the provided image:  

**question:** {}  
**options:**  
1. {}  
2. {}  
3. {}  
4. {}  
    """
    return template.format(question, first, second, third, fourth)

### OpenAI

In [17]:
def _guess_image_mime(image_b64, default="image/jpeg"):
    """Infer an image MIME type from the first bytes of base64-encoded data.

    Falls back to ``default`` when the data cannot be decoded or no known
    signature matches.
    """
    try:
        # 16 base64 characters decode to 12 bytes, enough for the signatures below.
        header = base64.b64decode(image_b64[:16])
    except (ValueError, TypeError):
        return default
    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if header.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if header[:6] in (b"GIF87a", b"GIF89a"):
        return "image/gif"
    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        return "image/webp"
    return default


def generate(question: str, image: str, options: list,
             model: str = "gpt-4o-mini", max_tokens: int = 30,
             timeout: float = 60):
    """Send one image + multiple-choice question to the OpenAI chat endpoint.

    Args:
        question: Question text, passed to ``prompt_prep``.
        image: Base64-encoded image data (as produced by ``encode_image``).
        options: The four answer options, passed to ``prompt_prep``.
        model: Chat model name placed in the request payload.
        max_tokens: Completion token limit placed in the request payload.
        timeout: Seconds ``requests`` waits before giving up, so a stalled
            connection cannot block the caller forever.

    Returns:
        The decoded JSON body of the HTTP response. The HTTP status is not
        checked here, so an error payload is returned as-is.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API}"
    }

    # Label the data URL with the real format; the dataset mixes PNG and JPEG
    # files, and unknown data keeps the previous "image/jpeg" label.
    mime_type = _guess_image_mime(image)

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_prep(question, options)},
                    {"type": "image_url", "image_url": {"url":
                        f"data:{mime_type};base64,{image}"}}
                ]
            }
        ],
        "max_tokens": max_tokens
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )

    return response.json() #['choices'][0]['message']['content']


In [18]:
def generate_answer(item):
    """Ask the model one question from a row and return its raw reply text.

    Args:
        item: Mapping-like row with ``filename``, ``question`` and ``options``
            entries. ``options`` may be a Python-literal string such as
            ``"['a', 'b', 'c', 'd']"`` or an already-parsed list.

    Returns:
        The ``content`` of the first choice in the API response.

    Raises:
        KeyError: If the API response has no ``choices`` entry; the message
            includes the API's error detail instead of just ``'choices'``.
    """
    image = encode_image(f"{image_path}/{item['filename']}")
    question = item["question"]

    # The CSV stores options as the repr of a list; parse only when needed.
    options = item["options"]
    if isinstance(options, str):
        options = ast.literal_eval(options)

    response = generate(question, image, options)

    # Error payloads carry an "error" object and no "choices".
    if isinstance(response, dict) and "choices" not in response:
        detail = response.get("error", response)
        raise KeyError(
            f"no 'choices' in API response for {item['filename']}: {detail}"
        )

    answer = response['choices'][0]['message']['content']
    print(answer)
    return answer

In [19]:
# Call generate_answer once per row and keep the model's raw reply in a new column.
test_df["openai_response"] = test_df.apply(generate_answer, axis="columns")

answer: 4


In [16]:
# Display the frame as the cell output.
# NOTE(review): the saved output shows a column named `openai_response2`, but the
# code in this notebook writes `openai_response`. The output looks like it is
# from an earlier run; re-run from a fresh kernel to confirm.
test_df

Unnamed: 0,filename,question,options,answer,category,id,openai_response2
0,CHITTRON_7834.png,চিত্রের শিশুটি কোন ধরনের শিক্ষা গ্রহণ করছে?,"['চারুকলা', 'গান', 'একাডেমিক', 'নৃত্য']",নৃত্য,Arts & History,F3CH7834,answer: 2


### Claude

In [9]:
import anthropic

# Text-only request to check that the Anthropic key and client work.
client = anthropic.Anthropic(api_key=CLAUDE_API)

message = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    system="You are a world-class poet. Respond only with short poems.",
    messages=[
        {
            "role": "user",
            "content": [{"type": "text", "text": "Why is the ocean salty?"}],
        }
    ],
    max_tokens=1000,
    temperature=0,
)

# The reply text is read from the first content block.
print(message.content[0].text)


Tears of ancient earth,
Minerals dissolved in time,
Rivers carry salt to sea -
Nature's briny paradigm.

Eons pass, waves roll on,
Each drop holds earth's history,
While creatures swim in crystal depths
Of this eternal salty sea.


In [10]:
# Show the full response object (content blocks, model, stop reason, usage).
message

Message(id='msg_01HFEizqfpzYNy36qTxNJc7D', content=[TextBlock(citations=None, text="Tears of ancient earth,\nMinerals dissolved in time,\nRivers carry salt to sea -\nNature's briny paradigm.\n\nEons pass, waves roll on,\nEach drop holds earth's history,\nWhile creatures swim in crystal depths\nOf this eternal salty sea.", type='text')], model='claude-3-5-sonnet-20241022', role='assistant', stop_reason='end_turn', stop_sequence=None, type='message', usage=Usage(cache_creation_input_tokens=0, cache_read_input_tokens=0, input_tokens=29, output_tokens=64))

In [14]:

# Total tokens reported for the call above: input plus output.
sum((message.usage.input_tokens, message.usage.output_tokens))

93