In [2]:
import os
import PIL.Image as Image
import json
import ast
import pandas as pd
import google.generativeai as genai
import random
import time
import tqdm

from dotenv import load_dotenv

In [3]:
load_dotenv()

GEMINI_API = os.getenv("GOOGLE_API")

In [4]:
image_path = "../dataset/images"
csv_path = "outputs/test_data_claude.csv"

test_df = pd.read_csv(csv_path)
test_df.head()

Unnamed: 0,filename,question,options,answer,category,id,openai_response
0,CHITTRON_7834.png,চিত্রের শিশুটি কোন ধরনের শিক্ষা গ্রহণ করছে?,"['চারুকলা', 'গান', 'একাডেমিক', 'নৃত্য']",নৃত্য,Arts & History,F3CH7834,answer: 4
1,CHITTRON_3642.png,বাংলার মর্মান্তিক ইতিহাস বহনকারী এই স্তম্ভটির ...,"['স্মৃতিসৌধ', 'শহীদ মিনার', 'রাজু ভাস্কর্য', '...",শহীদ মিনার,Arts & History,P9CH3642,answer: 2
2,BORNON_215.jpg,ছবিতে দেখা মুদ্রাটি কোন দেশের?,"['বাংলাদেশ', 'শ্রীলঙ্কা', 'ভারত', 'পাকিস্তান']",বাংলাদেশ,Arts & History,P5BO215,answer: 1
3,BNATURE_3915.jpg,ছবিতে মেয়েটি মাইক্রোফোনটি হাতে ধরে কী করছে?,"['গান গাচ্ছে', 'নাচছে', 'কথা বলছে', 'পড়াশোনা ...",গান গাচ্ছে,Arts & History,P5BN3915,answer: 1
4,CHITTRON_496.png,প্রাচীনকালে বাংলায় এমন বড় আকৃতির মাটির পাত্র ক...,"['ধান-চাল সংরক্ষনে', 'পানি সংরক্ষনে', 'কাপড় সং...",ধান-চাল সংরক্ষনে,Arts & History,P1CH496,answer: 2


In [5]:
api_keys = ['AIzaSyA5NgHvV2JHIBHDDtOOR226-vs_ul4Pe_E',
            'AIzaSyArfIpnxMnbLa0N__Pm0hFcDPITt-1WBJA', #77
            'AIzaSyBdD0SDqq6TwnYaE4navII0xutePGocwRQ', #surs
            'AIzaSyClKaxHdR1g64yllo2oIL45bCfUAlyny00', #fabiha
            'AIzaSyBu-S7ovwJBmeFJWfQZL00VS4fkj5DJiSY', #deep1
            'AIzaSyCjbNGV9Wii2j6HB_QibGdW-0DeNx-8m0U', #deep2
            'AIzaSyAkwbMZZ-rxt6abHyQ9pt3yMmVrFipu04U', #deep3
            'AIzaSyCs51WCsYxqc0BoThbM2syyyQ2-lasMsdQ', #deep4
            'AIzaSyDC4_XuDnK5r4DETKAqPnG6gyhGkq0Oo44', #deep5
            ]

In [6]:
genai.configure(api_key=GEMINI_API)

In [7]:
for m in genai.list_models():
  if 'generateContent' in m.supported_generation_methods:
    print(m.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experimental


In [8]:
model = genai.GenerativeModel('models/gemini-2.0-flash')

In [9]:
def prompt_prep(question:str, options:list):
    return """

You are an expert multimodal AI assistant. You will be given an image as context. Based on this image, you will be asked a multiple-choice question with four options.  
Your task is to select the most accurate answer from the given options.  

### **Instructions:**  
- The question and options will be presented in the following format:  
    - **question:** QUESTION  
    - **options:**  
        1. option_a  
        2. option_b  
        3. option_c  
        4. option_d  

- Your response should **only contain the number** corresponding to the correct answer.  [IMPORTANT]
  - Example:  
    - If **option 1 is correct**, respond with `"answer: 1"`  
    - If **option 3 is correct**, respond with `"answer: 3"`  

Now, answer the following question based on the provided image:  

**question:** {}  
**options:**  
1. {}  
2. {}  
3. {}  
4. {}  

    """.format(question, options[0], options[1],options[2], options[3])

In [12]:
column_name = 'gemini_generated'
test_df.loc[:,column_name] = None

In [16]:
slicedf = test_df.iloc[0:53]
slicedf.tail(2)

Unnamed: 0,filename,question,options,answer,category,id,openai_response,gemini_generated
51,BNATURE_6715.jpg,নৌকা টি কোন ধরনের?,"['ডিঙ্গি', 'বাজরা', 'সাম্পান', 'জাহাজ']",ডিঙ্গি,Arts & History,P7BN6715,answer: 3,
52,CHITTRON_3677.png,বাংলাদেশের প্রাচীন এই লোকসংগীতটির নাম কি?,"['হাসন রাজার গান', 'ভাটিয়ালী', 'জারি গান', 'ব...",বাউল গান,Arts & History,P9CH3677,answer: 4,


In [10]:
# def generate_gemini(question: str, image: str, options:list):
   
    
#     image = Image.open(f"{image_path}/{image}")
    
#     genai.configure(api_key=api_keys[random.randint(0,8)])
#     model = genai.GenerativeModel('models/gemini-1.5-pro-latest')    
#     response = model.generate_content([prompt_prep(question, options), image], stream=True)
#     response.resolve()
#     time.sleep(1)
    
#     return response.text

In [18]:
for idx, row in (slicedf.iterrows()):
        
    image = row['filename']
    
    image = Image.open(f"{image_path}/{image}")
    
    question = row["question"]
    options = ast.literal_eval(row["options"])

    
    genai.configure(api_key=api_keys[(idx)%9])
    model = genai.GenerativeModel('models/gemini-1.5-pro-latest')    
    response = model.generate_content([prompt_prep(question, question), image], stream=True)
    response.resolve()

    test_df.loc[test_df['filename'] == str(row['filename']), column_name] = response.text
    
    print(f"done: {row['filename']} and idx: ",idx," answer: ", str(response.text))
    
    time.sleep(1)

done: CHITTRON_7834.png and idx:  0  filename:  answer: 1

done: CHITTRON_3642.png and idx:  1  filename:  answer: 3
done: BORNON_215.jpg and idx:  2  filename:  answer: 2
done: BNATURE_3915.jpg and idx:  3  filename:  answer: 2

done: CHITTRON_496.png and idx:  4  filename:  answer: 4
done: CHITTRON_4688.png and idx:  5  filename:  answer: 2
done: BORNON_3396.jpg and idx:  6  filename:  answer: 1

done: BORNON_324.jpg and idx:  7  filename:  answer: 2
done: BORNON_3694.jpg and idx:  8  filename:  answer: 2
done: CHITTRON_4894.png and idx:  9  filename:  answer: 2

done: BNATURE_1109.jpg and idx:  10  filename:  answer: 1
done: BNATURE_1109.jpg and idx:  11  filename:  answer: 2

done: CHITTRON_4708.png and idx:  12  filename:  answer: 2
done: BORNON_42.jpg and idx:  13  filename:  answer: 3
done: BNATURE_7972.jpg and idx:  14  filename:  answer: 2

done: CHITTRON_4803.png and idx:  15  filename:  answer: 3

done: CHITTRON_151.png and idx:  16  filename:  answer: 2
done: BNATURE_3341.j

In [43]:
# def generate_answer(item):
#     image = item['filename']
#     question = item["question"]
#     options = ast.literal_eval(item["options"])
#     response = generate_gemini(question, image, options)
#     print(1)
    
#     return response

In [44]:
test_df["gemini_response"] = test_df.apply(generate_answer, axis=1)

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


ResourceExhausted: 429 Resource has been exhausted (e.g. check quota).

In [36]:
test_df.isna().sum()

filename           0
question           0
options            0
answer             0
category           0
id                 0
openai_response    0
dtype: int64