In [1]:
import dotenv
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import json
import pandas as pd

# Pull settings from a local .env file into the process environment,
# then look up the API key (None when the variable is not set).
dotenv.load_dotenv()
api_key = os.environ.get("API_KEY")

In [2]:
from google import genai

# Shared Gen AI client used by every request helper in this notebook.
client = genai.Client(api_key=api_key)

In [4]:
# Per-image metadata table: image id, image path and the raw listing JSON string.
image_data_csv = "Dataset/metadata/image_data.csv"
images_data = pd.read_csv(image_data_csv)
images_data.head()

Unnamed: 0,image_id,image_path,listing
0,81iZlv3bjpL,Dataset/final_dataset/8ccb5859.jpg,"{""brand"": [{""language_tag"": ""nl_NL"", ""value"": ..."
1,619y9YG9cnL,Dataset/final_dataset/9f76d27b.jpg,"{""item_dimensions"": {""height"": {""normalized_va..."
2,81NP7qh2L6L,Dataset/final_dataset/665cc994.jpg,"{""item_dimensions"": {""height"": {""normalized_va..."
3,61Rp4qOih9L,Dataset/final_dataset/b4f9d0cc.jpg,"{""brand"": [{""language_tag"": ""en_GB"", ""value"": ..."
4,714CmIfKIYL,Dataset/final_dataset/2b1c2516.jpg,"{""brand"": [{""language_tag"": ""en_AU"", ""value"": ..."


In [6]:
def test_response(image_path, prompt, model="gemini-2.0-flash"):
    """Send one image plus a text prompt to the model and return the text reply.

    Args:
        image_path: Path (str or os.PathLike) of the image file to attach.
            Any other object is forwarded to the SDK unchanged, as before.
        prompt: Text prompt sent together with the image.
        model: Name of the model to query.

    Returns:
        The ``text`` attribute of the generate_content response.

    Raises:
        OSError: If ``image_path`` is a path that cannot be opened.
    """
    import mimetypes  # local import: only this helper needs it

    if isinstance(image_path, (str, os.PathLike)):
        # A bare string in `contents` is sent as text, so the file has to be
        # read and attached as inline image bytes for the model to see it.
        with open(image_path, "rb") as img_file:
            image_bytes = img_file.read()

        # Fall back to JPEG when the extension is not a recognised image type.
        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"

        contents = [
            {
                "role": "user",
                "parts": [
                    {"inline_data": {"mime_type": mime_type, "data": image_bytes}},
                    {"text": prompt},
                ],
            }
        ]
    else:
        # Not a path: hand the object to the SDK exactly as before.
        contents = [image_path, prompt]

    response = client.models.generate_content(model=model, contents=contents)

    return response.text

In [7]:
def chat_response(image_path, user_prompt, system_prompt, model="gemini-2.0-flash"):
    """Ask the model about a local image and return its text reply.

    Args:
        image_path: Path of the image file to attach to the request.
        user_prompt: Per-request text sent in the user turn next to the image.
        system_prompt: Instructions for the model, sent as the request's
            system instruction. May be empty or None, in which case no
            system instruction is sent.
        model: Name of the model to query.

    Returns:
        The ``text`` attribute of the generate_content response.

    Raises:
        OSError: If ``image_path`` cannot be opened.
    """
    import mimetypes  # local import: only this helper needs it

    # Read the image as binary
    with open(image_path, "rb") as img_file:
        image_data = img_file.read()

    # Fall back to JPEG (the previously hard-coded type) when the extension
    # is not a recognised image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Single user turn: the image followed by the request text
    contents = [
        {
            "role": "user",
            "parts": [
                {"inline_data": {"mime_type": mime_type, "data": image_data}},
                {"text": user_prompt},
            ],
        }
    ]

    # System-level guidance travels in the request config rather than being
    # glued onto the user text.
    config = {"system_instruction": system_prompt} if system_prompt else None

    # Generate the response
    response = client.models.generate_content(
        model=model, contents=contents, config=config
    )

    return response.text

In [8]:
def get_keywords(data, max_keywords=5):
    """Build the keyword list used as prompt metadata for one listing.

    Args:
        data: Parsed listing dict. Its optional "item_keywords" entry is a
            list of dicts carrying "language_tag" and "value" keys.
        max_keywords: Maximum number of English keywords to keep.

    Returns:
        A new list that starts with 'color', followed by at most
        ``max_keywords`` values whose language tag starts with 'en',
        in their original order.
    """
    # `or []` / `or ""` also cover keys that are present but hold None (JSON null).
    entries = data.get("item_keywords") or []
    english_values = [
        kw.get("value", "")
        for kw in entries
        if (kw.get("language_tag") or "").startswith("en")
    ]
    # Clamp so a negative limit cannot turn into a "drop from the end" slice.
    limit = max(0, max_keywords)
    return ["color"] + english_values[:limit]

In [9]:
# Sanity check: print the keyword list extracted for the first three rows.
for i in range(3):
    row = images_data.iloc[i]
    image, image_id = row['image_path'], row['image_id']
    listing = json.loads(row['listing'])
    keywords = get_keywords(listing)
    print(keywords)

['color']
['color']
['color', '3d printer filament', 'petg printer filament', 'petg filament', '1.75mm printer filament', '1kg spool printer filament']


In [10]:
# Where the table of VQA responses is stored
csv_file_path = "Dataset/metadata/image_data_with_vqa.csv"

if os.path.exists(csv_file_path):
    print(f"File already exists: {csv_file_path}")
else:
    # No file yet: write a placeholder table with one row per image in
    # images_data, every cell holding an empty string.
    n_rows = len(images_data)
    df = pd.DataFrame(
        {
            column: [""] * n_rows
            for column in ("image_id", "image_path", "vqa_responses")
        }
    )

    # Make sure the target directory is present before writing
    os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)
    df.to_csv(csv_file_path, index=False)
    print(f"File created: {csv_file_path}")

File created: Dataset/metadata/image_data_with_vqa.csv


In [11]:
# Instructions shared by every request in this cell
system_prompt = """
You are a Visual Question Answering (VQA) dataset generator.
Given an image and a list of metadata strings, generate diverse, high-quality question-answer pairs that cover visual recognition, attributes, relationships, metadata, and reasoning.
Output each pair as list.
"""
# One raw model reply per processed row, in row order
vqa_responses = []
for i in range(1):  # only the first row is processed here
    image = images_data.iloc[i]['image_path']
    image_id = images_data.iloc[i]['image_id']
    listing = images_data.iloc[i]['listing']
    listing = json.loads(listing)
    keywords = get_keywords(listing)
    user_prompt = f"""
    Image ID: {image_id}
    Metadata: {keywords}
    Generate 2-3 question-answer pairs.
    """

    response = chat_response(image, user_prompt, system_prompt)
    # Keep the reply so it can be saved later instead of only being printed
    vqa_responses.append(response)
    print(response)

# TODO: save the responses to a CSV file. Assigning the column below needs
# one entry in vqa_responses per row of images_data, so enable it only once
# the loop covers the whole table.
# images_data['vqa_response'] = vqa_responses
# images_data.to_csv("Dataset/metadata/image_data_with_vqa.csv", index=False)
    

Here are some question-answer pairs based on the image and metadata:

```json
[
  ["What is the color of the shoe in the image?", "Pink and brown"],
  ["Is the shoe patterned in color?", "Yes"]
]
```
