In [1]:
import os
import json
import random

In [2]:
with open(f'/kaggle/input/vqa-labeled/image2id.json', 'r') as f:
    image2id = json.load(f)
with open(f'/kaggle/input/vqa-labeled/id2image.json', 'r') as f:
    id2image = json.load(f)
with open(f'/kaggle/input/vqa-labeled/error_image.json', 'r') as f:
    error_image = json.load(f)
with open(f'/kaggle/input/vqa-labeled/annotations.json', 'r') as f:
    combined_annotations = json.load(f)

In [3]:
count = {
    "yes": 0,
    "no": 0
}
def get_opposite_position(position):
    opposite = {
        "above": "below",
        "below": "above",
        "behind": "front",
        "front": "behind",
        "left": "right",
        "right": "left"
    }
    return opposite.get(position, position)

def get_random_color(exclude_color, color_list):
    available_colors = [c for c in color_list if c != exclude_color]
    return random.choice(available_colors) if available_colors else exclude_color
    
def modify_yes_no_questions(annotations, position_list, color_list):
    modified_annotations = []
    for qa_pair in annotations:
        question = qa_pair["question"]
        answer = qa_pair["answer"]
        
        if answer.lower() == "yes":
            words = question.split()
            modified = False
            
            for i, word in enumerate(words):
                if word in position_list:
                    words[i] = get_opposite_position(word)
                    modified = True
                    break  # Modify only one term per question
                elif word in color_list:
                    words[i] = get_random_color(word, color_list)
                    modified = True
                    break
            
            if modified:
                if random.random() < 0.5:
                    count["no"] += 1
                    modified_annotation = " ".join(words)
                    modified_annotations.append({"question": modified_annotation, "answer": "No"})
                else:
                    count["yes"] += 1
            else:
                modified_annotations.append(qa_pair)
        else: 
            modified_annotations.append(qa_pair)
    
    return modified_annotations

In [4]:
total = 0
error = 0
error_2 = 0
error_llm = None
clean_error_llm = []
position = ["above", "below", "behind", "front", "left", "right"]
color  = ['black', 'blue', 'brown', 'green', 'grey', 'orange', 'pink', 'purple', 'red', 'white', 'yellow']
yes_no = ['yes', 'no']
for folder in sorted(os.listdir("/kaggle/input")):
    if folder != "vqa-labeled":
        path = os.path.join("/kaggle/input", folder)
        print(folder)
        with open(f'{path}/annotations.json', 'r') as f:
            annotations = json.load(f)
        for annotate in annotations:
            if len(annotate["annotations"]) > 0:
                total += 1
                filtered_annotations = []
                for pairs in annotate["annotations"]:
                    if pairs['answer'].lower() in position or pairs['answer'].lower() in color or pairs['answer'].lower() in yes_no:
                        filtered_annotations.append(pairs)
                
                modified_annotations = modify_yes_no_questions(filtered_annotations, position, color)
                combined_annotations[annotate["image_id"]]["annotations"].extend(modified_annotations)
            else:
                error += 1
        with open(f'{path}/error_llm.json', 'r') as f:
            error_llm = json.load(f)
        filtered_error = [x for x in error_llm if isinstance(x, int)]
        clean_error_llm.extend(filtered_error)
        error_2 += len(filtered_error)

label-vqa-dataset-raw
label-vqa-dataset-raw-10
label-vqa-dataset-raw-13
label-vqa-dataset-raw-16
label-vqa-dataset-raw-19
label-vqa-dataset-raw-25
label-vqa-dataset-raw-28
label-vqa-dataset-raw-3
label-vqa-dataset-raw-30
label-vqa-dataset-raw-5
label-vqa-dataset-raw-8


In [5]:
count

{'yes': 10806, 'no': 10887}

In [6]:
import pandas as pd

def annotations_to_dataframe(annotation_list, id2image=None, image2id=None):
    """
    Converts a list of annotation dictionaries to a Pandas DataFrame.

    Args:
        annotation_list: A list of dictionaries, where each dictionary has the structure:
            {image_id: int, annotations: list[Dict {"questions": str, "answer": str | int}]}
        id2image: (Optional) A dictionary mapping image IDs to image paths.
        image2id: (Optional) A dictionary mapping image paths to image IDs. (Not directly used in this function, but provided for context)

    Returns:
        pandas.DataFrame: A DataFrame where each row represents a question-answer pair
                         for a specific image. Columns will be:
                         'image_id', 'question', 'answer', and optionally 'image_path' if id2image is provided.
    """

    data = []
    for annotation_item in annotation_list:
        image_id = annotation_item['image_id']
        annotations = annotation_item['annotations']

        for annotation in annotations:
            question = annotation['question']
            answer = annotation['answer']
            data.append({
                'image_id': image_id,
                'question': question,
                'answer': answer
            })

    df = pd.DataFrame(data)

    if id2image:
        df['image_path'] = df['image_id'].astype(str).map(id2image)
    df['answer'] = df['answer'].astype(str)
    return df

df_annotations_with_path = annotations_to_dataframe(combined_annotations, id2image=id2image)
print("\nDataFrame with image_path:")
df_annotations_with_path.to_csv("data.csv", index=False)
df_annotations_with_path


DataFrame with image_path:


Unnamed: 0,image_id,question,answer,image_path
0,0,How many person in image?,1,airplane/14712178869_d91526ac2d_b.jpg
1,0,Are there any person in image?,Yes,airplane/14712178869_d91526ac2d_b.jpg
2,0,What is the name of the object in the image?,person,airplane/14712178869_d91526ac2d_b.jpg
3,0,Are there any bear in image?,No,airplane/14712178869_d91526ac2d_b.jpg
4,0,Is the thumb near the door?,Yes,airplane/14712178869_d91526ac2d_b.jpg
...,...,...,...,...
235418,23010,Is the zebra in the background?,Yes,zebra/zebra-zebra-crossing-animals-black-and-w...
235419,23010,What color are the stripes?,Black,zebra/zebra-zebra-crossing-animals-black-and-w...
235420,23010,What is the other color of the stripes?,White,zebra/zebra-zebra-crossing-animals-black-and-w...
235421,23010,Are there blue stripes?,No,zebra/zebra-zebra-crossing-animals-black-and-w...


In [7]:
!cp /kaggle/input/vqa-labeled/id2image.json /kaggle/working/id2image.json
!cp /kaggle/input/vqa-labeled/id2img.csv /kaggle/working/id2image.csv
!cp /kaggle/input/vqa-labeled/image2id.json /kaggle/working/image2id.json
!cp /kaggle/input/vqa-labeled/img2id.csv /kaggle/working/image2id.csv
!cp /kaggle/input/vqa-labeled/error_image.json /kaggle/working/error_image.json

In [8]:
total

12897

In [9]:
error

7982

In [10]:
error_2

9337

In [11]:
with open(f'error_llm.json', 'w') as f:
    f.write(json.dumps(clean_error_llm))
with open(f'annotations.json', 'w') as f:
    f.write(json.dumps(combined_annotations))

In [12]:
import pandas as pd
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split

# Step 3: Split unique images (Train 70%, Validation 10%, Test 20%)
unique_images = df_annotations_with_path["image_id"].unique()
train_ids, temp_ids = train_test_split(unique_images, test_size=0.3, random_state=42)
val_ids, test_ids = train_test_split(temp_ids, test_size=0.35, random_state=42)

# Step 4: Create train, validation, and test DataFrames
train_df = df_annotations_with_path[df_annotations_with_path["image_id"].isin(train_ids)]
val_df = df_annotations_with_path[df_annotations_with_path["image_id"].isin(val_ids)]
test_df = df_annotations_with_path[df_annotations_with_path["image_id"].isin(test_ids)]

train_df.to_csv("train.csv", index=False)
val_df.to_csv("val.csv", index=False)
test_df.to_csv("test.csv", index=False)