In [41]:
import json
import os
import re
import argparse
from tqdm import tqdm 
import pandas as pd


TASKS = [
    "Reasoning",
    "Perception",
]

SUBTASKS = [
    # "Monitoring",
    # "OCR with Complex Context",
    # "Diagram and Table",
    # "Autonomous_Driving",
    'Remote Sensing'
]


def extract_characters_regex(s, choices):
    s = s.strip()
    answer_prefixes = [
        "The best answer is",
        "The correct answer is",
        "The answer is",
        "The answer",
        "The best option is"
        "The correct option is",
        "Best answer:"
        "Best option:",
        "Answer",
        # "Option"
    ]
    for answer_prefix in answer_prefixes:
        # s = s.replace(answer_prefix, "")
        if answer_prefix in s:
            s = s.split(answer_prefix)[-1]
            break
        elif answer_prefix.lower() in s:
            s = s.split(answer_prefix.lower())[-1]
            break

    if len(s.split()) > 10 and not re.search("[ABCDE]", s):
        return ""
    matches = re.search(r'[ABCDE]', s)
    if matches is None:
        for choice in choices:
            if s.lower() in choice.lower():
                return choice[1]
        return ""
    return matches[0]

In [42]:
def extract_single_file(file_path):
    # print(file_path)

    # Check the file extension and process accordingly
    if file_path.endswith(".jsonl"):
        # For JSONL files (line-delimited JSON)
        data = [json.loads(line) for line in open(file_path, "r")]
    elif file_path.endswith(".json"):
        # For standard JSON files
        with open(file_path, "r") as f:
            data = json.load(f)  # Load the entire JSON file as a single object
    else:
        raise ValueError(f"Unsupported file format: {file_path}")
    file_path = file_path.split("/")[-1]
    file_dict = dict()
    name_list = file_path[:-6].split("_")

    file_dict["patch_method"] = name_list[3]
    file_dict["encoder_name"] = name_list[4]
    file_dict["path_T"] = name_list[5]
    file_dict["lrsd_T"] = name_list[6]
    file_dict["crsd_T"] = name_list[7]
    file_dict["reduce_fn"] = name_list[8]

    cnt = 0

    results = {}
    for task in TASKS:
        results[f'{task}'] = {}
        for subtask in SUBTASKS:
            results[f'{task}'][f'{subtask}'] = {}

    index = 1
    for question in data:
        Task = question['Task']
        Subtask = question['Subtask']
        Category = question['Category'].lower()
        question_id = question["Question_id"]
        ground_truth = question["Ground truth"]
        text = question["output"]

        if 'attribute' in Category.lower():
            Category = Category.split('/')[0] + '/attribute'

        text = extract_characters_regex(text, question['Answer choices'])
        # 检查 Ground Truth 和 text 是否相同
        # print(index, ground_truth, text)

        if isinstance(ground_truth, str):
            cnt = ground_truth == text
        elif isinstance(ground_truth, list):
            cnt = True if text in ground_truth else False

        if Category not in results[Task][Subtask].keys():
            results[Task][Subtask][f'{Category}'] = {'true': cnt, 'false': 1 - cnt, 'is_E': text == 'E'}
        else:
            results[Task][Subtask][f'{Category}']['true'] += cnt
            results[Task][Subtask][f'{Category}']['false'] += 1 - cnt
            results[Task][Subtask][f'{Category}']['is_E'] += text == 'E'

        index += 1

    result_dict = dict()
    sum_all, succ_all = 0, 0
    for task, tasks_values in results.items():
        # print(f'*' * 32 + f'{task} (Task Start)')
        cnt_task, cnt_E, sum_task = 0, 0, 0
        for substask, subtask_value in tasks_values.items():
            # print(f'+' * 16 + f'{substask} (Subtask Start)')
            cnt_subtask, sum_subtask, e_subtask = 0, 0, 0
            for category, category_dict in subtask_value.items():
                cnt_subtask += category_dict['true']
                sum_subtask += category_dict['false'] + category_dict['true']
                e_subtask += category_dict['is_E']
                acc = category_dict['true'] / (category_dict['false'] + category_dict['true'])
                # print(f'-' * 4 + f'\t' + 'Acc ' + '{:.4f}'.format(acc) + f"\t{category.capitalize()} ({category_dict['false'] + category_dict['true']} items)")
                result_dict[category.capitalize()] = [acc, category_dict['false'] + category_dict['true']]
            if sum_subtask == 0:
                acc_subtasks = 0
                e_subtask = 0
            else:
                acc_subtasks = cnt_subtask / sum_subtask
            # print(f'+' * 16 + f'\t Acc ' + '{:.4f}'.format(acc_subtasks) + f'\t E choice {e_subtask} \t{substask} ({sum_subtask} items)')
            cnt_task += cnt_subtask
            sum_task += sum_subtask
            cnt_E += e_subtask

        if sum_task == 0:
            acc_task = 0
        else:
            acc_task = cnt_task / sum_task
        succ_all += cnt_task
        sum_all += sum_task
        
        # print(f'*' * 32 + f'Acc ' + '{:.4f}'.format(acc_task) + f'\t E choice {cnt_E} \t{task} ({sum_task} items)\n')
        result_dict[task] = [float(acc_task), float(sum_task)]
        # print(result_dict[task])

    # print(f'*' * 32 + f'Overall Acc ' + '{:.4f}'.format(succ_all / sum_all))
    file_dict["result"] = result_dict
    # print(len(result_dict))
    return file_path, file_dict

In [64]:
# json_dir = os.getcwd()
# json_dir = "/training/zilun/ImageRAG_final/data/imagerag_result/old_mmerealworldlite300_5200_all"
json_dir = "/training/zilun/ImageRAG_final/data/imagerag_result/old_mmerealworldlite450_5200_all"
# json_dir = "/training/zilun/ImageRAG_final/data/imagerag_result/old_mmerealworldlite510_5196_all"
all_files = os.listdir(json_dir)
all_files = [file for file in all_files if file.endswith(".jsonl")]

In [65]:
len(all_files)

648

In [68]:
file_dict_list = []
for filename in all_files:
    # print(filename)
    filepath = os.path.join(json_dir, filename)
    # {'patch_method': 'cc', 'encoder_name': 'georsclip', 'path_T': '0.3', 'lrsd_T': '0.1', 'crsd_T': '0.7', 'result': {'Position': [0.54, 50], 'Color': [0.52, 50], 'Count': [0.28, 50]}}
    try:
        file_name, file_dict = extract_single_file(filepath)
        # print(file_dict)
        file_dict["Position"] = file_dict["result"]['Position'][0]
        file_dict["Color"] = file_dict["result"]['Color'][0]
        file_dict["Count"] = file_dict["result"]['Count'][0]

        file_dict["Perception"] = file_dict["result"]['Perception'][0]
        # if file_dict["result"]['Perception'][1] != 3738:
        #     continue
        if file_dict["result"]['Perception'][1] != 450:
            continue
        del file_dict["result"]
        file_dict["name"] = file_name
        file_dict["reduce_fn"] = file_dict["reduce_fn"]
        file_dict_list.append(file_dict)
    except:
        continue
    # print(file_dict)

In [69]:
file_dict_list

[{'patch_method': 'cc',
  'encoder_name': 'clip',
  'path_T': '0.3',
  'lrsd_T': '0.2',
  'crsd_T': '0.5',
  'reduce_fn': 'mean',
  'Position': 0.52,
  'Color': 0.6066666666666667,
  'Count': 0.29333333333333333,
  'Perception': 0.47333333333333333,
  'name': 'mmerealworld_zoom4kvqa10k_imagerag_cc_clip_0.3_0.2_0.5_mean.jsonl'},
 {'patch_method': 'cc',
  'encoder_name': 'georsclip',
  'path_T': '0.7',
  'lrsd_T': '0.2',
  'crsd_T': '0.5',
  'reduce_fn': 'rerank',
  'Position': 0.5133333333333333,
  'Color': 0.62,
  'Count': 0.29333333333333333,
  'Perception': 0.47555555555555556,
  'name': 'mmerealworld_zoom4kvqa10k_imagerag_cc_georsclip_0.7_0.2_0.5_rerank.jsonl'},
 {'patch_method': 'cc',
  'encoder_name': 'mcipclip',
  'path_T': '0.3',
  'lrsd_T': '0.1',
  'crsd_T': '0.1',
  'reduce_fn': 'mean',
  'Position': 0.54,
  'Color': 0.6133333333333333,
  'Count': 0.32,
  'Perception': 0.4911111111111111,
  'name': 'mmerealworld_zoom4kvqa10k_imagerag_cc_mcipclip_0.3_0.1_0.1_mean.jsonl'},
 {'p

In [70]:
# 将字典转换为 DataFrame
df = pd.DataFrame(file_dict_list)

# 设置 'name' 列为索引
df.set_index('name', inplace=True)
df['path_T'] = df['path_T'].astype(float)
df['lrsd_T'] = df['lrsd_T'].astype(float)
df['crsd_T'] = df['crsd_T'].astype(float)

In [71]:
df

Unnamed: 0_level_0,patch_method,encoder_name,path_T,lrsd_T,crsd_T,reduce_fn,Position,Color,Count,Perception
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
mmerealworld_zoom4kvqa10k_imagerag_cc_clip_0.3_0.2_0.5_mean.jsonl,cc,clip,0.3,0.2,0.5,mean,0.520000,0.606667,0.293333,0.473333
mmerealworld_zoom4kvqa10k_imagerag_cc_georsclip_0.7_0.2_0.5_rerank.jsonl,cc,georsclip,0.7,0.2,0.5,rerank,0.513333,0.620000,0.293333,0.475556
mmerealworld_zoom4kvqa10k_imagerag_cc_mcipclip_0.3_0.1_0.1_mean.jsonl,cc,mcipclip,0.3,0.1,0.1,mean,0.540000,0.613333,0.320000,0.491111
mmerealworld_zoom4kvqa10k_imagerag_cc_mcipclip_0.3_0.1_0.3_rerank.jsonl,cc,mcipclip,0.3,0.1,0.3,rerank,0.520000,0.606667,0.320000,0.482222
mmerealworld_zoom4kvqa10k_imagerag_cc_mcipclip_0.3_0.1_0.5_mean.jsonl,cc,mcipclip,0.3,0.1,0.5,mean,0.506667,0.606667,0.333333,0.482222
...,...,...,...,...,...,...,...,...,...,...
mmerealworld_zoom4kvqa10k_imagerag_grid_remoteclip_0.7_0.2_0.3_rerank.jsonl,grid,remoteclip,0.7,0.2,0.3,rerank,0.533333,0.533333,0.286667,0.451111
mmerealworld_zoom4kvqa10k_imagerag_vit_clip_0.3_0.1_0.3_rerank.jsonl,vit,clip,0.3,0.1,0.3,rerank,0.520000,0.613333,0.293333,0.475556
mmerealworld_zoom4kvqa10k_imagerag_vit_clip_0.3_0.2_0.1_rerank.jsonl,vit,clip,0.3,0.2,0.1,rerank,0.506667,0.600000,0.293333,0.466667
mmerealworld_zoom4kvqa10k_imagerag_vit_clip_0.3_0.3_0.3_rerank.jsonl,vit,clip,0.3,0.3,0.3,rerank,0.520000,0.606667,0.286667,0.471111


In [72]:
max_perception = df["Perception"].max()

# 获取对应的最大值的行
max_row = df[df["Perception"] == max_perception]

In [73]:
max_row

Unnamed: 0_level_0,patch_method,encoder_name,path_T,lrsd_T,crsd_T,reduce_fn,Position,Color,Count,Perception
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
mmerealworld_zoom4kvqa10k_imagerag_cc_mcipclip_0.5_0.1_0.1_mean.jsonl,cc,mcipclip,0.5,0.1,0.1,mean,0.56,0.626667,0.373333,0.52


In [74]:
df_sorted = df.sort_values(by="Perception", ascending=False)
df_sorted.head(20)

Unnamed: 0_level_0,patch_method,encoder_name,path_T,lrsd_T,crsd_T,reduce_fn,Position,Color,Count,Perception
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
mmerealworld_zoom4kvqa10k_imagerag_cc_mcipclip_0.5_0.1_0.1_mean.jsonl,cc,mcipclip,0.5,0.1,0.1,mean,0.56,0.626667,0.373333,0.52
mmerealworld_zoom4kvqa10k_imagerag_cc_clip_0.7_0.1_0.3_mean.jsonl,cc,clip,0.7,0.1,0.3,mean,0.553333,0.62,0.373333,0.515556
mmerealworld_zoom4kvqa10k_imagerag_cc_clip_0.7_0.3_0.1_mean.jsonl,cc,clip,0.7,0.3,0.1,mean,0.546667,0.633333,0.36,0.513333
mmerealworld_zoom4kvqa10k_imagerag_cc_mcipclip_0.5_0.3_0.3_mean.jsonl,cc,mcipclip,0.5,0.3,0.3,mean,0.586667,0.633333,0.32,0.513333
mmerealworld_zoom4kvqa10k_imagerag_grid_georsclip_0.3_0.1_0.5_mean.jsonl,grid,georsclip,0.3,0.1,0.5,mean,0.553333,0.64,0.346667,0.513333
mmerealworld_zoom4kvqa10k_imagerag_grid_mcipclip_0.3_0.1_0.3_rerank.jsonl,grid,mcipclip,0.3,0.1,0.3,rerank,0.553333,0.633333,0.346667,0.511111
mmerealworld_zoom4kvqa10k_imagerag_grid_clip_0.3_0.3_0.3_rerank.jsonl,grid,clip,0.3,0.3,0.3,rerank,0.546667,0.62,0.36,0.508889
mmerealworld_zoom4kvqa10k_imagerag_cc_remoteclip_0.5_0.3_0.5_rerank.jsonl,cc,remoteclip,0.5,0.3,0.5,rerank,0.586667,0.586667,0.353333,0.508889
mmerealworld_zoom4kvqa10k_imagerag_cc_mcipclip_0.5_0.3_0.3_rerank.jsonl,cc,mcipclip,0.5,0.3,0.3,rerank,0.566667,0.626667,0.333333,0.508889
mmerealworld_zoom4kvqa10k_imagerag_grid_clip_0.3_0.3_0.1_mean.jsonl,grid,clip,0.3,0.3,0.1,mean,0.553333,0.62,0.353333,0.508889


In [16]:
# grouped = df.groupby(['encoder_name', 'reduce_fn', 'patch_method'])[['Position', 'Color', 'Count', 'Perception']].mean()
select_df = df[(df['path_T'] == 0.5) & (df['lrsd_T'] == 0.3) & (df['crsd_T'] == 0.5)]
grouped = df.groupby(['encoder_name', 'reduce_fn', 'patch_method'])[['Position', 'Color', 'Count', 'Perception']].max()

# 打印分组统计结果
print(grouped.shape)
grouped

(12, 4)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Position,Color,Count,Perception
encoder_name,reduce_fn,patch_method,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
clip,mean,cc,0.56,0.64,0.346667,0.497778
clip,mean,grid,0.586667,0.666667,0.353333,0.508889
clip,mean,vit,0.526667,0.646667,0.34,0.493333
clip,rerank,cc,0.56,0.653333,0.333333,0.497778
clip,rerank,grid,0.586667,0.646667,0.36,0.508889
clip,rerank,vit,0.526667,0.646667,0.326667,0.497778
mcipclip,mean,cc,0.586667,0.64,0.373333,0.52
mcipclip,mean,grid,0.553333,0.626667,0.353333,0.504444
mcipclip,mean,vit,0.553333,0.646667,0.34,0.495556
mcipclip,rerank,cc,0.566667,0.633333,0.373333,0.504444


In [17]:
grouped_all = df.groupby(['path_T','lrsd_T','crsd_T'])[['Position', 'Color', 'Count', 'Perception']].mean()

# 打印分组统计结果
df

Unnamed: 0_level_0,patch_method,encoder_name,path_T,lrsd_T,crsd_T,reduce_fn,Position,Color,Count,Perception
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
mmerealworld_zoom4kvqa10k_imagerag_grid_mcipclip_0.3_0.1_0.1_rerank.jsonl,grid,mcipclip,0.3,0.1,0.1,rerank,0.533333,0.633333,0.266667,0.477778
mmerealworld_zoom4kvqa10k_imagerag_cc_clip_0.3_0.1_0.1_rerank.jsonl,cc,clip,0.3,0.1,0.1,rerank,0.513333,0.633333,0.320000,0.488889
mmerealworld_zoom4kvqa10k_imagerag_vit_clip_0.3_0.1_0.1_rerank.jsonl,vit,clip,0.3,0.1,0.1,rerank,0.513333,0.586667,0.306667,0.468889
mmerealworld_zoom4kvqa10k_imagerag_vit_mcipclip_0.3_0.1_0.3_mean.jsonl,vit,mcipclip,0.3,0.1,0.3,mean,0.520000,0.626667,0.340000,0.495556
mmerealworld_zoom4kvqa10k_imagerag_cc_clip_0.3_0.1_0.5_rerank.jsonl,cc,clip,0.3,0.1,0.5,rerank,0.493333,0.606667,0.300000,0.466667
...,...,...,...,...,...,...,...,...,...,...
mmerealworld_zoom4kvqa10k_imagerag_grid_clip_0.5_0.2_0.5_mean.jsonl,grid,clip,0.5,0.2,0.5,mean,0.566667,0.600000,0.300000,0.488889
mmerealworld_zoom4kvqa10k_imagerag_vit_clip_0.5_0.1_0.5_mean.jsonl,vit,clip,0.5,0.1,0.5,mean,0.526667,0.640000,0.306667,0.491111
mmerealworld_zoom4kvqa10k_imagerag_vit_clip_0.5_0.2_0.1_mean.jsonl,vit,clip,0.5,0.2,0.1,mean,0.513333,0.646667,0.286667,0.482222
mmerealworld_zoom4kvqa10k_imagerag_grid_clip_0.7_0.1_0.3_rerank.jsonl,grid,clip,0.7,0.1,0.3,rerank,0.533333,0.593333,0.320000,0.482222
