In [1]:
import json
from prettytable import PrettyTable

In [3]:
# Read the jsonl file and convert it to a JSON list
def jsonl_to_json_list(jsonl_file_path):
    """Load a JSON Lines file into a list of parsed objects.

    Args:
        jsonl_file_path: Path to a UTF-8 text file holding one JSON
            document per line.

    Returns:
        list: The parsed value of every non-blank line, in file order.
        An empty (or blank-only) file yields an empty list.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    json_list = []
    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            stripped = line.strip()
            # Blank lines (e.g. a trailing empty line or a separator between
            # records) carry no record; json.loads('') would raise on them.
            if not stripped:
                continue
            json_list.append(json.loads(stripped))
    return json_list

# Save the JSON list to a file
def save_as_json(json_list, output_file_path):
    """Write ``json_list`` to ``output_file_path`` as one indented JSON document.

    Args:
        json_list: JSON-serializable value to store (typically a list of records).
        output_file_path: Destination path; the file is created or overwritten
            and written as UTF-8 text.
    """
    # Serialize with a 4-space indent, then write the text in one call.
    serialized = json.dumps(json_list, indent=4)
    with open(output_file_path, 'w', encoding='utf-8') as sink:
        sink.write(serialized)

def save_as_jsonl(json_list, output_file_path):
    """Write each element of ``json_list`` as its own line of compact JSON.

    Args:
        json_list: Iterable of JSON-serializable records.
        output_file_path: Destination path; the file is created or overwritten
            and written as UTF-8 text. Every record is followed by a newline.
    """
    with open(output_file_path, 'w', encoding='utf-8') as sink:
        # One serialized record per line, each terminated by '\n'.
        sink.writelines(json.dumps(record) + '\n' for record in json_list)

In [7]:
def load_json(file_path):
    """Parse the UTF-8 JSON file at ``file_path`` and return its content.

    Args:
        file_path: Path to a file containing a single JSON document.

    Returns:
        The decoded Python value (dict, list, str, number, bool or None).
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def deduplicate_data(data):
    """Drop records whose ``'id'`` has already appeared earlier in ``data``.

    Args:
        data: Iterable of mappings, each providing an ``'id'`` key with a
            hashable value.

    Returns:
        list: The first record seen for every distinct id, ordered by first
        appearance. The records themselves are not copied.

    Raises:
        KeyError: If a record has no ``'id'`` key.
    """
    first_by_id = {}
    for record in data:
        # setdefault stores the record only when its id is new, so later
        # duplicates never replace the first occurrence.
        first_by_id.setdefault(record['id'], record)
    # Dicts preserve insertion order, i.e. the order of first appearance.
    return list(first_by_id.values())

def calculate_accuracy(data):
    """Return the fraction of records whose prediction equals the gold answer.

    Args:
        data: Sized collection of mappings, each with ``'gold_answer'`` and
            ``'pred_answer'`` keys.

    Returns:
        The ratio ``correct / len(data)`` as a float, or the integer ``0``
        when ``data`` is empty.

    Raises:
        KeyError: If a record lacks either answer key.
    """
    sample_count = len(data)
    hits = sum(
        1 for record in data
        if record['gold_answer'] == record['pred_answer']
    )
    # Guard against division by zero on an empty collection.
    if sample_count > 0:
        return hits / sample_count
    return 0

# Experiment grid: one result file is expected for every
# (model, task, method) combination.
models = ['gpt-4o-mini', 'gpt-4o']
tasks = ['sampled_50', 'sampled_50_hard', 'sampled_50_5options']
methods = ['syn_verif']

# Summary table: one row per evaluated result file.
table = PrettyTable()
table.field_names = ["Model", "Task", "Method", "Accuracy", "Total Number"]

# The generator walks the grid in the same order as three nested loops
# (models outermost, methods innermost).
for model, task, method in (
    (m, t, k) for m in models for t in tasks for k in methods
):
    file_path = f'./output/{model}-medqa-{task}-{method}.json'
    data = load_json(file_path)
    # Accuracy is computed after removing records with a repeated id.
    deduplicated_data = deduplicate_data(data)
    total = len(deduplicated_data)
    accuracy = calculate_accuracy(deduplicated_data)
    row = [model, task, method, f"{accuracy * 100:.1f}%", total]
    table.add_row(row)

print(table)

+-------------+---------------------+-----------+----------+--------------+
|    Model    |         Task        |   Method  | Accuracy | Total Number |
+-------------+---------------------+-----------+----------+--------------+
| gpt-4o-mini |      sampled_50     | syn_verif |  78.0%   |      50      |
| gpt-4o-mini |   sampled_50_hard   | syn_verif |  50.0%   |      50      |
| gpt-4o-mini | sampled_50_5options | syn_verif |  80.0%   |      50      |
|    gpt-4o   |      sampled_50     | syn_verif |  90.0%   |      50      |
|    gpt-4o   |   sampled_50_hard   | syn_verif |  78.0%   |      50      |
|    gpt-4o   | sampled_50_5options | syn_verif |  88.0%   |      50      |
+-------------+---------------------+-----------+----------+--------------+
