In [19]:
import json
# Read the question set (with both models' answers) from the UTF-8 encoded JSON file.
with open("Oracle Database_single_choice_data_lama3_gemma.json", "r", encoding="utf-8") as source_file:
    data = json.loads(source_file.read())

def filter_unique_ids(data_list):
    """Return the records of ``data_list`` with duplicate ids removed.

    Records are compared by their ``'id'`` value. The first record seen for
    each id is kept and later records with the same id are dropped; the
    relative order of the kept records is unchanged.

    Args:
        data_list: Iterable of mappings, each providing an ``'id'`` key.

    Returns:
        A new list holding the first record for every distinct id.

    Raises:
        KeyError: If a record has no ``'id'`` key.
    """
    # dicts preserve insertion order and setdefault never overwrites an
    # existing key, so this keeps exactly the first record per id.
    first_record_by_id = {}
    for record in data_list:
        first_record_by_id.setdefault(record['id'], record)
    return list(first_record_by_id.values())
# Make sure every id occurs only once, then report the record counts
# before and after de-duplication.
filtered_data = filter_unique_ids(data)
original_count = len(data)
unique_count = len(filtered_data)
print(f"old data: {original_count}")
print(f"filtered data: {unique_count}")

old data: 184
filtered data: 184


In [20]:
import json
import re

# Matches the first uppercase letter in the range A-G.
_CHOICE_LETTER_PATTERN = re.compile(r'[A-G]')


def _first_choice_letter(raw_answer):
    """Return the first uppercase letter A-G found in ``raw_answer``, or None.

    Anything that is not a string (for example ``None`` coming from a JSON
    ``null``) is treated as "no letter found" instead of raising.
    """
    if not isinstance(raw_answer, str):
        return None
    match = _CHOICE_LETTER_PATTERN.search(raw_answer)
    return match.group() if match else None


def evaluate_answers(data_list):
    """Score the llama3-8b and gemma-7b answers of every record.

    For each record the first uppercase letter A-G in ``'llama3-8b-answer'``
    and in ``'gemma-7b-answer'`` is compared with the record's ``'answer'``.
    The outcome is stored on the record itself under ``'llama3-8b-iscorrect'``
    and ``'gemma-7b-iscorrect'`` (the input records are modified in place).
    A model answer with no extractable letter is always counted as incorrect.

    Correct/incorrect counts and ratios for both models are printed.

    Args:
        data_list: Iterable of dict records. The keys ``'answer'``,
            ``'llama3-8b-answer'`` and ``'gemma-7b-answer'`` are read when
            present.

    Returns:
        A ``(filtered_data, total_data)`` tuple of lists: ``filtered_data``
        holds the records on which exactly one of the two models was correct,
        ``total_data`` holds every record, both in input order.
    """
    filtered_data = []
    total_data = []
    correct_counts = {'llama3_8b': 0, 'gemma_7b': 0}
    incorrect_counts = {'llama3_8b': 0, 'gemma_7b': 0}

    for item in data_list:
        # NOTE(review): taking the first A-G capital anywhere in the text is a
        # heuristic; a reply such as "Answer: C" yields "A". Left unchanged so
        # that previously reported scores stay comparable - confirm whether a
        # stricter extraction is wanted.
        llama3_8b_first_letter = _first_choice_letter(item.get('llama3-8b-answer', ''))
        gemma_7b_first_letter = _first_choice_letter(item.get('gemma-7b-answer', ''))

        # Reference answer for this record.
        correct_answer = item.get('answer', '')

        # Require an extracted letter: otherwise a missing letter (None) would
        # compare equal to a null reference answer and count as correct.
        item['llama3-8b-iscorrect'] = (
            llama3_8b_first_letter is not None
            and llama3_8b_first_letter == correct_answer
        )
        item['gemma-7b-iscorrect'] = (
            gemma_7b_first_letter is not None
            and gemma_7b_first_letter == correct_answer
        )

        # Tally correct and incorrect answers per model.
        if item['llama3-8b-iscorrect']:
            correct_counts['llama3_8b'] += 1
        else:
            incorrect_counts['llama3_8b'] += 1

        if item['gemma-7b-iscorrect']:
            correct_counts['gemma_7b'] += 1
        else:
            incorrect_counts['gemma_7b'] += 1

        # Keep the records where one model is right and the other is wrong.
        if item['llama3-8b-iscorrect'] != item['gemma-7b-iscorrect']:
            filtered_data.append(item)
        total_data.append(item)

    # Ratios; the totals are zero only for empty input, which reports 0.
    llama3_8b_total = correct_counts['llama3_8b'] + incorrect_counts['llama3_8b']
    gemma_7b_total = correct_counts['gemma_7b'] + incorrect_counts['gemma_7b']

    llama3_8b_correct_ratio = correct_counts['llama3_8b'] / llama3_8b_total if llama3_8b_total > 0 else 0
    gemma_7b_correct_ratio = correct_counts['gemma_7b'] / gemma_7b_total if gemma_7b_total > 0 else 0

    llama3_8b_incorrect_ratio = incorrect_counts['llama3_8b'] / llama3_8b_total if llama3_8b_total > 0 else 0
    gemma_7b_incorrect_ratio = incorrect_counts['gemma_7b'] / gemma_7b_total if gemma_7b_total > 0 else 0

    # Print the summary statistics.
    print(f"正确个数: {correct_counts}, 错误个数: {incorrect_counts}")
    print(f"llama3_8b 正确比例: {llama3_8b_correct_ratio:.2f}, 错误比例: {llama3_8b_incorrect_ratio:.2f}")
    print(f"gemma_7b 正确比例: {gemma_7b_correct_ratio:.2f}, 错误比例: {gemma_7b_incorrect_ratio:.2f}")

    return filtered_data, total_data

# Sample record showing the fields of one question entry, including the raw
# answers of both models.
# NOTE: this rebinds the name `data`, replacing the list loaded from the JSON
# file in the previous cell; the active evaluate_answers call below is given
# `filtered_data`, not this sample.
data = [
    {
        "id": "Wired Network-152",
        "type": 0,
        "question": "Your boss has asked you to maximize the RAM in their laptop computer. You check the specifications and discover that the maximum system RAM is 32 GB of DDR4 RAM. The laptop currently has 16 GB onboard RAM and one empty slot. Which of the following RAM modules will you purchase?\n\nA. One 16 GB SODIMM B. One 32 GB SODIMM C. One 16 GB DIMM D. One 32 GB DIMM ",
        "choices": [
            "One 16 GB SODIMM",
            "One 32 GB SODIMM",
            "One 16 GB DIMM",
            "One 32 GB DIMM"
        ],
        "answer": "A",
        "solution": "A: One 16 GB SODIMM - This is a correct choice. SODIMM is the type of RAM used in laptops and 16 GB will bring the total RAM to the maximum of 32 GB.\n\nB: One 32 GB SODIMM - This is not a correct choice. Although SODIMM is the correct type of RAM for a laptop, adding a 32 GB module would exceed the system's maximum RAM capacity.\n\nC: One 16 GB DIMM - This is not a correct choice. DIMM is a type of RAM used in desktop computers, not laptops.\n\nD: One 32 GB DIMM - This is not a correct choice. Not only is DIMM the wrong type of RAM for a laptop, but a 32 GB module would also exceed the system's maximum RAM capacity.",
        "topic": [
            "Mobile Devices"
        ],
        "original_solution": "You will always check the device's documentation before purchasing components to ensure compatibility. In a laptop, RAM is often soldered directly onto the motherboard (onboard) and isn't meant to be replaced by most users. Since the maximum system RAM is 32 GB, you would need one 16 GB RAM module to bring it up to that total. The SO in SODIMM stands for Small Outline and is the type of RAM module used in devices like laptops and sometimes printers. DIMMs (Dual Inline Memory Modules) are used in desktop computers",
        "llama3-8b-answer": "B. One 16 GB SODIMM",
        "gemma-7b-answer": "A"
    }
]

# Score the de-duplicated records. evaluation_data holds only the records on
# which exactly one of the two models answered correctly; total_data holds all.
evaluation_data, total_data = evaluate_answers(filtered_data)

# Every record in evaluation_data has exactly one correct model, so the records
# llama3-8b did not get right are the ones gemma-7b got right.
llama3_is_correct = sum(1 for item in evaluation_data if item['llama3-8b-iscorrect'])
gemma_is_correct = len(evaluation_data) - llama3_is_correct

print(f"一方对，一方错的数据个数{len(evaluation_data)}, 其中前者对的个数为{llama3_is_correct}, 后者对的个数为{gemma_is_correct}")
print(f"total_data数据个数{len(total_data)}")

# Save the disagreement subset and the fully annotated set as separate JSON files.
# NOTE(review): the output names say "5G" although the input file loaded earlier
# is the "Oracle Database" set - confirm these are the intended file names.
for output_path, records in (
    ('5G_train_data.json', evaluation_data),
    ('5G_total_data.json', total_data),
):
    with open(output_path, 'w') as json_file:
        json.dump(records, json_file, indent=4)

正确个数: {'llama3_8b': 82, 'gemma_7b': 83}, 错误个数: {'llama3_8b': 102, 'gemma_7b': 101}
llama3_8b 正确比例: 0.45, 错误比例: 0.55
gemma_7b 正确比例: 0.45, 错误比例: 0.55
一方对，一方错的数据个数61, 其中前者对的个数为30, 后者对的个数为31
total_data数据个数184
