# 提取所有题目信息

In [4]:
import json
import pandas as pd

In [5]:
# Languages exported in this run; each name is also used as the Excel sheet
# name and as the per-task sub-directory name.
LANGUAGE_LIST = [
    "javascript",
    "typescript",
]

# File extension of the signature/test source files, keyed by language name.
SUFFIX_MAP = {
    "python": "py",
    "javascript": "js",
    "typescript": "ts",
    "c&cpp": "cpp",
    "java": "java",
}


def generate_python_prompt(code_signature, code_type):
    """Build the prompt asking for a Python implementation of a signature.

    Args:
        code_signature: Signature text appended verbatim to the prompt.
        code_type: "method" for a function prompt, "class" for a class prompt.

    Returns:
        The prompt string, or None when code_type is neither "method" nor
        "class".
    """
    if code_type == "method":
        noun = "function"
    elif code_type == "class":
        noun = "class"
    else:
        # Unknown code types produce no prompt.
        return None
    return f"please write a python {noun} , the {noun} signature as below {code_signature}"


def generate_javascript_prompt(code_signature, code_type):
    """Build the prompt asking for a JavaScript implementation of a signature.

    Args:
        code_signature: Signature text appended verbatim to the prompt.
        code_type: "method" for a function prompt, "class" for a class prompt.

    Returns:
        The prompt string, or None when code_type is neither "method" nor
        "class".
    """
    if code_type == "method":
        noun = "function"
    elif code_type == "class":
        noun = "class"
    else:
        # Unknown code types produce no prompt.
        return None
    return f"please write a javascript {noun} , the {noun} signature as below {code_signature}"


def generate_typescript_prompt(code_signature, code_type):
    """Build the prompt asking for a TypeScript implementation of a signature.

    Args:
        code_signature: Signature text appended verbatim to the prompt.
        code_type: "method" for a function prompt, "class" for a class prompt.

    Returns:
        The prompt string, or None when code_type is neither "method" nor
        "class".
    """
    if code_type == "method":
        noun = "function"
    elif code_type == "class":
        noun = "class"
    else:
        # Unknown code types produce no prompt.
        return None
    return f"please write a typescript {noun} , the {noun} signature as below {code_signature}"


def generate_ccpp_prompt(code_signature, code_type):
    """Build the prompt asking for a C/C++ ("cpp") implementation of a signature.

    Args:
        code_signature: Signature text appended verbatim to the prompt.
        code_type: "method" for a function prompt, "class" for a class prompt.

    Returns:
        The prompt string, or None when code_type is neither "method" nor
        "class".
    """
    if code_type == "method":
        noun = "function"
    elif code_type == "class":
        noun = "class"
    else:
        # Unknown code types produce no prompt.
        return None
    return f"please write a cpp {noun} , the {noun} signature as below {code_signature}"


# Prompt builder for each language name; the export loop looks the builder up
# by the language it is currently processing.
# NOTE(review): SUFFIX_MAP also lists "java", which has no builder here, so
# adding "java" to LANGUAGE_LIST would raise KeyError on this lookup.
LANGUAGE_PROMPT_MAP = {
    "python": generate_python_prompt,
    "javascript": generate_javascript_prompt,
    "typescript": generate_typescript_prompt,
    "c&cpp": generate_ccpp_prompt,
}

In [6]:
from pathlib import Path

# Export one JSON question file per language listed in LANGUAGE_LIST.
question_dir = Path("./question")
# Create the output folder up front so a missing directory cannot abort the
# run only after every row has already been read and processed.
question_dir.mkdir(parents=True, exist_ok=True)

for language in LANGUAGE_LIST:
    # Read the language's sheet from the Excel workbook and keep only the
    # rows whose 'check' column equals 'yes'.
    excel_data = pd.read_excel("./xlsx/RealisticEval-Data.xlsx", sheet_name=language)
    data = excel_data[excel_data['check'] == 'yes']
    # Walk every selected row and collect one question record per task.
    question_array = []
    for _index, row in data.iterrows():
        task_id = int(row["task_id"])
        code_type = row["code_type"]
        # Per-task sources live in ../all/t<task_id>/<language>/.
        dir_path = f"../all/t{task_id}"
        signature_path = f"{dir_path}/{language}/signature.{SUFFIX_MAP[language]}"
        with open(signature_path, "r", encoding="utf8") as signature_file:
            code_signature = signature_file.read()
        test_path = f"{dir_path}/{language}/test.{SUFFIX_MAP[language]}"
        with open(test_path, "r", encoding="utf8") as test_file:
            code_test = test_file.read()
        # The builder returns None for a code_type other than "method"/"class".
        prompt = LANGUAGE_PROMPT_MAP[language](code_signature, code_type)
        question_info = {
            "task_id": task_id,
            "code_type": code_type,
            "code_signature": code_signature,
            "code_language": language,
            "test_code": code_test,
            "prompt": prompt,
        }
        question_array.append(question_info)
        print(f"task_id:{task_id} signature:{len(code_signature)} test:{len(code_test)}")
    # Serialize before opening the target: opening with "w" truncates the
    # file, so a serialization error must not wipe a previously written export.
    json_str = json.dumps(question_array)
    # The with-block flushes and closes the file on exit; no explicit flush.
    with open(question_dir / f"{language}.json", "w", encoding="utf8") as question_file:
        question_file.write(json_str)

task_id:87 signature:220 test:939
task_id:88 signature:164 test:628
task_id:89 signature:347 test:1449
task_id:92 signature:285 test:1735
task_id:93 signature:236 test:1264
task_id:94 signature:332 test:1277
task_id:95 signature:419 test:1257
task_id:96 signature:391 test:1696
task_id:97 signature:189 test:1329
task_id:99 signature:271 test:718
task_id:100 signature:1018 test:699
task_id:101 signature:393 test:854
task_id:103 signature:334 test:989
task_id:104 signature:225 test:1644
task_id:106 signature:210 test:1546
task_id:107 signature:166 test:1778
task_id:108 signature:481 test:2457
task_id:109 signature:364 test:1553
task_id:110 signature:127 test:1241
task_id:111 signature:426 test:2262
task_id:112 signature:313 test:1603
task_id:113 signature:349 test:2525
task_id:114 signature:239 test:1937
task_id:115 signature:302 test:1243
task_id:116 signature:495 test:1857
task_id:117 signature:291 test:541
task_id:118 signature:266 test:778
task_id:119 signature:206 test:1377
task_id:1