# LLM回答校验

In [None]:
import os

In [None]:
def delete_non_ipynb_files():
    """Remove every regular file in the current working directory that is not a notebook.

    Entries whose name ends with ``.ipynb`` are kept, and anything that is
    not a regular file (for example a sub-directory) is left untouched.
    Each removal is reported on stdout; a removal that fails is reported
    as well and the scan continues with the next entry.
    """
    cwd = os.getcwd()

    for entry in os.listdir(cwd):
        # Notebooks are the only files that survive the clean-up.
        if entry.endswith('.ipynb'):
            continue

        target = os.path.join(cwd, entry)
        # Only plain files are removed, so directories are never deleted.
        if not os.path.isfile(target):
            continue

        try:
            os.remove(target)
            print(f"Deleted: {target}")
        except Exception as e:
            print(f"Error deleting {target}: {e}")


In [None]:
# Models to validate in this run and, for each one, the languages to check.
# Entries that are commented out are skipped; uncomment an entry to include
# that model again.
TASK_LIST = [
    # {
    #     "model": "chatglm-6b",
    #     "language": ["c&cpp"]
    # },
    # {
    #     "model": "codegeex4-all-9b",
    #     "language": ["python","java"]
    # },
    # {
    #     "model": "codegen25-7b-instruct_P",
    #     "language": ["c&cpp"]
    # },
    # {
    #     "model": "deepseek-coder-6.7b-instruct",
    #     "language": ["python", "javascript", "typescript", "java"]
    # },
    # {
    #     "model": "Meta-Llama-3.1-8B-Instruct",
    #     "language": ["javascript","python","typescript","java"]
    # },
    # {
    #     "model": "Mistral-7B-Instruct-v0.3",
    #     "language": ["python"]
    # },
    # {
    #     "model": "Phi-3-small-8k-instruct",
    #     "language": ["python"]
    # },
    # {
    #     "model": "CodeLlama-7b-hf",
    #     "language": ["python", "javascript", "typescript", "java"]
    # },
    # {
    #     "model": "starcoder2-7b",
    #     "language": ["python", "javascript", "typescript", "c&cpp"]
    # },
    {
        "model": "gpt-3.5-turbo",
        "language": ["python", "java", "javascript", "typescript"],
    },
]
# Base directory used to build the answer-file paths. Written as a raw string
# so the backslashes of the Windows path are kept literally instead of being
# parsed as (invalid) escape sequences such as "\c" or "\q".
ANSWER_PATH = r"E:\code\code_back\python_project\llm\qa"

In [None]:
from executor.java_executor import JavaExecutor
from executor.ccpp_executor import CCPPExecutor
from executor.typescript_executor import TypeScriptExecutor
from executor.javascript_executor import JavaScriptExecutor
from executor.python_executor import PythonExecutor


def get_parser(language, model):
    """Build the executor that handles answers written in ``language``.

    Args:
        language: One of "python", "javascript", "typescript", "c&cpp"
            or "java".
        model: Model name, passed unchanged to the executor constructor.

    Returns:
        A new executor instance for the requested language.

    Raises:
        ValueError: If ``language`` is not one of the supported names.
    """
    if language == "python":
        executor_cls = PythonExecutor
    elif language == "javascript":
        executor_cls = JavaScriptExecutor
    elif language == "typescript":
        executor_cls = TypeScriptExecutor
    elif language == "c&cpp":
        executor_cls = CCPPExecutor
    elif language == "java":
        executor_cls = JavaExecutor
    else:
        # Fail loudly here instead of returning None, which would only
        # surface later as an AttributeError on the missing executor.
        raise ValueError(f"Unsupported language: {language!r}")
    return executor_cls(model)


from pathlib import Path

# Validate every configured model: run the language-specific executor over
# each answer file, then remove the non-notebook files left in the working
# directory before moving on to the next model.
for task in TASK_LIST:
    model_name = task["model"]
    # parents=True so the run also works when ./xlsx/model_answer_result
    # has not been created yet.
    Path(f"./xlsx/model_answer_result/{model_name}").mkdir(parents=True, exist_ok=True)
    for language in task["language"]:
        try:
            print(language)
            parser = get_parser(language, model_name)
            file_path = f"{ANSWER_PATH}\\{model_name}_answer\\{language}_answer.json"
            parser.batch_run(file_path)
        except Exception as e:
            # Best effort: report the failure and continue with the next
            # language instead of aborting the whole run.
            print(e)
    delete_non_ipynb_files()

# LLM回答数据统计

In [1]:
import pandas as pd

In [15]:
# Models included in the statistics; every one of them is evaluated on
# Python only, so the entries are generated from the list of model names.
TASK_LIST = [
    {"model": model_name, "language": ["python"]}
    for model_name in (
        "chatglm-6b",
        "codegeex4-all-9b",
        "codegen25-7b-instruct_P",
        "CodeLlama-7b-hf",
        "deepseek-coder-6.7b-instruct",
        "Meta-Llama-3.1-8B-Instruct",
        "Mistral-7B-Instruct-v0.3",
        "Phi-3-small-8k-instruct",
        "starcoder2-7b",
        "gpt-3.5-turbo",
        "gpt-4",
    )
]

In [17]:
# Collect pass/fail statistics for every (model, pass type) pair and write
# them to a single Excel sheet. A result file that cannot be read is
# reported and skipped.
result_list = []
for task in TASK_LIST:
    model_name = task["model"]
    languages = task["language"]
    # "pass_type" rather than "type" so the builtin is not shadowed.
    for pass_type in ["pass1", "pass10"]:
        row = {
            "model": model_name
        }
        has_data = False
        for lang in languages:
            try:
                file_path = f"./model_answer_result/{model_name}/{pass_type}/{model_name}_{lang}_{pass_type}.xlsx"
                data = pd.read_excel(file_path)
                # A return code of 0 counts as a pass; anything else as a failure.
                pass_count = (data["result_return_code"] == 0).sum()
                failed_count = (data["result_return_code"] != 0).sum()
                pass_rate = (pass_count / len(data)) * 100
            except Exception as e:
                print(e)
                continue
            row[f"{lang}_pass_count"] = pass_count
            row[f"{lang}_failed_count"] = failed_count
            # Set at this point (not when the row is created) so the column
            # order of the output sheet stays the same.
            row["type"] = pass_type
            row[f"{lang}_pass_rate"] = f"{pass_rate:.2f}%"
            has_data = True
        # Append once per (model, pass type): appending inside the language
        # loop would add the same row once for every language.
        if has_data:
            result_list.append(row)
result_data = pd.DataFrame(result_list)
result_data.to_excel("./model_answer_result.xlsx")

[Errno 2] No such file or directory: './model_answer_result/chatglm-6b/pass10/chatglm-6b_python_pass10.xlsx'
[Errno 2] No such file or directory: './model_answer_result/codegen25-7b-instruct_P/pass1/codegen25-7b-instruct_P_python_pass1.xlsx'
[Errno 2] No such file or directory: './model_answer_result/codegen25-7b-instruct_P/pass10/codegen25-7b-instruct_P_python_pass10.xlsx'
[Errno 2] No such file or directory: './model_answer_result/CodeLlama-7b-hf/pass1/CodeLlama-7b-hf_python_pass1.xlsx'
[Errno 2] No such file or directory: './model_answer_result/CodeLlama-7b-hf/pass10/CodeLlama-7b-hf_python_pass10.xlsx'
[Errno 2] No such file or directory: './model_answer_result/Mistral-7B-Instruct-v0.3/pass10/Mistral-7B-Instruct-v0.3_python_pass10.xlsx'
[Errno 2] No such file or directory: './model_answer_result/Phi-3-small-8k-instruct/pass10/Phi-3-small-8k-instruct_python_pass10.xlsx'
[Errno 2] No such file or directory: './model_answer_result/starcoder2-7b/pass1/starcoder2-7b_python_pass1.xlsx'
[E