In [2]:
import shlex
import subprocess
from typing import Dict, List
def evaluate_performance(program_name: str, run_args: str, run_count: int = 1) -> dict:
    """Run a program ``run_count`` times and average the metrics it prints.

    The program is started as ``[program_name, *arguments]`` (no shell is
    involved). The last four lines of its stdout must each look like
    ``<label>: <number>``; they are read, in order, as the values stored
    under ``tok_per_s``, ``ftl_per_ms``, ``memory_usage`` and
    ``power_consumption``.

    Args:
        program_name: Executable to launch (path or name resolvable by the OS).
        run_args: Command-line arguments as a single string. It is split with
            shell-style quoting rules, so ``-i "Once upon a time"`` yields two
            arguments rather than five.
        run_count: Number of times to run the program; must be at least 1.

    Returns:
        A dict with ``program_name``, ``run_args``, ``run_count`` and the
        arithmetic mean of every metric under ``tok_per_s_avg``,
        ``ftl_per_ms_avg``, ``memory_usage_avg`` and ``power_consumption_avg``.

    Raises:
        ValueError: If ``run_count`` is smaller than 1, if ``run_args`` has
            unbalanced quotes, or if a run prints fewer than four lines or
            lines that cannot be parsed as ``<label>: <number>``.
        RuntimeError: If a run exits with a non-zero status (its stderr is
            included in the message).
        FileNotFoundError: If ``program_name`` cannot be found.
    """
    # Reject an empty experiment up front; otherwise averaging zero samples
    # would surface as an unrelated ZeroDivisionError.
    if run_count < 1:
        raise ValueError(f"run_count 必须为正整数，当前为: {run_count}")

    # shlex keeps quoted arguments (e.g. a prompt containing spaces) intact,
    # which a plain str.split() would cut into separate argv entries.
    command = [program_name] + shlex.split(run_args)

    # Order matters: it mirrors the order of the last four stdout lines.
    metric_keys = ('tok_per_s', 'ftl_per_ms', 'memory_usage', 'power_consumption')
    samples: Dict[str, List[float]] = {key: [] for key in metric_keys}

    def _run_single(run_index: int) -> Dict[str, float]:
        """Run the program once and return the four parsed metrics."""
        try:
            result = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=True
            )
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"第{run_index}次运行失败: {e.stderr}") from e
        except FileNotFoundError as e:
            raise FileNotFoundError(f"未找到程序: {program_name}") from e

        output_lines = result.stdout.splitlines()
        if len(output_lines) < len(metric_keys):
            raise ValueError(f"第{run_index}次运行输出内容不足，无法提取性能数据")

        # Only the trailing lines carry metrics; anything printed before them
        # is ignored.
        performance_lines = output_lines[-len(metric_keys):]
        try:
            return {
                key: float(line.split(': ')[1])
                for key, line in zip(metric_keys, performance_lines)
            }
        except (IndexError, ValueError) as e:
            raise ValueError(f"第{run_index}次运行性能数据格式错误: {e}") from e

    # Execute the runs, numbering them from 1 for the error messages.
    for run_index in range(1, run_count + 1):
        single_result = _run_single(run_index)
        for key in metric_keys:
            samples[key].append(single_result[key])

    def _average(data_list: List[float]) -> float:
        """Arithmetic mean; data_list is non-empty because run_count >= 1."""
        return sum(data_list) / len(data_list)

    return {
        'program_name': program_name,
        'run_args': run_args,
        'run_count': run_count,  # number of runs the averages are based on
        'tok_per_s_avg': _average(samples['tok_per_s']),
        'ftl_per_ms_avg': _average(samples['ftl_per_ms']),
        'memory_usage_avg': _average(samples['memory_usage']),
        'power_consumption_avg': _average(samples['power_consumption'])
    }

In [3]:
# IPython shell escape: list the contents of the notebook's current working directory.
!ls

build_debug.sh	     modelq_15M.bin	run_avx.c		 run.out
build_perf.sh	     modelq16_110M.bin	run_avx_plus.c		 run_paral.c
convert.py	     modelq16_15M.bin	run_avx_plus.out	 run_paral.out
json.hpp	     modelq16_42M.bin	run_avx_plus_sparse.c	 runq16.out
llama2.c	     modelq_42M.bin	run_avx_plus_sparse.out  runq.out
matplotlib-tutorial  report.ipynb	run.c			 stories110M.pt
model_110M.bin	     run0.c		run_int16.c		 stories15M.pt
model_15M.bin	     run0.out		run_no_paral.c		 stories42M.pt
model_42M.bin	     run2.c		run_no_paral_o0.out	 tokenizer.bin
modelq_110M.bin      run4.c		run_no_paral.out	 Untitled.ipynb
