In [5]:
import os 
import json
from tqdm import tqdm 

# Root directory of the gem5 benchmark results; later cells treat each entry
# in it as the result directory of one generated program.
benchmark_dir = "/largespace/tydata/code_optimization/cpp/saved_models/pie-gem5-by-user-cpp_deepseekcoder-7b_sft_0418/generate/benchmark_gem5_testcases_3"
# Sanity check: report how many entries the root directory contains.
print(len(os.listdir(benchmark_dir)))

1169


In [3]:
def is_answer_correct(input_case_path, stdout_bin):
    """
    Check whether a program's stdout matches the expected output of a test case.

    The expected-output file is located by replacing every occurrence of
    "input" in ``input_case_path`` with "output". Both texts are stripped and
    compared line by line: a line passes when the two strings are identical
    or, failing that, when both parse as floats differing by less than 1e-3.

    Args:
        input_case_path: path of the test case's input file.
        stdout_bin: captured standard output of the binary, as a string.

    Returns:
        True when both texts have the same number of lines and every line
        passes; False otherwise.

    Raises:
        OSError: if the expected-output file cannot be opened.
    """
    # NOTE(review): replace() rewrites *every* "input" substring, directory
    # names included -- confirm no parent directory contains "input".
    output_case_path = input_case_path.replace("input", "output")
    with open(output_case_path, 'r') as g:
        truth = g.read().strip()

    ground_truth_lines = truth.splitlines()
    output_lines = stdout_bin.strip().splitlines()

    # zip() stops at the shorter sequence, so without this check an empty or
    # truncated stdout (or one with surplus lines) would be judged correct.
    if len(output_lines) != len(ground_truth_lines):
        return False

    for gen_output, ground_truth_output in zip(output_lines, ground_truth_lines):
        if gen_output == ground_truth_output:
            continue
        # Textual mismatch: fall back to a numeric comparison with tolerance.
        try:
            close_enough = abs(float(gen_output) - float(ground_truth_output)) < 1e-3
        except ValueError:
            # At least one side is not a number, so the mismatch stands.
            return False
        if not close_enough:
            return False

    return True

In [6]:
def benchmark_postprocess(test_binary_benchmark_dir):
    """
    Classify every test case of one benchmark directory and save the summary.

    Reads ``testcases_3_benchmark_results.json`` from the directory, sorts each
    test case id into a bucket according to whether the native binary and the
    gem5 run returned exit code 0 (and, when both did, whether the binary's
    stdout matches the expected answer), then writes the buckets together with
    the number of test cases to ``analysis_result.json`` in the same directory.

    Args:
        test_binary_benchmark_dir: directory holding the benchmark results file.

    Returns:
        None. The result is the JSON file written to disk.
    """
    results_file = os.path.join(test_binary_benchmark_dir, "testcases_3_benchmark_results.json")
    with open(results_file, 'r') as f:
        results = json.load(f)

    # Bucket for cases where both runs exited cleanly, split further by answer.
    both_ok = {
        "testcases_id": [],
        "binary_exec_right_and_gem5_right_and_answer_correct": [],
        "binary_exec_right_and_gem5_right_and_answer_wrong": [],
    }
    final_result = {
        "binary_exec_right_and_gem5_right": both_ok,
        "binary_exec_right_and_gem5_wrong": [],
        "binary_exec_wrong_and_gem5_right": [],
        "binary_exec_wrong_and_gem5_wrong": [],
        "testcases_number": 0,
    }
    # (binary exited 0, gem5 exited 0) -> bucket for the non-clean combinations.
    failure_bucket = {
        (True, False): "binary_exec_right_and_gem5_wrong",
        (False, True): "binary_exec_wrong_and_gem5_right",
        (False, False): "binary_exec_wrong_and_gem5_wrong",
    }

    for entry in results:
        bin_ok = entry["returncode_bin"] == 0
        gem5_ok = entry["returncode_gem5"] == 0
        case_id = int(entry["test_case_id"])

        if bin_ok and gem5_ok:
            both_ok["testcases_id"].append(case_id)
            if is_answer_correct(entry["input_case_path"], entry["stdout_bin"]):
                both_ok["binary_exec_right_and_gem5_right_and_answer_correct"].append(case_id)
            else:
                both_ok["binary_exec_right_and_gem5_right_and_answer_wrong"].append(case_id)
        else:
            final_result[failure_bucket[(bin_ok, gem5_ok)]].append(case_id)

        final_result["testcases_number"] += 1

    summary_file = os.path.join(test_binary_benchmark_dir, "analysis_result.json")
    with open(summary_file, 'w') as gg:
        json.dump(final_result, gg, indent=4)

    return None

In [7]:
# Directory whose entries are the per-program benchmark result directories.
test_benchmark_store_path = "/largespace/tydata/code_optimization/cpp/saved_models/pie-gem5-by-user-cpp_deepseekcoder-7b_sft_0418/generate/benchmark_gem5_testcases_3"
generate_benchmarks = os.listdir(test_benchmark_store_path)
# Single-directory debugging call, kept disabled:
# test_binary_benchmark_dir = os.path.join(test_benchmark_store_path, "p01717", "u923320778", "s651875974")
# benchmark_postprocess(test_binary_benchmark_dir)

# Post-process every benchmark directory; each call writes an
# analysis_result.json next to that directory's benchmark results.
for each_generate in tqdm(generate_benchmarks):
    each_generate_path = os.path.join(test_benchmark_store_path, each_generate)
    benchmark_postprocess(each_generate_path)

100%|██████████| 1169/1169 [00:00<00:00, 4716.70it/s]


In [8]:
def result_statistics(analysis_result):
    """
    Tell whether every test case in an analysis summary passed.

    Args:
        analysis_result: path to an ``analysis_result.json`` file.

    Returns:
        True when the number of ids recorded as "binary ran, gem5 ran, answer
        correct" equals the summary's ``testcases_number``; False otherwise.
    """
    with open(analysis_result, 'r') as f:
        summary = json.load(f)

    passed_ids = summary["binary_exec_right_and_gem5_right"]["binary_exec_right_and_gem5_right_and_answer_correct"]
    return len(passed_ids) == summary["testcases_number"]

In [10]:
# Collect the binary name of every generated program whose test cases all
# passed, then persist the list one name per line.
all_right_binary = []
for each_generate in tqdm(generate_benchmarks):
    each_generate_path = os.path.join(test_benchmark_store_path, each_generate)
    result = result_statistics(os.path.join(each_generate_path, "analysis_result.json"))
    if not result:
        continue
    binary = f"{each_generate}_maybe_faster.out"
    all_right_binary.append(binary)
count_all_right = len(all_right_binary)

print(f"all right count = {count_all_right}")
with open("/largespace/tydata/code_optimization/cpp/saved_models/pie-gem5-by-user-cpp_deepseekcoder-7b_sft_0418/generate/generate_all_right.txt", 'w') as f:
    f.writelines(item + '\n' for item in all_right_binary)

100%|██████████| 1169/1169 [00:00<00:00, 41674.60it/s]

all right count = 551





In [26]:
def calculate_sim_seconds(stats):
    """
    Derive the simulated time from the tick counter and the tick frequency.

    Args:
        stats: mapping with "sim_ticks" and "sim_freq" entries, each
            convertible to float.

    Returns:
        ``sim_ticks / sim_freq`` as a float.
    """
    # more accurate than sim_seconds
    ticks = float(stats["sim_ticks"])
    frequency = float(stats["sim_freq"])
    return ticks / frequency

In [27]:
import re
import ast
def _parse_stat_token(token):
    """Convert one whitespace-delimited value token into a Python value.

    Percent signs are dropped; nan/inf (with an optional sign) become None;
    anything else is evaluated as a Python literal.
    """
    cleaned = token.replace("%", "")
    # Match the whole token: chained substring replaces turned "-inf" into
    # "-None", which could never be parsed.
    if cleaned.lstrip("+-").lower() in ("nan", "inf"):
        return None
    return ast.literal_eval(cleaned)


def parse_stats_txt(gem5_stats_path):
    """
    Parse a gem5 statistics text file into a dictionary.

    Every non-blank line that is not a "Begin"/"End" banner has its ``#``
    comment removed and is split on whitespace: the first field is the key,
    and the remaining field(s) the value (a scalar for one field, a list for
    several). Values are converted with ``ast.literal_eval``; nan/inf tokens
    become None. When a value cannot be converted, a message is printed and
    the value is stored as text with the percent signs removed. If a key
    occurs more than once, the last occurrence wins.

    Args:
        gem5_stats_path: path of the statistics file.

    Returns:
        dict mapping stat names to parsed values, plus the derived entry
        "sim_seconds_precise" computed by ``calculate_sim_seconds``.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: propagated from ``calculate_sim_seconds`` when the file has
            no "sim_ticks" or "sim_freq" entry.
    """
    with open(gem5_stats_path, 'r') as f:
        stats_lines = f.readlines()

    stats = {}
    for line in stats_lines:
        stripped = line.strip()
        if stripped == '':
            continue
        # Skip only the banner lines. The former bare substring test also
        # dropped any stat whose name or description contained "Begin"/"End".
        # NOTE(review): assumes banners start with dashes
        # ("---------- Begin Simulation Statistics ----------") -- confirm.
        if stripped.startswith("-") and ("Begin" in stripped or "End" in stripped):
            continue
        line = re.sub("#.*", "", line).strip() # remove comments
        parts = line.split()
        if len(parts) < 2:
            print(f"could not parse line {line}")
            continue
        key = parts[0]
        if len(parts) == 2:
            value = parts[1]
            try:
                value = _parse_stat_token(value)
            except (ValueError, SyntaxError, TypeError):
                print(f"could not parse value {value} for key {key}")
                value = value.replace("%", "")
        else:
            value = parts[1:]
            try:
                value = [_parse_stat_token(v) for v in value]
            except (ValueError, SyntaxError, TypeError):
                # All-or-nothing: one bad token keeps the whole list as text.
                print(f"could not parse value {value} for key {key}")
                value = [v.replace("%", "") for v in value]

        stats[key] = value
    stats["sim_seconds_precise"] = calculate_sim_seconds(stats)
    return stats

In [40]:
import glob
def get_average_time(
    binary,
    base_path="/largespace/tydata/code_optimization/cpp/saved_models/pie-gem5-by-user-cpp_deepseekcoder-7b_sft_0418/generate/benchmark_gem5_testcases_3",
    missing_time=820,
):
    """
    Average the precise simulated time over all gem5 stats files of a benchmark.

    Args:
        binary: name of the benchmark directory, relative to ``base_path``.
        base_path: root directory containing the benchmark directories.
            Defaults to the location this notebook has always used.
        missing_time: value returned when the directory has no
            ``gem5_stats.*.txt`` file. The default 820 is the historical
            fallback (presumably a penalty/placeholder time -- TODO confirm).

    Returns:
        The mean of "sim_seconds_precise" across the matching stats files, or
        ``missing_time`` when none are found.
    """
    testcases_path = os.path.join(base_path, binary)
    # Escape the directory part so glob metacharacters in a directory name
    # are matched literally; only the file-name wildcard stays active.
    pattern = glob.escape(testcases_path) + "/gem5_stats.*.txt"
    sim_seconds_precise_all = [
        parse_stats_txt(gem5_stat)["sim_seconds_precise"]
        for gem5_stat in glob.glob(pattern)
    ]

    if not sim_seconds_precise_all:
        return missing_time
    return sum(sim_seconds_precise_all) / len(sim_seconds_precise_all)

In [41]:
# Read back the names of the all-right binaries, one per line.
with open("/largespace/tydata/code_optimization/cpp/saved_models/pie-gem5-by-user-cpp_deepseekcoder-7b_sft_0418/generate/generate_all_right.txt", 'r') as f:
    binarys = f.read().splitlines()

# Map each binary name to the average simulated time of its benchmark
# directory (the name without the "_maybe_faster.out" suffix).
results = {
    binary: get_average_time(binary.replace("_maybe_faster.out", ""))
    for binary in tqdm(binarys)
}

with open("/largespace/tydata/code_optimization/cpp/saved_models/pie-gem5-by-user-cpp_deepseekcoder-7b_sft_0418/generate/average_time_tesecases3.json", 'w') as f:
    json.dump(results, f, indent=4)

100%|██████████| 551/551 [00:26<00:00, 21.13it/s]


In [58]:
def relative_improve(slow:float, fast:float):
    """
    Fraction of the slow time that the fast time saves, rounded to 4 places.

    Args:
        slow: baseline time; must be non-zero.
        fast: time being compared against the baseline.

    Returns:
        ``(slow - fast) / slow`` rounded to four decimal places. Negative when
        ``fast`` is larger than ``slow``.
    """
    saved = slow - fast
    fraction = saved / slow
    return round(fraction, 4)

In [61]:
# Number of generated programs the overall average is taken over. It matches
# the entry count printed for the benchmark root directory earlier in this
# notebook. NOTE(review): update it if the set of generated programs changes.
TOTAL_GENERATED = 1169

count = 0       # original programs whose generated counterpart passed every test case
opt_count = 0   # of those, how many improved the time by more than 10%
speedups = []   # per-program speedup, clamped below at 1.0
with open("/largespace/tydata/code_optimization/cpp/saved_models/pie-gem5-by-user-cpp_deepseekcoder-7b_sft_0418/generate/final_results.json", 'r') as f:
    originals = json.load(f)
for each_original in originals:
    problem_id = each_original["problem_id"]
    user_id = each_original["user_id"]
    slow_submission_id = each_original["slow_submission_id"]
    fast_submission_id = each_original["fast_submission_id"]
    slow_time = each_original["slow_time"]
    # Keys of `results` are "<benchmark dir name>_maybe_faster.out".
    identifier = f"{problem_id}_{user_id}_{slow_submission_id}_{fast_submission_id}_maybe_faster.out"
    if identifier in results:
        count += 1
        maybe_faster_time = float(results[identifier])
        speedup = round(slow_time / maybe_faster_time, 4)
        # A slower generated program is scored as "no speedup", not a slowdown.
        if speedup < 1:
            speedup = 1.0

        if relative_improve(slow_time, maybe_faster_time) > 0.1:
            opt_count += 1

        speedups.append(speedup)


print(f"correct: {count}")
print(f"opt count: {opt_count}")
# Guard the mean: with no matching program the division would raise
# ZeroDivisionError and hide the counts printed above.
if speedups:
    print(f"Speedup in {count} is : {sum(speedups)/len(speedups)}")
else:
    print(f"Speedup in {count} is : n/a (no matching programs)")
length = len(speedups)
# Overall average: every program without a measured speedup counts as 1.0.
print((sum(speedups) + (TOTAL_GENERATED-length)) / TOTAL_GENERATED)

correct: 551
opt count: 335
Speedup in 551 is : 2.8660998185117985
1.8795731394354156
