In [4]:
import os 
import json 
import re 

# Names identifying the model run and the gem5 test-case set under analysis.
generated_model_name = "pie-gem5-by-user-cpp-problem_codellama-13b_sft_0420"
gem5_testcases_name = "benchmark_gem5_testcases_3"

# Directory of generated outputs for that model run; its entries are listed
# and parsed by the cells below.
benchmark_dir = (
    "/largespace/tydata/code_optimization/cpp/saved_models/"
    + generated_model_name
    + "/generate4/generate_out/"
)

# Report how many entries the directory contains.
print(len(os.listdir(benchmark_dir)))


4835


In [5]:
# The id patterns are identical for every entry, so they are defined once.
pattern_problem_id = r'p\d+'
pattern_submission_id = r's\d+'
pattern_user_id = r'u\d+'
pattern_sample_id = r'faster_\d+'

# Collect one "<problem>_<user>_<slow submission>_<fast submission>" key per
# directory entry; entries that share all four ids collapse into a single key.
binary_name_set = set()
for binary in os.listdir(benchmark_dir):
    problem_id = re.findall(pattern_problem_id, binary)[0]
    submission_id_two = re.findall(pattern_submission_id, binary)
    slow_submission_id, fast_submission_id = submission_id_two[0], submission_id_two[1]
    user_id = re.findall(pattern_user_id, binary)[0]
    # Not used for the key, but the [0] lookup still raises IndexError for an
    # entry that carries no "faster_<n>" suffix.
    sample_id = re.findall(pattern_sample_id, binary)[0]

    binary_name = "_".join((problem_id, user_id, slow_submission_id, fast_submission_id))
    binary_name_set.add(binary_name)

print(f"There are {len(binary_name_set)} unique binary name.")

There are 1352 unique binary name.


In [12]:
def result_statistics(analysis_result):
    """Summarise one analysis-result JSON file as two pass/fail flags.

    Args:
        analysis_result: Path of the JSON file to read.

    Returns:
        A tuple ``(flag_exec_right_gem5_right, flag_all_right)``:

        * the first flag is True when the number of entries in
          ``["binary_exec_right_and_gem5_right"]["testcases_id"]`` equals
          ``["testcases_number"]``;
        * the second flag is True when the number of entries in
          ``["binary_exec_right_and_gem5_right"]``
          ``["binary_exec_right_and_gem5_right_and_answer_correct"]`` equals
          ``["testcases_number"]``.

    Raises:
        KeyError: if one of the keys above is missing from the file.
    """
    with open(analysis_result, 'r') as handle:
        report = json.load(handle)

    passed_section = report["binary_exec_right_and_gem5_right"]

    ran_on_every_case = len(passed_section["testcases_id"]) == report["testcases_number"]
    correct_on_every_case = (
        len(passed_section["binary_exec_right_and_gem5_right_and_answer_correct"])
        == report["testcases_number"]
    )

    return (ran_on_every_case, correct_on_every_case)

In [17]:
def calculate_sim_seconds(stats):
    """Return ``stats["sim_ticks"] / stats["sim_freq"]`` as a float.

    Both entries are converted with ``float`` before dividing.
    """
    # more accurate than sim_seconds
    ticks = float(stats["sim_ticks"])
    freq = float(stats["sim_freq"])
    return ticks / freq

In [18]:
import re
import ast
def _parse_stat_token(token):
    """Convert one value token of a stats line into a Python value.

    Any ``%`` character is dropped. ``nan`` and ``inf`` (with an optional
    leading sign, e.g. ``-inf``) become ``None``; every other token is handed
    to ``ast.literal_eval``, which raises for text that is not a Python literal.
    """
    cleaned = token.replace("%", "")
    # Compare the whole token instead of substituting substrings: replacing
    # "inf" first used to turn "-inf" into "-None", which then failed to parse.
    if cleaned.lstrip("+-") in ("nan", "inf"):
        return None
    return ast.literal_eval(cleaned)


def parse_stats_txt(gem5_stats_path):
    """Parse a stats text file into a dict keyed by the first column.

    Each non-blank line has its ``#`` comment removed and is split on
    whitespace. The first field is the key; a single remaining field is stored
    as a scalar, several remaining fields are stored as a list. Values are
    converted with ``_parse_stat_token``; when a value cannot be converted a
    message is printed and the raw text is stored unchanged.

    Args:
        gem5_stats_path: Path of the stats file to read.

    Returns:
        dict mapping each key to its parsed value(s), plus a
        ``"sim_seconds_precise"`` entry computed by ``calculate_sim_seconds``.
    """
    stats = {}
    with open(gem5_stats_path, 'r') as f:
        for line in f:
            if line.strip() == '':
                continue
            # Skip the begin/end banner lines. NOTE: this is a substring test on
            # the whole line, comment included, so any line mentioning "Begin"
            # or "End" anywhere is skipped as well.
            if "Begin" in line or "End" in line:
                continue

            line = re.sub("#.*", "", line).strip()  # remove comments
            parts = line.split()
            if len(parts) < 2:
                print(f"could not parse line {line}")
                continue

            key = parts[0]
            # One value -> scalar, several values -> list.
            raw_value = parts[1] if len(parts) == 2 else parts[1:]
            try:
                if isinstance(raw_value, str):
                    value = _parse_stat_token(raw_value)
                else:
                    value = [_parse_stat_token(token) for token in raw_value]
            except (ValueError, TypeError, SyntaxError):
                # Only literal-parsing failures are tolerated; anything else
                # (e.g. KeyboardInterrupt) propagates.
                print(f"could not parse value {raw_value} for key {key}")
                value = raw_value

            stats[key] = value

    stats["sim_seconds_precise"] = calculate_sim_seconds(stats)
    return stats

In [20]:
import glob
def get_average_time(each_maybe_fast_sample, missing_time=820):
    """Average ``sim_seconds_precise`` over the stats files of one sample.

    Args:
        each_maybe_fast_sample: Directory searched for files matching
            ``gem5_stats.*.txt``.
        missing_time: Value returned when the directory holds no matching
            file. Defaults to 820, the constant that used to be hard-coded
            here. NOTE(review): the origin of 820 is not visible in this
            notebook; presumably it is meant to exceed any real measurement so
            such samples are never picked as fastest -- confirm.

    Returns:
        The arithmetic mean of ``sim_seconds_precise`` across all matching
        files, or ``missing_time`` when there are none.
    """
    stats_files = glob.glob(each_maybe_fast_sample + "/gem5_stats.*.txt")
    if not stats_files:
        return missing_time

    times = [parse_stats_txt(path)["sim_seconds_precise"] for path in stats_files]
    return sum(times) / len(times)

In [25]:
def compute_time(can_pass_all_testcase):
    """Pair every sample with its average time.

    Args:
        can_pass_all_testcase: Iterable of sample paths, each passed to
            ``get_average_time``.

    Returns:
        A list, in input order, of ``{"name": <sample>, "average_time": <time>}``
        dicts.
    """
    return [
        {"name": sample_dir, "average_time": get_average_time(sample_dir)}
        for sample_dir in can_pass_all_testcase
    ]

In [30]:
import glob
from tqdm import tqdm
# For every binary name, keep the fastest candidate among those that ran
# successfully on all test cases, then persist the choice as JSON.
valid_binary_name_count = 0
selected = dict()
# Sorted iteration keeps the key order of the written JSON reproducible; the
# iteration order of a set of strings can differ between interpreter sessions.
for binary_name in tqdm(sorted(binary_name_set)):
    # binary_name p00056_u155634813_s704265359_s598602018
    # glob returns matches in arbitrary order; sorting makes the tie-break
    # below (first candidate with the smallest time) deterministic.
    samples = sorted(glob.glob(os.path.join(benchmark_dir, f"{binary_name}_maybe_faster_*.out")))
    can_pass_all_testcase = []
    for sample in samples:
        # Map the generated file to its result directory, e.g.
        #   .../generate_out/p02714_u280802329_s680015951_s233802530_maybe_faster_1.out
        #   -> .../<gem5_testcases_name>/p02714_u280802329_s680015951_s233802530_maybe_faster_faster_1
        # The test-case directory name comes from the configuration cell rather
        # than being repeated here as a literal.
        # NOTE: the replacements act on the whole path, so they rely on these
        # substrings not occurring in any parent directory name.
        sample = sample.replace("generate_out", gem5_testcases_name)
        sample = sample.replace("faster", "faster_faster")
        sample = sample.replace(".out", "")

        flag_exec_right_gem5_right = result_statistics(os.path.join(sample, "analysis_result.json"))[0]
        if flag_exec_right_gem5_right:
            can_pass_all_testcase.append(sample)

    if len(can_pass_all_testcase) > 0:
        valid_binary_name_count += 1
        # Pick the candidate with the smallest average time; min() returns the
        # first such candidate, exactly like taking element 0 of a stable sort.
        time_result = compute_time(can_pass_all_testcase)
        best_fast = min(time_result, key=lambda x: x['average_time'])
        selected[binary_name] = best_fast["name"]


print(f"There are {valid_binary_name_count} valid binary name.")
print(f"There are {len(selected)} in dictionary.")
with open(f"/largespace/tydata/code_optimization/cpp/saved_models/{generated_model_name}/generate4/selected_fast.json", 'w') as writer:
    json.dump(selected, writer, indent=4)




100%|██████████| 1352/1352 [03:36<00:00,  6.25it/s]

There are 1344 valid binary name.
There are 1344 in dictionary.





In [41]:
with open(f"/largespace/tydata/code_optimization/cpp/saved_models/{generated_model_name}/generate4/selected_fast.json", 'r') as reader:
    selected_fast = json.load(reader)

# Keep the selected result directories whose second flag (all answers correct)
# is set, and count them.
correct = 0
all_right = []
for test_problem, benchmark_path in tqdm(selected_fast.items()):
    report_path = os.path.join(benchmark_path, "analysis_result.json")
    flag_exec_right_gem5_right, flag_all_right = result_statistics(report_path)
    # Entries were only selected when this flag was set, so it must still hold.
    assert flag_exec_right_gem5_right is True
    if not flag_all_right:
        continue
    correct += 1
    all_right.append(benchmark_path)

print(f"correct num = {correct}")
# One result directory per line.
with open(f"/largespace/tydata/code_optimization/cpp/saved_models/{generated_model_name}/generate4/generate_all_right.txt", 'w') as f:
    f.writelines(item + '\n' for item in all_right)

100%|██████████| 1344/1344 [00:00<00:00, 49854.03it/s]

correct num = 554





In [49]:
with open(f"/largespace/tydata/code_optimization/cpp/saved_models/{generated_model_name}/generate4/generate_all_right.txt", 'r') as f:
    binarys = f.read().splitlines()

# Id patterns are loop-invariant, so they are defined once.
pattern_problem_id = r'p\d+'
pattern_submission_id = r's\d+'
pattern_user_id = r'u\d+'

# Map "<problem>_<user>_<slow submission>_<fast submission>" to the average
# time of the selected, fully correct candidate.
results = {}
for binary in tqdm(binarys):
    # Each entry is a full directory path. Match the ids against its last
    # component only, so that a parent directory or model name that happens to
    # contain e.g. "s<digits>" or "p<digits>" cannot be picked up by mistake.
    sample_dir_name = os.path.basename(os.path.normpath(binary))

    problem_id = re.findall(pattern_problem_id, sample_dir_name)[0]
    submission_id_two = re.findall(pattern_submission_id, sample_dir_name)
    slow_submission_id = submission_id_two[0]
    fast_submission_id = submission_id_two[1]
    user_id = re.findall(pattern_user_id, sample_dir_name)[0]

    binary_name = f"{problem_id}_{user_id}_{slow_submission_id}_{fast_submission_id}"

    average_sim_seconds_precise = get_average_time(binary)
    results[binary_name] = average_sim_seconds_precise

with open(f"/largespace/tydata/code_optimization/cpp/saved_models/{generated_model_name}/generate4/average_time_tesecases3.json", 'w') as f:
    json.dump(results, f, indent=4)

100%|██████████| 554/554 [00:25<00:00, 21.53it/s]


In [50]:
def relative_improve(slow:float, fast:float):
    """Return ``(slow - fast) / slow`` rounded to 4 decimal places.

    The result is positive when ``fast`` is smaller than ``slow`` and negative
    when it is larger. Raises ``ZeroDivisionError`` when ``slow`` is 0.
    """
    saved = slow - fast
    return round(saved / slow, 4)

In [53]:
count = 0
can_improvement_10_count = 0
cannot_improvement_10_count = 0
can_speedup_count = 0
cannot_speedup_count = 0
speedups_all = []
can_speedups = []
speedup = 0

with open(f"/largespace/tydata/code_optimization/cpp/saved_models/{generated_model_name}/generate4/average_time_tesecases3.json", 'r') as gg:
    results = json.load(gg)

with open(f"/largespace/tydata/code_optimization/cpp/saved_models/{generated_model_name}/generate4/merge_generates.json", 'r') as f:
    originals = json.load(f)

# Number of binary names for which a candidate was selected; this is the
# denominator of the final mean (it used to be the literal 1344).
with open(f"/largespace/tydata/code_optimization/cpp/saved_models/{generated_model_name}/generate4/selected_fast.json", 'r') as reader:
    selected_total = len(json.load(reader))

for each_original in originals:
    problem_id = each_original["problem_id"]
    user_id = each_original["user_id"]
    slow_submission_id = each_original["slow_submission_id"]
    fast_submission_id = each_original["fast_submission_id"]
    slow_time = each_original["slow_time"]
    identifier = f"{problem_id}_{user_id}_{slow_submission_id}_{fast_submission_id}"
    # Only identifiers with a fully correct selected candidate appear in results.
    if identifier in results:
        count += 1
        maybe_faster_time = float(results[identifier])

        if maybe_faster_time < slow_time: #can speedup
            can_speedups.append(round(slow_time/maybe_faster_time, 4))
            can_speedup_count += 1
            # Split the faster candidates by whether they save more than 10%.
            if relative_improve(slow_time, maybe_faster_time) > 0.1:
                can_improvement_10_count += 1
            else:
                cannot_improvement_10_count += 1
        else: # cannot speedup 
            cannot_speedup_count += 1


print(f"all correct count: {count}")
print(f"cannot speedup: {cannot_speedup_count}")
print(f"can speedup: {can_speedup_count}")
print("-"*20)
print(f"can improvement 10 count: {can_improvement_10_count}")
print(f"cannot improvement 10 count: {cannot_improvement_10_count}")
print("-"*20)

# Mean speedup over all selected binary names. A candidate that is correct but
# not faster, or that is not fully correct, contributes a speedup of 1.0.
# These counts were previously the literals 59, 790 and 1344 copied from the
# output of an earlier run; deriving them keeps the figure right when the
# inputs change. The summation order is unchanged, so the result is identical
# for the recorded run.
not_all_correct_count = selected_total - count
final_speedup = (sum(can_speedups) + cannot_speedup_count*1.0 + not_all_correct_count*1.0) / selected_total
print(f"{final_speedup}")


all correct count: 554
cannot speedup: 59
can speedup: 495
--------------------
can improvement 10 count: 371
cannot improvement 10 count: 124
--------------------
2.029269791666665
