In [126]:
import json
from pathlib import Path
import re

In [127]:
def get_weights_and_simpoints_from_file(
    simpoint_path: Path,
    weight_path: Path,
    simpoint_interval: int
):
    """
    Read a SimPoint file and its weight file and return the SimPoint
    starting instruction counts together with their weights.

    The two files are consumed in lock-step: the first whitespace-separated
    field of the i-th non-blank line of ``simpoint_path`` is parsed as an
    interval index (int) and the first field of the i-th non-blank line of
    ``weight_path`` is parsed as its weight (float). Extra lines in the
    weight file are ignored.

    Args:
        simpoint_path: path of the file listing one interval index per line.
        weight_path: path of the file listing one weight per line.
        simpoint_interval: multiplier applied to every interval index to
            obtain its starting instruction count.

    Returns:
        A tuple ``(simpoint_start_insts, weight_list)``; both lists are
        ordered by ascending interval index.

    Raises:
        ValueError: if the weight file has fewer entries than the SimPoint
            file, or if a field cannot be parsed as a number.
    """
    pairs = []
    with open(simpoint_path) as simpoint_file, open(
        weight_path
    ) as weight_file:
        # Lazily yield the first field of every non-blank weight line so
        # that surplus weight lines are never parsed.
        weight_fields = (
            line.split()[0] for line in weight_file if line.strip()
        )
        for line in simpoint_file:
            if not line.strip():
                # Tolerate blank (e.g. trailing) lines instead of crashing
                # in int().
                continue
            interval = int(line.split()[0])
            weight_field = next(weight_fields, None)
            if weight_field is None:
                raise ValueError(
                    f"not enough weights in {weight_path} for the "
                    f"SimPoints listed in {simpoint_path}"
                )
            pairs.append((interval, float(weight_field)))

    # Sort on the interval index only, so entries with equal indices keep
    # their file order.
    pairs.sort(key=lambda pair: pair[0])
    simpoint_start_insts = [
        interval * simpoint_interval for interval, _ in pairs
    ]
    weight_list = [weight for _, weight in pairs]
    return simpoint_start_insts, weight_list

In [128]:
# Root directory containing one sub-directory per input size; passed to
# get_json() as `checkpoint_path`.
_path = Path("/projects/gem5/riscv-spec2006/checkpoints/data/")
# Captures the numeric region id from a "cpt.SimPoint<N>" name. The dot is
# escaped so that only a literal "." is accepted between "cpt" and "SimPoint".
# NOTE(review): this name shadows the builtin compile(); it is kept because
# get_json() looks the pattern up under this name.
compile = re.compile(r"cpt\.SimPoint([0-9]+)")


        
        

In [129]:
def _parse_command_tokens(tokens):
    """
    Turn one whitespace-tokenized command line into an ``additional_params``
    dict.

    ``tokens[0]`` (the program itself) is skipped. A token following ``>``
    is collected under ``"stdout"``, one following ``2>>`` under
    ``"stderr"``, one following ``<`` under ``"stdin"``; every other token
    is collected under ``"arguments"``. Each value is a list, in order of
    appearance.

    Raises:
        ValueError: if a redirection operator is the last token and
            therefore has no target.
    """
    redirect_keys = {">": "stdout", "2>>": "stderr", "<": "stdin"}
    params = {}
    position = 1
    while position < len(tokens):
        token = tokens[position]
        key = redirect_keys.get(token)
        if key is None:
            params.setdefault("arguments", []).append(token)
        else:
            # The redirection target is the token right after the operator.
            position += 1
            if position >= len(tokens):
                raise ValueError(
                    f"redirection {token!r} has no target in command: "
                    + " ".join(tokens)
                )
            params.setdefault(key, []).append(tokens[position])
        position += 1
    return params


def get_json(
        checkpoint_path: Path,
        size: str,
        simpoint_interval: int = 100000000,
        command_dir: Path = Path(
            "/projects/gem5/riscv-spec2006/benchmark/modified_name_commands"
        ),
):
    """
    Walk ``checkpoint_path/size`` and write ``{size}.json`` describing the
    workloads, checkpoints, SimPoints and binaries found there.

    Expected layout, as read by this function:
    ``checkpoint_path/size/<benchmark>/<command>/`` containing
    ``se_checkpoint_dir/`` (entries whose name matches the module-level
    ``compile`` pattern), ``results.simpts`` and ``results.weights``. The
    binary of a benchmark is taken from ``<benchmark>/0/<name>_base.riscv``
    and its command lines from ``command_dir/<name>_<size>_command.txt``.

    Args:
        checkpoint_path: root directory holding one sub-directory per size.
        size: name of the size sub-directory; also used in every generated
            identifier and as the output file stem.
        simpoint_interval: interval length handed to
            ``get_weights_and_simpoints_from_file`` and recorded in every
            SimPoint entry.
        command_dir: directory holding the ``<name>_<size>_command.txt``
            files.

    Side effects:
        Writes ``{size}.json`` in the current working directory as a single
        JSON object that maps every generated identifier to its entry.

    Raises:
        ValueError: if a weight file is shorter than its SimPoint file or a
            command line ends with a redirection operator.
    """
    all_workload = {}
    all_simpoint = {}
    all_binary = {}
    all_checkpoint = {}
    size_root = Path(checkpoint_path) / size

    # sorted() makes the order of the emitted entries reproducible.
    for benchmark in sorted(size_root.iterdir()):
        if not benchmark.is_dir():
            continue
        name = benchmark.stem

        for command in sorted(benchmark.iterdir()):
            if not command.is_dir():
                continue
            index = command.stem

            for region in sorted((command / "se_checkpoint_dir").iterdir()):
                # Match on the entry name only, and skip stray entries that
                # are not SimPoint checkpoints instead of crashing on them.
                match = compile.search(region.name)
                if match is None:
                    continue
                rid = match.group(1)
                workload_id = f"{name}-{size}-{index}-simpoint-{rid}"
                checkpoint_id = f"riscv-{name}-{size}-{index}-simpoint-cpt-{rid}"
                all_workload[workload_id] = {
                    "type" : "workload",
                    "name" : workload_id,
                    "documentation" : f"SimPoint Workload for SPEC2006 {size} size {name} with command {index} and region {rid} checkpoint",
                    "function" : "set_se_binary_workload",
                    "resources" : {
                        "binary" : f"riscv-{name}-{size}-{index}",
                        # "simpoint" : f"riscv-{name}-{size}-{index}-simpoint",
                        "checkpoint" : checkpoint_id
                    }
                }
                all_checkpoint[checkpoint_id] = {
                    "type": "checkpoint",
                    "name" : checkpoint_id,
                    "documentation" : f"SimPoint checkpoint for SPEC2006 {size} size {name} with command {index} and region {rid}",
                    "architecture": "RISCV",
                    "is_zipped" : False,
                    "is_tar_archive" : False
                }

            simpoints = get_weights_and_simpoints_from_file(
                simpoint_path=command / "results.simpts",
                weight_path=command / "results.weights",
                simpoint_interval=simpoint_interval)
            if simpoints is None:
                # Give a readable error instead of an unpacking TypeError if
                # the reader signals a short weight file by returning None.
                raise ValueError(
                    f"not enough weights for the SimPoints in {command}"
                )
            simp_list, weight_list = simpoints
            simpoint_id = f"riscv-{name}-{size}-{index}-simpoint"
            all_simpoint[simpoint_id] = {
                "type" : "simpoint",
                "name" : simpoint_id,
                "simpoint_interval" : simpoint_interval,
                "simpoint_list" : simp_list,
                "weight_list" : weight_list
            }

        binary_path = (benchmark / "0" / f"{name}_base.riscv").as_posix()
        command_file = Path(command_dir) / f"{name}_{size}_command.txt"
        with open(command_file) as file:
            command_index = 0
            for raw_line in file:
                tokens = raw_line.split()
                # Only lines whose first token refers to the run directory of
                # this size describe a benchmark invocation. This must be an
                # f-string so that {size} is actually substituted.
                if tokens and f"../run_base_{size}" in tokens[0]:
                    all_binary[f"riscv-{name}-{size}-{command_index}"] = {
                        "type" : "resource",
                        "binary" : binary_path,
                        "additional_params" : _parse_command_tokens(tokens)
                    }
                    command_index += 1

    # Emit ONE JSON document: several json.dump() calls into the same file
    # would concatenate objects, which json.load() cannot parse.
    merged = {**all_workload, **all_checkpoint, **all_simpoint, **all_binary}
    with open(f"{size}.json", "w") as file:
        json.dump(merged, file, indent=2)

In [130]:
# Generate one JSON description file per input size.
for input_size in ("train", "ref"):
    get_json(checkpoint_path=_path, size=input_size)