In [1]:
import json
from pathlib import Path
from sklearn.cluster import KMeans
from sklearn import random_projection
import numpy as np
import random
from scipy.spatial import distance

workdir = Path("/home/studyztp/test_ground/experiments/hardware-profiling/NPB_protocol/NPB3.4.2/NPB3.4-OMP")

In [2]:
# Random Seed at file level
random_seed = 627

np.random.seed(random_seed)
random.seed(random_seed)

benchmark = ["bt", "cg", "ep", "ft", "is", "lu", "mg", "sp"]

In [3]:
def find_rep_rid(data, labels, centers):
    rep_rid = {}
    for i, center in enumerate(centers):
        min = float('inf')
        min_rid = -1
        for j, label in enumerate(labels):
            if label == i:
                dist = distance.euclidean(center, data[j])
                if dist < min and j != 0 and j != 1:
                    min = dist
                    min_rid = j
        rep_rid[i] = min_rid

    return rep_rid
def find_cluster_rid(labels):
    clusters = {}
    for i, label in enumerate(labels):
        if str(label) not in clusters.keys():
            clusters[str(label)] = []
        clusters[str(label)].append(i)
    return clusters

In [4]:
def clustering(data, k):
    all_clusters = {}
    for i in range(10, k+1, 10):
        kmeans = KMeans(n_clusters=i, random_state=random_seed)
        kmeans.fit(data)

        centers = kmeans.cluster_centers_
        labels = kmeans.labels_
        inertia = kmeans.inertia_
        n_iter = kmeans.n_iter_
        rep_rid = find_rep_rid(data, labels.tolist(), centers.tolist())
        clusters = find_cluster_rid(labels.tolist())
        all_clusters[i] = {
            "centers": centers.tolist(),
            "labels": labels.tolist(),
            "inertia": inertia,
            "n_iter": n_iter,
            "rep_rid": rep_rid,
            "clusters": clusters
        }

    return all_clusters


In [5]:
# with open(workdir/"basic_block_matrix_info.json", "r") as f:
#     basic_block_matrix_info = json.load(f)

# all_clustering_data = {}

# for bench in benchmark:
#     data = basic_block_matrix_info[bench]["matrix"]
#     data = np.array(data)
#     transformer = random_projection.SparseRandomProjection(n_components=15, random_state=random_seed)
#     data = transformer.fit_transform(data)
#     all_clustering_data[bench] = clustering(data, 30)

# with open(workdir/"clustering_data.json", "w") as f:
#     json.dump(all_clustering_data, f, indent=4)

with open(workdir/"clustering_data.json", "r") as f:
    all_clustering_data = json.load(f)

In [6]:
with open(Path(workdir/"marker_info.json"), "r") as f:
    marker_info = json.load(f)

In [10]:
bash_script = ""
import shutil
for bench in benchmark:
    bench_dir = Path(workdir/bench.upper())
    cluster_dir = Path(bench_dir/"clusters")
    if cluster_dir.exists():
        shutil.rmtree(cluster_dir)
    cluster_dir.mkdir(exist_ok=False)

    all_rid = set()
    for k in range(10, 31, 10):
        all_rid.update(set(all_clustering_data[bench][str(k)]["rep_rid"].values()))
    
    for rid in all_rid:
        print(f"Writing down {rid} for {bench}")
        region_markers = marker_info[bench][str(rid)]
        writedown = str(region_markers["warmup_marker_function_id"]) + "\n" \
                +  str(region_markers["warmup_marker_bbid"]) + "\n" \
                +  str(region_markers["warmup_marker_count"]) + "\n" \
                +  str(region_markers["start_marker_function_id"]) + "\n" \
                +  str(region_markers["start_marker_bbid"]) + "\n" \
                +  str(region_markers["start_marker_count"]) + "\n" \
                +  str(region_markers["end_marker_function_id"]) + "\n" \
                +  str(region_markers["end_marker_bbid"]) + "\n" \
                +  str(region_markers["end_marker_count"]) +"\n"
        file_path = Path(cluster_dir/f"{rid}.txt")
        with open(cluster_dir/f"{rid}.txt", "w") as f:
            f.write(writedown)
        bash_script += f"REGION={rid}\n"
        bash_script += f"cp {file_path.as_posix()} /home/studyztp/stuffs/info.txt\n"
        bash_script += f"cd {bench_dir.as_posix()} && "
        bash_script += "make m5_fs REGION=${REGION}\n"
        bash_script += f"cd {bench_dir.as_posix()} && "
        bash_script += "make papi REGION=${REGION}\n"

with open(workdir/"run_script.sh", "w") as f:
    f.write(bash_script)




Writing down 10115 for bt
Writing down 901 for bt
Writing down 1797 for bt
Writing down 8 for bt
Writing down 1418 for bt
Writing down 2700 for bt
Writing down 12558 for bt
Writing down 10767 for bt
Writing down 13206 for bt
Writing down 14870 for bt
Writing down 11416 for bt
Writing down 15254 for bt
Writing down 15003 for bt
Writing down 167 for bt
Writing down 8489 for bt
Writing down 12716 for bt
Writing down 13868 for bt
Writing down 9134 for bt
Writing down 5299 for bt
Writing down 12724 for bt
Writing down 8886 for bt
Writing down 13373 for bt
Writing down 5054 for bt
Writing down 12479 for bt
Writing down 7745 for bt
Writing down 14021 for bt
Writing down 9541 for bt
Writing down 4680 for bt
Writing down 3913 for bt
Writing down 14922 for bt
Writing down 3831 for bt
Writing down 8396 for bt
Writing down 10193 for bt
Writing down 4820 for bt
Writing down 84 for bt
Writing down 2772 for bt
Writing down 8407 for bt
Writing down 3672 for bt
Writing down 10329 for bt
Writing down 23