In [1]:
import json
from pathlib import Path
from sklearn.cluster import KMeans
from sklearn import random_projection
import numpy as np
import random
from scipy.spatial import distance

workdir = Path().cwd()

In [2]:
# Random Seed at file level
random_seed = 627

np.random.seed(random_seed)
random.seed(random_seed)

benchmarks = ["bt", "cg", "ep", "ft", "is", "lu", "mg", "sp"]

In [8]:
def form_matrix(data):
    matrix = []
    order_list = {}
    for i in data["0"].keys():
        order_list[i] = len(order_list)
    for rid, rdata in data.items():
        row = [0] * len(order_list.keys())
        for event, value in rdata.items():
            if event == "runtime":
                avg_runtime = sum(value) / len(value)
                row[order_list["runtime"]] = avg_runtime
            else:
                row[order_list[event]] = int(value)
        matrix.append(row)
    return matrix

def find_rep_rid(data, labels, centers):
    rep_rid = {}
    for i, center in enumerate(centers):
        min = float('inf')
        min_rid = -1
        count = 0
        for j, label in enumerate(labels):
            if label == i:
                count += 1
                dist = distance.euclidean(center, data[j])
                if dist < min and j != 0:
                    min = dist
                    min_rid = j
        if min_rid != -1:
            rep_rid[i] = min_rid
        else:
            print("Error: No representative RID found for cluster")
            print(f"There are {count} RIDs in cluster {i}")

    return rep_rid
def find_cluster_rid(labels):
    clusters = {}
    for i, label in enumerate(labels):
        if str(label) not in clusters.keys():
            clusters[str(label)] = []
        clusters[str(label)].append(i)
    return clusters

def clustering(data, k):
    all_clusters = {}
    for i in range(10, k+1, 10):
        kmeans = KMeans(n_clusters=i, random_state=random_seed)
        kmeans.fit(data)

        centers = kmeans.cluster_centers_
        labels = kmeans.labels_
        inertia = kmeans.inertia_
        n_iter = kmeans.n_iter_
        print(i)
        rep_rid = find_rep_rid(data, labels.tolist(), centers.tolist())
        clusters = find_cluster_rid(labels.tolist())
        all_clusters[str(i)] = {
            "centers": centers.tolist(),
            "labels": labels.tolist(),
            "inertia": inertia,
            "n_iter": n_iter,
            "rep_rid": rep_rid,
            "clusters": clusters
        }

    return all_clusters

In [9]:
with open(workdir/"azacca-papi-profiling-data.json") as f:
    azacca_data = json.load(f)

bench_matrix = {}
all_clustering_data = {}

for bench in benchmarks:
    bench_matrix[bench] = form_matrix(azacca_data[bench])
    data = np.array(bench_matrix[bench])
    print(bench)
    all_clustering_data[bench] = clustering(data, 30)

with open(workdir/"cluster_based_on_uarch.json", "w") as f:
    json.dump(all_clustering_data, f, indent=4)

with open(Path(workdir/"marker_info.json"), "r") as f:
    marker_info = json.load(f)

bt


  super()._check_params_vs_input(X, default_n_init=10)


10


  super()._check_params_vs_input(X, default_n_init=10)


20


  super()._check_params_vs_input(X, default_n_init=10)


30
cg


  super()._check_params_vs_input(X, default_n_init=10)


10


  super()._check_params_vs_input(X, default_n_init=10)


20
Error: No representative RID found for cluster
There are 1 RIDs in cluster 10


  super()._check_params_vs_input(X, default_n_init=10)


30
Error: No representative RID found for cluster
There are 1 RIDs in cluster 9
ep


  super()._check_params_vs_input(X, default_n_init=10)


10


  super()._check_params_vs_input(X, default_n_init=10)


20


  super()._check_params_vs_input(X, default_n_init=10)


30
ft


  super()._check_params_vs_input(X, default_n_init=10)


10


  super()._check_params_vs_input(X, default_n_init=10)


20


  super()._check_params_vs_input(X, default_n_init=10)


30
is
10
20
30
Error: No representative RID found for cluster
There are 1 RIDs in cluster 22
lu


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


10


  super()._check_params_vs_input(X, default_n_init=10)


20


  super()._check_params_vs_input(X, default_n_init=10)


30
mg


  super()._check_params_vs_input(X, default_n_init=10)


10


  super()._check_params_vs_input(X, default_n_init=10)


20


  super()._check_params_vs_input(X, default_n_init=10)


30
sp


  super()._check_params_vs_input(X, default_n_init=10)


10


  super()._check_params_vs_input(X, default_n_init=10)


20


  super()._check_params_vs_input(X, default_n_init=10)


30


In [10]:
bash_script = ""
counter = 0
import shutil
for bench in benchmarks:
    bench_dir = Path(workdir/bench.upper())
    cluster_dir = Path(bench_dir/"clusters")
    if cluster_dir.exists():
        shutil.rmtree(cluster_dir)
    cluster_dir.mkdir(exist_ok=False)

    all_rid = set()
    for k in range(10, 31, 10):
        all_rid.update(set(all_clustering_data[bench][str(k)]["rep_rid"].values()))

    print(f"Total {len(all_rid)} regions for {bench}")
    counter += len(all_rid)
    
    for rid in all_rid:
        # print(f"Writing down {rid} for {bench}")
        region_markers = marker_info[bench][str(rid)]
        writedown = str(region_markers["warmup_marker_function_id"]) + "\n" \
                +  str(region_markers["warmup_marker_bbid"]) + "\n" \
                +  str(region_markers["warmup_marker_count"]) + "\n" \
                +  str(region_markers["start_marker_function_id"]) + "\n" \
                +  str(region_markers["start_marker_bbid"]) + "\n" \
                +  str(region_markers["start_marker_count"]) + "\n" \
                +  str(region_markers["end_marker_function_id"]) + "\n" \
                +  str(region_markers["end_marker_bbid"]) + "\n" \
                +  str(region_markers["end_marker_count"]) +"\n"
        file_path = Path(cluster_dir/f"{rid}.txt")
        with open(cluster_dir/f"{rid}.txt", "w") as f:
            f.write(writedown)
        bash_script += f"make papi PROGRAM={bench} REGION={rid} TARGET_ARCH=aarch64;\n"
        bash_script += f"make m5_fs PROGRAM={bench} REGION={rid} TARGET_ARCH=aarch64;\n"
        bash_script += f"make final_compile_papi PROGRAM={bench} REGION={rid} TARGET_ARCH=aarch64;\n"
        bash_script += f"make final_compile_m5_fs PROGRAM={bench} REGION={rid} TARGET_ARCH=aarch64;\n"

with open(workdir/"build_papi_m5_fs_aarch64_script.sh", "w") as f:
    f.write(bash_script)

print(f"Total {counter} regions")


Total 58 regions for bt
Total 47 regions for cg
Total 43 regions for ep
Total 48 regions for ft
Total 36 regions for is
Total 57 regions for lu
Total 44 regions for mg
Total 57 regions for sp
Total 390 regions
