In [99]:
import numpy as np

# Load every array of both archives eagerly into plain dicts, then close the
# underlying .npz files (dict(np.load(...)) alone leaves the handles open).
# NOTE: despite the variable names, the files are the batch_8 and batch_128
# variants of inception_v2, not batch 16 / batch 32.
with np.load("/home/thanh/google_fast_or_slow/data/npz_all/npz/layout/xla_compressed/random/train/inception_v2_batch_8_train.npz") as npz_file:
    bs16 = dict(npz_file)
with np.load("/home/thanh/google_fast_or_slow/data/npz_all/npz/layout/xla_compressed/random/train/inception_v2_batch_128_train.npz") as npz_file:
    bs32 = dict(npz_file)

In [100]:
# Side-by-side shapes of every array stored in the two archives
# (bs32 is looked up with the keys of bs16).
for key, bs16_array in bs16.items():
    print(key, bs16_array.shape, bs32[key].shape)

edge_index (782, 2) (782, 2)
node_feat (768, 140) (768, 140)
node_opcode (768,) (768,)
node_config_feat (7533, 216, 3) (5705, 216, 3)
node_config_ids (216,) (216,)
node_splits (1, 3) (1, 3)
config_runtime (7533,) (5705,)


In [104]:
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool, cpu_count, Manager

def init_worker(bs16_data, bs32_data, shared_list):
    """Pool initializer: publish the inputs as module-level globals.

    Args:
        bs16_data: stored as the global ``bs16_node_config_feat``.
        bs32_data: stored as the global ``bs32_node_config_feat``.
        shared_list: stored as the global ``similar_pairs_list``.
    """
    global bs16_node_config_feat, bs32_node_config_feat, similar_pairs_list
    bs16_node_config_feat, bs32_node_config_feat, similar_pairs_list = (
        bs16_data,
        bs32_data,
        shared_list,
    )

def find_similar_pairs(i, bs32_range):
    """Find rows of the bs32 feature array identical to row ``i`` of bs16.

    Reads the globals ``bs16_node_config_feat`` and ``bs32_node_config_feat``
    set by ``init_worker``.

    Args:
        i: index into ``bs16_node_config_feat``.
        bs32_range: iterable of indices into ``bs32_node_config_feat`` to test.

    Returns:
        List of ``(i, j)`` tuples, in iteration order, for every ``j`` whose
        entry equals entry ``i`` element-wise.
    """
    # No progress bar here: tqdm is not well supported across processes, so
    # progress is reported from the main process instead.
    target = bs16_node_config_feat[i]
    # np.array_equal also requires identical shapes, so entries that merely
    # broadcast against each other are never reported as matches.
    return [
        (i, j)
        for j in bs32_range
        if np.array_equal(target, bs32_node_config_feat[j])
    ]

def update_progress_bar(job, pbar):
    """Call ``pbar.update()`` once for every item produced by iterating ``job``."""
    pending = iter(job)
    exhausted = object()  # unique sentinel marking the end of iteration
    while next(pending, exhausted) is not exhausted:
        pbar.update()


# Arrays and sizes used repeatedly below.
bs16_configs = bs16["node_config_feat"]
bs32_configs = bs32["node_config_feat"]
num_bs16_rows = bs16_configs.shape[0]
bs32_rows = range(bs32_configs.shape[0])

# Number of workers equal to the number of CPU cores
num_workers = cpu_count()

# Each task returns its matches through the apply_async result, so no
# cross-process shared list is needed; init_worker's shared_list slot gets None.
# The context manager ensures worker processes are cleaned up properly.
with Pool(processes=num_workers, initializer=init_worker, initargs=(bs16_configs, bs32_configs, None)) as pool:
    with tqdm(total=num_bs16_rows) as pbar:
        # One task per bs16 row; the callback advances the bar as tasks finish.
        jobs = [
            pool.apply_async(find_similar_pairs, args=(i, bs32_rows), callback=lambda _: pbar.update())
            for i in range(num_bs16_rows)
        ]

        # Collect results (job.get() re-raises any exception from the worker).
        results = [job.get() for job in jobs]

# Flatten the per-row lists into one list of (bs16_row, bs32_row) pairs.
similar_pairs = [pair for sublist in results for pair in sublist]

# Guard the ratio so an empty bs16 array does not raise ZeroDivisionError.
match_ratio = len(similar_pairs) / num_bs16_rows if num_bs16_rows else 0.0
print(len(similar_pairs), num_bs16_rows, match_ratio)


  0%|          | 0/5000 [00:00<?, ?it/s]

100%|██████████| 5000/5000 [00:25<00:00, 194.18it/s]

0 7533 0.0





In [105]:
# Display the (bs16 row, bs32 row) index pairs collected by the search above.
similar_pairs

[]

In [89]:
# For every matched pair, print both runtimes and the bs16/bs32 runtime ratio.
# (The tuple expression that used to open this cell was evaluated and thrown
# away, since only a cell's last expression is displayed; it has been removed.)
for bs16_row, bs32_row in similar_pairs:
    bs16_runtime = bs16["config_runtime"][bs16_row]
    bs32_runtime = bs32["config_runtime"][bs32_row]
    print(
        bs16_runtime,
        bs32_runtime,
        "\t ratio =", bs16_runtime / bs32_runtime
    )

5476257 10024139 	 ratio = 0.546306969606068
5478625 10026062 	 ratio = 0.5464383723140751
5513079 10041219 	 ratio = 0.5490447922707392
5512130 10043622 	 ratio = 0.5488189420111589
5479048 10018717 	 ratio = 0.546881202453368
5973237 10646859 	 ratio = 0.5610327891070972
5476740 10023782 	 ratio = 0.5463746118979842
5474530 10018082 	 ratio = 0.546464882200006


In [93]:
# Runtime ordering (argsort) of the matched configs, computed separately for
# each file: element 0 of every pair indexes bs16, element 1 indexes bs32.
tuple(
    np.argsort(data["config_runtime"][[pair[side] for pair in similar_pairs]])
    for side, data in enumerate((bs16, bs32))
)


(array([7, 0, 6, 1, 4, 3, 2, 5]), array([7, 4, 6, 0, 1, 2, 3, 5]))

In [114]:
# For each test file, find the train/valid files that have the same graph architecture (identical edge_index).
import os
import numpy
from tqdm import tqdm

def _load_edge_index(npz_path):
    """Return the ``edge_index`` array stored in the ``.npz`` file at ``npz_path``."""
    # Close the archive as soon as the array is read so file handles do not
    # accumulate while looping over many files.
    with numpy.load(npz_path) as npz_file:
        return npz_file["edge_index"]


for source in ["xla", "nlp"]:
    for search in ["random", "default"]:
        # Graph architecture is identified by a file's edge_index array.
        train_folder = f"/home/thanh/google_fast_or_slow/data/npz_all/npz/layout/{source}_compressed/{search}/train"
        val_folder = f"/home/thanh/google_fast_or_slow/data/npz_all/npz/layout/{source}_compressed/{search}/valid"
        test_folder = f"/home/thanh/google_fast_or_slow/data/npz_all/npz/layout/{source}_compressed/{search}/test"

        # Full paths of every file in each split.
        train_files = [os.path.join(train_folder, f) for f in os.listdir(train_folder)]
        val_files = [os.path.join(val_folder, f) for f in os.listdir(val_folder)]
        test_files = [os.path.join(test_folder, f) for f in os.listdir(test_folder)]

        # Map filename -> edge_index for all train and valid files. The paths
        # are already complete, so they are loaded as-is rather than being
        # joined with a folder a second time.
        train_files_dict = {
            os.path.basename(path): _load_edge_index(path)
            for path in tqdm(train_files + val_files)
        }

        # Map test filename -> names of train/valid files with an identical
        # edge_index; test files without any match get no entry.
        test_files_dict = {}
        for path in tqdm(test_files):
            edge_index = _load_edge_index(path)
            test_name = os.path.basename(path)
            for train_name, train_edge_index in train_files_dict.items():
                # array_equal is False on any shape mismatch, so differently
                # sized graphs never compare equal.
                if numpy.array_equal(edge_index, train_edge_index):
                    test_files_dict.setdefault(test_name, []).append(train_name)

        print("Source:", source, "Search:", search)
        print("count/total", len(test_files_dict), len(test_files))

        for k, v in test_files_dict.items():
            print(k, v)

        print("-----------" * 10)

100%|██████████| 76/76 [00:00<00:00, 1601.95it/s]
100%|██████████| 8/8 [00:00<00:00, 1301.57it/s]


Source: xla Search: random
count/total 5 8
937ee0eb0d5d6151b7b8252933b5c1c9.npz ['resnet50.2x2.fp32.npz']
5335ed13823b0a518ee3c79ba4425f34.npz ['efficientnet_b7_eval_batch_1.npz']
db59a991b7c607634f13570d52ce885f.npz ['resnet_v1_50_official_batch_128_f32.npz']
05ae41e26dd3c4c06390371a0423233c.npz ['efficientnet_b7_eval_batch_1.npz']
fbaa8bb6a1aed9988281085c91065c05.npz ['inference_mlperf_ssd_1200_batch_128.npz']
--------------------------------------------------------------------------------------------------------------


100%|██████████| 68/68 [00:00<00:00, 1675.32it/s]
100%|██████████| 8/8 [00:00<00:00, 1366.78it/s]


Source: xla Search: default
count/total 5 8
937ee0eb0d5d6151b7b8252933b5c1c9.npz ['resnet50.2x2.fp32.npz']
5335ed13823b0a518ee3c79ba4425f34.npz ['efficientnet_b7_eval_batch_1.npz']
db59a991b7c607634f13570d52ce885f.npz ['resnet_v1_50_official_batch_128_f32.npz']
05ae41e26dd3c4c06390371a0423233c.npz ['efficientnet_b7_eval_batch_1.npz']
fbaa8bb6a1aed9988281085c91065c05.npz ['inference_mlperf_ssd_1200_batch_128.npz']
--------------------------------------------------------------------------------------------------------------


100%|██████████| 227/227 [00:00<00:00, 1897.61it/s]
100%|██████████| 17/17 [00:00<00:00, 1451.58it/s]


Source: nlp Search: random
count/total 17 17
d15316c12eefdef1ba549eb433797f77.npz ['small_bert_bert_en_uncased_L-8_H-512_A-8_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-8_H-256_A-4_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-8_H-128_A-2_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-8_H-768_A-12_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-8_H-256_A-4_batch_size_64_test.npz', 'small_bert_bert_en_uncased_L-8_H-768_A-12_batch_size_64_test.npz', 'small_bert_bert_en_uncased_L-8_H-768_A-12_batch_size_16_test.npz', 'small_bert_bert_en_uncased_L-8_H-128_A-2_batch_size_16_test.npz', 'small_bert_bert_en_uncased_L-8_H-512_A-8_batch_size_16_test.npz']
7f6284ebe027b1e9a3850fc703858a59.npz ['experts_wiki_books_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-12_H-512_A-8_batch_size_64_test.npz', 'small_bert_bert_en_uncased_L-12_H-512_A-8_batch_size_16_test.npz', 'electra_base_batch_size_32_test.npz', 'experts_pubmed_batch_size_64_test.npz', 'small_bert_

100%|██████████| 218/218 [00:00<00:00, 1874.89it/s]
100%|██████████| 17/17 [00:00<00:00, 1413.29it/s]

Source: nlp Search: default
count/total 17 17
d15316c12eefdef1ba549eb433797f77.npz ['small_bert_bert_en_uncased_L-8_H-512_A-8_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-8_H-256_A-4_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-8_H-128_A-2_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-8_H-768_A-12_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-8_H-256_A-4_batch_size_64_test.npz', 'small_bert_bert_en_uncased_L-8_H-768_A-12_batch_size_64_test.npz', 'small_bert_bert_en_uncased_L-8_H-768_A-12_batch_size_16_test.npz', 'small_bert_bert_en_uncased_L-8_H-512_A-8_batch_size_16_test.npz']
7f6284ebe027b1e9a3850fc703858a59.npz ['experts_wiki_books_batch_size_32_test.npz', 'small_bert_bert_en_uncased_L-12_H-512_A-8_batch_size_64_test.npz', 'small_bert_bert_en_uncased_L-12_H-512_A-8_batch_size_16_test.npz', 'electra_base_batch_size_32_test.npz', 'experts_pubmed_batch_size_64_test.npz', 'small_bert_bert_en_uncased_L-12_H-768_A-12_batch_size_32_test.npz', 'bert_en_


