In [1]:
import glob
import os

import pandas as pd

In [12]:
from configs import config_generator

# Build one config set per benchmarked architecture, in the order
# ResNet, RetinaNet, Mask R-CNN, BERT. Every generator is called with the same
# positional flag.
# NOTE(review): the flag presumably selects training-mode configs -- confirm
# against configs.config_generator.
config_set_list = [
    generate_configs(True)
    for generate_configs in (
        config_generator.generate_resnet_mlb_configs,
        config_generator.generate_retinanet_mlb_configs,
        config_generator.generate_maskrcnn_mlb_configs,
        config_generator.generate_bert_mlb_configs,
    )
]

In [13]:
import re

def block_repeat(filepath):
    """Read the number that sits directly before the file extension.

    Args:
        filepath: Path or file name, e.g. ``'results/resnet_12.csv'``.

    Returns:
        The run of digits immediately preceding the final ``.<ext>`` suffix,
        converted to ``int``; ``None`` when no such digits are present.
    """
    # The lookahead pins the digits to the position right before the extension
    # that ends the string, so numbers elsewhere in the path are ignored.
    found = re.search(r'(\d+)(?=\.\w+$)', filepath)
    return int(found.group(1)) if found else None

def extract_l_hs(filename):
    """Parse the two integers out of an ``l_<L>_hs_<HS>.csv`` file name.

    Args:
        filename: File name or path containing ``l_<digits>_hs_<digits>.csv``.

    Returns:
        A ``(l, hs)`` tuple of ints taken from the two digit groups, or
        ``None`` when the name does not contain that pattern.
    """
    found = re.search(r'l_(\d+)_hs_(\d+)\.csv', filename)
    if found is None:
        return None
    # Both capture groups are pure digit runs, so int() cannot fail here.
    return tuple(int(digits) for digits in found.groups())

In [15]:
# Aggregate the per-run result CSVs of every config set:
#   1. per experiment directory -> <exp_dir>/{l_hs,block_repeat}_aggregated_<mode>.csv
#   2. across all config sets   -> ./intensity-experiments/<mode>_aggregated_ver1.0.csv
# The sweep parameters (num_layers/hidden_size for BERT, block_repeat for the
# other architectures) are recovered from each result file's name.
df_all_list = []
modes_seen = set()
for config_set in config_set_list:

    exp_dir_path = f'./intensity-experiments/{config_set[0].experiment_dir}/'
    model_arch = config_set[0].model_arch
    is_train = 'train' if config_set[0].is_train else 'inference'
    modes_seen.add(is_train)

    # Skip the *_aggregated_* files that this cell itself writes into the same
    # directory; otherwise a re-run reads its own output back in and duplicates
    # every row. Sorting makes the read order independent of the filesystem.
    result_paths = sorted(
        path for path in glob.glob(exp_dir_path + '*')
        if '_aggregated_' not in os.path.basename(path)
    )
    if not result_paths:
        # Fail with the offending directory instead of pandas' generic
        # "No objects to concatenate".
        raise ValueError(f'no result files found in {exp_dir_path}')

    df_list = []
    for path in result_paths:
        curr_df = pd.read_csv(path)
        if model_arch == 'bert':
            extracted = extract_l_hs(path)
            if extracted is not None:
                l, hs = extracted
                curr_df['num_layers'] = l
                curr_df['hidden_size'] = hs
        else:
            extracted = block_repeat(path)
            # Compare against None, not truthiness: a block_repeat of 0 is a
            # valid value and must still be recorded.
            if extracted is not None:
                curr_df['block_repeat'] = extracted
        df_list.append(curr_df)

    if model_arch == 'bert':
        results_df = pd.concat(df_list, ignore_index=True).sort_values(by=['hidden_size','num_layers'])
        results_df.to_csv(f'{exp_dir_path}l_hs_aggregated_{is_train}.csv', index=False)
    else:
        results_df = pd.concat(df_list, ignore_index=True).sort_values(by='block_repeat')
        results_df.to_csv(f'{exp_dir_path}block_repeat_aggregated_{is_train}.csv', index=False)

    df_all_list.append(results_df)

# The combined file name carries a single mode, so refuse to label a mix of
# train and inference results with whichever mode happened to come last.
if len(modes_seen) > 1:
    raise ValueError(
        f'config sets mix modes {sorted(modes_seen)}; '
        'cannot write one combined aggregated file'
    )

df_all = pd.concat(df_all_list, ignore_index=True)
# Sort only by the sweep columns that are actually present, so the cell still
# runs when config_set_list holds only BERT or only non-BERT sets. At least one
# of these columns always exists because each per-set frame was sorted by it.
sort_columns = [
    column for column in ('block_repeat', 'hidden_size', 'num_layers')
    if column in df_all.columns
]
df_all = df_all.sort_values(by=sort_columns)
df_all.to_csv(f'./intensity-experiments/{is_train}_aggregated_ver1.0.csv', index=False)


In [9]:
# Display the per-config-set frame left over from the last iteration of the
# aggregation loop, i.e. the final entry of config_set_list.
results_df

Unnamed: 0,flops,imem,pmem,ai_estimate,num_layers,hidden_size
12,7621672000.0,394485900.0,178349200.0,13.305176,12,360
0,7974090000.0,416862500.0,189295400.0,13.155137,13,360
2,10526550000.0,477597000.0,249226000.0,14.482969,12,480
14,11025400000.0,505430700.0,264738400.0,14.315556,13,480
11,13617600000.0,566468000.0,325862800.0,15.260708,12,600
3,14277620000.0,600219700.0,346402300.0,15.082703,13,600
1,16894810000.0,661099000.0,408259600.0,15.799011,12,720
13,17730740000.0,701229400.0,434286900.0,15.614698,13,720
4,21384780000.0,808460000.0,528392300.0,15.996366,13,840
21,20358180000.0,761490100.0,496416400.0,16.184177,12,840
