In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
def parse_output_file(file_path):
    """Parse a profiler text report into a table of relative times.

    The file is scanned line by line. Everything up to and including the
    line whose first whitespace-separated token is ``"ncalls"`` is ignored;
    every non-blank line after it is treated as a data row. From each row
    the fourth field is read as a time in seconds and fields six onwards
    are re-joined with single spaces to form the function name.
    NOTE(review): the layout looks like cProfile/pstats output, where the
    fourth column would be ``cumtime`` -- confirm against the producer.

    Parameters
    ----------
    file_path : str or path-like
        Path of the text report to read.

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order:

        * ``"percentage (of <t0> sec)"`` -- each row's time as a percentage
          of the first row's time ``t0`` (``t0`` is embedded in the label);
        * ``"module / function"`` -- the re-joined function name.

    Raises
    ------
    ValueError
        If no data row follows an ``ncalls`` header (previously this
        surfaced as an opaque ``IndexError``), or if the time field of a
        row is not a number.
    IndexError
        If a data row has fewer than four fields.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(file=file_path) as f:
        lines = f.readlines()

    times, names = [], []
    passed_header = False
    for line in lines:
        fields = line.split()
        if not fields:
            continue  # blank line
        if fields[0] == "ncalls":
            passed_header = True
        elif passed_header:
            times.append(float(fields[3]))
            # Names may contain spaces (e.g. "{built-in method ...}"), so
            # everything from the sixth field on belongs to the name.
            names.append(" ".join(fields[5:]))

    if not times:
        raise ValueError(
            "no profile rows found after an 'ncalls' header in {!r}".format(file_path)
        )

    # The first row is the reference: all percentages are relative to it.
    reference_time = times[0]
    data = pd.DataFrame({
        "percentage (of {} sec)".format(reference_time): 100 * np.array(times) / reference_time,
        "module / function": names
    })
    return data

In [3]:
def _format_duration(seconds):
    """Render a duration in seconds as hours, minutes or seconds (2 decimals)."""
    hours = seconds / 3600
    if hours > 1.0:
        return "{:.2f} hours".format(hours)
    minutes = 60 * hours
    if minutes > 1.0:
        return "{:.2f} min".format(minutes)
    return "{:.2f} sec".format(60 * minutes)


def get_avg_data(directory):
    """Average the parsed profile tables of all runs found in ``directory``.

    Every file named ``output_*.txt`` in ``directory`` -- except
    ``output_sdf_master.txt`` -- is parsed with ``parse_output_file``.
    Files are visited in sorted name order so that the result does not
    depend on the arbitrary order returned by ``os.listdir``.

    The first parsed file acts as the reference: its function names, in its
    row order, define the rows of the result. For each such name the
    percentage is averaged over the files that contain that name. If a name
    occurs more than once within a file, only its first row is used.

    The total run time of each file is read back from the label of its
    first column (second-to-last whitespace-separated token, i.e. the
    number in ``"percentage (of <t> sec)"``) and averaged over all files.

    Parameters
    ----------
    directory : str or path-like
        Directory containing the ``output_*.txt`` reports.

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order: ``"% of <average total run time>"``
        holding the averaged percentages, and ``"module / function"``.

    Raises
    ------
    FileNotFoundError
        If ``directory`` contains no matching report file (previously this
        surfaced as an ``IndexError``).
    """
    data_list = [
        parse_output_file(os.path.join(directory, filename))
        for filename in sorted(os.listdir(directory))
        if filename.startswith("output_")
        and filename.endswith(".txt")
        and filename != "output_sdf_master.txt"
    ]
    if not data_list:
        raise FileNotFoundError(
            "no 'output_*.txt' profile reports found in {!r}".format(directory)
        )

    # One name -> percentage lookup per file (first occurrence wins), plus
    # the running sum of the per-file total times taken from the labels.
    lookups = []
    t_total = 0.0
    for data in data_list:
        percentage_column = data.columns[0]
        first_seen = {}
        for name, percentage in zip(data["module / function"], data[percentage_column]):
            first_seen.setdefault(name, float(percentage))
        lookups.append(first_seen)
        t_total += float(percentage_column.split()[-2])
    t_total = t_total / len(data_list)

    # calculate averages
    names = list(data_list[0]["module / function"])
    times = []
    for name in names:
        # Every name comes from the first file, so at least one value exists.
        found = [lookup[name] for lookup in lookups if name in lookup]
        times.append(sum(found) / float(len(found)))

    avg_data = pd.DataFrame({
        "% of {}".format(_format_duration(t_total)): np.array(times),
        "module / function": names
    })
    return avg_data

In [4]:
# Averaged profile tables for the two run directories (CPU and GPU runs).
cpu_data = get_avg_data(directory="./2023-05-11_BO/CustomMean_cpu/")
gpu_data = get_avg_data(directory="./2023-05-11_BO/CustomMean_gpu/")

In [5]:
# Place the CPU and GPU tables side by side; the keys become the outer
# level of a two-level column index (CPU/GPU above the original labels).
data = pd.concat([cpu_data, gpu_data], axis=1, keys=["CPU", "GPU"])

In [6]:
# Show the first 50 rows of the combined table.
data.head(50)

Unnamed: 0_level_0,CPU,CPU,GPU,GPU
Unnamed: 0_level_1,% of 3.45 hours,module / function,% of 1.48 hours,module / function
0,100.0,{built-in method builtins.exec},100.0,{built-in method builtins.exec}
1,100.0,run_bo_agent.py:1(<module>),99.999992,run_bo_agent.py:1(<module>)
2,98.809184,bo_agent.py:157(run),98.826984,bo_agent.py:166(run)
3,73.851022,timeout.py:17(minimize_with_timeout),85.4029,timeout.py:17(minimize_with_timeout)
4,73.85084,_minimize.py:45(minimize),85.402598,_minimize.py:45(minimize)
5,73.848391,_lbfgsb_py.py:210(_minimize_lbfgsb),85.394634,_lbfgsb_py.py:210(_minimize_lbfgsb)
6,73.339233,_differentiable_functions.py:282(fun_and_grad),84.510299,_optimize.py:67(_compute_if_needed)
7,73.273978,_optimize.py:67(_compute_if_needed),84.475023,_differentiable_functions.py:249(_update_fun)
8,73.23681,_differentiable_functions.py:249(_update_fun),84.46665,_differentiable_functions.py:154(update_fun)
9,73.230035,_differentiable_functions.py:154(update_fun),84.459647,_differentiable_functions.py:132(fun_wrapped)
