In [39]:
# %%
import pandas as pd, json, re, os, natsort
import matplotlib.pyplot as plt, numpy as np
import seaborn as sns
import random
# Apply seaborn's default theme to every matplotlib figure created afterwards.
sns.set()
# NOTE(review): color_palette() only *returns* the "flare" palette; it does not
# make it the active default. If changing the default was the intent,
# sns.set_palette("flare") is the call to use -- confirm with the author.
sns.color_palette("flare")
# %%

In [40]:

# Display settings: show every column, and never truncate cell contents
# (the "Source Function / Function / Call Stack" values are very long).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# %%
# Input locations: operator -> C/C++ function mapping, and the uarch export.
uarch_file ='uarch.csv'
mapping_file = './mapping_funcs.json'

# Parse the JSON mapping.
with open(mapping_file) as mapping_fh:
    data = json.load(mapping_fh)

# Every entry under data['op_to_func'][<op>] is reduced to the text before its
# first '|'; the set removes names that appear under more than one op.
cpp_funcs = {
    entry.split('|')[0]
    for op_name in data['op_to_func']
    for entry in data['op_to_func'][op_name]
}
interested_functions = [*cpp_funcs]


In [41]:

# Name of the column holding the function names in the uarch export.
_FUNC_COL = 'Source Function / Function / Call Stack'

# Columns that are dropped before plotting.
_DROPPED_COLS = [
    "Source File",
    "Start Address",
    "Module",
    "Average CPU Frequency",
    "Instructions Retired",
    "CPI Rate",
    "Function (Full)",
]

# Metric columns stored in the export as "<number>%" strings.
_PERCENTAGE_COLS = [
    'Retiring',
    'Front-End Bound',
    'Bad Speculation',
    'L1 Bound',
    'L2 Bound',
    'L3 Bound',
    'Memory Bandwidth',
    'Local Memory',
    'Remote Memory',
    'Remote Cache',
    'Store Bound',
    'Core Bound',
]

# Exact-match renames: fully expanded C++ symbol name -> short plot label.
# Each key must stay on one physical line; a plain string literal cannot span lines.
_FUNC_RENAME = {
    "c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::TensorIteratorBase::loop_2d_from_1d<at::native::(anonymous namespace)::cpu_index_kernel<float, at::native::(anonymous namespace)::index_kernel(at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(char*char*, long)#1}>(void, at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::(anonymous namespace)::index_kernel(at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(char*char*, long)#1} const&, bool)::{lambda(char**long const*, long)#1}>(, signed char, at::native::(anonymous namespace)::cpu_index_kernel<float, at::native::(anonymous namespace)::index_kernel(at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(char*char*, long)#1}>(void, at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::(anonymous namespace)::index_kernel(at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(char*char*, long)#1} const&, bool)::{lambda(char**long const*, long)#1} const&)::{lambda(char**long const*, long, long)#1}>":
        "at::native::index_kernel",
    "c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::TensorIteratorBase::loop_2d_from_1d<at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::mul_kernel(at::TensorIteratorBase&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(floatfloat)#1}, at::native::(anonymous namespace)::mul_kernel(at::TensorIteratorBase&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::mul_kernel(at::TensorIteratorBase&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(floatfloat)#1}&&, at::native::(anonymous namespace)::mul_kernel(at::TensorIteratorBase&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}&&, long)::{lambda(char**long const*, long)#2}>(, signed char, at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::mul_kernel(at::TensorIteratorBase&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(floatfloat)#1}, at::native::(anonymous namespace)::mul_kernel(at::TensorIteratorBase&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::mul_kernel(at::TensorIteratorBase&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(floatfloat)#1}&&, at::native::(anonymous namespace)::mul_kernel(at::TensorIteratorBase&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}&&, long)::{lambda(char**long const*, long)#2} const&)::{lambda(char**long const*, long, long)#1}>":
        "at::native::mul_kernel",
    "c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::TensorIteratorBase::loop_2d_from_1d<at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::add_kernel(at::TensorIteratorBase&, c10::Scalar const&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(floatfloat)#1}, at::native::(anonymous namespace)::add_kernel(at::TensorIteratorBase&, c10::Scalar const&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::add_kernel(at::TensorIteratorBase&, c10::Scalar const&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(floatfloat)#1}&&, at::native::(anonymous namespace)::add_kernel(at::TensorIteratorBase&, c10::Scalar const&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}&&, long)::{lambda(char**long const*, long)#2}>(, signed char, at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::add_kernel(at::TensorIteratorBase&, c10::Scalar const&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(floatfloat)#1}, at::native::(anonymous namespace)::add_kernel(at::TensorIteratorBase&, c10::Scalar const&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::add_kernel(at::TensorIteratorBase&, c10::Scalar const&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(floatfloat)#1}&&, at::native::(anonymous namespace)::add_kernel(at::TensorIteratorBase&, c10::Scalar const&)::{lambda()#2}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}&&, long)::{lambda(char**long const*, long)#2} const&)::{lambda(char**long const*, long, long)#1}>":
        "at::native::add_kernel",
    "c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::TensorIteratorBase::loop_2d_from_1d<at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(float)#1}, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(float)#1}&&, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>)#2}&&, long)::{lambda(char**long const*, long)#2}>(, signed char, at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(float)#1}, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(float)#1}&&, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>)#2}&&, long)::{lambda(char**long const*, long)#2} const&)::{lambda(char**long const*, long, long)#1}>":
        "at::native::copy_kernel(float)",
    "c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::TensorIteratorBase::loop_2d_from_1d<at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::fill_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#1}, at::native::(anonymous namespace)::fill_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::fill_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#1}&&, at::native::(anonymous namespace)::fill_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#2}&&, long)::{lambda(char**long const*, long)#2}>(, signed char, at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::fill_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#1}, at::native::(anonymous namespace)::fill_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::fill_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#1}&&, at::native::(anonymous namespace)::fill_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#2}&&, long)::{lambda(char**long const*, long)#2} const&)::{lambda(char**long const*, long, long)#1}>":
        "at::native::fill_kernel",
    "c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::TensorIteratorBase::loop_2d_from_1d<at::native::(anonymous namespace)::cpu_kernel<at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#11}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#1}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(unsigned char)#1}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#11}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#1}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(unsigned char)#1}&&, long)::{lambda(char**long const*, long)#1}>(, signed char, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#11}::operator()(void) const::{lambda()#8}::operator()(void) const::{lambda()#1}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(unsigned char)#1} const&)::{lambda(char**long const*, long, long)#1}>":
        "at::native::copy_kernel",
    "c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::TensorIteratorBase::loop_2d_from_1d<at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#4}::operator()(void) const::{lambda(floatfloat)#1}, at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#4}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#4}::operator()(void) const::{lambda(floatfloat)#1}&&, at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#4}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}&&, long)::{lambda(char**long const*, long)#2}>(, signed char, at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#4}::operator()(void) const::{lambda(floatfloat)#1}, at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#4}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#4}::operator()(void) const::{lambda(floatfloat)#1}&&, at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#4}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<float>at::vec::(anonymous namespace)::Vectorized<float>)#2}&&, long)::{lambda(char**long const*, long)#2} const&)::{lambda(char**long const*, long, long)#1}>":
        "at::native::div_true_kernel",
    "c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::TensorIteratorBase::loop_2d_from_1d<at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(unsigned char)#1}, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<unsigned char>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(unsigned char)#1}&&, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<unsigned char>)#2}&&, long)::{lambda(char**long const*, long)#2}>(, signed char, at::native::(anonymous namespace)::cpu_kernel_vec<(bool)1, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(unsigned char)#1}, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<unsigned char>)#2}>(void, at::TensorIteratorBase&, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(unsigned char)#1}&&, at::native::(anonymous namespace)::copy_kernel(at::TensorIterator&, bool)::{lambda()#6}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(at::vec::(anonymous namespace)::Vectorized<unsigned char>)#2}&&, long)::{lambda(char**long const*, long)#2} const&)::{lambda(char**long const*, long, long)#1}>":
        "at::native::copy_kernel(unsigned char)",
}


def _find_function_rows(df, functions):
    """Locate the rows of ``df`` whose function name contains each given name.

    Args:
        df: Frame with a ``_FUNC_COL`` column and a default 0..n-1 index
            (the caller resets the index first, so index labels double as
            row positions).
        functions: Iterable of substrings to search for (matched literally).

    Returns:
        ``(found, missing)``: ``found`` maps each matched name to an array of
        row positions; ``missing`` lists the names that matched no row.
    """
    names = df[_FUNC_COL]
    found, missing = {}, []
    for func in functions:
        # Literal substring match; na=False keeps rows with a missing name
        # from turning the boolean mask into an object/NaN mask.
        mask = names.str.contains(func, regex=False, na=False)
        rows = df.index[mask].values
        if len(rows) == 0:
            missing.append(func)
        else:
            found[func] = rows
    return found, missing


def _save_metric_figure(table, metric, fig_dir):
    """Draw one stacked bar chart for ``metric`` and save it as a PNG.

    Args:
        table: Frame with 'config', 'Function' and ``metric`` columns.
        metric: Name of the column to plot; also used as the y label and
            as the PNG file name.
        fig_dir: Existing directory the PNG is written into.
    """
    # One row per config value, one column (= one bar segment) per function.
    # NOTE(review): pivot() raises ValueError when two rows share the same
    # (config, Function) pair, e.g. distinct symbols renamed to one label.
    # Whether such rows should be summed instead could not be confirmed.
    pivoted = table.pivot(index='config', columns='Function', values=metric)
    pivoted = pivoted.reindex(natsort.natsorted(pivoted.index))

    ax = pivoted.plot(kind='bar', stacked=True, figsize=(15, 10), colormap='jet')
    ax.set_xlabel('Dataloaders')
    ax.tick_params(axis='x', labelrotation=0)
    ax.set_ylabel(metric)
    ax.set_title("Hardware metric breakdown by C/C++ functions")

    # Reverse the legend so its order follows the stack from top to bottom.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], loc='center left',
              bbox_to_anchor=(1.0, 0.5), ncol=2)

    fig = ax.get_figure()
    # Make sure the legend placed outside the axes fits in the saved image.
    fig.tight_layout()
    fig.savefig(os.path.join(fig_dir, f'{metric}.png'))
    plt.close(fig)


def plot_stacked_bar_chart(uarch_file, config=4, fig_dir='./c_cpp_funcs_metric_figs'):
    """Plot per-function hardware metrics from a tab-separated uarch export.

    Reads ``uarch_file``, keeps the rows whose function name contains one of
    the module-level ``interested_functions``, shortens known long C++ symbol
    names, and saves one stacked bar chart per remaining column.

    Side effects:
        * prints which functions were / were not found, plus the resulting
          function list, index and columns;
        * writes ``./combined_uarch.csv`` (metrics weighted by Clockticks);
        * writes ``./drop_dups_uarch.csv`` (after normalisation and removal
          of fully identical rows);
        * writes ``<fig_dir>/<column>.png`` for every plotted column,
          creating ``fig_dir`` if needed.

    Args:
        uarch_file: Path to the export; must end in ``.csv`` and be
            tab-separated.
        config: Value stored in the 'config' column, which becomes the
            x-axis category of every chart. Defaults to 4.
        fig_dir: Directory that receives the PNG files.

    Returns:
        None. Returns early (after printing a message) when the file name
        does not end in ``.csv`` or when no interested function is found.
    """
    if not uarch_file.endswith(".csv"):
        print("File is not a csv ", uarch_file)
        return

    df = pd.read_csv(uarch_file, sep='\t')

    # 'CPU Time' arrives as text with a trailing "s"; convert it to float and
    # derive each row's share of the total.
    df['CPU Time'] = df['CPU Time'].str.rstrip('s').astype(float)
    df['CPU Time %'] = df['CPU Time'] / df['CPU Time'].sum() * 100

    # Most expensive functions first; the fresh 0..n-1 index lets index
    # labels be used as row positions below.
    df = df.sort_values(by=['CPU Time'], ascending=False).reset_index(drop=True)

    print('uarch_file: ', uarch_file)
    found, missing = _find_function_rows(df, interested_functions)
    print("C/C++ functions not found in dataframe:")
    print(missing)
    print("C/C++ functions (indices) found in dataframe:")
    for func, rows in found.items():
        print("Index:", rows, "Function: ", func)
    print('\n\n')

    if not found:
        # Nothing to concatenate or plot; bail out instead of failing later
        # with a KeyError on a column-less frame.
        print("No interested functions found in", uarch_file)
        return

    # A row matched by several names is collected once per match here; the
    # identical copies are removed by drop_duplicates() further down.
    selected = pd.concat([df.iloc[rows] for rows in found.values()])
    selected = (
        selected
        .sort_values(by=['CPU Time %'], ascending=False)
        .reset_index(drop=True)
        .rename(columns={"CPU Time": "CPU Time (s)"})
        .drop(columns=_DROPPED_COLS)
    )

    # "12.3%" -> 12.3
    for col in _PERCENTAGE_COLS:
        selected[col] = selected[col].str.rstrip('%').astype(float)

    # "1,234,567" -> 1234567
    clockticks = selected['Clockticks'].str.replace(',', '', regex=False).astype(int)

    # Weight every percentage by the row's clockticks (the original author's
    # "option 2", preferred over weighting by CPU time).
    for col in _PERCENTAGE_COLS:
        selected[col] = selected[col] * clockticks
    selected = selected.drop(columns=['Clockticks'])

    # Round trip through the index moves the function column to the front.
    combined_df = (
        selected
        .set_index(_FUNC_COL)
        .reset_index()
        .rename(columns={_FUNC_COL: "Function"})
    )

    # Shorten the known long symbol names (exact, whole-value matches only).
    combined_df['Function'] = combined_df['Function'].replace(_FUNC_RENAME)
    combined_df['config'] = config

    combined_df.to_csv('./combined_uarch.csv')

    print(combined_df['Function'].head(len(combined_df)))

    print(combined_df.index)
    print(combined_df.columns)

    # Normalise the clocktick-weighted metrics by the row's CPU time.
    # NOTE(review): kept exactly as in the original; the intended unit of
    # the result is not evident from this file -- confirm.
    for col in _PERCENTAGE_COLS:
        combined_df[col] = combined_df[col] / combined_df['CPU Time (s)'] * 100

    # Remove rows that are identical in every column; .copy() so the column
    # assignment below acts on an independent frame (no SettingWithCopyWarning).
    combined_df = combined_df.drop_duplicates().copy()

    print('*************************')

    combined_df.to_csv('./drop_dups_uarch.csv')

    # Prefix every underscore in the labels with a backslash. Raw string plus
    # regex=False keeps this a plain text substitution on every pandas version.
    combined_df['Function'] = combined_df['Function'].str.replace('_', r'\_', regex=False)

    # savefig() does not create missing directories.
    os.makedirs(fig_dir, exist_ok=True)

    for col in combined_df.columns:
        if col in ('config', 'Function'):
            continue
        print(col)
        _save_metric_figure(combined_df, col, fig_dir)


In [42]:

# Run the analysis on the configured export; prints the match report and
# writes the CSV files and per-column figures as side effects.
plot_stacked_bar_chart(uarch_file)

uarch_file:  uarch.csv
C/C++ functions not found in dataframe:
['drop_gil', 'bilinear_filter', 'gomp_team_barrier_wait_end', '_Py_BuildValue_SizeT', 'gomp_team_end', '_PyBytes_Resize', 'gomp_finish_task', 'PyObject_IsTrue', 'jpeg_idct_16x16', 'method_vectorcall_O', 'method_vectorcall_VARARGS', 'method_vectorcall_FASTCALL', 'PyObject_IsInstance', '_decode', 'process_data_simple_main', '_ZN2at8internal15invoke_parallelIZNS_18TensorIteratorBase8for_eachEN3c1012function_refIFvPPcPKlllEEElEUlllE_EEvlllRKT_._omp_fn.0', 'munmap', 'PyBytes_FromStringAndSize', 'bytes_concat', 'jpeg_read_scanlines', 'normalize', '__libc_calloc', 'ImagingJpegDecode', '__posix_memalign', 'gomp_simple_barrier_wait', 'take_gil', 'tobytes', '[OpenMP worker]', 'at::internal::set_thread_num', 'method_vectorcall']
C/C++ functions (indices) found in dataframe:
Index: [10] Function:  _copy
Index: [64] Function:  decompress_onepass
Index: [63] Function:  ImagingFlipLeftRight
Index: [8] Function:  c10::function_ref<void (ch