In [129]:
import pandas as pd, json, re
# print Function / Call Stack column without truncation
pd.set_option('display.max_colwidth', None)

In [130]:
mapping_file = '/mydata/rbachkaniwala3/code/rajveerb-ml-pipeline-benchmark/code/image_classification/analysis/hw_event_analysis/mapping_funcs.json'
uarch_dir ='/mydata/rbachkaniwala3/code/rajveerb-ml-pipeline-benchmark/code/image_classification/analysis/hw_event_analysis/vtune_uarch_csvs'

In [131]:
# load a json file
with open(mapping_file) as f:
    data = json.load(f)

cpp_funcs = set()

for py_func in data['op_to_func']:
    for cpp_func in data['op_to_func'][py_func]:
        cpp_funcs.add(cpp_func.split('|')[0])
interested_functions = list(cpp_funcs)

# previous interested_functions =

# ["__memmove_avx_unaligned_erms",\
# "_int_free",\
# "ImagingResampleHorizontal_8bpc",\
# "ImagingResampleVertical_8bpc",\
# "ImagingFlipLeftRight",\
# "ImagingPackRGB",\
# "munmap",\
# "copy_kernel",\
# "div_true_kernel",\
# "direct_copy_kernel",\
# "add_kernel",\
# "decompress_onepass",\
# "jpeg_idct_islow",\
# "jpeg_idct_16x16",\
# "ycc_rgb_convert",\
# "decode_mcu",\
# "ImagingUnpackRGB",\
# "__memset_avx2_unaligned_erms",\
# "__libc_calloc",\
#         ]

In [132]:
#  read csv separated by tab
df = pd.read_csv('/mydata/rbachkaniwala3/code/rajveerb-ml-pipeline-benchmark/uarch.csv', sep='\t')

In [133]:
# remove trailing "s" in 'CPU Time' column and cast the column to float
df['CPU Time'] = df['CPU Time'].str.rstrip('s').astype(float)
# create a new column called "CPU Time %" from "CPU Time" column
df['CPU Time %'] = df['CPU Time'] / df['CPU Time'].sum() * 100

In [134]:
# sort by column 'CPU Time' and reset index
df = df.sort_values(by=['CPU Time'], ascending=False).reset_index(drop=True)

In [135]:
#  find index of interested functions in the dataframe in "Function / Call Stack" column
indices = {}
empty_indices = []
for func in interested_functions:
    # escape special characters
    func = re.escape(func)
    indices_for_func = df[df["Function / Call Stack"].str.contains(func)].index.values
    # if empty, add to empty_indices
    if len(indices_for_func) == 0:
        empty_indices.append(func)
    else:
        indices[func] = indices_for_func
print("C/C++ functions not found in dataframe:")
print(empty_indices)
print('\n\n')
print("C/C++ functions (indices) found in dataframe:")
for func in indices:
    print("Index:",indices[func],"Function: ", func)

C/C++ functions not found in dataframe:
['__kmp_join_barrier', 'GOMP_parallel', 'munmap', '__kmp_fork_barrier', 'gomp_team_barrier_wait_end']



C/C++ functions (indices) found in dataframe:
Index: [28] Function:  ImagingFlipLeftRight
Index: [15] Function:  c10::function_ref<void\ \(char\*\*,\ long\ const\*,\ long,\ long\)>::callback_fn<at::native::AVX2::VectorizedLoop2d<at::native::AVX2::direct_copy_kernel\(at::TensorIteratorBase\&\)::\{lambda\(\)\#3\}::operator\(\)\(void\)\ const::\{lambda\(\)\#1\}::operator\(\)\(void\)\ const::\{lambda\(unsigned\ char\)\#1\},\ at::native::AVX2::direct_copy_kernel\(at::TensorIteratorBase\&\)::\{lambda\(\)\#3\}::operator\(\)\(void\)\ const::\{lambda\(\)\#1\}::operator\(\)\(void\)\ const::\{lambda\(at::vec::AVX2::Vectorized<unsigned\ char>\)\#2\}>>
Index: [6] Function:  jpeg_idct_16x16
Index: [0] Function:  decode_mcu
Index: [9] Function:  at::native::AVX2::vectorized_loop<at::native::\(anonymous\ namespace\)::div_true_kernel\(at::TensorIteratorBase\&\

In [136]:
# find above functions in the dataframe and map the function name to the one in interested_functions

# for function in interested_functions:
#     df.loc[df['Function / Call Stack'].str.contains(function), 'Function / Call Stack'] = function




In [137]:
# combine all the interested functions' row into a dataframe
df2 = pd.DataFrame()
for func in indices:
    df2 = pd.concat([df2,df.iloc[indices[func]]])

In [138]:
# sort by 'CPU Time' column and reset index
df2 = df2.sort_values(by=['CPU Time %'], ascending=False).reset_index(drop=True)
# rename 'CPU Time' column to 'CPU Time (s)'
df2 = df2.rename(columns={"CPU Time": "CPU Time (s)"})
remove_cols = [
    "Source File",
    "Start Address",
    "Module",
    "Average CPU Frequency",
    "Clockticks",
    "Instructions Retired",
    "CPI Rate",
    "Function (Full)"
] 
# remove columns
df2 = df2.drop(columns=remove_cols)

In [139]:
# print sum of 'CPU Time (s)' column
print("Total CPU Time (s): ", df['CPU Time'].sum())

Total CPU Time (s):  8746.431999999999


In [140]:
df2.head(len(df2))

Unnamed: 0,Function / Call Stack,CPU Time (s),Retiring,Front-End Bound,Bad Speculation,L1 Bound,L2 Bound,L3 Bound,DRAM Bound,Store Bound,Core Bound,CPU Time %
0,decode_mcu,1938.05,41.2%,8.5%,42.7%,17.1%,0.2%,0.1%,0.0%,0.0%,4.8%,22.158178
1,jpeg_idct_islow,1531.883,69.3%,2.6%,5.9%,1.9%,0.0%,0.0%,0.0%,0.2%,20.7%,17.514376
2,ImagingResampleHorizontal_8bpc,672.25,92.6%,1.9%,0.0%,0.7%,0.1%,0.1%,0.1%,0.1%,6.1%,7.685991
3,ycc_rgb_convert,563.211,84.4%,0.4%,0.4%,0.3%,0.1%,0.0%,0.0%,0.1%,13.4%,6.439323
4,jpeg_idct_16x16,241.373,88.6%,1.4%,0.0%,0.5%,0.0%,0.0%,0.0%,0.3%,9.7%,2.759674
5,"c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::native::AVX2::VectorizedLoop2d<at::native::AVX2::direct_copy_kernel(at::TensorIteratorBase&)::{lambda()#3}::operator()(void) const::{lambda()#7}::operator()(void) const::{lambda(float)#1}, at::native::AVX2::direct_copy_kernel(at::TensorIteratorBase&)::{lambda()#3}::operator()(void) const::{lambda()#7}::operator()(void) const::{lambda(at::vec::AVX2::Vectorized<float>)#2}>>",239.608,5.3%,0.4%,0.2%,13.9%,12.7%,14.0%,44.7%,94.2%,4.1%,2.739494
6,__memmove_avx_unaligned_erms,234.228,8.2%,2.1%,0.0%,20.6%,13.5%,27.9%,9.8%,79.8%,8.6%,2.677983
7,"at::native::AVX2::vectorized_loop<at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(floatfloat)#1}&, at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#1}::operator()(void) const::{lambda()#2}::operator()(void) const::{lambda(at::vec::AVX2::Vectorized<float>at::vec::AVX2::Vectorized<float>)#2}&>.isra.0",164.444,13.9%,0.3%,0.0%,22.9%,11.5%,6.6%,0.0%,8.8%,42.4%,1.880127
8,decompress_onepass,150.369,53.3%,5.5%,20.4%,3.2%,0.1%,0.1%,0.0%,0.2%,18.3%,1.719204
9,ImagingUnpackRGB,145.769,85.5%,5.6%,0.0%,0.8%,0.2%,0.2%,0.0%,1.1%,7.8%,1.666611


In [141]:
print(df2['Function (Full)'])

KeyError: 'Function (Full)'

In [1]:
def flatten(S):
    if len(S)==1:
        return [S[0]]
    if isinstance(S[0], list):
        return flatten(S[0]) + flatten(S[1:])
    return S[:1] + flatten(S[1:])

In [4]:
flatten([[[1],2],3])

[1, 2, 3]