In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import json

In [3]:
csv_dir="./logs"
program_file_dir="."

In [4]:
# get all program files that end with .py
program_files = [f for f in os.listdir(program_file_dir) if f.endswith(".py")]

In [5]:
program = program_files[0].split(".")[0]
same_program_csv_dir=os.path.join(csv_dir,program)
# samples = total number of runs for each program
samples = len(os.listdir(same_program_csv_dir))

In [6]:
op_to_func = {}

# loop through csv files in csv_dir
for program_ in program_files:
    program = program_.split(".")[0]
    same_program_csv_dir=os.path.join(csv_dir,program)
    common_funcs_per_op = set()
    c_func_to_count = {}
    print(f"Reading {same_program_csv_dir}")
    for csv_file in os.listdir(same_program_csv_dir):
        # read csv file
        f = open(os.path.join(same_program_csv_dir,csv_file))
        lines = []
        start_append = False
        # read lines until "HOTTEST FUNCTIONS (Sort Event - CPU_TIME)"
        for line in f.readlines():
            if "HOTTEST FUNCTIONS (Sort Event - CPU_TIME)" in line:
                start_append = True
            elif start_append:
                if line == "\n":
                # if start append is true and line is newline then set start_append to False
                    break
                else:
                    split_line = line.split(',')
                    if len(split_line) > 3:
                            split_line = [",".join(split_line[:-2])] + split_line[-2:]
                    lines.append(split_line)
            
        df = pd.DataFrame(lines[1:], columns=lines[0])

        # remove '"' from df['FUNCTION'].astype(str)
        df['FUNCTION'] = df['FUNCTION'].astype(str).str.strip('"')
        # remove '\n' from df['Module\n'].astype(str)
        df['Module\n'] = df['Module\n'].astype(str).str.strip('\n').str.strip('"')
        # Split 'Module\n' by '/' and take the last element
        df['Module\n'] = df['Module\n'].str.split('/').str[-1]

        # merge two columns with "|" as separator
        df["full_func"] = df['FUNCTION'].astype(str) + "|" + df['Module\n'].astype(str).str.strip('\n')
        column_values = set(df["full_func"].values)        
        for func in df["full_func"].values:
            # if function is not in the dictionary, add it
            if func not in c_func_to_count:
                c_func_to_count[func] = 1
            # else increment the count
            else:
                c_func_to_count[func] += 1
    for func in c_func_to_count:
        # i.e. present in more than half of the samples
        # if c_func_to_count[func] > samples//2:
        common_funcs_per_op.add(func)
    op_to_func[program] = list(common_funcs_per_op)
    print(f"Common functions for {program} are {common_funcs_per_op}")

Reading ./logs/Normalize
Common functions for Normalize are {'void c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::native::AVX2::VectorizedLoop2d<at::native::AVX2::direct_copy_kernel(at::TensorIteratorBase&)::{lambda()#4}::operator()() const::{lambda()#14}::operator()() const::{lambda(float)#1}, at::native::AVX2::direct_copy_kernel(at::TensorIteratorBase&)::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const::{lambda(at::vec::AVX2::Vectorized<float>)#2}> >(long, char**, long const*, long, long)|libtorch_cpu.so', 'libgomp-a34b3233.so.1!0x00018b10|libgomp-a34b3233.so.1', 'void c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::native::AVX2::VectorizedLoop2d<at::native::AVX2::direct_copy_kernel(at::TensorIteratorBase&)::{lambda()#3}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}, at::native::AVX2::direct_copy_kernel(at::TensorIteratorBase&)::{lambda()#3}::operator()() const::{lambda()#3}::operator(

In [8]:
op_to_func

{'RandomResizedCrop': ['ImagingResampleHorizontal_8bpc|_imaging.cpython-39-x86_64-linux-gnu.so',
  'deduce_unreachable|python3.9',
  '_PyEval_EvalFrameDefault|python3.9',
  'ImagingResampleVertical_8bpc|_imaging.cpython-39-x86_64-linux-gnu.so',
  'ImagingResampleVertical_8bpc|_imaging.cpython-311-x86_64-linux-gnu.so',
  'ImagingResampleHorizontal_8bpc|_imaging.cpython-311-x86_64-linux-gnu.so',
  '__memcpy_avx_unaligned_erms|libc-2.31.so',
  'ImagingPaste|_imaging.cpython-311-x86_64-linux-gnu.so'],
 'Loader': ['sep_upsample|libjpeg.so.9.5.0',
  '_copy|_imaging.cpython-39-x86_64-linux-gnu.so',
  '_PyObject_GetMethod|python3.9',
  '__memset_avx2_unaligned_erms|libc-2.31.so',
  'jpeg_idct_16x16|libjpeg.so.9.5.0',
  'ImagingUnpackRGB|_imaging.cpython-39-x86_64-linux-gnu.so',
  'jpeg_idct_islow|libjpeg.so.9.5.0',
  'decompress_onepass|libjpeg.so.9.5.0',
  'dictiter_iternextitem|python3.9',
  'jpeg_fill_bit_buffer|libjpeg.so.9.5.0',
  'ycc_rgb_convert|libjpeg.so.9.5.0',
  'deduce_unreachable|

In [8]:
def find_common_cpp_func(op_to_func):
    # find functions found in other ops
    common_funcs_to_op = {}
    # loop through all programs
    for python_func in op_to_func.keys():
        # loop through all functions in the program
        for func in op_to_func[python_func]:
            # loop through all other programs
            for other_python_func in op_to_func.keys():
                # skip if the program is the same as the current program
                if other_python_func == python_func:
                    continue
                # if the function is found in other programs
                if func in op_to_func[other_python_func]:
                    # add the function to the set of common functions
                    if func not in common_funcs_to_op:
                        common_funcs_to_op[func] = [python_func,other_python_func]
                    elif python_func not in common_funcs_to_op[func] or other_python_func not in common_funcs_to_op[func]:
                        common_funcs_to_op[func].append(other_python_func)
    return common_funcs_to_op

In [9]:
common_funcs_to_op = find_common_cpp_func(op_to_func)
print("C/C++ functions that can be found in multiple python operations:")
for func in common_funcs_to_op.keys():
    print(f"\t{func}\n\t\t->\t{common_funcs_to_op[func]}")

C/C++ functions that can be found in multiple python operations:
	void c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::native::AVX2::VectorizedLoop2d<at::native::AVX2::direct_copy_kernel(at::TensorIteratorBase&)::{lambda()#4}::operator()() const::{lambda()#14}::operator()() const::{lambda(float)#1}, at::native::AVX2::direct_copy_kernel(at::TensorIteratorBase&)::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const::{lambda(at::vec::AVX2::Vectorized<float>)#2}> >(long, char**, long const*, long, long)|libtorch_cpu.so
		->	['Normalize', 'Collation']
	libgomp-a34b3233.so.1!0x00018b10|libgomp-a34b3233.so.1
		->	['Normalize', 'ToTensor', 'Collation']
	void c10::function_ref<void (char**, long const*, long, long)>::callback_fn<at::native::AVX2::VectorizedLoop2d<at::native::(anonymous namespace)::div_true_kernel(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float, float)#1}, at::native::(anonymous 

In [10]:
json_string = {'op_to_func':op_to_func,'common_funcs_to_op':common_funcs_to_op}
with open('mapping_funcs.json', 'w') as outfile:
    json.dump(json_string, outfile,indent=4)