In [646]:
import json

In [647]:
# Path of the perfmon JSON file that holds the metric definitions.
perfmon_file = '/mydata/rbachkaniwala3/code/rajveerb-ml-pipeline-benchmark/perfmon_for_bdw.json'

# Parse inside a `with` block so the file handle is closed as soon as the JSON
# has been read, instead of staying open until garbage collection.
with open(perfmon_file) as perfmon_fh:
    data = json.load(perfmon_fh)

In [648]:
# Narrow `data` from the parsed JSON object to its 'Metrics' entry.
# The name is rebound in place, so a second run of this cell would index the
# already-extracted value with 'Metrics' and fail; the guard makes re-runs a no-op.
if isinstance(data, dict):
    data = data['Metrics']

In [649]:
# Metric names to select from the perfmon definitions; each is matched against
# an entry's 'MetricName' by get_interest_metrics().
columns = [
    'Bad_Speculation', 'Core_Bound', 'Frontend_Bound',
    'L1_Bound', 'L2_Bound', 'L3_Bound',
    'Local_DRAM', 'Remote_DRAM',
    'MEM_Bandwidth', 'Store_Bound',
]

In [650]:
def get_interest_metrics(data, columns):
    """Select the metric definitions whose 'MetricName' is listed in ``columns``.

    Args:
        data: iterable of metric-definition dicts, each with a 'MetricName' key.
        columns: sized collection of metric names to keep.

    Returns:
        list of the matching entries, in the order they appear in ``data``.

    Raises:
        ValueError: if any requested name has no entry in ``data`` (the missing
            names are listed in the message), or if the number of matching
            entries differs from ``len(columns)``, e.g. because a name occurs
            more than once.
    """
    result = [element for element in data if element['MetricName'] in columns]

    # Report exactly which requested metrics are absent, rather than a generic
    # message that leaves the caller to diff the two lists by hand.
    found = {element['MetricName'] for element in result}
    missing = [name for name in columns if name not in found]
    if missing:
        raise ValueError('Requested metrics not found in data: {}'.format(missing))

    # Every name was found, so a count mismatch can only come from repeats.
    if len(result) != len(columns):
        raise ValueError(
            'Expected {} metric definitions but matched {}; a metric name is '
            'duplicated in data or columns'.format(len(columns), len(result))
        )
    return result

In [651]:
# Definitions of just the metrics named in `columns`; raises if the selection
# does not yield exactly len(columns) entries.
metric_defs = get_interest_metrics(data, columns)

In [652]:
# Union of the event names referenced by the selected metric definitions.
hw_events = set()

for metric in metric_defs:
    print(metric['MetricName'])
    # Each metric lists its events as dicts; only the 'Name' field is needed.
    hw_events.update(event['Name'] for event in metric['Events'])

Frontend_Bound
Bad_Speculation
L1_Bound
L2_Bound
L3_Bound
MEM_Bandwidth
Local_DRAM
Remote_DRAM
Store_Bound
Core_Bound


In [653]:
# Display the collected hardware event names.
hw_events

{'CPU_CLK_UNHALTED.THREAD',
 'CYCLE_ACTIVITY.STALLS_L1D_MISS',
 'CYCLE_ACTIVITY.STALLS_L2_MISS',
 'CYCLE_ACTIVITY.STALLS_MEM_ANY',
 'CYCLE_ACTIVITY.STALLS_TOTAL',
 'IDQ_UOPS_NOT_DELIVERED.CORE',
 'IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE',
 'INST_RETIRED.ANY',
 'INT_MISC.RECOVERY_CYCLES',
 'MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT',
 'MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM',
 'MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS',
 'MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM',
 'MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM',
 'MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD',
 'MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM',
 'MEM_LOAD_UOPS_RETIRED.HIT_LFB',
 'MEM_LOAD_UOPS_RETIRED.L2_HIT',
 'MEM_LOAD_UOPS_RETIRED.L3_HIT',
 'MEM_LOAD_UOPS_RETIRED.L3_MISS',
 'OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD:cmask=4',
 'RESOURCE_STALLS.SB',
 'RS_EVENTS.EMPTY_CYCLES',
 'UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC',
 'UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC',
 'UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC',
 'UOPS_ISSUED.ANY',
 'UOPS_RETIRED.RE

In [654]:
import pandas as pd

In [655]:
# Path of the CSV that is loaded into `df` below (absolute, machine-specific path).
sample_csv = '/mydata/vtune_logs/pytorch_vtune_logs/csv/vtune_mem_access_vary_dataloader/b1024_gpu4_dataloader8.csv'

In [656]:
# Load the sample CSV into a DataFrame. Columns whose header contains
# 'Hardware Event Count:' are the ones the following cells work with.
df = pd.read_csv(sample_csv)

In [657]:
# Event names available in the CSV: the text that follows
# 'Hardware Event Count:' in each header containing that marker.
hw_columns_in_csv = {
    header.split('Hardware Event Count:')[1]
    for header in df.columns
    if 'Hardware Event Count:' in header
}

In [658]:
# Verify that every event needed by the metric definitions is present in the
# CSV, either under its own name or with a '_PS' suffix. This is a subset check
# (the CSV may carry extra events), not a set-equality check.
# Iterating in sorted order keeps the printed list stable between runs.
for hw_event in sorted(hw_events):
    if hw_event in hw_columns_in_csv or hw_event + '_PS' in hw_columns_in_csv:
        print(hw_event)
    else:
        print('\t', hw_event)
        # Name the offending event: it is the CSV that lacks it, not hw_events.
        raise Exception('hardware event not found in CSV columns: ' + hw_event)

MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT
RESOURCE_STALLS.SB
RS_EVENTS.EMPTY_CYCLES
UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC
MEM_LOAD_UOPS_RETIRED.L3_HIT
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM
MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS
CYCLE_ACTIVITY.STALLS_L2_MISS
INST_RETIRED.ANY
UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC
UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC
CYCLE_ACTIVITY.STALLS_TOTAL
MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD
MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM
CYCLE_ACTIVITY.STALLS_L1D_MISS
UOPS_ISSUED.ANY
MEM_LOAD_UOPS_RETIRED.L2_HIT
INT_MISC.RECOVERY_CYCLES
CPU_CLK_UNHALTED.THREAD
MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM
MEM_LOAD_UOPS_RETIRED.L3_MISS
IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE
MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM
IDQ_UOPS_NOT_DELIVERED.CORE
OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD:cmask=4
CYCLE_ACTIVITY.STALLS_MEM_ANY
MEM_LOAD_UOPS_RETIRED.HIT_LFB
UOPS_RETIRED.RETIRE_SLOTS


In [659]:
# Reduce every header containing 'Hardware Event Count:' to the text that
# follows the marker, so event columns can be addressed by bare event name.
# Headers without the marker are kept unchanged.
stripped = []
for header in df.columns:
    if 'Hardware Event Count:' in header:
        header = header.split('Hardware Event Count:')[1]
    stripped.append(header)
df.columns = stripped

In [660]:
# Inspect the column labels after the marker has been stripped.
df.columns

Index(['Source Function', 'INST_RETIRED.ANY', 'CPU_CLK_UNHALTED.THREAD',
       'CPU_CLK_UNHALTED.REF_TSC', 'CYCLE_ACTIVITY.STALLS_L1D_MISS',
       'OFFCORE_RESPONSE:request=ALL_READS:response=LLC_MISS.LOCAL_DRAM',
       'OFFCORE_RESPONSE:request=ALL_READS:response=LLC_MISS.REMOTE_DRAM',
       'CPU_CLK_UNHALTED.REF_XCLK', 'L1D_PEND_MISS.PENDING',
       'OFFCORE_RESPONSE:request=DEMAND_RFO:response=LLC_HIT.HITM_OTHER_CORE',
       'OFFCORE_RESPONSE:request=DEMAND_RFO:response=LLC_MISS.REMOTE_HITM',
       'CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE', 'MEM_UOPS_RETIRED.ALL_LOADS_PS',
       'MEM_UOPS_RETIRED.ALL_STORES_PS', 'CPU_CLK_UNHALTED.THREAD_P',
       'MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4', 'CYCLE_ACTIVITY.STALLS_L2_MISS',
       'CYCLE_ACTIVITY.STALLS_MEM_ANY', 'CYCLE_ACTIVITY.STALLS_TOTAL',
       'DTLB_LOAD_MISSES.STLB_HIT', 'DTLB_LOAD_MISSES.WALK_COMPLETED',
       'DTLB_LOAD_MISSES.WALK_DURATION:cmask=1', 'DTLB_STORE_MISSES.STLB_HIT',
       'DTLB_STORE_MISSES.WALK_COMPLETED',
  

In [661]:
def bad_speculation(df):
    """Combine the three events used for Bad_Speculation.

    Computes ``UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS
    + 4 * INT_MISC.RECOVERY_CYCLES`` on the correspondingly named entries of
    ``df`` and returns the result (element-wise for column-like values).
    """
    issued = df['UOPS_ISSUED.ANY']
    retired = df['UOPS_RETIRED.RETIRE_SLOTS']
    recovery = df['INT_MISC.RECOVERY_CYCLES']
    return issued - retired + (4 * recovery)

In [662]:
# TODO check correctness
def core_bound(df):
    """Evaluate the Core_Bound formula of the metric definition on ``df``.

    Event aliases, as listed in the definition's "Events" entry:

        a  IDQ_UOPS_NOT_DELIVERED.CORE
        c  CPU_CLK_UNHALTED.THREAD   (listed, but not referenced by the formula)
        d  UOPS_ISSUED.ANY
        e  UOPS_RETIRED.RETIRE_SLOTS
        g  INT_MISC.RECOVERY_CYCLES
        h  CYCLE_ACTIVITY.STALLS_MEM_ANY
        i  RESOURCE_STALLS.SB
        j  CYCLE_ACTIVITY.STALLS_TOTAL
        k  UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC
        l  UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC
        m  INST_RETIRED.ANY
        n  UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC
        o  RS_EVENTS.EMPTY_CYCLES
        p  IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE

    Constants listed by the definition (not used by the formula):
    HYPERTHREADING_ON (smt_on), THREADS_PER_CORE (threads).

    Formula:
        ( 1 - ( a + ( d - ( e ) + ( 4 * g ) ) + e ) )
        - ( ( ( h + i ) / ( j + k - ( l if ( m  > 1.8 ) else n )
                            - ( o if ( 4 * p ) > 0.1 else 0 ) + i ) )
            * ( 1 - ( a + d - ( e ) + ( 4 * g ) + e ) ) )

    Args:
        df: DataFrame (or any mapping) indexable by the event names above.

    Returns:
        The formula result: a Series for DataFrame input, a scalar when the
        looked-up values are scalars.

    NOTE(review): the formula subtracts raw event values from 1 and compares
    INST_RETIRED.ANY directly with 1.8; it looks like ratios may have been
    intended -- confirm against the source definition before relying on it.
    """
    def pick(cond, if_true, if_false):
        # A Python `x if cond else y` needs a single bool; with a Series as the
        # condition pandas raises "truth value of a Series is ambiguous".
        # Select per element for Series, keep the plain conditional for scalars.
        if isinstance(cond, pd.Series):
            return if_true.where(cond, if_false)
        return if_true if cond else if_false

    a = df['IDQ_UOPS_NOT_DELIVERED.CORE']
    d = df['UOPS_ISSUED.ANY']
    e = df['UOPS_RETIRED.RETIRE_SLOTS']
    g = df['INT_MISC.RECOVERY_CYCLES']
    h = df['CYCLE_ACTIVITY.STALLS_MEM_ANY']
    i = df['RESOURCE_STALLS.SB']
    j = df['CYCLE_ACTIVITY.STALLS_TOTAL']
    k = df['UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC']
    l = df['UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC']
    m = df['INST_RETIRED.ANY']
    n = df['UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC']
    o = df['RS_EVENTS.EMPTY_CYCLES']
    p = df['IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE']

    # `1 - (a + (d - e + 4*g) + e)` occurs twice in the formula (the second
    # time with the inner parentheses dropped); both are the same quantity.
    remainder = 1 - (a + (d - e + (4 * g)) + e)

    denominator = (
        j + k
        - pick(m > 1.8, l, n)
        - pick((4 * p) > 0.1, o, 0)
        + i
    )
    return remainder - (((h + i) / denominator) * remainder)

In [663]:
def L1_bound(df):
    """Evaluate the L1_Bound formula on ``df``.

    Aliases from the metric definition:
        a  CYCLE_ACTIVITY.STALLS_MEM_ANY
        b  CYCLE_ACTIVITY.STALLS_L1D_MISS
    Constants: none.
    Formula: ( a - b)

    Returns the difference of the two entries of ``df``.
    """
    stalls_mem_any = df['CYCLE_ACTIVITY.STALLS_MEM_ANY']
    stalls_l1d_miss = df['CYCLE_ACTIVITY.STALLS_L1D_MISS']
    return stalls_mem_any - stalls_l1d_miss
    

In [664]:
def L2_bound(df):
    """Evaluate the L2_Bound formula on ``df``.

    Aliases from the metric definition:
        a  CYCLE_ACTIVITY.STALLS_L1D_MISS
        b  CYCLE_ACTIVITY.STALLS_L2_MISS
        c  CPU_CLK_UNHALTED.THREAD   (listed, but not referenced by the formula)
    Constants: none.
    Formula: ( a - b )

    Returns the difference of the two entries of ``df``.
    """
    stalls_l1d_miss = df['CYCLE_ACTIVITY.STALLS_L1D_MISS']
    stalls_l2_miss = df['CYCLE_ACTIVITY.STALLS_L2_MISS']
    return stalls_l1d_miss - stalls_l2_miss

In [665]:
def L3_bound(df):
    """Evaluate the L3_Bound formula on ``df``.

    Aliases from the metric definition:
        a  MEM_LOAD_UOPS_RETIRED.L3_HIT_PS
        b  MEM_LOAD_UOPS_RETIRED.L3_MISS_PS
        c  CYCLE_ACTIVITY.STALLS_L2_MISS
        d  CPU_CLK_UNHALTED.THREAD   (listed, but not referenced by the formula)
    Constants: none.
    Formula: ( a / ( a + ( 7 ) * b ) ) * c

    The division is not guarded: where ``a + 7 * b`` is zero the result follows
    the operand type's own division-by-zero behaviour.
    """
    l3_hit = df['MEM_LOAD_UOPS_RETIRED.L3_HIT_PS']
    l3_miss = df['MEM_LOAD_UOPS_RETIRED.L3_MISS_PS']

    # a / (a + 7 * b)
    hit_fraction = l3_hit / (l3_hit + (7) * l3_miss)
    return hit_fraction * df['CYCLE_ACTIVITY.STALLS_L2_MISS']

In [666]:
def Local_DRAM(df):
    """Evaluate the Local_DRAM formula on ``df``.

    Aliases from the metric definition:
        a  MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM_PS
        b  MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS
        c  MEM_LOAD_UOPS_RETIRED.L2_HIT_PS
        d  MEM_LOAD_UOPS_RETIRED.L3_HIT_PS
        e  MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT_PS
        f  MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM_PS
        g  MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS_PS
        h  MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM_PS
        i  MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM_PS
        j  MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD_PS
        k  CPU_CLK_UNHALTED.THREAD
    Constants: none.
    Formula: ( 200 * ( a * ( 1 + b / ( ( c + d + e + f + g ) + a + h + i + j ) ) ) / k )
    """
    local_dram = df['MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM_PS']
    hit_lfb = df['MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS']

    # ( c + d + e + f + g )
    hit_terms = (
        df['MEM_LOAD_UOPS_RETIRED.L2_HIT_PS']
        + df['MEM_LOAD_UOPS_RETIRED.L3_HIT_PS']
        + df['MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT_PS']
        + df['MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM_PS']
        + df['MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS_PS']
    )
    # ( c + d + e + f + g ) + a + h + i + j
    denominator = (
        hit_terms
        + local_dram
        + df['MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM_PS']
        + df['MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM_PS']
        + df['MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD_PS']
    )

    # a * ( 1 + b / denominator )
    scaled = local_dram * (1 + hit_lfb / denominator)
    return 200 * scaled / df['CPU_CLK_UNHALTED.THREAD']

In [667]:
def Remote_DRAM(df):
    """Evaluate the Remote_DRAM formula on ``df``.

    Aliases, as used by this implementation:
        a  MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM_PS
        b  MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS
        c  MEM_LOAD_UOPS_RETIRED.L2_HIT_PS
        d  MEM_LOAD_UOPS_RETIRED.L3_HIT_PS
        e  MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT_PS
        f  MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM_PS
        g  MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS_PS
        h  MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM_PS
        i  MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM_PS
        j  MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD_PS
        k  CPU_CLK_UNHALTED.THREAD
    Constants: none.
    Formula: ( 310 * ( a * ( 1 + b / ( ( c + d + e + f + g ) + h + a + i + j ) ) ) /  k )
    """
    remote_dram = df['MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM_PS']
    hit_lfb = df['MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS']

    # ( c + d + e + f + g )
    hit_terms = (
        df['MEM_LOAD_UOPS_RETIRED.L2_HIT_PS']
        + df['MEM_LOAD_UOPS_RETIRED.L3_HIT_PS']
        + df['MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT_PS']
        + df['MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM_PS']
        + df['MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS_PS']
    )
    # ( c + d + e + f + g ) + h + a + i + j
    denominator = (
        hit_terms
        + df['MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM_PS']
        + remote_dram
        + df['MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM_PS']
        + df['MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD_PS']
    )

    # a * ( 1 + b / denominator )
    scaled = remote_dram * (1 + hit_lfb / denominator)
    return 310 * scaled / df['CPU_CLK_UNHALTED.THREAD']

In [668]:
def MEM_Bandwidth(df):
    """Evaluate the MEM_Bandwidth formula on ``df``.

    Aliases from the metric definition:
        b  OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD:cmask=4
    Constants: none.
    Formula: b

    The formula is the single event itself, so the entry is returned as is.
    """
    event = 'OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD:cmask=4'
    return df[event]

In [669]:
def Store_Bound(df):
    """Evaluate the Store_Bound formula on ``df``.

    Aliases from the metric definition:
        a  RESOURCE_STALLS.SB
    Constants: none.
    Formula: a

    The formula is the single event itself, so the entry is returned as is.
    """
    event = 'RESOURCE_STALLS.SB'
    return df[event]

In [670]:
# Evaluate each metric on the loaded DataFrame.
# NOTE: every assignment except `bad_spec` rebinds the metric function's own
# name to its result, so the function is no longer reachable afterwards and
# re-running this cell without re-running the defining cells would try to call
# the result object. core_bound is left disabled here.
bad_spec= bad_speculation(df)
# core_bound = core_bound(df)
L1_bound = L1_bound(df)
L2_bound = L2_bound(df)
L3_bound = L3_bound(df)
Local_DRAM = Local_DRAM(df)
Remote_DRAM = Remote_DRAM(df)
MEM_Bandwidth = MEM_Bandwidth(df)
Store_Bound = Store_Bound(df)


In [671]:
# Print every computed metric in turn, in the order they were evaluated.
# core_bound is not printed: its computation is commented out.
for metric_values in (
    bad_spec,
    # core_bound,
    L1_bound,
    L2_bound,
    L3_bound,
    Local_DRAM,
    Remote_DRAM,
    MEM_Bandwidth,
    Store_Bound,
):
    print(metric_values)

0        1179751769625
1       10650165975225
2         -73200109800
3          93000139500
4          13050019575
             ...      
5771                 0
5772                 0
5773                 0
5774                 0
5775                 0
Length: 5776, dtype: int64
0         90150135225
1       1107001660500
2         18150027225
3          3750005625
4         15900023850
            ...      
5771                0
5772        150000225
5773                0
5774                0
5775                0
Length: 5776, dtype: int64
0         750001125
1       10950016425
2         300000450
3        1200001800
4         900001350
           ...     
5771              0
5772     -150000225
5773              0
5774     -150000225
5775              0
Length: 5776, dtype: int64
0       1.200002e+09
1       7.500011e+09
2       4.050006e+09
3       7.500011e+08
4       5.850282e+09
            ...     
5771             NaN
5772             NaN
5773             NaN
5774           