In [117]:
import pandas as pd
from glob import glob


In [118]:
def compute_bytes(d):
    """Return the precision-weighted floating point operation total.

    The half, single and double precision operation counts are weighted by
    2, 4 and 8 respectively (presumably the operand width in bytes -- confirm),
    summed, and then scaled by the 'Invocations' column.

    `d` is any mapping of column name to value that supports arithmetic,
    e.g. a pandas DataFrame with one row per kernel.
    """
    half_ops = d['Floating Point Operations(Half Precision)']
    single_ops = d['Floating Point Operations(Single Precision)']
    double_ops = d['Floating Point Operations(Double Precision)']

    weighted_per_call = 2 * half_ops + 4 * single_ops + 8 * double_ops
    return weighted_per_call * d['Invocations']

def req_nonlocal_bytes(d):
    """Return the requested global-load plus shared-load bytes.

    Adds the requested global load and shared memory load throughputs
    (bytes/sec), multiplies by the total run time (average duration in
    nanoseconds times invocation count), and converts nanoseconds to seconds.
    """
    throughput = (d['Requested Global Load Throughput(bytes/sec)']
                  + d['Shared Memory Load Throughput(bytes/sec)'])
    total_duration_ns = d['Avg. Duration(ns)'] * d['Invocations']
    ns_to_s = 1.0e-9  # nanoseconds to seconds
    return throughput * total_duration_ns * ns_to_s

def trans_nonlocal_bytes(d):
    """Return the actual global-load plus shared-load bytes.

    Same calculation as the requested-bytes variant, but based on the
    'Global Load Throughput(bytes/sec)' column instead of the requested one.
    """
    throughput = (d['Global Load Throughput(bytes/sec)']
                  + d['Shared Memory Load Throughput(bytes/sec)'])
    byte_nanoseconds = throughput * d['Avg. Duration(ns)'] * d['Invocations']
    return byte_nanoseconds * 1.0e-9  # nanoseconds to seconds

def unified_hits(d):
    """Return the unified cache hit count, scaled by invocations.

    The 'Unified Cache Hit Rate(%)' column is converted from a percentage to
    a fraction and applied to 'Unified Cache Transactions'.
    """
    transactions = d['Unified Cache Transactions']
    hit_fraction = d['Unified Cache Hit Rate(%)'] / 100.0
    return transactions * hit_fraction * d['Invocations']

def unified_misses(d):
    """Return the unified cache miss count, scaled by invocations.

    The miss fraction is one minus the 'Unified Cache Hit Rate(%)' column
    expressed as a fraction; it is applied to 'Unified Cache Transactions'.
    """
    transactions = d['Unified Cache Transactions']
    miss_fraction = 1.0 - (d['Unified Cache Hit Rate(%)'] / 100.0)
    return transactions * miss_fraction * d['Invocations']
                                              
    

In [119]:
# Load every '<run>/metrics.csv' below the working directory into `data`,
# keyed by its relative path and indexed by the 'Name' column.
data = {}
# glob() gives no ordering guarantee, so sort the paths to keep the printed
# order (and every later report) reproducible across machines and re-runs.
for path in sorted(glob('*/metrics.csv')):
    print(path)
    d = pd.read_csv(path, index_col='Name', skipinitialspace=True)

    # Drop the 'memset (0)' row when present; errors='ignore' makes this a
    # no-op for files that do not contain it.
    data[path] = d.drop(index='memset (0)', errors='ignore')
    



mpnnv1_forward_10n_1b/metrics.csv
gcn_inference/metrics.csv
gat_sparse_backward_10n_1b/metrics.csv
gat_dense_forward_10n_1b/metrics.csv
gat_sparse_forward_10n_1b/metrics.csv
mpnnv2_backward_10n_1b/metrics.csv
deeplab_coco_2n_2b_forward/metrics.csv
mpnnv2_forward_n10_1b/metrics.csv
mpnnv1_backward_10n_1b/metrics.csv
multiscale_backward_10n_10b/metrics.csv
deeplab_coco_2n_2b_backward/metrics.csv
multiscale_forward_10n_10b/metrics.csv


In [120]:
def gen_stats(d):
    """Aggregate the per-row metrics of one metrics table into summary ratios.

    Returns a dict with:
      'c_to_m'           -- summed compute_bytes over summed requested bytes
      'load_efficiency'  -- summed requested bytes over summed actual bytes
      'unified_hit_rate' -- summed hits over summed hits plus misses
    """
    # Each helper returns one value per row; reduce to a total up front so
    # every total is computed once.
    requested_total = req_nonlocal_bytes(d).sum()
    transferred_total = trans_nonlocal_bytes(d).sum()
    compute_total = compute_bytes(d).sum()
    hit_total = unified_hits(d).sum()
    miss_total = unified_misses(d).sum()

    return {
        'c_to_m': compute_total / requested_total,
        'load_efficiency': requested_total / transferred_total,
        'unified_hit_rate': hit_total / (hit_total + miss_total),
    }

# Print the aggregate statistics for every loaded metrics file.
for run_name, frame in data.items():
    summary = gen_stats(frame)
    print(run_name)
    print(summary)

mpnnv1_forward_10n_1b/metrics.csv
{'c_to_m': 20.69521704998328, 'load_efficiency': 0.9662851353426587, 'unified_hit_rate': 0.4460015009992953}
gcn_inference/metrics.csv
{'c_to_m': 0.3000613883392695, 'load_efficiency': 0.8978298032897908, 'unified_hit_rate': 0.4310846417801808}
gat_sparse_backward_10n_1b/metrics.csv
{'c_to_m': 2.78283781322423, 'load_efficiency': 0.885026922362856, 'unified_hit_rate': 0.42876309344224484}
gat_dense_forward_10n_1b/metrics.csv
{'c_to_m': 25.818191294570255, 'load_efficiency': 0.9736218256918661, 'unified_hit_rate': 0.5469508236491517}
gat_sparse_forward_10n_1b/metrics.csv
{'c_to_m': 1.8449044424548764, 'load_efficiency': 0.7220746112608303, 'unified_hit_rate': 0.28703154445554907}
mpnnv2_backward_10n_1b/metrics.csv
{'c_to_m': 7.65779936786059, 'load_efficiency': 0.9381453435015384, 'unified_hit_rate': 0.28348211659273637}
deeplab_coco_2n_2b_forward/metrics.csv
{'c_to_m': 10.37110501015489, 'load_efficiency': 0.8598571423661392, 'unified_hit_rate': 0.5652

In [124]:
from IPython.display import display

# Per-kernel report: for every loaded metrics file, print one value per row
# (kernel) for each of the three metrics that gen_stats aggregates.
for n, d in data.items():
    print("===========")
    print(n)
    print("-----------")
    print("Efficiency")
    print(req_nonlocal_bytes(d)/trans_nonlocal_bytes(d))

    print("-----------")
    print("L1 Hit")
    # Hit rate is hits / (hits + misses), matching gen_stats'
    # 'unified_hit_rate'. Dividing hits by misses gives a hit-to-miss ratio
    # instead, which is inf for any kernel that never misses.
    hits = unified_hits(d)
    print(hits/(hits + unified_misses(d)))

    print("-----------")
    print("Comp:Mem")
    print(compute_bytes(d)/req_nonlocal_bytes(d))
    


mpnnv1_forward_10n_1b/metrics.csv
-----------
Effiencicy
Name
void kernelPointwiseApply3<TensorMulOp<float>, float, float, float, unsigned int, int=1, int=1, int=1>(OffsetInfo<TensorMulOp<float>, float, unsigned int>, OffsetInfo<float, float, int=1>, OffsetInfo<float, float, int=1>, float, float)                                                                                             0.998828
void kernelPointwiseApply2<TensorSigmoidOp<float>, float, float, unsigned int, int=1, int=1>(OffsetInfo<TensorSigmoidOp<float>, float, unsigned int>, OffsetInfo<float, float, int=1>, float, float)                                                                                                                                    0.986864
void kernelPointwiseApply2<CopyOp<float, unsigned char>, float, unsigned char, unsigned int, int=1, int=1>(OffsetInfo<unsigned char, float, unsigned char>, OffsetInfo<CopyOp<float, unsigned char>, float, unsigned int>, float, float)                                

In [116]:
%matplotlib notebook
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
