In [1]:
%load_ext autoreload
%autoreload 2

import os,sys
sys.path.append('../../pycode')
from magus import read_trace

import json,yaml,codecs

import numpy as np
import pandas as pd
import operator

# Metrics

### [1] read all the profling metrics on nvidia pascal arch 

In [2]:
with open('pascal_metrics.json', 'r') as metricsFile:
     metricsAll = yaml.safe_load(metricsFile)
        
TargetMetrics = metricsAll['pascal']        

featureDim = len(TargetMetrics)
print("Metrics on Pascal GPUs (1080Ti): {}".format(featureDim))

Metrics on Pascal GPUs (1080Ti): 120


### [2] read scale for each feature metric

In [3]:
#
# read the metrics_scale.jason
#
with open('metrics_scale.json', 'r') as metricsFile:
     metrics_scale_dd = yaml.safe_load(metricsFile)

In [4]:
metrics_scale_dd

{'achieved_occupancy': [0.015602, 0.98597],
 'atomic_transactions': [0.0, 80413.0],
 'atomic_transactions_per_request': [0.0, 5.210042],
 'branch_efficiency': [0.7297148, 1.0],
 'cf_executed': [1.0, 1380367395.0],
 'cf_fu_utilization': [0.1, 0.30000000000000004],
 'cf_issued': [1.0, 1380367395.0],
 'double_precision_fu_utilization': [0.0, 1.0],
 'dram_read_throughput': [0.0, 333.226286],
 'dram_read_transactions': [0.0, 11814974.0],
 'dram_utilization': [0.0, 0.9],
 'dram_write_throughput': [0.0, 383.09861],
 'dram_write_transactions': [0.0, 8486164.0],
 'ecc_throughput': [0.0, 0.0],
 'ecc_transactions': [0.0, 0.0],
 'eligible_warps_per_cycle': [0.008741, 8.993029],
 'executed_ipc': [0.010014, 3.372016],
 'flop_count_dp': [0.0, 939702716.0],
 'flop_count_dp_add': [0.0, 94379405.0],
 'flop_count_dp_fma': [0.0, 404701375.0],
 'flop_count_dp_mul': [0.0, 66591523.0],
 'flop_count_hp': [0.0, 0.0],
 'flop_count_hp_add': [0.0, 0.0],
 'flop_count_hp_fma': [0.0, 0.0],
 'flop_count_hp_mul': [0.0

In [5]:
metrics_all = []
for metric, _ in metrics_scale_dd.iteritems():
    metrics_all.append(metric)
    
print metrics_all

print '\ntotal metrics number = %d' % len(metrics_all)

['eligible_warps_per_cycle', 'stall_constant_memory_dependency', 'gst_efficiency', 'warp_execution_efficiency', 'sysmem_utilization', 'inst_fp_64', 'local_store_transactions', 'cf_executed', 'ldst_fu_utilization', 'stall_not_selected', 'inst_misc', 'global_hit_rate', 'gld_requested_throughput', 'flop_count_sp_special', 'stall_exec_dependency', 'gld_throughput', 'l2_tex_read_transactions', 'branch_efficiency', 'flop_hp_efficiency', 'sysmem_write_throughput', 'l2_atomic_transactions', 'warp_nonpred_execution_efficiency', 'issue_slots', 'sysmem_read_transactions', 'inst_bit_convert', 'gst_transactions_per_request', 'ldst_executed', 'gst_throughput', 'special_fu_utilization', 'issue_slot_utilization', 'tex_cache_transactions', 'tex_utilization', 'shared_efficiency', 'flop_sp_efficiency', 'shared_load_transactions', 'gst_transactions', 'flop_count_dp_fma', 'flop_count_hp_fma', 'local_load_transactions', 'shared_store_throughput', 'local_store_throughput', 'inst_executed', 'shared_store_tran

### [3] read app metrics,  convert the raw data into the same unit

In [6]:
# maxwell + pascal
Percentage2decimal_Metrics = ['sm_efficiency', 'branch_efficiency', 
        'warp_execution_efficiency', 'warp_nonpred_execution_efficiency', 
        'issue_slot_utilization', 'global_hit_rate', 'local_hit_rate',
        'gld_efficiency', 'gst_efficiency', 'shared_efficiency', 
        'stall_inst_fetch', 'stall_exec_dependency', 'stall_memory_dependency',
        'stall_texture', 'stall_sync', 'stall_other',
        'stall_constant_memory_dependency', 'stall_pipe_busy',
        'stall_memory_throttle', 'stall_not_selected', 'local_memory_overhead', 
        'tex_cache_hit_rate','l2_tex_read_hit_rate', 'l2_tex_write_hit_rate', 
        'flop_sp_efficiency', 'flop_dp_efficiency', 'sm_activity', 'flop_hp_efficiency']


Throughput_Metrics = ['gld_requested_throughput', 'gst_requested_throughput', 
        'gld_throughput', 'gst_throughput', 'dram_read_throughput', 
        'dram_write_throughput', 'tex_cache_throughput', 'local_load_throughput', 
        'local_store_throughput', 'shared_load_throughput', 
        'shared_store_throughput', 'l2_tex_read_throughput', 
        'l2_tex_write_throughput', 'l2_read_throughput', 'l2_write_throughput', 
        'sysmem_read_throughput', 'sysmem_write_throughput', 
        'l2_atomic_throughput', 'ecc_throughput']

Utilization2decimal_Metrics = ['cf_fu_utilization', 'tex_fu_utilization', 
        'ldst_fu_utilization', 'double_precision_fu_utilization', 
        'special_fu_utilization', 'single_precision_fu_utilization', 
        'dram_utilization', 'tex_utilization', 'shared_utilization', 
        'l2_utilization', 'sysmem_utilization', 'sysmem_read_utilization', 
        'sysmem_write_utilization', 'half_precision_fu_utilization']

In [7]:
#------------------------------------------------------------------------------
# convert metric from original format to the target presentation 
#------------------------------------------------------------------------------
def adjust_metric(metricName, metricValue):
    """
    Adjust metric from dataframe containing nvprof results.
    """ 
    metricValue_str= str(metricValue)
    
    #
    # update the value if the metric belongs to the following 3 groups
    # 

    # convert % to decimal
    if metricName in Percentage2decimal_Metrics: 
        adjustedV = float(metricValue_str[:-1]) * 0.01
        
    # scale to GB/s
    elif metricName in Throughput_Metrics: 
        if "GB/s" in metricValue_str:
            adjustedV = float(metricValue_str[:-4])
        elif "MB/s" in metricValue_str:
            adjustedV = float(metricValue_str[:-4]) * 1e-3
        elif "KB/s" in metricValue_str:
            adjustedV = float(metricValue_str[:-4]) * 1e-6
        elif "B/s" in metricValue_str:
            adjustedV = float(metricValue_str[:-3]) * 1e-9
        else:
            print "Error: unknow throughtput unit!"
            sys.exit(0)
            
    # convert util() to decimal
    elif metricName in Utilization2decimal_Metrics:
        adjustedV = float(metricValue_str[metricValue_str.find('(') + 1 : metricValue_str.rfind(')')]) * 0.1
        #print('{} in Utilization2decimal_Metrics.'.format(local_metric_value))
        
    else:
        adjustedV = float(metricValue)  # apply float as default

    #print('{} : {}'.format(metricName, metricValue))
    #print('{} : {}'.format(metricName, adjustedV))

    return adjustedV

read all the metrics in **./metrics** folder, save the input in data frame

In [8]:
metricsFolder = "./metrics"

Some file name examples:

* metrics_cudasdk_batchCUBLAS.csv
* metrics_rodinia_hybridsort.csv

In [9]:
appTrace = os.listdir(metricsFolder)

app_metrics_max_dd = {}

for currentFile in appTrace:
    appName = currentFile[8:][:-4]
    file_csv = metricsFolder + '/' + currentFile # csv link
    print appName
    #print("current : {},  appName : {},  csv_loc : {}".format(currentFile, appName, file_csv))
    
    df_app = read_trace(file_csv) # read csv content

    appMetricMax_dd = {}
    
    for metric in metrics_all:
        df_metric = df_app.loc[df_app['Metric Name'] == metric]['Avg']
        m_list = [adjust_metric(metric, mVal) for _, mVal in df_metric.iteritems()]
        appMetricMax_dd[metric] = max(m_list)  # use the max() value for the current feature column

    app_metrics_max_dd[appName] = appMetricMax_dd  # update app metrics for current application

cudasdk_interval
rodinia_hotspot
rodinia_heartwall
cudasdk_dxtc
cudasdk_MCSingleAsianOptionP
parboil_stencil
cudasdk_mergeSort
rodinia_hybridsort
shoc_lev1BFS
parboil_mriq
rodinia_b+tree
lonestar_sssp
poly_covariance
cudasdk_shflscan
cudasdk_transpose
poly_gemm
cudasdk_dwtHaar1D
cudasdk_MCEstimatePiQ
poly_2dconv
cudasdk_convolutionFFT2D
rodinia_dwt2d
parboil_lbm
cudasdk_convolutionSeparable
cudasdk_concurrentKernels
poly_fdtd2d
cudasdk_sortingNetworks
poly_gesummv
rodinia_lud
poly_correlation
parboil_bfs
cudasdk_MCEstimatePiInlineP
shoc_lev1sort
cudasdk_lineOfSight
poly_syrk
cudasdk_matrixMul
cudasdk_segmentationTreeThrust
poly_3dconv
cudasdk_fastWalshTransform
parboil_cutcp
cudasdk_simpleCUFFTcallback
poly_atax
shoc_lev1fft
cudasdk_c++11Cuda
cudasdk_boxFilterNPP
rodinia_gaussian
poly_mvt
poly_syr2k
cudasdk_MCEstimatePiP
rodinia_lavaMD
cudasdk_eigenvalues
cudasdk_vectorAdd
cudasdk_BlackScholes
rodinia_needle
cudasdk_batchCUBLAS
cudasdk_radixSortThrust
cudasdk_scan
cudasdk_SobolQRNG
rod

In [10]:
app_metrics_max_dd['cudasdk_reduction']

{'achieved_occupancy': 0.286578,
 'atomic_transactions': 0.0,
 'atomic_transactions_per_request': 0.0,
 'branch_efficiency': 1.0,
 'cf_executed': 266688.0,
 'cf_fu_utilization': 0.1,
 'cf_issued': 266688.0,
 'double_precision_fu_utilization': 0.0,
 'dram_read_throughput': 303.222798,
 'dram_read_transactions': 2097193.0,
 'dram_utilization': 0.7000000000000001,
 'dram_write_throughput': 6.134622,
 'dram_write_transactions': 42432.0,
 'ecc_throughput': 0.0,
 'ecc_transactions': 0.0,
 'eligible_warps_per_cycle': 0.278397,
 'executed_ipc': 0.299417,
 'flop_count_dp': 0.0,
 'flop_count_dp_add': 0.0,
 'flop_count_dp_fma': 0.0,
 'flop_count_dp_mul': 0.0,
 'flop_count_hp': 0.0,
 'flop_count_hp_add': 0.0,
 'flop_count_hp_fma': 0.0,
 'flop_count_hp_mul': 0.0,
 'flop_count_sp': 0.0,
 'flop_count_sp_add': 0.0,
 'flop_count_sp_fma': 0.0,
 'flop_count_sp_mul': 0.0,
 'flop_count_sp_special': 0.0,
 'flop_dp_efficiency': 0.0,
 'flop_hp_efficiency': 0.0,
 'flop_sp_efficiency': 0.0,
 'gld_efficiency': 1

### transform data dict to data frame

In [11]:
print("Metrics on Pascal GPUs (1080Ti): {}".format(featureDim))

Metrics on Pascal GPUs (1080Ti): 120


In [12]:
featMatCols = ['AppName']         
featMatCols.extend(TargetMetrics)

In [13]:
# application number
appNum = len(app_metrics_max_dd)
print "Total applications :  %d" % appNum

Total applications :  79


In [14]:
#  appNum  x featureDim
df_app = pd.DataFrame(index=np.arange(0, appNum), columns=featMatCols)

In [15]:
#
# export data to data frame, so that we can export to csv file easily
#
rowId = 0
for appName, metrics_dd in app_metrics_max_dd.iteritems():
    df_app.loc[rowId, 'AppName'] = appName # fill in kernel name 

    # add more metrics according to the column order
    for eachMetric in TargetMetrics:
        try:
            df_app.loc[rowId, eachMetric] = metrics_dd[eachMetric]
        except Exception as e:
            print e.message, e.args
            print('ERROR!! App = {}, Metric Name = {}'.format(appName, eachMetric))
            sys.exit(0)

    rowId += 1

In [16]:
df_app

Unnamed: 0,AppName,inst_per_warp,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,shared_load_transactions_per_request,shared_store_transactions_per_request,local_load_transactions_per_request,local_store_transactions_per_request,...,flop_sp_efficiency,flop_dp_efficiency,dram_read_transactions,dram_write_transactions,dram_read_throughput,dram_write_throughput,dram_utilization,half_precision_fu_utilization,ecc_transactions,ecc_throughput
0,rodinia_b+tree,215.823,1,0.969349,0.95526,0.000182,0,0,0,0,...,0,0,567304,49367,44.6523,3.88548,0.2,0,0,0
1,cudasdk_threadFenceReduction,945.559,1,0.998695,0.994113,0.002153,0.818182,0.86653,0,0,...,0.00292272,0,130986,42857,148.598,48.1941,0.5,0,0,0
2,cudasdk_convolutionFFT2D,739,1,1,0.998647,0.092664,1,1,0,0,...,0.0568246,0,1.0573e+06,531458,223.359,166.583,0.8,0,0,0
3,shoc_lev1BFS,508.375,1,0.901111,0.88126,0.004124,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,rodinia_lavaMD,107306,1,0.783421,0.782049,9e-06,1.04365,5.63165,0,0,...,0,0.575583,242144,105412,0.162125,0.0705719,0.1,0,0,0
5,poly_gemm,3289,1,1,0.999392,0.000133,0,0,0,0,...,0.040371,0,64384,35986,2.69525,1.50595,0.1,0,0,0
6,rodinia_hybridsort,28759.7,1,1,0.999418,0.004229,3.43682,2.69712,0,0,...,0.0165347,0,1.18018e+06,4.91563e+06,164.723,163.55,0.8,0,0,0
7,cudasdk_MCEstimatePiInlineQ,901.121,1,1,0.965012,0.007113,1,1,14.6633,4,...,0.00016994,0,1.51473e+06,5.79256e+06,153.211,104.262,0.5,0,0,0
8,cudasdk_MCEstimatePiInlineP,48362.9,1,0.951714,0.922504,0.005409,1,1,7.50697,3.98825,...,0.00439568,0,5.28837e+06,8.46774e+06,121.824,88.6172,0.4,0,0,0
9,cudasdk_shflscan,5259,1,1,0.986975,0.051071,1.29167,1.49815,0,0,...,0,0,259208,267438,87.508,256.427,0.8,0,0,0


### apply minmax scaler

In [17]:
# the scaling factors are stored in metrics_scale_dd
df_app_scale = df_app.copy()

metrics_scale_dd

for metric in TargetMetrics:
    [x_min, x_max] = metrics_scale_dd[metric]  # read the scaler from dict for min and max value of the feature
    
    if x_max == x_min:
        x_range = 1e-6    # up-floor, avoid float division by zero
    else:
        x_range = x_max - x_min
    
    df_app_scale[metric] = df_app_scale[metric].apply(lambda x : (x - x_min) / x_range)

In [18]:
df_app_scale

Unnamed: 0,AppName,inst_per_warp,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,shared_load_transactions_per_request,shared_store_transactions_per_request,local_load_transactions_per_request,local_store_transactions_per_request,...,flop_sp_efficiency,flop_dp_efficiency,dram_read_transactions,dram_write_transactions,dram_read_throughput,dram_write_throughput,dram_utilization,half_precision_fu_utilization,ecc_transactions,ecc_throughput
0,rodinia_b+tree,6.633252e-06,1.000000,0.968361,0.953817,0.000410,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,4.801568e-02,5.817352e-03,0.134000,0.010142,0.222222,0.0,0.0,0.0
1,cudasdk_threadFenceReduction,2.927118e-05,1.000000,0.998653,0.993923,0.004844,0.038837,0.078536,0.000000,0.000000,...,0.008837,0.000000,1.108644e-02,5.050221e-03,0.445938,0.125801,0.555556,0.0,0.0,0.0
2,cudasdk_convolutionFFT2D,2.286330e-05,1.000000,1.000000,0.998603,0.208494,0.047468,0.090633,0.000000,0.000000,...,0.171807,0.000000,8.948831e-02,6.262641e-02,0.670293,0.434830,0.888889,0.0,0.0,0.0
3,shoc_lev1BFS,1.570882e-05,1.000000,0.897921,0.877429,0.009279,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,0.0,0.0,0.0
4,rodinia_lavaMD,3.328782e-03,1.000000,0.776434,0.775018,0.000020,0.049540,0.510414,0.000000,0.000000,...,0.000000,0.929573,2.049467e-02,1.242163e-02,0.000487,0.000184,0.111111,0.0,0.0,0.0
5,poly_gemm,1.019697e-04,1.000000,1.000000,0.999372,0.000299,0.000000,0.000000,0.000000,0.000000,...,0.122060,0.000000,5.449356e-03,4.240550e-03,0.008088,0.003931,0.111111,0.0,0.0,0.0
6,rodinia_hybridsort,8.921252e-04,1.000000,1.000000,0.999400,0.009515,0.163138,0.244449,0.000000,0.000000,...,0.049992,0.000000,9.988884e-02,5.792521e-01,0.494328,0.426912,0.888889,0.0,0.0,0.0
7,cudasdk_MCEstimatePiInlineQ,2.789265e-05,1.000000,1.000000,0.963884,0.016004,0.047468,0.090633,0.916453,0.500000,...,0.000514,0.000000,1.282041e-01,6.825889e-01,0.459780,0.272156,0.555556,0.0,0.0,0.0
8,cudasdk_MCEstimatePiInlineP,1.500257e-03,1.000000,0.950156,0.920005,0.012170,0.047468,0.090633,0.469186,0.498532,...,0.013290,0.000000,4.475986e-01,9.978291e-01,0.365591,0.231317,0.444444,0.0,0.0,0.0
9,cudasdk_shflscan,1.630832e-04,1.000000,1.000000,0.986555,0.114910,0.061312,0.135783,0.000000,0.000000,...,0.000000,0.000000,2.193894e-02,3.151459e-02,0.262608,0.669349,0.888889,0.0,0.0,0.0


### [4] Select the target feature metrics set

In [19]:
#
# ToDo: feat26
# 
feat26 = ['warp_execution_efficiency',
          'warp_nonpred_execution_efficiency',
          'branch_efficiency',
          'ldst_fu_utilization',
          'flop_dp_efficiency',
          'sysmem_write_throughput',
            'shared_store_throughput',
            'global_hit_rate',
            'gst_transactions_per_request',
            'gst_efficiency',
            'gst_requested_throughput',
            'gld_throughput',
            'gld_transactions_per_request',
            'stall_inst_fetch',
            'stall_sync',
            'stall_texture',
            'stall_exec_dependency',
            'stall_memory_dependency',
            'stall_other',
            'local_hit_rate',
            'local_store_transactions_per_request',
            'local_memory_overhead',
            'l2_tex_write_throughput',
            'l2_utilization', 
            'tex_cache_throughput',
            'tex_cache_hit_rate'
]

In [20]:
sel_feats = feat26
print 'select feat number = %d' % len(sel_feats) 

select feat number = 26


In [21]:
#
# other metrics
#
other_feats = [m  for m in TargetMetrics if m not in sel_feats]
print len(other_feats)

94


In [22]:
if len(TargetMetrics) <> (len(other_feats) + len(sel_feats)):
    print "The feats number does not match!"
else:
    print "Good job!"

Good job!


In [23]:
import copy
df_current = copy.deepcopy(df_app_scale)

In [24]:
df_current.drop(other_feats, axis = 1, inplace=True)  # inplace drop columns not needed

In [25]:
df_current[:3]

Unnamed: 0,AppName,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,local_store_transactions_per_request,gld_transactions_per_request,gst_transactions_per_request,global_hit_rate,local_hit_rate,gst_requested_throughput,...,gst_efficiency,stall_inst_fetch,stall_exec_dependency,stall_memory_dependency,stall_texture,stall_sync,stall_other,l2_utilization,ldst_fu_utilization,flop_dp_efficiency
0,rodinia_b+tree,1.0,0.968361,0.953817,0.0,0.180095,0.03125,0.687571,0.0,0.002747,...,0.225,0.040705,0.035475,0.519929,0.0,0.514176,0.039073,0.4,0.2,0.0
1,cudasdk_threadFenceReduction,1.0,0.998653,0.993923,0.0,0.25,0.03125,0.575091,0.0,2.2e-05,...,0.125,0.046764,0.05908,0.906497,0.0,0.032294,0.018455,0.2,0.2,0.0
2,cudasdk_convolutionFFT2D,1.0,1.0,0.998603,0.0,0.372944,0.270249,0.862636,0.0,0.434757,...,1.0,0.153411,0.304536,1.002493,0.794627,0.09635,0.387119,0.6,0.2,0.0


In [26]:
df_current.shape

(79, 27)

### [5] save the processed metrics to a csv file 

In [27]:
df_current.to_csv("similarity_selected_metrics.csv", index=False, encoding='utf-8')

### [6] dict to store metrics for each app

In [28]:
df_current.columns[1:]

Index([u'branch_efficiency', u'warp_execution_efficiency',
       u'warp_nonpred_execution_efficiency',
       u'local_store_transactions_per_request',
       u'gld_transactions_per_request', u'gst_transactions_per_request',
       u'global_hit_rate', u'local_hit_rate', u'gst_requested_throughput',
       u'gld_throughput', u'local_memory_overhead', u'tex_cache_hit_rate',
       u'tex_cache_throughput', u'l2_tex_write_throughput',
       u'sysmem_write_throughput', u'shared_store_throughput',
       u'gst_efficiency', u'stall_inst_fetch', u'stall_exec_dependency',
       u'stall_memory_dependency', u'stall_texture', u'stall_sync',
       u'stall_other', u'l2_utilization', u'ldst_fu_utilization',
       u'flop_dp_efficiency'],
      dtype='object')

In [29]:
app2metric_dd = {}

for index, row in df_current.iterrows():
    app_name = row['AppName']
    #print app_name
    
    metric_cols  = df_current.columns[1:]
    metric_array = row[metric_cols]
    
    app2metric_dd[app_name] = metric_array

In [30]:
np.save('app2metric_dd.npy', app2metric_dd)

In [31]:
print len(app2metric_dd)

79


In [32]:
app2metric_dd['rodinia_b+tree']

branch_efficiency                                1
warp_execution_efficiency                 0.968361
warp_nonpred_execution_efficiency         0.953817
local_store_transactions_per_request             0
gld_transactions_per_request              0.180095
gst_transactions_per_request               0.03125
global_hit_rate                           0.687571
local_hit_rate                                   0
gst_requested_throughput                0.00274652
gld_throughput                            0.389003
local_memory_overhead                            0
tex_cache_hit_rate                        0.625726
tex_cache_throughput                      0.276062
l2_tex_write_throughput                  0.0070407
sysmem_write_throughput                          0
shared_store_throughput                          0
gst_efficiency                               0.225
stall_inst_fetch                         0.0407055
stall_exec_dependency                    0.0354754
stall_memory_dependency        

### apply similarity based analysis 

In [34]:
app_list = [key for key,_ in app2metric_dd.iteritems()]
app_list

['rodinia_b+tree',
 'cudasdk_reduction',
 'cudasdk_convolutionFFT2D',
 'shoc_lev1BFS',
 'rodinia_lavaMD',
 'poly_gemm',
 'rodinia_hybridsort',
 'cudasdk_MCEstimatePiInlineQ',
 'cudasdk_MCEstimatePiInlineP',
 'cudasdk_shflscan',
 'rodinia_dwt2d',
 'cudasdk_mergeSort',
 'lonestar_sssp',
 'rodinia_gaussian',
 'cudasdk_MCEstimatePiQ',
 'shoc_lev1md5hash',
 'parboil_bfs',
 'rodinia_backprop',
 'shoc_lev1fft',
 'cudasdk_segmentationTreeThrust',
 'cudasdk_simpleCUBLAS',
 'rodinia_heartwall',
 'parboil_sgemm',
 'cudasdk_concurrentKernels',
 'cudasdk_fastWalshTransform',
 'cudasdk_SobolQRNG',
 'poly_gesummv',
 'parboil_cutcp',
 'parboil_lbm',
 'cudasdk_boxFilterNPP',
 'cudasdk_dxtc',
 'shoc_lev1sort',
 'poly_covariance',
 'poly_correlation',
 'cudasdk_sortingNetworks',
 'cudasdk_convolutionSeparable',
 'poly_2dconv',
 'cudasdk_batchCUBLAS',
 'rodinia_pathfinder',
 'parboil_mriq',
 'cudasdk_binomialOptions',
 'cudasdk_scalarProd',
 'cudasdk_simpleCUFFTcallback',
 'rodinia_hotspot',
 'cudasdk_thr

In [51]:
def select_top3_least_similar(app2metric_dd, target, target_metrics):
    dist_dd = {}
    for key, value in app2metric_dd.iteritems():
        if key <> target:
            app_metric = value.as_matrix()
            dist_dd[key] = np.linalg.norm(target_metrics - app_metric)
    #    
    # sort dd by the dist : by ascending order
    #
    sort_dist = sorted(dist_dd.items(), key=operator.itemgetter(1))
    #print sort_dist
    
    #
    # select the application with largest euclidean distance : least similar one
    #
    print "\nTop3 least similar for %s\n" % target
    print sort_dist[-1]
    print sort_dist[-2]
    print sort_dist[-3]

In [52]:
#
# 2gpus
#

test1 = ['cudasdk_convolutionSeparable','cudasdk_fastWalshTransform']

for target in test1:
    select_top3_least_similar(app2metric_dd, target, app2metric_dd[target].as_matrix())


Top3 least similar for cudasdk_convolutionSeparable

('cudasdk_segmentationTreeThrust', 2.6250639899903292)
('poly_3mm', 2.5921012769285592)
('poly_gemm', 2.5832493498717031)

Top3 least similar for cudasdk_fastWalshTransform

('cudasdk_segmentationTreeThrust', 2.5232490297450596)
('poly_3mm', 2.4259091792262271)
('poly_gemm', 2.4228817615383851)


In [53]:
test2 = ['poly_correlation','poly_covariance']

for target in test2:
    select_top3_least_similar(app2metric_dd, target, app2metric_dd[target].as_matrix())


Top3 least similar for poly_correlation

('cudasdk_interval', 2.5658398748594018)
('cudasdk_concurrentKernels', 2.5523719367733744)
('shoc_lev1md5hash', 2.5062050509336662)

Top3 least similar for poly_covariance

('cudasdk_segmentationTreeThrust', 2.5035390256815067)
('poly_3mm', 2.3530437205517556)
('poly_gemm', 2.3513798622026294)


In [54]:
test3 = ['cudasdk_stereoDisparity','poly_3mm']

for target in test3:
    select_top3_least_similar(app2metric_dd, target, app2metric_dd[target].as_matrix())


Top3 least similar for cudasdk_stereoDisparity

('poly_3mm', 3.114059931744011)
('poly_gemm', 3.1068534670097709)
('cudasdk_segmentationTreeThrust', 3.0447592076639478)

Top3 least similar for poly_3mm

('rodinia_lavaMD', 3.698862219704325)
('cudasdk_interval', 3.6513310209970897)
('cudasdk_concurrentKernels', 3.5918903747815611)
