In [1]:
# from __future__ import print_function

%load_ext autoreload
%autoreload 2


import os
import sys # error msg, add the modules
sys.path.append('../pycode')

import json
import yaml
import numpy as np
import pandas as pd
from sklearn.feature_selection import VarianceThreshold

from magus import read_trace


### read metrics

In [2]:
metricsFolder = 'metrics_sdk80'
#
# read metrics
#
with open('maxwell_metrics.json', 'r') as metricsFile:
     metricsAll = yaml.safe_load(metricsFile)

MaxwellMetrics = metricsAll['maxwell']

featureDim = len(MaxwellMetrics)
print("Metrics on Maxwell GPUs : {}".format(featureDim))

appTrace = os.listdir(metricsFolder)
# print len(appTrace)
# print appTrace


#
# read each file
#
app_kernel_metrics_dd = {}
for currentFile in appTrace:
    # read the csv files in the metrics folder
    file_csv = metricsFolder + '/' + currentFile 
    appName = currentFile[:-12]
    
    df_app = read_trace(file_csv)
    
    kernelList = df_app['Kernel'].unique()
    
#     if appName == 'histogram':
#         print kernelList
    
    kernel_metrics_dd = {}
    for kernName in kernelList:
        df_kern = df_app.loc[df_app['Kernel'] == kernName]
        
        # go through each metrics and read the avg value
        metrics_dd = {}
        for index, rows in df_kern.iterrows():
            local_metric_name = rows['Metric Name']
            local_metric_value = rows['Avg']
            
            
            #adjustedV = local_metric_value
            
            adjustedV = adjust_metric(local_metric_name, local_metric_value)
            
            
            #
            # adjust convert (normalize) metrics
            #
            
            # (1) # percentage to decimal
            if local_metric_name in ['sm_efficiency', 'branch_efficiency', 'warp_execution_efficiency',
                'warp_nonpred_execution_efficiency', 'issue_slot_utilization', 'global_hit_rate', 'local_hit_rate',
                'gld_efficiency', 'gst_efficiency', 'shared_efficiency', 'stall_inst_fetch', 'stall_exec_dependency',
                'stall_memory_dependency', 'stall_texture', 'stall_sync', 'stall_other', 'stall_constant_memory_dependency',
                'stall_pipe_busy', 'stall_memory_throttle', 'stall_not_selected', 'local_memory_overhead', 'tex_cache_hit_rate',
                'l2_tex_read_hit_rate', 'l2_tex_write_hit_rate', 'flop_sp_efficiency', 'flop_dp_efficiency']:
                adjustedV = float(str(local_metric_value)[:-1]) * 0.01
                
            # (2) #  use GB/s for throughput
            elif local_metric_name in ['gld_requested_throughput', 'gst_requested_throughput', 'gld_throughput',
                'gst_throughput', 'dram_read_throughput', 'dram_write_throughput', 'tex_cache_throughput',
                'local_load_throughput', 'local_store_throughput', 'shared_load_throughput',
                'shared_store_throughput', 'l2_tex_read_throughput', 'l2_tex_write_throughput', 'l2_read_throughput',
                'l2_write_throughput', 'sysmem_read_throughput', 'sysmem_write_throughput',
                'l2_atomic_throughput', 'ecc_throughput']:
                #print local_metric_value
                local_metric_value_str = str(local_metric_value)
                if "GB/s" in local_metric_value_str:
                    adjustedV = float(str(local_metric_value)[:-4])
                elif "MB/s" in local_metric_value_str:
                    adjustedV = float(str(local_metric_value)[:-4]) * 1e-3
                elif "KB/s" in local_metric_value_str:
                    adjustedV = float(str(local_metric_value)[:-4]) * 1e-6
                elif "B/s" in local_metric_value_str:
                    adjustedV = float(str(local_metric_value)[:-3]) * 1e-9
                else:
                    print "Error: unknow throughtput unit!"
                    sys.exit(0)
            # (3) #  convert utilization to decimal 
            elif local_metric_name in ['cf_fu_utilization', 'tex_fu_utilization', 'ldst_fu_utilization',
                'double_precision_fu_utilization', 'special_fu_utilization', 'single_precision_fu_utilization',
                'dram_utilization', 'tex_utilization', 'shared_utilization', 'l2_utilization',
                'sysmem_utilization', 'sysmem_read_utilization', 'sysmem_write_utilization']:
                #print local_metric_value
                value_str = str(local_metric_value)
                #print value_str.find('(')
                #print value_str.rfind(')')
                adjustedV =  float(value_str[value_str.find('(') + 1 : value_str.rfind(')')]) * 0.1
                #print local_metric_name + " : " + str(local_metric_value) + " => after : " + str(adjustedV)
            
            #print local_metric_name + " : \t\t\t" + str(local_metric_value) + " => after : " + str(adjustedV)
            
            metrics_dd[local_metric_name] = adjustedV
        # update 
        kernel_metrics_dd[kernName] = metrics_dd
        #break
        
    # update
    app_kernel_metrics_dd[appName] = kernel_metrics_dd
    #break
    
    

#
# check
#
print("applications : {}".format(len(app_kernel_metrics_dd)))

totalKernNum = 0
for app, kern_metrics_dd in app_kernel_metrics_dd.iteritems():
    #print len(kern_metrics_dd)
    totalKernNum += len(kern_metrics_dd)
#     for kernName, metrics_dd in kern_metrics_dd.iteritems():
#         print kernName

print("kernels : {}".format(str(totalKernNum)))

Metrics on Maxwell GPUs : 113
applications : 56
kernels : 287


In [3]:
app_kernel_metrics_dd['scan']

{'scanExclusiveShared(uint4*, uint4*, unsigned int)': {'achieved_occupancy': '0.945823',
  'atomic_transactions': '0',
  'atomic_transactions_per_request': '0.000000',
  'branch_efficiency': 1.0,
  'cf_executed': '419719',
  'cf_fu_utilization': 0.1,
  'cf_issued': '419719',
  'double_precision_fu_utilization': 0.0,
  'dram_read_throughput': 43.577515,
  'dram_read_transactions': '851992',
  'dram_utilization': 0.9,
  'dram_write_throughput': 43.513253,
  'dram_write_transactions': '850733',
  'ecc_throughput': 0.0,
  'ecc_transactions': '0',
  'eligible_warps_per_cycle': '1.127109',
  'flop_count_dp': '0',
  'flop_count_dp_add': '0',
  'flop_count_dp_fma': '0',
  'flop_count_dp_mul': '0',
  'flop_count_sp': '0',
  'flop_count_sp_add': '0',
  'flop_count_sp_fma': '0',
  'flop_count_sp_mul': '0',
  'flop_count_sp_special': '0',
  'flop_dp_efficiency': 0.0,
  'flop_sp_efficiency': 0.0,
  'gld_efficiency': 1.0,
  'gld_requested_throughput': 43.576583,
  'gld_throughput': 43.576583,
  'gld

### set up feature matrix

In [12]:
featMatCols = ['KernelName']         
featMatCols.extend(MaxwellMetrics)

#
#  totalKernNum  x featureDim
#
df_featureMat = pd.DataFrame(index=np.arange(0, totalKernNum), columns=featMatCols)
print df_featureMat.shape

#
# export data to data frame, so that we can export to csv file easily
#
rowId = 0
for appName, kern_metrics_dd in app_kernel_metrics_dd.iteritems():
    for kernName, metrics_dd in kern_metrics_dd.iteritems():
        app_kern_name = '[[ '+ str(appName) + ' ]] ' + str(kernName)
        
        # add kernel name to the list
        df_featureMat.loc[rowId, 'KernelName'] = app_kern_name

        # add other metrics according to the column order
        for eachMetric in MaxwellMetrics:
            try:                
                df_featureMat.loc[rowId, eachMetric] = metrics_dd[eachMetric]
            except Exception as e:
                print e.message, e.args
                print('ERROR!! App = {}, Kernel={}'.format(appName, kernName))
                sys.exit(0)

        rowId += 1

(287, 114)


In [13]:
df_featureMat

Unnamed: 0,KernelName,sm_efficiency,achieved_occupancy,ipc,issued_ipc,inst_per_warp,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,...,l2_utilization,l2_atomic_throughput,l2_atomic_transactions,sysmem_utilization,ecc_throughput,sysmem_read_utilization,sysmem_write_utilization,eligible_warps_per_cycle,flop_sp_efficiency,flop_dp_efficiency
0,[[ boxFilterNPP ]] void ForEachTupleByteQuad<u...,0.774578,0.379772,1.839418,1.741966,1878.210938,0.814055,0.586253,0.581444,0.001604,...,0.3,0,0,0.1,0,0,0.1,3.440007,0.00286991,0
1,"[[ scan ]] scanExclusiveShared(uint4*, uint4*,...",0.995241,0.945823,1.038429,1.038845,98.647059,1,1,0.979783,0.000257,...,0.3,0,0,0.1,0,0,0.1,1.127109,0,0
2,"[[ scan ]] uniformUpdate(uint4*, unsigned int*)",0.995049,0.904622,0.216420,0.216561,20.625000,1,0.970644,0.970455,0.000393,...,0.3,0,0,0.1,0,0,0.1,0.219377,0,0
3,"[[ scan ]] scanExclusiveShared2(unsigned int*,...",0.776216,0.496326,0.193890,0.201860,87.500000,1,1,0.962367,0.037633,...,0.1,0,0,0.1,0,0,0.1,0.210305,0,0
4,"[[ c++11_cuda ]] xyzw_frequency(int*, char*, int)",0.984878,0.166630,0.419791,0.419742,31626.671875,0.999996,0.71216,0.709834,0.000088,...,0.1,2.9793,136176,0.1,0,0,0.1,0.401653,0,0
5,[[ c++11_cuda ]] xyzw_frequency_thrust_device(...,0.16706,0.015625,0.120050,0.120064,32235074.000000,1,0.03125,0.03125,0.000000,...,0.1,0,0,0.1,0,0,0.1,0.114061,0,0
6,[[ cuSolverSp_LinearSolver ]] void pegasus_sca...,0.808125,0.724293,0.227263,0.238280,31.918803,1,0.999523,0.905853,0.025706,...,0.2,0,0,0.1,0,0,0.1,0.305385,0,0
7,[[ cuSolverSp_LinearSolver ]] void pegasus_sym...,0.748081,0.784142,0.460054,0.496689,32.914530,1,0.999513,0.938904,0.024929,...,0.3,0,0,0.1,0,0,0.1,0.669309,0,0
8,[[ cuSolverSp_LinearSolver ]] void gather_core...,0.984699,0.936917,0.428544,0.432904,30.995775,1,0.999985,0.935468,0.001163,...,0.5,0,0,0.1,0,0,0.1,0.623174,0,0
9,[[ cuSolverSp_LinearSolver ]] void chol_check_...,0.69728,0.519168,0.161881,0.167549,26.693750,1,0.999181,0.925135,0.074924,...,0.2,0,0,0.1,0,0,0.1,0.210813,0,0


### Feature Scaling

### remove low variance features

In [10]:
df_featureMat = df_featureMat.convert_objects(convert_numeric=True)

  if __name__ == '__main__':


In [11]:
df_featureMat_cp = df_featureMat.copy()
df_featureMat_cp = df_featureMat_cp.drop('KernelName', axis=1)   # remove the 1st column

In [12]:
df_featureMat_cp

Unnamed: 0,sm_efficiency,achieved_occupancy,ipc,issued_ipc,inst_per_warp,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,issue_slot_utilization,...,l2_utilization,l2_atomic_throughput,l2_atomic_transactions,sysmem_utilization,ecc_throughput,sysmem_read_utilization,sysmem_write_utilization,eligible_warps_per_cycle,flop_sp_efficiency,flop_dp_efficiency
0,0.774578,0.379772,1.839418,1.741966,1.878211e+03,0.814055,0.586253,0.581444,0.001604,0.390164,...,0.3,0.000000,0,0.1,0.0,0.0,0.1,3.440007,0.002870,0.000000
1,0.995241,0.945823,1.038429,1.038845,9.864706e+01,1.000000,1.000000,0.979783,0.000257,0.218233,...,0.3,0.000000,0,0.1,0.0,0.0,0.1,1.127109,0.000000,0.000000
2,0.995049,0.904622,0.216420,0.216561,2.062500e+01,1.000000,0.970644,0.970455,0.000393,0.048892,...,0.3,0.000000,0,0.1,0.0,0.0,0.1,0.219377,0.000000,0.000000
3,0.776216,0.496326,0.193890,0.201860,8.750000e+01,1.000000,1.000000,0.962367,0.037633,0.042666,...,0.1,0.000000,0,0.1,0.0,0.0,0.1,0.210305,0.000000,0.000000
4,0.984878,0.166630,0.419791,0.419742,3.162667e+04,0.999996,0.712160,0.709834,0.000088,0.096177,...,0.1,2.979301,136176,0.1,0.0,0.0,0.1,0.401653,0.000000,0.000000
5,0.167060,0.015625,0.120050,0.120064,3.223507e+07,1.000000,0.031250,0.031250,0.000000,0.028515,...,0.1,0.000000,0,0.1,0.0,0.0,0.1,0.114061,0.000000,0.000000
6,0.808125,0.724293,0.227263,0.238280,3.191880e+01,1.000000,0.999523,0.905853,0.025706,0.059570,...,0.2,0.000000,0,0.1,0.0,0.0,0.1,0.305385,0.000000,0.000000
7,0.748081,0.784142,0.460054,0.496689,3.291453e+01,1.000000,0.999513,0.938904,0.024929,0.124172,...,0.3,0.000000,0,0.1,0.0,0.0,0.1,0.669309,0.000000,0.000000
8,0.984699,0.936917,0.428544,0.432904,3.099577e+01,1.000000,0.999985,0.935468,0.001163,0.108226,...,0.5,0.000000,0,0.1,0.0,0.0,0.1,0.623174,0.000000,0.000000
9,0.697280,0.519168,0.161881,0.167549,2.669375e+01,1.000000,0.999181,0.925135,0.074924,0.041887,...,0.2,0.000000,0,0.1,0.0,0.0,0.1,0.210813,0.000000,0.000000


In [13]:
df_featureMat_cp.values

array([[  7.74578200e-01,   3.79772000e-01,   1.83941800e+00, ...,
          3.44000700e+00,   2.86991000e-03,   0.00000000e+00],
       [  9.95241010e-01,   9.45823000e-01,   1.03842900e+00, ...,
          1.12710900e+00,   0.00000000e+00,   0.00000000e+00],
       [  9.95048990e-01,   9.04622000e-01,   2.16420000e-01, ...,
          2.19377000e-01,   0.00000000e+00,   0.00000000e+00],
       ..., 
       [  9.14324520e-01,   4.62286000e-01,   1.32895200e+00, ...,
          1.92675900e+00,   0.00000000e+00,   0.00000000e+00],
       [  9.35530860e-01,   1.82641000e-01,   9.32989000e-01, ...,
          8.95934000e-01,   0.00000000e+00,   0.00000000e+00],
       [  9.97601820e-01,   3.07152000e-01,   2.97309900e+00, ...,
          4.03422500e+00,   2.54352010e-01,   0.00000000e+00]])

In [14]:
featureMatColumns = df_featureMat_cp.columns
print featureMatColumns

Index([u'sm_efficiency', u'achieved_occupancy', u'ipc', u'issued_ipc',
       u'inst_per_warp', u'branch_efficiency', u'warp_execution_efficiency',
       u'warp_nonpred_execution_efficiency', u'inst_replay_overhead',
       u'issue_slot_utilization',
       ...
       u'l2_utilization', u'l2_atomic_throughput', u'l2_atomic_transactions',
       u'sysmem_utilization', u'ecc_throughput', u'sysmem_read_utilization',
       u'sysmem_write_utilization', u'eligible_warps_per_cycle',
       u'flop_sp_efficiency', u'flop_dp_efficiency'],
      dtype='object', length=113)


In [15]:
featureMatColumns[0]

'sm_efficiency'

In [16]:
X = df_featureMat_cp.values
p = 0.8
vt = VarianceThreshold(threshold=(p * (1 - p)))

vt.fit(X)

# get the indices of the features that are being kept
feature_indices = vt.get_support(indices=True)

In [17]:
feature_indices

array([  2,   3,   4,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
        20,  21,  22,  23,  26,  27,  28,  29,  30,  31,  32,  33,  34,
        35,  36,  39,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
        56,  59,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,
        72,  73,  74,  75,  88,  89,  94,  95,  96,  97,  98,  99, 100,
       104, 105, 110])

In [19]:
print len(feature_indices)

68


In [18]:
# remove low-variance columns from index
feature_names = [featureMatColumns[idx] for idx, _ in enumerate(featureMatColumns) if idx in feature_indices]
print feature_names

['ipc', 'issued_ipc', 'inst_per_warp', 'shared_load_transactions_per_request', 'shared_store_transactions_per_request', 'local_load_transactions_per_request', 'local_store_transactions_per_request', 'gld_transactions_per_request', 'gst_transactions_per_request', 'shared_store_transactions', 'shared_load_transactions', 'local_load_transactions', 'local_store_transactions', 'gld_transactions', 'gst_transactions', 'dram_read_transactions', 'dram_write_transactions', 'gld_requested_throughput', 'gst_requested_throughput', 'gld_throughput', 'gst_throughput', 'dram_read_throughput', 'dram_write_throughput', 'tex_cache_throughput', 'local_load_throughput', 'local_store_throughput', 'shared_load_throughput', 'shared_store_throughput', 'tex_cache_transactions', 'flop_count_dp', 'flop_count_dp_add', 'flop_count_dp_fma', 'flop_count_dp_mul', 'flop_count_sp', 'flop_count_sp_add', 'flop_count_sp_fma', 'flop_count_sp_mul', 'flop_count_sp_special', 'inst_executed', 'inst_issued', 'shared_efficiency',

In [18]:
selectec_columns = ['KernelName']         
selectec_columns.extend(feature_names)
print selectec_columns

['KernelName', 'ipc', 'issued_ipc', 'inst_per_warp', 'shared_load_transactions_per_request', 'shared_store_transactions_per_request', 'local_load_transactions_per_request', 'local_store_transactions_per_request', 'gld_transactions_per_request', 'gst_transactions_per_request', 'shared_store_transactions', 'shared_load_transactions', 'local_load_transactions', 'local_store_transactions', 'gld_transactions', 'gst_transactions', 'dram_read_transactions', 'dram_write_transactions', 'gld_requested_throughput', 'gst_requested_throughput', 'gld_throughput', 'gst_throughput', 'dram_read_throughput', 'dram_write_throughput', 'tex_cache_throughput', 'local_load_throughput', 'local_store_throughput', 'shared_load_throughput', 'shared_store_throughput', 'tex_cache_transactions', 'flop_count_dp', 'flop_count_dp_add', 'flop_count_dp_fma', 'flop_count_dp_mul', 'flop_count_sp', 'flop_count_sp_add', 'flop_count_sp_fma', 'flop_count_sp_mul', 'flop_count_sp_special', 'inst_executed', 'inst_issued', 'share

In [19]:
df_featureMat = df_featureMat[selectec_columns]

In [20]:
df_featureMat

Unnamed: 0,KernelName,ipc,issued_ipc,inst_per_warp,shared_load_transactions_per_request,shared_store_transactions_per_request,local_load_transactions_per_request,local_store_transactions_per_request,gld_transactions_per_request,gst_transactions_per_request,...,l2_tex_write_hit_rate,l2_tex_read_throughput,l2_tex_write_throughput,l2_tex_read_transactions,l2_tex_write_transactions,l2_read_throughput,l2_write_throughput,l2_atomic_throughput,l2_atomic_transactions,eligible_warps_per_cycle
0,[[ boxFilterNPP ]] void ForEachTupleByteQuad<u...,1.839418,1.741966,1.878211e+03,0.000000,0.000000,0.000000,0.00000,3.687500,8.000000,...,0.830078,56.411140,5.014241,92160,8192,58.012083,5.017966,0.000000,0,3.440007
1,"[[ scan ]] scanExclusiveShared(uint4*, uint4*,...",1.038429,1.038845,9.864706e+01,1.294118,1.294118,0.000000,0.00000,16.000000,16.000000,...,0.000000,43.576583,43.576583,851968,851968,43.580309,43.576583,0.000000,0,1.127109
2,"[[ scan ]] uniformUpdate(uint4*, unsigned int*)",0.216420,0.216561,2.062500e+01,1.000000,1.000000,0.000000,0.00000,14.333333,16.000000,...,1.000000,43.876469,43.536536,858624,851968,43.879263,43.536536,0.000000,0,0.219377
3,"[[ scan ]] scanExclusiveShared2(unsigned int*,...",0.193890,0.201860,8.750000e+01,1.500000,1.500000,0.000000,0.00000,32.000000,4.000000,...,0.000000,31.416304,1.962297,13312,832,31.588599,1.977198,0.000000,0,0.210305
4,"[[ c++11_cuda ]] xyzw_frequency(int*, char*, int)",0.419791,0.419742,3.162667e+04,0.000000,0.000000,0.000000,0.00000,3.999980,0.000000,...,0.000000,4.408881,0.000000,100735,0,7.390045,2.980232,2.979301,136176,0.401653
5,[[ c++11_cuda ]] xyzw_frequency_thrust_device(...,0.120050,0.120064,3.223507e+07,0.000000,0.000000,0.000000,0.00000,1.000000,1.000000,...,0.000000,0.533104,0.000000,3223503,1,0.533104,0.000000,0.000000,0,0.114061
6,[[ cuSolverSp_LinearSolver ]] void pegasus_sca...,0.227263,0.238280,3.191880e+01,0.000000,0.000000,0.000000,0.00000,7.993562,18.174893,...,0.203672,19.319355,29.284507,11175,16939,19.387342,29.295683,0.000000,0,0.305385
7,[[ cuSolverSp_LinearSolver ]] void pegasus_sym...,0.460054,0.496689,3.291453e+01,0.000000,0.000000,0.000000,0.00000,12.968884,7.993562,...,0.000000,54.871663,24.532899,16663,7450,55.000186,24.552457,0.000000,0,0.669309
8,[[ cuSolverSp_LinearSolver ]] void gather_core...,0.428544,0.432904,3.099577e+01,0.000000,0.000000,0.000000,0.00000,19.994624,7.999812,...,0.000000,127.397478,28.457493,762776,170388,127.414241,28.458424,0.000000,0,0.623174
9,[[ cuSolverSp_LinearSolver ]] void chol_check_...,0.161881,0.167549,2.669375e+01,0.000000,0.000000,0.000000,0.00000,20.964856,0.000000,...,0.000000,43.473206,0.000000,11562,0,43.620355,0.022888,0.000000,0,0.210813


### normalize each feature column

In [23]:
columns_after_sel = df_featureMat.columns

In [24]:
cols2norm_full = ['ipc',
'issued_ipc',
'inst_per_warp',
'inst_replay_overhead',
'shared_load_transactions_per_request',
'shared_store_transactions_per_request',
'local_load_transactions_per_request',
'local_store_transactions_per_request',
'gld_transactions_per_request',
'gst_transactions_per_request',
'shared_store_transactions',
'shared_load_transactions',
'local_load_transactions',
'local_store_transactions',
'gld_transactions',
'gst_transactions',
'dram_read_transactions',
'dram_write_transactions',
'gld_requested_throughput',
'gst_requested_throughput',
'gld_throughput',
'gst_throughput',
'dram_read_throughput',
'dram_write_throughput',
'tex_cache_throughput',
'local_load_throughput',
'local_store_throughput',
'shared_load_throughput',
'shared_store_throughput',
'tex_cache_transactions',
'flop_count_dp',
'flop_count_dp_add',
'flop_count_dp_fma',
'flop_count_dp_mul',
'flop_count_sp',
'flop_count_sp_add',
'flop_count_sp_fma',
'flop_count_sp_mul',
'flop_count_sp_special',
'inst_executed',
'inst_issued',
'inst_fp_32',
'inst_fp_64',
'inst_integer',
'inst_bit_convert',
'inst_control',
'inst_compute_ld_st',
'inst_misc',
'inst_inter_thread_communication',
'issue_slots',
'cf_issued',
'cf_executed',
'ldst_issued',
'ldst_executed',
'atomic_transactions',
'atomic_transactions_per_request',
'sysmem_read_transactions',
'sysmem_write_transactions',
'l2_read_transactions',
'l2_write_transactions',
'ecc_transactions',
'l2_tex_read_throughput',
'l2_tex_write_throughput',
'l2_tex_read_transactions',
'l2_tex_write_transactions',
'l2_read_throughput',
'l2_write_throughput',
'sysmem_read_throughput',
'sysmem_write_throughput',
'l2_atomic_throughput',
'l2_atomic_transactions',
'ecc_throughput',
'eligible_warps_per_cycle']


colsMaxOne_full = ['sm_efficiency',
'achieved_occupancy',
'branch_efficiency',
'warp_execution_efficiency',
'warp_nonpred_execution_efficiency',
'issue_slot_utilization',
'global_hit_rate',
'local_hit_rate',
'gld_efficiency',
'gst_efficiency',
'cf_fu_utilization',
'tex_fu_utilization',
'ldst_fu_utilization',
'double_precision_fu_utilization',
'special_fu_utilization',
'single_precision_fu_utilization',
'dram_utilization',
'tex_utilization',
'shared_efficiency',
'shared_utilization',
'stall_inst_fetch',
'stall_exec_dependency',
'stall_memory_dependency',
'stall_texture',
'stall_sync',
'stall_other',
'stall_constant_memory_dependency',
'stall_pipe_busy',
'stall_memory_throttle',
'stall_not_selected',
'local_memory_overhead',
'tex_cache_hit_rate',
'l2_tex_read_hit_rate',
'l2_tex_write_hit_rate',
'l2_utilization',
'sysmem_utilization',
'sysmem_read_utilization',
'sysmem_write_utilization',
'flop_sp_efficiency',
'flop_dp_efficiency']


In [None]:
#
# TODO: save the scaling factors for each metric column
#

In [25]:
#
# update selected columns
#
cols2norm = [metric for metric in cols2norm_full if metric in columns_after_sel]
#print cols2norm

colsMaxOne = [metric for metric in colsMaxOne_full if metric in columns_after_sel]
print colsMaxOne

['shared_efficiency', 'l2_tex_write_hit_rate']


In [26]:
metricsMax_dd = {}
for metric in colsMaxOne:
        metricsMax_dd[metric] = 1.0 

    
maxcol = df_featureMat[cols2norm].max()
#print maxcol

mincol = df_featureMat[cols2norm].min()

# feature scaling
for metric in cols2norm:
    maxV = float(maxcol[metric])
    minV = float(mincol[metric])
    
    if maxV == 0: maxV = 1e-6  #if it is zero, raise floor by 1e-6

    if maxV == minV: maxV += 1e-6
    
    metricsMax_dd[metric] = maxV
        
    #
    # option 2: feature scaling
    #
    df_featureMat[metric] = df_featureMat[metric].apply(lambda x: (x - minV) / (maxV - minV))
    

# print type(maxcol)
# #print type(float(maxcol)) # error
# print maxcol.loc['ipc']
# print type(maxcol.loc['ipc'])
# print maxcol.loc['issued_ipc']

In [27]:
#maxcol
#mincol
metricsMax_dd

{'atomic_transactions': 83039.0,
 'atomic_transactions_per_request': 5.349603,
 'cf_executed': 220609193.0,
 'cf_issued': 220609193.0,
 'dram_read_throughput': 82.921237,
 'dram_read_transactions': 30866749.0,
 'dram_write_throughput': 88.832341,
 'dram_write_transactions': 6655938.0,
 'eligible_warps_per_cycle': 10.155316000000001,
 'flop_count_dp': 939702716.0,
 'flop_count_dp_add': 94379405.0,
 'flop_count_dp_fma': 404701375.0,
 'flop_count_dp_mul': 66591523.0,
 'flop_count_sp': 6511611904.0,
 'flop_count_sp_add': 1086197760.0,
 'flop_count_sp_fma': 2170536960.0,
 'flop_count_sp_mul': 2170537984.0,
 'flop_count_sp_special': 56186056.0,
 'gld_requested_throughput': 147.828832,
 'gld_throughput': 283.836387,
 'gld_transactions': 1310066566.0,
 'gld_transactions_per_request': 32.0,
 'gst_requested_throughput': 90.070069,
 'gst_throughput': 112.817623,
 'gst_transactions': 56802642.0,
 'gst_transactions_per_request': 32.0,
 'inst_bit_convert': 70975488.0,
 'inst_compute_ld_st': 21237254

In [28]:
print len(metricsMax_dd)

68


In [50]:
# save dd to csv
import csv

with open('metricsColumnMax.csv', 'wb') as f:
    w = csv.DictWriter(f, metricsMax_dd.keys())
    w.writeheader()
    w.writerow(metricsMax_dd)

In [51]:
df_featureMat

Unnamed: 0,KernelName,ipc,issued_ipc,inst_per_warp,shared_load_transactions_per_request,shared_store_transactions_per_request,local_load_transactions_per_request,local_store_transactions_per_request,gld_transactions_per_request,gst_transactions_per_request,...,l2_tex_write_hit_rate,l2_tex_read_throughput,l2_tex_write_throughput,l2_tex_read_transactions,l2_tex_write_transactions,l2_read_throughput,l2_write_throughput,l2_atomic_throughput,l2_atomic_transactions,eligible_warps_per_cycle
0,[[ boxFilterNPP ]] void ForEachTupleByteQuad<u...,0.495440,0.468416,5.820403e-05,0.000000,0.000000,0.000000,0.000000,0.115234,0.250000,...,0.830078,0.322555,0.032929,1.741527e-04,1.440158e-04,0.331667,0.032954,0.000000,0.000000,0.338107
1,"[[ scan ]] scanExclusiveShared(uint4*, uint4*,...",0.278742,0.278362,2.998196e-06,0.013673,0.027061,0.000000,0.000000,0.500000,0.500000,...,0.000000,0.249168,0.286172,1.609945e-03,1.497764e-02,0.249157,0.286172,0.000000,0.000000,0.110136
2,"[[ scan ]] uniformUpdate(uint4*, unsigned int*)",0.056357,0.056100,5.777868e-07,0.010566,0.020910,0.000000,0.000000,0.447917,0.500000,...,1.000000,0.250883,0.285909,1.622522e-03,1.497764e-02,0.250867,0.285909,0.000000,0.000000,0.020666
3,"[[ scan ]] scanExclusiveShared2(unsigned int*,...",0.050262,0.052126,2.652391e-06,0.015849,0.031366,0.000000,0.000000,1.000000,0.125000,...,0.000000,0.179636,0.012887,2.515539e-05,1.462661e-05,0.180598,0.012984,0.000000,0.000000,0.019772
4,"[[ c++11_cuda ]] xyzw_frequency(int*, char*, int)",0.111377,0.111019,9.810641e-04,0.000000,0.000000,0.000000,0.000000,0.124999,0.000000,...,0.000000,0.025210,0.000000,1.903567e-04,0.000000e+00,0.042250,0.019572,0.079732,0.692690,0.038632
5,[[ c++11_cuda ]] xyzw_frequency_thrust_device(...,0.030285,0.030016,1.000000e+00,0.000000,0.000000,0.000000,0.000000,0.031250,0.031250,...,0.000000,0.003048,0.000000,6.091381e-03,1.758006e-08,0.003048,0.000000,0.000000,0.000000,0.010285
6,[[ cuSolverSp_LinearSolver ]] void pegasus_sca...,0.059291,0.061970,9.281444e-07,0.000000,0.000000,0.000000,0.000000,0.249799,0.567965,...,0.203672,0.110467,0.192315,2.111715e-05,2.977886e-04,0.110841,0.192388,0.000000,0.000000,0.029143
7,[[ cuSolverSp_LinearSolver ]] void pegasus_sym...,0.122269,0.131818,9.590340e-07,0.000000,0.000000,0.000000,0.000000,0.405278,0.249799,...,0.000000,0.313752,0.161110,3.148769e-05,1.309714e-04,0.314447,0.161239,0.000000,0.000000,0.065013
8,[[ cuSolverSp_LinearSolver ]] void gather_core...,0.113745,0.114577,8.995102e-07,0.000000,0.000000,0.000000,0.000000,0.624832,0.249994,...,0.000000,0.728450,0.186883,1.441401e-03,2.995430e-03,0.728453,0.186890,0.000000,0.000000,0.060466
9,[[ cuSolverSp_LinearSolver ]] void chol_check_...,0.041602,0.042852,7.660523e-07,0.000000,0.000000,0.000000,0.000000,0.655152,0.000000,...,0.000000,0.248577,0.000000,2.184845e-05,0.000000e+00,0.249386,0.000150,0.000000,0.000000,0.019822


In [52]:
df_featureMat.shape

(287, 69)

### dataframe to matrix

In [61]:
columns_after_sel_list = [col for col in columns_after_sel if col <> 'KernelName']
#print columns_after_sel_list

In [62]:
FeatureMat = df_featureMat.loc[:,columns_after_sel_list].as_matrix()

In [63]:
df_featureMat.loc[:,columns_after_sel_list]

Unnamed: 0,ipc,issued_ipc,inst_per_warp,shared_load_transactions_per_request,shared_store_transactions_per_request,local_load_transactions_per_request,local_store_transactions_per_request,gld_transactions_per_request,gst_transactions_per_request,shared_store_transactions,...,l2_tex_write_hit_rate,l2_tex_read_throughput,l2_tex_write_throughput,l2_tex_read_transactions,l2_tex_write_transactions,l2_read_throughput,l2_write_throughput,l2_atomic_throughput,l2_atomic_transactions,eligible_warps_per_cycle
0,0.495440,0.468416,5.820403e-05,0.000000,0.000000,0.000000,0.000000,0.115234,0.250000,0.000000e+00,...,0.830078,0.322555,0.032929,1.741527e-04,1.440158e-04,0.331667,0.032954,0.000000,0.000000,0.338107
1,0.278742,0.278362,2.998196e-06,0.013673,0.027061,0.000000,0.000000,0.500000,0.500000,2.012189e-02,...,0.000000,0.249168,0.286172,1.609945e-03,1.497764e-02,0.249157,0.286172,0.000000,0.000000,0.110136
2,0.056357,0.056100,5.777868e-07,0.010566,0.020910,0.000000,0.000000,0.447917,0.500000,2.776561e-04,...,1.000000,0.250883,0.285909,1.622522e-03,1.497764e-02,0.250867,0.285909,0.000000,0.000000,0.020666
3,0.050262,0.052126,2.652391e-06,0.015849,0.031366,0.000000,0.000000,1.000000,0.125000,7.592160e-05,...,0.000000,0.179636,0.012887,2.515539e-05,1.462661e-05,0.180598,0.012984,0.000000,0.000000,0.019772
4,0.111377,0.111019,9.810641e-04,0.000000,0.000000,0.000000,0.000000,0.124999,0.000000,0.000000e+00,...,0.000000,0.025210,0.000000,1.903567e-04,0.000000e+00,0.042250,0.019572,0.079732,0.692690,0.038632
5,0.030285,0.030016,1.000000e+00,0.000000,0.000000,0.000000,0.000000,0.031250,0.031250,0.000000e+00,...,0.000000,0.003048,0.000000,6.091381e-03,1.758006e-08,0.003048,0.000000,0.000000,0.000000,0.010285
6,0.059291,0.061970,9.281444e-07,0.000000,0.000000,0.000000,0.000000,0.249799,0.567965,0.000000e+00,...,0.203672,0.110467,0.192315,2.111715e-05,2.977886e-04,0.110841,0.192388,0.000000,0.000000,0.029143
7,0.122269,0.131818,9.590340e-07,0.000000,0.000000,0.000000,0.000000,0.405278,0.249799,0.000000e+00,...,0.000000,0.313752,0.161110,3.148769e-05,1.309714e-04,0.314447,0.161239,0.000000,0.000000,0.065013
8,0.113745,0.114577,8.995102e-07,0.000000,0.000000,0.000000,0.000000,0.624832,0.249994,0.000000e+00,...,0.000000,0.728450,0.186883,1.441401e-03,2.995430e-03,0.728453,0.186890,0.000000,0.000000,0.060466
9,0.041602,0.042852,7.660523e-07,0.000000,0.000000,0.000000,0.000000,0.655152,0.000000,0.000000e+00,...,0.000000,0.248577,0.000000,2.184845e-05,0.000000e+00,0.249386,0.000150,0.000000,0.000000,0.019822


In [69]:
df_featureMat.shape

(287, 69)

In [66]:
# FeatureMat
# print type(FeatureMat)

# print df_featureMat.loc[0, 'ipc']
# print FeatureMat[0][0]

In [67]:
#
# save dataframe to csv
#
df_featureMat.to_csv("featureSelection_cudasdk.csv", index=False, encoding='utf-8')

### run principal feature analysis

In [68]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from collections import defaultdict
from sklearn.metrics.pairwise import euclidean_distances

In [77]:
class PFA(object):
    def __init__(self, n_features, q=None):
        self.q = q
        self.n_features = n_features

    def fit(self, X):
        if not self.q:
            self.q = X.shape[1] 

#         print self.q
        
        pca = PCA(n_components=self.q).fit(X)
#         pca = PCA(n_components=30).fit(X)
        
#         print pca.components_.shape
        
#         print pca.explained_variance_.shape
#         print pca.explained_variance_
        
#         print pca.explained_variance_ratio_.shape
#         print pca.explained_variance_ratio_
        print sum(pca.explained_variance_ratio_)
        
        A_q = pca.components_.T

        kmeans = KMeans(n_clusters=self.n_features).fit(A_q)
        clusters = kmeans.predict(A_q)
        cluster_centers = kmeans.cluster_centers_

        dists = defaultdict(list)
        for i, c in enumerate(clusters):
            dist = euclidean_distances(A_q[i, :], cluster_centers[c, :])[0][0]
            dists[c].append((i, dist))

        self.indices_ = [sorted(f, key=lambda x: x[1])[0][0] for f in dists.values()]
        self.features_ = X[:, self.indices_]
        
        
pfa = PFA(n_features=30)
X = FeatureMat
pfa.fit(X)

1.0




In [78]:
# To get the transformed matrix
X = pfa.features_


# To get the column indices of the kept features
column_indices = pfa.indices_
print column_indices

for idx in column_indices:
    print MaxwellMetrics[idx]

[9, 43, 30, 33, 55, 38, 22, 17, 13, 24, 40, 1, 45, 44, 67, 49, 34, 12, 20, 58, 56, 2, 52, 19, 0, 66, 32, 29, 27, 62]
issue_slot_utilization
double_precision_fu_utilization
dram_read_throughput
local_load_throughput
inst_executed
gst_efficiency
dram_read_transactions
shared_load_transactions
local_store_transactions_per_request
global_hit_rate
cf_fu_utilization
achieved_occupancy
single_precision_fu_utilization
special_fu_utilization
inst_misc
flop_count_dp_mul
local_store_throughput
local_load_transactions_per_request
gld_transactions
tex_utilization
inst_issued
ipc
flop_count_sp_fma
local_store_transactions
sm_efficiency
inst_compute_ld_st
tex_cache_throughput
gst_throughput
gst_requested_throughput
inst_fp_64
