In [1]:
# from __future__ import print_function

%load_ext autoreload
%autoreload 2


import os
import sys # error msg, add the modules
sys.path.append('../pycode')

import json
import yaml
import numpy as np
import pandas as pd
from sklearn.feature_selection import VarianceThreshold

from magus import read_trace,adjust_metric


### read metrics

In [2]:
metricsFolder = 'metrics_sdk80'

#
# read metrics
#
# The metric list is stored as JSON but parsed with the YAML loader; only
# the 'maxwell' entry is used below.
with open('maxwell_metrics.json', 'r') as metricsFile:
    metricsAll = yaml.safe_load(metricsFile)

MaxwellMetrics = metricsAll['maxwell']

featureDim = len(MaxwellMetrics)
print("Metrics on Maxwell GPUs : {}".format(featureDim))

# Keep regular, non-hidden files only (e.g. skips .ipynb_checkpoints) and sort
# the listing so the traces are always processed in the same order.
appTrace = sorted(
    entry for entry in os.listdir(metricsFolder)
    if not entry.startswith('.')
    and os.path.isfile(os.path.join(metricsFolder, entry))
)


#
# read each file
#
# Result layout: {application name: {kernel name: {metric name: adjusted value}}}
app_kernel_metrics_dd = {}
for currentFile in appTrace:
    # read the csv files in the metrics folder
    file_csv = os.path.join(metricsFolder, currentFile)

    # The application name is the file name without its last 12 characters
    # (presumably a fixed-length suffix -- TODO confirm against the trace files).
    appName = currentFile[:-12]

    df_app = read_trace(file_csv)

    kernelList = df_app['Kernel'].unique()

    kernel_metrics_dd = {}
    for kernName in kernelList:
        df_kern = df_app.loc[df_app['Kernel'] == kernName]

        # go through each metric row and store the adjusted 'Avg' value
        metrics_dd = {}
        for _, rows in df_kern.iterrows():
            local_metric_name = rows['Metric Name']
            metrics_dd[local_metric_name] = adjust_metric(local_metric_name, rows['Avg'])

        kernel_metrics_dd[kernName] = metrics_dd

    app_kernel_metrics_dd[appName] = kernel_metrics_dd


#
# check
#
print("applications : {}".format(len(app_kernel_metrics_dd)))

# Total number of kernels over all applications (= rows of the feature matrix).
totalKernNum = sum(len(kern_metrics_dd) for kern_metrics_dd in app_kernel_metrics_dd.values())

print("kernels : {}".format(str(totalKernNum)))

Metrics on Maxwell GPUs : 113
applications : 57
kernels : 287


In [3]:
# Inspect the per-kernel metric dictionaries collected for the 'scan' application.
app_kernel_metrics_dd['scan']

{'scanExclusiveShared(uint4*, uint4*, unsigned int)': {'achieved_occupancy': '0.945823',
  'atomic_transactions': '0',
  'atomic_transactions_per_request': '0.000000',
  'branch_efficiency': 1.0,
  'cf_executed': '419719',
  'cf_fu_utilization': 0.1,
  'cf_issued': '419719',
  'double_precision_fu_utilization': 0.0,
  'dram_read_throughput': 43.577515,
  'dram_read_transactions': '851992',
  'dram_utilization': 0.9,
  'dram_write_throughput': 43.513253,
  'dram_write_transactions': '850733',
  'ecc_throughput': 0.0,
  'ecc_transactions': '0',
  'eligible_warps_per_cycle': '1.127109',
  'flop_count_dp': '0',
  'flop_count_dp_add': '0',
  'flop_count_dp_fma': '0',
  'flop_count_dp_mul': '0',
  'flop_count_sp': '0',
  'flop_count_sp_add': '0',
  'flop_count_sp_fma': '0',
  'flop_count_sp_mul': '0',
  'flop_count_sp_special': '0',
  'flop_dp_efficiency': 0.0,
  'flop_sp_efficiency': 0.0,
  'gld_efficiency': 1.0,
  'gld_requested_throughput': 43.576583,
  'gld_throughput': 43.576583,
  'gld

# set up feature matrix

In [4]:
# Column layout: kernel label first, then one column per Maxwell metric.
featMatCols = ['KernelName']
featMatCols.extend(MaxwellMetrics)

#
# Collect one row per (application, kernel) and build the frame in a single
# step; this avoids filling a pre-sized frame cell by cell with .loc.
#
feature_rows = []
for appName, kern_metrics_dd in app_kernel_metrics_dd.items():
    for kernName, metrics_dd in kern_metrics_dd.items():
        # Fail loudly (instead of exiting with status 0) when a kernel trace
        # lacks one of the expected metrics.
        missing = [eachMetric for eachMetric in MaxwellMetrics if eachMetric not in metrics_dd]
        if missing:
            raise KeyError('ERROR!! App = {}, Kernel={}, missing metrics: {}'.format(
                appName, kernName, missing))

        app_kern_name = '[[ ' + str(appName) + ' ]] ' + str(kernName)

        # kernel label followed by the metrics in column order
        feature_rows.append([app_kern_name] + [metrics_dd[eachMetric] for eachMetric in MaxwellMetrics])

#
#  totalKernNum  x (1 + featureDim)
#
# dtype=object keeps the raw values (strings and floats) untouched, so that we
# can export to a csv file easily.
df_featureMat = pd.DataFrame(feature_rows, columns=featMatCols, dtype=object)
print(df_featureMat.shape)

(287, 114)


In [5]:
# Display the assembled feature matrix (kernel label + one column per metric).
df_featureMat

Unnamed: 0,KernelName,sm_efficiency,achieved_occupancy,ipc,issued_ipc,inst_per_warp,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,...,l2_utilization,l2_atomic_throughput,l2_atomic_transactions,sysmem_utilization,ecc_throughput,sysmem_read_utilization,sysmem_write_utilization,eligible_warps_per_cycle,flop_sp_efficiency,flop_dp_efficiency
0,[[ boxFilterNPP ]] void ForEachTupleByteQuad<u...,0.774578,0.379772,1.839418,1.741966,1878.210938,0.814055,0.586253,0.581444,0.001604,...,0.3,0,0,0.1,0,0,0.1,3.440007,0.00286991,0
1,"[[ scan ]] scanExclusiveShared(uint4*, uint4*,...",0.995241,0.945823,1.038429,1.038845,98.647059,1,1,0.979783,0.000257,...,0.3,0,0,0.1,0,0,0.1,1.127109,0,0
2,"[[ scan ]] uniformUpdate(uint4*, unsigned int*)",0.995049,0.904622,0.216420,0.216561,20.625000,1,0.970644,0.970455,0.000393,...,0.3,0,0,0.1,0,0,0.1,0.219377,0,0
3,"[[ scan ]] scanExclusiveShared2(unsigned int*,...",0.776216,0.496326,0.193890,0.201860,87.500000,1,1,0.962367,0.037633,...,0.1,0,0,0.1,0,0,0.1,0.210305,0,0
4,"[[ c++11_cuda ]] xyzw_frequency(int*, char*, int)",0.984878,0.166630,0.419791,0.419742,31626.671875,0.999996,0.71216,0.709834,0.000088,...,0.1,2.9793,136176,0.1,0,0,0.1,0.401653,0,0
5,[[ c++11_cuda ]] xyzw_frequency_thrust_device(...,0.16706,0.015625,0.120050,0.120064,32235074.000000,1,0.03125,0.03125,0.000000,...,0.1,0,0,0.1,0,0,0.1,0.114061,0,0
6,[[ cuSolverSp_LinearSolver ]] void pegasus_sca...,0.808125,0.724293,0.227263,0.238280,31.918803,1,0.999523,0.905853,0.025706,...,0.2,0,0,0.1,0,0,0.1,0.305385,0,0
7,[[ cuSolverSp_LinearSolver ]] void pegasus_sym...,0.748081,0.784142,0.460054,0.496689,32.914530,1,0.999513,0.938904,0.024929,...,0.3,0,0,0.1,0,0,0.1,0.669309,0,0
8,[[ cuSolverSp_LinearSolver ]] void gather_core...,0.984699,0.936917,0.428544,0.432904,30.995775,1,0.999985,0.935468,0.001163,...,0.5,0,0,0.1,0,0,0.1,0.623174,0,0
9,[[ cuSolverSp_LinearSolver ]] void chol_check_...,0.69728,0.519168,0.161881,0.167549,26.693750,1,0.999181,0.925135,0.074924,...,0.2,0,0,0.1,0,0,0.1,0.210813,0,0


# Feature Scaling

In [7]:
# Coerce every metric column to a numeric dtype; values that cannot be parsed
# become NaN. pd.to_numeric replaces the deprecated DataFrame.convert_objects.
metric_cols = [col for col in df_featureMat.columns if col != 'KernelName']
df_featureMat = df_featureMat.copy()
df_featureMat[metric_cols] = df_featureMat[metric_cols].apply(pd.to_numeric, errors='coerce')

# Numeric-only copy of the matrix: the 'KernelName' label column is removed.
df_featureMat_cp = df_featureMat.drop('KernelName', axis=1)

  if __name__ == '__main__':


In [8]:
# Column names of the numeric-only copy ('KernelName' has been dropped).
df_featureMat_cp.columns

Index([u'sm_efficiency', u'achieved_occupancy', u'ipc', u'issued_ipc',
       u'inst_per_warp', u'branch_efficiency', u'warp_execution_efficiency',
       u'warp_nonpred_execution_efficiency', u'inst_replay_overhead',
       u'issue_slot_utilization',
       ...
       u'l2_utilization', u'l2_atomic_throughput', u'l2_atomic_transactions',
       u'sysmem_utilization', u'ecc_throughput', u'sysmem_read_utilization',
       u'sysmem_write_utilization', u'eligible_warps_per_cycle',
       u'flop_sp_efficiency', u'flop_dp_efficiency'],
      dtype='object', length=113)

In [9]:
# Display the numeric-only feature matrix before scaling.
df_featureMat_cp

Unnamed: 0,sm_efficiency,achieved_occupancy,ipc,issued_ipc,inst_per_warp,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,issue_slot_utilization,...,l2_utilization,l2_atomic_throughput,l2_atomic_transactions,sysmem_utilization,ecc_throughput,sysmem_read_utilization,sysmem_write_utilization,eligible_warps_per_cycle,flop_sp_efficiency,flop_dp_efficiency
0,0.774578,0.379772,1.839418,1.741966,1.878211e+03,0.814055,0.586253,0.581444,0.001604,0.390164,...,0.3,0.000000,0,0.1,0.0,0.0,0.1,3.440007,0.002870,0.000000
1,0.995241,0.945823,1.038429,1.038845,9.864706e+01,1.000000,1.000000,0.979783,0.000257,0.218233,...,0.3,0.000000,0,0.1,0.0,0.0,0.1,1.127109,0.000000,0.000000
2,0.995049,0.904622,0.216420,0.216561,2.062500e+01,1.000000,0.970644,0.970455,0.000393,0.048892,...,0.3,0.000000,0,0.1,0.0,0.0,0.1,0.219377,0.000000,0.000000
3,0.776216,0.496326,0.193890,0.201860,8.750000e+01,1.000000,1.000000,0.962367,0.037633,0.042666,...,0.1,0.000000,0,0.1,0.0,0.0,0.1,0.210305,0.000000,0.000000
4,0.984878,0.166630,0.419791,0.419742,3.162667e+04,0.999996,0.712160,0.709834,0.000088,0.096177,...,0.1,2.979301,136176,0.1,0.0,0.0,0.1,0.401653,0.000000,0.000000
5,0.167060,0.015625,0.120050,0.120064,3.223507e+07,1.000000,0.031250,0.031250,0.000000,0.028515,...,0.1,0.000000,0,0.1,0.0,0.0,0.1,0.114061,0.000000,0.000000
6,0.808125,0.724293,0.227263,0.238280,3.191880e+01,1.000000,0.999523,0.905853,0.025706,0.059570,...,0.2,0.000000,0,0.1,0.0,0.0,0.1,0.305385,0.000000,0.000000
7,0.748081,0.784142,0.460054,0.496689,3.291453e+01,1.000000,0.999513,0.938904,0.024929,0.124172,...,0.3,0.000000,0,0.1,0.0,0.0,0.1,0.669309,0.000000,0.000000
8,0.984699,0.936917,0.428544,0.432904,3.099577e+01,1.000000,0.999985,0.935468,0.001163,0.108226,...,0.5,0.000000,0,0.1,0.0,0.0,0.1,0.623174,0.000000,0.000000
9,0.697280,0.519168,0.161881,0.167549,2.669375e+01,1.000000,0.999181,0.925135,0.074924,0.041887,...,0.2,0.000000,0,0.1,0.0,0.0,0.1,0.210813,0.000000,0.000000


In [10]:
from sklearn import preprocessing

# Learn the per-column min / max on the numeric feature matrix first ...
scaler = preprocessing.MinMaxScaler()
scaler.fit(df_featureMat_cp)

# ... then rescale that same matrix with the fitted scaler.
scaled_df = scaler.transform(df_featureMat_cp)

In [11]:
# Wrap the scaled values in a DataFrame that reuses the original metric column names.
scaled_df = pd.DataFrame(data=scaled_df, columns=df_featureMat_cp.columns)

In [12]:
# Display the min-max scaled feature matrix.
scaled_df

Unnamed: 0,sm_efficiency,achieved_occupancy,ipc,issued_ipc,inst_per_warp,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,issue_slot_utilization,...,l2_utilization,l2_atomic_throughput,l2_atomic_transactions,sysmem_utilization,ecc_throughput,sysmem_read_utilization,sysmem_write_utilization,eligible_warps_per_cycle,flop_sp_efficiency,flop_dp_efficiency
0,0.768912,0.371959,0.495440,0.468416,5.820403e-05,0.312041,0.572906,5.679426e-01,0.003609,0.459895,...,0.333333,0.000000,0.000000,0.0,0.0,0.0,0.0,0.338107,0.005258,0.000000
1,0.996392,0.950113,0.278742,0.278362,2.998196e-06,1.000000,1.000000,9.791313e-01,0.000578,0.256059,...,0.333333,0.000000,0.000000,0.0,0.0,0.0,0.0,0.110136,0.000000,0.000000
2,0.996194,0.908031,0.056357,0.056100,5.777868e-07,1.000000,0.969697,9.695015e-01,0.000884,0.055293,...,0.333333,0.000000,0.000000,0.0,0.0,0.0,0.0,0.020666,0.000000,0.000000
3,0.770601,0.491005,0.050262,0.052126,2.652391e-06,1.000000,1.000000,9.611530e-01,0.084674,0.047912,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.019772,0.000000,0.000000
4,0.985709,0.154259,0.111377,0.111019,9.810641e-04,0.999986,0.702875,7.004741e-01,0.000198,0.111353,...,0.000000,0.079732,0.692690,0.0,0.0,0.0,0.0,0.038632,0.000000,0.000000
5,0.142626,0.000026,0.030285,0.030016,1.000000e+00,1.000000,0.000000,3.096774e-08,0.000000,0.031135,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.010285,0.000000,0.000000
6,0.803495,0.723846,0.059291,0.061970,9.281444e-07,1.000000,0.999508,9.028155e-01,0.057839,0.067952,...,0.166667,0.000000,0.000000,0.0,0.0,0.0,0.0,0.029143,0.000000,0.000000
7,0.741597,0.784975,0.122269,0.131818,9.590340e-07,1.000000,0.999497,9.369330e-01,0.056090,0.144543,...,0.333333,0.000000,0.000000,0.0,0.0,0.0,0.0,0.065013,0.000000,0.000000
8,0.985524,0.941016,0.113745,0.114577,8.995102e-07,1.000000,0.999984,9.333865e-01,0.002617,0.125638,...,0.666667,0.000000,0.000000,0.0,0.0,0.0,0.0,0.060466,0.000000,0.000000
9,0.689226,0.514335,0.041602,0.042852,7.660523e-07,1.000000,0.999154,9.227196e-01,0.168579,0.046988,...,0.166667,0.000000,0.000000,0.0,0.0,0.0,0.0,0.019822,0.000000,0.000000


### save scaling factors: min / max for each column

In [13]:
# Sanity check of the fitted scaler: one min / max entry per metric column,
# plus the fitted range of the first column.
# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(len(scaler.data_min_))
print(len(scaler.data_max_))
print(scaler.data_min_[0])
print(scaler.data_max_[0])

113
113
0.02870774
0.99874103


In [14]:
# save dd to json file
import json
import codecs

In [15]:
# Map each metric column to the [min, max] pair learned by the scaler.
# The scaler's arrays are walked in step with the column names.
metrics_scale_dd = {}
for col_name, col_min, col_max in zip(df_featureMat_cp.columns,
                                      scaler.data_min_,
                                      scaler.data_max_):
    metrics_scale_dd[col_name] = [col_min, col_max]

# Persist the scale table as JSON.
with codecs.open('metrics_scale.json', 'w', encoding='utf-8') as outfile:
    json.dump(metrics_scale_dd, outfile, ensure_ascii=False)

# remove low variance features

In [16]:
# Display the scaled matrix again as the starting point for low-variance removal.
scaled_df

Unnamed: 0,sm_efficiency,achieved_occupancy,ipc,issued_ipc,inst_per_warp,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,issue_slot_utilization,...,l2_utilization,l2_atomic_throughput,l2_atomic_transactions,sysmem_utilization,ecc_throughput,sysmem_read_utilization,sysmem_write_utilization,eligible_warps_per_cycle,flop_sp_efficiency,flop_dp_efficiency
0,0.768912,0.371959,0.495440,0.468416,5.820403e-05,0.312041,0.572906,5.679426e-01,0.003609,0.459895,...,0.333333,0.000000,0.000000,0.0,0.0,0.0,0.0,0.338107,0.005258,0.000000
1,0.996392,0.950113,0.278742,0.278362,2.998196e-06,1.000000,1.000000,9.791313e-01,0.000578,0.256059,...,0.333333,0.000000,0.000000,0.0,0.0,0.0,0.0,0.110136,0.000000,0.000000
2,0.996194,0.908031,0.056357,0.056100,5.777868e-07,1.000000,0.969697,9.695015e-01,0.000884,0.055293,...,0.333333,0.000000,0.000000,0.0,0.0,0.0,0.0,0.020666,0.000000,0.000000
3,0.770601,0.491005,0.050262,0.052126,2.652391e-06,1.000000,1.000000,9.611530e-01,0.084674,0.047912,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.019772,0.000000,0.000000
4,0.985709,0.154259,0.111377,0.111019,9.810641e-04,0.999986,0.702875,7.004741e-01,0.000198,0.111353,...,0.000000,0.079732,0.692690,0.0,0.0,0.0,0.0,0.038632,0.000000,0.000000
5,0.142626,0.000026,0.030285,0.030016,1.000000e+00,1.000000,0.000000,3.096774e-08,0.000000,0.031135,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.010285,0.000000,0.000000
6,0.803495,0.723846,0.059291,0.061970,9.281444e-07,1.000000,0.999508,9.028155e-01,0.057839,0.067952,...,0.166667,0.000000,0.000000,0.0,0.0,0.0,0.0,0.029143,0.000000,0.000000
7,0.741597,0.784975,0.122269,0.131818,9.590340e-07,1.000000,0.999497,9.369330e-01,0.056090,0.144543,...,0.333333,0.000000,0.000000,0.0,0.0,0.0,0.0,0.065013,0.000000,0.000000
8,0.985524,0.941016,0.113745,0.114577,8.995102e-07,1.000000,0.999984,9.333865e-01,0.002617,0.125638,...,0.666667,0.000000,0.000000,0.0,0.0,0.0,0.0,0.060466,0.000000,0.000000
9,0.689226,0.514335,0.041602,0.042852,7.660523e-07,1.000000,0.999154,9.227196e-01,0.168579,0.046988,...,0.166667,0.000000,0.000000,0.0,0.0,0.0,0.0,0.019822,0.000000,0.000000


In [17]:
#scaled_df.values

In [18]:
# Column index of the scaled matrix; positions in it match the column
# positions of scaled_df.values.
featureMatColumns = scaled_df.columns
print(featureMatColumns)

Index([u'sm_efficiency', u'achieved_occupancy', u'ipc', u'issued_ipc',
       u'inst_per_warp', u'branch_efficiency', u'warp_execution_efficiency',
       u'warp_nonpred_execution_efficiency', u'inst_replay_overhead',
       u'issue_slot_utilization',
       ...
       u'l2_utilization', u'l2_atomic_throughput', u'l2_atomic_transactions',
       u'sysmem_utilization', u'ecc_throughput', u'sysmem_read_utilization',
       u'sysmem_write_utilization', u'eligible_warps_per_cycle',
       u'flop_sp_efficiency', u'flop_dp_efficiency'],
      dtype='object', length=113)


In [19]:
# Quick check: name of the first feature column.
featureMatColumns[0]

'sm_efficiency'

In [20]:
# Plain array view of the scaled features for scikit-learn.
X = scaled_df.values

# Variance cut-off is p * (1 - p); with p = 0.99 that is 0.0099.
p = 0.99

# Build the selector and fit it in one go (fit returns the selector itself).
vt = VarianceThreshold(threshold=p * (1 - p)).fit(X)

# Positions of the columns that survive the variance filter.
feature_indices = vt.get_support(indices=True)

print('selected feature indices: {}'.format(feature_indices))
print('features after removing low variance: {}'.format(len(feature_indices)))

selected feature indices: [  0   1   2   3   5   6   7   8   9  12  13  14  15  17  24  25  26  27
  28  29  30  31  32  35  36  37  38  40  41  42  43  44  45  57  58  59
  60  76  77  78  79  80  81  82  84  91  92  93  94  95  96  99 100 102
 103 110 111 112]
features after removing low variance: 58


In [21]:
# Per-column variances computed by the fitted VarianceThreshold.
# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(len(vt.variances_))
print(vt.variances_)

113
[ 0.12589892  0.11347647  0.0493536   0.0494976   0.00348177  0.01979414
  0.03561245  0.03684253  0.0142123   0.04809239  0.00350043  0.00367467
  0.0191086   0.02792826  0.0345056   0.02776195  0.00677958  0.01007577
  0.00579435  0.00585973  0.00416087  0.00378545  0.00405946  0.00524066
  0.0807952   0.02767761  0.03108827  0.05209781  0.02634535  0.04957048
  0.07027941  0.05175891  0.01662712  0.00866307  0.00879519  0.01614477
  0.01869846  0.15188426  0.14234465  0.00429626  0.04324442  0.01681652
  0.01567386  0.02681737  0.02493657  0.0511962   0.00431428  0.00671268
  0.0039868   0.00478249  0.00383951  0.00362811  0.0037472   0.0035113
  0.00625497  0.00508776  0.00515009  0.11728972  0.01337372  0.06777291
  0.02785587  0.00400257  0.00464606  0.00416755  0.00412968  0.00393676
  0.00392229  0.00629623  0.00936285  0.00524291  0.00482402  0.00482402
  0.00436323  0.0045844   0.00638233  0.00963421  0.02827052  0.03112088
  0.10572164  0.02030531  0.02310151  0.03931504

In [22]:
# remove low-variance columns from index
# feature_indices holds the positions of the kept columns, so the names can be
# looked up directly instead of scanning every column for membership.
feature_names = [featureMatColumns[idx] for idx in feature_indices]
print(feature_names)

['sm_efficiency', 'achieved_occupancy', 'ipc', 'issued_ipc', 'branch_efficiency', 'warp_execution_efficiency', 'warp_nonpred_execution_efficiency', 'inst_replay_overhead', 'issue_slot_utilization', 'local_load_transactions_per_request', 'local_store_transactions_per_request', 'gld_transactions_per_request', 'gst_transactions_per_request', 'shared_load_transactions', 'global_hit_rate', 'local_hit_rate', 'gld_requested_throughput', 'gst_requested_throughput', 'gld_throughput', 'gst_throughput', 'dram_read_throughput', 'dram_write_throughput', 'tex_cache_throughput', 'shared_load_throughput', 'shared_store_throughput', 'gld_efficiency', 'gst_efficiency', 'cf_fu_utilization', 'tex_fu_utilization', 'ldst_fu_utilization', 'double_precision_fu_utilization', 'special_fu_utilization', 'single_precision_fu_utilization', 'dram_utilization', 'tex_utilization', 'shared_efficiency', 'shared_utilization', 'stall_inst_fetch', 'stall_exec_dependency', 'stall_memory_dependency', 'stall_texture', 'stall_

# select the targeted features from original dataframe

In [23]:
# Columns to keep: the kernel label followed by the metrics that passed the
# variance filter. (The variable name is kept as-is because later cells use it.)
selectec_columns = ['KernelName']
selectec_columns.extend(feature_names)
print(selectec_columns)

['KernelName', 'sm_efficiency', 'achieved_occupancy', 'ipc', 'issued_ipc', 'branch_efficiency', 'warp_execution_efficiency', 'warp_nonpred_execution_efficiency', 'inst_replay_overhead', 'issue_slot_utilization', 'local_load_transactions_per_request', 'local_store_transactions_per_request', 'gld_transactions_per_request', 'gst_transactions_per_request', 'shared_load_transactions', 'global_hit_rate', 'local_hit_rate', 'gld_requested_throughput', 'gst_requested_throughput', 'gld_throughput', 'gst_throughput', 'dram_read_throughput', 'dram_write_throughput', 'tex_cache_throughput', 'shared_load_throughput', 'shared_store_throughput', 'gld_efficiency', 'gst_efficiency', 'cf_fu_utilization', 'tex_fu_utilization', 'ldst_fu_utilization', 'double_precision_fu_utilization', 'special_fu_utilization', 'single_precision_fu_utilization', 'dram_utilization', 'tex_utilization', 'shared_efficiency', 'shared_utilization', 'stall_inst_fetch', 'stall_exec_dependency', 'stall_memory_dependency', 'stall_tex

In [24]:
# Keep only the selected columns. The explicit copy detaches the result from
# the original frame, so later column assignments do not act on a slice
# (which would trigger pandas' SettingWithCopyWarning).
df_featureMat = df_featureMat[selectec_columns].copy()

In [25]:
# Display the feature matrix after low-variance column removal (values still unscaled).
df_featureMat

Unnamed: 0,KernelName,sm_efficiency,achieved_occupancy,ipc,issued_ipc,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,issue_slot_utilization,...,l2_tex_write_hit_rate,l2_tex_read_throughput,l2_tex_write_throughput,l2_read_throughput,l2_write_throughput,sysmem_write_throughput,l2_utilization,eligible_warps_per_cycle,flop_sp_efficiency,flop_dp_efficiency
0,[[ boxFilterNPP ]] void ForEachTupleByteQuad<u...,0.774578,0.379772,1.839418,1.741966,0.814055,0.586253,0.581444,0.001604,0.390164,...,0.830078,56.411140,5.014241,58.012083,5.017966,0.002861,0.3,3.440007,0.002870,0.000000
1,"[[ scan ]] scanExclusiveShared(uint4*, uint4*,...",0.995241,0.945823,1.038429,1.038845,1.000000,1.000000,0.979783,0.000257,0.218233,...,0.000000,43.576583,43.576583,43.580309,43.576583,0.000000,0.3,1.127109,0.000000,0.000000
2,"[[ scan ]] uniformUpdate(uint4*, unsigned int*)",0.995049,0.904622,0.216420,0.216561,1.000000,0.970644,0.970455,0.000393,0.048892,...,1.000000,43.876469,43.536536,43.879263,43.536536,0.000000,0.3,0.219377,0.000000,0.000000
3,"[[ scan ]] scanExclusiveShared2(unsigned int*,...",0.776216,0.496326,0.193890,0.201860,1.000000,1.000000,0.962367,0.037633,0.042666,...,0.000000,31.416304,1.962297,31.588599,1.977198,0.011444,0.1,0.210305,0.000000,0.000000
4,"[[ c++11_cuda ]] xyzw_frequency(int*, char*, int)",0.984878,0.166630,0.419791,0.419742,0.999996,0.712160,0.709834,0.000088,0.096177,...,0.000000,4.408881,0.000000,7.390045,2.980232,0.000000,0.1,0.401653,0.000000,0.000000
5,[[ c++11_cuda ]] xyzw_frequency_thrust_device(...,0.167060,0.015625,0.120050,0.120064,1.000000,0.031250,0.031250,0.000000,0.028515,...,0.000000,0.533104,0.000000,0.533104,0.000000,0.000000,0.1,0.114061,0.000000,0.000000
6,[[ cuSolverSp_LinearSolver ]] void pegasus_sca...,0.808125,0.724293,0.227263,0.238280,1.000000,0.999523,0.905853,0.025706,0.059570,...,0.203672,19.319355,29.284507,19.387342,29.295683,0.008583,0.2,0.305385,0.000000,0.000000
7,[[ cuSolverSp_LinearSolver ]] void pegasus_sym...,0.748081,0.784142,0.460054,0.496689,1.000000,0.999513,0.938904,0.024929,0.124172,...,0.000000,54.871663,24.532899,55.000186,24.552457,0.016212,0.3,0.669309,0.000000,0.000000
8,[[ cuSolverSp_LinearSolver ]] void gather_core...,0.984699,0.936917,0.428544,0.432904,1.000000,0.999985,0.935468,0.001163,0.108226,...,0.000000,127.397478,28.457493,127.414241,28.458424,0.000000,0.5,0.623174,0.000000,0.000000
9,[[ cuSolverSp_LinearSolver ]] void chol_check_...,0.697280,0.519168,0.161881,0.167549,1.000000,0.999181,0.925135,0.074924,0.041887,...,0.000000,43.473206,0.000000,43.620355,0.022888,0.019073,0.2,0.210813,0.000000,0.000000


# apply scaling to the selected metric column

In [26]:
#
# read the metrics_scale.json
#
# The file is written with json.dump, so it is read back with the JSON parser.
# A YAML 1.1 loader treats exponent floats without a decimal point
# (e.g. 1e-05, as json.dump writes them) as strings rather than numbers.
with open('metrics_scale.json', 'r') as metricsFile:
    metrics_scale_dd = json.load(metricsFile)

In [27]:
# Inspect the loaded scale table: {metric name: [min, max]}.
metrics_scale_dd

{'achieved_occupancy': [0.0156, 0.9946659999999999],
 'atomic_transactions': [0.0, 83039.0],
 'atomic_transactions_per_request': [0.0, 5.349603],
 'branch_efficiency': [0.7297148, 1.0],
 'cf_executed': [1.0, 220609193.0],
 'cf_fu_utilization': [0.1, 0.2],
 'cf_issued': [1.0, 220609193.0],
 'double_precision_fu_utilization': [0.0, 1.0],
 'dram_read_throughput': [0.0, 82.921237],
 'dram_read_transactions': [0.0, 30866749.0],
 'dram_utilization': [0.0, 0.9],
 'dram_write_throughput': [0.0, 88.832341],
 'dram_write_transactions': [0.0, 6655938.0],
 'ecc_throughput': [0.0, 0.0],
 'ecc_transactions': [0.0, 0.0],
 'eligible_warps_per_cycle': [0.009709, 10.155316000000001],
 'flop_count_dp': [0.0, 939702716.0],
 'flop_count_dp_add': [0.0, 94379405.0],
 'flop_count_dp_fma': [0.0, 404701375.0],
 'flop_count_dp_mul': [0.0, 66591523.0],
 'flop_count_sp': [0.0, 6511611904.0],
 'flop_count_sp_add': [0.0, 1086197760.0],
 'flop_count_sp_fma': [0.0, 2170536960.0],
 'flop_count_sp_mul': [0.0, 2170537984

In [33]:
#
# scaling the columns in df_featureMat
# note that df_featureMat is after low-variance removal, where there are fewer columns
#
# The unscaled values are always taken from df_featureMat_cp (the numeric copy
# made before scaling, which is never modified). Re-running this cell therefore
# yields the same result instead of scaling already-scaled data a second time.
#

df_featureMat_columns = df_featureMat.columns
df_featureMat = df_featureMat.copy()   # never write into a slice of another frame

for select_metric in df_featureMat_columns:
    # Columns without a saved scale (e.g. 'KernelName') are left untouched.
    if select_metric not in metrics_scale_dd or select_metric not in df_featureMat_cp.columns:
        continue

    min_max = metrics_scale_dd[select_metric]
    x_min, x_max = float(min_max[0]), float(min_max[1])

    # A constant column has zero range; divide by 1 so it maps to 0 rather
    # than producing NaN / inf.
    x_range = x_max - x_min
    if x_range == 0:
        x_range = 1.0

    # Vectorised min-max scaling: (x - min) / (max - min)
    df_featureMat[select_metric] = (df_featureMat_cp[select_metric] - x_min) / x_range

In [34]:
# Display the selected feature matrix after applying the saved min / max scaling.
df_featureMat

Unnamed: 0,KernelName,sm_efficiency,achieved_occupancy,ipc,issued_ipc,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,issue_slot_utilization,...,l2_tex_write_hit_rate,l2_tex_read_throughput,l2_tex_write_throughput,l2_read_throughput,l2_write_throughput,sysmem_write_throughput,l2_utilization,eligible_warps_per_cycle,flop_sp_efficiency,flop_dp_efficiency
0,[[ boxFilterNPP ]] void ForEachTupleByteQuad<u...,0.768912,0.371959,0.131843,0.468416,0.312041,0.559129,5.679426e-01,0.003609,0.459895,...,0.830078,0.322555,0.032929,0.331667,0.032954,0.037037,0.333333,0.032368,0.005258,0.000000
1,"[[ scan ]] scanExclusiveShared(uint4*, uint4*,...",0.996392,0.950113,0.073218,0.278362,1.000000,1.000000,9.791313e-01,0.000578,0.256059,...,0.000000,0.249168,0.286172,0.249157,0.286172,0.000000,0.333333,0.009899,0.000000,0.000000
2,"[[ scan ]] uniformUpdate(uint4*, unsigned int*)",0.996194,0.908031,0.013054,0.056100,1.000000,0.968719,9.695015e-01,0.000884,0.055293,...,1.000000,0.250883,0.285909,0.250867,0.285909,0.000000,0.333333,0.001080,0.000000,0.000000
3,"[[ scan ]] scanExclusiveShared2(unsigned int*,...",0.770601,0.491005,0.011405,0.052126,1.000000,1.000000,9.611530e-01,0.084674,0.047912,...,0.000000,0.179636,0.012887,0.180598,0.012984,0.148148,0.000000,0.000992,0.000000,0.000000
4,"[[ c++11_cuda ]] xyzw_frequency(int*, char*, int)",0.985709,0.154259,0.027939,0.111019,0.999986,0.693290,7.004741e-01,0.000198,0.111353,...,0.000000,0.025210,0.000000,0.042250,0.019572,0.000000,0.000000,0.002851,0.000000,0.000000
5,[[ c++11_cuda ]] xyzw_frequency_thrust_device(...,0.142626,0.000026,0.006001,0.030016,1.000000,-0.032258,3.096774e-08,0.000000,0.031135,...,0.000000,0.003048,0.000000,0.003048,0.000000,0.000000,0.000000,0.000057,0.000000,0.000000
6,[[ cuSolverSp_LinearSolver ]] void pegasus_sca...,0.803495,0.723846,0.013848,0.061970,1.000000,0.999492,9.028155e-01,0.057839,0.067952,...,0.203672,0.110467,0.192315,0.110841,0.192388,0.111111,0.166667,0.001916,0.000000,0.000000
7,[[ cuSolverSp_LinearSolver ]] void pegasus_sym...,0.741597,0.784975,0.030886,0.131818,1.000000,0.999481,9.369330e-01,0.056090,0.144543,...,0.000000,0.313752,0.161110,0.314447,0.161239,0.209877,0.333333,0.005451,0.000000,0.000000
8,[[ cuSolverSp_LinearSolver ]] void gather_core...,0.985524,0.941016,0.028580,0.114577,1.000000,0.999984,9.333865e-01,0.002617,0.125638,...,0.000000,0.728450,0.186883,0.728453,0.186890,0.000000,0.666667,0.005003,0.000000,0.000000
9,[[ cuSolverSp_LinearSolver ]] void chol_check_...,0.689226,0.514335,0.009062,0.042852,1.000000,0.999127,9.227196e-01,0.168579,0.046988,...,0.000000,0.248577,0.000000,0.249386,0.000150,0.246914,0.166667,0.000997,0.000000,0.000000


In [35]:
# (rows, columns) of the selected matrix; the column count includes 'KernelName'.
df_featureMat.shape

(287, 59)

# transform dataframe to matrix

In [36]:
# Names of the numeric feature columns, i.e. every column except the kernel label.
# `!=` replaces the Python-2-only `<>` operator.
columns_after_sel_list = [col for col in df_featureMat_columns if col != 'KernelName']
print(columns_after_sel_list)

['sm_efficiency', 'achieved_occupancy', 'ipc', 'issued_ipc', 'branch_efficiency', 'warp_execution_efficiency', 'warp_nonpred_execution_efficiency', 'inst_replay_overhead', 'issue_slot_utilization', 'local_load_transactions_per_request', 'local_store_transactions_per_request', 'gld_transactions_per_request', 'gst_transactions_per_request', 'shared_load_transactions', 'global_hit_rate', 'local_hit_rate', 'gld_requested_throughput', 'gst_requested_throughput', 'gld_throughput', 'gst_throughput', 'dram_read_throughput', 'dram_write_throughput', 'tex_cache_throughput', 'shared_load_throughput', 'shared_store_throughput', 'gld_efficiency', 'gst_efficiency', 'cf_fu_utilization', 'tex_fu_utilization', 'ldst_fu_utilization', 'double_precision_fu_utilization', 'special_fu_utilization', 'single_precision_fu_utilization', 'dram_utilization', 'tex_utilization', 'shared_efficiency', 'shared_utilization', 'stall_inst_fetch', 'stall_exec_dependency', 'stall_memory_dependency', 'stall_texture', 'stall_

In [37]:
# Array of the numeric feature columns (kernel label excluded).
# `.values` replaces the deprecated `as_matrix()` and matches the cell that
# builds `X` from `scaled_df.values`.
FeatureMat = df_featureMat.loc[:, columns_after_sel_list].values

In [42]:
# Display only the numeric feature columns ('KernelName' excluded).
df_featureMat.loc[:,columns_after_sel_list]

Unnamed: 0,sm_efficiency,achieved_occupancy,ipc,issued_ipc,branch_efficiency,warp_execution_efficiency,warp_nonpred_execution_efficiency,inst_replay_overhead,issue_slot_utilization,local_load_transactions_per_request,...,l2_tex_write_hit_rate,l2_tex_read_throughput,l2_tex_write_throughput,l2_read_throughput,l2_write_throughput,sysmem_write_throughput,l2_utilization,eligible_warps_per_cycle,flop_sp_efficiency,flop_dp_efficiency
0,0.768912,0.371959,0.131843,0.468416,0.312041,0.559129,5.679426e-01,0.003609,0.459895,0.000000,...,0.830078,0.322555,0.032929,0.331667,0.032954,0.037037,0.333333,0.032368,0.005258,0.000000
1,0.996392,0.950113,0.073218,0.278362,1.000000,1.000000,9.791313e-01,0.000578,0.256059,0.000000,...,0.000000,0.249168,0.286172,0.249157,0.286172,0.000000,0.333333,0.009899,0.000000,0.000000
2,0.996194,0.908031,0.013054,0.056100,1.000000,0.968719,9.695015e-01,0.000884,0.055293,0.000000,...,1.000000,0.250883,0.285909,0.250867,0.285909,0.000000,0.333333,0.001080,0.000000,0.000000
3,0.770601,0.491005,0.011405,0.052126,1.000000,1.000000,9.611530e-01,0.084674,0.047912,0.000000,...,0.000000,0.179636,0.012887,0.180598,0.012984,0.148148,0.000000,0.000992,0.000000,0.000000
4,0.985709,0.154259,0.027939,0.111019,0.999986,0.693290,7.004741e-01,0.000198,0.111353,0.000000,...,0.000000,0.025210,0.000000,0.042250,0.019572,0.000000,0.000000,0.002851,0.000000,0.000000
5,0.142626,0.000026,0.006001,0.030016,1.000000,-0.032258,3.096774e-08,0.000000,0.031135,0.000000,...,0.000000,0.003048,0.000000,0.003048,0.000000,0.000000,0.000000,0.000057,0.000000,0.000000
6,0.803495,0.723846,0.013848,0.061970,1.000000,0.999492,9.028155e-01,0.057839,0.067952,0.000000,...,0.203672,0.110467,0.192315,0.110841,0.192388,0.111111,0.166667,0.001916,0.000000,0.000000
7,0.741597,0.784975,0.030886,0.131818,1.000000,0.999481,9.369330e-01,0.056090,0.144543,0.000000,...,0.000000,0.313752,0.161110,0.314447,0.161239,0.209877,0.333333,0.005451,0.000000,0.000000
8,0.985524,0.941016,0.028580,0.114577,1.000000,0.999984,9.333865e-01,0.002617,0.125638,0.000000,...,0.000000,0.728450,0.186883,0.728453,0.186890,0.000000,0.666667,0.005003,0.000000,0.000000
9,0.689226,0.514335,0.009062,0.042852,1.000000,0.999127,9.227196e-01,0.168579,0.046988,0.000000,...,0.000000,0.248577,0.000000,0.249386,0.000150,0.246914,0.166667,0.000997,0.000000,0.000000


In [43]:
# FeatureMat
#print type(FeatureMat)

# The first cell of the frame and of the array should print the same value.
print(df_featureMat.loc[0, 'sm_efficiency'])
print(FeatureMat[0][0])

0.768912229806
0.768912229806


### save dataframe to csv

In [47]:
import time

# A timestamp in the file name keeps earlier exports from being overwritten.
timestr = time.strftime("%Y%m%d-%H%M%S")

out_filename = "FeatSel_after_scaler_lowVar_cudasdk_" + timestr + ".csv"
print(out_filename)

# Write the scaled, feature-selected frame (kernel label included) without the row index.
df_featureMat.to_csv(out_filename, index=False, encoding='utf-8')

FeatSel_after_scaler_lowVar_cudasdk_20170830-115253.csv


# run principal feature analysis

In [49]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from collections import defaultdict
from sklearn.metrics.pairwise import euclidean_distances

In [80]:
class PFA(object):
    """Principal feature analysis: keep a subset of the original columns.

    The columns of ``X`` are represented by the rows of the transposed PCA
    component matrix, those rows are clustered with k-means, and for every
    cluster the column closest to the cluster centre is kept.

    Parameters
    ----------
    n_features : int
        Number of k-means clusters, i.e. the maximum number of columns kept.
    q : int or None
        Number of PCA components. A falsy value (None / 0) means "use
        ``X.shape[1]``"; the resolved value is stored back on ``self.q``.
    random_state : int or None
        Seed forwarded to ``KMeans``. The default ``None`` keeps the previous
        unseeded behaviour; pass an integer for a reproducible selection.

    Attributes (set by ``fit``)
    ---------------------------
    indices_ : list of int
        Column positions of the kept features, ordered by cluster label.
    features_ : array
        ``X[:, indices_]``.
    """

    def __init__(self, n_features, q=None, random_state=None):
        self.q = q
        self.n_features = n_features
        self.random_state = random_state

    def fit(self, X):
        """Select the representative columns of ``X``.

        ``X`` is indexed as ``X[:, indices]``, so it is expected to be a 2-D
        array (samples x features). Prints the summed explained-variance ratio
        of the fitted PCA and returns None.
        """
        if not self.q:
            self.q = X.shape[1]

        pca = PCA(n_components=self.q).fit(X)
        print(sum(pca.explained_variance_ratio_))

        # One row per original column of X, one column per PCA component.
        A_q = pca.components_.T

        kmeans = KMeans(n_clusters=self.n_features,
                        random_state=self.random_state).fit(A_q)
        clusters = kmeans.predict(A_q)
        cluster_centers = kmeans.cluster_centers_

        # Euclidean distance of every row to the centre of its own cluster.
        # Computed in one vectorised step; the previous per-row call passed
        # 1-D vectors to euclidean_distances, which requires 2-D input.
        dist_to_center = np.linalg.norm(A_q - cluster_centers[clusters], axis=1)

        # For each cluster (in label order, so the result is deterministic for
        # a given clustering) keep the member closest to the centre. argmin
        # returns the first minimum, i.e. the lowest column index wins a tie.
        self.indices_ = []
        for label in np.unique(clusters):
            members = np.flatnonzero(clusters == label)
            self.indices_.append(int(members[np.argmin(dist_to_center[members])]))

        self.features_ = X[:, self.indices_]
        
        
# Input for the analysis: the scaled, feature-selected matrix.
X = FeatureMat

# Ask for 40 clusters, i.e. at most 40 representative columns, and fit.
pfa = PFA(n_features=40)
pfa.fit(X)

1.0




In [81]:
# To get the transformed matrix
X = pfa.features_


# To get the column indices of the kept features
column_indices = pfa.indices_
print(column_indices)

# Translate each kept column position back into its metric name.
for idx in column_indices:
    print(columns_after_sel_list[idx])

[42, 7, 36, 31, 53, 49, 40, 17, 51, 19, 21, 1, 28, 48, 46, 27, 44, 55, 22, 56, 32, 0, 50, 33, 10, 38, 35, 54, 52, 29, 57, 5, 26, 15, 8, 9, 30, 11, 16, 45]
stall_other
inst_replay_overhead
shared_utilization
special_fu_utilization
sysmem_write_throughput
l2_tex_read_throughput
stall_texture
gst_requested_throughput
l2_read_throughput
gst_throughput
dram_write_throughput
achieved_occupancy
tex_fu_utilization
l2_tex_write_hit_rate
tex_cache_hit_rate
cf_fu_utilization
stall_memory_throttle
eligible_warps_per_cycle
tex_cache_throughput
flop_sp_efficiency
single_precision_fu_utilization
sm_efficiency
l2_tex_write_throughput
dram_utilization
local_store_transactions_per_request
stall_exec_dependency
shared_efficiency
l2_utilization
l2_write_throughput
ldst_fu_utilization
flop_dp_efficiency
warp_execution_efficiency
gst_efficiency
local_hit_rate
issue_slot_utilization
local_load_transactions_per_request
double_precision_fu_utilization
gld_transactions_per_request
gld_requested_throughput
local