In [1]:
import subprocess

#Params
datasetPath = "../datasets/"   # Folder holding the input <name>.csv datasets
dataPath = "./data_clustered/" # Folder receiving the per-threshold classification counts
resultPath = "./results/"      # Folder receiving the metrics computed afterwards
filenames = ["Dune","Apache"]  # Dataset names (file name without the .csv extension)
perf="perf"                    # Name of the column used as the performance measure


def _count_lines(path):
    """Return the number of newline bytes in the file at `path` (the figure `wc -l` reports)."""
    with open(path, "rb") as handle:
        # Read in 1 MiB chunks so large datasets are never fully loaded in memory.
        return sum(chunk.count(b"\n") for chunk in iter(lambda: handle.read(1 << 20), b""))


# Line count of every dataset file, in the same order as `filenames`.
# A missing dataset now fails with FileNotFoundError instead of an opaque int() parsing error.
# NOTE(review): presumably this count includes the CSV header row - confirm.
n_lines = [_count_lines(datasetPath + filename + ".csv") for filename in filenames]

#Params for sensitivity
NBINS = 40 # Number of vertical bins for threshold
NSUBS = 10 # Number of training sets to average on
srm = 1 # Minimum sampling size
srM = list(n_lines) # Maximum sampling size: the line count of each dataset
# Sampling step between two iterations: one hundredth of the maximum size, but never
# below 1 because range() rejects a zero step (files with fewer than 100 lines).
srs = [max(1, size // 100) for size in srM]

oracle = [0.2,0.5,0.8]

In [2]:
import sys, os
import pandas as pd
from sklearn import tree

def sensitivity(datasetPath, dataPath, filename, perf, NBINS, NSUBS, srm, srM, srs, oracle=False):
    """Record decision-tree classification counts for growing training-set sizes.

    The dataset ``datasetPath + filename + ".csv"`` is sorted by the ``perf``
    column and one threshold is read from it per requested position. For each
    threshold, rows whose ``perf`` is strictly greater than the threshold are
    labelled 1 and the others 0. Then, for every sample size ``sr`` in
    ``range(srm, srM, srs)``, ``NSUBS`` random subsamples of ``sr`` rows are
    drawn, a decision tree is fitted on each, the whole dataset is predicted,
    and the TN/TP/FN/FP counts are averaged over the ``NSUBS`` trees.

    One CSV (columns ``sr, t, TN, TP, FN, FP``) is written per threshold to
    ``<dataPath>/<q>/<q>-<filename>.csv``, where ``<q>`` is the fraction that
    selected the threshold.

    Parameters
    ----------
    datasetPath : str
        Prefix of the dataset path; it is concatenated as-is with ``filename``.
    dataPath : str
        Output folder (created when missing), with or without trailing separator.
    filename : str
        Dataset name without the ``.csv`` extension.
    perf : str
        Name of the column to threshold; it is never used as a feature.
    NBINS : int
        Number of bins when ``oracle`` is falsy: thresholds are read at the
        positions ``i * n_rows // NBINS`` for ``i`` in ``1..NBINS-1`` and the
        output folders are named after ``i / NBINS``.
    NSUBS : int
        Number of subsamples (and trees) averaged per sample size.
    srm, srM, srs : int
        Start, stop (exclusive) and step of the sample-size range.
    oracle : iterable of float, optional
        Fractions of the sorted dataset at which thresholds are read. A
        fraction pointing past the last row uses the last row.

    Raises
    ------
    Whatever pandas, ``os.makedirs`` or the classifier raise (missing file,
    missing ``perf`` column, sample size larger than the dataset, ...).
    """
    # Measurement columns that must never be given to the classifier as features.
    perf_x264 = ['Watt', 'Energy', 'SSIM', 'PSNR', 'Speed', 'Size', 'Time']
    perf_sac = ['compile-exit', 'compile-real', 'compile-user', 'compile-ioin', 'compile-ioout',
                'compile-maxmem', 'compile-cpu', 'compile-size', 'run-exit',
                'run-real', 'run-user', 'run-maxmem', 'run-cpu']
    non_features = perf_sac + perf_x264 + ["perf"]
    if perf not in non_features:
        # The thresholded column would otherwise leak the label into the features.
        non_features.append(perf)

    d = pd.read_csv(datasetPath+filename+".csv") # Open dataset
    d = d.sort_values(by=perf) # Sort it by perf to get threshold values
    n_rows = d.shape[0]

    # Pick the fractions naming the outputs and the matching row positions.
    if not oracle:
        fractions = [i / NBINS for i in range(1, NBINS)]
        positions = [i * n_rows // NBINS for i in range(1, NBINS)]
    else:
        fractions = list(oracle)
        # Clamp so that a fraction of 1.0 reads the last row instead of overflowing.
        positions = [min(int(q * n_rows), n_rows - 1) for q in fractions]
    thresholds = [d[perf].iloc[p] for p in positions]

    # One output folder per fraction; exist_ok makes concurrent creation harmless.
    out_dirs = [os.path.join(dataPath, str(q)) for q in fractions]
    for out_dir in out_dirs:
        os.makedirs(out_dir, exist_ok=True)

    # Computation
    for k, t in enumerate(thresholds):
        # Labels and features only depend on the threshold, so build them once here.
        labelled = d.assign(label=(d[perf] > t).astype(int)) # Label with the (current) oracle
        clean = labelled.drop(non_features, axis=1, errors="ignore")
        features = clean.drop(["label"], axis=1)
        truth = clean["label"].values

        res = {"sr":[],"t":[],"TN":[],"TP":[],"FN":[],"FP":[]}
        for sr in range(srm, srM, srs):
            print("Computing for sr=%d and t=%.3f..." % (sr, t))
            subs = [clean.sample(sr) for _ in range(NSUBS)] # Subsample trainsets
            TN = TP = FN = FP = 0 # Counters for classification results
            for s in subs: # We cumulate results for each experiment and average later
                # MACHINE LEARNING PART
                # Settings are chosen to be the closest to J48 algorithm
                c = tree.DecisionTreeClassifier(criterion="entropy", min_samples_leaf=2)
                c.fit(s.drop(["label"], axis=1), s["label"])
                # END OF LEARNING
                pred = c.predict(features) # Get model's prediction on the whole dataset
                TN += int(((truth == 0) & (pred == 0)).sum())
                TP += int(((truth == 1) & (pred == 1)).sum())
                FN += int(((truth == 1) & (pred == 0)).sum())
                FP += int(((truth == 0) & (pred == 1)).sum())
            # Push the results, averaged over the NSUBS trees
            res["sr"].append(sr)
            res["t"].append(t)
            res["TN"].append(TN/NSUBS)
            res["TP"].append(TP/NSUBS)
            res["FN"].append(FN/NSUBS)
            res["FP"].append(FP/NSUBS)
        # Save the result as csv
        out_file = os.path.join(out_dirs[k], str(fractions[k])+"-"+filename+".csv")
        pd.DataFrame(res).to_csv(out_file, index=False)

In [3]:
# Run the experiment once per dataset. Every dataset reads its inputs from the
# datasetPath folder and writes its counts under the dataPath folder; only the
# maximum sample size and the sampling step are dataset-specific.
for k, filename in enumerate(filenames):
    sensitivity(datasetPath, dataPath, filename, perf, NBINS, NSUBS, srm, srM[k], srs[k], oracle)

Computing for sr=1 and t=8447.814...
Computing for sr=24 and t=8447.814...
Computing for sr=47 and t=8447.814...
Computing for sr=70 and t=8447.814...
Computing for sr=93 and t=8447.814...
Computing for sr=116 and t=8447.814...
Computing for sr=139 and t=8447.814...
Computing for sr=162 and t=8447.814...
Computing for sr=185 and t=8447.814...
Computing for sr=208 and t=8447.814...
Computing for sr=231 and t=8447.814...
Computing for sr=254 and t=8447.814...
Computing for sr=277 and t=8447.814...
Computing for sr=300 and t=8447.814...
Computing for sr=323 and t=8447.814...
Computing for sr=346 and t=8447.814...
Computing for sr=369 and t=8447.814...
Computing for sr=392 and t=8447.814...
Computing for sr=415 and t=8447.814...
Computing for sr=438 and t=8447.814...
Computing for sr=461 and t=8447.814...
Computing for sr=484 and t=8447.814...
Computing for sr=507 and t=8447.814...
Computing for sr=530 and t=8447.814...
Computing for sr=553 and t=8447.814...
Computing for sr=576 and t=8447

Computing for sr=93 and t=11656.393...
Computing for sr=116 and t=11656.393...
Computing for sr=139 and t=11656.393...
Computing for sr=162 and t=11656.393...
Computing for sr=185 and t=11656.393...
Computing for sr=208 and t=11656.393...
Computing for sr=231 and t=11656.393...
Computing for sr=254 and t=11656.393...
Computing for sr=277 and t=11656.393...
Computing for sr=300 and t=11656.393...
Computing for sr=323 and t=11656.393...
Computing for sr=346 and t=11656.393...
Computing for sr=369 and t=11656.393...
Computing for sr=392 and t=11656.393...
Computing for sr=415 and t=11656.393...
Computing for sr=438 and t=11656.393...
Computing for sr=461 and t=11656.393...
Computing for sr=484 and t=11656.393...
Computing for sr=507 and t=11656.393...
Computing for sr=530 and t=11656.393...
Computing for sr=553 and t=11656.393...
Computing for sr=576 and t=11656.393...
Computing for sr=599 and t=11656.393...
Computing for sr=622 and t=11656.393...
Computing for sr=645 and t=11656.393...
C

Computing for sr=114 and t=1020.000...
Computing for sr=115 and t=1020.000...
Computing for sr=116 and t=1020.000...
Computing for sr=117 and t=1020.000...
Computing for sr=118 and t=1020.000...
Computing for sr=119 and t=1020.000...
Computing for sr=120 and t=1020.000...
Computing for sr=121 and t=1020.000...
Computing for sr=122 and t=1020.000...
Computing for sr=123 and t=1020.000...
Computing for sr=124 and t=1020.000...
Computing for sr=125 and t=1020.000...
Computing for sr=126 and t=1020.000...
Computing for sr=127 and t=1020.000...
Computing for sr=128 and t=1020.000...
Computing for sr=129 and t=1020.000...
Computing for sr=130 and t=1020.000...
Computing for sr=131 and t=1020.000...
Computing for sr=132 and t=1020.000...
Computing for sr=133 and t=1020.000...
Computing for sr=134 and t=1020.000...
Computing for sr=135 and t=1020.000...
Computing for sr=136 and t=1020.000...
Computing for sr=137 and t=1020.000...
Computing for sr=138 and t=1020.000...
Computing for sr=139 and 

Computing for sr=136 and t=1230.000...
Computing for sr=137 and t=1230.000...
Computing for sr=138 and t=1230.000...
Computing for sr=139 and t=1230.000...
Computing for sr=140 and t=1230.000...
Computing for sr=141 and t=1230.000...
Computing for sr=142 and t=1230.000...
Computing for sr=143 and t=1230.000...
Computing for sr=144 and t=1230.000...
Computing for sr=145 and t=1230.000...
Computing for sr=146 and t=1230.000...
Computing for sr=147 and t=1230.000...
Computing for sr=148 and t=1230.000...
Computing for sr=149 and t=1230.000...
Computing for sr=150 and t=1230.000...
Computing for sr=151 and t=1230.000...
Computing for sr=152 and t=1230.000...
Computing for sr=153 and t=1230.000...
Computing for sr=154 and t=1230.000...
Computing for sr=155 and t=1230.000...
Computing for sr=156 and t=1230.000...
Computing for sr=157 and t=1230.000...
Computing for sr=158 and t=1230.000...
Computing for sr=159 and t=1230.000...
Computing for sr=160 and t=1230.000...
Computing for sr=161 and 

Computing for sr=160 and t=1980.000...
Computing for sr=161 and t=1980.000...
Computing for sr=162 and t=1980.000...
Computing for sr=163 and t=1980.000...
Computing for sr=164 and t=1980.000...
Computing for sr=165 and t=1980.000...
Computing for sr=166 and t=1980.000...
Computing for sr=167 and t=1980.000...
Computing for sr=168 and t=1980.000...
Computing for sr=169 and t=1980.000...
Computing for sr=170 and t=1980.000...
Computing for sr=171 and t=1980.000...
Computing for sr=172 and t=1980.000...
Computing for sr=173 and t=1980.000...
Computing for sr=174 and t=1980.000...
Computing for sr=175 and t=1980.000...
Computing for sr=176 and t=1980.000...
Computing for sr=177 and t=1980.000...
Computing for sr=178 and t=1980.000...
Computing for sr=179 and t=1980.000...
Computing for sr=180 and t=1980.000...
Computing for sr=181 and t=1980.000...
Computing for sr=182 and t=1980.000...
Computing for sr=183 and t=1980.000...
Computing for sr=184 and t=1980.000...
Computing for sr=185 and 

In [4]:
if not os.path.exists(resultPath):
    try:
        os.makedirs(resultPath)
    except OSError as exc: # Guard against race condition
        if exc.errno != errno.EEXIST:
            raise

for k,filename in enumerate(filenames):
    for o in oracle:
        !Rscript ./helpers/2.calculateMetrics.R ./data_clustered/{o}/{o}-{filename}.csv {n_lines[k]} {resultPath}