In [1]:
import subprocess

# Params
datasetPath = "../datasets/"
dataPath = "./data/"
resultPath = "./results/"
filename = "Apache"
perf = "perf"


# Params for sensitivity
NBINS = 40  # Number of vertical bins for threshold
NSUBS = 10  # Number of training sets to average on
srm = 1  # Minimum sampling size
# Maximum sampling size: the number of newline characters in the dataset file
# (the same count `wc -l` reports). Counting in Python avoids spawning a shell
# and keeps the path from being interpreted by it.
with open(datasetPath + filename + ".csv", "rb") as dataset_file:
    srM = sum(chunk.count(b"\n") for chunk in iter(lambda: dataset_file.read(1 << 20), b""))
# Sampling step between two iterations; at least 1 so range() never gets a zero step
srs = max(1, srM // 100)

In [2]:
import sys, os
import pandas as pd
from sklearn import tree

def sensitivity(datasetPath, dataPath, filename, perf, NBINS, NSUBS, srm, srM, srs, thresholds=False):
    """Evaluate decision-tree classification over training sizes and thresholds.

    For every sampling size ``sr`` in ``range(srm, srM, srs)`` and every threshold
    ``t``, rows of the dataset are labelled 1 when ``perf > t`` and 0 otherwise.
    ``NSUBS`` random training sets of ``sr`` rows are drawn, a decision tree is
    fitted on each, and its predictions on the whole dataset are compared with
    the labels. The confusion counts are averaged over the ``NSUBS`` runs and
    written to ``dataPath + filename + ".csv"`` with the columns
    ``sr, t, TN, TP, FN, FP``.

    Parameters:
        datasetPath: folder prefix of the input CSV (concatenated with ``filename``).
        dataPath: folder prefix of the output CSV; created if it does not exist.
        filename: base name (without ``.csv``) of both the input and output file.
        perf: name of the column holding the measured performance value.
        NBINS: number of bins; ``NBINS - 1`` thresholds are taken at regular
            positions of the dataset sorted by ``perf``.
        NSUBS: number of random training sets averaged per (sr, t) pair.
        srm, srM, srs: start, stop (exclusive) and step of the sampling sizes.
        thresholds: when truthy, a single fraction of the sorted dataset used
            as the only threshold position instead of the ``NBINS`` grid.

    Returns:
        None. The results are only written to disk.

    Raises:
        OSError: if the output folder cannot be created.
    """
    # If the data folder does not exist, create it. exist_ok makes a concurrent
    # creation by another process harmless; any other failure propagates as OSError.
    if not os.path.exists(dataPath):
        os.makedirs(dataPath, exist_ok=True)

    # Performance columns of known datasets: they are measurements, not features
    perf_x264 = ['Watt', 'Energy', 'SSIM', 'PSNR', 'Speed', 'Size', 'Time']
    perf_sac = ['compile-exit', 'compile-real', 'compile-user', 'compile-ioin', 'compile-ioout',
                'compile-maxmem', 'compile-cpu', 'compile-size', 'run-exit',
                'run-real', 'run-user', 'run-maxmem', 'run-cpu']
    # Also exclude the column actually used as the oracle, so the target never
    # leaks into the training features when it is not one of the names above.
    non_features = list(dict.fromkeys(perf_sac + perf_x264 + ["perf", perf]))

    d = pd.read_csv(datasetPath + filename + ".csv")  # Open dataset
    d = d.sort_values(by=perf)  # Sort it by perf to get threshold values
    n_rows = d.shape[0]
    if not thresholds:
        thresholds = [d[perf].iloc[i * n_rows // NBINS] for i in range(1, NBINS)]
    else:
        # Clamp so that a fraction of 1.0 selects the last row instead of overflowing
        thresholds = [d[perf].iloc[min(int(thresholds * n_rows), n_rows - 1)]]

    res = {"sr": [], "t": [], "TN": [], "TP": [], "FN": [], "FP": []}

    # Computation
    for sr in range(srm, srM, srs):
        for t in thresholds:
            print("Computing for sr=%d and t=%.3f..." % (sr, t))
            d["label"] = 0
            d.loc[d[perf] > t, "label"] = 1  # Label with the (current) oracle
            clean = d.drop(non_features, axis=1, errors="ignore")
            features = clean.drop(["label"], axis=1)
            truth = clean["label"].to_numpy()
            subs = [clean.sample(sr) for _ in range(NSUBS)]  # Subsample trainsets
            TN = TP = FN = FP = 0  # Counters for classification results
            for s in subs:  # We cumulate results for each experiment and average later
                # MACHINE LEARNING PART
                # Settings are chosen to be the closest to J48 algorithm
                c = tree.DecisionTreeClassifier(criterion="entropy", min_samples_leaf=2)
                c.fit(s.drop(["label"], axis=1), s.label)
                # END OF LEARNING
                pred = c.predict(features)  # Model's prediction on the whole dataset
                TN += int(((truth == 0) & (pred == 0)).sum())
                TP += int(((truth == 1) & (pred == 1)).sum())
                FN += int(((truth == 1) & (pred == 0)).sum())
                FP += int(((truth == 0) & (pred == 1)).sum())
            # Push the results, averaged over the NSUBS training sets
            res["sr"].append(sr)
            res["t"].append(t)
            res["TN"].append(TN / NSUBS)
            res["TP"].append(TP / NSUBS)
            res["FN"].append(FN / NSUBS)
            res["FP"].append(FP / NSUBS)
    # Save the result as csv
    pd.DataFrame(res).to_csv(dataPath + filename + ".csv", index=False)

In [None]:
# Machine learning part: read {filename}.csv from the {datasetPath} folder and
# write the results to {filename}.csv in the {dataPath} folder.
sensitivity(
    datasetPath=datasetPath,
    dataPath=dataPath,
    filename=filename,
    perf=perf,
    NBINS=NBINS,
    NSUBS=NSUBS,
    srm=srm,
    srM=srM,
    srs=srs,
)

Computing for sr=1 and t=870.000...
Computing for sr=1 and t=900.000...
Computing for sr=1 and t=930.000...
Computing for sr=1 and t=960.000...
Computing for sr=1 and t=960.000...
Computing for sr=1 and t=990.000...
Computing for sr=1 and t=990.000...
Computing for sr=1 and t=1020.000...
Computing for sr=1 and t=1020.000...
Computing for sr=1 and t=1050.000...
Computing for sr=1 and t=1080.000...
Computing for sr=1 and t=1080.000...
Computing for sr=1 and t=1110.000...
Computing for sr=1 and t=1110.000...
Computing for sr=1 and t=1140.000...
Computing for sr=1 and t=1140.000...
Computing for sr=1 and t=1170.000...
Computing for sr=1 and t=1200.000...
Computing for sr=1 and t=1200.000...
Computing for sr=1 and t=1230.000...
Computing for sr=1 and t=1230.000...
Computing for sr=1 and t=1260.000...
Computing for sr=1 and t=1290.000...
Computing for sr=1 and t=1320.000...
Computing for sr=1 and t=1380.000...
Computing for sr=1 and t=1410.000...
Computing for sr=1 and t=1680.000...
Computin

Computing for sr=6 and t=1830.000...
Computing for sr=6 and t=1890.000...
Computing for sr=6 and t=1920.000...
Computing for sr=6 and t=1980.000...
Computing for sr=6 and t=2040.000...
Computing for sr=6 and t=2100.000...
Computing for sr=6 and t=2130.000...
Computing for sr=6 and t=2160.000...
Computing for sr=6 and t=2190.000...
Computing for sr=6 and t=2280.000...
Computing for sr=6 and t=2430.000...
Computing for sr=7 and t=870.000...
Computing for sr=7 and t=900.000...
Computing for sr=7 and t=930.000...
Computing for sr=7 and t=960.000...
Computing for sr=7 and t=960.000...
Computing for sr=7 and t=990.000...
Computing for sr=7 and t=990.000...
Computing for sr=7 and t=1020.000...
Computing for sr=7 and t=1020.000...
Computing for sr=7 and t=1050.000...
Computing for sr=7 and t=1080.000...
Computing for sr=7 and t=1080.000...
Computing for sr=7 and t=1110.000...
Computing for sr=7 and t=1110.000...
Computing for sr=7 and t=1140.000...
Computing for sr=7 and t=1140.000...
Computin

Computing for sr=12 and t=1170.000...
Computing for sr=12 and t=1200.000...
Computing for sr=12 and t=1200.000...
Computing for sr=12 and t=1230.000...
Computing for sr=12 and t=1230.000...
Computing for sr=12 and t=1260.000...
Computing for sr=12 and t=1290.000...
Computing for sr=12 and t=1320.000...
Computing for sr=12 and t=1380.000...
Computing for sr=12 and t=1410.000...
Computing for sr=12 and t=1680.000...
Computing for sr=12 and t=1770.000...
Computing for sr=12 and t=1830.000...
Computing for sr=12 and t=1890.000...
Computing for sr=12 and t=1920.000...
Computing for sr=12 and t=1980.000...
Computing for sr=12 and t=2040.000...
Computing for sr=12 and t=2100.000...
Computing for sr=12 and t=2130.000...
Computing for sr=12 and t=2160.000...
Computing for sr=12 and t=2190.000...
Computing for sr=12 and t=2280.000...
Computing for sr=12 and t=2430.000...
Computing for sr=13 and t=870.000...
Computing for sr=13 and t=900.000...
Computing for sr=13 and t=930.000...
Computing for s

Computing for sr=18 and t=930.000...
Computing for sr=18 and t=960.000...
Computing for sr=18 and t=960.000...
Computing for sr=18 and t=990.000...
Computing for sr=18 and t=990.000...
Computing for sr=18 and t=1020.000...
Computing for sr=18 and t=1020.000...
Computing for sr=18 and t=1050.000...
Computing for sr=18 and t=1080.000...
Computing for sr=18 and t=1080.000...
Computing for sr=18 and t=1110.000...
Computing for sr=18 and t=1110.000...
Computing for sr=18 and t=1140.000...
Computing for sr=18 and t=1140.000...
Computing for sr=18 and t=1170.000...
Computing for sr=18 and t=1200.000...
Computing for sr=18 and t=1200.000...
Computing for sr=18 and t=1230.000...
Computing for sr=18 and t=1230.000...
Computing for sr=18 and t=1260.000...
Computing for sr=18 and t=1290.000...
Computing for sr=18 and t=1320.000...
Computing for sr=18 and t=1380.000...
Computing for sr=18 and t=1410.000...
Computing for sr=18 and t=1680.000...
Computing for sr=18 and t=1770.000...
Computing for sr=

Computing for sr=23 and t=1770.000...
Computing for sr=23 and t=1830.000...
Computing for sr=23 and t=1890.000...
Computing for sr=23 and t=1920.000...
Computing for sr=23 and t=1980.000...
Computing for sr=23 and t=2040.000...
Computing for sr=23 and t=2100.000...
Computing for sr=23 and t=2130.000...
Computing for sr=23 and t=2160.000...
Computing for sr=23 and t=2190.000...
Computing for sr=23 and t=2280.000...
Computing for sr=23 and t=2430.000...
Computing for sr=24 and t=870.000...
Computing for sr=24 and t=900.000...
Computing for sr=24 and t=930.000...
Computing for sr=24 and t=960.000...
Computing for sr=24 and t=960.000...
Computing for sr=24 and t=990.000...
Computing for sr=24 and t=990.000...
Computing for sr=24 and t=1020.000...
Computing for sr=24 and t=1020.000...
Computing for sr=24 and t=1050.000...
Computing for sr=24 and t=1080.000...
Computing for sr=24 and t=1080.000...
Computing for sr=24 and t=1110.000...
Computing for sr=24 and t=1110.000...
Computing for sr=24

Computing for sr=29 and t=1110.000...
Computing for sr=29 and t=1140.000...
Computing for sr=29 and t=1140.000...
Computing for sr=29 and t=1170.000...
Computing for sr=29 and t=1200.000...
Computing for sr=29 and t=1200.000...
Computing for sr=29 and t=1230.000...
Computing for sr=29 and t=1230.000...
Computing for sr=29 and t=1260.000...
Computing for sr=29 and t=1290.000...
Computing for sr=29 and t=1320.000...
Computing for sr=29 and t=1380.000...
Computing for sr=29 and t=1410.000...
Computing for sr=29 and t=1680.000...
Computing for sr=29 and t=1770.000...
Computing for sr=29 and t=1830.000...
Computing for sr=29 and t=1890.000...
Computing for sr=29 and t=1920.000...
Computing for sr=29 and t=1980.000...
Computing for sr=29 and t=2040.000...
Computing for sr=29 and t=2100.000...
Computing for sr=29 and t=2130.000...
Computing for sr=29 and t=2160.000...
Computing for sr=29 and t=2190.000...
Computing for sr=29 and t=2280.000...
Computing for sr=29 and t=2430.000...
Computing fo

Computing for sr=34 and t=2430.000...
Computing for sr=35 and t=870.000...
Computing for sr=35 and t=900.000...
Computing for sr=35 and t=930.000...
Computing for sr=35 and t=960.000...
Computing for sr=35 and t=960.000...
Computing for sr=35 and t=990.000...
Computing for sr=35 and t=990.000...
Computing for sr=35 and t=1020.000...
Computing for sr=35 and t=1020.000...
Computing for sr=35 and t=1050.000...
Computing for sr=35 and t=1080.000...
Computing for sr=35 and t=1080.000...
Computing for sr=35 and t=1110.000...
Computing for sr=35 and t=1110.000...
Computing for sr=35 and t=1140.000...
Computing for sr=35 and t=1140.000...
Computing for sr=35 and t=1170.000...
Computing for sr=35 and t=1200.000...
Computing for sr=35 and t=1200.000...
Computing for sr=35 and t=1230.000...
Computing for sr=35 and t=1230.000...
Computing for sr=35 and t=1260.000...
Computing for sr=35 and t=1290.000...
Computing for sr=35 and t=1320.000...
Computing for sr=35 and t=1380.000...
Computing for sr=35

Computing for sr=40 and t=1380.000...
Computing for sr=40 and t=1410.000...
Computing for sr=40 and t=1680.000...
Computing for sr=40 and t=1770.000...
Computing for sr=40 and t=1830.000...
Computing for sr=40 and t=1890.000...
Computing for sr=40 and t=1920.000...
Computing for sr=40 and t=1980.000...
Computing for sr=40 and t=2040.000...
Computing for sr=40 and t=2100.000...
Computing for sr=40 and t=2130.000...
Computing for sr=40 and t=2160.000...
Computing for sr=40 and t=2190.000...
Computing for sr=40 and t=2280.000...
Computing for sr=40 and t=2430.000...
Computing for sr=41 and t=870.000...
Computing for sr=41 and t=900.000...
Computing for sr=41 and t=930.000...
Computing for sr=41 and t=960.000...
Computing for sr=41 and t=960.000...
Computing for sr=41 and t=990.000...
Computing for sr=41 and t=990.000...
Computing for sr=41 and t=1020.000...
Computing for sr=41 and t=1020.000...
Computing for sr=41 and t=1050.000...
Computing for sr=41 and t=1080.000...
Computing for sr=41

Computing for sr=46 and t=1080.000...
Computing for sr=46 and t=1080.000...
Computing for sr=46 and t=1110.000...
Computing for sr=46 and t=1110.000...
Computing for sr=46 and t=1140.000...
Computing for sr=46 and t=1140.000...
Computing for sr=46 and t=1170.000...
Computing for sr=46 and t=1200.000...
Computing for sr=46 and t=1200.000...
Computing for sr=46 and t=1230.000...
Computing for sr=46 and t=1230.000...
Computing for sr=46 and t=1260.000...
Computing for sr=46 and t=1290.000...
Computing for sr=46 and t=1320.000...
Computing for sr=46 and t=1380.000...
Computing for sr=46 and t=1410.000...
Computing for sr=46 and t=1680.000...
Computing for sr=46 and t=1770.000...
Computing for sr=46 and t=1830.000...
Computing for sr=46 and t=1890.000...
Computing for sr=46 and t=1920.000...
Computing for sr=46 and t=1980.000...
Computing for sr=46 and t=2040.000...
Computing for sr=46 and t=2100.000...
Computing for sr=46 and t=2130.000...
Computing for sr=46 and t=2160.000...
Computing fo

In [54]:
%%bash -s "$dataPath" "$resultPath" "$filename"
# Positional arguments: $1 = data folder, $2 = results folder, $3 = dataset name.
# Every expansion is quoted so paths containing spaces or glob characters stay intact.
# Create the results folder if it does not exist
mkdir -p -- "$2"
# Create heatmaps from the results of machine learning and put them in the results folder
Rscript ./helpers/2.calculateMetrics.R "$1$3.csv" "$2"