In [129]:
from pathlib import Path
import re

In [130]:
COMPILER_TYPES = ['gcc', 'clang', 'icc']
results = dict.fromkeys(COMPILER_TYPES)
results_path = Path('binutils')

def get_binary_info(path):
    result = {}
    filename = path.name
    if filename.startswith('binutils'):
        m = re.search(r'.*-.*_(.*)-([\d\.]*)_(.*)_(O\d)_(.*)', filename)
        compiler, version, arch, opt, target = m.groups()
        target = target.replace('.elf', '')
        result = {'compiler': compiler, 'version': version, 'arch': arch,
                  'opt': opt, 'target': target}
        return result
    else:
        m = re.search(r'(.*)-(\w*)-([\d\.]*)-(O\d)-\w*-(.*)', filename)
        target, compiler, version, opt, arch = m.groups()
        arch = arch.replace('x64', 'x86_64')
        result = {'compiler': compiler, 'version': version, 'arch': arch,
                  'opt': opt, 'target': target}
        return result


def read_result(result_path):
    s = result_path.read_text()
    if 'rank' not in s:
        return None
    b2r2_time, infer_time = re.findall(r'Time:\s*(\d+)', s)
    b2r2_time, infer_time = int(b2r2_time), int(infer_time)
    matches = re.search(r': (\w+),\s*\w+\s*:\s*(.*),\s\w*:\s*(.*)', s).groups()
    identified_result, rank, probabilities = matches
    rank = rank[2:-2].split(';')
    probabilities = probabilities[1:-1].replace('"','').split(';')
    result = {'result': identified_result.lower(), 'b2r2_time': b2r2_time, 
              'infer_time': infer_time, 'rank':rank, 'probabilities': probabilities}
    return result



results = []
for path in results_path.glob('*'):
    info = get_binary_info(path)
    result = read_result(path)
    if result:
        result.update(info)
        results.append(result)
        

In [131]:
import pandas as pd
pd.options.display.float_format = '{:.2f}'.format

In [132]:
df = pd.DataFrame.from_dict(results)
df['correct'] = df.result == df.compiler
df

Unnamed: 0,result,b2r2_time,infer_time,rank,probabilities,compiler,version,arch,opt,target,correct
0,clang,96998,146,"[Clang, Gcc, Icc]","[0.999, 0.623, 0.595]",clang,4.0,x86_32,O0,addr2line,True
1,clang,98165,171,"[Clang, Gcc, Icc]","[0.999, 0.631, 0.604]",clang,4.0,x86_32,O0,ar,True
2,clang,73846,159,"[Clang, Gcc, Icc]","[0.999, 0.633, 0.602]",clang,4.0,x86_32,O0,as,True
3,clang,96449,154,"[Clang, Gcc, Icc]","[0.999, 0.624, 0.596]",clang,4.0,x86_32,O0,c++filt,True
4,clang,1695,60,"[Clang, Gcc, Icc]","[0.958, 0.792, 0.708]",clang,4.0,x86_32,O0,elfedit,True
...,...,...,...,...,...,...,...,...,...,...,...
247,clang,105803,141,"[Clang, Gcc, Icc]","[0.997, 0.706, 0.071]",gcc,8.2.0,x86_64,O0,ranlib,False
248,clang,79943,78,"[Clang, Gcc, Icc]","[0.992, 0.538, 0.000]",gcc,8.2.0,x86_64,O0,readelf,False
249,clang,103911,108,"[Clang, Gcc, Icc]","[0.996, 0.702, 0.077]",gcc,8.2.0,x86_64,O0,size,False
250,clang,106501,140,"[Clang, Gcc, Icc]","[0.997, 0.700, 0.077]",gcc,8.2.0,x86_64,O0,strings,False


In [133]:
df[df.correct == True].correct.shape[0] / df.shape[0]

0.7063492063492064

In [134]:
from itertools import product

archs = ['x86_64', 'x86_32']
opts = ['O0', 'O1', 'O2', 'O3']
compilers = {'gcc':['4.9.4', '5.5.0', '6.4.0', '7.3.0', '8.2.0'], 'clang':['4.0', '5.0', '6.0', '7.0'], 'icc':['19.0.4']}

columns = list(product(archs, opts))
rows = []
for compiler, versions in compilers.items():
    rows.extend([(compiler, version) for version in versions])
accuracies = []
for compiler, version in rows:
    col_accuracies = []
    for arch, opt in columns:
        d = df[(df.compiler == compiler) & (df.version == version) & (df.opt == opt) & (df.arch == arch)]
        if d.shape[0] == 0:
            col_accuracies.append(-1)
            continue
        accuracy = d[d.correct == True].shape[0] / d.shape[0]
        col_accuracies.append(accuracy)
    accuracies.append(col_accuracies)

df_acc = pd.DataFrame(accuracies)
print(df_acc.to_string(index=False))


    0  1  2  3     4  5  6  7
 0.00 -1 -1 -1  1.00 -1 -1 -1
 0.00 -1 -1 -1  1.00 -1 -1 -1
 0.00 -1 -1 -1  1.00 -1 -1 -1
 0.00 -1 -1 -1  1.00 -1 -1 -1
 0.00 -1 -1 -1  1.00 -1 -1 -1
 0.93 -1 -1 -1  1.00 -1 -1 -1
 0.93 -1 -1 -1  1.00 -1 -1 -1
 0.93 -1 -1 -1  1.00 -1 -1 -1
 0.93 -1 -1 -1  1.00 -1 -1 -1
-1.00 -1 -1 -1 -1.00 -1 -1 -1


In [135]:
times = ['b2r2_time', 'infer_time']
rows = ['gcc', 'clang', 'icc']
columns = list(product(opts, times))

time_results = []
for compiler in rows:
    col_times = []
    for opt, time in columns:
        d = df[(df.compiler == compiler) & (df.opt == opt)]
        means = d.mean()
        col_times.append(means[time])
    time_results.append(col_times)

df_time = pd.DataFrame(time_results)
print(df_time.to_string(float_format='%d', index=False))

    0   1   2   3   4   5   6   7
93253 112 NaN NaN NaN NaN NaN NaN
96883 120 NaN NaN NaN NaN NaN NaN
  NaN NaN NaN NaN NaN NaN NaN NaN


In [136]:
import warnings
from sklearn.metrics import classification_report
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings(action='ignore', category=UndefinedMetricWarning)

archs = ['x86_64', 'x86_32']
opts = ['O0', 'O1', 'O2', 'O3']
columns = list(product(archs, opts))
rows = ['gcc', 'clang', 'icc']

cls_reports = []
for arch, opt in columns:
    d = df[(df.opt == opt) & (df.arch == arch)]
    y_pred = d.result.values
    y_true = d.compiler.values
    cls_report = classification_report(y_true, y_pred, labels=rows, output_dict=True)
    cls_reports.append(cls_report)

reports = []
for compiler in compilers:
    col_reports = []
    for cls_report in cls_reports:
        compiler_report = cls_report[compiler]
        report = compiler_report['precision'], compiler_report['recall'], compiler_report['f1-score']
        col_reports.append(report)
    reports.append(col_reports)
        
df_report = pd.DataFrame(reports)
print(df_report.to_string(float_format='%.2f', index=False))

                                                            0                1                2                3                4                5                6                7
                                              (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (1.0, 1.0, 1.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)
 (0.4262295081967213, 0.9285714285714286, 0.5842696629213483)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (1.0, 1.0, 1.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)
                                              (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)  (0.0, 0.0, 0.0)


In [137]:
pd.options.display.float_format = lambda x: '{:f}'.format(x)
s_probs = []
for i in range(3):
    s_prob = df[df.correct == False].probabilities.map(lambda x: pd.to_numeric(x[i]))
    s_probs.append(s_prob)
s_probs = pd.DataFrame(s_probs)

s_probs.mean()

18    0.333333
46    0.333333
74    0.333333
102   0.333333
126   0.593667
        ...   
247   0.591333
248   0.510000
249   0.591667
250   0.591333
251   0.593000
Length: 74, dtype: float64