# Experiment 2: Change Sensitivity

**Function to inject**

```c

#include <stdlib.h>

/*
 * peaclab_math - the custom function injected into the target application
 * for the change-sensitivity experiment.
 *
 * The inline comments tagged LP-CC, IF-APC, IF-CC and MC-DAP name the
 * mutations; each shows the code that mutation substitutes (or, for IF-APC,
 * adds) at that point.
 *
 * Returns 2*(num * ndiv.quot) - nabs, where num is the last value drawn in
 * the loop and nabs is derived from it.
 */
int peaclab_math() {
    /* NOTE(review): locals are volatile, presumably so the compiler keeps
       their loads/stores in the generated code -- confirm intent. */
    volatile int num;
    volatile int nabs;
    volatile div_t ndiv;
    volatile int result;
    volatile int array[16];
    unsigned short int i;
    
    /* Fill array[] with a running sum of the values drawn; num keeps the last one. */
    for (i=0; i < 16; i++) { /* LP-CC (i=0; i < 15; i++) */
        num = rand() / (RAND_MAX/360);
        array[i] = (i > 0) ? (array[i-1] + num) : num;
    }
    
    /* IF-APC: if (num % 2 == 0) */
    if (num > 100 ) { /* IF-CC: num == 100 */
        nabs = abs(num + 1);
    } else {
        nabs = abs(num - 1);
    }
    
    /* Only the quotient of the division feeds into the result. */
    ndiv = div(nabs, 7); /* MC-DAP: div(nabs, 14) */
    result = 2*(num * ndiv.quot) - nabs;

    return result;
}

```

**Methodology**
1. Inject custom function into at least one application.
2. Compile each application repeatedly, each time applying one mutation by itself.
3. Fingerprint function. If fingerprint changes, measure distance and stop. If no changes, continue.
4. Compile each application repeatedly, each time applying one combination of *n* mutations, until every combination has been tried (*n* defaults to 2).
5. Fingerprint function. If fingerprint changes, measure distance and stop. If no changes, *n*++ and repeat Step 4.

**Mutations (see comments in code above)**
1. `ORIG` (no changes)
2. `LP-CC`
3. `IF-APC`
4. `IF-CC`
5. `MC-DAP`

For diffutils/src/diff.c
 - [x] ORIG
 - [x] LP-CC
 - [x] IF-APC
 - [x] IF-CC
 - [x] MC-DAP



## Level 1 (1 mutation at a time)
First check all the files exist

In [1]:
import os

# ORIG is the unmodified build; every other entry is one single-mutation build.
mutations = ["ORIG", "LP-CC", "IF-APC", "IF-CC", "MC-DAP"]
level1_bin_paths = [
    "../../dataset-gen-injected/sensitivity/diffutils-3.7.{}/src/diff".format(name)
    for name in mutations
]
display(level1_bin_paths)

# Fail early and name every binary that is missing, instead of a bare AssertionError.
missing_bins = [path for path in level1_bin_paths if not os.path.isfile(path)]
assert not missing_bins, "missing level 1 binaries: {}".format(missing_bins)

['../../dataset-gen-injected/sensitivity/diffutils-3.7.ORIG/src/diff',
 '../../dataset-gen-injected/sensitivity/diffutils-3.7.LP-CC/src/diff',
 '../../dataset-gen-injected/sensitivity/diffutils-3.7.IF-APC/src/diff',
 '../../dataset-gen-injected/sensitivity/diffutils-3.7.IF-CC/src/diff',
 '../../dataset-gen-injected/sensitivity/diffutils-3.7.MC-DAP/src/diff']

Then fingerprint'em

In [3]:
import ace
import pandas as pd
import time
import os

def get_src_dir(bin_path, dataset_path):
    """Return the ancestor of ``bin_path`` that sits directly inside ``dataset_path``.

    Walks upward from ``bin_path`` one ``os.path.dirname`` step at a time until
    the parent directory equals ``dataset_path`` (both compared as absolute
    paths).

    Args:
        bin_path: Path to a file or directory, relative or absolute.
        dataset_path: Directory expected to be an ancestor of ``bin_path``.

    Returns:
        The absolute path of the path component directly below
        ``dataset_path``, or ``None`` when ``dataset_path`` is not an ancestor
        of ``bin_path``.
    """
    dataset_abs = os.path.abspath(dataset_path)
    current = bin_path
    while True:
        parent = os.path.dirname(current)
        if os.path.abspath(parent) == dataset_abs:
            return os.path.abspath(current)
        # dirname() stops making progress at "/" for absolute paths and at ""
        # for relative ones; checking only "/" would loop forever on the latter.
        if parent == current:
            return None
        current = parent
        

# Profile every level 1 binary with ace.full_profile and record how long each one took.
level1_profiles = []
level1_runtimes = {}
for bin_path in level1_bin_paths:
    # Paths that are not regular files are skipped without raising.
    if not os.path.isfile(bin_path):
        continue
    src_path = get_src_dir(bin_path, "../../dataset-gen-injected/sensitivity")
    start = time.monotonic()
    size_kb = os.path.getsize(bin_path) / 1000
    print("Profiling {} ({} KB)".format(bin_path, size_kb))
    profile = ace.full_profile(
        bin_path,
        threads=8,
        ins_sort=True,
        src_only=True,
        src_path=src_path,
    )
    level1_profiles.append(profile)
    level1_runtimes[bin_path] = time.monotonic() - start

Profiling ../../dataset-gen-injected/sensitivity/diffutils-3.7.ORIG/src/diff (967.168 KB)
Profiling ../../dataset-gen-injected/sensitivity/diffutils-3.7.LP-CC/src/diff (967.176 KB)
Profiling ../../dataset-gen-injected/sensitivity/diffutils-3.7.IF-APC/src/diff (967.128 KB)
Profiling ../../dataset-gen-injected/sensitivity/diffutils-3.7.IF-CC/src/diff (967.104 KB)
Profiling ../../dataset-gen-injected/sensitivity/diffutils-3.7.MC-DAP/src/diff (967.12 KB)


In [4]:
# Stack the per-binary profile tables row-wise and renumber the rows from 0.
level1_fps = pd.concat(level1_profiles, axis=0, ignore_index=True)
level1_fps

Unnamed: 0,binary,function,address,length,src_path,src_line,src_code,attributor,raw_bytes,fingerprint
0,../../dataset-gen-injected/sensitivity/diffuti...,__strftime_internal,70624,6375,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,447.0,"__strftime_internal (STREAM_OR_CHAR_T *s, STRF...",nm-t,b'AWAVI\x89\xffAUATH\x8d\x05\xde\x91\x01\x00US...,"[12, 1, -2147483648, -134217728, 0, 1, -214748..."
1,../../dataset-gen-injected/sensitivity/diffuti...,add_exclude,61312,878,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,518.0,"add_exclude (struct exclude *ex, char const *p...",nm-T,b'AWAVAUATI\x89\xf5US\x89\xd5I\x89\xfcH\x83\xe...,"[8, -2147483647, 2147483647, 134217727, 0, -21..."
2,../../dataset-gen-injected/sensitivity/diffuti...,add_exclude_file,62736,167,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,676.0,add_exclude_file (void (*add_func) (struct exc...,nm-T,b'ATUE\x89\xc4SH\x89\xf5H\x83\xec\x10\x80:-H\x...,"[16, -2147483647, 173, 0, -46, -129, -46, 0, 8..."
3,../../dataset-gen-injected/sensitivity/diffuti...,add_exclude_fp,62192,531,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,611.0,add_exclude_fp (void (*add_func) (struct exclu...,nm-T,"b""AWAVI\x89\xd7AUATI\x89\xf5USD\x89\xc5E1\xf61...","[72, -2147483647, -2147483648, -134217728, 0, ..."
4,../../dataset-gen-injected/sensitivity/diffuti...,add_regexp,30272,219,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,815.0,"add_regexp (struct regexp_list *reglist, char ...",nm-t,b'AVAUATUH\x89\xfdSH\x89\xf7H\x89\xf3\xe8\xfa\...,"[0, -2147483647, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,..."
...,...,...,...,...,...,...,...,...,...,...
1515,../../dataset-gen-injected/sensitivity/diffuti...,xstrcat,161984,323,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,33.0,"xstrcat (size_t argcount, va_list args)\n{\n ...",nm-t,"b""AWAVAUATUSH\x83\xec(dH\x8b\x04%(\x00\x00\x00...","[40, -2147483647, 2147483623, 134217726, 0, -2..."
1516,../../dataset-gen-injected/sensitivity/diffuti...,xstrdup,84752,19,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,119.0,xstrdup (char const *string)\n{\n return xmem...,nm-T,b'SH\x89\xfb\xe87\xe7\xfe\xffH\x89\xdfH\x8dp\x...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."
1517,../../dataset-gen-injected/sensitivity/diffuti...,xvasprintf,162320,164,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,76.0,"xvasprintf (const char *format, va_list args)\...",nm-T,b'H\x83\xec\x18dH\x8b\x04%(\x00\x00\x00H\x89D$...,"[24, -2147483647, 2147483647, 134217727, 0, 0,..."
1518,../../dataset-gen-injected/sensitivity/diffuti...,xzalloc,84624,23,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,84.0,xzalloc (size_t s)\n{\n return memset (xmallo...,nm-T,b'SH\x89\xfb\xe8G\xfe\xff\xffH\x89\xda1\xf6H\x...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."


In [5]:
# Store each fingerprint as a (hashable) tuple so .unique() can compare whole fingerprints.
level1_fps["fingerprint_tuple"] = level1_fps["fingerprint"].apply(tuple)
is_peaclab_math = level1_fps["function"] == "peaclab_math"
peaclab_math_fps = level1_fps.loc[is_peaclab_math, "fingerprint_tuple"].unique()
peaclab_math_fps

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

Well, that's not super encouraging... It seems that all mutations result in the same fingerprint. The same thing happens regardless of the value of `ins_sort` (although that single fingerprint does differ when `ins_sort` changes, thankfully). I also tried adding a call to `peaclab_math()` within `main()`, to no avail (see `../../dataset-gen-injected/sensitivity-with-call`).

In [76]:
import Levenshtein as lev

pm_mask = (level1_fps.function == "peaclab_math")
level1_raw_bytes = list(level1_fps[pm_mask].raw_bytes)
# Machine code of peaclab_math in the unmodified (ORIG) build; later levels reuse this.
orig_raw_bytes = list(level1_fps[pm_mask & (level1_fps.binary == "../../dataset-gen-injected/sensitivity/diffutils-3.7.ORIG/src/diff")].raw_bytes)[0]
# Drop duplicates and the ORIG bytes while keeping DataFrame row order. Iterating
# a set of bytes would give an order that can change from one kernel session to the next.
level1_raw_bytes_noorig = [rb for rb in dict.fromkeys(level1_raw_bytes) if rb != orig_raw_bytes]

level1_lev_distances = []
for rb in level1_raw_bytes_noorig:
    display("size0={} size1={}".format(len(orig_raw_bytes), len(rb)))
    dist = lev.distance(orig_raw_bytes, rb)  # computed once, then stored and printed
    level1_lev_distances.append(dist)
    print(dist)

print("min={} avg={} max={}".format(min(level1_lev_distances), sum(level1_lev_distances)/len(level1_lev_distances), max(level1_lev_distances)))

'size0=226 size1=227'

35


'size0=226 size1=226'

1


'size0=226 size1=226'

1


'size0=226 size1=242'

22
min=1 avg=14.75 max=35


## Level 2
Now we try two at a time...

In [6]:
from itertools import combinations

# Every unordered pair drawn from the four mutations.
level2_combos = [pair for pair in combinations(("LP-CC", "IF-APC", "IF-CC", "MC-DAP"), 2)]
level2_combos

[('LP-CC', 'IF-APC'),
 ('LP-CC', 'IF-CC'),
 ('LP-CC', 'MC-DAP'),
 ('IF-APC', 'IF-CC'),
 ('IF-APC', 'MC-DAP'),
 ('IF-CC', 'MC-DAP')]

Wanna see a really hacky way of writing a shell script?

In [7]:
# Shell commands that build one two-mutation variant: copy the ORIG tree,
# apply both patches inside src/, then run make. {0} and {1} are the mutation names.
bash_block = "\n".join([
    "cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.{0}.{1}",
    "cd diffutils-3.7.{0}.{1}/src/",
    "patch -uF3 < ../../../{0}.patch",
    "patch -uF3 < ../../../{1}.patch",
    "cd ../",
    "make",
    "cd ..",
    "",
])

for combo in level2_combos:
    print(bash_block.format(*combo))

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.LP-CC.IF-APC
cd diffutils-3.7.LP-CC.IF-APC/src/
patch -uF3 < ../../../LP-CC.patch
patch -uF3 < ../../../IF-APC.patch
cd ../
make
cd ..

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.LP-CC.IF-CC
cd diffutils-3.7.LP-CC.IF-CC/src/
patch -uF3 < ../../../LP-CC.patch
patch -uF3 < ../../../IF-CC.patch
cd ../
make
cd ..

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.LP-CC.MC-DAP
cd diffutils-3.7.LP-CC.MC-DAP/src/
patch -uF3 < ../../../LP-CC.patch
patch -uF3 < ../../../MC-DAP.patch
cd ../
make
cd ..

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.IF-APC.IF-CC
cd diffutils-3.7.IF-APC.IF-CC/src/
patch -uF3 < ../../../IF-APC.patch
patch -uF3 < ../../../IF-CC.patch
cd ../
make
cd ..

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.IF-APC.MC-DAP
cd diffutils-3.7.IF-APC.MC-DAP/src/
patch -uF3 < ../../../IF-APC.patch
patch -uF3 < ../../../MC-DAP.patch
cd ../
make
cd ..

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.IF-CC.MC-DAP
cd diffutils-3.7.IF-CC.MC-DAP/src/
patch -u

In [8]:
# One binary per pair of mutations, built under the level2/ directory.
level2_bin_paths = [
    "../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.{}.{}/src/diff".format(*combo)
    for combo in level2_combos
]
display(level2_bin_paths)

# Fail early and name every binary that is missing, instead of a bare AssertionError.
missing_bins = [path for path in level2_bin_paths if not os.path.isfile(path)]
assert not missing_bins, "missing level 2 binaries: {}".format(missing_bins)

['../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.LP-CC.IF-APC/src/diff',
 '../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.LP-CC.IF-CC/src/diff',
 '../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.LP-CC.MC-DAP/src/diff',
 '../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.IF-APC.IF-CC/src/diff',
 '../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.IF-APC.MC-DAP/src/diff',
 '../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.IF-CC.MC-DAP/src/diff']

In [9]:
# Profile every level 2 binary with ace.full_profile and record how long each one took.
level2_profiles = []
level2_runtimes = {}
for bin_path in level2_bin_paths:
    # Paths that are not regular files are skipped without raising.
    if not os.path.isfile(bin_path):
        continue
    src_path = get_src_dir(bin_path, "../../dataset-gen-injected/sensitivity/level2")
    start = time.monotonic()
    size_kb = os.path.getsize(bin_path) / 1000
    print("Profiling {} ({} KB)".format(bin_path, size_kb))
    profile = ace.full_profile(
        bin_path,
        threads=8,
        ins_sort=True,
        src_only=True,
        src_path=src_path,
    )
    level2_profiles.append(profile)
    level2_runtimes[bin_path] = time.monotonic() - start

Profiling ../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.LP-CC.IF-APC/src/diff (967.256 KB)
Profiling ../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.LP-CC.IF-CC/src/diff (967.256 KB)
Profiling ../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.LP-CC.MC-DAP/src/diff (967.256 KB)
Profiling ../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.IF-APC.IF-CC/src/diff (967.232 KB)
Profiling ../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.IF-APC.MC-DAP/src/diff (967.256 KB)
Profiling ../../dataset-gen-injected/sensitivity/level2/diffutils-3.7.IF-CC.MC-DAP/src/diff (967.232 KB)


In [10]:
# Stack the per-binary profile tables row-wise and renumber the rows from 0.
level2_fps = pd.concat(level2_profiles, axis=0, ignore_index=True)
level2_fps

Unnamed: 0,binary,function,address,length,src_path,src_line,src_code,attributor,raw_bytes,fingerprint
0,../../dataset-gen-injected/sensitivity/level2/...,__strftime_internal,70640,6375,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,447.0,"__strftime_internal (STREAM_OR_CHAR_T *s, STRF...",nm-t,b'AWAVI\x89\xffAUATH\x8d\x05\xee\x91\x01\x00US...,"[12, 1, -2147483648, -134217728, 0, 1, -214748..."
1,../../dataset-gen-injected/sensitivity/level2/...,add_exclude,61328,878,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,518.0,"add_exclude (struct exclude *ex, char const *p...",nm-T,b'AWAVAUATI\x89\xf5US\x89\xd5I\x89\xfcH\x83\xe...,"[8, -2147483647, 2147483647, 134217727, 0, -21..."
2,../../dataset-gen-injected/sensitivity/level2/...,add_exclude_file,62752,167,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,676.0,add_exclude_file (void (*add_func) (struct exc...,nm-T,b'ATUE\x89\xc4SH\x89\xf5H\x83\xec\x10\x80:-H\x...,"[16, -2147483647, 173, 0, -46, -129, -46, 0, 8..."
3,../../dataset-gen-injected/sensitivity/level2/...,add_exclude_fp,62208,531,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,611.0,add_exclude_fp (void (*add_func) (struct exclu...,nm-T,"b""AWAVI\x89\xd7AUATI\x89\xf5USD\x89\xc5E1\xf61...","[72, -2147483647, -2147483648, -134217728, 0, ..."
4,../../dataset-gen-injected/sensitivity/level2/...,add_regexp,30272,219,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,815.0,"add_regexp (struct regexp_list *reglist, char ...",nm-t,b'AVAUATUH\x89\xfdSH\x89\xf7H\x89\xf3\xe8\xfa\...,"[0, -2147483647, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,..."
...,...,...,...,...,...,...,...,...,...,...
1819,../../dataset-gen-injected/sensitivity/level2/...,xstrcat,161984,323,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,33.0,"xstrcat (size_t argcount, va_list args)\n{\n ...",nm-t,"b""AWAVAUATUSH\x83\xec(dH\x8b\x04%(\x00\x00\x00...","[40, -2147483647, 2147483623, 134217726, 0, -2..."
1820,../../dataset-gen-injected/sensitivity/level2/...,xstrdup,84752,19,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,119.0,xstrdup (char const *string)\n{\n return xmem...,nm-T,b'SH\x89\xfb\xe87\xe7\xfe\xffH\x89\xdfH\x8dp\x...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."
1821,../../dataset-gen-injected/sensitivity/level2/...,xvasprintf,162320,164,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,76.0,"xvasprintf (const char *format, va_list args)\...",nm-T,b'H\x83\xec\x18dH\x8b\x04%(\x00\x00\x00H\x89D$...,"[24, -2147483647, 2147483647, 134217727, 0, 0,..."
1822,../../dataset-gen-injected/sensitivity/level2/...,xzalloc,84624,23,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,84.0,xzalloc (size_t s)\n{\n return memset (xmallo...,nm-T,b'SH\x89\xfb\xe8G\xfe\xff\xffH\x89\xda1\xf6H\x...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."


In [11]:
# Store each fingerprint as a (hashable) tuple so .unique() can compare whole fingerprints.
level2_fps["fingerprint_tuple"] = level2_fps["fingerprint"].apply(tuple)
is_peaclab_math = level2_fps["function"] == "peaclab_math"
level2_peaclab_math_fps = level2_fps.loc[is_peaclab_math, "fingerprint_tuple"].unique()
# Show the level 2 result first, then the level 1 result for comparison.
display(level2_peaclab_math_fps, peaclab_math_fps)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

Sooo again, no change...

*cries in Markdown*

In [78]:
pm_mask = (level2_fps.function == "peaclab_math")
level2_raw_bytes = list(level2_fps[pm_mask].raw_bytes)
# orig_raw_bytes (peaclab_math bytes of the ORIG build) is reused from the level 1 distance cell.
# Drop duplicates and any bytes equal to ORIG while keeping DataFrame row order. Iterating
# a set of bytes would give an order that can change from one kernel session to the next.
level2_raw_bytes_noorig = [rb for rb in dict.fromkeys(level2_raw_bytes) if rb != orig_raw_bytes]

level2_lev_distances = []
for rb in level2_raw_bytes_noorig:
    display("size0={} size1={}".format(len(orig_raw_bytes), len(rb)))
    dist = lev.distance(orig_raw_bytes, rb)  # computed once, then stored and printed
    level2_lev_distances.append(dist)
    print(dist)

print("min={} avg={} max={}".format(min(level2_lev_distances), sum(level2_lev_distances)/len(level2_lev_distances), max(level2_lev_distances)))

'size0=226 size1=227'

36


'size0=226 size1=226'

2


'size0=226 size1=226'

2


'size0=226 size1=242'

23


'size0=226 size1=242'

23


'size0=226 size1=235'

44
min=2 avg=21.666666666666668 max=44


## Level 3

In [51]:
# Every unordered triple drawn from the four mutations.
level3_combos = [triple for triple in combinations(("LP-CC", "IF-APC", "IF-CC", "MC-DAP"), 3)]
level3_combos

[('LP-CC', 'IF-APC', 'IF-CC'),
 ('LP-CC', 'IF-APC', 'MC-DAP'),
 ('LP-CC', 'IF-CC', 'MC-DAP'),
 ('IF-APC', 'IF-CC', 'MC-DAP')]

In [52]:
# Shell commands that build one three-mutation variant: copy the ORIG tree,
# apply the three patches inside src/, then run make. {0}..{2} are the mutation names.
bash_block = "\n".join([
    "cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.{0}.{1}.{2}",
    "cd diffutils-3.7.{0}.{1}.{2}/src/",
    "patch -uF3 < ../../../{0}.patch",
    "patch -uF3 < ../../../{1}.patch",
    "patch -uF3 < ../../../{2}.patch",
    "cd ../",
    "make",
    "cd ..",
    "",
])

for combo in level3_combos:
    print(bash_block.format(*combo))

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.LP-CC.IF-APC.IF-CC
cd diffutils-3.7.LP-CC.IF-APC.IF-CC/src/
patch -uF3 < ../../../LP-CC.patch
patch -uF3 < ../../../IF-APC.patch
patch -uF3 < ../../../IF-CC.patch
cd ../
make
cd ..

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.LP-CC.IF-APC.MC-DAP
cd diffutils-3.7.LP-CC.IF-APC.MC-DAP/src/
patch -uF3 < ../../../LP-CC.patch
patch -uF3 < ../../../IF-APC.patch
patch -uF3 < ../../../MC-DAP.patch
cd ../
make
cd ..

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.LP-CC.IF-CC.MC-DAP
cd diffutils-3.7.LP-CC.IF-CC.MC-DAP/src/
patch -uF3 < ../../../LP-CC.patch
patch -uF3 < ../../../IF-CC.patch
patch -uF3 < ../../../MC-DAP.patch
cd ../
make
cd ..

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.IF-APC.IF-CC.MC-DAP
cd diffutils-3.7.IF-APC.IF-CC.MC-DAP/src/
patch -uF3 < ../../../IF-APC.patch
patch -uF3 < ../../../IF-CC.patch
patch -uF3 < ../../../MC-DAP.patch
cd ../
make
cd ..



In [53]:
# One binary per triple of mutations, built under the level3/ directory.
level3_bin_paths = [
    "../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.{}.{}.{}/src/diff".format(*combo)
    for combo in level3_combos
]
display(level3_bin_paths)

# Fail early and name every binary that is missing, instead of a bare AssertionError.
missing_bins = [path for path in level3_bin_paths if not os.path.isfile(path)]
assert not missing_bins, "missing level 3 binaries: {}".format(missing_bins)

['../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.LP-CC.IF-APC.IF-CC/src/diff',
 '../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.LP-CC.IF-APC.MC-DAP/src/diff',
 '../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.LP-CC.IF-CC.MC-DAP/src/diff',
 '../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.IF-APC.IF-CC.MC-DAP/src/diff']

In [54]:
# Profile every level 3 binary with ace.full_profile and record how long each one took.
level3_profiles = []
level3_runtimes = {}
for bin_path in level3_bin_paths:
    # Paths that are not regular files are skipped without raising.
    if not os.path.isfile(bin_path):
        continue
    src_path = get_src_dir(bin_path, "../../dataset-gen-injected/sensitivity/level3")
    start = time.monotonic()
    size_kb = os.path.getsize(bin_path) / 1000
    print("Profiling {} ({} KB)".format(bin_path, size_kb))
    profile = ace.full_profile(
        bin_path,
        threads=8,
        ins_sort=True,
        src_only=True,
        src_path=src_path,
    )
    level3_profiles.append(profile)
    level3_runtimes[bin_path] = time.monotonic() - start

Profiling ../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.LP-CC.IF-APC.IF-CC/src/diff (967.24 KB)
Profiling ../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.LP-CC.IF-APC.MC-DAP/src/diff (967.264 KB)
Profiling ../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.LP-CC.IF-CC.MC-DAP/src/diff (967.256 KB)
Profiling ../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.IF-APC.IF-CC.MC-DAP/src/diff (967.24 KB)


In [55]:
# Stack the per-binary profile tables row-wise and renumber the rows from 0.
level3_fps = pd.concat(level3_profiles, axis=0, ignore_index=True)
level3_fps

Unnamed: 0,binary,function,address,length,src_path,src_line,src_code,attributor,raw_bytes,fingerprint
0,../../dataset-gen-injected/sensitivity/level3/...,__strftime_internal,70624,6375,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,447.0,"__strftime_internal (STREAM_OR_CHAR_T *s, STRF...",nm-t,b'AWAVI\x89\xffAUATH\x8d\x05\xde\x91\x01\x00US...,"[12, 1, -2147483648, -134217728, 0, 1, -214748..."
1,../../dataset-gen-injected/sensitivity/level3/...,add_exclude,61312,878,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,518.0,"add_exclude (struct exclude *ex, char const *p...",nm-T,b'AWAVAUATI\x89\xf5US\x89\xd5I\x89\xfcH\x83\xe...,"[8, -2147483647, 2147483647, 134217727, 0, -21..."
2,../../dataset-gen-injected/sensitivity/level3/...,add_exclude_file,62736,167,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,676.0,add_exclude_file (void (*add_func) (struct exc...,nm-T,b'ATUE\x89\xc4SH\x89\xf5H\x83\xec\x10\x80:-H\x...,"[16, -2147483647, 173, 0, -46, -129, -46, 0, 8..."
3,../../dataset-gen-injected/sensitivity/level3/...,add_exclude_fp,62192,531,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,611.0,add_exclude_fp (void (*add_func) (struct exclu...,nm-T,"b""AWAVI\x89\xd7AUATI\x89\xf5USD\x89\xc5E1\xf61...","[72, -2147483647, -2147483648, -134217728, 0, ..."
4,../../dataset-gen-injected/sensitivity/level3/...,add_regexp,30272,219,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,815.0,"add_regexp (struct regexp_list *reglist, char ...",nm-t,b'AVAUATUH\x89\xfdSH\x89\xf7H\x89\xf3\xe8\xfa\...,"[0, -2147483647, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,..."
...,...,...,...,...,...,...,...,...,...,...
1211,../../dataset-gen-injected/sensitivity/level3/...,xstrcat,161984,323,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,33.0,"xstrcat (size_t argcount, va_list args)\n{\n ...",nm-t,"b""AWAVAUATUSH\x83\xec(dH\x8b\x04%(\x00\x00\x00...","[40, -2147483647, 2147483623, 134217726, 0, -2..."
1212,../../dataset-gen-injected/sensitivity/level3/...,xstrdup,84752,19,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,119.0,xstrdup (char const *string)\n{\n return xmem...,nm-T,b'SH\x89\xfb\xe87\xe7\xfe\xffH\x89\xdfH\x8dp\x...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."
1213,../../dataset-gen-injected/sensitivity/level3/...,xvasprintf,162320,164,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,76.0,"xvasprintf (const char *format, va_list args)\...",nm-T,b'H\x83\xec\x18dH\x8b\x04%(\x00\x00\x00H\x89D$...,"[24, -2147483647, 2147483647, 134217727, 0, 0,..."
1214,../../dataset-gen-injected/sensitivity/level3/...,xzalloc,84624,23,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,84.0,xzalloc (size_t s)\n{\n return memset (xmallo...,nm-T,b'SH\x89\xfb\xe8G\xfe\xff\xffH\x89\xda1\xf6H\x...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."


In [56]:
# Store each fingerprint as a (hashable) tuple so .unique() can compare whole fingerprints.
level3_fps["fingerprint_tuple"] = level3_fps["fingerprint"].apply(tuple)
is_peaclab_math = level3_fps["function"] == "peaclab_math"
level3_peaclab_math_fps = level3_fps.loc[is_peaclab_math, "fingerprint_tuple"].unique()
# Show the newest level first, followed by the earlier levels for comparison.
display(level3_peaclab_math_fps, level2_peaclab_math_fps, peaclab_math_fps)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

Still no change :/

Let's look around a little bit to ensure what we're seeing is correct

In [57]:
# Sanity check: every level 3 binary should carry a distinct peaclab_math byte string.
peaclab_math_rows = level3_fps["function"] == "peaclab_math"
level3_fps.loc[peaclab_math_rows, "raw_bytes"].nunique(dropna=False) == len(level3_bin_paths)

True

In [58]:
# Sanity check: every level 3 binary should be attributed a distinct peaclab_math source text.
peaclab_math_rows = level3_fps["function"] == "peaclab_math"
level3_fps.loc[peaclab_math_rows, "src_code"].nunique(dropna=False) == len(level3_bin_paths)

True

In [59]:
# Print the attributed peaclab_math source of each level 3 binary for visual inspection.
peaclab_math_src = level3_fps.loc[level3_fps["function"] == "peaclab_math", ["binary", "src_code"]]
for binary, src_code in peaclab_math_src.itertuples(index=False, name=None):
    print(binary)
    print(src_code)

../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.LP-CC.IF-APC.IF-CC/src/diff
int peaclab_math() {
    volatile int num;
    volatile int nabs;
    volatile div_t ndiv;
    volatile int result;
    volatile int array[16];
    unsigned short int i;

    for (i=0; i < 15; i++) {
        num = rand() / (RAND_MAX/360);
        array[i] = (i > 0) ? (array[i-1] + num) : num;
    }

    if (num % 2 == 0)
    if (num == 100) {
        nabs = abs(num + 1);
    } else {
        nabs = abs(num - 1);
    }

    ndiv = div(nabs, 7); /* MC-DAP: div(nabs, 14) */
    result = 2*(num * ndiv.quot) - nabs;

    return result;
}

../../dataset-gen-injected/sensitivity/level3/diffutils-3.7.LP-CC.IF-APC.MC-DAP/src/diff
int peaclab_math() {
    volatile int num;
    volatile int nabs;
    volatile div_t ndiv;
    volatile int result;
    volatile int array[16];
    unsigned short int i;

    for (i=0; i < 15; i++) {
        num = rand() / (RAND_MAX/360);
        array[i] = (i > 0) ? (array[i-1] + n

Visual inspection checks out... Let's look at similarity of the byte strings

In [92]:
!pip3 install python-Levenshtein

Collecting python-Levenshtein
  Using cached https://files.pythonhosted.org/packages/42/a9/d1785c85ebf9b7dfacd08938dd028209c34a0ea3b1bcdb895208bd40a67d/python-Levenshtein-0.12.0.tar.gz
Building wheels for collected packages: python-Levenshtein
  Building wheel for python-Levenshtein (setup.py) ... [?25ldone
[?25h  Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.0-cp38-cp38-linux_x86_64.whl size=145686 sha256=8339f0f6c65f780fd92faec234ed3986e37aa2dcf74b8800f1a2500df6773f9d
  Stored in directory: /home/ubuntu/.cache/pip/wheels/de/c2/93/660fd5f7559049268ad2dc6d81c4e39e9e36518766eaf7e342
Successfully built python-Levenshtein
Installing collected packages: python-Levenshtein
Successfully installed python-Levenshtein-0.12.0
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [60]:
# Machine-code bytes of peaclab_math from each level 3 binary, in DataFrame row order.
is_peaclab_math = level3_fps["function"] == "peaclab_math"
level3_raw_bytes = level3_fps.loc[is_peaclab_math, "raw_bytes"].tolist()
level3_raw_bytes

[b'US1\xed\xbb\xb5\x00\x00\xb4H\x83\xecxdH\x8b\x04%(\x00\x00\x00H\x89D$h1\xc0\xeb\x04\x90\x83\xc5\x01\xe8\xd8\xaf\xff\xff\x89\xc1\xf7\xeb\x01\xca\xc1\xf9\x1f\xc1\xfa\x16)\xca\x85\xed\x89T$\x0c\x0f\x84\x86\x00\x00\x00\x8dE\xffH\x98\x8bD\x84 \x8bT$\x0c\x01\xd0Hc\xd5f\x83\xfd\x0e\x89D\x94 u\xc2\x8bD$\x0c\xa8\x01u\x19\x8bD$\x0c\x83\xf8d\x8bD$\x0cte\x83\xe8\x01\x991\xd0)\xd0\x89D$\x10\x8b|$\x10\xbe\x07\x00\x00\x00\xe8\xe3\xac\xff\xffH\x89D$\x18\x8bD$\x18\x8bL$\x0c\x8bT$\x10\x0f\xaf\xc1\x01\xc0)\xd0\x89D$\x14\x8bD$\x14H\x8bt$hdH34%(\x00\x00\x00u)H\x83\xc4x[]\xc3\x0f\x1f@\x00\x8bD$\x0c\x89D$ \xe9K\xff\xff\xff\x0f\x1f\x00\x83\xc0\x01\x991\xd0)\xd0\x89D$\x10\xeb\x99\xe8\x85\xaa\xff\xff',
 b'US1\xed\xbb\xb5\x00\x00\xb4H\x83\xecxdH\x8b\x04%(\x00\x00\x00H\x89D$h1\xc0\xeb\x04\x90\x83\xc5\x01\xe8\xd8\xaf\xff\xff\x89\xc1\xf7\xeb\x01\xca\xc1\xf9\x1f\xc1\xfa\x16)\xca\x85\xed\x89T$\x0c\x0f\x84\x9e\x00\x00\x00\x8dE\xffH\x98\x8bD\x84 \x8bT$\x0c\x01\xd0Hc\xd5f\x83\xfd\x0e\x89D\x94 u\xc2\x8bD$\x0c\xa8\x01u\

In [61]:
import Levenshtein as lev

# Pairwise Levenshtein distance between the peaclab_math byte strings of the level 3 binaries.
level3_lev_distances = []
for first_rb, second_rb in combinations(level3_raw_bytes, 2):
    display("size0={} size1={}".format(len(first_rb), len(second_rb)))
    # Compute each distance once, keep it in the list, and show it a single time.
    pair_distance = lev.distance(first_rb, second_rb)
    level3_lev_distances.append(pair_distance)
    print(pair_distance)

'size0=235 size1=242'

35

35


'size0=235 size1=226'

44

44


'size0=235 size1=235'

2

2


'size0=242 size1=226'

23

23


'size0=242 size1=235'

35

35


'size0=226 size1=235'

44

44


So the changes are relatively small: for blobs with sizes between 226 and 242 bytes, the pairwise Levenshtein distances ranged from 2 to 44 byte edits. That's roughly 1% to 19% changed.

Welp, I guess on to level 4

In [79]:
pm_mask = (level3_fps.function == "peaclab_math")
level3_raw_bytes = list(level3_fps[pm_mask].raw_bytes)
# orig_raw_bytes (peaclab_math bytes of the ORIG build) is reused from the level 1 distance cell.
# Drop duplicates and any bytes equal to ORIG while keeping DataFrame row order. Iterating
# a set of bytes would give an order that can change from one kernel session to the next.
level3_raw_bytes_noorig = [rb for rb in dict.fromkeys(level3_raw_bytes) if rb != orig_raw_bytes]

level3_lev_distances = []
for rb in level3_raw_bytes_noorig:
    display("size0={} size1={}".format(len(orig_raw_bytes), len(rb)))
    dist = lev.distance(orig_raw_bytes, rb)  # computed once, then stored and printed
    level3_lev_distances.append(dist)
    print(dist)

print("min={} avg={} max={}".format(min(level3_lev_distances), sum(level3_lev_distances)/len(level3_lev_distances), max(level3_lev_distances)))

'size0=226 size1=226'

3


'size0=226 size1=235'

45


'size0=226 size1=242'

24


'size0=226 size1=235'

45
min=3 avg=29.25 max=45


## Level 4

In [62]:
# The single combination that applies all four mutations at once.
level4_combos = [quad for quad in combinations(("LP-CC", "IF-APC", "IF-CC", "MC-DAP"), 4)]
level4_combos

[('LP-CC', 'IF-APC', 'IF-CC', 'MC-DAP')]

In [63]:
# Shell commands that build the four-mutation variant: copy the ORIG tree,
# apply all four patches inside src/, then run make. {0}..{3} are the mutation names.
bash_block = "\n".join([
    "cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.{0}.{1}.{2}.{3}",
    "cd diffutils-3.7.{0}.{1}.{2}.{3}/src/",
    "patch -uF3 < ../../../{0}.patch",
    "patch -uF3 < ../../../{1}.patch",
    "patch -uF3 < ../../../{2}.patch",
    "patch -uF3 < ../../../{3}.patch",
    "cd ../",
    "make",
    "cd ..",
    "",
])

for combo in level4_combos:
    print(bash_block.format(*combo))

cp -a ../diffutils-3.7.ORIG ./diffutils-3.7.LP-CC.IF-APC.IF-CC.MC-DAP
cd diffutils-3.7.LP-CC.IF-APC.IF-CC.MC-DAP/src/
patch -uF3 < ../../../LP-CC.patch
patch -uF3 < ../../../IF-APC.patch
patch -uF3 < ../../../IF-CC.patch
patch -uF3 < ../../../MC-DAP.patch
cd ../
make
cd ..



In [64]:
# One binary per four-mutation combination, built under the level4/ directory.
level4_bin_paths = [
    "../../dataset-gen-injected/sensitivity/level4/diffutils-3.7.{}.{}.{}.{}/src/diff".format(*combo)
    for combo in level4_combos
]
display(level4_bin_paths)

# Fail early and name every binary that is missing, instead of a bare AssertionError.
missing_bins = [path for path in level4_bin_paths if not os.path.isfile(path)]
assert not missing_bins, "missing level 4 binaries: {}".format(missing_bins)

['../../dataset-gen-injected/sensitivity/level4/diffutils-3.7.LP-CC.IF-APC.IF-CC.MC-DAP/src/diff']

In [65]:
# Profile every level 4 binary with ace.full_profile and record how long each one took.
level4_profiles = []
level4_runtimes = {}
for bin_path in level4_bin_paths:
    # Paths that are not regular files are skipped without raising.
    if not os.path.isfile(bin_path):
        continue
    src_path = get_src_dir(bin_path, "../../dataset-gen-injected/sensitivity/level4")
    start = time.monotonic()
    size_kb = os.path.getsize(bin_path) / 1000
    print("Profiling {} ({} KB)".format(bin_path, size_kb))
    profile = ace.full_profile(
        bin_path,
        threads=8,
        ins_sort=True,
        src_only=True,
        src_path=src_path,
    )
    level4_profiles.append(profile)
    level4_runtimes[bin_path] = time.monotonic() - start

Profiling ../../dataset-gen-injected/sensitivity/level4/diffutils-3.7.LP-CC.IF-APC.IF-CC.MC-DAP/src/diff (967.248 KB)


In [66]:
# Stack the per-binary profile tables row-wise and renumber the rows from 0.
level4_fps = pd.concat(level4_profiles, axis=0, ignore_index=True)
level4_fps

Unnamed: 0,binary,function,address,length,src_path,src_line,src_code,attributor,raw_bytes,fingerprint
0,../../dataset-gen-injected/sensitivity/level4/...,__strftime_internal,70624,6375,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,447.0,"__strftime_internal (STREAM_OR_CHAR_T *s, STRF...",nm-t,b'AWAVI\x89\xffAUATH\x8d\x05\xde\x91\x01\x00US...,"[12, 1, -2147483648, -134217728, 0, 1, -214748..."
1,../../dataset-gen-injected/sensitivity/level4/...,add_exclude,61312,878,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,518.0,"add_exclude (struct exclude *ex, char const *p...",nm-T,b'AWAVAUATI\x89\xf5US\x89\xd5I\x89\xfcH\x83\xe...,"[8, -2147483647, 2147483647, 134217727, 0, -21..."
2,../../dataset-gen-injected/sensitivity/level4/...,add_exclude_file,62736,167,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,676.0,add_exclude_file (void (*add_func) (struct exc...,nm-T,b'ATUE\x89\xc4SH\x89\xf5H\x83\xec\x10\x80:-H\x...,"[16, -2147483647, 173, 0, -46, -129, -46, 0, 8..."
3,../../dataset-gen-injected/sensitivity/level4/...,add_exclude_fp,62192,531,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,611.0,add_exclude_fp (void (*add_func) (struct exclu...,nm-T,"b""AWAVI\x89\xd7AUATI\x89\xf5USD\x89\xc5E1\xf61...","[72, -2147483647, -2147483648, -134217728, 0, ..."
4,../../dataset-gen-injected/sensitivity/level4/...,add_regexp,30272,219,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,815.0,"add_regexp (struct regexp_list *reglist, char ...",nm-t,b'AVAUATUH\x89\xfdSH\x89\xf7H\x89\xf3\xe8\xfa\...,"[0, -2147483647, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,..."
...,...,...,...,...,...,...,...,...,...,...
299,../../dataset-gen-injected/sensitivity/level4/...,xstrcat,161984,323,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,33.0,"xstrcat (size_t argcount, va_list args)\n{\n ...",nm-t,"b""AWAVAUATUSH\x83\xec(dH\x8b\x04%(\x00\x00\x00...","[40, -2147483647, 2147483623, 134217726, 0, -2..."
300,../../dataset-gen-injected/sensitivity/level4/...,xstrdup,84752,19,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,119.0,xstrdup (char const *string)\n{\n return xmem...,nm-T,b'SH\x89\xfb\xe87\xe7\xfe\xffH\x89\xdfH\x8dp\x...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."
301,../../dataset-gen-injected/sensitivity/level4/...,xvasprintf,162320,164,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,76.0,"xvasprintf (const char *format, va_list args)\...",nm-T,b'H\x83\xec\x18dH\x8b\x04%(\x00\x00\x00H\x89D$...,"[24, -2147483647, 2147483647, 134217727, 0, 0,..."
302,../../dataset-gen-injected/sensitivity/level4/...,xzalloc,84624,23,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,84.0,xzalloc (size_t s)\n{\n return memset (xmallo...,nm-T,b'SH\x89\xfb\xe8G\xfe\xff\xffH\x89\xda1\xf6H\x...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."


In [67]:
# Store each fingerprint as a (hashable) tuple so .unique() can compare whole fingerprints.
level4_fps["fingerprint_tuple"] = level4_fps["fingerprint"].apply(tuple)
is_peaclab_math = level4_fps["function"] == "peaclab_math"
level4_peaclab_math_fps = level4_fps.loc[is_peaclab_math, "fingerprint_tuple"].unique()
# Show the newest level first, followed by the earlier levels for comparison.
display(
    level4_peaclab_math_fps,
    level3_peaclab_math_fps,
    level2_peaclab_math_fps,
    peaclab_math_fps,
)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

Well this is... bewildering...

I just tried manually executing a dummy program containing the function, and when I apply the various mutations, the output definitely changes. This should indicate a change in the effect of the function, and therefore a change in the fingerprint. But... that's just not happening here.

In [80]:
pm_mask = (level4_fps.function == "peaclab_math")
level4_raw_bytes = list(level4_fps[pm_mask].raw_bytes)
# orig_raw_bytes (peaclab_math bytes of the ORIG build) is reused from the level 1 distance cell.
# Drop duplicates and any bytes equal to ORIG while keeping DataFrame row order. Iterating
# a set of bytes would give an order that can change from one kernel session to the next.
level4_raw_bytes_noorig = [rb for rb in dict.fromkeys(level4_raw_bytes) if rb != orig_raw_bytes]

level4_lev_distances = []
for rb in level4_raw_bytes_noorig:
    display("size0={} size1={}".format(len(orig_raw_bytes), len(rb)))
    dist = lev.distance(orig_raw_bytes, rb)  # computed once, then stored and printed
    level4_lev_distances.append(dist)
    print(dist)

print("min={} avg={} max={}".format(min(level4_lev_distances), sum(level4_lev_distances)/len(level4_lev_distances), max(level4_lev_distances)))

'size0=226 size1=235'

46
min=46 avg=46.0 max=46


# MAJOR


In [82]:
# Run ace.full_profile on the MAJOR build's diff binary, pointing it at that build's source directory.
major_src_path = "../../dataset-gen-injected/sensitivity/major/diffutils-3.7.MAJOR/src"
major_bin_path = "../../dataset-gen-injected/sensitivity/major/diffutils-3.7.MAJOR/src/diff"
major_fps = ace.full_profile(
    major_bin_path,
    threads=8,
    ins_sort=True,
    src_only=True,
    src_path=major_src_path,
)
major_fps

Unnamed: 0,binary,function,address,length,src_path,src_line,src_code,attributor,raw_bytes,fingerprint
2,../../dataset-gen-injected/sensitivity/major/d...,__strftime_internal,70656,6375,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,447.0,"__strftime_internal (STREAM_OR_CHAR_T *s, STRF...",nm-t,b'AWAVI\x89\xffAUATH\x8d\x05\xde\x91\x01\x00US...,"[12, 1, -2147483648, -134217728, 0, 1, -214748..."
4,../../dataset-gen-injected/sensitivity/major/d...,add_exclude,61344,878,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,518.0,"add_exclude (struct exclude *ex, char const *p...",nm-T,b'AWAVAUATI\x89\xf5US\x89\xd5I\x89\xfcH\x83\xe...,"[8, -2147483647, 2147483647, 134217727, 0, -21..."
5,../../dataset-gen-injected/sensitivity/major/d...,add_exclude_file,62768,167,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,676.0,add_exclude_file (void (*add_func) (struct exc...,nm-T,b'ATUE\x89\xc4SH\x89\xf5H\x83\xec\x10\x80:-H\x...,"[16, -2147483647, 173, 0, -46, -129, -46, 0, 8..."
6,../../dataset-gen-injected/sensitivity/major/d...,add_exclude_fp,62224,531,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,611.0,add_exclude_fp (void (*add_func) (struct exclu...,nm-T,"b""AWAVI\x89\xd7AUATI\x89\xf5USD\x89\xc5E1\xf61...","[72, -2147483647, -2147483648, -134217728, 0, ..."
7,../../dataset-gen-injected/sensitivity/major/d...,add_regexp,30272,219,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,815.0,"add_regexp (struct regexp_list *reglist, char ...",nm-t,b'AVAUATUH\x89\xfdSH\x89\xf7H\x89\xf3\xe8\xfa\...,"[0, -2147483647, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,..."
...,...,...,...,...,...,...,...,...,...,...
340,../../dataset-gen-injected/sensitivity/major/d...,xstrcat,162016,323,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,33.0,"xstrcat (size_t argcount, va_list args)\n{\n ...",nm-t,b'AWAVAUATUSH\x83\xec(dH\x8b\x04%(\x00\x00\x00...,"[40, -2147483647, 2147483623, 134217726, 0, -2..."
341,../../dataset-gen-injected/sensitivity/major/d...,xstrdup,84784,19,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,119.0,xstrdup (char const *string)\n{\n return xmem...,nm-T,b'SH\x89\xfb\xe8\x17\xe7\xfe\xffH\x89\xdfH\x8d...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."
342,../../dataset-gen-injected/sensitivity/major/d...,xvasprintf,162352,164,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,76.0,"xvasprintf (const char *format, va_list args)\...",nm-T,b'H\x83\xec\x18dH\x8b\x04%(\x00\x00\x00H\x89D$...,"[24, -2147483647, 2147483647, 134217727, 0, 0,..."
343,../../dataset-gen-injected/sensitivity/major/d...,xzalloc,84656,23,/home/ubuntu/ace/dataset-gen-injected/sensitiv...,84.0,xzalloc (size_t s)\n{\n return memset (xmallo...,nm-T,b'SH\x89\xfb\xe8G\xfe\xff\xffH\x89\xda1\xf6H\x...,"[-1, -2147483647, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0..."


In [87]:
# Lists are unhashable, so keep a tuple copy of every fingerprint; unique() needs hashable values.
major_fps["fingerprint_tuple"] = major_fps.fingerprint.apply(tuple)
# Distinct fingerprints recorded for the injected peaclab_math function in the MAJOR profile.
major_peaclab_math_fps = major_fps.loc[
    major_fps.function == "peaclab_math", "fingerprint_tuple"
].unique()
# Show MAJOR first, then the results computed by earlier cells, for a side-by-side comparison.
display(
    major_peaclab_math_fps,
    level4_peaclab_math_fps,
    level3_peaclab_math_fps,
    level2_peaclab_math_fps,
    peaclab_math_fps,
)

array([(112, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

array([(120, -2147483647, -2147483648, -134217728, 0, -2147483648, 0, 40, 104, 0, 40, 2, 9626, 38505, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)],
      dtype=object)

In [85]:
# Levenshtein (edit) distance between the raw bytes of peaclab_math in the
# MAJOR profile and the reference bytes held in orig_raw_bytes.
pm_mask = (major_fps.function == "peaclab_math")
major_raw_bytes = list(major_fps[pm_mask].raw_bytes)
# NOTE(review): orig_raw_bytes is not defined in this cell; it must already be
# in the kernel namespace (presumably from the Level 1 section -- confirm).
# The commented-out line below is kept for reference only. If it is re-enabled,
# build its mask from level1_fps: pm_mask above is computed on major_fps.
#orig_raw_bytes = list(level1_fps[pm_mask & (level1_fps.binary == "../../dataset-gen-injected/sensitivity/diffutils-3.7.ORIG/src/diff")].raw_bytes)[0]
# Remove variants that are byte-identical to the reference; the set also
# collapses duplicate byte strings.
major_raw_bytes_noorig = set(major_raw_bytes) - {orig_raw_bytes}

major_lev_distances = []
for rb in major_raw_bytes_noorig:
    display("size0={} size1={}".format(len(orig_raw_bytes), len(rb)))
    # Compute the distance once and reuse the stored value for the printout.
    major_lev_distances.append(lev.distance(orig_raw_bytes, rb))
    print(major_lev_distances[-1])

if major_lev_distances:
    print("min={} avg={} max={}".format(min(major_lev_distances), sum(major_lev_distances)/len(major_lev_distances), max(major_lev_distances)))
else:
    # min()/max() raise ValueError and the average divides by zero on an empty
    # list, which happens when every variant matches the reference bytes.
    print("no peaclab_math variant differs from orig_raw_bytes")

'size0=226 size1=261'

108
min=108 avg=108.0 max=108


In [93]:
from scipy.spatial import distance

# Euclidean distance between the first unique peaclab_math fingerprint of the
# MAJOR profile and the first unique one of the level-3 profile.
a, b = major_peaclab_math_fps[0], level3_peaclab_math_fps[0]
dst = distance.euclidean(a, b)

In [94]:
# Show the Euclidean distance computed in the previous cell.
dst

8.0