# Correctness checks

In [None]:
import os
import sys
import subprocess
import pandas as pnd

In [2]:
# Module-level path settings. The helper functions in the cells visible here
# do not reference them. NOTE(review): their intended use is not shown in this
# part of the notebook — confirm before relying on or removing them.
FOLDER_CP=".."
DIFF_FILE="../.branch_diff"
RES_CMP="../RES.csv"

- `get_sym_set` takes as input an object file and returns all of its symbols as a set.

In [3]:
def get_sym_set(obj):
    """Return the set of symbol names that `nm` reports for the file `obj`.

    The third whitespace-separated field of every `nm` output line is taken
    as the symbol name. Lines with fewer than three fields contribute
    nothing (presumably this covers nm's lines for undefined symbols, which
    carry no address column — verify against the nm version in use).

    Parameters:
        obj: path to the object/binary file to inspect.

    Returns:
        A set of symbol-name strings. The set is empty when `obj` is not an
        existing regular file (a message is printed in that case) or when
        `nm` produced no usable lines.

    Raises:
        OSError: if the `nm` executable itself cannot be started.
    """
    if not os.path.isfile(obj):
        print(obj, "does not exist")
        # Return a real empty set; the literal `{}` would be an empty dict.
        return set()
    try:
        # Argument list instead of a shell string: the path reaches nm
        # verbatim even if it contains spaces or shell metacharacters.
        raw = subprocess.check_output(["nm", obj])
    except subprocess.CalledProcessError as err:
        # A non-zero exit from nm is not fatal: keep whatever it printed,
        # which is what the former `nm ... | awk ...` pipeline did.
        raw = err.output or b""
    symbols = set()
    for line in raw.decode("utf-8").split('\n'):
        fields = line.split()
        # Equivalent of awk's `$3`, ignoring lines where it would be empty.
        if len(fields) >= 3:
            symbols.add(fields[2])
    return symbols

- `cmp_sym` compares two sets of symbols and returns the difference (additions/subtractions) as a dictionary.

In [4]:
def cmp_sym(syms1, syms2, verbose=False):
    """Compute which symbols were added to / removed from `syms1` to get `syms2`.

    Parameters:
        syms1: collection of symbols taken as the reference.
        syms2: collection of symbols compared against the reference.
        verbose: when true, print every removed symbol prefixed with "-",
            every added symbol prefixed with "+", and "SAME SYMBOLS" if
            there is no difference at all.

    Returns:
        A dict with two lists:
        "sub" - symbols present in `syms1` but not in `syms2`;
        "add" - symbols present in `syms2` but not in `syms1`.
        Each list follows the iteration order of its source collection.
    """
    removed = [sym for sym in syms1 if sym not in syms2]
    # Collect the symbol from syms2 itself; the previous code appended the
    # leftover loop variable of the first loop here, which recorded the wrong
    # symbol and raised NameError when syms1 was empty.
    added = [sym for sym in syms2 if sym not in syms1]
    if verbose:
        for sym in removed:
            print("-", sym)
        for sym in added:
            print("+", sym)
        if not added and not removed:
            print("SAME SYMBOLS")
    return {"add": added, "sub": removed}

- `bcmp` compares the binaries (size and symbols) produced by the builds on two branches of a git repository.
    - `git_repo`: git repository
    - `b1, b2`:   branches to compare
    - `btc_name`: path to the produced binary
    - `out_csv`:  the output file of the result

In [55]:
def bcmp(git_repo, b1, b2, btc_name, out_csv):
    """Compare the binary `btc_name` as found on branches `b1` and `b2`.

    Both branches are checked out in turn inside `git_repo`. When the binary
    exists on both, the copy from `b2` is stashed in the parent directory,
    `b1` is checked out again, and the two files are compared by symbol set
    (via `get_sym_set` / `cmp_sym`) and by size. One line is appended to
    `out_csv`:

        "<b1>,<b2>,True|False"  when a comparison was made,
        "<b1>,<b2>,-"           when it could not be made (a checkout or the
                                copy failed, or the binary is missing on
                                either branch).

    Parameters:
        git_repo: path of the git working tree; it becomes the current
            directory for the duration of the call.
        b1, b2: names of the branches to compare.
        btc_name: path of the binary, relative to `git_repo`.
        out_csv: result file, opened in append mode while the current
            directory is `git_repo` (so a relative path is resolved there).

    Side effects:
        Switches branches in `git_repo` (the last successful checkout
        stays active) and temporarily creates "../.<binary name>" relative to
        `git_repo`. The caller's working directory is restored on exit, also
        when an exception is raised.
    """
    def _checkout(branch):
        # Argument list, no shell: the branch name is passed to git verbatim.
        try:
            return subprocess.call(["git", "checkout", branch]) == 0
        except OSError:
            # git could not be started at all; count it as a failed checkout.
            return False

    start_dir = os.getcwd()
    os.chdir(git_repo)
    try:
        # A failed checkout leaves the previous tree in place, so the binary
        # found there must not be attributed to the requested branch.
        has_binary = _checkout(b1) and os.path.isfile(btc_name)
        on_b2 = _checkout(b2)
        has_binary = has_binary and on_b2 and os.path.isfile(btc_name)
        res_s = "{},{},-\n".format(b1, b2)
        if has_binary:
            curr_file = btc_name
            btc_name_without_path = btc_name.split('/')[-1]
            other_file = "../.{}".format(btc_name_without_path)
            try:
                # Still on b2 here: stash its binary outside the work tree
                # before switching back to b1.
                copied = subprocess.call(["cp", btc_name, other_file]) == 0
                on_b1 = _checkout(b1)
                if copied and on_b1 and os.path.isfile(curr_file):
                    cmp_res_dict = cmp_sym(get_sym_set(curr_file),
                                           get_sym_set(other_file))
                    same_size = (os.path.getsize(curr_file)
                                 == os.path.getsize(other_file))
                    val = (not cmp_res_dict["add"]
                           and not cmp_res_dict["sub"]
                           and same_size)
                    res_s = "{},{},{}\n".format(b1, b2, val)
            finally:
                # Equivalent of `rm -f`: remove the stash if it exists.
                if os.path.exists(other_file):
                    os.remove(other_file)
        with open(out_csv, 'a') as out:
            out.write(res_s)
    finally:
        # Return to where the caller was instead of assuming that one ".."
        # undoes the chdir into git_repo.
        os.chdir(start_dir)

- `test` takes a folder (which is a git repo), the branch-name prefixes of the clean build and of the incremental build, and the path to the produced binary file. It then compares the produced binary of every clean-build branch with that of each corresponding incremental-build branch.

In [49]:
def test(folder, clean_prefix, incre_prefix, btc):
    out_csv = "data/{}".format(folder)
    if out_csv[-1] == "/":
        out_csv = out_csv[:-1]
    out_csv += ".csv"
    with open(out_csv, 'w') as out:
        out.write("config1,config2,val\n")
    for i in range(1, 21):
        clean = "{}-{:05d}".format(clean_prefix, i)
        incre = ""
        for j in range(1, 21):
            if j == i:
                continue
            else:
                incre = "{}-{:05d}-{:05d}".format(incre_prefix, j, i)
            bcmp(folder, clean, incre, btc, "../{}".format(out_csv))

- To run the correctness test, first build all configurations, then fetch the resulting data locally. For example, to run it on x264, execute the following line:
```
test("x264-sample03I", "x264", "ix264", "x264")
```

- The cell below shows how to call `test` on every tested project.

In [None]:
# One entry per tested repository:
# (folder, clean-branch prefix, incremental-branch prefix, binary path).
projects = [
    ("x264-sample03I", "x264", "ix264", "x264"),
    ("x264-sample04I", "x264", "ix264", "x264"),
    ("sqlite-sample03I", "sqlite", "isqlite", "sqlite3"),
    ("sqlite-sample04I", "sqlite", "isqlite", "sqlite3"),
    ("curl_sample03I", "curl", "icurl", "src/curl"),
    ("curl_sample04I", "curl", "icurl", "src/curl"),
    ("xterm-sample03I", "xterm", "ixterm", "xterm"),
    ("xterm-sample04I", "xterm", "ixterm", "xterm"),
]
# Run the correctness check on each project, in the order listed.
for project_args in projects:
    test(*project_args)