## Imports

In [64]:
from pathlib import Path
from typing import Callable, Any, Dict
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from natsort import natsorted

## Global information about each suite

In [65]:
runs  = {e: {} for e in Path("./out").iterdir()}

def add_entry_to_run(new_key: str, f: Callable[[Path, Dict[str, Any]], Any]) -> Dict[Path, Dict[str, Any]]:
    global runs
    for key, value in runs.items():
        value.update({new_key: f(key, value)})

add_entry_to_run("name", lambda p, d: p.name)

### Time

In [66]:
def get_time(p:Path, d):
    file_path = p.rglob("klee/info").__next__()
    if file_path.exists():
        with open(file_path, "r") as file:
            res = re.search(r"--max-time=(\w*)", file.read())
            if res:
                return res.group(1)
    return None

add_entry_to_run("time", get_time)

## Per util information

### Setup

In [67]:
add_entry_to_run("df", lambda p, d: pd.DataFrame(columns=[e.name for e in p.iterdir() if e.is_dir()]))

def add_entry_for_utils(key: str, f: Callable[[Path], Any]) -> None:
    """
    Add entry for all utils

    Paramenters:
    key (str): key to add the new value at in the dataframe
    f (Callable[[Path], Any]): function taking the path to the subfolder for the util and returning the appropriate value
    """

    def adder(p: Path, d):
        df = d["df"]
        res = {}
        for util in df.columns:
            path = p / util
            if not path.exists():
                raise Exception(f"Path \"{util}\" does not exist")
            res[util] = f(path)
        df.loc[key] = res
        return df
    add_entry_to_run("df", adder)

### Number of errors according to KLEE

In [68]:
def read_num_errors(util_path: Path) -> str:
    file_path = util_path / "klee"
    if file_path.exists():
        return str(len(list(file_path.glob("*.err"))))
    return None

add_entry_for_utils("num_errors", read_num_errors)

### Coverage according to KLEE

In [69]:
def read_klee_csv(csv_name: str) -> Callable[[Path], str]:
    def f(util_path: Path) -> str:
        file_path = util_path / "klee-stats.csv"
        if file_path.exists():
            df = pd.read_csv(file_path)
            return str(df[csv_name][0])
        return None
    return f

add_entry_for_utils("klee_ICov", read_klee_csv("ICov(%)"))
add_entry_for_utils("klee_BCov", read_klee_csv("BCov(%)"))

### Coverage according to `gcov`

In [70]:
def read_gcov_cov(util_path: Path) -> str:
    file_path = util_path / "cov.txt"
    if file_path.exists():
        with open(file_path, "r") as file:
            res = re.search(r"File '\.\./\.\./src/(\w+)\.c'\nLines executed:(\d?\d\d.\d\d)% of \d+", file.read())
            if res:
                return res.group(2)
    return None

add_entry_for_utils("gcov_cov", read_gcov_cov)

## Plots
### Massaging `df`s together

In [71]:
dfs = []
for k, v in runs.items():
    df = v["df"]
    df = df.reset_index(names="key")
    df = df.melt(id_vars="key", var_name="util")
    # .melt(id_vars="")
    df["DataFrame"] = str(k)
    df["time"] = v["time"]
    dfs.append(df)

combined_df = pd.concat(dfs)
combined_df['value'] = combined_df['value'].astype(np.float64)
combined_df = combined_df.dropna(subset=['value'])
combined_df = combined_df.reset_index(drop=True)
print(combined_df.head(15))

           key      util  value                 DataFrame     time
0   num_errors        ln   0.00  out/coreutils-6.10-24h-3  1440min
1    klee_ICov        ln  45.43  out/coreutils-6.10-24h-3  1440min
2    klee_BCov        ln  33.77  out/coreutils-6.10-24h-3  1440min
3     gcov_cov        ln  86.08  out/coreutils-6.10-24h-3  1440min
4   num_errors     shred   7.00  out/coreutils-6.10-24h-3  1440min
5    klee_ICov     shred  44.97  out/coreutils-6.10-24h-3  1440min
6    klee_BCov     shred  32.62  out/coreutils-6.10-24h-3  1440min
7     gcov_cov     shred  68.53  out/coreutils-6.10-24h-3  1440min
8   num_errors       pwd   0.00  out/coreutils-6.10-24h-3  1440min
9    klee_ICov       pwd  34.95  out/coreutils-6.10-24h-3  1440min
10   klee_BCov       pwd  23.03  out/coreutils-6.10-24h-3  1440min
11    gcov_cov       pwd  20.34  out/coreutils-6.10-24h-3  1440min
12  num_errors  unexpand   1.00  out/coreutils-6.10-24h-3  1440min
13   klee_ICov  unexpand  44.64  out/coreutils-6.10-24h-3  144

### Plots by util

In [83]:
for util in np.sort(combined_df["util"].unique()):
    filtered_by_util_df = combined_df[combined_df['util'] == util]
    keys = combined_df["key"].unique()
    fig, axes = plt.subplots(nrows=1, ncols=len(keys), figsize=(5*len(keys), 5), dpi=300)
    fig.suptitle(f"Plots for util \"{util}\"", fontsize=20)
    for i, key in enumerate(keys):
        filtered_by_key_df = filtered_by_util_df[filtered_by_util_df['key'] == key]
        axes[i].set_title(key)
        order = natsorted(filtered_by_key_df["time"].unique())
        sns.boxplot(x="time", y="value", data=filtered_by_key_df, ax=axes[i], order=order)
    plt.tight_layout()
    plt.savefig(f"plots/by-util/{util}.png")
    plt.close()
    print(f"Done with {util}")

Done with base64
Done with basename
Done with cat
Done with chcon
Done with chgrp
Done with chmod
Done with chown
Done with chroot
Done with cksum
Done with comm
Done with cp
Done with csplit
Done with cut
Done with date
Done with dd
Done with df
Done with dircolors
Done with dirname
Done with du
Done with echo
Done with env
Done with expand
Done with expr
Done with factor
Done with false
Done with fmt
Done with fold
Done with ginstall
Done with head
Done with hostid
Done with hostname
Done with id
Done with join
Done with kill
Done with link
Done with ln
Done with logname
Done with ls
Done with md5sum
Done with mkdir
Done with mkfifo
Done with mknod
Done with mktemp
Done with mv
Done with nice
Done with nl
Done with nohup
Done with od
Done with paste
Done with pathchk
Done with pinky
Done with pr
Done with printenv
Done with printf
Done with ptx
Done with pwd
Done with readlink
Done with rm
Done with rmdir
Done with runcon
Done with seq
Done with setuidgid
Done with shred
Done with sh

### Plots by coverage

In [99]:
coverage_df = combined_df.drop(columns="DataFrame")
keys = coverage_df["key"].unique()
fig, axes = plt.subplots(nrows=len(keys), ncols=1, figsize=(10, 5*len(keys)), dpi=300)
fig.suptitle(f"Empirical Cumulative Distribution Function (ECDF)", fontsize=20, y=0.99)
for time in natsorted(coverage_df["time"].unique()):
    filtered_by_time_df = coverage_df[coverage_df["time"] == time]
    filtered_by_time_df = filtered_by_time_df.drop(columns="time")
    filtered_by_time_df = filtered_by_time_df.groupby(["util", "key"]).mean()
    filtered_by_time_df = filtered_by_time_df.reset_index()
    for key_i, key in enumerate(np.sort(keys)):
        filtered_by_key_df = filtered_by_time_df[filtered_by_time_df['key'] == key]
        axes[key_i].set_title(f"{key} for {time}")
        sns.ecdfplot(y="value", data=filtered_by_key_df, ax=axes[key_i], label=time)
        axes[key_i].legend()
plt.tight_layout()
plt.savefig(f"plots/by-time.png")
plt.close()