In [None]:
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import mannwhitneyu

import experiment
import extract

# Plotting setup: seaborn's white-grid theme with Arial as the font family
sns.set_style(style="whitegrid", rc={"font.family": "Arial"})

# Constants
# Line colour for each experiment label used as a plot hue.
# NOTE(review): the original carried a stray "8dddd0" note here, presumably a
# spare colour that is not assigned to any experiment - confirm.
PALETTE = {
    "Constant / FLUFFI": "#6f4e7b",
    "FAST / FLUFFI": "#c9472f",
    "Constant / Round-Robin": "#ffa056",
    "FAST / Round-Robin": "#f7c860",
    "Constant / AFLFast": "#9dd766",
    "FAST / AFLFast": "#267895",
}
# Human-readable y-axis label for each measurement column that gets plotted
Y_KEY_LABELS = {
    "paths": "# Paths Covered",
    "covered_blocks": "# Blocks Covered",
    "crashes_unique": "# Crashes Found",
}


def _read_table(file_name):
    """Read one parquet file from the extracted-data directory."""
    return pd.read_parquet(os.path.join(extract.DATA_DIR, file_name))


# Load the data
# The measurements additionally get CPU time rounded to the nearest 1000
# seconds, plus that rounded value expressed in hours (used as plot x-axis).
df_measurements = _read_table("measurements.parquet").assign(
    cpu_seconds_round=lambda frame: frame["cpu_time"].round(-3),
    cpu_hours_round=lambda frame: frame["cpu_seconds_round"] / 3600,
)
df_covered_blocks = _read_table("covered_blocks.parquet")
df_paths = _read_table("paths.parquet")
df_crashes = _read_table("crashes.parquet")

# Get maxes in steps
def get_max(steps=1, df=None, total_time=None):
    """Snapshot the latest measurement of every trial at evenly spaced cutoffs.

    The time budget is split into ``steps`` equal slices. For each cutoff the
    measurements with ``cpu_time <= cutoff`` are kept, and from those the row
    with the largest ``cpu_time`` per (experiment, benchmark, trial) group is
    selected.

    Args:
        steps: Number of cutoffs to produce. Values below 1 yield an empty
            dict.
        df: Measurements to slice; must provide the columns ``experiment``,
            ``benchmark``, ``trial`` and ``cpu_time``. Defaults to the
            notebook-level ``df_measurements``.
        total_time: Full time budget, in the same unit as ``cpu_time``.
            Defaults to ``experiment.TRIAL_TIME``.

    Returns:
        dict mapping each cutoff (a float, in ascending insertion order) to
        the DataFrame of selected rows. The last key is exactly
        ``float(total_time)``.
    """
    # Resolve the defaults at call time so the notebook globals are only
    # needed when the caller does not pass explicit values.
    if df is None:
        df = df_measurements
    if total_time is None:
        total_time = experiment.TRIAL_TIME

    group_columns = ["experiment", "benchmark", "trial"]
    dfs = {}
    for i in range(1, steps + 1):
        # (total_time / steps) * steps is not guaranteed to round back to
        # total_time (e.g. (1.0 / 49) * 49 != 1.0). Pin the final cutoff so
        # callers can compare it with == and rows at exactly the full budget
        # are not dropped.
        if i == steps:
            cutoff = float(total_time)
        else:
            cutoff = (total_time / steps) * i
        df_lim = df.loc[df["cpu_time"] <= cutoff]
        # Index label of the latest measurement within each trial
        latest_rows = df_lim.groupby(group_columns)["cpu_time"].idxmax()
        dfs[cutoff] = df_lim.loc[latest_rows]
    return dfs


In [None]:
y_key = "crashes_total"
# Significance level below which a pairwise difference counts as a "win"
alpha = 0.05

for trial_time, df_max in get_max(10).items():

    # Initialization: every experiment starts this time step with zero wins
    wins = dict.fromkeys(extract.EXPERIMENTS, 0)

    # Calculate for each benchmark: test every unordered pair of experiments
    for benchmark in experiment.BENCHMARKS:
        df_benchmark = df_max.loc[df_max["benchmark"] == benchmark]
        for exp1, exp2 in itertools.combinations(extract.EXPERIMENTS, 2):
            x = df_benchmark.loc[df_benchmark["experiment"] == exp1, y_key]
            y = df_benchmark.loc[df_benchmark["experiment"] == exp2, y_key]
            try:
                # Request the two-sided test explicitly: SciPy's default
                # alternative differs between releases, and the winner is
                # decided separately from the means below.
                _, p = mannwhitneyu(x, y, alternative="two-sided")
            except ValueError:
                # SciPy refuses samples it cannot test (e.g. an empty sample,
                # or all-identical values in older releases); such a pair
                # produces no win. Any other error is a real problem and is
                # allowed to propagate instead of being silently skipped.
                continue
            # A NaN p-value compares False here, so it never counts as a win
            if p < alpha:
                if x.mean() > y.mean():
                    wins[exp1] += 1
                else:
                    wins[exp2] += 1

    # Print result: the cutoff in minutes, then wins sorted from most to fewest
    print(trial_time // 60)
    print(dict(sorted(wins.items(), key=lambda item: item[1], reverse=True)))


In [None]:
y_key = "covered_blocks"
for benchmark in experiment.BENCHMARKS:
    # Keep only the measurements taken on the benchmark being drawn
    on_benchmark = df_measurements["benchmark"] == benchmark
    df_benchmark = df_measurements.loc[on_benchmark]

    # A fresh figure per benchmark; seaborn draws onto its current axes.
    # Each experiment gets one line: the median of y_key at every rounded
    # CPU-hour value.
    plt.figure(figsize=(6, 4), dpi=100)
    g = sns.lineplot(
        data=df_benchmark,
        x="cpu_hours_round",
        y=y_key,
        hue="experiment",
        palette=PALETTE,
        estimator=np.median,
    )
    g.legend(title=None)
    g.set(
        xlim=(0, 30),
        xlabel="CPU Hours",
        ylabel=Y_KEY_LABELS[y_key],
        title=benchmark,
    )


In [None]:
y_key = "paths"
# Long-format records (one entry per cutoff and experiment) for the line plots
coverage_dict = {"cpu_hours": [], "experiment": [], "score": []}
rank_dict = {"cpu_hours": [], "experiment": [], "rank": []}
n_benchmarks = len(experiment.BENCHMARKS)

# Scores of the most recently processed cutoff. get_max() yields cutoffs in
# ascending order, so after the loop these hold the values for the full trial.
coverage_score = {}
rank_score = {}

for trial_time, df_max in get_max(100).items():
    cpu_hours = trial_time / 3600

    # Initialization: per-experiment accumulators over all benchmarks
    sum_coverage = dict.fromkeys(extract.EXPERIMENTS, 0)
    sum_ranks = dict.fromkeys(extract.EXPERIMENTS, 0)

    # Get median for each benchmark
    for benchmark in experiment.BENCHMARKS:
        df_benchmark = df_max[df_max["benchmark"] == benchmark]
        # Best value reached by any trial on this benchmark; used to
        # normalize the medians onto a 0-100 scale
        max_coverage = df_benchmark[y_key].max()
        exp_coverage = {}
        for exp in extract.EXPERIMENTS:
            # NOTE(review): an experiment without rows at this cutoff gives a
            # NaN median, which propagates into this step's score and rank.
            median_coverage = df_benchmark.loc[
                df_benchmark["experiment"] == exp, y_key
            ].median()
            sum_coverage[exp] += (
                0 if max_coverage == 0 else (median_coverage / max_coverage) * 100.0
            )
            exp_coverage[exp] = median_coverage
        # Rank 1 goes to the experiment with the highest median
        for rank, key in enumerate(
            sorted(exp_coverage, key=exp_coverage.get, reverse=True), 1
        ):
            sum_ranks[key] += rank

    # Calculate the scores: average over benchmarks
    coverage_score = {
        exp: sum_coverage[exp] / n_benchmarks for exp in extract.EXPERIMENTS
    }
    rank_score = {exp: sum_ranks[exp] / n_benchmarks for exp in extract.EXPERIMENTS}

    # Add to dict
    for exp, score in coverage_score.items():
        coverage_dict["cpu_hours"].append(cpu_hours)
        coverage_dict["experiment"].append(exp)
        coverage_dict["score"].append(score)
    for exp, rank in rank_score.items():
        rank_dict["cpu_hours"].append(cpu_hours)
        rank_dict["experiment"].append(exp)
        rank_dict["rank"].append(rank)

# Sort and print results of the final cutoff. This is done after the loop
# rather than behind `trial_time == experiment.TRIAL_TIME`: that float equality
# can fail through rounding, which would leave the sorted dicts undefined for
# the plotting code that reads them.
coverage_score_sorted = dict(
    sorted(coverage_score.items(), key=lambda item: item[1], reverse=True)
)
rank_score_sorted = dict(
    sorted(rank_score.items(), key=lambda item: item[1], reverse=True)
)
print(coverage_score_sorted)
print(rank_score_sorted)

def _score_lineplot(frame, value_column, legend_order):
    """Draw one line per experiment of ``value_column`` against CPU hours.

    Opens a new figure, plots ``frame`` with the shared palette, and applies
    the legend, x-range and x-label common to both plots. Returns the axes so
    the caller can set the y-label.
    """
    plt.figure(figsize=(6, 4), dpi=100)
    axes = sns.lineplot(
        data=frame,
        x="cpu_hours",
        y=value_column,
        hue="experiment",
        hue_order=legend_order,
        palette=PALETTE,
    )
    axes.legend(title=None)
    axes.set_xlim(0, 30)
    axes.set_xlabel("CPU Hours")
    return axes


# Coverage line plot; legend follows the final-step score ordering
df_coverage = pd.DataFrame(coverage_dict)
g = _score_lineplot(df_coverage, "score", coverage_score_sorted.keys())
g.set_ylabel("Average Normalized Score")

# Rank line plot; legend follows the final-step rank ordering
df_rank = pd.DataFrame(rank_dict)
g = _score_lineplot(df_rank, "rank", rank_score_sorted.keys())
g.set_ylabel("Average Rank")
