In [3]:
import sys, os

sys.path.append("/home/yizhou7/mariposa/src")
os.chdir("/home/yizhou7/mariposa/")

from enum import Enum
from collections import Counter

from debugger.demo_utils import Report
from debugger.debugger import Debugger, DbgMode
from debugger.bench_viewer import BenchViewer
from debugger.mutant_info import TraceFailure
from utils.analysis_utils import Categorizer, fmt_percent
from benchmark_consts import *

In [4]:
revs = BenchViewer(FIXED_POINT_TARGETS, DbgMode.SINGLETON)

In [None]:
def fixable_by_erase(r: Report):
    assert len(r.stabilized) != 0
    return "erase" in r.stabilized.action.values

def fixable_by_inst(r: Report):
    assert len(r.stabilized) != 0
    return "inst_keep" in r.stabilized.action.values or "inst_replace" in r.stabilized.action.values

to_fixes = Counter()
uk_fixes = Counter()

for k, rev in revs.items():
    r = rev.build_report()

    fixes = []

    if fixable_by_erase(r):
        fixes.append("erase")
    if fixable_by_inst(r):
        fixes.append("inst")

    if len(fixes) == 2:
        fixes.append("both")

    if k in f_modes[TraceFailure.FAST_UNKNOWN]:
        uk_fixes.update(fixes)
    elif k in f_modes[TraceFailure.TIMEOUT]:
        to_fixes.update(fixes)

to_count = len(f_modes[TraceFailure.TIMEOUT])

print("Timeouts", to_count)
print("fixed by")
for i in sorted(to_fixes, key=lambda x: to_fixes[x], reverse=True):
    print(f"\t{i}:", fmt_percent(to_fixes[i], to_count))

print("")

uk_count = len(f_modes[TraceFailure.FAST_UNKNOWN])
print("Fast Unknowns", uk_count)
print("fixed by")
for i in sorted(uk_fixes, key=lambda x: uk_fixes[x], reverse=True):
    print(f"\t{i}:", fmt_percent(uk_fixes[i], uk_count))

Timeouts 299
fixed by
	erase: 76.59%
	inst: 72.91%
	both: 49.83%

Fast Unknowns 66
fixed by
	inst: 96.97%
	erase: 31.82%
	both: 28.79%


In [9]:
import multiprocessing
from utils.cache_utils import *

def get_best_rank(rev: Evaluator):
    report = rev.build_report()
    mi = rev.get_trace_info()
    g = mi.get_trace_graph()
    ratios = rev.get_trace_graph_ratios()
    scores = dict()

    for qname, ratio in ratios.items():
        scores[qname] = g.aggregate_scores(ratio)

    scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    ranked = [x[0] for x in scores]
    best_rank = len(ranked)

    for qname in report.stabilized.qname.values:
        print(qname)
        best_rank = min(best_rank, ranked.index(qname) + 1)

    return best_rank

def get_best_ranks(mode: TraceFailure):
    jobs = []

    for query in f_modes[mode]:
        rev = revs[query]
        # print(name_hash)
        ratio_cache = rev.name_hash + ".ratios"

        if not has_cache(ratio_cache):
            print("./src/debugger3.py -i", query, "--build-ratios")
            continue

        jobs.append(rev)

    pool = multiprocessing.Pool(16)
    return pool.map(get_best_rank, jobs)

summaries = dict()

for mode in [TraceFailure.SLOW_UNKNOWN, TraceFailure.NOT_FAIL, TraceFailure.TIMEOUT, TraceFailure.FAST_UNKNOWN]:
    summaries[mode] = get_best_ranks(mode)


./src/debugger3.py -i data/projs/bench_unstable/base.z3/fs_dice--queries-ASN1.Spec.Tag-2.smt2 --build-ratios
./src/debugger3.py -i data/projs/bench_unstable/base.z3/d_fvbkv--lib-Buckets-BucketsLib.i.dfy.Impl__BucketsLib.__default.SplitBucketOnPivotsAt.smt2 --build-ratios
./src/debugger3.py -i data/projs/bench_unstable/base.z3/d_lvbkv--MapSpec-TSJ.i.dfy.Impl__TSJ.__default.Move1to2StepPreservesInv.smt2 --build-ratios
./src/debugger3.py -i data/projs/bench_unstable/base.z3/d_fvbkv--lib-Buckets-PackedStringArray.i.dfy.Impl__PackedStringArray.__default.IndexOfFirstKeyGte.smt2 --build-ratios
./src/debugger3.py -i data/projs/bench_unstable/base.z3/d_fvbkv--lib-Marshalling-GenericMarshalling.i.dfy.Impl__GenericMarshalling.__default.MarshallCase_split0.smt2 --build-ratios
./src/debugger3.py -i data/projs/bench_unstable/base.z3/d_lvbkv--lib-Base-Sequences.i.dfy.Impl__Sequences.__default.UnflattenIndexIsCorrect.smt2 --build-ratios
./src/debugger3.py -i data/projs/bench_unstable/base.z3/d_lvbkv--

In [9]:
os.chdir("/home/amarshah/mariposa/src")
from calculate_average_rank import *
os.chdir("/home/yizhou7/mariposa/")


def get_name_hash(filename):
    import hashlib
    return hashlib.sha256(filename.encode()).hexdigest()[0:10]

fast_unknown_files = [get_name_hash(f) for f in f_modes[TraceFailure.FAST_UNKNOWN]]
timeout_files = files = [get_name_hash(f) for f in f_modes[TraceFailure.TIMEOUT]]
slow_unknown_files = files = [get_name_hash(f) for f in f_modes[TraceFailure.SLOW_UNKNOWN]]
not_fail_files = files = [get_name_hash(f) for f in f_modes[TraceFailure.NOT_FAIL]]

print(fast_unknown_files)

heuristic = "proof_count"


kw_parameters = {"ranking_heuristic": heuristic}
print(files)
fast_unknown_results = calculate_rankings(kw_parameters, fast_unknown_files)




['80d7543acb', '5ab30afc32', '383f10cfb6', '16d92ddc7b', '519fdc89c1', '9735a6b051', '3818823b0a', '2f7324b043', 'b778746723', 'fa1767e6e3', 'e95adeebaf', '167389a8ef', 'c8f75a1321', 'cd6cdb377d', 'c48bc62edc', '1db53f149e', '79fffdb8cb', 'e3b4e5b780', 'e05c339447', '4bb10dc4e9', 'd87c685738', '7d2782e41a', 'b5cc583f2d', 'f9f9100cfb', '4ae002e825', 'acf6656f68', '4aecdc3251', 'd8c158b8bf', 'a70ef5a023', 'cbc1128701', 'e6d6347750', 'aa849323ae', '416446ca74', '40d72d4071', '712b931174', '462c420128', '7fedd43bf6', 'fcb903eff0', '2b4b7cd571', '4680269ef1', 'fa548b2af5', '1aa46bd795', '842038cbbb', 'de58a38d6c', 'c2671f2f38', '5954cb98c8', 'd8c62df78c', '83448eecf5', 'dd8e6c2233', '81df0e63de', '125461812b', 'fdbf242cbe', 'ed978bf127', '2a2073ceb5', '0a4b4299d2', '14aaa649c7', '6f64b72bc8', '873cd88973', 'dea7cbc5c5', '28672c8a4e', '3d63ac7eaf', 'c8ca6d3f20', '43588efb1c', '8852397c03', 'a878753336', 'f8e63dc317']
['0b497bc090', '95ece4e2c8', '1a397d663b', 'fb0072addd', 'a19f4272bf', '5b7

In [10]:
kw_parameters = {"ranking_heuristic": heuristic}
print(files)
timeout_results = calculate_rankings(kw_parameters, timeout_files)

['0b497bc090', '95ece4e2c8', '1a397d663b', 'fb0072addd', 'a19f4272bf', '5b7d18ab3e', '473fa6f229', '0334fcf203']
cache/6884529c21.report


cache/790839853f.report
cache/58a0fdb9fc.report
cache/a95607b981.report
cache/2ead327dad.report
cache/0effbdcba4.report
cache/782b28a40e.report
cache/5c1612881d.report
cache/8e8d83f571.report
cache/4dc3312a9a.report
cache/b52f25923a.report
cache/adb0c59b26.report
cache/ff70c76375.report
cache/94bc3cfd5f.report
cache/2d98098a21.report
cache/6200b46f8f.report
cache/cdfc45fd41.report
cache/fbd113c34b.report
cache/1c167aafa9.report
cache/e2b0591692.report
cache/a854e6c8ad.report
cache/061f1d0514.report
cache/1ecf533970.report
cache/6bbd2c848c.report
cache/eec60fd21e.report
cache/d8fb40a98b.report
cache/391e1b7ac0.report
cache/1752022f42.report
cache/eff0b44ea2.report
cache/f7f46a3a04.report
cache/03df204fcd.report
cache/0cedd72fe1.report
cache/2f6a6b82e9.report
cache/3ed37733c3.report
cache/92bd652a7c.report
cache/d0c54edf56.report
cache/7eea650229.report
cache/81f91a69f2.report
cache/b3ec86d7f1.report
cache/34eda2f999.report
cache/47c0fbf175.report
cache/37eddcb7ab.report
cache/4ddbe507f4

kw_parameters = {"ranking_heuristic": heuristic}
print(files)
calculate_rankings(kw_parameters, fast_unknown_files)

In [11]:
os.chdir("/home/amarshah/mariposa/src")
from calculate_average_rank import *
os.chdir("/home/yizhou7/mariposa/")

slow_unknown_files = files = [get_name_hash(f) for f in f_modes[TraceFailure.SLOW_UNKNOWN]]
not_fail_files = files = [get_name_hash(f) for f in f_modes[TraceFailure.NOT_FAIL]]

kw_parameters = {"ranking_heuristic": heuristic}
slow_unknown_results = calculate_rankings(kw_parameters, slow_unknown_files)

cache/3ce5c5e4dc.report
cache/e998d08b31.report
[1, 12]
Total:  2  files
Mean:  6
Median:  6.5
1 would fix on first try
1 would fix in 3 or fewer tries
1 would fix in 10 or fewer tries


In [12]:
kw_parameters = {"ranking_heuristic": heuristic}
print(files)
not_fail_results = calculate_rankings(kw_parameters, not_fail_files)

['0b497bc090', '95ece4e2c8', '1a397d663b', 'fb0072addd', 'a19f4272bf', '5b7d18ab3e', '473fa6f229', '0334fcf203']
cache/0b497bc090.report
cache/95ece4e2c8.report
cache/1a397d663b.report
cache/fb0072addd.report
cache/a19f4272bf.report
cache/5b7d18ab3e.report
cache/473fa6f229.report
cache/0334fcf203.report
[1, 2, 1, 1, 2, 1, 1, 14]
Total:  8  files
Mean:  2
Median:  1.0
5 would fix on first try
7 would fix in 3 or fewer tries
7 would fix in 10 or fewer tries


In [13]:
import numpy as np

for mode in [TraceFailure.TIMEOUT, TraceFailure.FAST_UNKNOWN, TraceFailure.SLOW_UNKNOWN, TraceFailure.NOT_FAIL]:
    best_ranks = summaries[mode]
    ranks = np.array(best_ranks)
    print(mode, len(best_ranks))
    print(np.where(ranks == 1)[0].shape[0], "would fix on first try")
    print(np.where(ranks <= 3)[0].shape[0], "would fix in 3 or fewer tries")
    print(np.where(ranks <= 10)[0].shape[0], "would fix in 10 or fewer tries")
    print("")






TraceFailure.TIMEOUT 299
114 would fix on first try
173 would fix in 3 or fewer tries
238 would fix in 10 or fewer tries

TraceFailure.FAST_UNKNOWN 66
9 would fix on first try
18 would fix in 3 or fewer tries
35 would fix in 10 or fewer tries

TraceFailure.SLOW_UNKNOWN 0
0 would fix on first try
0 would fix in 3 or fewer tries
0 would fix in 10 or fewer tries

TraceFailure.NOT_FAIL 0
0 would fix on first try
0 would fix in 3 or fewer tries
0 would fix in 10 or fewer tries



In [20]:
from benchmark_consts import *


timeout_ranks = summaries[TraceFailure.TIMEOUT]
total_ranks = timeout_ranks + fast_unknown_results + slow_unknown_results + not_fail_results
ranks = np.array(total_ranks)

print("CUMULATIVE: ", len(UNSTABLE_MARIPOSA))
# print("CUMULATIVE", len(total_ranks))
# print("mean: ", mean(total_ranks))
# print("median: ", median(total_ranks))
print(np.where(ranks == 1)[0].shape[0], "would fix on first try")
print(np.where(ranks <= 3)[0].shape[0], "would fix in 3 or fewer tries")
print(np.where(ranks <= 10)[0].shape[0], "would fix in 10 or fewer tries")
print(f"We solve {(np.where(ranks <= 10)[0].shape[0] / len(UNSTABLE_MARIPOSA)) * 100}% of the queries with the top 10 quantifiers!")

CUMULATIVE:  545
139 would fix on first try
210 would fix in 3 or fewer tries
293 would fix in 10 or fewer tries
We solve 53.76146788990825% of the queries with the top 10 quantifiers!


In [2]:
print(len(['80d7543acb', '5ab30afc32', '383f10cfb6', '16d92ddc7b', '519fdc89c1', '9735a6b051', '3818823b0a', '2f7324b043', 'b778746723', 'fa1767e6e3', 'e95adeebaf', '167389a8ef', 'c8f75a1321', 'cd6cdb377d', 'c48bc62edc', '1db53f149e', '79fffdb8cb', 'e3b4e5b780', 'e05c339447', '4bb10dc4e9', 'd87c685738', '7d2782e41a', 'b5cc583f2d', 'f9f9100cfb', '4ae002e825', 'acf6656f68', '4aecdc3251', 'd8c158b8bf', 'a70ef5a023', 'cbc1128701', 'e6d6347750', 'aa849323ae', '416446ca74', '40d72d4071', '712b931174', '462c420128', '7fedd43bf6', 'fcb903eff0', '2b4b7cd571', '4680269ef1', 'fa548b2af5', '1aa46bd795', '842038cbbb', 'de58a38d6c', 'c2671f2f38', '5954cb98c8', 'd8c62df78c', '83448eecf5', 'dd8e6c2233', '81df0e63de', '125461812b', 'fdbf242cbe', 'ed978bf127', '2a2073ceb5', '0a4b4299d2', '14aaa649c7', '6f64b72bc8', '873cd88973', 'dea7cbc5c5', '28672c8a4e', '3d63ac7eaf', 'c8ca6d3f20', '43588efb1c', '8852397c03', 'a878753336', 'f8e63dc317']))

66
