In [1]:
import sys, os

# Root of the Mariposa checkout. Set the MARIPOSA_PATH environment variable to
# point at a different checkout; the fallback is the original hard-coded path.
# Joining with "" guarantees exactly one trailing separator, so code that
# concatenates onto MARIPOSA_PATH keeps working whatever the override looks like.
MARIPOSA_PATH = os.path.join(
    os.environ.get("MARIPOSA_PATH") or "/home/yizhou7/mariposa/", ""
)

# Make the packages under <root>/src importable, then switch the working
# directory to the root (later cells use paths relative to it).
sys.path.append(os.path.join(MARIPOSA_PATH, "src"))
os.chdir(MARIPOSA_PATH)

from debugger.debugger import DbgMode
from debugger.bench_viewer import BenchViewer
from debugger.debugger_options import DebugOptions
from debugger.strainer import DebugStatus
from benchmark_consts import *
from utils.system_utils import list_smt2_files


In [2]:
# Round 1: build a BenchViewer over the UNSTABLE_VERUS queries.
options = DebugOptions()
# These are Verus queries, so the Verus flag has to be switched on explicitly.
# NOTE(review): presumably the flag defaults to off — confirm in DebugOptions.
options.is_verus = True

round1 = BenchViewer(UNSTABLE_VERUS, options)

[93m[WARN] [proof] no proofs available [0m
[93m[WARN] [init] 8d058577cd trace slow unknown, fallback to fast_fail [0m
[93m[WARN] [proof] no proofs available [0m
[93m[WARN] [proof] no proofs available [0m


In [3]:
# Print the per-DebugStatus tally (count and percentage) for round 1.
round1.status.print_status()

| category                  |   count | percentage   |
|---------------------------|---------|--------------|
| DebugStatus.FIX_FOUND     |      50 | 71.43 %      |
| DebugStatus.FIX_NOT_FOUND |      17 | 24.29 %      |
| DebugStatus.NO_PROOF      |       3 | 4.29 %       |
| total                     |      70 | 100.00 %     |


In [4]:
# (name_hash, edit_hash) pairs that fixable_after_discount() skips when it
# decides whether a query still counts as fixable.
# The same name_hash may appear more than once, with different edit hashes.
# NOTE(review): the reason each pair is discounted is not recorded here —
# confirm the criterion and document it next to this list.
discount = [
    ("08719204aa", "4b9e624e"),
    ("81b159a977", "fbe192f3"),
    ("2556504d15", "7ab783b0"),
    ("6540edb885", "5e546c11"),
    ("82449bd2e8", "4c612f9b"),
    ("a1b4b3fd4a", "4e444e51"),
    ("55b4af82a4", "02d50371"),
    ("2556504d15", "0e424555"),
    ("4d2d4fbc20", "5a3b4208"),
    ("82449bd2e8", "905f3174"),
    ("2045867a58", "7ec12ea5"),
    ("55b4af82a4", "781157b6"),
    ("68ac429152", "75d801e3"),
]

In [5]:
def fixable_after_discount(dbg, discounted=None):
    """Tell whether ``dbg`` has a stabilizing edit that is not discounted.

    Args:
        dbg: object exposing ``name_hash`` and ``report.stabilized['edit_path']``
            (an iterable of edit file paths).
        discounted: optional iterable of ``(name_hash, edit_hash)`` pairs to
            ignore. When omitted, the module-level ``discount`` list is used,
            which keeps existing one-argument calls working unchanged.

    Returns:
        ``True`` if at least one edit path yields a ``(name_hash, edit_hash)``
        pair that is not discounted, ``False`` otherwise (including when there
        are no edit paths at all).
    """
    if discounted is None:
        discounted = discount
    # Set membership instead of scanning the list once per edit.
    discounted = set(discounted)

    name_hash = dbg.name_hash
    for edit_path in dbg.report.stabilized['edit_path']:
        # The edit hash is the file name up to its first '.'.
        edit_hash = os.path.basename(edit_path).split('.')[0]
        if (name_hash, edit_hash) not in discounted:
            # One surviving edit is enough; no need to look at the rest.
            return True
    return False

In [6]:
from utils.analysis_utils import fmt_percent

# Number of entries in the round-1 tally; denominator for the overall percentages.
bench_total = len(round1.status.tally)

# Queries whose debugger mode is DbgMode.TIMEOUT.
to_cases = set()
# TIMEOUT queries that are FIX_FOUND and pass fixable_after_discount().
rd1_to_fixable = set()
# Queries whose debugger mode is DbgMode.FAST_FAIL.
ff_cases = set()
# FAST_FAIL queries that are FIX_FOUND and pass fixable_after_discount().
rd1_ff_fixable = set()

def fmt_break_down(p, q):
    """Render ``p`` out of ``q`` as ``"p/q (<percent>)"``.

    The percentage text is whatever ``fmt_percent(p, q, 1)`` returns.
    """
    share = fmt_percent(p, q, 1)
    return "{}/{} ({})".format(p, q, share)

# Sort every round-1 query into its failure-mode bucket and remember which
# ones have a fix that survives discounting.
for q in round1.status.tally:
    dbg = round1[q]

    # Pick the pair of sets that belongs to this query's mode.
    if dbg.mode == DbgMode.TIMEOUT:
        mode_cases, mode_fixable = to_cases, rd1_to_fixable
    elif dbg.mode == DbgMode.FAST_FAIL:
        mode_cases, mode_fixable = ff_cases, rd1_ff_fixable
    else:
        # Explicit raise instead of `assert False`: it is not stripped under
        # `python -O`, and the message names the offending query.
        raise AssertionError(f"unexpected mode {dbg.mode} for query {q}")

    mode_cases.add(q)
    # Only FIX_FOUND queries are passed to fixable_after_discount().
    if q in round1.status[DebugStatus.FIX_FOUND] and fixable_after_discount(dbg):
        mode_fixable.add(q)

def _rd1_breakdown(label, cases, fixable):
    """Print the round-1 breakdown for one failure mode.

    Prints the share of ``cases`` in the whole benchmark, then the no-proof /
    fixable / unfixable split inside ``cases``.

    Returns:
        ``(no_proof, no_fix)``: the NO_PROOF members of ``cases`` and the
        members that are neither NO_PROOF nor in ``fixable``.
    """
    print(label, fmt_break_down(len(cases), bench_total))
    no_proof = cases & round1.status[DebugStatus.NO_PROOF].items
    no_fix = cases - no_proof - fixable
    rows = (
        ("\t- 1-no-proof:", no_proof),
        ("\t- 1-fixable:", fixable),
        ("\t- 1-unfixable:", no_fix),
    )
    for tag, group in rows:
        print(tag, fmt_break_down(len(group), len(cases)))
    return no_proof, no_fix


rd1_to_no_proof, rd1_to_no_fix = _rd1_breakdown("TOs:", to_cases, rd1_to_fixable)
rd1_ff_no_proof, rd1_ff_no_fix = _rd1_breakdown("FFs:", ff_cases, rd1_ff_fixable)

print("")
rd1_all_fixable = rd1_to_fixable | rd1_ff_fixable
print("1-fixable:", fmt_break_down(len(rd1_all_fixable), bench_total))

TOs: 39/70 (55.7%)
	- 1-no-proof: 2/39 (5.1%)
	- 1-fixable: 25/39 (64.1%)
	- 1-unfixable: 12/39 (30.8%)
FFs: 31/70 (44.3%)
	- 1-no-proof: 1/31 (3.2%)
	- 1-fixable: 22/31 (71.0%)
	- 1-unfixable: 8/31 (25.8%)

1-fixable: 47/70 (67.1%)


In [7]:
# For Verus, the fixable cases are meant to be bundled and re-tested.
# NOTE(review): this loop only computes `bundle_path` for each stabilizing
# edit and never uses it — nothing is copied or written here. The actual
# bundling step appears to be missing; confirm before relying on this cell.

for query in rd1_to_fixable | rd1_ff_fixable:
    report = round1[query].report
    name_hash = round1[query].name_hash

    for edit_path in report.stabilized['edit_path']:
        edit_query = os.path.basename(edit_path)
        # Target name is "<name_hash>.<edit file name>" under the singleton bundle project.
        bundle_path = f"data/projs/verus_singleton_bundle/base.z3/{name_hash}.{edit_query}"

In [8]:
# List the round-1 queries in the NO_PROOF category, one per line.
for q in round1.status[DebugStatus.NO_PROOF]:
    print(q)

data/projs/vsystemsnew/base.z3/page-table-smt-impl_u__l2_refinement.4.smt2
data/projs/verismo.dedup/base.z3/arch__ptram__ptram_p2.smt2
data/projs/vsystemsnew/base.z3/mimalloc-smt-page_organization__PageOrg__impl_%4__merge_with_before_ll_inv_valid_unused.smt2


In [9]:
# For every query in the NOT_CREATED category, print the debugger3.py command
# line (with --create-project) for its name hash. The commands are only
# printed here, not executed.
for q in round1.status[DebugStatus.NOT_CREATED]:
    print("./src/debugger3.py -m auto -i", round1[q].name_hash, "--create-project")

In [10]:
# Print (do not run) the make_spaghet.py command for every query that is still
# waiting on a stage: UNFILTERED queries use the strainer's filter_dir,
# NOT_TESTED queries use its test_dir. Each directory must already contain
# at least one .smt2 file.
_pending_stages = (
    (DebugStatus.UNFILTERED, "filter_dir"),
    (DebugStatus.NOT_TESTED, "test_dir"),
)

for pending_status, dir_attr in _pending_stages:
    for q in round1.status[pending_status]:
        stage_dir = getattr(round1[q].strainer, dir_attr)
        assert len(list_smt2_files(stage_dir)) != 0
        print("./src/make_spaghet.py --verus --local -i", stage_dir)

In [11]:
from debugger.edit_info import EditAction, EditInfo

def _skolemize_into_dir(dbg):
    """Write the skolemized variant of ``dbg``'s query into VERUS_SKOLEM_DIR.

    Returns:
        ``False`` when no quantifier name could be chosen, when the edit could
        not be applied, or when saving failed. ``True`` when the file was
        written or was already present.
    """
    # (sic) the spelling matches the editor's method name.
    qname = dbg.editor.choose_qanme_to_skolemize()
    if qname is None:
        return False

    # The output file is named after the query's name hash and the edit's id.
    edit_hash = EditInfo(VERUS_SKOLEM_DIR, {qname: EditAction.SKOLEMIZE}).get_id()
    query_path = f"{VERUS_SKOLEM_DIR}/{dbg.name_hash}.{edit_hash}.smt2"

    # Already produced earlier: nothing to do, and not a failure.
    if os.path.exists(query_path):
        return True

    if not dbg.editor.edit_by_qname(qname, EditAction.SKOLEMIZE):
        return False
    return bool(dbg.editor.save(query_path))


# Queries from rd1_ff_no_fix for which no skolemized file could be produced.
skolem_failed = []

for q in rd1_ff_no_fix:
    if not _skolemize_into_dir(round1[q]):
        skolem_failed.append(q)


cce3dd987c131a5c430a27e224e9fdf1.pickle


[94m[DEBUG] [edit] proof path: dbg/7d8c4302ab/proofs/reseed.9908923975124208297.proof [0m
[94m[DEBUG] [edit] trace path: dbg/7d8c4302ab/traces/shuffle.15513482486857776214 [0m


[93m[WARN] [differ] qid constructor_accessor_axiom not found in dbg/7d8c4302ab/orig.smt2 [0m
d9a6fd7885eb3eb6c413c53316a91f44.pickle
[93m[WARN] [differ] qid constructor_accessor_axiom not found in dbg/d057fff3f5/orig.smt2 [0m
c4c1f6d166ad5b106554b653f12ed9c4.pickle


[94m[DEBUG] [edit] proof path: dbg/d057fff3f5/proofs/rename.17608056682826294385.proof [0m
[94m[DEBUG] [edit] trace path: dbg/d057fff3f5/traces/shuffle.227536422975727424 [0m
[94m[DEBUG] [edit] proof path: dbg/f6f3f962c0/proofs/shuffle.13526046991560484318.proof [0m
[94m[DEBUG] [edit] trace path: dbg/f6f3f962c0/traces/shuffle.5345052648385214803 [0m


[93m[WARN] [differ] qid constructor_accessor_axiom not found in dbg/f6f3f962c0/orig.smt2 [0m
b031de557bb34b4d198667b141063b1a.pickle


[94m[DEBUG] [edit] proof path: dbg/a896b920ca/proofs/rename.13547342595759592148.proof [0m
[94m[DEBUG] [edit] trace path: dbg/a896b920ca/traces/shuffle.3935504091054792581 [0m


[93m[WARN] [differ] qid constructor_accessor_axiom not found in dbg/a896b920ca/orig.smt2 [0m
e471625220affba6fc9c32b2df8243dd.pickle


[94m[DEBUG] [edit] proof path: dbg/8d058577cd/proofs/rename.9999379753263398590.proof [0m
[94m[DEBUG] [edit] trace path: dbg/8d058577cd/traces/shuffle.16031162538414592248 [0m


[93m[WARN] [differ] qid unknown_37 not found in dbg/8d058577cd/orig.smt2 [0m
[93m[WARN] [differ] qid unknown_45 not found in dbg/8d058577cd/orig.smt2 [0m
[93m[WARN] [differ] qid constructor_accessor_axiom not found in dbg/8d058577cd/orig.smt2 [0m
[93m[WARN] [differ] qid <null> not found in dbg/8d058577cd/orig.smt2 [0m
7f3387739f00544be2dda14c996725b3.pickle


[94m[DEBUG] [edit] proof path: dbg/be920877ca/proofs/shuffle.9555299481395785809.proof [0m
[94m[DEBUG] [edit] trace path: dbg/be920877ca/traces/shuffle.14575692651422062579 [0m


[93m[WARN] [differ] qid constructor_accessor_axiom not found in dbg/be920877ca/orig.smt2 [0m
bb177960ff0bf14e16b55d9b3b47a355.pickle
[93m[WARN] [differ] qid constructor_accessor_axiom not found in dbg/d6181053ff/orig.smt2 [0m
41884a1c910ece84f3f54ae2e3ed76a2.pickle


[94m[DEBUG] [edit] proof path: dbg/d6181053ff/proofs/shuffle.1879102912735967004.proof [0m
[94m[DEBUG] [edit] trace path: dbg/d6181053ff/traces/shuffle.15181022330680520267 [0m
[94m[DEBUG] [edit] proof path: dbg/090a2a7d67/proofs/reseed.7014106981689618503.proof [0m
[94m[DEBUG] [edit] trace path: dbg/090a2a7d67/traces/shuffle.10445631812693282071 [0m


[93m[WARN] [differ] qid constructor_accessor_axiom not found in dbg/090a2a7d67/orig.smt2 [0m
[93m[WARN] [differ] qid <null> not found in dbg/090a2a7d67/orig.smt2 [0m


In [12]:

# Round 2 for timeouts: name hashes of the queries in rd1_to_no_fix, as that
# name is currently bound in the kernel. It is a set, so the order of the
# targets is not fixed.
rd2_to_targets = [round1[q].name_hash for q in rd1_to_no_fix]

options = DebugOptions()
options.is_verus = True
# These targets were tested under the DOUBLETON mode name.
# Set the mode explicitly; otherwise AUTO would resolve to TIMEOUT.
options.mode = DbgMode.DOUBLETON

round2_to = BenchViewer(rd2_to_targets, options)
round2_to.status.print_status()


| category                  |   count | percentage   |
|---------------------------|---------|--------------|
| DebugStatus.FIX_NOT_FOUND |       8 | 66.67 %      |
| DebugStatus.FIX_FOUND     |       4 | 33.33 %      |
| total                     |      12 | 100.00 %     |


In [13]:
# Name hashes of the queries in rd1_ff_no_fix.
# NOTE(review): rd2_sk_targets is not used again in the visible cells.
rd2_sk_targets = [round1[q].name_hash for q in rd1_ff_no_fix]


# All .smt2 files currently in VERUS_SKOLEM_DIR. The original note says these
# are a subset of rd2_sk_targets; presumably this means each file derives from
# one of those queries — the list holds file paths, not name hashes.
skolmized = list_smt2_files(VERUS_SKOLEM_DIR)
options = DebugOptions()
# NOTE(review): is_verus is not set on this DebugOptions, unlike the other
# rounds — confirm that this is intended for SKOLEM mode.
options.mode = DbgMode.SKOLEM

round2_sk = BenchViewer(skolmized, options)
round2_sk.status.print_status()

| category                  |   count | percentage   |
|---------------------------|---------|--------------|
| DebugStatus.FIX_NOT_FOUND |       4 | 57.14 %      |
| DebugStatus.FIX_FOUND     |       3 | 42.86 %      |
| total                     |       7 | 100.00 %     |


In [15]:
# Combined round-1 + round-2 summary.
#
# The round-1 "no fix" sets are computed as "neither NO_PROOF nor fixable
# after discounting", the same definition the round-1 breakdown uses. That
# keeps FIX_FOUND queries whose only fixes were discounted inside the sets,
# so re-running earlier cells after this one sees the same values.

# --- timeouts ---
print("TOs:", fmt_break_down(len(to_cases), bench_total))
rd1_to_no_proof = to_cases & round1.status[DebugStatus.NO_PROOF].items
rd1_to_no_fix = to_cases - rd1_to_no_proof - rd1_to_fixable
print("\t- 1-no-proof:", fmt_break_down(len(rd1_to_no_proof), len(to_cases)))
print("\t- 1-fixable:", fmt_break_down(len(rd1_to_fixable), len(to_cases)))
rd2_to_fixable = round2_to.status[DebugStatus.FIX_FOUND].items
# Round-2 timeout fixes must be among the round-1 timeout queries.
assert rd2_to_fixable.issubset(to_cases)
print("\t- 2-fixable:", fmt_break_down(len(rd2_to_fixable), len(to_cases)))

# --- fast fails ---
print("FFs:", fmt_break_down(len(ff_cases), bench_total))
rd1_ff_no_proof = ff_cases & round1.status[DebugStatus.NO_PROOF].items
rd1_ff_no_fix = ff_cases - rd1_ff_no_proof - rd1_ff_fixable
print("\t- 1-no-proof:", fmt_break_down(len(rd1_ff_no_proof), len(ff_cases)))
print("\t- 1-fixable:", fmt_break_down(len(rd1_ff_fixable), len(ff_cases)))
rd2_ff_fixable = round2_sk.status[DebugStatus.FIX_FOUND].items

# Every skolem-round fix must trace back, via its pre-skolem name hash, to a
# round-1 fast-fail query that had no fix.
for q in rd2_ff_fixable:
    assert round1[round2_sk[q].pre_skolem_name_hash].given_query_path in rd1_ff_no_fix

print("\t- 2-fixable:", fmt_break_down(len(rd2_ff_fixable), len(ff_cases)))

# --- totals ---
print("")
print("1-fixable:", fmt_break_down(len(rd1_to_fixable | rd1_ff_fixable), bench_total))
print("2-fixable:", fmt_break_down(len(rd2_to_fixable | rd2_ff_fixable), bench_total))
print(
    "*-fixable:",
    fmt_break_down(
        len(rd1_to_fixable | rd1_ff_fixable | rd2_to_fixable | rd2_ff_fixable),
        bench_total,
    ),
)



TOs: 39/70 (55.7%)
	- 1-no-proof: 2/39 (5.1%)
	- 1-fixable: 25/39 (64.1%)
	- 2-fixable: 4/39 (10.3%)
FFs: 31/70 (44.3%)
	- 1-no-proof: 1/31 (3.2%)
	- 1-fixable: 22/31 (71.0%)
	- 2-fixable: 3/31 (9.7%)

1-fixable: 47/70 (67.1%)
2-fixable: 7/70 (10.0%)
*-fixable: 54/70 (77.1%)
