In [67]:
import os
import json
from pathlib import Path
from coqstoq import EvalTheorem
from coqstoq.check import EvalResults
import random

In [15]:
# Paths used later in this notebook (e.g. "evaluations/...") are relative to the
# current working directory, so normalise it here.
#   * started from a directory named "evaluation": move two levels up
#     (presumably to the repository root -- confirm against the repo layout);
#   * already in "coq-modeling": nothing to do;
#   * anywhere else: fail loudly.
# The directory is resolved once so the error message can report the real
# location; `os.curdir` is just the constant "." and carries no information.
notebook_dir = Path(os.curdir).resolve()
if notebook_dir.name == "evaluation":
    os.chdir("../..")
elif notebook_dir.name != "coq-modeling":
    raise ValueError(f"In an unexpected directory: {notebook_dir}")

In [16]:
def load_results(loc: Path) -> EvalResults:
    """Parse the JSON file at ``loc`` and build an ``EvalResults`` from it.

    Args:
        loc: Path of a JSON file accepted by ``EvalResults.from_json``.

    Returns:
        The ``EvalResults`` produced by ``EvalResults.from_json``.
    """
    with loc.open() as handle:
        raw_payload = json.load(handle)
    return EvalResults.from_json(raw_payload)

In [71]:
# Per-tool evaluation result files (relative to the working directory).
HUMAN_LOC = Path("evaluations/human.json")
RANGO_LOC = Path("evaluations/rango-results.json")
TACTICIAN_LOC = Path("evaluations/tactician-results.json")
PROVERBOT_LOC = Path("evaluations/proverbot-results.json")

# Root of the CoqStoq checkout that theorem source files are read from.
# Defaults to the original cluster location; set the COQSTOQ_LOC environment
# variable to point the notebook at a different checkout.
COQSTOQ_LOC = Path(
    os.environ.get("COQSTOQ_LOC", "/work/pi_brun_umass_edu/kthompson/CoqStoq")
)

# Directory receiving the generated .v files; created on first run.
ANALYSIS_LOC = Path("evaluations/analysis")
ANALYSIS_LOC.mkdir(parents=True, exist_ok=True)
SUCCESSES_HUMAN_LOC = ANALYSIS_LOC / "successes-human.v"
SUCCESSES_RANGO_LOC = ANALYSIS_LOC / "successes-rango.v"
FAILURES_LOC = ANALYSIS_LOC / "failures.v"



In [18]:
# Parse every result file up front; the cells below index into these objects.
HUMAN_EVAL, RANGO_EVAL, TACTICIAN_EVAL, PROVERBOT_EVAL = (
    load_results(results_path)
    for results_path in (HUMAN_LOC, RANGO_LOC, TACTICIAN_LOC, PROVERBOT_LOC)
)

In [19]:
def sample_successes(eval: EvalResults, sample_num: int, seed: int, timeout: int=600) -> list[int]:
    """Draw a reproducible random sample of indices of successful results.

    A result counts as a success when it has a proof, has a recorded time, and
    that time is strictly below ``timeout``.

    Args:
        eval: Evaluation results; only ``eval.results`` is read.
        sample_num: Number of distinct indices to draw.
        seed: Seed for the sampling RNG.
        timeout: Exclusive upper bound on ``res.time`` (same units as ``res.time``).

    Returns:
        ``sample_num`` distinct indices into ``eval.results``.

    Raises:
        ValueError: If there are fewer than ``sample_num`` successes
            (raised by ``random.Random.sample``).
    """
    # A private generator yields exactly the sample that
    # `random.seed(seed); random.sample(...)` would, but leaves the
    # process-wide RNG state untouched for any other code that relies on it.
    rng = random.Random(seed)
    successes = [
        i
        for i, res in enumerate(eval.results)
        if res.proof is not None and res.time is not None and res.time < timeout
    ]
    return rng.sample(successes, sample_num)


def sample_failures(eval: EvalResults, sample_num: int, seed: int, timeout: int=600) -> list[int]:
    """Draw a reproducible random sample of indices of failed results.

    A result counts as a failure when it has no proof, has no recorded time, or
    its time is greater than or equal to ``timeout``.

    Args:
        eval: Evaluation results; only ``eval.results`` is read.
        sample_num: Number of distinct indices to draw.
        seed: Seed for the sampling RNG.
        timeout: Inclusive lower bound on ``res.time`` for a timed-out result
            (same units as ``res.time``).

    Returns:
        ``sample_num`` distinct indices into ``eval.results``.

    Raises:
        ValueError: If there are fewer than ``sample_num`` failures
            (raised by ``random.Random.sample``).
    """
    # A private generator yields exactly the sample that
    # `random.seed(seed); random.sample(...)` would, but leaves the
    # process-wide RNG state untouched for any other code that relies on it.
    rng = random.Random(seed)
    failures = [
        i
        for i, res in enumerate(eval.results)
        if res.proof is None or res.time is None or timeout <= res.time
    ]
    return rng.sample(failures, sample_num)


# Draw 20 Rango successes and 20 Rango failures, both with seed 0, so the
# indices inspected and exported below are the same on every run.
success_sample = sample_successes(RANGO_EVAL, sample_num=20, seed=0)
failure_sample = sample_failures(RANGO_EVAL, sample_num=20, seed=0)


In [73]:
def get_theorem_str(thm: EvalTheorem, coqstoq_loc: Path) -> str:
    """Return the source text of ``thm``'s statement, cut out of its file.

    The file is ``coqstoq_loc / thm.project.workspace / thm.path``. Line
    numbers in ``thm.theorem_start_pos`` / ``thm.theorem_end_pos`` are used as
    indices into the file split on ``"\\n"``; the start column is inclusive and
    the end column exclusive.

    Args:
        thm: Theorem whose start/end positions delimit the statement.
        coqstoq_loc: Root directory containing the project workspaces.

    Returns:
        The statement text, with lines joined by ``"\\n"``.
    """
    source_file = coqstoq_loc / thm.project.workspace / thm.path
    file_lines = source_file.read_text().split("\n")
    start, end = thm.theorem_start_pos, thm.theorem_end_pos
    statement = file_lines[start.line : end.line + 1]
    # Trim the tail before the head: for a single-line statement both cuts hit
    # the same string, and the end column is measured from the untrimmed line.
    statement[-1] = statement[-1][: end.column]
    statement[0] = statement[0][start.column :]
    return "\n".join(statement)


## Inspect a Success

In [None]:
# Show one sampled success: where the theorem lives, Rango's proof, and the
# human proof recorded for the same index.
INSPECT_IDX = 19
proof_idx = success_sample[INSPECT_IDX]
result = RANGO_EVAL.results[proof_idx]
thm = result.thm
file = thm.project.workspace.name / thm.path
line = thm.theorem_start_pos.line
assert result.proof is not None
print(
    "Info",
    f"Coqstoq id {proof_idx}",
    f"File: {file}",
    f"Line: {line}",
    "Rango Proof",
    result.proof,
    "Human Proof",
    HUMAN_EVAL.results[proof_idx].proof,
    sep="\n",
)


Info
Coqstoq id 1201
File: compcert/backend/Selectionproof.v
Line: 163
Rango Proof

Proof.
  intros. destruct f; monadInv H; auto.
Qed.
Human Proof
Proof.
  intros. monadInv H. auto.
Qed.


In [75]:

# Export every sampled success twice -- once with Rango's proof and once with
# the human proof -- as annotated Coq snippets for side-by-side reading.
rango_strs: list[str] = []
human_strs: list[str] = []

for pidx in success_sample:
    rango_result = RANGO_EVAL.results[pidx]
    human_result = HUMAN_EVAL.results[pidx]
    file = rango_result.thm.project.workspace.name / rango_result.thm.path
    line = rango_result.thm.theorem_start_pos.line
    assert rango_result.proof is not None
    assert human_result.proof is not None
    p_thm = get_theorem_str(rango_result.thm, COQSTOQ_LOC)
    header = f"(** Coqstoq id {pidx}; file {file}; line {line} **)"
    # The statement text ends at the theorem's last column with no trailing
    # newline. A proof that starts directly with `Proof.` would otherwise be
    # glued onto the statement's final period, so add a newline unless the
    # proof already begins with whitespace.
    rango_sep = "" if rango_result.proof[:1].isspace() else "\n"
    human_sep = "" if human_result.proof[:1].isspace() else "\n"
    rango_strs.append(f"{header}\n{p_thm}{rango_sep}{rango_result.proof}")
    human_strs.append(f"{header}\n{p_thm}{human_sep}{human_result.proof}")

with SUCCESSES_RANGO_LOC.open("w") as fout:
    fout.write("\n\n".join(rango_strs))

with SUCCESSES_HUMAN_LOC.open("w") as fout:
    fout.write("\n\n".join(human_strs))
    

## Inspect a Failure

In [66]:
# Show one sampled Rango failure: where the theorem lives and the human proof
# recorded for the same index.
INSPECT_IDX = 19
proof_idx = failure_sample[INSPECT_IDX]
human_result = HUMAN_EVAL.results[proof_idx]
thm = human_result.thm
file = thm.project.workspace.name / thm.path
line = thm.theorem_start_pos.line
assert human_result.proof is not None
print(
    "Info",
    f"Coqstoq id {proof_idx}",
    f"File: {file}",
    f"Line: {line}",
    "Human Proof",
    human_result.proof,
    sep="\n",
)

Info
Coqstoq id 1710
File: compcert/cfrontend/SimplExprspec.v
Line: 377
Human Proof
Proof.
  specialize tr_rvalof_monotone. intros RVALOF.
  induction 1; intros; econstructor; unfold incl in *; eauto.
  induction 1; intros; econstructor; unfold incl in *; eauto.
Qed.


In [None]:
# Export every sampled Rango failure together with the human proof, as
# annotated Coq snippets, into FAILURES_LOC.
#
# This cell uses its own list and its own output file so it never clobbers the
# success exports, and it reads the theorem for *this* index instead of
# whatever `rango_result` an earlier cell happened to leave behind. No Rango
# proof is required here: the indices come from `sample_failures`, whose filter
# includes results with no proof.
failure_strs: list[str] = []

for pidx in failure_sample:
    failed_thm = RANGO_EVAL.results[pidx].thm
    human_result = HUMAN_EVAL.results[pidx]
    assert human_result.proof is not None
    file = failed_thm.project.workspace.name / failed_thm.path
    line = failed_thm.theorem_start_pos.line
    p_thm = get_theorem_str(failed_thm, COQSTOQ_LOC)
    # The statement text has no trailing newline; keep a proof that starts
    # directly with `Proof.` from being glued onto the statement's final period.
    human_sep = "" if human_result.proof[:1].isspace() else "\n"
    failure_strs.append(
        f"(** Coqstoq id {pidx}; file {file}; line {line} **)\n{p_thm}{human_sep}{human_result.proof}"
    )

with FAILURES_LOC.open("w") as fout:
    fout.write("\n\n".join(failure_strs))