In [1]:
import os
from pathlib import Path 
import json
from dataclasses import dataclass

import pandas as pd

from coqstoq.check import Result

In [2]:
# The data paths used later in this notebook are relative ("evaluations/..."),
# so the working directory has to be normalised before anything is loaded.
if Path(os.curdir).resolve().name == "evaluation":
    # Launched from an "evaluation" directory: step two levels up
    # (presumably to the repository root -- TODO confirm the layout).
    os.chdir("../..")
elif Path(os.curdir).resolve().name != "coq-modeling":
    # os.curdir is just the literal ".", so report the resolved path to make
    # the error actionable.
    raise ValueError(f"In an unexpected directory: {Path(os.curdir).resolve()}")

In [23]:

@dataclass
class NamedDF:
    """A DataFrame bundled with a human-readable label."""

    # Label for the system the frame belongs to; get_totals uses it as the
    # name of the Series it returns.
    name: str
    # The tabular data itself; load_results builds it indexed by
    # (project, path, line) with "success" and "time" columns.
    df: pd.DataFrame


def load_results(p: Path, sysname: str) -> NamedDF:
    """Load every result JSON found under ``p`` into a labelled DataFrame.

    Args:
        p: Directory that is searched recursively for ``*.json`` files. Each
            file is parsed with ``Result.from_json``.
        sysname: Label stored on the returned ``NamedDF``.

    Returns:
        A ``NamedDF`` whose frame is indexed by ``(project, path, line)`` and
        has the columns ``success`` (``True`` when the result carries a proof)
        and ``time``. When no JSON file is found the frame is empty but still
        has that index and those columns.
    """
    columns = ["project", "path", "line", "success", "time"]
    rows = []
    # Sorted so the row order does not depend on filesystem enumeration order.
    for r_path in sorted(p.glob("**/*.json")):
        with r_path.open() as f:
            r_data = json.load(f)
        # The handle is only needed for reading; parse after it is closed.
        result = Result.from_json(r_data)
        rows.append({
            "project": result.thm.project.workspace.name,
            "path": result.thm.path,
            "line": result.thm.theorem_start_pos.line,
            "success": result.proof is not None,
            "time": result.time,
        })
    # Naming the columns explicitly keeps set_index from raising a KeyError
    # when the directory contains no results (the frame would otherwise have
    # no columns at all).
    df = pd.DataFrame(rows, columns=columns)
    return NamedDF(sysname, df.set_index(["project", "path", "line"]))

def filter_df(named_df: NamedDF, idx: pd.Index) -> NamedDF:
    """Build a new ``NamedDF`` holding only the rows of ``named_df`` labelled by ``idx``.

    The rows are selected with ``.loc`` and the original name is carried over.
    """
    selected_rows = named_df.df.loc[idx]
    return NamedDF(named_df.name, selected_rows)

def get_by_project_summary(named_df: NamedDF) -> pd.DataFrame:
    """Summarise the ``success`` column per project.

    Returns:
        One row per project with the columns ``RATE`` (mean of ``success``),
        ``SUCCESSES`` (sum) and ``TOTAL`` (count), ordered by ``TOTAL`` from
        largest to smallest.
    """
    per_project = named_df.df.groupby("project")
    summary = per_project.agg(
        RATE=pd.NamedAgg(column="success", aggfunc="mean"),
        SUCCESSES=pd.NamedAgg(column="success", aggfunc="sum"),
        TOTAL=pd.NamedAgg(column="success", aggfunc="count"),
    )
    return summary.sort_values("TOTAL", ascending=False)


def get_totals(named_df: NamedDF) -> pd.Series:
    """Aggregate the ``success`` column over the whole frame.

    Returns:
        A Series indexed by ``RATE`` (mean), ``SUCCESSES`` (sum) and ``TOTAL``
        (count), named after ``named_df.name`` so that several of them can be
        concatenated side by side.
    """
    stats = {
        "RATE": ("success", "mean"),
        "SUCCESSES": ("success", "sum"),
        "TOTAL": ("success", "count"),
    }
    overall = named_df.df.agg(**stats)
    # The aggregated frame has a single "success" column; return it under the
    # system's name.
    return overall["success"].rename(named_df.name)

## Ablations

In [8]:
# Results read from the test-rango directory, labelled "rango".
RANGO_RESULT = load_results(
    p=Path("evaluations/coqstoq-results/test-rango"),
    sysname="rango",
)

In [29]:
# Ablation runs; the second argument is the label each run is stored under.
RANGO_NO_LEMMA = load_results(Path("evaluations/coqstoq-results/test-abl-no-lemma"), "no-lemma")
# NOTE(review): assumes the no-proof run lives in "test-abl-no-proof", following
# the naming of the other ablation directories -- TODO confirm. Reading the
# no-lemma directory here would duplicate that data under the wrong label.
RANGO_NO_PROOF = load_results(Path("evaluations/coqstoq-results/test-abl-no-proof"), "no-proof")
RANGO_FIRST_STEP = load_results(Path("evaluations/coqstoq-results/test-abl-first-step"), "first-step")

# Runs included in the comparison table; RANGO_NO_PROOF is loaded above but is
# not part of this list.
ABL_RESULTS = [
    RANGO_NO_LEMMA,
    RANGO_FIRST_STEP,
]

In [30]:
# At least one ablation is required: the first one's index decides which
# theorems of the Rango run are counted.
assert len(ABL_RESULTS) > 0
# NOTE(review): Rango is restricted to the index of the first ablation only and
# the ablations themselves are not filtered, so the columns may cover different
# theorem sets when the runs differ in size -- confirm this is intended.
rango_total = get_totals(
    filter_df(named_df=RANGO_RESULT, idx=ABL_RESULTS[0].df.index)
)
abl_totals = list(map(get_totals, ABL_RESULTS))
# One column per system, rows RATE / SUCCESSES / TOTAL.
pd.concat([rango_total, *abl_totals], axis=1)

Unnamed: 0,rango,first-step,no-lemma
RATE,0.313043,0.208696,0.29
SUCCESSES,72.0,48.0,145.0
TOTAL,230.0,230.0,500.0


## Cross-Tool Comparison

In [26]:
# load_results requires a system label as its second argument; without it the
# call raises a TypeError.
RANGO_RESULT = load_results(Path("evaluations/coqstoq-results/test-rango"), "rango")
TACTICIAN_RESULT = load_results(Path("evaluations/coqstoq-results/test-tactician"), "tactician")

In [35]:
# Per-project success rate, successes and totals for Tactician.
get_by_project_summary(named_df=TACTICIAN_RESULT)

Unnamed: 0_level_0,RATE,SUCCESSES,TOTAL
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
compcert,0.203251,1238,6091
fourcolor,0.085011,114,1341
math-classes,0.25557,195,763
buchberger,0.18541,122,658
reglang,0.075472,24,318
poltac,0.625483,162,259
huffman,0.105469,27,256
zfc,0.333333,69,207
zorns-lemma,0.142857,25,175
ext-lib,0.393939,65,165


In [36]:
# Per-project success rate, successes and totals for Rango.
get_by_project_summary(named_df=RANGO_RESULT)

Unnamed: 0_level_0,RATE,SUCCESSES,TOTAL
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
compcert,0.325562,1983,6091
fourcolor,0.158091,212,1341
math-classes,0.397117,303,763
buchberger,0.273556,180,658
reglang,0.132075,42,318
poltac,0.833977,216,259
huffman,0.320312,82,256
zfc,0.362319,75,207
zorns-lemma,0.291429,51,175
ext-lib,0.636364,105,165


In [38]:
# Overall rate, successes and total for Tactician.
get_totals(named_df=TACTICIAN_RESULT)

Unnamed: 0,success
RATE,0.202866
SUCCESSES,2109.0
TOTAL,10396.0


In [39]:
# Overall rate, successes and total for Rango.
get_totals(named_df=RANGO_RESULT)

Unnamed: 0,success
RATE,0.320412
SUCCESSES,3331.0
TOTAL,10396.0
