In [None]:
import pickle
from prover.search_tree_context import InternalNode
import glob
import math

# Alternative patterns to restrict the analysis to one outcome:
# files = glob.glob('traces/proven*')
# files = glob.glob('traces/failed*')
# glob returns matches in arbitrary (filesystem-dependent) order; sort them so that
# positional access such as traces[0] is reproducible across runs and machines.
files = sorted(glob.glob('traces/*'))

traces = []
for file in files:
    # NOTE(security): pickle.load can execute arbitrary code - only load trace files you trust.
    with open(file, "rb") as f:
        trace = pickle.load(f)
    # append after the file handle has been closed
    traces.append(trace)

In [None]:
# node mapping of the first loaded trace, for ad-hoc inspection (raises IndexError if no traces were loaded)
nodes = traces[0].nodes

In [None]:
# number of trace files loaded
len(traces)

In [None]:
from prover.search_tree_context import ErrorNode


def add_data(node, visits):
    """Flatten the outgoing edges of ``node`` into one record per edge.

    Args:
        node: search-tree node exposing ``goal`` and ``out_edges``.
        visits: mapping from goal to visit count; looked up with ``node.goal``.

    Returns:
        A list of tuples
        ``(goal, tactic, distance_to_proof, visits, time, is_error)``
        where ``is_error`` is True when the edge's only destination is an
        ``ErrorNode``. Empty when the node has no outgoing edges.
    """
    # nothing to record for nodes without (or with empty) out_edges
    if not node.out_edges:
        return []

    rows = []
    for edge in node.out_edges:
        # todo could record error message for e.g. self-correcting proof approach
        targets = edge.dst
        is_error = len(targets) == 1 and isinstance(targets[0], ErrorNode)
        rows.append(
            (node.goal, edge.tactic, edge.distance_to_proof(), visits[node.goal], edge.time, is_error)
        )
    return rows


# one record per (goal, tactic) edge, pooled over every loaded trace
full_data = []

for trace in traces:
    nodes = trace.nodes

    # start each goal off with its own visit count ...
    updated_visit_count = {goal: entry.visit_count for goal, entry in nodes.items()}

    # ... then add every node's visits onto each of its ancestors
    # (assumes entries of `ancestors` are keys of `nodes` - otherwise this raises KeyError)
    for descendant in nodes.values():
        for ancestor in descendant.ancestors:
            updated_visit_count[ancestor] += descendant.visit_count

    for entry in nodes.values():
        full_data.extend(add_data(entry, updated_visit_count))


In [None]:
# total number of edge records collected across all traces
len(full_data)

In [None]:
# records whose distance_to_proof (tuple index 2) is finite
proven = [record for record in full_data if record[2] < math.inf]

In [None]:
# records whose distance_to_proof (tuple index 2) is infinite
failed = [record for record in full_data if record[2] == math.inf]

In [None]:
# number of records with infinite distance_to_proof
len(failed)

In [None]:
# share of failed records whose tactic ended in an ErrorNode (last tuple field is the error flag).
# a large share of failed tactics result in an error; these can safely be taken as negative/low reward examples
sum(1 for record in failed if record[-1]) / len(failed)

In [None]:
# fraction of all edge records that have a finite distance_to_proof
len(proven) / len(full_data)

In [None]:
# group the proving records by goal: goal -> [(tactic, distance_to_proof, visits, time), ...]
prov_dict = {}

for goal, tactic, distance, visit_total, elapsed, _errored in proven:
    prov_dict.setdefault(goal, []).append((tactic, distance, visit_total, elapsed))

In [None]:
# number of distinct goals with at least one finite-distance tactic
len(prov_dict)


In [None]:
# the goals that have at least one finite-distance tactic
prov_dict.keys()

In [None]:
# within each goal, order tactics by (distance_to_proof, time taken for tactic);
# then order the goals by visit count, largest first (as proxy for difficulty)
sorted(
    (sorted(entries, key=lambda e: (e[1], e[3])) for entries in prov_dict.values()),
    key=lambda ranked: ranked[0][2],
    reverse=True,
)

In [None]:
# hand-copied tactics for a single goal; fields look like (tactic, distance_to_proof, visits, time),
# i.e. the same layout as the prov_dict values - TODO confirm which goal this was taken from
paths = [('simp [hf]', 1, 64, 0.2502266130177304),
 ('exact or.inl hf', 1, 64, 0.10240329301450402),
 ('refine or.inl hf', 1, 64, 0.07327978097600862),
 ('simp [hf, or_imp_distrib]', 1, 64, 0.23496658500516787),
 ('tauto', 1, 64, 0.13764887902652845),
 ('simp *', 1, 64, 0.25073003000579774),
 ('simp [hf, eq_comm]', 1, 64, 0.302748822956346),
 ('simp [*]', 1, 64, 0.25384806498186663),
 ('simp [hf, polynomial.degree_zero]', 1, 64, 0.2960367589839734),
 ('simp [hf, zero_dvd_iff]', 1, 64, 0.2570988420047797),
 ('simp * at *', 1, 64, 0.25957931601442397),
 ('simp [hf, or_imp_distrib, forall_const]', 1, 64, 0.3196376989944838),
 ('simp [hf, or_self]', 1, 64, 0.2654978930368088),
 ('simp [*, or_imp_distrib]', 1, 64, 0.2677176539436914)]

In [None]:
# order by the second field, then by the fourth (time) field, both ascending
paths = sorted(paths, key=lambda entry: (entry[1], entry[3]))

In [None]:
# very large gap in time from the quickest to the slowest tactic for the proof
# (~0.07 for 'refine or.inl hf' vs ~0.32 for the slowest simp call, i.e. more than 4x)
paths