In [1]:
import sys
sys.path.append("../../src")

import pickle
import os
import numpy as np

from theorem_database import TheoremDatabase

from expanding import construct_proof

from tree_parser import file_contents, meta_math_database

from models_functions import get_props_features_xy

In [2]:
%%time

# Read the modified set.mm source and parse it into the Metamath database that
# get_prop_data later indexes via database.propositions.
# n=4000 presumably caps the number of propositions parsed (the recorded output
# below ends at "proposition: 4000") — TODO confirm against tree_parser.
# The recorded run took roughly 23 s wall time.
text = file_contents("../../data/set_mod.mm")
database = meta_math_database(text,n=4000)

included 3651825 tokens from ../../data/set_mod.mm
proposition: 4000CPU times: user 19.6 s, sys: 1.61 s, total: 21.2 s
Wall time: 22.9 s


In [3]:
!ls -lhSa ../../data/trained_models

total 96
drwx------@ 1 lucasvoliveira  staff    16K Aug 12 00:13 [34m.[m[m
drwx------@ 1 lucasvoliveira  staff    16K Aug 12 00:13 [34m..[m[m
drwx------@ 1 lucasvoliveira  staff    16K Aug 12 00:10 [34m1660273773.971621[m[m
-rwx------@ 1 lucasvoliveira  staff   6.0K Aug 11 23:14 [31m.DS_Store[m[m


In [4]:
# Directory name (a timestamp) of the trained model to load from ../../data/trained_models.
model_name = "1660273773.971621"
model_path = os.path.join("../../data/trained_models", model_name, "model.pkl")

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model files that were produced locally or come from a trusted source.
# The context manager closes the file handle, which the bare open() call leaked.
with open(model_path, "rb") as model_file:
    model = pickle.load(model_file)

In [5]:
# Theorem lookup used by get_theorem_complexity, which indexes it by theorem label.
# Presumably backed by the files under ../../data/tdb — TODO confirm in theorem_database.
tdb = TheoremDatabase("../../data/tdb")

In [6]:
def get_prop_data(prop_label):
    """Collect the proof steps of one proposition and the links between them.

    Looks up ``prop_label`` in the notebook-level ``database``, expands its proof
    with ``construct_proof`` and walks the steps returned by ``get_steps_df()``.

    Returns a dict with two lists:

    * ``'steps'`` - one tuple per step:
      ``(step number, label, raw statement, raw proposition statement,
      complexity of the step's label, 1 if statement_depth == 0 else 0)``.
    * ``'links'`` - ``(input step number, step number)`` pairs, one for every
      entry in a step's ``inputs``.

    Side effect: every step object gets a ``_step_num`` attribute (1-based, in
    the order ``get_steps_df()`` yields the steps).
    """
    proof = construct_proof(database.propositions[prop_label])

    # Number all steps up front: a link needs the number of an input step,
    # which may not have been visited yet when its parent is processed.
    for number, step in enumerate(proof.get_steps_df(), start=1):
        step._step_num = number

    step_rows = []
    link_rows = []
    for step in proof.get_steps_df():
        step_rows.append((
            step._step_num,
            step.label,
            step.raw_statement,
            step.raw_prop_statement,
            get_theorem_complexity(step.label),  # Lemma complexity
            int(step.statement_depth == 0),
        ))
        link_rows.extend(
            (input_step._step_num, step._step_num) for input_step in step.inputs
        )

    return {'steps': step_rows, 'links': link_rows}

In [7]:
def get_theorem_complexity(theorem_name):
    """Return the complexity score of a theorem from the notebook-level ``tdb``.

    The score is defined recursively:

    * ``0`` when ``tdb[theorem_name]`` is ``None`` (label not known to the database);
    * ``1`` when the theorem record has an empty ``"steps"`` list;
    * otherwise the sum of the complexities of the theorems named by each
      step's ``"theorem"`` entry.

    The computed value is stored on the record under ``"complexity"`` so later
    calls that receive the same record return it without recomputing.

    NOTE(review): this recurses once per level of theorem dependency; a very
    deep dependency chain could hit Python's recursion limit.
    """
    theorem = tdb[theorem_name]

    # Identity check: `== None` would dispatch to the record's __eq__, which
    # may be overridden; `is None` tests exactly for the missing-entry marker.
    if theorem is None:
        return 0

    if "complexity" not in theorem:
        steps = theorem["steps"]

        if len(steps) == 0:
            # A theorem with no proof steps counts as a single unit.
            theorem["complexity"] = 1
        else:
            theorem["complexity"] = sum(
                get_theorem_complexity(step["theorem"]) for step in steps
            )

    return theorem["complexity"]

In [12]:
# Build the steps/links record for the proposition labelled "dfss2".
prop_data = get_prop_data("dfss2")

In [13]:
# Convert the proof record into model inputs (x) and targets (y).
# Presumably one row per proof step — TODO confirm against models_functions.
prop_params_x, prop_params_y = get_props_features_xy(prop_data)

In [14]:
# Class-probability predictions from the loaded model; the following cells read column 1.
pred_y = model.predict_proba(prop_params_x)

In [15]:
pred_y[:,1]

array([0.7065    , 0.84541667, 0.59139286, 0.64933333, 0.8025    ,
       0.4025    , 0.41041667, 0.23125   ])

In [64]:
# NOTE(review): this repeats the expression of the preceding cell, but its recorded
# output (5 values, execution count 64) disagrees with the preceding cell's 8 values,
# so it was produced from a different kernel state. Re-run the notebook top to bottom
# or delete this cell so the saved outputs are reproducible.
pred_y[:,1]

array([0.97666667, 0.98166667, 0.87      , 0.94833333, 0.72666667])

In [17]:
prop_data["steps"]

[(1,
  'bitr4i',
  '( A C_ B <-> A. x ( x e. A -> x e. B ) )',
  '( ph <-> ch )',
  10727,
  1),
 (2,
  '3bitri',
  '( A C_ B <-> A. x ( x e. A <-> ( x e. A /\\ x e. B ) ) )',
  '( ph <-> th )',
  15454,
  1),
 (3,
  'dfss',
  '( A C_ B <-> A = ( A i^i B ) )',
  '( A C_ B <-> A = ( A i^i B ) )',
  546507,
  1),
 (4,
  'eqeq2i',
  '( A = ( A i^i B ) <-> A = { x | ( x e. A /\\ x e. B ) } )',
  '( C = A <-> C = B )',
  1409025,
  1),
 (5,
  'df-in',
  '( A i^i B ) = { x | ( x e. A /\\ x e. B ) }',
  '( A i^i B ) = { x | ( x e. A /\\ x e. B ) }',
  1,
  1),
 (6,
  'abeq2',
  '( A = { x | ( x e. A /\\ x e. B ) } <-> A. x ( x e. A <-> ( x e. A /\\ x e. B ) ) )',
  '( A = { x | ph } <-> A. x ( x e. A <-> ph ) )',
  155514667,
  1),
 (7,
  'albii',
  '( A. x ( x e. A -> x e. B ) <-> A. x ( x e. A <-> ( x e. A /\\ x e. B ) ) )',
  '( A. x ph <-> A. x ps )',
  3027,
  1),
 (8,
  'pm4.71',
  '( ( x e. A -> x e. B ) <-> ( x e. A <-> ( x e. A /\\ x e. B ) ) )',
  '( ( ph -> ps ) <-> ( ph <-> ( ph /