In [1]:
import io
import os
import pickle
import random
import re
import subprocess

# TPTP Formatting
`fof(name,formula_type,statement).` (every formula is terminated by a period)
- name is the name of the formula
- formula_type is either axiom or conjecture, for our purposes.
- Axioms are facts
- Conjectures are what we want to prove/disprove


Extra TPTP notes:
- ! is the universal quantifier
- ? is the existential quantifier
- variables start with a capital letter; lowercase names (such as the `a` and `b` used below) are constants/propositions
- func(C) is a function with variable C
- => is the implication operator
- & is AND
- | is OR
- ~ is NEGATION
- <=> is EQUIVALENCE
- = is EQUALITY
- != is INEQUALITY

In [2]:
class Axiom:
    """A named formula that is emitted with the TPTP ``axiom`` role."""

    def __init__(self, name, statement):
        # Both values are stored untouched; they are only converted to
        # text when the object is rendered.
        self.name = name
        self.statement = statement

    def __str__(self):
        """Render the formula as ``fof(<name>,axiom,<statement>).``"""
        pieces = ["fof(", str(self.name), ",axiom,", str(self.statement), ")."]
        return "".join(pieces)

class Conjecture:
    """A named formula that is emitted with the TPTP ``conjecture`` role."""

    def __init__(self, name, statement):
        # Kept as given; text conversion is deferred to __str__.
        self.name = name
        self.statement = statement

    def __str__(self):
        """Render the formula as ``fof(<name>,conjecture,<statement>).``"""
        return "fof({},conjecture,{}).".format(str(self.name), str(self.statement))

# CNF creation
- & is AND
- | is OR
- ~ is NOT
- CNF -> (Disjunction) & CNF
- CNF -> (Disjunction)
- Disjunction -> Literal | Disjunction
- Disjunction -> Literal
- Literal -> ~Variable
- Literal -> Variable


In [None]:
def produce_disjunctions(literal_list):
    """Return every literal plus every ordered two-literal disjunction.

    The result starts with the literals themselves (de-duplicated by their
    string form, first-seen order), followed by ``left|right`` strings for
    every combination, grouped by the right-hand literal. Entries are
    de-duplicated by their text.
    """
    # Keyed by text so repeated literals collapse to a single entry.
    singles = {}
    for literal in literal_list:
        singles[str(literal)] = literal

    # Start from the single literals, then add the pairs after them.
    combined = dict(singles)
    for right in singles.values():
        for left in literal_list:
            text = str(left) + '|' + str(right)
            combined[text] = text

    return list(combined.values())

def produce_cnf(disjunction_list):
    """Return CNF strings made of one or two parenthesised disjunctions.

    The result starts with each ``(d)`` clause, followed by every
    ``(d1)&(d2)`` conjunction, grouped by the right-hand clause. Entries are
    de-duplicated by their text and keep first-seen order.
    """
    # Wrap each disjunction once; duplicates stay in this list on purpose
    # because they still drive the pairing loops below.
    clauses = ['(' + str(d) + ')' for d in disjunction_list]

    # A dict is used as an insertion-ordered set of formula strings.
    formulas = dict.fromkeys(clauses)
    for tail in clauses:
        for head in clauses:
            formulas.setdefault(head + '&' + tail)

    return list(formulas)

# Propositional variables the formulas are built from. The larger
# five-variable set is kept below as an alternative.
#variable_list = ['a','b','c', 'd', 'e']
variable_list = ['a','b']
literal_list = []
disjunction_list = []

# Every variable contributes two literals, in this order: the variable
# itself, then its negation.
for var in variable_list:
    literal_list.extend([str(var), '~'+str(var)])

d_list = produce_disjunctions(literal_list)
cnf_list = produce_cnf(d_list)

# Report how many disjunctions and CNF formulas were generated.
print(len(d_list), len(cnf_list), sep='\n')

In [4]:
# Every CNF formula serves both as a possible premise and as a possible
# conclusion.
statements = list(cnf_list)
conclusions = list(cnf_list)

# Each entry pairs the TPTP wrapper object with the plain formula text:
# [Axiom or Conjecture, formula]. The list index is used as the TPTP name.
statement_list = [[Axiom(str(i), s), s] for i, s in enumerate(statements)]
conclusion_list = [[Conjecture(str(i), c), c] for i, c in enumerate(conclusions)]

# Problems are stored as [TPTP text for the prover, plain-text form].
# The plain-text form is "<premise>.[<premise>.]><conclusion>.".
theorum_list = list()
theorum_list2 = list()

# One premise: every (statement, conclusion) pair.
# NOTE: the TPTP text is bound to `tptp_problem` rather than `input` so the
# builtin input() is not shadowed for the rest of the kernel session.
for s in statement_list:
    for c in conclusion_list:
        tptp_problem = str(s[0])+str(c[0])
        plain_text = str(s[1])+".>"+str(c[1])+"."
        theorum_list.append([tptp_problem,plain_text])

# Two distinct premises: every ordered pair of different statements combined
# with every conclusion. This list grows with
# len(statement_list) * (len(statement_list) - 1) * len(conclusion_list).
for s1 in statement_list:
    for s2 in statement_list:
        if s1 is not s2:
            for c in conclusion_list:
                tptp_problem = str(s1[0])+str(s2[0])+str(c[0])
                plain_text = str(s1[1])+"."+str(s2[1])+".>"+str(c[1])+"."
                theorum_list2.append([tptp_problem,plain_text])

In [None]:
# Number of one-premise problems, then number of two-premise problems.
print(len(theorum_list), len(theorum_list2), sep='\n')

# Use E to find proofs (or non-proofs).

In [6]:
# Scratch file the current problem is written to before E is invoked on it.
file_path = "f.tptp"
# Location of the E prover binary. The EPROVER_PATH environment variable
# overrides the machine-specific default so the notebook can run elsewhere
# without editing this cell.
eprover_path = os.environ.get("EPROVER_PATH", "/home/anmarch/source/eprover/PROVER/eprover")
found_proofs = list()
unfound_proofs = list()

# Each stored record is [TPTP problem text, CompletedProcess, plain-text form].
for tptp_problem, plain_text in theorum_list:
    # The prover is given a file name, so the scratch file is rewritten for
    # every problem.
    with io.open(file_path,'w',encoding='utf-8') as f:
        f.write(str(tptp_problem))

    result = subprocess.run([eprover_path, "--proof-object", str(file_path)], capture_output=True)

    if result.returncode == 0:
        #proof found
        found_proofs.append([str(tptp_problem),result,plain_text])

    elif result.returncode == 1:
        #proof not found
        unfound_proofs.append([str(tptp_problem),result,plain_text])

    else:
        #something else happened
        print(result)
        raise Exception("Something unexpected occured")

# Produce Training data.
- "Found" indicates a found proof.
- "Unfound" indicates no proof found.

In [7]:
def format_training_data(found, unfound):
    """Label prover results and group them for training.

    Each sample is indexed at position 2 to obtain its text. Samples from
    ``found`` are labelled "Found" and samples from ``unfound`` are labelled
    "Unfound".

    Returns a three-element list:
    [all labelled pairs (found first), found pairs only, unfound pairs only].
    Every pair is a two-element list ``[text, label]``.
    """
    theorems = [[sample[2], "Found"] for sample in found]
    non_theorems = [[sample[2], "Unfound"] for sample in unfound]

    # The combined list gets its own pair objects rather than sharing them
    # with the per-label lists.
    combined = [list(pair) for pair in theorems]
    combined.extend(list(pair) for pair in non_theorems)

    return [combined, theorems, non_theorems]

training_data, found, unfound = format_training_data(found_proofs, unfound_proofs)

# Persist the combined set and the two per-label subsets, one file each.
for pickle_name, payload in (
    ('training_data.pickle', training_data),
    ('theorum.pickle', found),
    ('non_theorum.pickle', unfound),
):
    with open(pickle_name, 'wb') as f:
        pickle.dump(payload, f)

In [None]:
found_proofs2 = list()
unfound_proofs2 = list()

# Only one in SAMPLE_DIVISOR of the two-premise problems is sent to the prover.
SAMPLE_DIVISOR = 500
# Fixed seed so the same subset is drawn on every run.
SAMPLE_SEED = 0

# A dedicated seeded generator keeps the subset reproducible, and sample()
# leaves theorum_list2 in its original order, so re-running this cell selects
# the same problems again.
sample_size = len(theorum_list2)//SAMPLE_DIVISOR
sampled_problems = random.Random(SAMPLE_SEED).sample(theorum_list2, sample_size)

# Each stored record is [TPTP problem text, CompletedProcess, plain-text form].
for tptp_problem, plain_text in sampled_problems:
    # The prover is given a file name, so the scratch file is rewritten for
    # every problem.
    with io.open(file_path,'w',encoding='utf-8') as f:
        f.write(str(tptp_problem))

    result = subprocess.run([eprover_path, "--proof-object", str(file_path)], capture_output=True)

    if result.returncode == 0:
        #proof found
        found_proofs2.append([str(tptp_problem),result,plain_text])
        # Show the prover's output for every problem it proved.
        output = result.stdout.decode()
        print(output)

    elif result.returncode == 1:
        #proof not found
        unfound_proofs2.append([str(tptp_problem),result,plain_text])

    else:
        #something else happened
        print(result)
        raise Exception("Something unexpected occured")

In [9]:
training_data, found, unfound = format_training_data(found_proofs2, unfound_proofs2)

# Persist the two-premise results: combined set plus per-label subsets.
for pickle_name, payload in (
    ('training_data_v2.pickle', training_data),
    ('theorum_v2.pickle', found),
    ('non_theorum_v2.pickle', unfound),
):
    with open(pickle_name, 'wb') as f:
        pickle.dump(payload, f)

In [None]:
# NOTE: pickle.load can execute arbitrary code from the file it reads; only
# load pickle files that this notebook produced.
with open('theorum.pickle','rb') as f:
    theorum_data = pickle.load(f)

with open('non_theorum.pickle','rb') as f:
    non_theorum_data = pickle.load(f)

# Spot-check one stored theorem: the 500th entry from the end. The offset is
# clamped to the list length so a small dataset shows its first entry instead
# of raising IndexError.
SPOT_CHECK_OFFSET = 500
if theorum_data:
    print(theorum_data[-min(SPOT_CHECK_OFFSET, len(theorum_data))])
else:
    print("theorum.pickle contains no samples")