In [1]:
VAMPIRE_PATH = '/home/apluska/.vampire/bin/vampire_z3_rel_static_casc2023_6749'
TPTP_PATH = '/home/apluska/TPTP-v8.2.0/'

We select problems which are at most 100_000 bytes in size, have functions with at most arity 8, and at most 16 functions of each arity. 

In [16]:
from foreduce.tptp.parser import read_file
import os
from tqdm.auto import tqdm
from itertools import zip_longest


total, success = 0, 0
num_variables, num_functions = 0, []
for dir, file in (pbar := tqdm([(dir, file) for dir in sorted(os.listdir(TPTP_PATH + 'Problems')) for file in sorted(os.listdir(TPTP_PATH + 'Problems/' + dir))])):
    current = file
    pbar.set_description(f'Selected {success}/{total} Problems, parsing {dir}/{file}')
    if not file.endswith('.p'):
        continue
    try:
        total += 1
        problem = read_file(TPTP_PATH + 'Problems/' + dir + '/' + file, include_path=TPTP_PATH, max_size=100_000)
        _variables = max(len(clause.variables()) for clause in problem.clauses)
        num_variables = max(num_variables, _variables)
        _symbols = []
        for s in problem.function_symbols() | problem.predicate_symbols():
            if s.arity > 8:
                break
            if len(_symbols) <= s.arity:
                _symbols += [0 for _ in range(s.arity + 1 - len(_symbols))]
            _symbols[s.arity] += 1
        else:
            if any(count > 16 for count in _symbols):
                continue
            num_functions = [max(a, b) for a, b in zip_longest(num_functions, _symbols, fillvalue=0)]
            success += 1
            os.makedirs('./problems/' + dir, exist_ok=True)
            with open('./problems/' + dir + '/' + file, 'w') as f:
                f.write(problem.to_tptp())
    except Exception as e:
        continue

print(f'Maximum number of variables: {num_variables}')
print(f'Maximum number of functions of each arity: {num_functions}')

Selected 0/0 Problems, parsing AGT/AGT001+1.p:   0%|          | 0/25963 [00:00<?, ?it/s]

Selected 4906/25473 Problems, parsing TOP/TOP053-1.p: 100%|██████████| 25963/25963 [07:42<00:00, 56.14it/s]         


Maximum number of variables: 96
Maximum number of functions of each arity: [16, 16, 16, 16, 9, 4, 3, 4, 5]


4906/25963 have been selected. The maximum number of functions of each respective arity is [16, 16, 16, 16, 9, 4, 3, 4, 5].

Next, we generate proofs for these problems using vampire with a timeout of 1. We limit ourselves to proofs which are at most 1_000_000 characters long.

In [17]:
import subprocess

with open('problems.txt', 'r') as f:
    problems = f.read().split('\n')

total, success = 0, 0
for dir, file in (pbar := tqdm([(dir, file) for dir in sorted(os.listdir('./problems')) for file in sorted(os.listdir('./problems/' + dir))])):
    pbar.set_description(f'Succesfully proved {success}/{total} Problems, proving {dir}/{file}')
    args = [VAMPIRE_PATH, './problems/' + dir + '/' + file,  '--show_new', 'on', '-t', '1', '--avatar', 'off', '--proof', 'off']
    try:
        result = subprocess.run(args, capture_output=True, text=True, timeout=5)
    except subprocess.TimeoutExpired:
        continue
    if result.returncode == 0:
        if 'Refutation found.' in result.stdout:
            success += 1    
            os.makedirs('./proofs/' + dir, exist_ok=True)
            with open('./proofs/' + dir + '/' + file, 'w') as f:
                f.write(result.stdout)
    total += 1

total, success

Succesfully proved 0/0 Problems, proving ALG/ALG002-1.p:   0%|          | 0/4906 [00:00<?, ?it/s]

Succesfully proved 1807/4902 Problems, proving TOP/TOP022+1.p: 100%|██████████| 4906/4906 [53:41<00:00,  1.52it/s]        


(4903, 1807)

We manage to prove about half the selected problems.

Let's bring our proofs into tensor form. We fix a maximum number of 1024 steps per proof and 128 tokens per clause. We will be generating 64 data points per problem, i.e. 8MB of data.

In [21]:
#num_functions = [16, 16, 16, 16, 9, 4, 3, 4, 5]

import os
import torch
from tqdm.auto import tqdm

from foreduce.data.data import VampireProofs
from foreduce.transformer.tokenizer import TokenConfig
from foreduce.vampire.parser import read_file

config = TokenConfig(num_functions=num_functions)
dataset = VampireProofs(config=config, max_steps=1024, max_tokens=128)

datapoints_per_proof = 64

for dir, file in (pbar := tqdm([(dir, file) for dir in sorted(os.listdir('./proofs')) for file in sorted(os.listdir('./proofs/' + dir))])):
    pbar.set_description(f'Parsing proof {dir}/{file}')
    problem, tree = read_file('./proofs/' + dir + '/' + file)
    for i in range(datapoints_per_proof):
        pbar.set_description(f'Converting proof of {dir}/{file} to {i+1}/{datapoints_per_proof} datapoints')
        dataset.add_proof(problem, tree)

torch.save(dataset, './proofs.pt')

Converting proof of TOP/TOP021+1.p to 64/64 datapoints: 100%|██████████| 1807/1807 [22:18:28<00:00, 44.44s/it]          


In [23]:
#dataset = torch.load('./proofs.pt')

len(dataset)

115648

In [26]:
print(f'Size of proofs.pt: {os.path.getsize("./proofs.pt") / 1024**3:.2f} GB')

Size of proofs.pt: 56.97 GB


In [28]:
data[5]

(tensor([[  1.,  61., 100.,  ...,   0.,   0.,   0.],
         [  1.,   4.,  61.,  ...,   0.,   0.,   0.],
         [  1.,  61.,  32.,  ...,   0.,   0.,   0.],
         ...,
         [  1.,  61.,  32.,  ...,   0.,   0.,   0.],
         [  1.,  61.,  32.,  ...,   0.,   0.,   0.],
         [  1.,  61.,  32.,  ...,   0.,   0.,   0.]]),
 tensor([1., 0., 0.,  ..., 0., 0., 0.]),
 tensor([  1.,   4.,  61., 100., 100.,  23.,   3.,  61.,  32.,  32.,  32.,  32.,
          32.,  32., 100.,  32.,  32.,  32.,  32.,  32.,  32., 100.,  23.,   2.,
           0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
           0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,  

In [None]:
inverted = {v: k for k, v in mapping.items()}

input = [inverted[i] if i in inverted else f"X{i}" for i in goal.tolist()]
result = ""
for i in range(input):
    result += input[i]
    if input[i+1] not in ["(", ")"]:
        result += " "

'<START>achievable(west(m(s(s(X12())))c(s(s(s(s(X12()))))))boatonwest()east(m(X20())c(X23())))<END>'

Now it's time to extract data from the proofs. Again, we only go for proofs with less than 1_000_000 bytes.

In [None]:
from foreduce.vampire.parser import parse_string
import os
from tqdm.auto import tqdm

success, total = 0, 0
attempts = []
for dir in (pbar := tqdm(os.listdir('./proofs/'))):
    for file in os.listdir('./proofs/' + dir):
        pbar.set_description(f'Parsed {success}/{total}, curently parsing {dir}/{file}')
        total += 1
        if os.path.getsize('./proofs/' + dir + '/' + file) > 1_000_000:
            continue
        with open('./proofs/' + dir + '/' + file, 'r') as f:
            problem = f.read()
        success += 1
        attempts.append(parse_string(problem))


Parsed 29/43, curently parsing RNG/RNG011-5.p: 100%|██████████| 1/1 [00:21<00:00, 21.48s/it]


In [14]:
from torchtune.modules import RotaryPositionalEmbeddings
import torch

rotary = RotaryPositionalEmbeddings(4, base=50)
x = torch.cat([torch.zeros(2, 7, 3), torch.ones(2, 7, 1)], dim=-1)
rotary(x[:, :, [2, 3, 0, 1]].view(2, 7, 1, -1)).view(2, 7, -1)[:, :, [2, 3, 0, 1]]


tensor([[[ 0.0000,  0.0000,  0.0000,  1.0000],
         [ 0.0000,  0.0000, -0.8415,  0.5403],
         [ 0.0000,  0.0000, -0.9093, -0.4161],
         [ 0.0000,  0.0000, -0.1411, -0.9900],
         [ 0.0000,  0.0000,  0.7568, -0.6536],
         [ 0.0000,  0.0000,  0.9589,  0.2837],
         [ 0.0000,  0.0000,  0.2794,  0.9602]],

        [[ 0.0000,  0.0000,  0.0000,  1.0000],
         [ 0.0000,  0.0000, -0.8415,  0.5403],
         [ 0.0000,  0.0000, -0.9093, -0.4161],
         [ 0.0000,  0.0000, -0.1411, -0.9900],
         [ 0.0000,  0.0000,  0.7568, -0.6536],
         [ 0.0000,  0.0000,  0.9589,  0.2837],
         [ 0.0000,  0.0000,  0.2794,  0.9602]]])