In [1]:
import os

# Location of the Vampire binary. Can be overridden with the VAMPIRE_PATH
# environment variable so the notebook runs on machines other than the
# author's; the original path is kept as the default.
VAMPIRE_PATH = os.environ.get('VAMPIRE_PATH', '/home/apluska/.vampire/bin/vampire_z3_rel_static_casc2023_6749')

# Root of the TPTP library (override with the TPTP_PATH environment variable).
# Later cells build paths by plain string concatenation (TPTP_PATH + 'Problems/'),
# so os.path.join(..., '') is used to guarantee a trailing path separator.
TPTP_PATH = os.path.join(os.environ.get('TPTP_PATH', '/home/apluska/TPTP-v8.2.0/'), '')

In [2]:
from foreduce.tptp.parser import read_file as read_tptp

# Parse the group-theory problem GRP001-1 from the TPTP library; include
# directives are resolved relative to TPTP_PATH.
# NOTE(review): max_size presumably caps the size of the parsed problem -- confirm in read_file.
problem_file = TPTP_PATH + 'Problems/' + 'GRP/GRP001-1.p'
problem = read_tptp(problem_file, include_path=TPTP_PATH, max_size=10_000)

# Write the parsed problem back out in TPTP syntax to ./problem.p, which is
# the file the Vampire cells below are pointed at.
with open('problem.p', 'w') as out:
    out.write(problem.to_tptp())

In [3]:
from foreduce.vampire.vampire import VampireAutomatic

# Wrap the Vampire binary at VAMPIRE_PATH around the problem file written by
# the previous cell, then execute it. After run() the wrapper is queried for
# .proof, .problem and .tree in the cells below.
vampire = VampireAutomatic(VAMPIRE_PATH, './problem.p')
vampire.run()

In [4]:
# Show the proof log collected by the run: one "[SA] new:" line per clause,
# each followed by the inference that produced it (e.g. [input], [resolution 9,1]).
print(vampire.proof)

% Running in auto input_syntax mode. Trying TPTP
[SA] new: 1. product(X0,X0,identity) [input]
[SA] new: 2. goal_0 | product(a,b,c) [input]
[SA] new: 3. goal_1 | ~product(b,a,c) [input]
[SA] new: 4. product(identity,X1,X1) [input]
[SA] new: 5. product(X2,identity,X2) [input]
[SA] new: 6. product(inverse(X3),X3,identity) [input]
[SA] new: 7. product(X4,inverse(X4),identity) [input]
[SA] new: 8. product(X5,X6,multiply(X5,X6)) [input]
[SA] new: 9. X9 = X10 | ~product(X7,X8,X10) | ~product(X7,X8,X9) [input]
[SA] new: 10. product(X11,X15,X16) | ~product(X13,X14,X16) | ~product(X12,X14,X15) | ~product(X11,X12,X13) [input]
[SA] new: 11. product(X19,X20,X22) | ~product(X17,X21,X22) | ~product(X18,X20,X21) | ~product(X17,X18,X19) [input]
[SA] new: 12. ~goal_0 [input]
[SA] new: 13. ~goal_1 [input]
[SA] new: 14. identity = X0 | ~product(X1,X1,X0) [resolution 9,1]
[SA] new: 15. X2 = X3 | ~product(identity,X3,X2) [resolution 9,4]
[SA] new: 16. identity = X4 | ~product(inverse(X5),X5,X4) [resolution 

In [5]:
from foreduce.data.data import ProofTokens
from foreduce.transformer.tokenizer import TokenConfig

# Tokenizer configuration with the library's default settings.
config = TokenConfig()
# Token dataset with sequence length 64. The same literal 64 is repeated for
# the model (seq_len=64) and for every padded tensor built in later cells, so
# all of them must be changed together.
dataset = ProofTokens(config, seq_len=64)
# NOTE(review): random_mapping() presumably assigns the problem's symbols to
# token ids at random; no seed is set here, so results may differ between
# kernel restarts -- confirm and seed if reproducibility matters.
mapping = vampire.problem.random_mapping()
# Add the finished proof (clauses plus derivation tree) to the dataset using that mapping.
dataset.add_proof(vampire.problem, vampire.tree, mapping)

In [6]:
from foreduce.transformer.embedding import FormulaEmbedding
from torch.utils.data import DataLoader

# Model to train. seq_len=64 has to agree with the seq_len given to ProofTokens
# and with the width of the token tensors fed to the model in later cells.
embedding = FormulaEmbedding(config, seq_len=64, dim=1024, n_layers=8, n_heads=8)
# Shuffled mini-batches of 128 samples drawn from the proof dataset.
dataloader = DataLoader(dataset, batch_size=128, shuffle=True)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
from lightning import Trainer
import wandb
from lightning.pytorch.loggers import WandbLogger
import torch

# Trade some float32 matmul precision for speed on GPUs that support it.
torch.set_float32_matmul_precision('medium')

# Start the Weights & Biases run explicitly; the WandbLogger created below
# attaches to this already-active run.
wandb.init(project='foreduce')

try:
    # Gradients are accumulated over 8 batches of 128 samples before each
    # optimizer step; metrics are logged on every step.
    trainer = Trainer(max_epochs=8, logger=WandbLogger(), accumulate_grad_batches=8, log_every_n_steps=1)
    trainer.fit(embedding, dataloader)
finally:
    # Always close the W&B run, even if training is interrupted or fails.
    # Without this, an aborted fit() leaves the run open and the next
    # execution of this cell silently reuses it.
    wandb.finish()

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlexpk[0m. Use [1m`wandb login --relogin`[0m to force relogin


Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/apluska/miniconda3/envs/foreduce/lib/python3.12/site-packages/lightning/pytorch/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name       | Type       | Params | Mode 
--------------------------------------------------
0 | embeddings | Embedding  | 65.5 K | train
1 | layers     | ModuleList | 67.2 M | train
2 | out        | Linear     | 1.0 M  | train
---------------------------

Epoch 5:  73%|███████▎  | 465/640 [02:54<01:05,  2.66it/s, v_num=0w9e, train_loss_step=1.590, train_loss_epoch=1.160]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [None]:
# Move the trained model to the GPU and put it in evaluation mode for the
# inference-only cells that follow (inputs are sent to the GPU with .cuda() there).
embedding = embedding.cuda().eval()

In [None]:
import torch

# One zero-padded row of token ids per clause of the finished proof.
# The width 64 must match the seq_len the model was built with.
x = torch.zeros((len(vampire.problem.clauses), 64), dtype=torch.long)

for i, clause in enumerate(vampire.problem.clauses):
    # Truncate to the row width so a clause with more tokens than columns
    # cannot raise an IndexError (the interactive cell truncates the same way).
    tokens = clause.tokenize(config, mapping)[:x.shape[1]]
    for j, token in enumerate(tokens):
        x[i, j] = token

In [None]:
# Embed every clause. This is inference only, so skip autograd bookkeeping
# (same as the interactive cell) to avoid holding a graph for the whole model.
with torch.no_grad():
    _x = embedding(x.cuda())

n_clauses = len(vampire.problem.clauses)

# For each clause i collect (index, dataset target, cosine similarity to the
# embedding of the last clause), sorted by similarity, highest first.
similarities = sorted(
    (
        (
            i,
            # Sum of the last i + 1 integers of 1..n_clauses, minus one.
            # NOTE(review): presumably the position in dataset.target of the
            # (clause i, last clause) pair -- confirm against ProofTokens.add_proof.
            dataset.target[sum(range(n_clauses - i, n_clauses + 1)) - 1].item(),
            torch.nn.functional.cosine_similarity(_x[i], _x[-1], dim=0).item(),
        )
        for i in range(n_clauses)
    ),
    # `row` instead of `x` so the key function does not shadow the token tensor.
    key=lambda row: row[2],
    reverse=True,
)

# Columns printed: predicted similarity, dataset target, clause.
for i, target, sim in similarities:
    print(f"{sim:.2f}", f"{target:.2f}", vampire.problem.clauses[i])

1.00 1.00 $false
0.87 0.96 goal_1
0.86 0.92 product(b, a, c)
0.84 0.84 product(identity, b, b)
0.84 0.90 c = X0 | ~product(b, a, X0)
0.83 0.81 product(c, a, b)
0.83 0.90 product(c, a, b)
0.83 0.90 product(X1, X2, c) | ~product(X3, a, X2) | ~product(X1, X3, b)
0.83 0.88 c = multiply(b, a)
0.83 0.81 product(b, identity, b)
0.83 0.84 product(b, identity, b)
0.83 0.90 product(X8, c, a) | ~product(X8, b, identity)
0.81 0.88 product(X7, c, identity) | ~product(X7, b, a)
0.81 0.81 product(b, b, identity)
0.81 0.90 ~product(X4, c, X5) | product(X6, a, X5) | ~product(X4, b, X6)
0.81 0.88 product(c, identity, X11) | ~product(b, a, X11)
0.80 0.88 product(X9, c, multiply(X10, a)) | ~product(X9, b, X10)
0.80 0.81 ~product(b, X3, X2) | product(X2, X3, b)
0.80 0.83 b = multiply(c, a)
0.78 0.77 ~product(X13, b, identity) | product(X13, b, identity)
0.78 0.79 ~product(X14, b, identity) | product(X14, b, identity)
0.78 0.77 b = multiply(b, identity)
0.78 0.77 ~product(b, X0, X1) | product(b, X0, X1)
0.7

In [None]:
import torch

# Zero-padded token ids of the last clause of the finished proof, used as the
# target the interactive search steers towards. Width 64 must match the
# model's seq_len.
goal = torch.zeros(64, dtype=torch.long)
# Truncate to the tensor length so an over-long clause cannot raise an IndexError.
for i, token in enumerate(vampire.problem.clauses[-1].tokenize(config, mapping)[:goal.shape[0]]):
    goal[i] = token

In [None]:
from sortedcontainers import SortedList
import random

from foreduce.vampire.vampire import VampireInteractive

# Upper bound on the number of clause selections made in the interactive session.
MAX_STEP = 100

with VampireInteractive(VAMPIRE_PATH, './problem.p') as interactive:
    seen = 0                      # number of clauses already scored
    similarities = SortedList()   # (score, clause index), ascending by score
    premise_count = []            # per clause: 1 + sum of its premises' counts

    # The goal embedding is the same at every step, so compute it once
    # instead of re-running the model on it in each iteration.
    with torch.no_grad():
        goal_embedding = embedding(goal.cuda().unsqueeze(0))

    while not interactive.finished and interactive.step_count < MAX_STEP:
        new_clauses = interactive.problem.clauses[seen:]
        if new_clauses:
            # Zero-padded token ids for the new clauses. A separate name is
            # used so the `x` tensor built in the earlier cell is not overwritten.
            batch = torch.zeros((len(new_clauses), 64), dtype=torch.long)
            for i, clause in enumerate(new_clauses):
                # Truncate over-long clauses to the row width.
                for j, token in enumerate(clause.tokenize(config, mapping)[:batch.shape[1]]):
                    batch[i, j] = token
            with torch.no_grad():
                sim = torch.nn.functional.cosine_similarity(embedding(batch.cuda()), goal_embedding, dim=-1)

            for i, (s, p) in enumerate(zip(sim, interactive.tree[seen:])):
                # p holds the indices of the clause's premises; every premise
                # index is smaller than the clause's own, so its count exists already.
                premise_count.append(1 + sum(premise_count[idx] for idx in p))
                # Score: similarity to the goal, divided by sqrt(premise count).
                similarities.add((s.item() / premise_count[-1]**0.5, seen + i))

            seen = len(interactive.problem.clauses)

        if not similarities:
            # Every scored clause has been selected and nothing new was
            # derived: stop instead of raising IndexError from pop().
            break

        # Select the best-scoring clause that has not been picked yet.
        next_clause = similarities.pop(-1)[1]
        interactive.step(next_clause)

In [None]:
# Print the final state of the interactive session (the name outlives the
# `with` block). In the recorded output some clauses are wrapped in bold ANSI
# escapes -- presumably the ones that were selected via step(); TODO confirm.
print(interactive)

[1m 0: product(X0, X0, identity)[0m
[1m 1: goal_0 | product(a, b, c)[0m
2: goal_1 | ~product(b, a, c)
[1m 3: product(identity, X1, X1)[0m
[1m 4: product(X2, identity, X2)[0m
[1m 5: product(inverse(X3), X3, identity)[0m
6: product(X4, inverse(X4), identity)
[1m 7: product(X5, X6, multiply(X5, X6))[0m
[1m 8: X9 = X10 | ~product(X7, X8, X10) | ~product(X7, X8, X9)[0m
[1m 9: product(X11, X15, X16) | ~product(X13, X14, X16) | ~product(X12, X14, X15) | ~product(X11, X12, X13)[0m
[1m 10: product(X19, X20, X22) | ~product(X17, X21, X22) | ~product(X18, X20, X21) | ~product(X17, X18, X19)[0m
[1m 11: ~goal_0[0m
12: ~goal_1
[1m 13: ~product(X0, X1, X2) | product(X3, X1, X2) | ~product(X0, identity, X3)[0m
14: ~product(X0, identity, X1) | product(X2, inverse(X3), X1) | ~product(X0, X3, X2)
[1m 15: product(X0, X1, X2) | ~product(X3, X2, X1) | ~product(X0, X3, identity)[0m
[1m 16: product(X4, X5, identity) | ~product(X6, inverse(X7), X5) | ~product(X4, X6, X7)[0m
17: produc

In [None]:
# Best-scoring entry still left in the queue after the search loop:
# (similarity / sqrt(premise count), clause index). The SortedList is kept in
# ascending order, so the last element has the highest score.
similarities[-1]

(0.11403338114420573, 89)