In [1]:
# !unzip proofs.zip -d proofs

In [2]:
# Step 1: Install elan (Lean toolchain manager)
!curl https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh -sSf | sh -s -- -y

# Step 2: Update Python process PATH so subprocess.run() can find `lean`
import os
elan_bin_path = os.path.expanduser("~/.elan/bin")
os.environ["PATH"] = elan_bin_path + ":" + os.environ["PATH"]

# Verify the installation by checking the version
!lean --version

import os
import subprocess

def setup_lean_project(project_dir="/tmp/lean_project"):
    """
    Create a Lean project that depends on Mathlib and download Mathlib's
    pre-compiled library files for it.

    Args:
        project_dir: Directory that will hold the project. It is created if
            missing and a ``lakefile.lean`` is (over)written inside it.

    Returns:
        ``project_dir``, unchanged, so the call can be assigned directly.

    Raises:
        subprocess.CalledProcessError: if ``lake exe cache get`` exits with a
            non-zero status. The captured stdout/stderr are printed first.
    """
    print(f"--- Setting up Lean project in: {project_dir} ---")
    os.makedirs(project_dir, exist_ok=True)

    # Content for the lakefile.lean
    lakefile_content = """
    import Lake
    open Lake DSL

    package «lean_project»

    require mathlib from git
      "https://github.com/leanprover-community/mathlib4.git"

    @[default_target]
    lean_lib «lean_project»
    """
    # The lakefile contains non-ASCII characters («»), so the encoding must be
    # pinned to UTF-8 instead of relying on the platform/locale default.
    with open(os.path.join(project_dir, "lakefile.lean"), "w", encoding="utf-8") as f:
        f.write(lakefile_content)

    # Run `lake exe cache get` to download Mathlib's pre-compiled files
    # This is much faster than building from source.
    print("--- Downloading Mathlib cache (this may take a few minutes)... ---")
    try:
        subprocess.run(
            ["lake", "exe", "cache", "get"],
            cwd=project_dir,
            check=True,
            capture_output=True,
            text=True
        )
        print("--- Mathlib cache downloaded successfully. ---")
    except subprocess.CalledProcessError as e:
        print("❌ Error setting up Mathlib cache.")
        print(f"--- STDOUT ---\n{e.stdout}")
        print(f"--- STDERR ---\n{e.stderr}")
        raise  # Stop execution if setup fails

    return project_dir

# Build the shared Lean project once. Later cells pass `lean_project_path` as
# the `project_dir` of every proof context; the bare expression on the last
# line displays the path as the cell output.
lean_project_path = setup_lean_project()
lean_project_path

[1minfo:[0m downloading installer
[1minfo: [mdefault toolchain set to 'stable'
Lean (version 4.24.0, x86_64-unknown-linux-gnu, commit 797c613eb9b6d4ec95db23e3e00af9ac6657f24b, Release)
--- Setting up Lean project in: /tmp/lean_project ---
--- Downloading Mathlib cache (this may take a few minutes)... ---
--- Mathlib cache downloaded successfully. ---


'/tmp/lean_project'

In [3]:
# Confirm that the local `utils` module imports and exposes `get_proof_variants`.
import utils
print(utils.get_proof_variants)

<function get_proof_variants at 0x719a1f8c2320>


In [4]:
# Print the source of `get_proof_variants` so the variant-generation logic is visible in the notebook.
import inspect
print(inspect.getsource(utils.get_proof_variants))

def get_proof_variants(s: str) -> List[str]:
    return [s] + apply_bulk_strategies(s)



In [26]:
import subprocess
import os
import re
# from concurrent.futures import ProcessPoolExecutor, as_completed
from utils import get_proof_variants
from typing import Dict
import threading
import concurrent.futures
import tempfile

# File that check_lean_proof appends Lean output to when log_errors=True
# ("~" is expanded to the current user's home directory).
LOG_PATH = os.path.expanduser("~/error.log")
# Ensure the log file's parent directory exists ("." if the path has no directory part).
os.makedirs(os.path.dirname(LOG_PATH) or ".", exist_ok=True)
# Guards appends to LOG_PATH, which happen from check_lean_proof's worker threads.
_log_lock = threading.Lock()

def check_lean_proof(proof_and_context: Dict, log_errors=False, max_workers=None,
                     allow_sorry=False) -> bool:
    """
    Checks a Lean‑4 proof string inside the given project using `lake`.
    If any variant succeeds, the *first* successful proof is saved to:
        corrected_proofs/<problem_id>/<proof_solver>/<attempt_id>.txt
    Returns True if a proof was saved, otherwise False.

    Args:
        proof_and_context: Dict with the keys ``proof``, ``formal_statement``,
            ``project_dir`` and ``metadata``; ``metadata`` must itself contain
            ``attempt_id``, ``problem_id`` and ``proof_solver``.
        log_errors: When True, the stdout of failing variants that contains
            ``error:`` is appended to ``LOG_PATH``.
        max_workers: Number of variants compiled concurrently. Defaults to
            one less than the CPU count (at least 1).
        allow_sorry: When False (default) a variant whose Lean output contains
            the ``declaration uses 'sorry'`` warning is rejected. Lean reports
            `sorry` as a warning and still exits with status 0, so without
            this check an unfinished proof would be recorded as verified.

    Raises:
        AssertionError: if a required key is missing.
    """
    # Verify the top‑level keys that must be present
    assert "proof" in proof_and_context, \
        "Missing 'proof' key – you need a proof string to test."
    assert "formal_statement" in proof_and_context, \
        "Missing 'formal_statement' key – you have to give the theorem statement."
    assert "project_dir" in proof_and_context, \
        "Missing 'project_dir' key – cannot locate the Lean project."
    assert "metadata" in proof_and_context, \
        "Missing 'metadata' key – you'll need context such as attempt_id."

    # Verify the required nested keys inside metadata
    assert "attempt_id" in proof_and_context["metadata"], \
        "Metadata lacks 'attempt_id' – needed to name the output file."
    assert "problem_id" in proof_and_context["metadata"], \
        "Metadata lacks 'problem_id' – needed for the directory structure."
    assert "proof_solver" in proof_and_context["metadata"], \
        "Metadata lacks 'proof_solver' – you need to know which solver produced this."

    # Unpack everything we need
    proof_string   = proof_and_context["proof"]
    statement      = proof_and_context["formal_statement"]
    project_dir    = proof_and_context["project_dir"]

    metadata       = proof_and_context["metadata"]
    attempt_id     = metadata["attempt_id"]
    problem_id     = metadata["problem_id"]
    solver_name    = metadata["proof_solver"]

    if max_workers is None:
        # os.cpu_count() returns None when the count cannot be determined.
        max_workers = max((os.cpu_count() or 2) - 1, 1)

    # Where the successful proof will be written.
    save_dir = os.path.join(
        "corrected_proofs", problem_id, solver_name
    )
    os.makedirs(save_dir, exist_ok=True)

    # Build every candidate proof.
    proof_variants = get_proof_variants(proof_string)

    # Each variant becomes a tiny Lean file: statement + proof.
    candidates = [
        f"{statement}\n{variant}" for variant in proof_variants
    ]

    def check_single_variant(idx, code):
        """Compile a single proof variant and return (success, variant_index)."""
        temp_path = None
        result = None
        try:
            # Lean source is UTF-8 (and routinely contains non-ASCII symbols),
            # so the temp file must not be written in the locale encoding.
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".lean", dir=project_dir,
                delete=False, encoding="utf-8",
            ) as f:
                temp_path = f.name
                f.write(code)

            # Run Lean via lake.
            desired = 100_000
            command = [
                "lake", "env", "lean",
                f"-DmaxRecDepth={desired}",
                os.path.basename(temp_path)  # Use basename since we're in project_dir
            ]
            result = subprocess.run(
                command,
                cwd=project_dir,
                capture_output=True,
                text=True,
                encoding="utf-8",   # decode Lean's output as UTF-8 ...
                errors="replace",   # ... and never fail on a stray byte
                timeout=30,
            )

            output = result.stdout
            has_error = "error:" in output
            uses_sorry = "declaration uses 'sorry'" in output

            # Success = returncode 0, no "error:" in stdout and (unless
            # explicitly allowed) no `sorry` warning.
            if result.returncode == 0 and not has_error and (allow_sorry or not uses_sorry):
                return (True, idx)

            if log_errors and has_error:
                with _log_lock:
                    with open(LOG_PATH, "a", encoding="utf-8") as g:
                        g.write(output)

            return (False, idx)

        except Exception as e:
            # Includes subprocess.TimeoutExpired: a variant that does not
            # finish within the timeout counts as a failure.
            print(f"Exception ({type(e).__name__}) for variant {idx}: {e}")
            if result is not None:
                print("---- subprocess stdout ----")
                print(result.stdout or "<no stdout>")
                print("---- subprocess stderr ----")
                print(result.stderr or "<no stderr>")
            return (False, idx)
        finally:
            # Best-effort clean-up of the temp file, even if writing it failed.
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except OSError:
                    pass

    # Use ThreadPoolExecutor for parallel execution
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all variants for parallel execution
        future_to_index = {
            executor.submit(check_single_variant, idx, code): idx
            for idx, code in enumerate(candidates)
        }

        # Process results as they complete
        for future in concurrent.futures.as_completed(future_to_index):
            success, variant_idx = future.result()
            if success:
                # Cancel tasks that have not started yet. Variants that are
                # already running cannot be cancelled; leaving the `with`
                # block waits for them to finish.
                for f in future_to_index:
                    f.cancel()

                # Save the successful proof
                out_path = os.path.join(save_dir, f"{attempt_id}.txt")
                with open(out_path, "w", encoding="utf-8") as out_f:
                    out_f.write(proof_variants[variant_idx])

                return True

    # No variant succeeded
    return False

def check_proofs_in_parallel(proof_contexts: list[dict], parallel_workers: int = None) -> bool:
    """
    Runs multiple proof checks in parallel, numbered 1..len(proof_contexts).
    Returns True on first success.
    """
    if not proof_contexts:
        return False

    if parallel_workers is None:
        parallel_workers = 4

    with concurrent.futures.ProcessPoolExecutor(max_workers=parallel_workers) as executor:
        futures = {
            executor.submit(check_lean_proof, ctx): idx
            for idx, ctx in enumerate(proof_contexts, start=1)
        }
        for future in concurrent.futures.as_completed(futures):
            try:
                if future.result():
                    return True  # early exit on first successful attempt
            except Exception:
                pass  # optionally log
    return False


In [6]:
# # 1. Define the proof and context with the Mathlib header
# correct_proof_dict = {
#     'formal_statement': 'import Mathlib.Tactic\ntheorem two_plus_two_is_four : 2 + 2 = 4',
#     'proof': ':= by rfl',
#     'project_dir': lean_project_path,
#     'metadata': {'proof_solver': 'example_solver', 'problem_id': 'example_id', 'attempt_id': '1'}
# }


# check_lean_proof(correct_proof_dict)


# Assumptions

In [7]:
# assert that the folder structure is as expected
import os
from typing import Tuple

def check_structure(path: str):
    """
    Assert that `path` is laid out as <path>/<number>/<folder>/<files>.

    Checks, in order: `path` is a non-empty directory containing only
    directories; those directories are named 0, 1, 2, ... with no gaps; every
    numbered directory holds the same set of sub-folder names as "0"; and
    every sub-folder holds the same set of file names.

    Raises:
        AssertionError: on the first violation found.
    """
    def _assert_consecutive_numeric_dirs(root: str, require_start_zero: bool = True) -> Tuple[int, int]:
        """
        Require that `root` is a non-empty directory whose immediate children
        are all directories named as canonical naturals ("0", "1", "2", ...;
        no leading zeros) forming a gap-free range, starting at 0 when
        `require_start_zero` is set.

        Returns the (smallest, largest) value; raises AssertionError otherwise.
        """
        if not os.path.isdir(root):
            raise AssertionError(f"{root!r} is not a directory")
        children = os.listdir(root)
        assert children, f"{root!r} is empty"

        canonical_nat = re.compile(r"0|[1-9][0-9]*\Z")
        values = []
        for child in children:
            child_path = os.path.join(root, child)
            assert os.path.isdir(child_path), f"{child_path!r} is not a directory"
            assert canonical_nat.fullmatch(child), (
                f"{child!r} is not a canonical natural number ('0', '1', '2', ... without leading zeros)"
            )
            values.append(int(child))

        lowest, highest = min(values), max(values)
        if require_start_zero:
            assert lowest == 0, f"Sequence must start at 0 but starts at {lowest}"

        wanted, found = set(range(lowest, highest + 1)), set(values)
        if found != wanted:
            parts = [
                f"Immediate numeric directory names {sorted(children)} do not form a consecutive range {lowest}..{highest}"
            ]
            gaps = sorted(wanted - found)
            surplus = sorted(found - wanted)
            if gaps:
                parts.append(f"missing {gaps}")
            if surplus:
                parts.append(f"unexpected {surplus}")
            raise AssertionError("; ".join(parts))
        return lowest, highest

    def _child_names(parent: str, keep) -> set:
        """Names of the entries of `parent` whose full path satisfies `keep`."""
        return {entry for entry in os.listdir(parent) if keep(os.path.join(parent, entry))}

    # The root must be a non-empty directory ...
    assert os.path.isdir(path), f"{path} is not a directory"
    top_level = os.listdir(path)
    assert top_level, f"{path} is empty"

    # ... that contains directories only ...
    only_dirs = all(os.path.isdir(os.path.join(path, entry)) for entry in top_level)
    assert only_dirs, f"Not all items in {path} are directories"

    # ... named 0..N without gaps.
    _assert_consecutive_numeric_dirs(path)

    # The sub-folder names found under <path>/0 are the reference set.
    zero_folder = os.path.join(path, "0")
    assert os.path.isdir(zero_folder), f"{zero_folder} is not a directory"
    reference_folders = _child_names(zero_folder, os.path.isdir)

    # Every other numbered directory must expose exactly that set.
    for numbered in os.listdir(path):
        numbered_path = os.path.join(path, numbered)
        if numbered == "0" or not os.path.isdir(numbered_path):
            continue
        assert _child_names(numbered_path, os.path.isdir) == reference_folders, (
            f"Folder names in {numbered_path} do not match those in {zero_folder}"
        )

    # All sub-folders, across all numbered directories, must hold the same file names.
    reference_files = None
    for numbered in os.listdir(path):
        numbered_path = os.path.join(path, numbered)
        for folder in os.listdir(numbered_path):
            folder_path = os.path.join(numbered_path, folder)
            assert os.path.isdir(folder_path), f"{folder} is not a directory"
            found_files = _child_names(folder_path, os.path.isfile)
            if reference_files is None:
                reference_files = found_files
                continue
            assert reference_files == found_files, (
                f"File names in {folder} are not consistent with other subdirectories"
            )

# Assumes the current working directory contains a folder named "proofs_miniF2F-test"
# with the structure enforced by check_structure; raises AssertionError otherwise.
check_structure("proofs_miniF2F-test")


In [8]:
!pip install datasets

Defaulting to user installation because normal site-packages is not writeable


In [9]:
from datasets import load_dataset
# Load the "test" split of HaimingW/miniF2F-lean4 via the `datasets` library and
# convert it to a pandas DataFrame. The evaluation loop reads its "header" and
# "formal_statement" columns by row position.
miniF2F_test_df = load_dataset("HaimingW/miniF2F-lean4", split="test").to_pandas()

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
import os
import json
from typing import Any, Union

def _read_text_file(path: str) -> str:
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except UnicodeDecodeError:
        with open(path, "r", encoding="latin-1") as f:
            return f.read()

def build_structure(path: str) -> Union[dict[str, Any], str, None]:
    """
    Mirror the tree rooted at `path` as nested Python objects.

    Returns:
      - for a directory: a dict with one entry per sub-directory (keyed by its
        name, value built recursively) and one per `.txt` file (keyed by the
        file name without extension, value = file text), in sorted name order;
        other files are left out;
      - for a `.txt` file: its text content;
      - for anything else (other file types, missing paths): None.
    """
    if not os.path.isdir(path):
        # Leaf case: only .txt files carry content.
        if os.path.isfile(path) and path.lower().endswith(".txt"):
            return _read_text_file(path)
        return None

    tree: dict[str, Any] = {}
    for child in sorted(os.listdir(path)):
        child_path = os.path.join(path, child)
        if os.path.isdir(child_path):
            tree[child] = build_structure(child_path)
        elif os.path.isfile(child_path) and child.lower().endswith(".txt"):
            stem, _ext = os.path.splitext(child)
            tree[stem] = _read_text_file(child_path)
    return tree

# structure = build_structure("proofs_miniF2F-test")
# structure['0']['AI-MO_Kimina-Prover-Preview-Distill-7B']

In [11]:
# miniF2F_test_df.head(5)

In [12]:
import os
import json
from typing import Dict
from tqdm.auto import tqdm  # or: from tqdm import tqdm

In [27]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Optional, Callable

def check_proofs_in_parallel_output_arr(
    proof_contexts: List[dict],
    parallel_workers: int | None = None,
    on_progress: Optional[Callable[[int], None]] = None,
    log_errors = False
) -> List[int]:
    """
    Runs multiple proof checks in parallel (threaded).
    Returns a list of 1/0 aligned with the input order.
    Calls on_progress(1) each time an attempt completes.
    """
    # assert isinstance(parallel_workers, int) and parallel_workers > 1, "parallel_workers must be > 1"
    if not proof_contexts:
        return []

    results: List[int] = [0] * len(proof_contexts)

    def _run(idx: int, ctx: dict) -> Tuple[int, int]:
        name = f"proof_{idx}"
        ok = check_lean_proof(ctx, log_errors)
        return idx, 1 if ok else 0

    with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
        futures = {executor.submit(_run, i, ctx): i for i, ctx in enumerate(proof_contexts, start=1)}
        for future in as_completed(futures):
            idx = futures[future]
            try:
                i, val = future.result()
            except Exception as exc:
                print(exc)
                i, val = idx, 0
            results[i - 1] = val
            if on_progress:
                on_progress(1)

    return results

In [14]:
# row1 = miniF2F_test_df.loc[8]

# correct_proof_dict = [{
#     'formal_statement': row1['header']+"\n"+row1['formal_statement']+"\n",
#     'proof': structure['8']['AI-MO_Kimina-Prover-Preview-Distill-7B'][str(i)],
#     'project_dir': lean_project_path,
#     'metadata': {'proof_solver': 'AI-MO_Kimina-Prover-Preview-Distill-7B', 'problem_id': '8', 'attempt_id': str(i)}
# } for i in range(1,25)]

# check_proofs_in_parallel_output_arr(correct_proof_dict)

In [15]:
# row1 = miniF2F_test_df.loc[0]

# correct_proof_dict = [{
#     'formal_statement': row1['header']+"\n"+row1['formal_statement']+"\n",
#     'proof': structure['0']['deepseek-ai_DeepSeek-Prover-V2-7B'][str(i)],
#     'project_dir': lean_project_path,
#     'metadata': {'proof_solver': 'deepseek-ai_DeepSeek-Prover-V2-7B', 'problem_id': '0', 'attempt_id': str(i)}
# } for i in range(1,25)]

# check_proofs_in_parallel_output_arr(correct_proof_dict)

In [16]:
# !rm -rf corrected_proofs
# !mkdir corrected_proofs

# Evaluation Loop

In [None]:
# Number of leading problems (in ascending numeric id order) that the evaluation
# loop skips; presumably raised to resume an interrupted run — confirm with the author.
START_IDX = 0

In [None]:
import os
import numpy as np
from tqdm.auto import tqdm  # progress bar
from bitarray import bitarray
import json

# Read every proof attempt into a nested dict: problem id -> model name -> attempt id -> proof text.
structure_json = build_structure("proofs_miniF2F-test")

# print(structure_json)

# Derive problem/model/attempt axes from the verified folder structure.
# Problem ids are directory names, so they are sorted numerically, not lexically.
problem_ids = sorted(structure_json.keys(), key=lambda x: int(x))
# Skip the first START_IDX problems.
problem_ids = problem_ids[START_IDX:]
# NOTE(review): raises IndexError if START_IDX is >= the number of problems.
first_problem = problem_ids[0]
model_names = sorted(structure_json[first_problem].keys())
# Attempt ids are taken from the first model of the first problem and assumed
# to be the same for every model/problem (check_structure enforces equal file names).
attempt_ids = sorted(structure_json[first_problem][model_names[0]].keys(), key=lambda x: int(x))

num_problems = len(problem_ids)
num_models = len(model_names)
# NOTE(review): num_attempts is not used anywhere in this cell.
num_attempts = len(attempt_ids)

# Alias for the dataset frame; both `df` and `miniF2F_test_df` are used below.
df = miniF2F_test_df

prev_success = None # marker ("✔"/"✘") for the previously evaluated pair; None before the first one

# One progress-bar tick per (problem, model) pair.
with tqdm(total=num_models * num_problems,
          desc="Evaluating (problem, model) pairs",
          unit="pair",
          leave=True,
          dynamic_ncols=True) as pbar:

    # Outer loop iterates over problems, inner loop over models.
    for p_axis_idx, p_str in enumerate(problem_ids):
        # Shift the loop counter back to the position in the full, unsliced
        # problem list. It is used below as a row position in the dataset, which
        # assumes the k-th problem folder corresponds to row k — TODO confirm.
        p_axis_idx += START_IDX
        for m_idx, model in enumerate(model_names):
            # Show the current model & problem and the result of the previous pair
            cur_desc = f"model={model} prob={p_str}"
            if prev_success is not None:
                cur_desc += f"  prev_success={prev_success}"

            # `refresh=False` prevents an extra refresh
            pbar.set_description(cur_desc, refresh=False)

            attempts_dict = structure_json[p_str][model]
            formal_statement = miniF2F_test_df.iloc[p_axis_idx]["formal_statement"]
            header = df.iloc[p_axis_idx]["header"]

            # Build one context per attempt in the format check_lean_proof expects.
            proof_contexts = []
            for a_str in attempt_ids:
                # A missing attempt file is checked as an empty proof.
                proof_text = attempts_dict.get(a_str, "")
                proof_contexts.append({
                    "formal_statement": header+"\n"+formal_statement+"\n",
                    "proof":            proof_text,
                    "project_dir":      lean_project_path,
                    "metadata": {
                        "proof_solver": model,
                        "problem_id":   p_str,
                        "attempt_id":   a_str
                    }
                })

            # Run the checks in parallel; `outcomes` holds one 1/0 flag per attempt.
            # The flags are only used for the progress marker below; successful
            # proofs are persisted by check_lean_proof under corrected_proofs/.
            outcomes = check_proofs_in_parallel_output_arr(
                proof_contexts
            )

            # Did *any* attempt succeed for this (model, problem) pair?
            this_success = any(outcomes)
            prev_success = "✔" if this_success else "✘" # shown in the description of the *next* pair

            # Finally advance the bar by one pair
            pbar.update(1)

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=161  prev_success=✔:   1%|          | 3/249 [01:26<1:58:49, 28.98s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpu97uishe.lean']' timed out after 30 seconds


model=Goedel-LM_Goedel-Prover-SFT prob=162  prev_success=✘:   2%|▏         | 5/249 [02:22<1:56:00, 28.53s/pair]           

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxltjq6m2.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp8osx8e9y.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpe_7_619j.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpzpqjtfun.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpo6uf7mlg.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp9kykhiav.lean']' timed out after 30 seconds


model=deepseek-ai_DeepSeek-Prover-V2-7B prob=162  prev_success=✔:   2%|▏         | 6/249 [02:53<1:58:43, 29.31s/pair]

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpgklknl6m.lean']' timed out after 30 seconds


model=deepseek-ai_DeepSeek-Prover-V2-7B prob=163  prev_success=✔:   4%|▎         | 9/249 [04:16<1:53:10, 28.29s/pair]     

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpyt5o605n.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp9mqgl2a8.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpm8mnm2hs.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpkt_q1gel.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpg65kdzfw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpoegkwdns.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpy26z4qmb.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=164  prev_success=✘:   4%|▍         | 11/249 [05:15<1:54:48, 28.94s/pair]           

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpodk3uy8l.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpwp0a_4nd.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpiwj6_r3m.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpph1rso_0.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpnp88w3xw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpq7c402_6.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpdn5b2kfl.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=164  prev_success=✘:   5%|▍         | 12/249 [05:46<1:56:43, 29.55s/pair]

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6jbkvfc5.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmrtf5k3b.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmphhmdiooc.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmppl4pnbhk.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpinjftv5b.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpqusruu25.lean']' timed out after 30 seconds


model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=165  prev_success=✘:   5%|▌         | 13/249 [06:17<1:57:56, 29.98s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpgsjb6wd7.lean']' timed out after 30 seconds


model=Goedel-LM_Goedel-Prover-SFT prob=165  prev_success=✘:   6%|▌         | 14/249 [06:51<2:01:24, 31.00s/pair]           

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpg24o2z5_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpdun_im21.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3pjnqh4r.lean']' timed out after 30 seconds


model=deepseek-ai_DeepSeek-Prover-V2-7B prob=166  prev_success=✘:   7%|▋         | 18/249 [08:42<1:48:46, 28.25s/pair]     

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpc8wnpj2x.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmwymj5ov.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp2tr97ixz.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_hqj8d4m.lean']' timed out after 30 seconds


model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=167  prev_success=✘:   8%|▊         | 19/249 [09:12<1:51:04, 28.98s/pair]

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmplbh5f1qw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0z8g4oy6.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxg00e7ti.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_kds4zn_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpbf8vntst.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpaxjigczj.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpcno6j7j5.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=167  prev_success=✘:   8%|▊         | 20/249 [09:43<1:52:13, 29.41s/pair]           

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpgxpl50wt.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpx2mauq1e.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpi7tsu4qv.lean']' timed out after 30 secondsException (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp9cifpz75.lean']' timed out after 30 seconds

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp2v3jgtpw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmplwxfdlbc.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3yasmans.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=167  prev_success=✘:   8%|▊         | 21/249 [10:14<1:53:32, 29.88s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmputr6zlmw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp8r8bat5o.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpbnn8qxe_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxv0me7xk.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpttvt7j9b.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpz2kdm387.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpd31u0rfi.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=168  prev_success=✘:   9%|▉         | 22/249 [10:46<1:55:42, 30.58s/pair]

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpr_6qy34u.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpvlqpjhjc.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmzbs7x4x.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpg59bhns0.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpqt29twxs.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpob3n9oxt.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmposdjqvzi.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=168  prev_success=✘:   9%|▉         | 23/249 [11:19<1:57:53, 31.30s/pair]           

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpcatp3jki.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpj1sktgtv.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp26_aivuv.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp723o7be0.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpa_vbinov.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmps8078u65.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_yy8tvxe.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=168  prev_success=✘:  10%|▉         | 24/249 [11:52<1:59:09, 31.77s/pair]

Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp8xee85ik.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpjetx_xze.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp14h0j5ra.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpybfyx3fy.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp247d51lv.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpx5j6majl.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7jcmgiqj.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=169  prev_success=✘:  10%|█         | 25/249 [12:24<1:59:08, 31.91s/pair]

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp2ab57bw6.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpnx6lf012.lean']' timed out after 30 secondsException (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmps4s5v3cb.lean']' timed out after 30 seconds

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7u9ae4nl.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpjudqcc8u.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpru5b7_0m.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp114ychzp.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=169  prev_success=✘:  10%|█         | 26/249 [12:57<1:59:22, 32.12s/pair]           

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmw45y4w6.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpwosy46wu.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpzbcqvmsk.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmsmqqaql.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpgxhtbddw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp9tp5yp8o.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpjet_v7e2.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=169  prev_success=✘:  11%|█         | 27/249 [13:29<1:59:12, 32.22s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmptwpl70v_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp68wlfo4_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6i27ud2c.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpcq34gxo0.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1qm376ly.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpcx0blny2.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmprxm8zrlw.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=170  prev_success=✘:  11%|█         | 28/249 [14:01<1:58:31, 32.18s/pair]

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpc8qjjs4x.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp83ncglq2.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmppvgug0ma.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpc0bwwi2u.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_mxj3fd9.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpsip9r3ba.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1hadwpfh.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=170  prev_success=✘:  12%|█▏        | 29/249 [14:34<1:58:21, 32.28s/pair]           

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpqfq7goqw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxb_1x8c4.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpk5bbii6r.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmjwozgro.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpvrsqsspg.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpoucjoz7s.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpj_wg_chh.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=170  prev_success=✘:  12%|█▏        | 30/249 [15:07<1:59:07, 32.64s/pair]

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpw_bxwms8.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpprg2fw6b.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmph3r02pij.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpficr7sgf.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpjgkgiie9.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpkq9q2ktp.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp8icbz2r2.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=171  prev_success=✘:  12%|█▏        | 31/249 [15:40<1:58:20, 32.57s/pair]

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpidut2e9t.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp69qnt903.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxmg7ij0y.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp8vish4_7.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpnklu3jj_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0q0gcrft.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpvx6v8puj.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=171  prev_success=✘:  13%|█▎        | 32/249 [16:12<1:58:04, 32.65s/pair]           

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpuoaxmzdr.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3d9s8luc.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpbeeld93n.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpgzz62ztl.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpuxztfiml.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpq61h4wbd.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpn_e7o2fd.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=171  prev_success=✘:  13%|█▎        | 33/249 [16:43<1:55:33, 32.10s/pair]

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmps44bej2b.lean']' timed out after 30 seconds


model=deepseek-ai_DeepSeek-Prover-V2-7B prob=173  prev_success=✔:  16%|█▌        | 39/249 [19:28<1:37:14, 27.79s/pair]     

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpr3vy4imi.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpl0tbhs85.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp9fzbx0g4.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp9g0thtac.lean']' timed out after 30 seconds


model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=174  prev_success=✔:  16%|█▌        | 40/249 [19:59<1:39:48, 28.65s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpqszo4ber.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxxjcjku2.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp5ry2n75h.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpd9c069kv.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpic8lo4to.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpielf9mgt.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpzt37odz5.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=174  prev_success=✔:  16%|█▋        | 41/249 [20:30<1:41:50, 29.38s/pair]           

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpuwzb0whp.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpt9idxfjj.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp16psgna1.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmperxc_vg_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpkssj37rp.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmprsh2008m.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpct9cy87v.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=174  prev_success=✘:  17%|█▋        | 42/249 [21:02<1:44:22, 30.25s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmppj4bdrfw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp9vje9vjy.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpzg7llcyb.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6bkf1n_5.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpjfqisme1.lean']' timed out after 30 secondsException (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp5el11wig.lean']' timed out after 30 seconds

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmps8uxypxg.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=175  prev_success=✘:  17%|█▋        | 43/249 [21:34<1:46:05, 30.90s/pair]

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpae508tt_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmfxt2doz.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpn3pgnyin.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpfhu731yl.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp2jy35tqp.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpqbx152c7.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpy920ywyb.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=175  prev_success=✘:  18%|█▊        | 44/249 [22:06<1:45:54, 31.00s/pair]           

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0fyq3lm6.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpkexzk5et.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6i1vkyoh.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp4e642xg5.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6zrzj959.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7_dzi540.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpsq0eqw5e.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=175  prev_success=✘:  18%|█▊        | 45/249 [22:37<1:45:54, 31.15s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpp_olmwlz.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpisr430e7.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0apgs6ym.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpcqr21zxo.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpdt4iyyo6.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp75kmtv7l.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpzeciw6ys.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=176  prev_success=✘:  18%|█▊        | 46/249 [23:09<1:46:00, 31.33s/pair]

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3gv3tmhc.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpze4uu0aq.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp97ovf0et.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1kwbvydf.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpnyrr8f0e.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpr3td2ihk.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpok2wgsty.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=176  prev_success=✘:  19%|█▉        | 47/249 [23:41<1:46:25, 31.61s/pair]           

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1y3vwkpz.lean']' timed out after 30 seconds


model=deepseek-ai_DeepSeek-Prover-V2-7B prob=176  prev_success=✔:  19%|█▉        | 48/249 [24:11<1:44:09, 31.09s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp02819o4w.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpitfrr0ss.lean']' timed out after 30 seconds


model=Goedel-LM_Goedel-Prover-SFT prob=177  prev_success=✘:  20%|██        | 50/249 [25:11<1:41:34, 30.63s/pair]           

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpy0xtzz6_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpccf5jag2.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpk_6fxlws.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp4hbsns0x.lean']' timed out after 30 secondsException (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpg94tt79u.lean']' timed out after 30 seconds

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpsytdkpf1.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0m1dtees.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=177  prev_success=✘:  20%|██        | 51/249 [25:43<1:41:29, 30.76s/pair]

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_sw0hop9.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp87phfs6z.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpf8ag_j63.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp35jez6i1.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0okp017v.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpsrwt_4it.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmprsqsuga8.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=178  prev_success=✘:  21%|██        | 52/249 [26:14<1:42:08, 31.11s/pair]

Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmppmethjg0.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpzi33nht3.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpa8_3h809.lean']' timed out after 30 secondsException (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpvcucoebk.lean']' timed out after 30 secondsException (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpfgg5oekt.lean']' timed out after 30 seconds


Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpky63hfnd.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp2wx132zl.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=178  prev_success=✔:  21%|██▏       | 53/249 [26:47<1:43:12, 31.60s/pair]           

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpa__8ja5i.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmprnpvojji.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpjld5jcwo.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxv251eu_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1kvugk3x.lean']' timed out after 30 seconds


model=deepseek-ai_DeepSeek-Prover-V2-7B prob=180  prev_success=✔:  24%|██▍       | 60/249 [30:06<1:29:53, 28.54s/pair]     

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpvbatwdam.lean']' timed out after 30 seconds


model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=181  prev_success=✔:  24%|██▍       | 61/249 [30:36<1:31:14, 29.12s/pair]

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp35k_u98t.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_yafjhdx.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1nn052tr.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpbg13wpip.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpgf8pbovs.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxmu3kij4.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpr6s61zbv.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=181  prev_success=✘:  25%|██▍       | 62/249 [31:07<1:32:40, 29.74s/pair]           


Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpng21dl1n.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpuddyxvc8.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpumb9j03k.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpljbeq4wc.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp20mu68r9.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp5_9d5ib6.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmph2pqemvd.lean'

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=181  prev_success=✔:  25%|██▌       | 63/249 [31:40<1:34:43, 30.56s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpr3y0ezin.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmprvj5glf9.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpb6lv9xao.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpttllxy3k.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpu0vgpf98.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6wxwrkyb.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpgdmjop6f.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=182  prev_success=✘:  26%|██▌       | 64/249 [32:15<1:38:18, 31.89s/pair]

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpum8oauok.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpfrlszavx.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3axjhnht.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmps2tv319w.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp94f_iovz.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpno8wevk2.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp8ed_7k7u.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=182  prev_success=✘:  26%|██▌       | 65/249 [32:49<1:40:02, 32.62s/pair]           

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpyzcqvl7l.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmph5i2jcib.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpha60x8eh.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpvow9i57a.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpn8rbmfm2.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3q934g6g.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp4xps7b1w.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=182  prev_success=✘:  27%|██▋       | 66/249 [33:23<1:40:38, 33.00s/pair]

Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpa0h9jh2l.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpp71w1d70.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6xs8xivz.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp2542foki.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpu7bgb69b.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp87c6_6sl.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0oclar3o.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=183  prev_success=✘:  27%|██▋       | 67/249 [33:57<1:40:37, 33.17s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmph6c57f08.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp4k26e0ta.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpur3464go.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpvcu1bv1l.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp8uozn5cr.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpsrktx44v.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmnpin4qd.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=184  prev_success=✘:  29%|██▊       | 71/249 [35:53<1:28:05, 29.69s/pair]           

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpsdola7w0.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpn7ynct9m.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmps18los5r.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3qbb8qa8.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpidtak3ir.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3fe86bfi.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpt0x5cti3.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=184  prev_success=✘:  29%|██▉       | 72/249 [36:24<1:29:02, 30.18s/pair]

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpytvypi_a.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpp2pr8j2a.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpser4m4fh.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpkr98snzl.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmphuahj9oi.lean']' timed out after 30 secondsException (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpm64a2ccr.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpm_103ne5.lean']'

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=185  prev_success=✘:  29%|██▉       | 73/249 [36:57<1:30:41, 30.92s/pair]

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmuarq6wf.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp78opjj7p.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpp35e7a5h.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp12_assnj.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpbvzxi2fo.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpgomqxmiu.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpzr4uppdb.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=185  prev_success=✘:  30%|██▉       | 74/249 [37:31<1:33:12, 31.96s/pair]           

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7f6i28c6.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1th871ju.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3awpuk8i.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp__hbi8es.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpkp96_7nh.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp4mivyijn.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpoptn667d.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=185  prev_success=✘:  30%|███       | 75/249 [38:05<1:34:42, 32.66s/pair]

Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpfq88w48a.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpv2drqdgz.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp63y6n3dk.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmphpsn2wf4.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpuwmbe_tl.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmphih0t0cn.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp01o01xxk.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=186  prev_success=✘:  31%|███       | 76/249 [38:40<1:35:30, 33.13s/pair]

Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmphp4ei3_q.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpb_u8kmwp.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpjkbg0vtp.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpnapsdmoa.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpw58uwkqd.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3xc7imya.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpv6s0e0v1.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=186  prev_success=✘:  31%|███       | 77/249 [39:14<1:35:47, 33.41s/pair]           

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1gb824sf.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpy_dkv8jw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpy24ndlw_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7mj_ag5f.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7a02esyy.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp2twjoyu_.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpih544ww2.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=186  prev_success=✘:  31%|███▏      | 78/249 [39:48<1:36:08, 33.73s/pair]

Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1x6omel4.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpoexbgd96.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1fhrkx2j.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6rttx34a.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpg8zksjhe.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpancnyjvs.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpayt5cw9v.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=187  prev_success=✘:  32%|███▏      | 79/249 [40:23<1:36:22, 34.02s/pair]

Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp15d4ex7e.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6vzyi1z8.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpi2dsaht6.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmio1nwjd.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp2xf6uy2c.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmppdvi6ln5.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpkff3_5xc.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=187  prev_success=✘:  32%|███▏      | 80/249 [40:59<1:37:26, 34.59s/pair]           

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmps8fijvwh.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_is3xfrh.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpg9uavznz.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmphg5iblrv.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp8l5bsrjr.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7t_5tpqv.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpqbtycilb.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=187  prev_success=✘:  33%|███▎      | 81/249 [41:34<1:37:25, 34.79s/pair]

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6m4eytxh.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpbcl9jh_r.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3d093y7g.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpudqg2kaq.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmps2st_v6w.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3r_hrzcq.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpf4ugzv61.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=188  prev_success=✘:  33%|███▎      | 82/249 [42:10<1:37:58, 35.20s/pair]

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpwxowxf6q.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp6g6noguq.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmyq64r2w.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpo0qt957y.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp4tygtp3p.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp81adixtb.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_901gt69.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=188  prev_success=✘:  33%|███▎      | 83/249 [42:47<1:38:20, 35.55s/pair]           

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpoitc1x4k.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpon23xsbi.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpu5dq636c.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxekmogl2.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpgnbp2_ms.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_ydt5unk.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpwuzzgz6q.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=188  prev_success=✘:  34%|███▎      | 84/249 [43:24<1:39:01, 36.01s/pair]

Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmprx70kvz2.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpicyw48fc.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7iyqqstl.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp_o3ckvpe.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpt_3idnnp.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpbr0dk1l1.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpbylhz13x.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=189  prev_success=✘:  34%|███▍      | 85/249 [44:01<1:39:05, 36.26s/pair]

Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpw64at32y.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpt1xqrp05.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp5qhve_gd.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmbwb4667.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpjluggscx.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpxgremrjt.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0zm73jmx.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=189  prev_success=✘:  35%|███▍      | 86/249 [44:37<1:38:34, 36.29s/pair]           

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpv24m79p1.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp3osv9vbb.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7dc68wx5.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpc2vnafh8.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpd04ms992.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmppcv4ybuj.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp5d6kriiu.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=189  prev_success=✘:  35%|███▍      | 87/249 [45:13<1:37:45, 36.21s/pair]

Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpa_zpsbki.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpe2sz_kf4.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmponi1jisw.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpwm2uq4q7.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp1qctpkq4.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp90d_w97j.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp60h9es_3.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=190  prev_success=✘:  35%|███▌      | 88/249 [45:50<1:37:43, 36.42s/pair]

Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpenwyvkir.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmplneb5msv.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpfzi2cwy5.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp618qx2lb.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0o66mg5d.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpbbdknffk.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpnblcp5yq.lean']

model=Goedel-LM_Goedel-Prover-SFT prob=190  prev_success=✘:  36%|███▌      | 89/249 [46:25<1:36:17, 36.11s/pair]           

Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpmjnbfn2n.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpqtjt12go.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpsrpdl0pd.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp7j20org1.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpsr4qtwfz.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 3: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp5__g341t.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmp0qgmyrzf.lean']

model=deepseek-ai_DeepSeek-Prover-V2-7B prob=190  prev_success=✘:  36%|███▌      | 90/249 [47:01<1:35:34, 36.07s/pair]

Exception (TimeoutExpired) for variant 4: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmplx6c6d5n.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmph6u7igo7.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpyy8xw33m.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 0: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpvdw4si8e.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 1: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpe23auaqv.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmpysccij7x.lean']' timed out after 30 seconds
Exception (TimeoutExpired) for variant 2: Command '['lake', 'env', 'lean', '-DmaxRecDepth=100000', 'tmphziv8p6o.lean']

model=AI-MO_Kimina-Prover-Preview-Distill-7B prob=191  prev_success=✘:  37%|███▋      | 91/249 [47:37<1:34:34, 35.91s/pair]

In [19]:
import json
from typing import Dict, List

def results_to_nested_dict(
    results,                      # numpy array, shape: (num_problems, num_models, num_attempts)
    model_names: List[str],
    problem_ids: List[str],
) -> Dict[str, Dict[str, list]]:
    """Re-key a 3-D outcome array as ``{model_id: {problem_id: [0/1, ...]}}``.

    Args:
        results: Array indexed as ``[problem, model, attempt]``. Each
            ``[problem, model, :]`` slice is cast to ``int`` and converted
            to a plain Python list.
        model_names: One label per entry along axis 1 of ``results``.
        problem_ids: One label per entry along axis 0 of ``results``.

    Returns:
        A dict keyed by ``str(model)``, whose values are dicts keyed by
        ``str(problem)`` holding that pair's per-attempt outcomes.

    Raises:
        AssertionError: If the label lists do not match the array's
            model / problem dimensions.
    """
    n_problems, n_models, _ = results.shape
    assert len(model_names) == n_models
    assert len(problem_ids) == n_problems

    # Outer level iterates models (axis 1), inner level iterates problems (axis 0).
    return {
        str(model_label): {
            str(problem_label): results[row, col, :].astype(int).tolist()
            for row, problem_label in enumerate(problem_ids)
        }
        for col, model_label in enumerate(model_names)
    }

def save_results_nested_json(
    results,
    model_names: List[str],
    problem_ids: List[str],
    out_path: str = "proof_outcomes_by_model.json",
) -> str:
    """Write the model → problem → outcomes mapping to a JSON file.

    The mapping is produced by ``results_to_nested_dict`` and is built
    before the output file is opened.

    Args:
        results: Outcome array forwarded to ``results_to_nested_dict``.
        model_names: Model labels forwarded to ``results_to_nested_dict``.
        problem_ids: Problem labels forwarded to ``results_to_nested_dict``.
        out_path: Destination file; opened in write mode with UTF-8 encoding.

    Returns:
        ``out_path``, unchanged.
    """
    nested = results_to_nested_dict(results, model_names, problem_ids)
    with open(out_path, "w", encoding="utf-8") as handle:
        # ensure_ascii=False keeps non-ASCII characters literal in the file.
        json.dump(nested, handle, ensure_ascii=False, indent=2)
    return out_path

# Example:
# save_results_nested_json(results, model_names, problem_ids, "proof_outcomes_by_model.json")


In [20]:
# Export the per-model outcomes. This cell needs `results`, `model_names`, and
# `problem_ids` to already exist in the kernel; they are not defined here.
save_results_nested_json(
    results,
    model_names,
    problem_ids,
    out_path="proof_outcomes_by_model.json",
)

NameError: name 'results' is not defined

In [None]:
def results_to_problem_nested_dict(
    results,                      # numpy array, shape: (num_problems, num_models, num_attempts)
    model_names: List[str],
    problem_ids: List[str],
) -> Dict[str, Dict[str, list]]:
    """
    Re-key a 3-D outcome array by problem first, then by model:

    {
      "<problem_id>": {
        "<model_id>": [0/1, 0/1, ...],
        ...
      },
      ...
    }

    Args:
        results: Array indexed as ``[problem, model, attempt]``. Each
            ``[problem, model, :]`` slice is cast to ``int`` and converted
            to a plain Python list.
        model_names: One label per entry along axis 1 of ``results``.
        problem_ids: One label per entry along axis 0 of ``results``.

    Returns:
        A dict keyed by ``str(problem)``, whose values are dicts keyed by
        ``str(model)`` holding that pair's per-attempt outcomes.

    Raises:
        AssertionError: If the label lists do not match the array's
            model / problem dimensions.
    """
    n_problems, n_models, _ = results.shape
    assert len(model_names) == n_models
    assert len(problem_ids) == n_problems

    # Outer level iterates problems (axis 0), inner level iterates models (axis 1).
    return {
        str(problem_label): {
            str(model_label): results[row, col, :].astype(int).tolist()
            for col, model_label in enumerate(model_names)
        }
        for row, problem_label in enumerate(problem_ids)
    }


def save_results_nested_by_problem_json(
    results,
    model_names: List[str],
    problem_ids: List[str],
    out_path: str = "proof_outcomes_by_problem.json",
) -> str:
    """
    Write the problem → model → outcomes mapping to a JSON file.

    The mapping is produced by ``results_to_problem_nested_dict`` and is
    built before the output file is opened.

    Args:
        results: Outcome array forwarded to ``results_to_problem_nested_dict``.
        model_names: Model labels forwarded to ``results_to_problem_nested_dict``.
        problem_ids: Problem labels forwarded to ``results_to_problem_nested_dict``.
        out_path: Destination file; opened in write mode with UTF-8 encoding.

    Returns:
        ``out_path``, unchanged.
    """
    nested = results_to_problem_nested_dict(results, model_names, problem_ids)
    with open(out_path, "w", encoding="utf-8") as handle:
        # ensure_ascii=False keeps non-ASCII characters literal in the file.
        json.dump(nested, handle, ensure_ascii=False, indent=2)
    return out_path


In [None]:
# Export the per-problem outcomes. This cell needs `results`, `model_names`, and
# `problem_ids` to already exist in the kernel; they are not defined here.
save_results_nested_by_problem_json(
    results,
    model_names,
    problem_ids,
    out_path="proof_outcomes_by_problem.json",
)


In [None]:
# Shell escape: recursively (-r) pack the `corrected_proofs` path into corrected_proofs.zip.
!zip -r corrected_proofs.zip corrected_proofs