In [4]:
import sys, pathlib, os, pickle, ast
import numpy as np
from tqdm import tqdm
from func_timeout import func_timeout, FunctionTimedOut

# Parameters

In [None]:
# Provide your OpenAI API key through the OPENAI_API_KEY environment variable
# (preferred). For a local run only, you may paste it into the empty string
# below instead; never commit a notebook that contains a real key.
# The assignment happens only when no key is set yet, so re-running this cell
# does not overwrite a key that was already exported with an empty string.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = ""

In [6]:
# Model names used when generating solvers, instances and validity tests.
solver_model = instance_model = test_model = "gpt-4.1-mini"

# Step by which the requested solver count grows per generation round.
SOLVERS_BATCH_SIZE = 10

# Load problem data

In [7]:
# Folder holding the problem definition, relative to the notebook's working directory.
problem_folder = '../problems/WSCP_time_dependent'
# Pickle file in which the assembled problem data is stored.
problem_data_file = problem_folder + "/problem_data.pkl"

In [12]:
# Resolve the working directory once; both search-path entries derive from it.
notebook_dir = pathlib.Path().resolve()
project_root = notebook_dir.parent
problem_path = notebook_dir / problem_folder

# The parent directory must be on sys.path before `utils` can be imported.
sys.path.insert(0, str(project_root))
from utils.generation import (
    generate_solvers,
    generate_instance,
    generate_validity_tests,
    execute_code,
    compute_outputs,
    run_validity_tests,
)

# The problem folder must be on sys.path before `feasibility` can be imported.
sys.path.insert(0, str(problem_path))
from feasibility import evaluate_feasibility

In [13]:
# Reuse the pickled problem data when it exists; otherwise assemble it from the
# text files under problem_specification/.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
if not os.path.exists(problem_data_file):
    spec_dir = f"{problem_folder}/problem_specification"
    problem_data = {'name': problem_folder.split('/')[-1]}
    for field, spec_name in (
        ('problem_description', 'description.txt'),
        ('input_template', 'input_template.txt'),
        ('output_template', 'output_template.txt'),
    ):
        with open(f"{spec_dir}/{spec_name}", 'r') as file:
            problem_data[field] = file.read()
    with open(f"{spec_dir}/minimization.txt", 'r') as file:
        # The file content is parsed as a Python literal.
        problem_data['minimization'] = ast.literal_eval(file.read())
else:
    with open(problem_data_file, 'rb') as file:
        problem_data = pickle.load(file)

# Bind the imported feasibility checker in both cases, then rewrite the pickle.
problem_data['evaluate_feasibility'] = evaluate_feasibility

with open(problem_data_file, 'wb') as file:
    pickle.dump(problem_data, file)

# Helper functions

**generate_components**: generates (or loads, if already saved) `n_solvers` solvers, `n_instances` instances, and `n_tests` validity tests.

**evaluate**:
1. Evaluate the ground-truth labels of the solvers (correct, feasible, executable, interpretable, crash) on the validation set.
2. Compute the solution produced by every solver-instance pair.
3. Run all tests on all interpretable solutions.

In [14]:
def _load_or_init_pickled_list(path):
    """Return the list pickled at ``path``, creating the file with ``[]`` if absent."""
    if os.path.exists(path):
        with open(path, "rb") as file:
            return pickle.load(file)

    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)
    with open(path, "wb") as file:
        pickle.dump([], file)
    return []


def generate_components(problem_data, n_solvers, n_instances, n_tests, max_stalled_batches = 3):
    """Generate (or load) the solvers, instances and validity tests of a problem.

    Components are stored in pickle files inside ``problem_folder``, one file
    per component kind, named after the model used to generate it. Missing
    files are first created with an empty list.

    Args:
        problem_data: Problem description dict, forwarded to the generators.
        n_solvers: Target number of solvers.
        n_instances: Number of instances, forwarded to ``generate_instance``.
        n_tests: Number of validity tests, forwarded to ``generate_validity_tests``.
        max_stalled_batches: Number of consecutive solver-generation rounds that
            may add no solver before generation stops. Without this bound the
            loop would keep calling the model forever when no new solver can
            be produced.

    Returns:
        None. Results are available in the pickle files.
    """
    print("Generating components...")
    solvers_file = f"{problem_folder}/solvers_{solver_model}.pkl"
    instances_file = f"{problem_folder}/instances_{instance_model}.pkl"
    validity_tests_file = f"{problem_folder}/validity_tests_{test_model}.pkl"

    # Load existing components; the instance and test files only need to exist,
    # since the generators below receive their paths.
    solvers = _load_or_init_pickled_list(solvers_file)
    _load_or_init_pickled_list(instances_file)
    _load_or_init_pickled_list(validity_tests_file)

    # Generate solvers by batches of SOLVERS_BATCH_SIZE
    stalled_batches = 0
    while len(solvers) < n_solvers and stalled_batches < max_stalled_batches:
        n_before = len(solvers)
        generate_solvers(
            solvers_file,
            problem_data,
            model = solver_model,
            n_solvers = min(n_solvers, n_before + SOLVERS_BATCH_SIZE),
            verification = False,
            helper = False,
            provided_llm=None
        )
        # The file on disk is the source of truth for the current solver count.
        with open(solvers_file, "rb") as file:
            solvers = pickle.load(file)
        stalled_batches = 0 if len(solvers) > n_before else stalled_batches + 1

    if len(solvers) < n_solvers:
        print(f"Warning: solver generation stopped at {len(solvers)}/{n_solvers} solvers "
              f"after {stalled_batches} consecutive batches without progress.")

    # Generate instances
    generate_instance(
        problem_data,
        instances_file,
        instance_model,
        n_instances
    )

    # Generate validity tests
    generate_validity_tests(
        problem_data,
        validity_tests_file,
        test_model,
        n_tests
    )
    print("Components loaded/generated successfully.")


def _objective_mismatch(found, expected, rel_tol = 1e-3):
    """Tell whether ``found`` deviates from ``expected`` by more than ``rel_tol``.

    The gap is measured relative to ``abs(expected)`` so that a negative
    reference objective is compared correctly. When the reference is exactly
    zero, ``rel_tol`` is used as an absolute tolerance instead of dividing by
    zero. A NaN gap counts as a mismatch.
    """
    gap = abs(found - expected)
    scale = abs(expected)
    tolerance = rel_tol * scale if scale > 0 else rel_tol
    return not gap <= tolerance


def evaluate(problem_data, verbose = False, solver_timeout = 10):
    """Label every stored solver on the validation set, then run outputs and tests.

    For each solver in the solvers pickle file, the solver code is executed on
    every instance of ``problem_data["validation_set"]`` and these flags are
    stored on the solver dict:

    - ``correct``: no instance disproved the solver (status and objective match
      the reference, and the solution passes the feasibility check).
    - ``feasible``: no returned solution failed ``evaluate_feasibility``.
    - ``executable``: at least one run ended with return code 0.
    - ``interpretable``: at least one run returned a dict with a ``"status"`` key.
    - ``crash``: a run ended with a non-zero return code, or an exception was
      raised while checking its output.

    Evaluation of a solver stops at its first timeout, crash or
    uninterpretable output. An uninterpretable output (no ``"status"``) marks
    the solver as neither correct nor feasible, but not as crashed.

    Args:
        problem_data: Dict providing ``"validation_set"`` and
            ``"evaluate_feasibility"``.
        verbose: When true, print the percentage of solvers holding each flag.
        solver_timeout: Time limit passed to ``func_timeout`` for one solver
            run on one validation instance.

    Returns:
        The value returned by ``run_validity_tests``.
    """
    solvers_file = f"{problem_folder}/solvers_{solver_model}.pkl"
    instances_file = f"{problem_folder}/instances_{instance_model}.pkl"
    validity_tests_file = f"{problem_folder}/validity_tests_{test_model}.pkl"

    with open(solvers_file, "rb") as file:
        solvers = pickle.load(file)

    for solver in tqdm(solvers, desc="Evaluating solvers"):
        is_correct = True
        is_feasible = True
        executable = False
        interpretable = False
        crash = False
        for instance in problem_data["validation_set"]:
            data = instance['data']
            try:
                with np.errstate(invalid="ignore"):
                    result = func_timeout(solver_timeout, execute_code, args=(solver["code"], 'solve', [data]))

                    if result[0] != 0:
                        # Non-zero return code: the run failed, so there is no
                        # output to check (presumably result[1] carries the
                        # error rather than a solution -- TODO confirm).
                        is_correct = False
                        is_feasible = False
                        crash = True
                        break

                    executable = True
                    output = result[1]
                    if not (isinstance(output, dict) and "status" in output):
                        # The code ran but its output cannot be interpreted;
                        # this is a failed solver, not a crashed one.
                        is_correct = False
                        is_feasible = False
                        break
                    interpretable = True

                    if output["status"] == "INFEASIBLE":
                        if instance['status'] != "INFEASIBLE":
                            is_correct = False
                    else:
                        if instance['status'] == "INFEASIBLE" or _objective_mismatch(output["objective_value"], instance['objective_value']):
                            is_correct = False
                        # Anything other than exactly True counts as infeasible.
                        if problem_data['evaluate_feasibility'](data, output) != True:
                            is_feasible = False
                            is_correct = False
            except FunctionTimedOut:
                is_correct = False
                is_feasible = False
                break
            except Exception:
                # Errors raised while running or checking the output.
                is_correct = False
                is_feasible = False
                crash = True
                break

        solver["correct"] = is_correct
        solver["executable"] = executable
        solver["interpretable"] = interpretable
        solver["crash"] = crash
        solver["feasible"] = is_feasible

        # Persist after every solver; the whole list is rewritten each time.
        with open(solvers_file, "wb") as file:
            pickle.dump(solvers, file)

    if verbose:
        n_solvers = len(solvers)
        print('-'*30)
        print("# solvers =", n_solvers)
        # Percentages are undefined without solvers; skip them instead of dividing by zero.
        if n_solvers:
            for label, flag in (
                ("correct", "correct"),
                ("feasible", "feasible"),
                ("executable", "executable"),
                ("interpretable", "interpretable"),
                ("crashed", "crash"),
            ):
                print(f"% {label} =", sum(solver[flag] for solver in solvers) / n_solvers * 100)
        print('-'*30)

    solvers = compute_outputs(solvers_file, instances_file)
    solvers = run_validity_tests(solvers_file, validity_tests_file, instances_file, time_limit=1)

    return solvers

# Generate, evaluate and save components

In [10]:
# Target number of components of each kind.
n_solvers = n_instances = n_validity_tests = 100

generate_components(
    problem_data,
    n_solvers=n_solvers,
    n_instances=n_instances,
    n_tests=n_validity_tests,
)
# Trailing semicolon keeps the returned solver list out of the cell output.
evaluate(problem_data, verbose=True);

Generating components...
Components loaded/generated successfully.


Evaluating solvers: 100%|██████████| 100/100 [00:54<00:00,  1.84it/s]


------------------------------
# solvers = 100
% correct = 3.0
% feasible = 12.0
% executable = 95.0
% interpretable = 95.0
% crashed = 5.0
------------------------------


Computing outputs of solver-instances pairs: 100%|██████████| 100/100 [00:27<00:00,  3.60it/s]
Running validity tests: 100%|██████████| 100/100 [12:38<00:00,  7.59s/it]
