In [32]:
import lzma
import json
import pandas as pd
from pathlib import Path
import tqdm
import os
import shutil

In [None]:
# Load the xz-compressed JSON summary. JSON text is UTF-8 by specification, so
# the encoding is pinned instead of being left to the platform's locale default.
with lzma.open("04_summary_data.json.xz", "rt", encoding="utf-8") as f:
    summary_data = json.load(f)

In [4]:
# Tabulate the summary records so they can be filtered with pandas.
summary_df = pd.DataFrame(data=summary_data)

# Instances not solved to optimality in 10 minutes
## Excluding benchmark set

In [None]:
# Show the rows whose bounds differ or whose time exceeds 600, ordered by time.
open_gap = summary_df["lb"] != summary_df["ub"]
over_limit = summary_df["time"] > 600
summary_df[open_gap | over_limit].sort_values(by="time")

Unnamed: 0,lb,ub,time,mes,instance
76,34,34,600.810217,33,grg.xml
795,34,34,631.223206,33,ocelot.xml
778,35,35,700.147820,33,malta_mips64_5kc.xml
595,34,34,705.536300,33,edb7xxx.xml
92,11,11,737.385317,8,toybox_2016-10-05_13-29-55.xml
...,...,...,...,...,...
144,21,22,3601.325571,21,busybox_2017-08-14_12-25-55.xml
57,29,34,3601.586633,29,phycore.xml
44,21,22,3602.793450,21,busybox_2017-07-15_21-14-16.xml
650,59,61,3602.813319,59,freetz.dimacs


In [12]:
# Names of all instances whose bounds differ or whose time exceeds 600.
unsolved_or_slow = (summary_df["lb"] != summary_df["ub"]) | (summary_df["time"] > 600)
nontrivial_instances_excl_benchmark_set = set(summary_df.loc[unsolved_or_slow, "instance"])

## Benchmark set

In [17]:
# Result folder of the default-parameter run, located next to this notebook's
# directory (sibling folder "02_run_default_params").
parent_of_cwd = Path(".").resolve().parent
default_benchmark_output = parent_of_cwd.joinpath("02_run_default_params", "02_output")

In [21]:
# Scan every result file of the default-parameter run and collect the instances
# whose bounds differ or whose last event is later than the time limit.
TIME_LIMIT = 600  # presumably seconds (10 minutes) - confirm against the solver's log format
nontrivial_instances_benchmark_set = set()
# Sorted so the files are processed in a deterministic order on every run.
files = sorted(default_benchmark_output.glob("*.json.xz"))
for result_file in tqdm.tqdm(files):
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with lzma.open(result_file, "rt", encoding="utf-8") as f:
        result_data = json.load(f)
    instance = result_data["instance_name"]
    lb = result_data["lb"]
    ub = result_data["ub"]
    # The time stamp of the final logged event is taken as the total runtime.
    # Named `runtime` so the stdlib module name `time` is not shadowed.
    runtime = result_data["events"][-1]["time"]
    if lb != ub or runtime > TIME_LIMIT:
        nontrivial_instances_benchmark_set.add(instance)

100%|██████████| 275/275 [08:34<00:00,  1.87s/it]


In [23]:
# Combine the nontrivial instances found outside and inside the benchmark set.
nontrivial_instances = (
    nontrivial_instances_excl_benchmark_set | nontrivial_instances_benchmark_set
)

# Filter out huge instances

In [27]:
# Drop every instance whose name contains "Automotive02", then list the rest
# in sorted order.
nontrivial_instances = set(
    name for name in nontrivial_instances if not ("Automotive02" in name)
)
for name in sorted(nontrivial_instances):
    print(name)

Automotive01.xml
BattleofTanks.xml
E-Shop.xml
FreeBSD-8_0_0.xml
Violet.xml
XSEngine.xml
aaed2000.xml
aeb.xml
aim711.xml
am31_sim.xml
asb.xml
asb2305.xml
assabet.xml
at91sam7sek.xml
at91sam7xek.xml
atlas_mips32_4kc.xml
axTLS.xml
brutus.xml
busybox-1_18_0.xml
busybox_2017-07-15_21-14-16.xml
busybox_2017-07-21_09-50-55.xml
busybox_2017-07-27_02-59-13.xml
busybox_2017-08-03_03-46-14.xml
busybox_2017-08-14_12-25-55.xml
calm16_ceb.xml
calm32_ceb.xml
ceb_v850.xml
cerf.xml
cerfpda.xml
cma230.xml
cma28x.xml
cme555.xml
cq7750.xml
eCos-3-0_i386pc.xml
eShopFIDE.xml
eShopSplot.xml
ea2468.xml
eb40.xml
eb40a.xml
eb42.xml
eb55.xml
ec555.xml
edb7xxx.xml
fads.xml
flexanet.xml
freetz.dimacs
frv400.xml
gps4020.xml
grg.xml
h8300h_sim.xml
h8max.xml
h8s_sim.xml
integrator_arm7.xml
ipaq.xml
iq80321.xml
jmr3904.xml
jtst.xml
linux_2_6_33_3.xml
lpcmt.xml
m5272c3.xml
mac7100evb.xml
mace1.xml
main_light.xml
malta_mips32_4kc.xml
malta_mips64_5kc.xml
mb93091.xml
mb93093.xml
mcb2100.xml
moab.xml
mpc50.xml
ocelot.xml


In [28]:
# Strip the file extension (".xml" or ".dimacs") from every instance name;
# any other name format is treated as an error.
instance_names = []
for instance in nontrivial_instances:
    for extension in (".xml", ".dimacs"):
        if instance.endswith(extension):
            instance_names.append(instance[: -len(extension)])
            break
    else:
        # No known extension matched.
        raise ValueError(f"Unexpected instance name format: {instance}")

In [30]:
# Remove duplicate base names and put them in sorted order.
instance_names = sorted(set(instance_names))

In [36]:
# Copy the full instance file of every selected instance into a local folder.
output = Path("06_nontrivial_instances")
full_instance_set = Path(".").resolve().parent.parent / "full_instances"
output.mkdir(parents=True, exist_ok=True)

# Pair each source file with its destination; files are named
# "<instance_name>.scm.json.xz" in both folders.
copy_plan = []
for instance_name in instance_names:
    instance_w_ext = instance_name + ".scm.json.xz"
    copy_plan.append((full_instance_set / instance_w_ext, output / instance_w_ext))

# Check every source before copying anything, so a missing file cannot leave a
# half-populated output folder, and report all missing files at once.
missing_files = [source for source, _target in copy_plan if not source.exists()]
if missing_files:
    raise FileNotFoundError(
        "Full instance file does not exist: " + ", ".join(map(str, missing_files))
    )

for source_file, target_file in copy_plan:
    shutil.copyfile(source_file, target_file)