# Filter the granular test bed for Section 5.3
Required data (read from the `filter_instances/` directory):
- `collection_names.txt`: Names of all instances from MIPLIB 2017
- `The Infeasible Tag.xlsx`: Table, containing all infeasible instances from MIPLIB 2017
- `filter_collection_dataframe.pickle`: Dataframe with merged output of filter_instances.py
- `filter_collection_log.txt`: Logging file from the run of filter_instances.py

In [1]:
import pandas as pd
import pickle
import copy

In [2]:
# Read the names of all instances in the collection, one name per line.
collection = []
with open(r'filter_instances/collection_names.txt', 'r') as fp:
    for line in fp:
        # strip() instead of line[:-1]: the slice cuts off the last character
        # of the final name when the file does not end with a newline, and it
        # leaves a stray '\r' behind when the file uses CRLF line endings.
        name = line.strip()
        if name:  # skip blank lines so no empty "instance" is recorded
            collection.append(name)
print('There are '+str(len(set(collection)))+ ' instances in MIPLIB 2017')

There are 1065 instances in MIPLIB 2017


We remove all infeasible instances.

In [3]:
# Load the table of infeasible instances; the names are taken from its first column.
inf_inst = pd.read_excel('filter_instances/The Infeasible Tag.xlsx')
inf = list(inf_inst.iloc[:,0])
print('There are '+ str(len(inf))+' infeasible instances')

# Rebuild the list instead of calling collection.remove(...) once per name:
# remove() raises ValueError as soon as a name is no longer in the list, which
# is exactly what happens when this cell is executed a second time on the same
# kernel. Filtering against a set makes the cell safe to re-run.
inf_names = set(inf)
collection = [instance for instance in collection if instance not in inf_names]

print('There are '+ str(len(collection))+ ' instances remaining in our testbed')

There are 45 infeasible instances
There are 1020 instances remaining in our testbed


Read the results of filter_instances.py

In [4]:
# Merged output of filter_instances.py, indexed by instance name.
# NOTE: unpickling can execute arbitrary code - only load a file you produced yourself.
dataframe_path = 'filter_instances/filter_collection_dataframe'
df = pd.read_pickle(dataframe_path)

Filter out the instances for which the heuristic was not executed within half an hour

In [5]:
# Instances that are still in the test bed but have no row in df.
# A set lookup replaces the repeated list.remove() calls, which were quadratic.
instances_with_result = set(df.index)
did_not_run = [name for name in collection if name not in instances_with_result]

print('There are '+str(len(did_not_run))+ ' instances, where SCIP did not start our heuristic within half an hour')

# Check: Was every instance from did_not_run called in our script?
# The log is split into whitespace-separated tokens; an instance counts as
# "called" when the token '<name>.mps' occurs somewhere in the log.
words = []
# 'with' guarantees the log file is closed again (the handle used to leak).
with open('filter_instances/filter_collection_log','r') as log_file:
    for log_line in log_file:
        # extend() appends in place; 'words = words + ...' copied the whole list per line
        words.extend(log_line.split())
logged_tokens = set(words)
for instance in did_not_run:
    if instance+'.mps' not in logged_tokens:
        print(str(instance)+' was not called in our script')

There are 208 instances, where SCIP did not start our heuristic within half an hour


In [6]:
# Boolean masks over the rows of df, one per condition used below.
has_eq_constrs = df['eq_constrs']==True
no_eq_constrs = df['eq_constrs']==False
ips_is_empty = df['ips_nonempty'] == False
ips_is_nonempty = df['ips_nonempty'] == True
hit_time_limit = df['ips_nonempty'] == 'timelimit'  # FRA was called, but did not finish in time limit

# Instances flagged as having equality constraints.
eq_constr = list(df[has_eq_constrs].index)
print('Filter ',len(eq_constr),' instances with equality constraints')

# No equality constraints, but ips_nonempty is False.
ips_empty = list(df[no_eq_constrs & ips_is_empty].index)
print('Filter ',len(ips_empty),' instances with empty inner parallel set in root not (non-granular)')

# ips_nonempty holds the marker string 'timelimit' instead of a boolean.
time_limit = list(df[hit_time_limit].index)
print('Filter ', len(time_limit), 'instances, where our heuristic did not finish within the run time')

# Everything else with a definite positive answer forms the granular test bed.
df_temp = df[no_eq_constrs & ips_is_nonempty]
granular = list(df_temp.index)
print('------------------------------------------------------------------')
print('The granular test bed contains ',len(granular), ' instances.')

Filter  491  instances with equality constraints
Filter  192  instances with empty inner parallel set in root not (non-granular)
Filter  1 instances, where our heuristic did not finish within the run time
------------------------------------------------------------------
The granular test bed contains  128  instances.


In [7]:
# Sanity check: the six lists together should cover every instance exactly once,
# i.e. total length and number of unique names should both equal the collection size.
all_classified = eq_constr + ips_empty + granular + did_not_run + inf + time_limit
total_count = len(all_classified)
unique_count = len(set(all_classified))
print('Then length of all our lists is ' + str(total_count))
print('They contain ' + str(unique_count) + ' unique elements')
print('The collection set had 1065 instances')

Then length of all our lists is 1065
They contain 1065 unique elements
The collection set had 1065 instances


Write the test bed to a file

In [8]:
# Persist the granular test bed, one instance name per line.
with open(r'../testbed/granular_testbed.txt', 'w') as fp:
    fp.writelines("%s\n" % name for name in granular)

Print the test bed:

In [9]:
# Show the full list of instance names; print() applies str() to the list itself.
print('Granular testbed:\n', granular)

Granular testbed:
 ['a2c1s1', '30_70_45_095_100', 'markshare_5_0', 'neos-787933', 'set3-09', 'p200x1188c', 'supportcase20', 'set3-20', 'iis-hc-cov', 'sp150x300d', 'neos-983171', 'b1c1s1', 'mik-250-20-75-3', 'neos-1445743', 'n13-3', 'khb05250', 'mik-250-20-75-5', 'gen-ip021', 'opm2-z12-s8', 'opm2-z6-s1', 'seymour', 'beasleyC2', 'gsvm2rl3', 'n3700', 'ger50-17-trans-dfn-3t', 'beasleyC3', 'buildingenergy', 'mik-250-20-75-2', 'neos-4954672-berkel', 'k16x240b', 'neos-3118745-obra', 'mc11', 'ger50-17-ptp-pop-6t', 'ger50-17-trans-pop-3t', 'markshare2', 'neos-3611689-kaihu', 'set3-10', 'mc7', 'n6-3', 'gen-ip054', 'adult-regularized', 'ns4-pr6', 'neos-1367061', 'bg512142', 'ger50-17-ptp-pop-3t', 'neos-1445765', 'gen-ip002', 'neos-3611447-jijia', 'gsvm2rl9', 'p500x2988', 'sorrell3', 'neos17', 'opm2-z10-s4', 'ramos3', 'sorrell7', 'b2c1s1', 'iis-glass-cov', 'breastcancer-regularized', 'app2-2', 'ran14x18-disj-8', 'opm2-z8-s0', 'g200x740', 'scpj4scip', 'sorrell8', 'gsvm2rl12', 'n370b', 'n7-3', 'beas