In [1]:
import gurobipy as gp
import math
from matplotlib import pyplot as plt
from matplotlib.ticker import FuncFormatter, PercentFormatter
import numpy as np
import pandas as pd
import re
import os
import shutil

In [2]:
# output generation for paper 2

In [3]:
# get input paths
test_set = "miplib_2017_5000_paper2"
instance_fldr = os.path.join("instances", test_set)
test_set_fldr = os.path.join("test_sets", test_set)
results_fldr = os.path.join("results", test_set)
out_fldr = os.path.join("outputs", test_set)

# set filters
seed_idxs = [0]  
max_indices = 6
degrees = [-1, 1]  # todo update this as needed
term_list = [4, 64]
filter_cbc = False
max_base_std = 1e10
min_termination_time = 1
short, medium, long = 60, 600, 3600
remove_status_changes = False
win_threshold = .1

generators = ["None", "New", "Farkas", "All"]  # , "Disjunction", "Matrix", "Term", "Basis"]  #, "NoDisjunction", "NoMatrix", "NoTerm", "NoBasis"]

# set up some mappings
cat_map_new_lines = {
    "None": "Default",
    "Farkas": "Param Disj,\nParam Cuts",
    "Old": "Param Disj,\nCalc Cuts",
    "New": "Calc Disj,\nCalc Cuts"
}
cat_map = {
    "None": "Default",
    "Farkas": "Param Disj, Param Cuts",
    "Old": "Param Disj, Calc Cuts",
    "New": "Calc Disj, Calc Cuts"
}
perturbation_map = {
    "matrix": "Coefficient Matrix",
    "rhs": "Right Hand Side",
    "objective": "Objective"
}
label = {
    "postRootTime": "Time after Processing Root nodes",
    "rootDualBoundTimeSansVpc": "Root Processing Time (Minus VPC Generation)",
    "terminationTimeSansVpc": "Time (Minus VPC Generation)",
    "terminationTime": "Time",
    "nodes": "Nodes Processed",
    "iterations": "LP iterations",
}
unit = {
    "postRootTime": "(seconds)",
    "rootDualBoundTimeSansVpc": "(seconds)",
    "terminationTimeSansVpc": "(seconds)",
    "terminationTime": "(seconds)",
    "nodes": "(1000 nodes)",
    "iterations": "(1000 iterations)",
}
limits = {
    "postRootTime": 7200,
    "terminationTimeSansVpc": 7200,
    "terminationTime": 7200,
    "rootDualBoundTimeSansVpc": 5,
    "nodes": 10000,
    "iterations": 37500
}
bracket_bounds = {
    "short": (min_termination_time, short),
    "medium": (short, medium),
    "long": (medium, long)
}
param_map = {
    "degree": "Degree of Perturbation",
    "terms": "Number of Disjunctive Terms",
}

In [4]:
# matplotlib settings
plt.rc('text', usetex=True)  # use latex fonts
plt.rcParams['font.size'] = 18
plt.rcParams['figure.titlesize'] = 24
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['axes.labelsize'] = 18
plt.rcParams['xtick.labelsize'] = 16
plt.rcParams['ytick.labelsize'] = 16
plt.rcParams['legend.fontsize'] = 14

## Check run failures

In [5]:
# check if each folder in test_set_fldr has a corresponding .mps file in instance_fldr
# for instance in os.listdir(test_set_fldr):
#     if not os.path.isdir(os.path.join(test_set_fldr, instance)):
#         continue
#     if not os.path.exists(os.path.join(instance_fldr, f"{instance}.mps")):
#         # remove the folder if the instance is missing
#         # shutil.rmtree(os.path.join(test_set_fldr, instance))
#         print(f"Removed {instance} from test set")

In [6]:
# running list of strings contained by different error codes
# last two are catchalls
err = {
    "walltime": [],
    "bad_alloc": [],
    "out of memory": [],
    "vmem": [],
    "takeoffcuts": [],
    "solver is dual infeasible": [],
    "solver must be optimal": [],
    "segmentation fault": [],
    "no vpcs were made from a new disjunction": [],
    "must have primalbound >= root lp objective": [],
    "objective at parent nodes": [],
    "failed to optimize mip": [],
    "disjunction does not represent a full binary tree": [],
    "solver not proven optimal for nodes": [],
    "unable to open": [],
    "license": [],
    "dot product with obj differs from solver": [],
    "gurobi: error during callback: addCut": [],
    "cglvpc::setupconstraints: objective at disjunctive term": [],
    "unable to read file": [],
    "stats.id == stats_vec": [],
    "size of our disjunction is not what we expected it to be": [],
    "dimension must stay fixed": [],
    "vpcgenerator must be": [],
    "objective values must match": [],
    "objective at disjunctive term": [],
}

# read in cbc acceptable instances from cbc.txt
with open("cbc.txt", "r") as f:
    cbc_instances = f.read().split("\n")

# runs that errored out with new error code
other = []

# runs that had no errors
empty = []

# runs that only had warnings
warn_strs = ["warning", "prlp is primal infeasible", "farkas", "x:", "x[", "b:",
             "b[", "v:", "v[", "cut:", "A_i . x", "dot product with obj differs from solver"]
warning = []

# series that didn't run
no_go = []

# track sizes of instances
rows, cols, density = {}, {}, {}

# map the names
names = {}

# counts
count_series = 0
count_instances = 0
number_instances = {}

# iterate over all expected runs
for instance in os.listdir(test_set_fldr):
    if not os.path.isdir(os.path.join(test_set_fldr, instance)):
        continue
    # only look at cbc instances if we ran with cbc
    if instance not in cbc_instances and "gurobi" not in test_set and filter_cbc:
        continue
        
    # get the number of rows and columns in the instance
    mdl = gp.read(os.path.join(instance_fldr, f"{instance}.mps"))
    rows[instance] = mdl.NumConstrs
    cols[instance] = mdl.NumVars
    density[instance] = mdl.NumNZs / (mdl.NumConstrs * mdl.NumVars)
        
    for perturbation in os.listdir(os.path.join(test_set_fldr, instance)):
        if not os.path.isdir(os.path.join(test_set_fldr, instance, perturbation)):
            continue
        # only look at perturbations that were run
        p, d = perturbation.split("_")
        if int(d) not in degrees:
            continue
        for terms in term_list:
            for generator in generators:
                for seed_idx in seed_idxs:

                    # set variables for this iterations
                    count_series += 1
                    stem = f"{instance}_{perturbation}_{terms}_{generator}_{seed_idx}"
                    file_pth = os.path.join(results_fldr, f"{stem}.err")
                    series_fldr = os.path.join(test_set_fldr, instance, perturbation)
                    current_count = len([f for f in os.listdir(series_fldr) if f.endswith(".mps")])
                    count_instances += current_count
                    names[stem] = instance
                    number_instances[stem] = {
                        "expected": current_count,
                        "recorded": 0,
                        "generator": generator,
                        "error": "N/A"
                    }
    
                    # check if the series wasn't run
                    if not os.path.exists(file_pth):
                        number_instances[stem]["error"] = "no go"
                        no_go.append(stem)
                    
                    # check if the series ran with no errors or warnings
                    elif os.path.getsize(file_pth) == 0:
                        number_instances[stem]["error"] = "empty"
                        empty.append(stem)
                    
                    # track which error codes were thrown
                    else:
                        # read the file
                        with open(file_pth, "r") as f:
                            text = f.read().lower()
                        
                        # assign the error file to the appropriate list
                        found_code = False
                        for code in err:
                            if code in text:
                                if code == "dot product with obj differs from solver":
                                    pattern = r"obj viol from solver: (-?\d+\.\d+)\. calculated: (-?\d+\.\d+)"
                                    s, c = re.findall(pattern, text)[-1]
                                    # if we didn't terminate, this isn't an error, so keep going
                                    if abs(float(s) - float(c)) < 1e-3:
                                        continue
                                err[code].append(stem)
                                found_code = True
                                number_instances[stem]["error"] = code
                                break
                        if not found_code:
                            if all(not line or any(w in line for w in warn_strs) for line in text.splitlines()):
                                warning.append(stem)
                                number_instances[stem]["error"] = "warning"
                            else:
                                other.append(stem)
                                number_instances[stem]["error"] = "other"

Set parameter Username
Academic license - for non-commercial use only - expires 2025-08-21
Read MPS format model from file instances/miplib_2017_5000_paper2/bienst2.mps
Reading time = 0.00 seconds
bienst2: 576 rows, 505 columns, 2184 nonzeros
Read MPS format model from file instances/miplib_2017_5000_paper2/set3-15.mps
Reading time = 0.01 seconds
set3-15: 3747 rows, 4019 columns, 13747 nonzeros
Read MPS format model from file instances/miplib_2017_5000_paper2/f2gap801600.mps
Reading time = 0.00 seconds
f2gap801600: 80 rows, 1600 columns, 3200 nonzeros
Read MPS format model from file instances/miplib_2017_5000_paper2/stein15inf.mps
Reading time = 0.00 seconds
stein15inf: 37 rows, 15 columns, 135 nonzeros
Read MPS format model from file instances/miplib_2017_5000_paper2/neos-3610173-itata.mps
Reading time = 0.00 seconds
neos-3610173-itata: 747 rows, 844 columns, 2130 nonzeros
Read MPS format model from file instances/miplib_2017_5000_paper2/10teams.mps
Reading time = 0.00 seconds
10TEAMS

In [7]:
# check which series didn't run
print(no_go)

[]


In [8]:
# get the proportion of series that at least got started
1 - (len(no_go) / count_series)

1.0

In [9]:
# out of time - got hung up in code somewhere - ok
print(err["walltime"])
len(err["walltime"]) / count_series

['cod105_rhs_1_64_New_0', 'cod105_rhs_1_64_Farkas_0', 'cod105_rhs_1_64_All_0', 'cod105_objective_1_64_New_0', 'cod105_objective_1_64_Farkas_0', 'cod105_objective_1_64_All_0', 'cod105_matrix_1_64_New_0', 'cod105_matrix_1_64_Farkas_0', 'cod105_matrix_1_64_All_0', 'cod105_matrix_-1_64_All_0', 'cod105_objective_-1_64_New_0', 'cod105_objective_-1_64_All_0', 'cod105_bound_1_64_Farkas_0', 'cod105_bound_1_64_All_0', 'irp_bound_-1_4_New_0', 'neos-1445743_bound_1_4_New_0', 'neos-1605061_rhs_1_64_New_0', 'neos-1605061_rhs_1_64_All_0', 'neos-1605061_bound_1_64_All_0', 'neos-1445765_bound_1_4_New_0', 'neos-1445738_bound_1_64_New_0']


0.0019901440485216074

In [10]:
# out of memory - memory is maxed already - this is what it is
# todo: figure out where we ran short on memory so we can explain why we dropped them
print(err["bad_alloc"] + err["out of memory"] + err["vmem"])
len(err["bad_alloc"] + err["out of memory"] + err["vmem"]) / count_series

['f2gap801600_objective_1_64_New_0', 'f2gap801600_bound_1_64_New_0', '10teams_objective_-1_64_New_0', '10teams_bound_1_64_New_0', '10teams_bound_1_64_All_0', 'piperout-d27_objective_1_64_New_0', 'piperout-d27_objective_1_64_Farkas_0', 'piperout-d27_objective_1_64_All_0', 'piperout-d27_objective_-1_64_New_0', 'piperout-d27_objective_-1_64_Farkas_0', 'piperout-d27_objective_-1_64_All_0', 'piperout-d20_objective_1_64_New_0', 'piperout-d20_objective_1_64_All_0', 'piperout-d20_objective_-1_64_New_0', 'piperout-d20_objective_-1_64_Farkas_0', 'piperout-d20_objective_-1_64_All_0', 'piperout-d20_bound_1_4_New_0', 'piperout-d20_bound_1_64_New_0', 'piperout-d20_bound_1_64_Farkas_0', 'piperout-d20_bound_1_64_All_0', 'qnet1_matrix_1_64_New_0', 'neos-2328163-agri_objective_1_64_New_0', 'neos-2328163-agri_matrix_1_64_New_0', 'neos-2328163-agri_objective_-1_64_New_0', 'neos-2328163-agri_bound_1_64_New_0', 'mod010_objective_1_64_New_0', 'mod010_objective_1_64_Farkas_0', 'mod010_objective_1_64_All_0', '

0.027482941622441245

In [11]:
# rerun this if want to give more memory to some instances
# bad_alloc_names = set(n.split("_")[0] for n in err["bad_alloc"])
# mem = pd.read_csv("more_memory.csv", index_col=0)
# mem["reason"] = "hard solve" 
# 
# for n in bad_alloc_names:
#     if f"{n}.mps" not in mem.index:
#         new_row = pd.DataFrame([{'file_name': f"{n}.mps", 'memory': 16.0, 'reason': 'big disjunction'}]).set_index('file_name')
#         mem = pd.concat([mem, new_row])
#     else:
#         mem.loc[f'{n}.mps', 'memory'] = 16.0
# 
# mem.to_csv("more_memory.csv")

In [12]:
# this is an issue with John's bookkeeping - not much we can do here
print(err["takeoffcuts"])
len(err["takeoffcuts"]) / count_series

[]


0.0

In [13]:
print(err["solver is dual infeasible"])
len(err["solver is dual infeasible"]) / count_series

[]


0.0

In [14]:
# these are usually issues with CLP finding optimality - not much we can do here
print(err["solver must be optimal"])
len(err["solver must be optimal"]) / count_series

[]


0.0

In [15]:
print(err["segmentation fault"])
len(err["segmentation fault"]) / count_series

['neos-3665875-lesum_rhs_1_64_New_0', 'neos-3665875-lesum_matrix_1_64_All_0']


0.00018953752843062926

In [16]:
# seg_err = {
#     "Bad image at line": [],
# }
# 
# seg_other = []
# 
# for stem in err["segmentation fault"]:
#     file_pth = os.path.join(results_fldr, f"{stem}.out")
# 
#     with open(file_pth, "r") as f:
#         text = f.read()
#     
#     # assign the error file to the appropriate list
#     found_code = False
#     for code in seg_err:
#         if code in text:
#             seg_err[code].append(stem)
#             found_code = True
#             break
#     if not found_code:
#         seg_other.append(stem)

In [17]:
# print(seg_err["Bad image at line"])
# len(seg_err["Bad image at line"]) / len(err["segmentation fault"]) if err["segmentation fault"] else 0

In [18]:
# print(seg_other)
# len(seg_other)/len(err["segmentation fault"]) if err["segmentation fault"] else 0

In [19]:
# # get breakdown of why vpc generation failed - mostly from lack of provisioning
# for code, exps in seg_err.items():
#     print(f"{code}: {len(exps) / len(err['segmentation fault']) if err['segmentation fault'] else 0}")
# 
# print(f"other: {len(seg_other) / len(err['segmentation fault']) if err['segmentation fault'] else 0}")

In [20]:
# todo: check aleks' removals and drop those below for similar reasons
# todo: check size of disjunctions and decide what to do with those that are too big
# these should all be from the problem being too big and hitting the time limit or integer solutions
print(err["no vpcs were made from a new disjunction"])
missing_4_term = [n for n in err["no vpcs were made from a new disjunction"] if "_4_" in n]
missing_64_term = [n for n in err["no vpcs were made from a new disjunction"] if "_64_" in n]
print(f'4 term: {len(missing_4_term) / count_series}')
print(f'64 term: {len(missing_64_term) / count_series}')

['bienst2_rhs_1_64_New_0', 'bienst2_rhs_1_64_Farkas_0', 'bienst2_rhs_1_64_All_0', 'bienst2_objective_1_64_New_0', 'bienst2_objective_1_64_Farkas_0', 'bienst2_objective_1_64_All_0', 'bienst2_matrix_-1_64_New_0', 'bienst2_matrix_-1_64_Farkas_0', 'bienst2_matrix_-1_64_All_0', 'bienst2_objective_-1_64_New_0', 'bienst2_objective_-1_64_Farkas_0', 'bienst2_objective_-1_64_All_0', 'neos-555343_rhs_1_4_New_0', 'neos-555343_rhs_1_4_Farkas_0', 'neos-555343_rhs_1_4_All_0', 'neos-555343_rhs_1_64_New_0', 'neos-555343_rhs_1_64_Farkas_0', 'neos-555343_rhs_1_64_All_0', 'neos-555343_objective_1_4_New_0', 'neos-555343_objective_1_4_Farkas_0', 'neos-555343_objective_1_4_All_0', 'neos-555343_objective_1_64_New_0', 'neos-555343_objective_1_64_Farkas_0', 'neos-555343_objective_1_64_All_0', 'neos-555343_matrix_1_4_New_0', 'neos-555343_matrix_1_4_Farkas_0', 'neos-555343_matrix_1_4_All_0', 'neos-555343_matrix_1_64_New_0', 'neos-555343_matrix_1_64_Farkas_0', 'neos-555343_matrix_1_64_All_0', 'neos-555343_bound_1_

In [21]:
# vpc_err = {
#     "CglVPC: Finishing with exit reason: PRLP_TIME_LIMIT": [],
#     "CglVPC: Finishing with exit reason: TIME_LIMIT": [],
#     "CglVPC: Finishing with exit reason: NO_CUTS_LIKELY": [],
#     "CglVPC: Finishing with exit reason: PRLP_INFEASIBLE": [],
#     "CglVPC: Finishing with exit reason: SUCCESS": [],
#     "CglVPC: Finishing with exit reason: OPTIMAL_SOLUTION_FOUND": [],
#     "CglVPC: Finishing with exit reason: FAIL_LIMIT": [],
#     "CglVPC: Finishing with exit reason: NO_DISJUNCTION": [],
# }
# 
# vpc_other = []
# 
# for stem in err["no vpcs were made from a new disjunction"]:
#     file_pth = os.path.join(results_fldr, f"{stem}.out")
# 
#     with open(file_pth, "r") as f:
#         text = f.read()
#     
#     # assign the error file to the appropriate list
#     found_code = False
#     for code in vpc_err:
#         if code in text:
#             vpc_err[code].append(stem)
#             found_code = True
#             break
#     if not found_code:
#         vpc_other.append(stem)

In [22]:
# print(vpc_err["CglVPC: Finishing with exit reason: PRLP_TIME_LIMIT"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: PRLP_TIME_LIMIT"]) / len(err["no vpcs were made from a new disjunction"])

In [23]:
# print(vpc_err["CglVPC: Finishing with exit reason: TIME_LIMIT"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: TIME_LIMIT"]) / len(err["no vpcs were made from a new disjunction"])

In [24]:
# print(vpc_err["CglVPC: Finishing with exit reason: NO_CUTS_LIKELY"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: NO_CUTS_LIKELY"]) / len(err["no vpcs were made from a new disjunction"])

In [25]:
# print(vpc_err["CglVPC: Finishing with exit reason: PRLP_INFEASIBLE"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: PRLP_INFEASIBLE"]) / len(err["no vpcs were made from a new disjunction"])

In [26]:
# print(vpc_err["CglVPC: Finishing with exit reason: SUCCESS"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: SUCCESS"]) / len(err["no vpcs were made from a new disjunction"])

In [27]:
# print(vpc_err["CglVPC: Finishing with exit reason: OPTIMAL_SOLUTION_FOUND"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: OPTIMAL_SOLUTION_FOUND"]) / len(err["no vpcs were made from a new disjunction"])

In [28]:
# print(vpc_err["CglVPC: Finishing with exit reason: FAIL_LIMIT"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: FAIL_LIMIT"]) / len(err["no vpcs were made from a new disjunction"])

In [29]:
# print(vpc_err["CglVPC: Finishing with exit reason: NO_DISJUNCTION"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: NO_DISJUNCTION"]) / len(err["no vpcs were made from a new disjunction"])

In [30]:
# vpc_other

In [31]:
# # get breakdown of why vpc generation failed - mostly from lack of provisioning/problem being too large
# if err["no vpcs were made from a new disjunction"]:
#     for code, exps in vpc_err.items():
#         print(f"{code}: {len(exps) / len(err['no vpcs were made from a new disjunction'])}")
#     
#     print(f"other: {len(vpc_other) / len(err['no vpcs were made from a new disjunction'])}")

In [32]:
print(err["must have primalbound >= root lp objective"])
len(err["must have primalbound >= root lp objective"]) / count_series

['neos4_objective_1_4_None_0', 'neos4_objective_1_64_None_0', 'supportcase25_bound_1_4_None_0', 'supportcase25_bound_1_4_New_0', 'supportcase25_bound_1_4_Farkas_0', 'supportcase25_bound_1_4_All_0', 'supportcase25_bound_1_64_None_0', 'supportcase25_bound_1_64_All_0']


0.000758150113722517

In [33]:
# LP relaxation objective is not going to match root nodes objective when warm starting 
print(err["objective at parent nodes"])
len(err["objective at parent nodes"]) / count_series

[]


0.0

In [34]:
# not enough tolerance added to bound (or we hit time limit) - element 2 from 5 and 4 from 4
print(err["failed to optimize mip"])
len(err["failed to optimize mip"]) / count_series

[]


0.0

In [35]:
# todo: figure out why
print(err["disjunction does not represent a full binary tree"])
len(err["disjunction does not represent a full binary tree"]) / count_series

[]


0.0

In [36]:
# again issue with not getting through vpc generation in time
# todo: handle this gracefully
print(err["solver not proven optimal for nodes"])
len(err["solver not proven optimal for nodes"]) / count_series

[]


0.0

In [37]:
print(err["unable to open"])
len(err["unable to open"]) / count_series

[]


0.0

In [38]:
print(err["license"])
len(err["license"]) / count_series

[]


0.0

In [39]:
print(warning)
len(warning) / count_series

['bienst2_rhs_1_4_New_0', 'bienst2_rhs_1_4_Farkas_0', 'bienst2_rhs_1_4_All_0', 'bienst2_objective_1_4_New_0', 'bienst2_objective_1_4_Farkas_0', 'bienst2_objective_1_4_All_0', 'bienst2_matrix_-1_4_New_0', 'bienst2_objective_-1_4_New_0', 'bienst2_objective_-1_4_Farkas_0', '10teams_objective_1_4_New_0', '10teams_matrix_1_64_New_0', '10teams_matrix_1_64_Farkas_0', '10teams_matrix_1_64_All_0', '10teams_objective_-1_4_New_0', 'gmu-35-40_objective_1_4_New_0', 'gmu-35-40_objective_1_4_Farkas_0', 'gmu-35-40_objective_1_4_All_0', 'gmu-35-40_objective_1_64_New_0', 'gmu-35-40_matrix_-1_4_New_0', 'gmu-35-40_matrix_-1_4_Farkas_0', 'gmu-35-40_matrix_-1_4_All_0', 'gmu-35-40_objective_-1_4_New_0', 'gmu-35-40_objective_-1_4_Farkas_0', 'gmu-35-40_objective_-1_4_All_0', 'gmu-35-40_objective_-1_64_New_0', 'neos-3610051-istra_rhs_-1_64_New_0', 'neos-3610051-istra_matrix_-1_4_New_0', 'neos-3610051-istra_objective_-1_64_New_0', 'ci-s4_objective_1_64_New_0', 'ci-s4_objective_-1_64_New_0', 'neos-585192_objectiv

0.05525018953752843

In [40]:
# errors unaccounted for
print(other)
len(other) / count_series

['neos-3083819-nubu_matrix_-1_4_New_0', 'neos-3083819-nubu_matrix_-1_64_New_0', 'eil33-2_objective_1_4_New_0']


0.0002843062926459439

In [41]:
# proportion of series that were improperly provisioned
(len(err["bad_alloc"] + err["out of memory"] + err["walltime"] + err["vmem"])) / count_series

0.02947308567096285

In [42]:
# todo handle this
print(err["dot product with obj differs from solver"])
len(err["dot product with obj differs from solver"]) / count_series

['neos-3592146-hawea_matrix_-1_4_New_0']


9.476876421531463e-05

In [43]:
# changed code to ignore this error
print(err["gurobi: error during callback: addCut"])
len(err["gurobi: error during callback: addCut"]) / count_series

[]


0.0

In [44]:
# largely not replicating - only issue I could find was aleks missing updated objective from CLP when resolving to check this
print(err["cglvpc::setupconstraints: objective at disjunctive term"])
len(err["cglvpc::setupconstraints: objective at disjunctive term"]) / count_series

[]


0.0

In [45]:
# not replicating - rerun
print(err["unable to read file"])
len(err["unable to read file"]) / count_series

[]


0.0

In [46]:
# not replicating - rerun
print(err["stats.id == stats_vec"])
len(err["stats.id == stats_vec"]) / count_series

[]


0.0

In [47]:
print(err["size of our disjunction is not what we expected it to be"])
len(err["size of our disjunction is not what we expected it to be"]) / count_series

[]


0.0

In [48]:
print(err["vpcgenerator must be"])
len(err["vpcgenerator must be"]) / count_series

[]


0.0

In [49]:
print(err["dimension must stay fixed"])
len(err["dimension must stay fixed"]) / count_series

[]


0.0

In [50]:
print(err["objective values must match"])
len(err["objective values must match"]) / count_series

['f2gap801600_objective_-1_64_New_0', 'neos-3610173-itata_matrix_-1_4_New_0', 'neos-3610051-istra_matrix_-1_64_New_0', 'f2gap401600_objective_1_64_New_0', 'traininstance6_objective_-1_4_New_0', 'traininstance6_objective_-1_64_New_0', 'mas74_matrix_1_4_New_0', 'mas74_matrix_1_64_New_0', 'mas74_matrix_-1_64_New_0', 'rentacar_objective_-1_4_New_0', 'neos-3421095-cinca_objective_1_64_New_0', 'irp_objective_-1_4_New_0', 'aligninq_matrix_1_4_New_0', 'neos-631517_matrix_-1_64_New_0', 'neos-3610040-iskar_matrix_-1_64_New_0', 'neos-3627168-kasai_matrix_-1_64_New_0', 'pg_rhs_1_64_New_0', 'neos-3611689-kaihu_matrix_-1_64_New_0', 'neos-3611689-kaihu_objective_-1_64_New_0', 'mas76_matrix_1_64_New_0', 'mas76_matrix_-1_64_New_0', 'neos-3754480-nidda_objective_1_4_New_0', 'neos-3754480-nidda_objective_1_64_New_0', 'neos-3754480-nidda_rhs_-1_4_New_0', 'neos-3754480-nidda_rhs_-1_64_New_0', 'neos-3754480-nidda_objective_-1_64_New_0', 'control30-3-2-3_matrix_-1_64_New_0', 'control30-3-2-3_objective_-1_64_

0.003316906747536012

In [51]:
print(err["objective at disjunctive term"])
len(err["objective at disjunctive term"]) / count_series

['neos-631517_matrix_1_4_New_0', 'neos-631517_matrix_1_64_New_0', 'gus-sch_matrix_1_4_New_0', 'gus-sch_matrix_1_64_New_0', 'neos-5182409-nasivi_matrix_-1_4_New_0', 'neos-5182409-nasivi_matrix_-1_64_New_0', 'roll3000_matrix_-1_4_New_0', 'roll3000_matrix_-1_64_New_0', 'control30-3-2-3_matrix_-1_4_New_0']


0.0008529188779378317

In [52]:
# get breakdown of errors
for code, exps in err.items():
    print(f"{code}: {len(exps) / count_series}")

print(f"other: {len(other) / count_series}")

print(f"warning: {len(warning) / count_series}")

print(f"no errors/warnings: {len(empty) / count_series}")

print(f"no go: {len(no_go) / count_series}")

walltime: 0.0019901440485216074
bad_alloc: 0.023407884761182714
out of memory: 0.0014215314632297195
vmem: 0.00265352539802881
takeoffcuts: 0.0
solver is dual infeasible: 0.0
solver must be optimal: 0.0
segmentation fault: 0.00018953752843062926
no vpcs were made from a new disjunction: 0.2967210007581501
must have primalbound >= root lp objective: 0.000758150113722517
objective at parent nodes: 0.0
failed to optimize mip: 0.0
disjunction does not represent a full binary tree: 0.0
solver not proven optimal for nodes: 0.0
unable to open: 0.0
license: 0.0
dot product with obj differs from solver: 9.476876421531463e-05
gurobi: error during callback: addCut: 0.0
cglvpc::setupconstraints: objective at disjunctive term: 0.0
unable to read file: 0.0
stats.id == stats_vec: 0.0
size of our disjunction is not what we expected it to be: 0.0
dimension must stay fixed: 0.0
vpcgenerator must be: 0.0
objective values must match: 0.003316906747536012
objective at disjunctive term: 0.000852918877937831

## Read in data

In [66]:
# map generator names to the corresponding data frames
df_map = {g: pd.DataFrame() for g in generators} 
gap_map = {g: pd.DataFrame() for g in generators}
regex = re.compile(r'([a-zA-Z0-9-]+(?:_o)?)_([a-z]+)_([0-9-]+)_([0-9]+)_([a-zA-Z ]+)')
solution_pattern = r"_(\d+)\.pb"

# declaring types as needed
column_types = {
    "lpBound": float,
    "lpBoundPostVpc": float,
    "disjunctiveDualBound": float,
    "primalBound": float,
    "rootDualBound": float,
    "dualBound": float
}

skipped_instances = set()
primal_bounds = {}
same_solution = {}

# iterate over all files in the folder
for file_name in os.listdir(results_fldr):
    
    file_pth = os.path.join(results_fldr, file_name)
    
    # if the file is not a nonempty csv, skip it
    if not file_name.endswith(".csv") or os.path.getsize(file_pth) == 0:
        continue
    
    # get the experimental set up
    match = regex.search(file_name)
    instance_name = names.get(file_name[:-4])
    if not instance_name:
        skipped_instances.add(file_name[:-4].split("_")[0])
        os.remove(file_pth)
        continue
    # instance_name = match.group(1)
    perturbation = match.group(2)
    assert perturbation in ["matrix", "rhs", "bound", "objective"], f"Unknown perturbation: {perturbation}"
    expo = int(match.group(3))
    assert expo in degrees, f"Unknown degree: {expo}"
    degree = 2**int(expo)
    terms = int(match.group(4))
    assert terms in term_list, f"Unknown number of terms: {terms}"
    generator = match.group(5)
    assert generator in generators, f"Unknown generator: {generator}"
    base_name = f"{instance_name}_0"
    
    # get the primal bounds for this experiment
    cur_instance_test_set_fldr = os.path.join(test_set_fldr, instance_name, f"{perturbation}_{expo}")
    for test_set_file in os.listdir(cur_instance_test_set_fldr):
        if test_set_file.endswith(".pb"):
            with open(os.path.join(cur_instance_test_set_fldr, test_set_file), "r") as f:
                primal_bounds[perturbation, expo, ".".join(test_set_file.split(".")[:-1])] = float(f.read())
                
    # see if solution changed
    for test_set_file in os.listdir(cur_instance_test_set_fldr):
        if test_set_file.endswith(".pb"):
            perturbation_name = ".".join(test_set_file.split(".")[:-1])
            same_solution[perturbation, expo, perturbation_name] = \
                primal_bounds[perturbation, expo, base_name] == primal_bounds[perturbation, expo, perturbation_name]
            
    # read the file
    df = pd.read_csv(file_pth, keep_default_na=False, dtype=column_types, index_col=0)
    
    for instance_idx in df.index:
        
        # fill in primal bounds if missing
        # df.loc[instance_idx, "primalBound"] = min(primal_bounds.get(stem_map.get(instance_idx), 1e100), df.loc[instance_idx, "primalBound"])
        df.loc[instance_idx, "primalBound"] = min(
            primal_bounds[perturbation, expo, f"{instance_name}_{instance_idx}"], df.loc[instance_idx, "primalBound"]
        )
        
        # same with root dual bound
        df.loc[instance_idx, "rootDualBound"] = df.loc[instance_idx, "rootDualBound"] if df.loc[instance_idx, "rootDualBound"] < 1e100 else df.loc[instance_idx, "lpBoundPostVpc"] 
    
    # get rid of the index so the rest of the notebook works
    df.reset_index(inplace=True)
    
    # add some identifying columns
    df["instance"] = instance_name
    df["perturbation"] = perturbation
    df["degree"] = degree
    df["terms"] = terms
    df["rows"] = rows[instance_name]
    df["cols"] = cols[instance_name]
    df["density"] = density[instance_name]
    
    # append to the appropriate data frame
    df_map[generator] = pd.concat([df_map[generator], df])
    
    # track recorded vs expected experiments
    number_instances[file_name[:-4]]["recorded"] = len(df)

In [67]:
# convert number_instances to dataframe
frame = pd.DataFrame(number_instances).T
frame.head()

Unnamed: 0,expected,recorded,generator,error
bienst2_rhs_1_4_None_0,6,6,,empty
bienst2_rhs_1_4_New_0,6,6,New,warning
bienst2_rhs_1_4_Farkas_0,6,6,Farkas,warning
bienst2_rhs_1_4_All_0,6,6,All,warning
bienst2_rhs_1_64_None_0,6,6,,empty


In [68]:
# redo the runs that have incomplete data that we're not sure should be that way
redos = frame.loc[(frame["expected"] > frame["recorded"]) & (frame["error"] != "no vpcs were made from a new disjunction")].index.tolist()
redos = pd.DataFrame({"experiment": redos})
redos.to_csv("redos.csv", index=False)

In [69]:
if "miplib" in test_set or "quick" in test_set:
    # group frame by generator and sum remaining columns
    gb = frame.groupby(["generator", "error"]).sum().reset_index()
    gb["missing"] = gb["expected"] - gb["recorded"]
    total = gb.groupby("generator")[["expected", "missing"]].sum().reset_index()
    gb = pd.merge(gb, total, on="generator", suffixes=("", " total"))
    gb["ratio missing (by generator)"] = gb["missing"] / gb["missing total"]
    gb["ratio missing (by generator)"] = gb["ratio missing (by generator)"].apply(lambda x: round(x, 4))
    gb = gb.loc[:, ~gb.columns.str.contains("total")]  # get rid of the total columns
    gb.set_index(["generator", "error"], inplace=True)
    gb.to_csv(os.path.join(out_fldr, "missing_table.csv"), index=False, mode="w")
else:
    gb = None
gb

Unnamed: 0_level_0,Unnamed: 1_level_0,expected,recorded,missing,ratio missing (by generator)
generator,error,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
All,bad_alloc,376,106,270,0.0448
All,empty,6849,6266,583,0.0967
All,must have primalbound >= root lp objective,10,8,2,0.0003
All,no vpcs were made from a new disjunction,5055,0,5055,0.8389
All,out of memory,15,8,7,0.0012
All,segmentation fault,5,0,5,0.0008
All,vmem,39,12,27,0.0045
All,walltime,33,0,33,0.0055
All,warning,816,772,44,0.0073
Farkas,bad_alloc,323,101,222,0.0414


In [70]:
for gen in generators:
    masks = {
        0: -1e20 > df_map[gen]["lpBound"],
        1: df_map[gen]["lpBound"] - 1e-3 > df_map[gen]["lpBoundPostVpc"],
        2: (df_map[gen]["lpBoundPostVpc"] - 1e-3 > df_map[gen]["disjunctiveDualBound"]) & ((gen == "None") | (gen == "New")),
        3: df_map[gen]["rootDualBound"] - 1e-3 > df_map[gen]["dualBound"],
        4: (df_map[gen]["dualBound"] - 1e-3 > df_map[gen]["primalBound"]) & (df_map[gen]["dualBound"] / df_map[gen]["primalBound"] > 1 + 1e-3),
        5: df_map[gen]["primalBound"] > 1e20,
        6: 0 > df_map[gen]["vpcGenerationTime"],
        7: df_map[gen]["vpcGenerationTime"] - 1e-3 > df_map[gen]["rootDualBoundTime"],
        8: df_map[gen]["rootDualBoundTime"] - 1e-3 > df_map[gen]["terminationTime"],
        9: df_map[gen]["vpcGenerationTime"] - 1e-3 > df_map[gen]["bestSolutionTime"],
        10: df_map[gen]["bestSolutionTime"] - 1e-3 > df_map[gen]["terminationTime"]
    }
    for i, mask in masks.items():
        print(f"{gen} {i}: {mask.sum() / len(df_map[gen])}")

None 0: 0.0
None 1: 0.0
None 2: 0.0
None 3: 0.0
None 4: 0.0009123393902531741
None 5: 0.0
None 6: 0.0
None 7: 0.0
None 8: 0.0
None 9: 0.0
None 10: 0.0
New 0: 0.0
New 1: 0.0
New 2: 0.0
New 3: 0.0
New 4: 0.0011240691302515104
New 5: 0.0
New 6: 0.0
New 7: 0.0
New 8: 0.0
New 9: 0.0
New 10: 0.0
Farkas 0: 0.0
Farkas 1: 0.0
Farkas 2: 0.0
Farkas 3: 0.0
Farkas 4: 0.0010206685378923194
Farkas 5: 0.0
Farkas 6: 0.0
Farkas 7: 0.0
Farkas 8: 0.0
Farkas 9: 0.0
Farkas 10: 0.0
All 0: 0.0
All 1: 0.0
All 2: 0.0
All 3: 0.0
All 4: 0.0012548800892359174
All 5: 0.0
All 6: 0.0
All 7: 0.0
All 8: 0.0
All 9: 0.0
All 10: 0.0


In [71]:
# it shouldn't be possible that dual bound > primal bound. this only happens when we use the saved primal bound, which was used to set the dual bound
df_map["Farkas"][masks[0]]

  df_map["Farkas"][masks[0]]


Unnamed: 0,instanceIndex,seedIndex,vpcGenerator,terms,lpBound,disjunctiveDualBound,lpBoundPostVpc,rootDualBound,dualBound,primalBound,...,tighten_disjunction,tighten_matrix_perturbation,tighten_infeasible_to_feasible_term,tighten_feasible_to_infeasible_basis,instance,perturbation,degree,rows,cols,density


In [72]:
for gen in df_map:
    mask = (-1e20 > df_map[gen]["lpBound"]) | \
        (df_map[gen]["lpBound"] - 1e-3 > df_map[gen]["lpBoundPostVpc"]) | \
        ((df_map[gen]["lpBoundPostVpc"] - 1e-3 > df_map[gen]["disjunctiveDualBound"]) & (gen != "Farkas")) | \
        (df_map[gen]["rootDualBound"] - 1e-3 > df_map[gen]["dualBound"]) | \
        ((df_map[gen]["dualBound"] - 1e-3 > df_map[gen]["primalBound"]) & (df_map[gen]["dualBound"] / df_map[gen]["primalBound"] > 1 + 1e-3)) | \
        (df_map[gen]["primalBound"] > 1e20) | \
        (0 > df_map[gen]["vpcGenerationTime"]) | \
        (df_map[gen]["vpcGenerationTime"] - 1e-3 > df_map[gen]["rootDualBoundTime"]) | \
        (df_map[gen]["rootDualBoundTime"] - 1e-3 > df_map[gen]["terminationTime"]) | \
        (df_map[gen]["vpcGenerationTime"] - 1e-3 > df_map[gen]["bestSolutionTime"]) | \
        (df_map[gen]["bestSolutionTime"] - 1e-3 > df_map[gen]["terminationTime"])
    print(f"{gen}: {mask.sum() / len(df_map[gen])}")
    df_map[gen] = df_map[gen][~mask]

None: 0.0009123393902531741
New: 0.0011240691302515104
Farkas: 0.0010206685378923194
All: 0.0013943112102621305


In [73]:
# merge the different data frames into one
join_cols = ["instance", "perturbation", "degree", "terms", "instanceIndex", "seedIndex"]
df = df_map[generators[0]].merge(df_map[generators[1]], on=join_cols, suffixes=(f" {generators[0]}", None))
for g1, g2 in zip(generators[1:-1], generators[2:]):
    df = df.merge(df_map[g2], on=join_cols, suffixes=(f" {g1}", None if g2 != generators[-1] else f" {g2}"))
df.head()

Unnamed: 0,instanceIndex,seedIndex,vpcGenerator None,terms,lpBound None,disjunctiveDualBound None,lpBoundPostVpc None,rootDualBound None,dualBound None,primalBound None,...,termRemainsFeasibleBasisInfeasible All,cutsChangedCoefficients All,feasibleTermsPrunedByBound All,tighten_disjunction All,tighten_matrix_perturbation All,tighten_infeasible_to_feasible_term All,tighten_feasible_to_infeasible_basis All,rows All,cols All,density All
0,0,0,,64,-120.0,-120.0,-120.0,-120.0,-120.0,-120.0,...,0,0,0,0,0,0,0,8357,10735,0.000534
1,1,0,,64,-120.0,-120.0,-120.0,-120.0,-120.0,-120.0,...,0,0,0,1,1,1,1,8357,10735,0.000534
2,2,0,,64,-120.5,-120.5,-120.5,-120.5,-120.5,-120.5,...,0,0,0,1,1,1,1,8357,10735,0.000534
3,0,0,,64,-4632.298153,-4632.298153,-4632.298153,-4631.571278,-4607.140232,-4606.67961,...,0,0,0,0,0,0,0,46,29,0.976762
4,1,0,,64,-4628.667162,-4628.667162,-4628.667162,-4627.808946,-4604.833773,-4604.373375,...,0,0,0,1,1,1,1,46,29,0.976762


In [74]:
# get proportion of tests run to completion
len(generators) * len(df) / count_instances

0.4865888771025913

In [75]:
def gap_closed(df, col):
    gap = abs(df[col] - df["lpBound None"]) / abs(df['primalBound None'] - df["lpBound None"])
    gap[(gap > 1) | (gap == np.nan)] = 1  # get corner cases
    return gap

# Function to map values based on a dictionary
def check_same_solution(row):
    # Create a tuple of the key based on the key_columns
    return same_solution[row["perturbation"], int(math.log2(row["degree"])), f'{row["instance"]}_{row["instanceIndex"]}']

In [76]:
# find the optimality gap closed by each generator
df["Disjunction (New)"] = gap_closed(df, "disjunctiveDualBound New")
df["Disjunction (Old)"] = gap_closed(df, "disjunctiveDualBound Farkas")
for g in generators:
    if g != "None":
        df[f"VPCs ({g})"] = gap_closed(df, f"lpBoundPostVpc {g}")        
    df[f"Root Cuts ({g})"] = gap_closed(df, f"rootDualBound {g}")

df["Root Optimality Gap Improvement"] = df["Root Cuts (Farkas)"] - df["Root Cuts (None)"] 
# df = df.dropna()

In [78]:
# find times without vpc generation
df["terminationTimeSansVpc None"] = df["terminationTime None"]
df["rootDualBoundTimeSansVpc None"] = df["rootDualBoundTime None"]
for gen in generators:
    if gen != "None":
        df[f"terminationTimeSansVpc {gen}"] = df[f"terminationTime {gen}"] - df[f"vpcGenerationTime {gen}"]
        df[f"rootDualBoundTimeSansVpc {gen}"] = df[f"rootDualBoundTime {gen}"] - df[f"vpcGenerationTime {gen}"]
    df[f"postRootTime {gen}"] = df[f"terminationTime {gen}"] - df[f"rootDualBoundTime {gen}"]
    if gen not in ["None", "New"]:
        df[f"terminationTimeImprovement {gen}"] = (df["terminationTime None"] - df[f"terminationTime {gen}"]) / df["terminationTime None"]
        df[f"terminationTimeSansVpcImprovement {gen}"] = (df["terminationTimeSansVpc None"] - df[f"terminationTimeSansVpc {gen}"]) / df["terminationTimeSansVpc None"]
        df[f"nodesImprovement {gen}"] = (df["nodes None"] - df[f"nodes {gen}"]) / df["nodes None"] 
        df[f"iterationsImprovement {gen}"] = (df["iterations None"] - df[f"iterations {gen}"]) / df["iterations None"] 
        df[f"terminationTimeRatio {gen}"] = df[f"terminationTime {gen}"] / df["terminationTime None"]
        df[f"terminationTimeSansVpcRatio {gen}"] = df[f"terminationTimeSansVpc {gen}"] / df["terminationTimeSansVpc None"]
        df[f"nodesRatio {gen}"] = df[f"nodes {gen}"] / df["nodes None"] 
        df[f"iterationsRatio {gen}"] = df[f"iterations {gen}"] / df["iterations None"]
        df[f"nodesImproves {gen}"] = df["nodes None"] > df[f"nodes {gen}"]
        df[f"terminationTimeImproves {gen}"] = df["terminationTime None"] > df[f"terminationTime {gen}"]
        df[f"terminationTimeSansVpcImproves {gen}"] = df["terminationTimeSansVpc None"] > df[f"terminationTimeSansVpc {gen}"]
        df[f"iterationsImproves {gen}"] = df["iterations None"] > df[f"iterations {gen}"]
        
for gen in generators:
    # make wins columns
    if gen != "New":
        compare_gens = [gen2 for gen2 in generators if gen2 not in ["New", gen]]
        df[f'nodesWin{gen}'] = pd.concat(
            [(df[f'nodes {gen2}'] * (1 - win_threshold) > df[f'nodes {gen}']) for gen2 in compare_gens], axis=1
        ).all(axis=1)
        df[f'terminationTimeWin{gen}'] = pd.concat(
            [(df[f'terminationTime {gen2}'] * (1 - win_threshold) > df[f'terminationTime {gen}']) for gen2 in compare_gens], axis=1
        ).all(axis=1)
        df[f'terminationTimeSansVpcWin{gen}'] = pd.concat(
            [(df[f'terminationTimeSansVpc {gen2}'] * (1 - win_threshold) > df[f'terminationTimeSansVpc {gen}']) for gen2 in compare_gens], axis=1
        ).all(axis=1)
        df[f'iterationsWin{gen}'] = pd.concat(
            [(df[f'iterations {gen2}'] * (1 - win_threshold) > df[f'iterations {gen}']) for gen2 in compare_gens], axis=1
        ).all(axis=1)

df["bracket"] = ["short" if t <= short else "medium" if t <= medium else "long" for t in df["terminationTime None"]]
df["sameSolution"] = df.apply(check_same_solution, axis=1)

  df[f'nodesWin{gen}'] = pd.concat(
  df[f'terminationTimeWin{gen}'] = pd.concat(
  df[f'terminationTimeSansVpcWin{gen}'] = pd.concat(
  df[f'iterationsWin{gen}'] = pd.concat(
  df["bracket"] = ["short" if t <= short else "medium" if t <= medium else "long" for t in df["terminationTime None"]]
  df["sameSolution"] = df.apply(check_same_solution, axis=1)


In [79]:
# get sensitivity stats as ratios
for gen_name in generators:
    if gen_name == "None":
        continue
    df[f"infeasibleTermsRatio {gen_name}"] = df[f"infeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
    df[f"infeasibleToFeasibleTermsRatio {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
    df[f"zeroInfeasibleToFeasibleTerms {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] == 0
    df[f"feasibleToInfeasibleTermsRatio {gen_name}"] = df[f"feasibleToInfeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]

  df[f"infeasibleTermsRatio {gen_name}"] = df[f"infeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"infeasibleToFeasibleTermsRatio {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"zeroInfeasibleToFeasibleTerms {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] == 0
  df[f"feasibleToInfeasibleTermsRatio {gen_name}"] = df[f"feasibleToInfeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"infeasibleTermsRatio {gen_name}"] = df[f"infeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"infeasibleToFeasibleTermsRatio {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"zeroInfeasibleToFeasibleTerms {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] == 0
  df[f"feasibleToInfeasibleTermsRatio {gen_name}"] = df[f"feasibleToInfeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"infeasibleTermsRatio {gen_name}"] = df[f"infeasibleTerms 

In [80]:
def optimality_gap(df, generator=None):
    if generator:
        return abs(df[f"primalBound {generator}"] - df[f"dualBound {generator}"]) / \
            abs(df[f"primalBound {generator}"])
    else:
        return abs(df[f"primalBound"] - df[f"dualBound"]) / abs(df[f"primalBound"])

In [81]:
# aleks filters
# df = df.loc[df["terms"] == df["actualTerms Farkas"]]
# df = df.loc[df["zeroInfeasibleToFeasibleTerms Farkas"]]

In [82]:
df.head()

Unnamed: 0,instanceIndex,seedIndex,vpcGenerator None,terms,lpBound None,disjunctiveDualBound None,lpBoundPostVpc None,rootDualBound None,dualBound None,primalBound None,...,zeroInfeasibleToFeasibleTerms New,feasibleToInfeasibleTermsRatio New,infeasibleTermsRatio Farkas,infeasibleToFeasibleTermsRatio Farkas,zeroInfeasibleToFeasibleTerms Farkas,feasibleToInfeasibleTermsRatio Farkas,infeasibleTermsRatio All,infeasibleToFeasibleTermsRatio All,zeroInfeasibleToFeasibleTerms All,feasibleToInfeasibleTermsRatio All
0,0,0,,64,-120.0,-120.0,-120.0,-120.0,-120.0,-120.0,...,True,0.0,0.886364,0.0,True,0.0,0.886364,0.0,True,0.0
1,1,0,,64,-120.0,-120.0,-120.0,-120.0,-120.0,-120.0,...,True,0.0,0.886364,0.0,True,0.0,0.886364,0.0,True,0.0
2,2,0,,64,-120.5,-120.5,-120.5,-120.5,-120.5,-120.5,...,True,0.0,0.886364,0.0,True,0.0,0.886364,0.0,True,0.0
3,0,0,,64,-4632.298153,-4632.298153,-4632.298153,-4631.571278,-4607.140232,-4606.67961,...,True,0.0,0.852273,0.0,True,0.0,0.852273,0.0,True,0.0
4,1,0,,64,-4628.667162,-4628.667162,-4628.667162,-4627.808946,-4604.833773,-4604.373375,...,True,0.0,0.852273,0.0,True,0.0,0.852273,0.0,True,0.0


In [83]:
# set aside core columns and filter for all subsequent dataframes
group_cols = ["instance", "perturbation", "bracket", "degree", "terms"]
id_cols = ["instanceIndex"]

# keep the instance, perturbation, instanceIndex triples that exist for all combinations of degree and terms
# where VPC did not find the optimal solution
full_df = df.loc[df["Disjunction (New)"] < .9999]
triples = (full_df.groupby(
        ["instance", "perturbation", "instanceIndex"]
    ).size().reset_index().rename(columns={0: "count"}))
triples.head()

Unnamed: 0,instance,perturbation,instanceIndex,count
0,10teams,bound,0,2
1,10teams,bound,1,2
2,10teams,bound,2,2
3,10teams,bound,3,1
4,10teams,bound,4,1


In [84]:
# uncomment to filter for only the triples that exist for all combinations of degree and terms (and seed index)
triples = triples[triples["count"] == len(degrees) * len(term_list) * len(seed_idxs)]
full_df = full_df.merge(triples, on=["instance", "perturbation", "instanceIndex"])
full_df.to_csv(os.path.join(out_fldr, "cleaned_combined_complete.csv"), index=False, mode="w")

## Check Root Node Stats

In [85]:
def interleave(list_of_lists):
    return [item for sublist in zip(*list_of_lists) for item in sublist]

In [86]:
# additional filtering for dataframe on bounds
fields = ["Disjunction (New)", "Disjunction (Old)"] + [f"VPCs ({gen_name})" for gen_name in generators if gen_name != "None"] + \
    interleave([[f"Root Cuts ({gen_name})", f"terminationTime {gen_name}", f"nodes {gen_name}",
                 f"iterations {gen_name}", f"terminationTimeSansVpc {gen_name}", f"vpcGenerationTime {gen_name}", 
                 f"rootDualBoundTime {gen_name}"]
                for gen_name in generators]) + \
    interleave([[f"infeasibleTermsRatio {gen_name}", f"infeasibleToFeasibleTermsRatio {gen_name}",
                 f"zeroInfeasibleToFeasibleTerms {gen_name}", f"feasibleToInfeasibleTermsRatio {gen_name}"]
                for gen_name in generators if gen_name != "None"])

# now reduce bound_df to just the perturbed instances - make > -1 to include base instance
bound_df = full_df.loc[full_df["instanceIndex"] > 0, group_cols + id_cols + fields]  #  & (full_df["Disjunction (Old)"] > .1)

In [87]:
def geometric_mean(series, offset=1e-6):
    adjusted_series = series + offset  # Add a small offset to avoid zeros
    return np.exp(np.log(adjusted_series).mean())

# paper currently uses mean, but we can switch to geometric mean if we want
aggregations = {f: "mean" for f in fields}  # geometric_mean if f not in ["sameSolution"] else
aggregations["instance"] = "nunique"
aggregations["instanceIndex"] = "count"

In [88]:
# get gap closed by degree and term
out = bound_df.groupby(["degree", "terms"]).agg(aggregations).reset_index()
out.to_csv(os.path.join(out_fldr, "bound_table.csv"), index=False, mode="w")
out

Unnamed: 0,degree,terms,Disjunction (New),Disjunction (Old),VPCs (New),VPCs (Farkas),VPCs (All),Root Cuts (None),Root Cuts (New),Root Cuts (Farkas),...,infeasibleToFeasibleTermsRatio Farkas,infeasibleToFeasibleTermsRatio All,zeroInfeasibleToFeasibleTerms New,zeroInfeasibleToFeasibleTerms Farkas,zeroInfeasibleToFeasibleTerms All,feasibleToInfeasibleTermsRatio New,feasibleToInfeasibleTermsRatio Farkas,feasibleToInfeasibleTermsRatio All,instance,instanceIndex
0,0.5,4,0.052658,0.038267,0.033176,0.026918,0.026855,0.607281,0.618786,0.616637,...,0.0,0.0,1.0,1.0,1.0,0.0,0.007745,0.007745,107,581
1,0.5,64,0.124877,0.101308,0.073162,0.054245,0.055715,0.610662,0.625234,0.620877,...,0.004177,0.004123,1.0,0.958692,0.960413,0.0,0.011519,0.011519,107,581
2,2.0,4,0.071336,0.033584,0.036953,0.020106,0.020051,0.635432,0.636771,0.642332,...,0.000861,0.000861,1.0,0.998279,0.998279,0.0,0.024096,0.024096,107,581
3,2.0,64,0.152333,0.085527,0.077945,0.035098,0.036726,0.636363,0.646929,0.639888,...,0.015018,0.015018,1.0,0.891566,0.891566,0.0,0.040103,0.040103,107,581


In [89]:
# now break it down by type of perturbation
out = bound_df.groupby(["degree", "terms", "perturbation"]).agg(aggregations).reset_index()
out.to_csv(os.path.join(out_fldr, "bound_table_by_perturbation.csv"), index=False, mode="w")
out

Unnamed: 0,degree,terms,perturbation,Disjunction (New),Disjunction (Old),VPCs (New),VPCs (Farkas),VPCs (All),Root Cuts (None),Root Cuts (New),...,infeasibleToFeasibleTermsRatio Farkas,infeasibleToFeasibleTermsRatio All,zeroInfeasibleToFeasibleTerms New,zeroInfeasibleToFeasibleTerms Farkas,zeroInfeasibleToFeasibleTerms All,feasibleToInfeasibleTermsRatio New,feasibleToInfeasibleTermsRatio Farkas,feasibleToInfeasibleTermsRatio All,instance,instanceIndex
0,0.5,4,matrix,0.058387,0.034501,0.03631,0.026262,0.026038,0.713973,0.714525,...,0.0,0.0,1.0,1.0,1.0,0.0,0.020979,0.020979,50,143
1,0.5,4,objective,0.046098,0.040164,0.031799,0.027771,0.027757,0.569434,0.585759,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,97,354
2,0.5,4,rhs,0.070552,0.036682,0.033645,0.024443,0.024443,0.58515,0.594985,...,0.0,0.0,1.0,1.0,1.0,0.0,0.017857,0.017857,31,84
3,0.5,64,matrix,0.130634,0.095592,0.066492,0.044537,0.046366,0.711832,0.720161,...,0.011375,0.011156,1.0,0.895105,0.902098,0.0,0.014739,0.014739,50,143
4,0.5,64,objective,0.115638,0.103245,0.070946,0.057394,0.057324,0.57499,0.590926,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,97,354
5,0.5,64,rhs,0.154015,0.102879,0.093853,0.0575,0.06485,0.588765,0.608218,...,0.009526,0.009526,1.0,0.892857,0.892857,0.0,0.054585,0.054585,31,84
6,2.0,4,matrix,0.076885,0.02798,0.042574,0.013938,0.013796,0.770987,0.759314,...,0.003497,0.003497,1.0,0.993007,0.993007,0.0,0.059441,0.059441,50,143
7,2.0,4,objective,0.050085,0.036394,0.035615,0.024137,0.024103,0.587509,0.595638,...,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,97,354
8,2.0,4,rhs,0.151447,0.031284,0.033024,0.013621,0.013623,0.606626,0.6015,...,0.0,0.0,1.0,1.0,1.0,0.0,0.065476,0.065476,31,84
9,2.0,64,matrix,0.155109,0.065452,0.076367,0.007852,0.014849,0.768435,0.763635,...,0.051082,0.051082,1.0,0.65035,0.65035,0.0,0.065802,0.065802,50,143


## High Performing Root Stats

In [90]:
# # example table for VPC strength
# out.loc[out["degree"] != 16, ["degree", "terms", "perturbation"] + [c for c in out.columns if "VPCs" in c and "No" not in c]]

In [91]:
# # example table for root cut strength
# out.loc[out["degree"] != 16, ["degree", "terms", "perturbation"] + [c for c in out.columns if "Root Cuts" in c and ("No" not in c or "None" in c)]]

In [92]:
# # example table for root cut generation time
# out.loc[out["degree"] != 16, ["degree", "terms", "perturbation"] + [c for c in out.columns if "rootDualBoundTime" in c and ("No" not in c or "None" in c)]]

## Check Termination Stats

In [93]:
# only check perturbed instances that solve to optimality and VPC didn't find optimal solution
mask = (df["Disjunction (New)"] < .9999) & (df["instanceIndex"] > 0) & (optimality_gap(df, "New") <= 1e-4) & \
    (optimality_gap(df, "None") <= 1e-4) & (optimality_gap(df, "Farkas") <= 1e-4) & \
    (optimality_gap(df, "All") <= 1e-4) & (df["terminationTime None"] > min_termination_time)
# if remove_status_changes:
#     mask = mask & (df["infeasibleToFeasibleTermsRatio Farkas"] == 0) & (df["feasibleToInfeasibleTermsRatio Farkas"] == 0)

gap_df = df.loc[mask]

In [94]:
def rename_cols(cols, feature):
    new_cols = []
    for col in cols:
        if f"{feature}Win" in col:
            col = col.replace(f"{feature}Win", f"{label[feature]} Win % ")
        elif "instanceIndex" in col:
            col = col.replace("instanceIndex", "Test Instances")
        elif "instance" in col:
            col = col.replace("instance", "Base Instances")
        new_cols.append(col)
    return new_cols

In [95]:
# get the win percentages for each feature on average and broken down by grouping type
features = ["nodes", "terminationTime", "terminationTimeSansVpc"]

wins = {}

for feature in features:
    
    # define aggregating operations
    aggregations = {f"{feature}Win{gen}": "mean" for gen in generators if gen != "New"}
    if feature == features[-1]:
        aggregations = aggregations | {"instance": "nunique", "instanceIndex": "count"}
    
    # find the average wins for the feature grouped by degree and terms
    feature_wins = gap_df[gap_df["perturbation"] != "bound"].groupby(["degree", "terms"]).agg(aggregations)
    
    # clean up formatting
    feature_wins.columns = rename_cols(feature_wins.columns, feature)
    win_cols = [c for c in feature_wins.columns if "Win" in c]
    feature_wins[win_cols] = feature_wins[win_cols].applymap(lambda x: round(x * 100, 2))
    instance_cols = [c for c in feature_wins.columns if "instance" in c]
    feature_wins[instance_cols] = feature_wins[instance_cols].applymap(lambda x: int(x))
    
    # save the df
    wins[feature] = feature_wins
    
# bring them all together now
all_wins = pd.concat(wins.values(), axis=1)
all_wins.reset_index().to_csv(os.path.join(out_fldr, f"branch_and_bound_wins.csv"), index=False, mode="w")
all_wins

Unnamed: 0_level_0,Unnamed: 1_level_0,Nodes Processed Win % None,Nodes Processed Win % Farkas,Nodes Processed Win % All,Time Win % None,Time Win % Farkas,Time Win % All,Time (Minus VPC Generation) Win % None,Time (Minus VPC Generation) Win % Farkas,Time (Minus VPC Generation) Win % All,Base Instances,Test Instances
degree,terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0.5,4,22.35,17.67,15.8,25.26,21.62,20.37,24.22,21.62,21.41,129,962
0.5,64,22.44,16.2,15.6,40.71,15.9,14.41,27.34,19.17,19.47,106,673
2.0,4,22.74,15.78,15.43,27.84,19.84,18.1,25.99,19.61,19.26,142,862
2.0,64,23.25,15.77,16.1,44.72,15.28,13.33,28.78,18.05,19.51,109,615


In [101]:
def aggregate_wins(gap_df, feature, grouping):

    def custom_key(col):
        # bump improvement columns second to win percentage columns
        offset = int("Improvement %" in col)
        # check groupings
        if 'matrix' in col or 'short' in col:
            return (2 + offset, col)
        elif 'objective' in col or 'medium' in col:
            return (4 + offset, col)
        elif 'rhs' in col or 'long' in col:
            return (6 + offset, col)    
        return (offset, col)
    
    # find the average wins for the feature grouped by degree, terms and grouping type
    win_aggregations = {f"{feature}Win{gen}": "mean" for gen in generators if gen != "New"}
    feature_wins = gap_df[gap_df["perturbation"] != "bound"].groupby(["degree", "terms", grouping]).agg(win_aggregations).reset_index().pivot(
        index=['degree', 'terms'], columns=grouping, values=[f"{feature}Win{gen}" for gen in generators if gen != "New"]
    )
    feature_wins.columns = rename_cols([' '.join(col).strip() for col in feature_wins.columns.values], feature)
    feature_wins = feature_wins[sorted(feature_wins.columns, key=custom_key)]
    feature_wins = feature_wins.applymap(lambda x: round(x * 100, 2))
    
    # get the counts for the feature grouped by degree, terms and grouping type
    count_aggregations = {"instance": "nunique", "instanceIndex": "count"}
    feature_counts = gap_df[gap_df["perturbation"] != "bound"].groupby(["degree", "terms", grouping]).agg(count_aggregations).reset_index().pivot(
        index=['degree', 'terms'], columns=grouping, values=["instance", "instanceIndex"]
    )
    feature_counts.columns = rename_cols([' '.join(col).strip() for col in feature_counts.columns.values], feature)
    feature_counts = feature_counts[sorted(feature_counts.columns, key=custom_key)]
    feature_counts = feature_counts.applymap(lambda x: int(x))
    
    # save wins, base instance counts, and test instance counts to csv
    feature_wins.reset_index().to_csv(os.path.join(out_fldr, f"branch_and_bound_wins_{feature}_{grouping}.csv"), index=False, mode="w")
    feature_counts.reset_index().to_csv(os.path.join(out_fldr, f"branch_and_bound_counts_{grouping}.csv"), index=False, mode="w")
    
    return feature_wins, feature_counts

In [102]:
wins, counts = aggregate_wins(gap_df, "terminationTimeSansVpc", "bracket")
wins

Unnamed: 0_level_0,Unnamed: 1_level_0,Time (Minus VPC Generation) Win % All short,Time (Minus VPC Generation) Win % Farkas short,Time (Minus VPC Generation) Win % None short,Time (Minus VPC Generation) Win % All medium,Time (Minus VPC Generation) Win % Farkas medium,Time (Minus VPC Generation) Win % None medium,Time (Minus VPC Generation) Win % All long,Time (Minus VPC Generation) Win % Farkas long,Time (Minus VPC Generation) Win % None long
degree,terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.5,4,20.22,18.05,27.44,23.77,24.15,21.13,21.68,30.77,17.48
0.5,64,20.2,21.46,25.0,19.47,14.74,32.63,16.09,18.39,26.44
2.0,4,19.8,17.8,27.0,20.49,22.13,27.46,14.41,22.03,18.64
2.0,64,19.45,16.71,28.77,18.24,21.18,30.0,22.5,17.5,26.25


In [103]:
counts

Unnamed: 0_level_0,Unnamed: 1_level_0,Base Instances short,Test Instances short,Base Instances medium,Test Instances medium,Base Instances long,Test Instances long
degree,terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.5,4,103,554,62,265,36,143
0.5,64,85,396,49,190,27,87
2.0,4,107,500,63,244,35,118
2.0,64,84,365,46,170,26,80


In [104]:
wins, counts = aggregate_wins(gap_df, "terminationTimeSansVpc", "perturbation")
wins

Unnamed: 0_level_0,Unnamed: 1_level_0,Time (Minus VPC Generation) Win % All matrix,Time (Minus VPC Generation) Win % Farkas matrix,Time (Minus VPC Generation) Win % None matrix,Time (Minus VPC Generation) Win % All objective,Time (Minus VPC Generation) Win % Farkas objective,Time (Minus VPC Generation) Win % None objective,Time (Minus VPC Generation) Win % All rhs,Time (Minus VPC Generation) Win % Farkas rhs,Time (Minus VPC Generation) Win % None rhs
degree,terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.5,4,20.77,18.31,26.76,22.79,21.85,22.79,17.69,27.21,24.49
0.5,64,22.68,17.01,28.87,16.57,17.73,30.81,22.22,25.93,16.3
2.0,4,17.87,22.71,22.71,19.12,18.36,27.15,21.97,19.7,26.52
2.0,64,27.21,19.05,21.77,18.23,20.44,28.73,13.21,8.49,38.68


In [105]:
counts

Unnamed: 0_level_0,Unnamed: 1_level_0,Base Instances matrix,Test Instances matrix,Base Instances objective,Test Instances objective,Base Instances rhs,Test Instances rhs
degree,terms,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.5,4,89,284,105,531,53,147
0.5,64,71,194,83,344,50,135
2.0,4,71,207,117,523,48,132
2.0,64,50,147,88,362,41,106


## High Performing Run Time Subset

In [78]:
# additional filtering for dataframe on run time
fields = [f"terminationTime {gen}" for gen in generators] + \
         [f"terminationTimeImprovement {gen}" for gen in generators if gen not in ["None", "New"]]
# only check perturbed instances that solve to optimality and VPC didn't find optimal solution
mask = (df["Disjunction (New)"] < .9999) & (df["instanceIndex"] > 0) & (optimality_gap(df, "New") <= 1e-4) & \
    (optimality_gap(df, "None") <= 1e-4) & (optimality_gap(df, "Farkas") <= 1e-4) & \
       (df["terminationTime None"] > min_termination_time)

# create time dataframe
time_df = df.loc[mask, group_cols + id_cols + fields]

In [79]:
aggregations = {f"Average Time {gen}": (f"terminationTime {gen}", geometric_mean) for gen in generators} | \
    {f"Average Improvement {gen}": (f"terminationTimeImprovement {gen}", "mean") for gen in generators if gen not in ["None", "New"]} | \
    {"count": ("terminationTimeImprovement Farkas", "size")}

tmp = time_df.groupby(["instance", "perturbation", "degree", "terms"]).agg(**aggregations).reset_index()
tmp = tmp[(tmp["count"] > 1)]
tmp.to_csv(os.path.join(out_fldr, "high_perform_all.csv"), index=False, mode="w")
tmp.head()

Unnamed: 0,instance,perturbation,degree,terms,Average Time None,Average Time New,Average Time Farkas,Average Time All,Average Time Disjunction,Average Time Matrix,Average Time Term,Average Time Basis,Average Improvement Farkas,Average Improvement All,Average Improvement Disjunction,Average Improvement Matrix,Average Improvement Term,Average Improvement Basis,count
0,aflow30a,matrix,1,4,3.397194,6.908022,3.193351,2.461512,3.536989,3.335433,2.450183,2.44232,-0.014592,0.244497,-0.19257,-0.114798,0.22128,0.223497,9
1,aflow30a,matrix,1,64,5.262113,8.67061,5.42308,5.206116,4.875512,4.878356,4.999452,4.997785,-0.143554,-0.123315,-0.052529,-0.051905,-0.029195,-0.028607,8
2,aflow30a,rhs,1,4,2.071641,8.422406,2.870855,2.752171,2.24098,2.53226,2.320147,2.639981,-0.42038,-0.489788,-0.205667,-0.278145,-0.12408,-0.288904,3
3,aflow30a,rhs,1,64,3.12403,7.63577,3.190507,3.526027,3.366901,2.722832,2.450773,4.550764,-0.056778,-0.207908,-0.272866,0.085129,0.172113,-0.47032,3
4,aligninq,matrix,1,4,129.830683,130.857454,123.017751,113.325551,126.388712,126.584325,102.625044,126.977951,-0.113077,-0.018756,-0.076447,-0.078747,0.158329,-0.087549,10


In [80]:
def make_improvement_table(tmp, generator):
    
    # columns we always choose
    key_cols = ["degree", "terms", "perturbation", "instance"]
    time_cols = [f"Average Time {g}" for g in ["None", "New", "Farkas"]]
    
    # subset the ones we want
    all_df = tmp[
        key_cols + time_cols + [f"Average Time {generator}", f"Average Improvement Farkas", f"Average Improvement {generator}", "count"]
    ].sort_values(f"Average Improvement {generator}", ascending=False)
    all_df = all_df[all_df[f"Average Improvement {generator}"] > 0]
    best_df = all_df.loc[
        all_df.groupby(['perturbation', 'degree', 'terms'])[f'Average Improvement {generator}'].idxmax()
    ].sort_values(f"Average Improvement {generator}", ascending=False)
    
    # save all the winners
    all_df.to_csv(os.path.join(out_fldr, f"high_perform_{generator.lower()}.csv"), index=False, mode="w")
    
    # return just the best
    return all_df, best_df

In [81]:
all_df, best_df = make_improvement_table(tmp, "Disjunction")
best_df.loc[[285, 392, 213, 373, 186, 172], [c for c in best_df.columns if "Improvement" not in c]]

Unnamed: 0,degree,terms,perturbation,instance,Average Time None,Average Time New,Average Time Farkas,Average Time Disjunction,count
285,1,4,matrix,neos-860300,129.235105,97.853468,70.46698,40.607542,10
392,1,4,objective,ran13x13,35.537751,14.976852,17.016891,13.095101,10
213,4,4,rhs,neos-3046615-murg,231.78326,168.801212,200.152352,150.464393,2
373,4,64,rhs,pg5_34,3.150931,71.660436,2.063292,1.944108,8
186,16,4,matrix,n7-3,8.888611,30.491732,13.156323,5.293075,2
172,16,64,objective,misc07,83.671824,47.174409,41.371435,28.676896,10


In [82]:
all_df, best_df = make_improvement_table(tmp, "Matrix")
best_df.loc[best_df["perturbation"] == "matrix", [c for c in best_df.columns if "Improvement" not in c]].sort_values(["degree", "terms"])

Unnamed: 0,degree,terms,perturbation,instance,Average Time None,Average Time New,Average Time Farkas,Average Time Matrix,count
285,1,4,matrix,neos-860300,129.235105,97.853468,70.46698,50.04503,10
388,1,64,matrix,ran13x13,22.612178,47.155872,20.394313,15.924265,10
332,4,4,matrix,neos18,5.268503,11.776036,4.612012,3.310361,9
75,4,64,matrix,g200x740,26.799084,62.360644,33.217829,20.611519,2
186,16,4,matrix,n7-3,8.888611,30.491732,13.156323,5.340057,2
296,16,64,matrix,neos-911970,29.294123,26.23094,26.757343,19.56126,6


In [83]:
all_df, best_df = make_improvement_table(tmp, "Term")
best_df = best_df.loc[(best_df["perturbation"] == "matrix") | (best_df["perturbation"] == "rhs"),
                      [c for c in best_df.columns if "Improvement" not in c]]
best_df.loc[[285, 373, 410, 296, 131, 341]].sort_values(["degree", "terms"])

Unnamed: 0,degree,terms,perturbation,instance,Average Time None,Average Time New,Average Time Farkas,Average Time Term,count
285,1,4,matrix,neos-860300,129.235105,97.853468,70.46698,39.316548,10
341,1,64,rhs,neos18,13.65517,638.616928,16.640351,9.637685,2
410,4,4,matrix,rout,10.08579,8.247897,10.493955,6.188331,10
373,4,64,rhs,pg5_34,3.150931,71.660436,2.063292,1.975149,8
131,16,4,rhs,mas76,47.049209,63.572653,44.410446,38.407041,4
296,16,64,matrix,neos-911970,29.294123,26.23094,26.757343,19.431839,6


In [84]:
all_df, best_df = make_improvement_table(tmp, "Basis")
best_df = best_df.loc[(best_df["perturbation"] == "matrix") | (best_df["perturbation"] == "rhs"),
                      [c for c in best_df.columns if "Improvement" not in c]]
best_df.loc[[285, 341, 195, 390, 296, 295]].sort_values(["degree", "terms"])

Unnamed: 0,degree,terms,perturbation,instance,Average Time None,Average Time New,Average Time Farkas,Average Time Basis,count
285,1,4,matrix,neos-860300,129.235105,97.853468,70.46698,37.981369,10
341,1,64,rhs,neos18,13.65517,638.616928,16.640351,9.832116,2
195,4,4,rhs,neos-1445743,43.887964,2706.963349,36.087721,24.29644,4
390,4,64,matrix,ran13x13,54.497417,66.367305,77.418971,42.131783,10
295,16,4,matrix,neos-911970,19.315416,18.745702,17.694233,10.563624,5
296,16,64,matrix,neos-911970,29.294123,26.23094,26.757343,16.078533,6


In [87]:
all_df, best_df = make_improvement_table(tmp, "All")
# swap 335 and 120 for poster vs paper
best_df.loc[[285, 335, 35, 390, 301, 172], [c for c in best_df.columns if "Improvement" not in c]].sort_values(["degree", "terms"])

Unnamed: 0,degree,terms,perturbation,instance,Average Time None,Average Time New,Average Time Farkas,Average Time All,count
285,1,4,matrix,neos-860300,129.235105,97.853468,70.46698,56.302859,10
335,1,64,objective,neos18,26.496431,51.959339,24.825103,20.318973,3
35,4,4,rhs,blp-ir98,12.691349,143.31459,8.665618,7.336168,2
390,4,64,matrix,ran13x13,54.497417,66.367305,77.418971,45.969478,10
301,16,4,objective,neos-911970,102.516404,69.198447,139.244088,41.855894,2
172,16,64,objective,misc07,83.671824,47.174409,41.371435,35.810067,10


In [86]:
best_df

Unnamed: 0,degree,terms,perturbation,instance,Average Time None,Average Time New,Average Time Farkas,Average Time All,Average Improvement Farkas,Average Improvement All,count
172,16,64,objective,misc07,83.671824,47.174409,41.371435,35.810067,0.488652,0.555762,10
285,1,4,matrix,neos-860300,129.235105,97.853468,70.46698,56.302859,0.324082,0.507047,10
301,16,4,objective,neos-911970,102.516404,69.198447,139.244088,41.855894,-1.864602,0.483036,2
188,4,4,objective,n7-3,3.373049,6.932989,1.356729,1.35382,0.420758,0.421294,3
35,4,4,rhs,blp-ir98,12.691349,143.31459,8.665618,7.336168,0.306335,0.398933,2
346,1,4,objective,pg,26.701193,20.936635,15.808465,17.704975,0.39458,0.322339,10
120,1,64,matrix,mas76,17.747258,38.971226,13.788161,9.943748,0.021632,0.298938,4
349,4,64,objective,pg,10.368741,46.866594,7.412535,7.476839,0.277314,0.266978,10
74,4,4,matrix,g200x740,41.13818,57.774963,36.850937,29.597412,0.104088,0.262586,2
290,1,4,rhs,neos-860300,10.12323,78.90281,16.034225,7.10235,-0.727623,0.252821,7


In [93]:
# find ratios of all vs farkas
# ijoc santanu and prachi paper on adding one cut and tree blows up
# are the cuts getting stronger?
# does time (excluding cut generation) improve when tightening improves