In [1]:
import gurobipy as gp
import math
from matplotlib import pyplot as plt
from matplotlib.ticker import FuncFormatter, PercentFormatter
import numpy as np
import pandas as pd
import re
import os
import shutil

In [2]:
# output generation for paper 2

In [153]:
# get input paths
test_set = "bm23"
instance_fldr = os.path.join("instances", test_set)
test_set_fldr = os.path.join("test_sets", test_set)
results_fldr = os.path.join("results", test_set)
out_fldr = os.path.join("outputs", test_set)

# set filters
seed_idxs = [0]  
max_indices = 11
degrees = [0, 2, 4]
term_list = [4, 64]
filter_cbc = False
max_base_std = 1e10
min_termination_time = 1
short, medium, long = 60, 600, 3600
remove_status_changes = False
win_threshold = .1

generators = ["None", "New", "Farkas", "All", "NoDisjunction", "NoMatrix", "NoTerm", "NoBasis"]

# set up some mappings
cat_map_new_lines = {
    "None": "Default",
    "Farkas": "Param Disj,\nParam Cuts",
    "Old": "Param Disj,\nCalc Cuts",
    "New": "Calc Disj,\nCalc Cuts"
}
cat_map = {
    "None": "Default",
    "Farkas": "Param Disj, Param Cuts",
    "Old": "Param Disj, Calc Cuts",
    "New": "Calc Disj, Calc Cuts"
}
perturbation_map = {
    "matrix": "Coefficient Matrix",
    "rhs": "Right Hand Side",
    "bounds": "Variable Bounds",
    "objective": "Objective"
}
label = {
    "postRootTime": "Time after Processing Root nodes",
    "rootDualBoundTimeSansVpc": "Root Processing Time (Minus VPC Generation)",
    "terminationTimeSansVpc": "Time (Minus VPC Generation)",
    "terminationTime": "Time",
    "nodes": "Nodes Processed",
    "iterations": "LP iterations",
}
unit = {
    "postRootTime": "(seconds)",
    "rootDualBoundTimeSansVpc": "(seconds)",
    "terminationTimeSansVpc": "(seconds)",
    "terminationTime": "(seconds)",
    "nodes": "(1000 nodes)",
    "iterations": "(1000 iterations)",
}
limits = {
    "postRootTime": 7200,
    "terminationTimeSansVpc": 7200,
    "terminationTime": 7200,
    "rootDualBoundTimeSansVpc": 5,
    "nodes": 10000,
    "iterations": 37500
}
bracket_bounds = {
    "short": (min_termination_time, short),
    "medium": (short, medium),
    "long": (medium, long)
}
param_map = {
    "degree": "Degree of Perturbation",
    "terms": "Number of Disjunctive Terms",
}

In [154]:
# matplotlib settings
plt.rc('text', usetex=True)  # use latex fonts
plt.rcParams['font.size'] = 18
plt.rcParams['figure.titlesize'] = 24
plt.rcParams['axes.titlesize'] = 20
plt.rcParams['axes.labelsize'] = 18
plt.rcParams['xtick.labelsize'] = 16
plt.rcParams['ytick.labelsize'] = 16
plt.rcParams['legend.fontsize'] = 14

## Check run failures

In [155]:
# check if each folder in test_set_fldr has a corresponding .mps file in instance_fldr
# for instance in os.listdir(test_set_fldr):
#     if not os.path.isdir(os.path.join(test_set_fldr, instance)):
#         continue
#     if not os.path.exists(os.path.join(instance_fldr, f"{instance}.mps")):
#         # remove the folder if the instance is missing
#         # shutil.rmtree(os.path.join(test_set_fldr, instance))
#         print(f"Removed {instance} from test set")

In [156]:
# running list of strings contained by different error codes
# last two are catchalls
err = {
    "walltime": [],
    "bad_alloc": [],
    "out of memory": [],
    "takeoffcuts": [],
    "solver is dual infeasible": [],
    "solver must be optimal": [],
    "segmentation fault": [],
    "no vpcs were made from a new disjunction": [],
    "must have primalbound >= root lp objective": [],
    "objective at parent nodes": [],
    "failed to optimize mip": [],
    "disjunction does not represent a full binary tree": [],
    "solver not proven optimal for nodes": [],
    "unable to open": [],
    "license": [],
    "dot product with obj differs from solver": [],
    "gurobi: error during callback: addCut": [],
    "cglvpc::setupconstraints: objective at disjunctive term": [],
    "unable to read file": [],
    "stats.id == stats_vec": [],
    "size of our disjunction is not what we expected it to be": [],
    "dimension must stay fixed": [],
    "vpcgenerator must be": [],
}

# read in cbc acceptable instances from cbc.txt
with open("cbc.txt", "r") as f:
    cbc_instances = f.read().split("\n")

# runs that errored out with new error code
other = []

# runs that had no errors
empty = []

# runs that only had warnings
warn_strs = ["warning", "prlp is primal infeasible", "farkas", "x:", "x[", "b:",
             "b[", "v:", "v[", "cut:", "A_i . x", "dot product with obj differs from solver"]
warning = []

# series that didn't run
no_go = []

# track sizes of instances
rows, cols, density = {}, {}, {}

# map the names
names = {}

# counts
count_series = 0
count_instances = 0
number_instances = {}

# iterate over all expected runs
for instance in os.listdir(test_set_fldr):
    if not os.path.isdir(os.path.join(test_set_fldr, instance)):
        continue
    # only look at cbc instances if we ran with cbc
    if instance not in cbc_instances and "gurobi" not in test_set and filter_cbc:
        continue
        
    # get the number of rows and columns in the instance
    mdl = gp.read(os.path.join(instance_fldr, f"{instance}.mps"))
    rows[instance] = mdl.NumConstrs
    cols[instance] = mdl.NumVars
    density[instance] = mdl.NumNZs / (mdl.NumConstrs * mdl.NumVars)
        
    for perturbation in os.listdir(os.path.join(test_set_fldr, instance)):
        if not os.path.isdir(os.path.join(test_set_fldr, instance, perturbation)):
            continue
        # only look at perturbations that were run
        p, d = perturbation.split("_")
        if int(d) not in degrees:
            continue
        for terms in term_list:
            for generator in generators:
                for seed_idx in seed_idxs:

                    # set variables for this iterations
                    count_series += 1
                    stem = f"{instance}_{perturbation}_{terms}_{generator}_{seed_idx}"
                    file_pth = os.path.join(results_fldr, f"{stem}.err")
                    series_fldr = os.path.join(test_set_fldr, instance, perturbation)
                    current_count = len([f for f in os.listdir(series_fldr) if f.endswith(".mps")])
                    count_instances += current_count
                    names[stem] = instance
                    number_instances[stem] = {
                        "expected": current_count,
                        "recorded": 0,
                        "generator": generator,
                        "error": "N/A"
                    }
    
                    # check if the series wasn't run
                    if not os.path.exists(file_pth):
                        number_instances[stem]["error"] = "no go"
                        no_go.append(stem)
                    
                    # check if the series ran with no errors or warnings
                    elif os.path.getsize(file_pth) == 0:
                        number_instances[stem]["error"] = "empty"
                        empty.append(stem)
                    
                    # track which error codes were thrown
                    else:
                        # read the file
                        with open(file_pth, "r") as f:
                            text = f.read().lower()
                        
                        # assign the error file to the appropriate list
                        found_code = False
                        for code in err:
                            if code in text:
                                if code == "dot product with obj differs from solver":
                                    pattern = r"obj viol from solver: (-?\d+\.\d+)\. calculated: (-?\d+\.\d+)"
                                    s, c = re.findall(pattern, text)[-1]
                                    # if we didn't terminate, this isn't an error, so keep going
                                    if abs(float(s) - float(c)) < 1e-3:
                                        continue
                                err[code].append(stem)
                                found_code = True
                                number_instances[stem]["error"] = code
                                break
                        if not found_code:
                            if all(not line or any(w in line for w in warn_strs) for line in text.splitlines()):
                                warning.append(stem)
                                number_instances[stem]["error"] = "warning"
                            else:
                                other.append(stem)
                                number_instances[stem]["error"] = "other"

Read MPS format model from file instances/bm23/bm23.mps
Reading time = 0.00 seconds
BM23: 20 rows, 27 columns, 478 nonzeros


In [157]:
# check which series didn't run
print(no_go)

[]


In [158]:
# get the proportion of series that at least got started
1 - (len(no_go) / count_series)

1.0

In [159]:
# out of time - got hung up in code somewhere - ok
print(err["walltime"])
len(err["walltime"]) / count_series

[]


0.0

In [160]:
# out of memory - memory is maxed already - this is what it is
# todo: figure out where we ran short on memory so we can explain why we dropped them
print(err["bad_alloc"] + err["out of memory"])
len(err["bad_alloc"] + err["out of memory"]) / count_series

[]


0.0

In [161]:
# rerun this if want to give more memory to some instances
# bad_alloc_names = set(n.split("_")[0] for n in err["bad_alloc"])
# mem = pd.read_csv("more_memory.csv", index_col=0)
# mem["reason"] = "hard solve" 
# 
# for n in bad_alloc_names:
#     if f"{n}.mps" not in mem.index:
#         new_row = pd.DataFrame([{'file_name': f"{n}.mps", 'memory': 16.0, 'reason': 'big disjunction'}]).set_index('file_name')
#         mem = pd.concat([mem, new_row])
#     else:
#         mem.loc[f'{n}.mps', 'memory'] = 16.0
# 
# mem.to_csv("more_memory.csv")

In [162]:
# this is an issue with John's bookkeeping - not much we can do here
print(err["takeoffcuts"])
len(err["takeoffcuts"]) / count_series

[]


0.0

In [163]:
print(err["solver is dual infeasible"])
len(err["solver is dual infeasible"]) / count_series

[]


0.0

In [164]:
# these are usually issues with CLP finding optimality - not much we can do here
print(err["solver must be optimal"])
len(err["solver must be optimal"]) / count_series

[]


0.0

In [165]:
print(err["segmentation fault"])
len(err["segmentation fault"]) / count_series

[]


0.0

In [166]:
# seg_err = {
#     "Bad image at line": [],
# }
# 
# seg_other = []
# 
# for stem in err["segmentation fault"]:
#     file_pth = os.path.join(results_fldr, f"{stem}.out")
# 
#     with open(file_pth, "r") as f:
#         text = f.read()
#     
#     # assign the error file to the appropriate list
#     found_code = False
#     for code in seg_err:
#         if code in text:
#             seg_err[code].append(stem)
#             found_code = True
#             break
#     if not found_code:
#         seg_other.append(stem)

In [167]:
# print(seg_err["Bad image at line"])
# len(seg_err["Bad image at line"]) / len(err["segmentation fault"]) if err["segmentation fault"] else 0

In [168]:
# print(seg_other)
# len(seg_other)/len(err["segmentation fault"]) if err["segmentation fault"] else 0

In [169]:
# # get breakdown of why vpc generation failed - mostly from lack of provisioning
# for code, exps in seg_err.items():
#     print(f"{code}: {len(exps) / len(err['segmentation fault']) if err['segmentation fault'] else 0}")
# 
# print(f"other: {len(seg_other) / len(err['segmentation fault']) if err['segmentation fault'] else 0}")

In [170]:
# todo: check aleks' removals and drop those below for similar reasons
# todo: check size of disjunctions and decide what to do with those that are too big
# these should all be from the problem being too big and hitting the time limit or integer solutions
print(err["no vpcs were made from a new disjunction"])
missing_4_term = [n for n in err["no vpcs were made from a new disjunction"] if "_4_" in n]
missing_64_term = [n for n in err["no vpcs were made from a new disjunction"] if "_64_" in n]
print(f'4 term: {len(missing_4_term) / count_series}')
print(f'64 term: {len(missing_64_term) / count_series}')

[]
4 term: 0.0
64 term: 0.0


In [171]:
# vpc_err = {
#     "CglVPC: Finishing with exit reason: PRLP_TIME_LIMIT": [],
#     "CglVPC: Finishing with exit reason: TIME_LIMIT": [],
#     "CglVPC: Finishing with exit reason: NO_CUTS_LIKELY": [],
#     "CglVPC: Finishing with exit reason: PRLP_INFEASIBLE": [],
#     "CglVPC: Finishing with exit reason: SUCCESS": [],
#     "CglVPC: Finishing with exit reason: OPTIMAL_SOLUTION_FOUND": [],
#     "CglVPC: Finishing with exit reason: FAIL_LIMIT": [],
#     "CglVPC: Finishing with exit reason: NO_DISJUNCTION": [],
# }
# 
# vpc_other = []
# 
# for stem in err["no vpcs were made from a new disjunction"]:
#     file_pth = os.path.join(results_fldr, f"{stem}.out")
# 
#     with open(file_pth, "r") as f:
#         text = f.read()
#     
#     # assign the error file to the appropriate list
#     found_code = False
#     for code in vpc_err:
#         if code in text:
#             vpc_err[code].append(stem)
#             found_code = True
#             break
#     if not found_code:
#         vpc_other.append(stem)

In [172]:
# print(vpc_err["CglVPC: Finishing with exit reason: PRLP_TIME_LIMIT"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: PRLP_TIME_LIMIT"]) / len(err["no vpcs were made from a new disjunction"])

In [173]:
# print(vpc_err["CglVPC: Finishing with exit reason: TIME_LIMIT"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: TIME_LIMIT"]) / len(err["no vpcs were made from a new disjunction"])

In [174]:
# print(vpc_err["CglVPC: Finishing with exit reason: NO_CUTS_LIKELY"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: NO_CUTS_LIKELY"]) / len(err["no vpcs were made from a new disjunction"])

In [175]:
# print(vpc_err["CglVPC: Finishing with exit reason: PRLP_INFEASIBLE"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: PRLP_INFEASIBLE"]) / len(err["no vpcs were made from a new disjunction"])

In [176]:
# print(vpc_err["CglVPC: Finishing with exit reason: SUCCESS"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: SUCCESS"]) / len(err["no vpcs were made from a new disjunction"])

In [177]:
# print(vpc_err["CglVPC: Finishing with exit reason: OPTIMAL_SOLUTION_FOUND"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: OPTIMAL_SOLUTION_FOUND"]) / len(err["no vpcs were made from a new disjunction"])

In [178]:
# print(vpc_err["CglVPC: Finishing with exit reason: FAIL_LIMIT"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: FAIL_LIMIT"]) / len(err["no vpcs were made from a new disjunction"])

In [179]:
# print(vpc_err["CglVPC: Finishing with exit reason: NO_DISJUNCTION"])
# if err["no vpcs were made from a new disjunction"]:
#     len(vpc_err["CglVPC: Finishing with exit reason: NO_DISJUNCTION"]) / len(err["no vpcs were made from a new disjunction"])

In [180]:
# vpc_other

In [181]:
# # get breakdown of why vpc generation failed - mostly from lack of provisioning/problem being too large
# if err["no vpcs were made from a new disjunction"]:
#     for code, exps in vpc_err.items():
#         print(f"{code}: {len(exps) / len(err['no vpcs were made from a new disjunction'])}")
#     
#     print(f"other: {len(vpc_other) / len(err['no vpcs were made from a new disjunction'])}")

In [182]:
print(err["must have primalbound >= root lp objective"])
len(err["must have primalbound >= root lp objective"]) / count_series

[]


0.0

In [183]:
# LP relaxation objective is not going to match root nodes objective when warm starting 
print(err["objective at parent nodes"])
len(err["objective at parent nodes"]) / count_series

[]


0.0

In [184]:
# not enough tolerance added to bound (or we hit time limit) - element 2 from 5 and 4 from 4
print(err["failed to optimize mip"])
len(err["failed to optimize mip"]) / count_series

[]


0.0

In [185]:
# todo: figure out why
print(err["disjunction does not represent a full binary tree"])
len(err["disjunction does not represent a full binary tree"]) / count_series

['bm23_matrix_4_64_New_0']


0.006944444444444444

In [186]:
# again issue with not getting through vpc generation in time
# todo: handle this gracefully
print(err["solver not proven optimal for nodes"])
len(err["solver not proven optimal for nodes"]) / count_series

[]


0.0

In [187]:
print(err["unable to open"])
len(err["unable to open"]) / count_series

[]


0.0

In [188]:
print(err["license"])
len(err["license"]) / count_series

[]


0.0

In [189]:
print(warning)
len(warning) / count_series

['bm23_rhs_0_64_New_0', 'bm23_rhs_4_64_New_0', 'bm23_rhs_2_64_New_0', 'bm23_objective_2_64_New_0', 'bm23_objective_4_64_New_0']


0.034722222222222224

In [190]:
# errors unaccounted for
print(other)
len(other) / count_series

['bm23_objective_0_64_NoDisjunction_0', 'bm23_rhs_4_4_NoBasis_0', 'bm23_matrix_2_64_New_0']


0.020833333333333332

In [191]:
# proportion of series that were improperly provisioned
(len(err["bad_alloc"] + err["out of memory"] + err["walltime"])) / count_series

0.0

In [192]:
# todo handle this
print(err["dot product with obj differs from solver"])
len(err["dot product with obj differs from solver"]) / count_series

[]


0.0

In [193]:
# changed code to ignore this error
print(err["gurobi: error during callback: addCut"])
len(err["gurobi: error during callback: addCut"]) / count_series

[]


0.0

In [194]:
# largely not replicating - only issue I could find was aleks missing updated objective from CLP when resolving to check this
print(err["cglvpc::setupconstraints: objective at disjunctive term"])
len(err["cglvpc::setupconstraints: objective at disjunctive term"]) / count_series

[]


0.0

In [195]:
# not replicating - rerun
print(err["unable to read file"])
len(err["unable to read file"]) / count_series

[]


0.0

In [196]:
# not replicating - rerun
print(err["stats.id == stats_vec"])
len(err["stats.id == stats_vec"]) / count_series

[]


0.0

In [197]:
print(err["size of our disjunction is not what we expected it to be"])
len(err["size of our disjunction is not what we expected it to be"]) / count_series

[]


0.0

In [198]:
print(err["vpcgenerator must be"])
len(err["vpcgenerator must be"]) / count_series

[]


0.0

In [199]:
print(err["dimension must stay fixed"])
len(err["dimension must stay fixed"]) / count_series

[]


0.0

In [200]:
# get breakdown of errors
for code, exps in err.items():
    print(f"{code}: {len(exps) / count_series}")

print(f"other: {len(other) / count_series}")

print(f"warning: {len(warning) / count_series}")

print(f"no errors/warnings: {len(empty) / count_series}")

print(f"no go: {len(no_go) / count_series}")

walltime: 0.0
bad_alloc: 0.0
out of memory: 0.0
takeoffcuts: 0.0
solver is dual infeasible: 0.0
solver must be optimal: 0.0
segmentation fault: 0.0
no vpcs were made from a new disjunction: 0.0
must have primalbound >= root lp objective: 0.0
objective at parent nodes: 0.0
failed to optimize mip: 0.0
disjunction does not represent a full binary tree: 0.006944444444444444
solver not proven optimal for nodes: 0.0
unable to open: 0.0
license: 0.0
dot product with obj differs from solver: 0.0
gurobi: error during callback: addCut: 0.0
cglvpc::setupconstraints: objective at disjunctive term: 0.0
unable to read file: 0.0
stats.id == stats_vec: 0.0
size of our disjunction is not what we expected it to be: 0.0
dimension must stay fixed: 0.0
vpcgenerator must be: 0.0
other: 0.020833333333333332
no go: 0.0


## Read in data

In [201]:
# map generator names to the corresponding data frames
df_map = {
    "None": pd.DataFrame(),
    "Farkas": pd.DataFrame(),
    "New": pd.DataFrame(),
    "All": pd.DataFrame(),
    "NoDisjunction": pd.DataFrame(),
    "NoMatrix": pd.DataFrame(),
    "NoTerm": pd.DataFrame(),
    "NoBasis": pd.DataFrame()
}
gap_map = {
    "None": pd.DataFrame(),
    "Farkas": pd.DataFrame(),
    "New": pd.DataFrame(),
    "All": pd.DataFrame(),
    "NoDisjunction": pd.DataFrame(),
    "NoMatrix": pd.DataFrame(),
    "NoTerm": pd.DataFrame(),
    "NoBasis": pd.DataFrame()
}
regex = re.compile(r'([a-zA-Z0-9-]+(?:_o)?)_([a-z]+)_([0-9-]+)_([0-9]+)_([a-zA-Z ]+)')
solution_pattern = r"_(\d+)\.pb"

# declaring types as needed
column_types = {
    "lpBound": float,
    "lpBoundPostVpc": float,
    "disjunctiveDualBound": float,
    "primalBound": float,
    "rootDualBound": float,
    "dualBound": float
}

skipped_instances = set()
primal_bounds = {}
same_solution = {}

# iterate over all files in the folder
for file_name in os.listdir(results_fldr):
    
    file_pth = os.path.join(results_fldr, file_name)
    
    # if the file is not a nonempty csv, skip it
    if not file_name.endswith(".csv") or os.path.getsize(file_pth) == 0:
        continue
    
    # get the experimental set up
    match = regex.search(file_name)
    instance_name = names.get(file_name[:-4])
    if not instance_name:
        skipped_instances.add(file_name[:-4].split("_")[0])
        os.remove(file_pth)
        continue
    # instance_name = match.group(1)
    perturbation = match.group(2)
    assert perturbation in ["matrix", "rhs", "bounds", "objective"], f"Unknown perturbation: {perturbation}"
    expo = int(match.group(3))
    assert expo in degrees, f"Unknown degree: {expo}"
    degree = 2**int(expo)
    terms = int(match.group(4))
    assert terms in term_list, f"Unknown number of terms: {terms}"
    generator = match.group(5)
    assert generator in generators, f"Unknown generator: {generator}"
    base_name = f"{instance_name}_0"
    
    # get the primal bounds for this experiment
    cur_instance_test_set_fldr = os.path.join(test_set_fldr, instance_name, f"{perturbation}_{expo}")
    for test_set_file in os.listdir(cur_instance_test_set_fldr):
        if test_set_file.endswith(".pb"):
            with open(os.path.join(cur_instance_test_set_fldr, test_set_file), "r") as f:
                primal_bounds[perturbation, expo, ".".join(test_set_file.split(".")[:-1])] = float(f.read())
                
    # see if solution changed
    for test_set_file in os.listdir(cur_instance_test_set_fldr):
        if test_set_file.endswith(".pb"):
            perturbation_name = ".".join(test_set_file.split(".")[:-1])
            same_solution[perturbation, expo, perturbation_name] = \
                primal_bounds[perturbation, expo, base_name] == primal_bounds[perturbation, expo, perturbation_name]
            
    # read the file
    df = pd.read_csv(file_pth, keep_default_na=False, dtype=column_types, index_col=0)
    
    for instance_idx in df.index:
        
        # fill in primal bounds if missing
        # df.loc[instance_idx, "primalBound"] = min(primal_bounds.get(stem_map.get(instance_idx), 1e100), df.loc[instance_idx, "primalBound"])
        df.loc[instance_idx, "primalBound"] = min(
            primal_bounds[perturbation, expo, f"{instance_name}_{instance_idx}"], df.loc[instance_idx, "primalBound"]
        )
        
        # same with root dual bound
        df.loc[instance_idx, "rootDualBound"] = df.loc[instance_idx, "rootDualBound"] if df.loc[instance_idx, "rootDualBound"] < 1e100 else df.loc[instance_idx, "lpBoundPostVpc"] 
    
    # get rid of the index so the rest of the notebook works
    df.reset_index(inplace=True)
    
    # add some identifying columns
    df["instance"] = instance_name
    df["perturbation"] = perturbation
    df["degree"] = degree
    df["terms"] = terms
    df["rows"] = rows[instance_name]
    df["cols"] = cols[instance_name]
    df["density"] = density[instance_name]
    
    # append to the appropriate data frame
    df_map[generator] = pd.concat([df_map[generator], df])
    
    # track recorded vs expected experiments
    number_instances[file_name[:-4]]["recorded"] = len(df)

In [202]:
# convert number_instances to dataframe
frame = pd.DataFrame(number_instances).T
frame.head()

Unnamed: 0,expected,recorded,generator,error
bm23_rhs_0_4_None_0,21,21,,empty
bm23_rhs_0_4_New_0,21,21,New,empty
bm23_rhs_0_4_Farkas_0,21,21,Farkas,empty
bm23_rhs_0_4_All_0,21,21,All,empty
bm23_rhs_0_4_NoDisjunction_0,21,21,NoDisjunction,empty


In [203]:
# redo the runs that have incomplete data that we're not sure should be that way
redos = frame.loc[(frame["expected"] > frame["recorded"]) & (frame["error"] != "no vpcs were made from a new disjunction")].index.tolist()
redos = pd.DataFrame({"experiment": redos})
redos.to_csv("redos.csv", index=False)

In [204]:
if "miplib" in test_set or "quick" in test_set:
    # group frame by generator and sum remaining columns
    gb = frame.groupby(["generator", "error"]).sum().reset_index()
    gb["missing"] = gb["expected"] - gb["recorded"]
    total = gb.groupby("generator")[["expected", "missing"]].sum().reset_index()
    gb = pd.merge(gb, total, on="generator", suffixes=("", " total"))
    gb["ratio missing (by generator)"] = gb["missing"] / gb["missing total"]
    gb["ratio missing (by generator)"] = gb["ratio missing (by generator)"].apply(lambda x: round(x, 4))
    gb = gb.loc[:, ~gb.columns.str.contains("total")]  # get rid of the total columns
    gb.set_index(["generator", "error"], inplace=True)
    gb.to_csv(os.path.join(out_fldr, "missing_table.csv"), index=False, mode="w")
else:
    gb = None
gb

In [205]:
for gen in generators:
    masks = {
        0: -1e20 > df_map[gen]["lpBound"],
        1: df_map[gen]["lpBound"] - 1e-3 > df_map[gen]["lpBoundPostVpc"],
        2: (df_map[gen]["lpBoundPostVpc"] - 1e-3 > df_map[gen]["disjunctiveDualBound"]) & ((gen == "None") | (gen == "New")),
        3: df_map[gen]["rootDualBound"] - 1e-3 > df_map[gen]["dualBound"],
        4: (df_map[gen]["dualBound"] - 1e-3 > df_map[gen]["primalBound"]) & (df_map[gen]["dualBound"] / df_map[gen]["primalBound"] > 1 + 1e-3),
        5: df_map[gen]["primalBound"] > 1e20,
        6: 0 > df_map[gen]["vpcGenerationTime"],
        7: df_map[gen]["vpcGenerationTime"] - 1e-3 > df_map[gen]["rootDualBoundTime"],
        8: df_map[gen]["rootDualBoundTime"] - 1e-3 > df_map[gen]["terminationTime"],
        9: df_map[gen]["vpcGenerationTime"] - 1e-3 > df_map[gen]["bestSolutionTime"],
        10: df_map[gen]["bestSolutionTime"] - 1e-3 > df_map[gen]["terminationTime"]
    }
    for i, mask in masks.items():
        print(f"{gen} {i}: {mask.sum() / len(df_map[gen])}")

None 0: 0.0
None 1: 0.0
None 2: 0.0
None 3: 0.0
None 4: 0.0
None 5: 0.0
None 6: 0.0
None 7: 0.0
None 8: 0.0
None 9: 0.0
None 10: 0.0
New 0: 0.0
New 1: 0.0
New 2: 0.0
New 3: 0.0
New 4: 0.0
New 5: 0.0
New 6: 0.0
New 7: 0.0
New 8: 0.0
New 9: 0.0
New 10: 0.0
Farkas 0: 0.0
Farkas 1: 0.0
Farkas 2: 0.0
Farkas 3: 0.0
Farkas 4: 0.0
Farkas 5: 0.0
Farkas 6: 0.0
Farkas 7: 0.0
Farkas 8: 0.0
Farkas 9: 0.0
Farkas 10: 0.0
All 0: 0.0
All 1: 0.0
All 2: 0.0
All 3: 0.0
All 4: 0.0
All 5: 0.0
All 6: 0.0
All 7: 0.0
All 8: 0.0
All 9: 0.0
All 10: 0.0
NoDisjunction 0: 0.0
NoDisjunction 1: 0.0
NoDisjunction 2: 0.0
NoDisjunction 3: 0.0
NoDisjunction 4: 0.0
NoDisjunction 5: 0.0
NoDisjunction 6: 0.0
NoDisjunction 7: 0.0
NoDisjunction 8: 0.0
NoDisjunction 9: 0.0
NoDisjunction 10: 0.0
NoMatrix 0: 0.0
NoMatrix 1: 0.0
NoMatrix 2: 0.0
NoMatrix 3: 0.0
NoMatrix 4: 0.0
NoMatrix 5: 0.0
NoMatrix 6: 0.0
NoMatrix 7: 0.0
NoMatrix 8: 0.0
NoMatrix 9: 0.0
NoMatrix 10: 0.0
NoTerm 0: 0.0
NoTerm 1: 0.0
NoTerm 2: 0.0
NoTerm 3: 0.0
NoT

In [206]:
# it shouldn't be possible that dual bound > primal bound. this only happens when we use the saved primal bound, which was used to set the dual bound
df_map["Farkas"][masks[0]]

Unnamed: 0,instanceIndex,seedIndex,vpcGenerator,terms,lpBound,disjunctiveDualBound,lpBoundPostVpc,rootDualBound,dualBound,primalBound,...,tighten_disjunction,tighten_matrix_perturbation,tighten_infeasible_to_feasible_term,tighten_feasible_to_infeasible_basis,instance,perturbation,degree,rows,cols,density


In [207]:
for gen in df_map:
    mask = (-1e20 > df_map[gen]["lpBound"]) | \
        (df_map[gen]["lpBound"] - 1e-3 > df_map[gen]["lpBoundPostVpc"]) | \
        ((df_map[gen]["lpBoundPostVpc"] - 1e-3 > df_map[gen]["disjunctiveDualBound"]) & (gen != "Farkas")) | \
        (df_map[gen]["rootDualBound"] - 1e-3 > df_map[gen]["dualBound"]) | \
        ((df_map[gen]["dualBound"] - 1e-3 > df_map[gen]["primalBound"]) & (df_map[gen]["dualBound"] / df_map[gen]["primalBound"] > 1 + 1e-3)) | \
        (df_map[gen]["primalBound"] > 1e20) | \
        (0 > df_map[gen]["vpcGenerationTime"]) | \
        (df_map[gen]["vpcGenerationTime"] - 1e-3 > df_map[gen]["rootDualBoundTime"]) | \
        (df_map[gen]["rootDualBoundTime"] - 1e-3 > df_map[gen]["terminationTime"]) | \
        (df_map[gen]["vpcGenerationTime"] - 1e-3 > df_map[gen]["bestSolutionTime"]) | \
        (df_map[gen]["bestSolutionTime"] - 1e-3 > df_map[gen]["terminationTime"])
    print(f"{gen}: {mask.sum() / len(df_map[gen])}")
    df_map[gen] = df_map[gen][~mask]

None: 0.0
Farkas: 0.0
New: 0.0
All: 0.0
NoDisjunction: 0.0
NoMatrix: 0.0
NoTerm: 0.0
NoBasis: 0.0


In [208]:
# merge the 4 different data frames into one
join_cols = ["instance", "perturbation", "degree", "terms", "instanceIndex", "seedIndex"]
df = df_map["None"].merge(df_map["New"], on=join_cols, suffixes=(" None", None))
df = df.merge(df_map["Farkas"], on=join_cols, suffixes=(" New", None))
df = df.merge(df_map["All"], on=join_cols, suffixes=(" Farkas", None))
df = df.merge(df_map["NoDisjunction"], on=join_cols, suffixes=(" All", None))
df = df.merge(df_map["NoMatrix"], on=join_cols, suffixes=(" NoDisjunction", None))
df = df.merge(df_map["NoTerm"], on=join_cols, suffixes=(" NoMatrix", None))
df = df.merge(df_map["NoBasis"], on=join_cols, suffixes=(" NoTerm", " NoBasis"))
df.head()

Unnamed: 0,instanceIndex,seedIndex,vpcGenerator None,terms,lpBound None,disjunctiveDualBound None,lpBoundPostVpc None,rootDualBound None,dualBound None,primalBound None,...,termRemainsFeasibleBasisInfeasible NoBasis,cutsChangedCoefficients NoBasis,feasibleTermsPrunedByBound NoBasis,tighten_disjunction NoBasis,tighten_matrix_perturbation NoBasis,tighten_infeasible_to_feasible_term NoBasis,tighten_feasible_to_infeasible_basis NoBasis,rows NoBasis,cols NoBasis,density NoBasis
0,0,0,,64,20.570922,20.570922,20.570922,26.427088,34.0,34.0,...,0,0,0,0,0,0,0,20,27,0.885185
1,10,0,,64,20.661706,20.661706,20.661706,25.938525,34.0,34.0,...,4,0,34,1,1,1,0,20,27,0.885185
2,11,0,,64,20.570922,20.570922,20.570922,26.427088,34.0,34.0,...,7,0,32,1,1,1,0,20,27,0.885185
3,12,0,,64,20.077726,20.077726,20.077726,25.242914,33.0,33.0,...,6,0,30,1,1,1,0,20,27,0.885185
4,15,0,,64,20.570922,20.570922,20.570922,26.427088,34.0,34.0,...,0,0,32,1,1,1,0,20,27,0.885185


In [209]:
# get proportion of tests run to completion
4 * len(df) / count_instances

0.43915343915343913

In [210]:
def gap_closed(df, col):
    gap = abs(df[col] - df["lpBound None"]) / abs(df['primalBound None'] - df["lpBound None"])
    gap[(gap > 1) | (gap == np.nan)] = 1  # get corner cases
    return gap

# Function to map values based on a dictionary
def check_same_solution(row):
    # Create a tuple of the key based on the key_columns
    return same_solution[row["perturbation"], int(math.log2(row["degree"])), f'{row["instance"]}_{row["instanceIndex"]}']

In [211]:
# find the optimality gap closed by each generator
df["Disjunction (New)"] = gap_closed(df, "disjunctiveDualBound New")
df["VPCs (New)"] = gap_closed(df, "lpBoundPostVpc New")
df["VPCs (Farkas)"] = gap_closed(df, "lpBoundPostVpc Farkas")
df["VPCs (All)"] = gap_closed(df, "lpBoundPostVpc All")
df["VPCs (NoDisjunction)"] = gap_closed(df, "lpBoundPostVpc NoDisjunction")
df["VPCs (NoMatrix)"] = gap_closed(df, "lpBoundPostVpc NoMatrix")
df["VPCs (NoTerm)"] = gap_closed(df, "lpBoundPostVpc NoTerm")
df["VPCs (NoBasis)"] = gap_closed(df, "lpBoundPostVpc NoBasis")
df["Root Cuts (None)"] = gap_closed(df, "rootDualBound None")
df["Root Cuts (New)"] = gap_closed(df, "rootDualBound New")
df["Root Cuts (Farkas)"] = gap_closed(df, "rootDualBound Farkas")
df["Root Cuts (All)"] = gap_closed(df, "rootDualBound All")
df["Root Cuts (NoDisjunction)"] = gap_closed(df, "rootDualBound NoDisjunction")
df["Root Cuts (NoMatrix)"] = gap_closed(df, "rootDualBound NoMatrix")
df["Root Cuts (NoTerm)"] = gap_closed(df, "rootDualBound NoTerm")
df["Root Cuts (NoBasis)"] = gap_closed(df, "rootDualBound NoBasis")
df["Root Optimality Gap Improvement"] = df["Root Cuts (Farkas)"] - df["Root Cuts (None)"] 
# df = df.dropna()

In [212]:
# find times without vpc generation
df["terminationTimeSansVpc None"] = df["terminationTime None"]
df["rootDualBoundTimeSansVpc None"] = df["rootDualBoundTime None"]
for gen in generators:
    if gen != "None":
        df[f"terminationTimeSansVpc {gen}"] = df[f"terminationTime {gen}"] - df["vpcGenerationTime New"]
        df[f"rootDualBoundTimeSansVpc {gen}"] = df[f"rootDualBoundTime {gen}"] - df[f"vpcGenerationTime {gen}"]
    df[f"postRootTime {gen}"] = df[f"terminationTime {gen}"] - df[f"rootDualBoundTime {gen}"]
    if gen not in ["None", "New"]:
        df[f"terminationTimeImprovement {gen}"] = (df["terminationTime None"] - df[f"terminationTime {gen}"]) / df["terminationTime None"]
        df[f"terminationTimeSansVpcImprovement {gen}"] = (df["terminationTimeSansVpc None"] - df[f"terminationTimeSansVpc {gen}"]) / df["terminationTimeSansVpc None"]
        df[f"nodesImprovement {gen}"] = (df["nodes None"] - df[f"nodes {gen}"]) / df["nodes None"] 
        df[f"iterationsImprovement {gen}"] = (df["iterations None"] - df[f"iterations {gen}"]) / df["iterations None"] 
        df[f"terminationTimeRatio {gen}"] = df[f"terminationTime {gen}"] / df["terminationTime None"]
        df[f"terminationTimeSansVpcRatio {gen}"] = df[f"terminationTimeSansVpc {gen}"] / df["terminationTimeSansVpc None"]
        df[f"nodesRatio {gen}"] = df[f"nodes {gen}"] / df["nodes None"] 
        df[f"iterationsRatio {gen}"] = df[f"iterations {gen}"] / df["iterations None"]
        df[f"nodesImproves {gen}"] = df["nodes None"] > df[f"nodes {gen}"]
        df[f"terminationTimeImproves {gen}"] = df["terminationTime None"] > df[f"terminationTime {gen}"]
        df[f"terminationTimeSansVpcImproves {gen}"] = df["terminationTimeSansVpc None"] > df[f"terminationTimeSansVpc {gen}"]
        df[f"iterationsImproves {gen}"] = df["iterations None"] > df[f"iterations {gen}"]
        df[f'nodesWin{gen}'] = df['nodes None']*(1 - win_threshold) > df[f'nodes {gen}']
        df[f'terminationTimeWin{gen}'] = df['terminationTime None']*(1 - win_threshold) > df[f'terminationTime {gen}']
        df[f'terminationTimeSansVpcWin{gen}'] = df['terminationTimeSansVpc None']*(1 - win_threshold) > df[f'terminationTimeSansVpc {gen}']
        df[f'iterationsWin{gen}'] = df['iterations None']*(1 - win_threshold) > df[f'iterations {gen}']
        df[f'nodesWinNoneVs{gen}'] = df[f'nodes {gen}']*(1 - win_threshold) > df['nodes None']
        df[f'terminationTimeWinNoneVs{gen}'] = df[f'terminationTime {gen}']*(1 - win_threshold) > df['terminationTime None']
        df[f'terminationTimeSansVpcWinNoneVs{gen}'] = df[f'terminationTimeSansVpc {gen}']*(1 - win_threshold) > df['terminationTimeSansVpc None']
        df[f'iterationsWinNoneVs{gen}'] = df[f'iterations {gen}']*(1 - win_threshold) > df['iterations None']
df["bracket"] = ["short" if t <= short else "medium" if t <= medium else "long" for t in df["terminationTime None"]]
df["sameSolution"] = df.apply(check_same_solution, axis=1)

  df[f"terminationTimeRatio {gen}"] = df[f"terminationTime {gen}"] / df["terminationTime None"]
  df[f"terminationTimeSansVpcRatio {gen}"] = df[f"terminationTimeSansVpc {gen}"] / df["terminationTimeSansVpc None"]
  df[f"nodesRatio {gen}"] = df[f"nodes {gen}"] / df["nodes None"]
  df[f"iterationsRatio {gen}"] = df[f"iterations {gen}"] / df["iterations None"]
  df[f"nodesImproves {gen}"] = df["nodes None"] > df[f"nodes {gen}"]
  df[f"terminationTimeImproves {gen}"] = df["terminationTime None"] > df[f"terminationTime {gen}"]
  df[f"terminationTimeSansVpcImproves {gen}"] = df["terminationTimeSansVpc None"] > df[f"terminationTimeSansVpc {gen}"]
  df[f"iterationsImproves {gen}"] = df["iterations None"] > df[f"iterations {gen}"]
  df[f'nodesWin{gen}'] = df['nodes None']*(1 - win_threshold) > df[f'nodes {gen}']
  df[f'terminationTimeWin{gen}'] = df['terminationTime None']*(1 - win_threshold) > df[f'terminationTime {gen}']
  df[f'terminationTimeSansVpcWin{gen}'] = df['terminationTimeSansVpc Non

In [213]:
# get sensitivity stats as ratios
for gen_name in generators:
    if gen_name == "None":
        continue
    df[f"infeasibleTermsRatio {gen_name}"] = df[f"infeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
    df[f"infeasibleToFeasibleTermsRatio {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
    df[f"zeroInfeasibleToFeasibleTerms {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] == 0
    df[f"feasibleToInfeasibleTermsRatio {gen_name}"] = df[f"feasibleToInfeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]

  df[f"infeasibleTermsRatio {gen_name}"] = df[f"infeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"infeasibleToFeasibleTermsRatio {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"zeroInfeasibleToFeasibleTerms {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] == 0
  df[f"feasibleToInfeasibleTermsRatio {gen_name}"] = df[f"feasibleToInfeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"infeasibleTermsRatio {gen_name}"] = df[f"infeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"infeasibleToFeasibleTermsRatio {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"zeroInfeasibleToFeasibleTerms {gen_name}"] = df[f"infeasibleToFeasibleTerms {gen_name}"] == 0
  df[f"feasibleToInfeasibleTermsRatio {gen_name}"] = df[f"feasibleToInfeasibleTerms {gen_name}"] / df[f"actualTerms {gen_name}"]
  df[f"infeasibleTermsRatio {gen_name}"] = df[f"infeasibleTerms 

In [214]:
def optimality_gap(df, generator=None):
    if generator:
        return abs(df[f"primalBound {generator}"] - df[f"dualBound {generator}"]) / \
            abs(df[f"primalBound {generator}"])
    else:
        return abs(df[f"primalBound"] - df[f"dualBound"]) / abs(df[f"primalBound"])

In [215]:
# aleks filters
# df = df.loc[df["terms"] == df["actualTerms Farkas"]]
# df = df.loc[df["zeroInfeasibleToFeasibleTerms Farkas"]]

In [216]:
df.head()

Unnamed: 0,instanceIndex,seedIndex,vpcGenerator None,terms,lpBound None,disjunctiveDualBound None,lpBoundPostVpc None,rootDualBound None,dualBound None,primalBound None,...,zeroInfeasibleToFeasibleTerms NoMatrix,feasibleToInfeasibleTermsRatio NoMatrix,infeasibleTermsRatio NoTerm,infeasibleToFeasibleTermsRatio NoTerm,zeroInfeasibleToFeasibleTerms NoTerm,feasibleToInfeasibleTermsRatio NoTerm,infeasibleTermsRatio NoBasis,infeasibleToFeasibleTermsRatio NoBasis,zeroInfeasibleToFeasibleTerms NoBasis,feasibleToInfeasibleTermsRatio NoBasis
0,0,0,,64,20.570922,20.570922,20.570922,26.427088,34.0,34.0,...,True,0.0,0.03125,0.0,True,0.0,0.03125,0.0,True,0.0
1,10,0,,64,20.661706,20.661706,20.661706,25.938525,34.0,34.0,...,True,0.015625,0.046875,0.0,True,0.015625,0.046875,0.0,True,0.015625
2,11,0,,64,20.570922,20.570922,20.570922,26.427088,34.0,34.0,...,True,0.0,0.03125,0.0,True,0.0,0.03125,0.0,True,0.0
3,12,0,,64,20.077726,20.077726,20.077726,25.242914,33.0,33.0,...,True,0.0,0.03125,0.0,True,0.0,0.03125,0.0,True,0.0
4,15,0,,64,20.570922,20.570922,20.570922,26.427088,34.0,34.0,...,True,0.0,0.03125,0.0,True,0.0,0.03125,0.0,True,0.0


In [217]:
# set aside core columns and filter for all subsequent dataframes
group_cols = ["instance", "perturbation", "bracket", "degree", "terms"]
id_cols = ["instanceIndex"]

# keep the instance, perturbation, instanceIndex triples that exist for all combinations of degree and terms
# where VPC did not find the optimal solution
full_df = df.loc[df["Disjunction (New)"] < .9999]
triples = (full_df.groupby(
        ["instance", "perturbation", "instanceIndex"]
    ).size().reset_index().rename(columns={0: "count"}))
triples.head()

Unnamed: 0,instance,perturbation,instanceIndex,count
0,bm23,matrix,0,6
1,bm23,matrix,1,6
2,bm23,matrix,2,4
3,bm23,matrix,3,5
4,bm23,matrix,4,5


In [218]:
# uncomment to filter for only the triples that exist for all combinations of degree and terms (and seed index)
# triples = triples[triples["count"] == len(degrees) * len(term_list) * len(seed_idxs)]
# full_df = full_df.merge(triples, on=["instance", "perturbation", "instanceIndex"])
full_df.to_csv(os.path.join(out_fldr, "cleaned_combined_complete.csv"), index=False, mode="w")

## Check Root Node Stats

In [219]:
def interleave(list_of_lists):
    return [item for sublist in zip(*list_of_lists) for item in sublist]

In [220]:
# additional filtering for dataframe on bounds
fields = ["Disjunction (New)"] + [f"VPCs ({gen_name})" for gen_name in generators if gen_name != "None"] + \
    interleave([[f"Root Cuts ({gen_name})", f"terminationTime {gen_name}", f"nodes {gen_name}",
                 f"iterations {gen_name}", f"terminationTimeSansVpc {gen_name}", f"vpcGenerationTime {gen_name}", 
                 f"rootDualBoundTime {gen_name}"]
                for gen_name in generators]) + \
    interleave([[f"infeasibleTermsRatio {gen_name}", f"infeasibleToFeasibleTermsRatio {gen_name}",
                 f"zeroInfeasibleToFeasibleTerms {gen_name}", f"feasibleToInfeasibleTermsRatio {gen_name}"]
                for gen_name in generators if gen_name != "None"])

# now reduce bound_df to just the perturbed instances - make > -1 to include base instance
bound_df = full_df.loc[full_df["instanceIndex"] > 0, group_cols + id_cols + fields]

In [221]:
def geometric_mean(series, offset=1e-6):
    adjusted_series = series + offset  # Add a small offset to avoid zeros
    return np.exp(np.log(adjusted_series).mean())

# paper currently uses mean, but we can switch to geometric mean if we want
aggregations = {f: geometric_mean if f not in ["sameSolution"] else "mean" for f in fields}
aggregations["instance"] = "nunique"
aggregations["instanceIndex"] = "count"

In [222]:
# get gap closed by degree and term
out = bound_df.groupby(["degree", "terms"]).agg(aggregations).reset_index()
out.to_csv(os.path.join(out_fldr, "bound_table.csv"), index=False, mode="w")
out

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,degree,terms,Disjunction (New),VPCs (New),VPCs (Farkas),VPCs (All),VPCs (NoDisjunction),VPCs (NoMatrix),VPCs (NoTerm),VPCs (NoBasis),...,zeroInfeasibleToFeasibleTerms NoBasis,feasibleToInfeasibleTermsRatio New,feasibleToInfeasibleTermsRatio Farkas,feasibleToInfeasibleTermsRatio All,feasibleToInfeasibleTermsRatio NoDisjunction,feasibleToInfeasibleTermsRatio NoMatrix,feasibleToInfeasibleTermsRatio NoTerm,feasibleToInfeasibleTermsRatio NoBasis,instance,instanceIndex
0,1,4,0.140507,0.140292,0.144259,0.14431,0.14431,0.144284,0.14431,0.14431,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,60
1,1,64,0.679605,0.663024,0.49428,0.663293,0.652605,0.662717,0.663293,0.663018,...,1.000001,1e-06,7e-06,7e-06,7e-06,7e-06,7e-06,7e-06,1,51
2,4,4,0.111304,0.108766,0.078547,0.086747,0.086583,0.084137,0.086747,0.086003,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,60
3,4,64,0.66351,0.649138,0.017885,0.527745,0.47866,0.491125,0.527745,0.510831,...,0.501188,1e-06,2.8e-05,2.8e-05,2.8e-05,2.8e-05,2.8e-05,2.8e-05,1,40
4,16,4,0.114902,0.104119,3.9e-05,6.5e-05,6.5e-05,5.6e-05,6.5e-05,6.5e-05,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,60
5,16,64,0.545098,0.524853,0.000205,0.000638,0.000827,0.000471,0.000638,0.000638,...,0.021163,1e-06,0.000693,0.000693,0.000693,0.000693,0.000693,0.000693,1,43


In [223]:
# now break it down by type of perturbation
out = bound_df.groupby(["degree", "terms", "perturbation"]).agg(aggregations).reset_index()
out.to_csv(os.path.join(out_fldr, "bound_table_by_perturbation.csv"), index=False, mode="w")
out

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,degree,terms,perturbation,Disjunction (New),VPCs (New),VPCs (Farkas),VPCs (All),VPCs (NoDisjunction),VPCs (NoMatrix),VPCs (NoTerm),...,zeroInfeasibleToFeasibleTerms NoBasis,feasibleToInfeasibleTermsRatio New,feasibleToInfeasibleTermsRatio Farkas,feasibleToInfeasibleTermsRatio All,feasibleToInfeasibleTermsRatio NoDisjunction,feasibleToInfeasibleTermsRatio NoMatrix,feasibleToInfeasibleTermsRatio NoTerm,feasibleToInfeasibleTermsRatio NoBasis,instance,instanceIndex
0,1,4,matrix,0.139849,0.139849,0.142625,0.142776,0.142776,0.142699,0.142776,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,20
1,1,4,objective,0.146033,0.146033,0.146033,0.146033,0.146033,0.146033,0.146033,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,20
2,1,4,rhs,0.135827,0.135204,0.144141,0.144141,0.144141,0.144141,0.144141,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,20
3,1,64,matrix,0.683543,0.665729,0.579877,0.631318,0.604382,0.629848,0.631318,...,1.000001,1e-06,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1.3e-05,1,19
4,1,64,objective,0.665545,0.648594,0.686673,0.686673,0.686673,0.686673,0.686673,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,20
5,1,64,rhs,0.697293,0.683371,0.221915,0.677023,0.677023,0.677023,0.677023,...,1.000001,1e-06,5.6e-05,5.6e-05,5.6e-05,5.6e-05,5.6e-05,5.6e-05,1,12
6,4,4,matrix,0.101753,0.100893,0.081293,0.106711,0.106109,0.097366,0.106711,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,20
7,4,4,objective,0.135393,0.134473,0.135029,0.135029,0.135029,0.135029,0.135029,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,20
8,4,4,rhs,0.10009,0.094838,0.044148,0.045303,0.045303,0.045303,0.045303,...,1.000001,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1,20
9,4,64,matrix,0.65056,0.63721,4.7e-05,0.38626,0.27897,0.30393,0.38626,...,0.1,1e-06,2.6e-05,2.6e-05,2.6e-05,2.6e-05,2.6e-05,2.6e-05,1,12


In [224]:
short_fields = ["Disjunction (New)"] + [f"VPCs ({gen_name})" for gen_name in ["New", "Farkas", "All"]] + \
    [f"Root Cuts ({gen_name})" for gen_name in ["None", "New", "Farkas", "All"]]
aggregations = {f: geometric_mean for f in short_fields}
full_df[(full_df["terms"] == 64) & (full_df["perturbation"] == "matrix") & (full_df["degree"] == 1)].groupby("instance").agg(aggregations)

Unnamed: 0_level_0,Disjunction (New),VPCs (New),VPCs (Farkas),VPCs (All),Root Cuts (None),Root Cuts (New),Root Cuts (Farkas),Root Cuts (All)
instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
bm23,0.685067,0.667193,0.585178,0.634388,0.400882,0.68401,0.631555,0.663422


## Check Termination Stats

In [225]:
# additional filtering for dataframe on run time
fields = [f"terminationTime {gen}" for gen in generators] + \
         [f"terminationTimeImprovement {gen}" for gen in generators if gen not in ["None", "New"]]
# only check perturbed instances that solve to optimality and VPC didn't find optimal solution
mask = (df["Disjunction (New)"] < .9999) & (df["instanceIndex"] > 0) & (optimality_gap(df, "New") <= 1e-4) & \
    (optimality_gap(df, "None") <= 1e-4) & (optimality_gap(df, "Farkas") <= 1e-4) & \
       (df["terminationTime None"] > min_termination_time)

# create time dataframe
time_df = df.loc[mask, group_cols + id_cols + fields]

In [226]:
tmp = time_df.groupby(["instance", "perturbation", "degree", "terms"]).agg(
    average_time_none=(f"terminationTime None", geometric_mean),
    average_time_new=(f"terminationTime New", geometric_mean),
    average_time_farkas=(f"terminationTime Farkas", geometric_mean),
    average_time_all=(f"terminationTime All", geometric_mean),
    average_improvement_farkas=(f"terminationTimeImprovement Farkas", "mean"),
    average_improvement_all=(f"terminationTimeImprovement All", "mean"),
    count=(f"terminationTimeImprovement {gen}", "size")
).sort_values("average_improvement_all", ascending=False).reset_index()
tmp = tmp[(tmp["count"] > 1) & (tmp['average_improvement_all'] > 0)]
tmp["average_improvement_all_vs_farkas"] = (tmp["average_time_farkas"] - tmp["average_time_all"]) / tmp["average_time_farkas"]
tmp.to_csv(os.path.join(out_fldr, "high_perform_all.csv"), index=False, mode="w")
tmp

Unnamed: 0,instance,perturbation,degree,terms,average_time_none,average_time_new,average_time_farkas,average_time_all,average_improvement_farkas,average_improvement_all,count,average_improvement_all_vs_farkas


In [93]:
# find ratios of all vs farkas
# ijoc santanu and prachi paper on adding one cut and tree blows up
# are the cuts getting stronger?
# does time (excluding cut generation) improve when tightening improves