In [1]:
import os
import pandas as pd
import re
from scipy.stats.mstats import gmean

In [2]:
# get input paths: one folder per role, each keyed by the test set name
test_set = "miplib_smaller_easy_binary"
instance_fldr, results_fldr, out_fldr = (
    os.path.join(root, test_set)
    for root in ("test_sets", "results", "outputs")
)

## Check run failures

In [3]:
# running list of strings contained by different error codes
# Keys are tested in insertion order against the lower-cased .err text and the
# first hit wins, so the most generic substrings (the catchalls) stay last.
err = {
    "walltime": [],
    "bad_alloc": [],
    "takeoffcuts": [],
    "solver is dual infeasible": [],
    "solver must be optimal": [],
    "segmentation fault": [],
    "no vpcs were made from a new disjunction": [],
    "farkas": [],
    "prlp is primal infeasible": [],
    "branch variables must be integer": [],
    "warning": []
}

# runs that errored out with new error code
other = []

# runs that had no errors
empty = []

# series that didn't run
no_go = []

# counts
# count_series: one per (instance, perturbation, terms, generator) combination
# count_instances: number of .mps files in the series folder, added once per
# (terms, generator) combination
count_series = 0
count_instances = 0

# iterate over all expected runs
for instance in os.listdir(instance_fldr):
    instance_pth = os.path.join(instance_fldr, instance)
    if not os.path.isdir(instance_pth):
        continue
    for perturbation in os.listdir(instance_pth):
        series_fldr = os.path.join(instance_pth, perturbation)
        if not os.path.isdir(series_fldr):
            continue

        # the .mps count depends only on the series folder, so scan it once
        # here instead of once per (terms, generator) combination
        n_mps = sum(name.endswith(".mps") for name in os.listdir(series_fldr))

        for terms in [4, 16, 64]:
            for generator in ["None", "New", "Old", "Farkas"]:

                # set variables for this iteration
                count_series += 1
                count_instances += n_mps
                stem = f"{instance}_{perturbation}_{terms}_{generator}"
                file_pth = os.path.join(results_fldr, f"{stem}.err")

                # check if the series wasn't run
                if not os.path.exists(file_pth):
                    no_go.append(stem)

                # check if the series ran with no errors or warnings
                elif os.path.getsize(file_pth) == 0:
                    empty.append(stem)

                # track which error codes were thrown
                else:
                    # read the file
                    with open(file_pth, "r") as f:
                        text = f.read().lower()

                    # assign the error file to the first matching key only;
                    # the for/else falls through to `other` when nothing matched
                    for code in err:
                        if code in text:
                            err[code].append(stem)
                            break
                    else:
                        other.append(stem)

In [4]:
# check which series didn't run, i.e. whose expected .err file is missing from
# the results folder
print(no_go)

[]


In [5]:
# get the proportion of series that at least got started
# (a series counts as started when its .err file exists)
1 - (len(no_go) / count_series)

1.0

In [6]:
# series assigned to "walltime": the first key of `err` found in their .err text
err["walltime"]

['neos-3610173-itata_matrix_1_64_New', 'neos-3610173-itata_matrix_0_64_New']

In [7]:
# series assigned to "bad_alloc": the first key of `err` found in their .err text
err["bad_alloc"]

['neos-3610173-itata_rhs_0_64_Old',
 'neos-3610173-itata_rhs_0_64_Farkas',
 'neos-3610173-itata_rhs_1_64_Old',
 'neos-3610173-itata_rhs_1_64_Farkas',
 'neos-3610173-itata_matrix_1_64_Old',
 'neos-3610173-itata_matrix_1_64_Farkas',
 'neos-3610173-itata_matrix_0_64_Old',
 'neos-3610173-itata_matrix_0_64_Farkas',
 'neos-3610051-istra_rhs_0_64_Old',
 'neos-3610051-istra_rhs_0_64_Farkas',
 'neos-3610051-istra_rhs_1_64_Old',
 'neos-3610051-istra_rhs_1_64_Farkas',
 'neos-3610051-istra_matrix_1_64_Old',
 'neos-3610051-istra_matrix_1_64_Farkas',
 'neos-3610051-istra_matrix_0_64_Old',
 'neos-3610051-istra_matrix_0_64_Farkas',
 'neos-3611447-jijia_rhs_0_64_Old',
 'neos-3611447-jijia_rhs_0_64_Farkas',
 'neos-3611447-jijia_rhs_1_64_Old',
 'neos-3611447-jijia_rhs_1_64_Farkas',
 'neos-3611447-jijia_matrix_1_64_Old',
 'neos-3611447-jijia_matrix_1_64_Farkas',
 'neos-3611447-jijia_matrix_0_64_Old',
 'neos-3611447-jijia_matrix_0_64_Farkas',
 'neos-3610040-iskar_rhs_1_64_Old',
 'neos-3610040-iskar_rhs_1_6

In [8]:
# series assigned to "takeoffcuts": the first key of `err` found in their .err text
err["takeoffcuts"]

['dcmulti_rhs_-1_4_Old',
 'dcmulti_rhs_-1_4_Farkas',
 'dcmulti_rhs_-1_16_Farkas',
 'dcmulti_rhs_-1_64_Farkas',
 'dcmulti_matrix_-1_4_Old',
 'dcmulti_matrix_-1_16_Old',
 'dcmulti_matrix_-1_64_Old']

In [9]:
# series assigned to "solver is dual infeasible": the first key of `err` found
# in their .err text
err["solver is dual infeasible"]

[]

In [10]:
# series assigned to "solver must be optimal": the first key of `err` found in
# their .err text
err["solver must be optimal"]

['neos-5140963-mincio_objective_1_4_New',
 'neos-5140963-mincio_objective_1_4_Old',
 'neos-5140963-mincio_objective_1_4_Farkas',
 'neos-5140963-mincio_objective_1_16_New',
 'neos-5140963-mincio_objective_1_16_Old',
 'neos-5140963-mincio_objective_1_16_Farkas',
 'neos-5140963-mincio_objective_1_64_New',
 'neos-5140963-mincio_objective_0_4_New',
 'neos-5140963-mincio_objective_0_4_Old',
 'neos-5140963-mincio_objective_0_4_Farkas',
 'neos-5140963-mincio_objective_0_16_New',
 'neos-5140963-mincio_objective_0_16_Old',
 'neos-5140963-mincio_objective_0_16_Farkas',
 'neos-5140963-mincio_objective_0_64_New',
 'neos-5140963-mincio_objective_0_64_Old',
 'neos-5140963-mincio_objective_0_64_Farkas',
 'neos-5140963-mincio_objective_-1_4_New',
 'neos-5140963-mincio_objective_-1_4_Old',
 'neos-5140963-mincio_objective_-1_4_Farkas',
 'neos-5140963-mincio_objective_-1_16_New',
 'neos-5140963-mincio_objective_-1_16_Old',
 'neos-5140963-mincio_objective_-1_16_Farkas',
 'neos-5140963-mincio_objective_-1_6

In [11]:
# series assigned to "segmentation fault": the first key of `err` found in
# their .err text
err["segmentation fault"]

[]

In [12]:
# series assigned to "no vpcs were made from a new disjunction": the first key
# of `err` found in their .err text
err["no vpcs were made from a new disjunction"]

['neos-3610173-itata_rhs_0_64_New',
 'neos-3610051-istra_rhs_1_64_New',
 'mas74_rhs_1_4_New',
 'mas74_rhs_1_16_New',
 'mas74_rhs_1_64_New',
 'mas74_matrix_1_4_New',
 'mas74_matrix_1_16_New',
 'mas74_matrix_1_64_New',
 'mas74_matrix_0_4_New',
 'mas74_matrix_0_16_New',
 'mas74_matrix_0_64_New',
 'mas74_matrix_-1_4_New',
 'mas74_matrix_-1_16_New',
 'mas74_matrix_-1_64_New',
 'neos-5140963-mincio_rhs_0_64_New',
 'bm23_rhs_0_64_New',
 'bm23_rhs_1_64_New',
 'bm23_matrix_1_64_New',
 'bm23_matrix_0_64_New',
 'bm23_matrix_-1_64_New',
 'dcmulti_matrix_1_4_New',
 'dcmulti_matrix_1_16_New',
 'dcmulti_matrix_1_64_New',
 'dcmulti_matrix_0_4_New',
 'dcmulti_matrix_0_16_New',
 'dcmulti_matrix_0_64_New',
 'dcmulti_matrix_-1_64_New',
 'mas76_matrix_1_4_New',
 'mas76_matrix_1_16_New',
 'mas76_matrix_1_64_New',
 'mas76_matrix_0_4_New',
 'mas76_matrix_0_16_New',
 'mas76_matrix_0_64_New',
 'mas76_matrix_-1_64_New',
 'prod1_rhs_0_4_New',
 'prod1_rhs_1_4_New']

In [14]:
# series assigned to "farkas": the first key of `err` found in their .err text
err["farkas"]

['neos-3610173-itata_rhs_0_16_New',
 'neos-3610173-itata_rhs_1_64_New',
 'neos-3610051-istra_rhs_1_16_New',
 'neos-3610051-istra_matrix_1_16_New',
 'dcmulti_objective_1_64_New']

In [15]:
# series assigned to "prlp is primal infeasible": the first key of `err` found
# in their .err text
err["prlp is primal infeasible"]

['mas74_matrix_1_4_Old',
 'mas74_matrix_1_16_Old',
 'mas74_matrix_1_64_Old',
 'neos-5140963-mincio_matrix_1_4_Old',
 'dcmulti_matrix_1_4_Old',
 'dcmulti_matrix_1_16_Old',
 'dcmulti_matrix_0_4_Old',
 'dcmulti_matrix_0_16_Old',
 'mas76_rhs_0_4_Old',
 'mas76_rhs_1_4_Old',
 'mas76_matrix_1_4_Old',
 'mas76_matrix_1_16_Old',
 'mas76_matrix_1_64_Old',
 'mas76_matrix_0_4_Old',
 'mas76_matrix_0_16_Old',
 'mas76_matrix_0_64_Old',
 'mas76_matrix_-1_4_Old',
 'mas76_matrix_-1_64_Old']

In [16]:
# series assigned to "warning": as the last key of `err`, it only collects .err
# files that contain "warning" and none of the other keys
err["warning"]

['neos-3610173-itata_rhs_1_4_New',
 'neos-3610173-itata_rhs_1_16_New',
 'neos-5140963-mincio_matrix_1_4_New',
 'neos-5140963-mincio_matrix_1_16_New',
 'neos-5140963-mincio_matrix_1_64_New',
 'neos-5140963-mincio_matrix_0_4_New',
 'neos-5140963-mincio_matrix_0_16_New',
 'neos-5140963-mincio_matrix_0_64_New',
 'neos-3611689-kaihu_matrix_1_4_New',
 'neos-3611689-kaihu_matrix_1_16_New',
 'neos-3611689-kaihu_matrix_1_64_New',
 'dcmulti_objective_1_4_New',
 'dcmulti_objective_1_16_New',
 'dcmulti_objective_0_4_New',
 'dcmulti_objective_0_16_New',
 'dcmulti_objective_0_64_New',
 'dcmulti_rhs_-1_4_New',
 'dcmulti_rhs_-1_16_New',
 'dcmulti_rhs_-1_64_New',
 'dcmulti_matrix_-1_4_New',
 'dcmulti_matrix_-1_16_New',
 'mas76_matrix_-1_16_Old',
 'prod1_rhs_0_4_Old',
 'prod1_rhs_0_4_Farkas',
 'prod1_rhs_1_4_Old',
 'prod1_rhs_1_4_Farkas',
 'prod1_rhs_1_16_New',
 'prod1_rhs_1_16_Old',
 'prod1_rhs_1_16_Farkas',
 'prod1_matrix_1_4_New',
 'prod1_matrix_1_4_Old',
 'prod1_matrix_1_4_Farkas',
 'prod1_matrix_1_

In [17]:
# series assigned to "branch variables must be integer": the first key of `err`
# found in their .err text
err["branch variables must be integer"]

['prod1_rhs_0_16_New',
 'prod1_rhs_0_64_New',
 'prod1_rhs_0_64_Old',
 'prod1_rhs_0_64_Farkas',
 'prod1_rhs_1_64_New',
 'prod1_rhs_1_64_Old',
 'prod1_rhs_1_64_Farkas',
 'prod1_matrix_1_64_New',
 'prod1_matrix_1_64_Old',
 'prod1_matrix_1_64_Farkas',
 'prod1_matrix_0_64_New',
 'prod1_matrix_0_64_Old',
 'prod1_matrix_0_64_Farkas']

In [18]:
# errors unaccounted for: non-empty .err files whose text matched none of the
# keys in `err`
other

[]

In [19]:
# share of all series whose .err file was empty or matched only the
# "warning" key
(len(empty) + len(err["warning"])) / count_series

0.6863636363636364

In [20]:
# same share as above, split by generator; stems end with the generator name
for generator in ["None", "New", "Old", "Farkas"]:
    g = []
    for bucket in (empty, err["warning"]):
        g.extend(s for s in bucket if s.endswith(generator))
    # count_series spans all four generators, hence the factor of 4
    print(f"{generator}: {4 * len(g) / count_series}")

None: 1.0
New: 0.6545454545454545
Old: 0.48484848484848486
Farkas: 0.6060606060606061


In [21]:
# proportion of series that were improperly provisioned
# (their .err file was assigned to "bad_alloc" or "walltime")
(len(err["bad_alloc"]) + len(err["walltime"])) / count_series

0.11363636363636363

In [22]:
# proportion of series whose .err file was assigned to either of the two
# messages below
(len(err["no vpcs were made from a new disjunction"]) +
 len(err["prlp is primal infeasible"])) / count_series

0.08181818181818182

In [23]:
# proportion of series that didn't actually error: empty .err files plus the
# buckets treated as benign
benign_keys = ["bad_alloc", "walltime", "warning",
               "no vpcs were made from a new disjunction",
               "prlp is primal infeasible"]
(len(empty) + sum(len(err[key]) for key in benign_keys)) / count_series

0.8818181818181818

## Read in data

In [24]:
# per-generator lists of the frames read from disk; they are concatenated once
# after the loop rather than growing a DataFrame file by file
frames = {
    "None": [],
    "Farkas": [],
    "New": [],
    "Old": []
}
# file name layout: <instance>_<perturbation>_<degree exponent>_<terms>_<generator>
regex = re.compile(r'([a-zA-Z0-9-]+)_([a-z]+)_([0-9-]+)_([0-9]+)_([a-zA-Z ]+)')

# iterate over all files in the folder
for file_name in os.listdir(results_fldr):

    file_pth = os.path.join(results_fldr, file_name)

    # if the file is not a nonempty csv, skip it
    if not file_name.endswith(".csv") or os.path.getsize(file_pth) == 0:
        continue

    # get the experimental set up; a csv that does not follow the naming
    # scheme (or names an unknown generator) is reported and skipped instead
    # of aborting the whole load
    match = regex.search(file_name)
    if match is None or match.group(5) not in frames:
        print(f"skipping unrecognized results file: {file_name}")
        continue
    instance_name = match.group(1)
    perturbation = match.group(2)
    degree = 2**int(match.group(3))  # the file name stores the exponent
    terms = int(match.group(4))
    generator = match.group(5)

    # read the file
    df = pd.read_csv(file_pth, keep_default_na=False)

    # add some identifying columns
    df["problem number"] = df.index  # row position within the file
    df["instance"] = instance_name
    df["perturbation"] = perturbation
    df["degree"] = degree
    df["terms"] = terms

    # queue for the appropriate data frame
    frames[generator].append(df)

# map generator names to the corresponding data frames
# (a generator with no result files keeps an empty frame)
df_map = {
    name: pd.concat(parts) if parts else pd.DataFrame()
    for name, parts in frames.items()
}

In [25]:
# share of expected problem instances for which each generator produced a row;
# count_instances spans all four generators, hence the factor of 4
for generator in df_map:
    df = df_map[generator]
    print(f"{generator}: {4 * len(df) / count_instances}")

None: 1.0
Farkas: 0.8173356602675974
New: 0.7201861547411286
Old: 0.8155904595695171


In [46]:
# Debugging aid: each sanity check is kept as its own boolean mask (True marks
# a violating row) so the offending rows of one generator can be inspected
# one check at a time.
gen = "Farkas" # make sure masks 3 and 9 are fixed rest should be fine
frame = df_map[gen]
masks = dict(enumerate([
    # 0-6: bounds out of range or out of order (1e-4 tolerance)
    -1e20 > frame["lpBound"],
    frame["lpBound"] - 1e-4 > frame["lpBoundPostVpc"],
    frame["lpBoundPostVpc"] - 1e-4 > frame["disjunctiveDualBound"],
    frame["lpBoundPostVpc"] - 1e-4 > frame["rootDualBound"],
    frame["rootDualBound"] - 1e-4 > frame["dualBound"],
    frame["dualBound"] - 1e-4 > frame["primalBound"],
    frame["primalBound"] > 1e20,
    # 7-10: stage timings negative or out of order
    0 > frame["vpcGenerationTime"],
    frame["vpcGenerationTime"] - 1e-4 > frame["heuristicTime"],
    frame["heuristicTime"] - 1e-4 > frame["rootDualBoundTime"],
    frame["rootDualBoundTime"] - 1e-4 > frame["terminationTime"],
    # 11-13: solution timings negative or out of order
    0 > frame["firstSolutionTime"],
    frame["firstSolutionTime"] - 1e-4 > frame["bestSolutionTime"],
    frame["bestSolutionTime"] - 1e-4 > frame["terminationTime"],
    # 14-15: time limit exceeded
    frame["terminationTime"] > frame["maxTime"] + 10,
    frame["vpcGenerationTime"] > frame["maxTime"],
]))

frame[masks[9]]

Unnamed: 0,lpBound,disjunctiveDualBound,lpBoundPostVpc,rootDualBound,dualBound,heuristicPrimalBound,primalBound,vpcGenerationTime,heuristicTime,rootDualBoundTime,...,maxTime,vpcGenerator,terms,iterations,nodes,actualTerms,problem number,instance,perturbation,degree


In [27]:
# Drop, for every generator, each row that violates at least one sanity check
# on its bounds or timings.
for gen in df_map:
    frame = df_map[gen]
    violations = [
        # bounds out of range or out of order (1e-4 tolerance)
        -1e20 > frame["lpBound"],
        frame["lpBound"] - 1e-4 > frame["lpBoundPostVpc"],
        frame["lpBoundPostVpc"] - 1e-4 > frame["disjunctiveDualBound"],
        frame["lpBoundPostVpc"] - 1e-4 > frame["rootDualBound"],
        frame["rootDualBound"] - 1e-4 > frame["dualBound"],
        frame["dualBound"] - 1e-4 > frame["primalBound"],
        frame["primalBound"] > 1e20,
        # stage timings negative or out of order
        0 > frame["vpcGenerationTime"],
        frame["vpcGenerationTime"] - 1e-4 > frame["heuristicTime"],
        frame["heuristicTime"] - 1e-4 > frame["rootDualBoundTime"],
        frame["rootDualBoundTime"] - 1e-4 > frame["terminationTime"],
        # solution timings negative or out of order
        0 > frame["firstSolutionTime"],
        frame["firstSolutionTime"] - 1e-4 > frame["bestSolutionTime"],
        frame["bestSolutionTime"] - 1e-4 > frame["terminationTime"],
        # time limit exceeded
        frame["terminationTime"] > frame["maxTime"] + 10,
        frame["vpcGenerationTime"] > frame["maxTime"],
    ]

    # OR the individual checks together, left to right
    mask = violations[0]
    for violation in violations[1:]:
        mask = mask | violation
    df_map[gen] = frame[~mask]

In [41]:
# merge the 4 different data frames into one, matching rows on the experiment
# keys; each step suffixes the still-unlabelled left-hand columns with the
# generator they came from
join_cols = ["instance", "perturbation", "degree", "terms", "problem number"]
df = (
    df_map["None"]
    .merge(df_map["New"], on=join_cols, suffixes=(" None", None))
    .merge(df_map["Old"], on=join_cols, suffixes=(" New", None))
    .merge(df_map["Farkas"], on=join_cols, suffixes=(" Old", " Farkas"))
)
df

Unnamed: 0,lpBound None,disjunctiveDualBound None,lpBoundPostVpc None,rootDualBound None,dualBound None,heuristicPrimalBound None,primalBound None,vpcGenerationTime None,heuristicTime None,rootDualBoundTime None,...,heuristicTime Farkas,rootDualBoundTime Farkas,firstSolutionTime Farkas,bestSolutionTime Farkas,terminationTime Farkas,maxTime Farkas,vpcGenerator Farkas,iterations Farkas,nodes Farkas,actualTerms Farkas
0,134.915590,134.915590,134.915590,143.653501,145.015871,154.000000,151.000000,0.096006,1.206488,3.890045,...,4.556782,7.583520,4.556799,12.220595,300.515738,300.0,New,190757,7909,4
1,135.862243,135.862243,135.862243,144.515640,144.515640,153.000000,149.000000,0.094931,1.026769,4.449761,...,1.190663,4.470498,1.190665,4.573789,301.040320,300.0,Farkas,182442,9874,4
2,135.354844,135.354844,135.354844,143.841106,145.807495,152.000000,150.000000,0.091691,1.041764,4.332778,...,1.237440,4.343022,1.237443,4.480485,301.141948,300.0,Farkas,137678,11000,4
3,135.311224,135.311224,135.311224,142.730884,143.890196,151.000000,149.000000,0.091364,1.693356,5.167685,...,1.897233,5.480878,1.897237,15.567069,301.455145,300.0,Farkas,171474,11332,4
4,134.405958,134.405958,134.405958,141.990942,143.758420,153.000000,148.000000,0.089762,1.678831,4.389748,...,1.879234,4.473597,1.879235,5.083431,300.528704,300.0,Farkas,178901,8271,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
996,38893.442891,38893.442891,38893.442891,38893.442891,39378.409831,40560.054142,40005.054142,0.009592,0.430366,0.443382,...,0.967708,1.009255,0.967719,1.019741,300.632495,300.0,Farkas,315206,50927,65
997,26511.178608,26511.178608,26511.178608,26511.178608,27104.457594,28593.617817,27638.141720,0.016177,1.124101,1.139054,...,1.723236,1.738186,1.723240,106.677015,300.846082,300.0,Farkas,515262,86552,65
998,51687.033613,51687.033613,51687.033613,51687.033613,52023.712306,54752.387825,52771.123829,0.013327,0.691344,0.704919,...,1.322108,1.335609,1.322114,205.004800,301.060260,300.0,Farkas,579314,97852,65
999,38893.310241,38893.310241,38893.310241,38893.310241,39377.750503,40559.141700,40004.141700,0.009793,0.435453,0.448441,...,0.981189,1.022604,0.981194,7.611164,300.663957,300.0,Farkas,326375,52953,65


In [42]:
# get proportion of tests run
# A row of the merged `df` exists only when the problem has a row under all
# four generators (merge defaults to an inner join). count_instances was
# accumulated once per (terms, generator) combination, so the factor of 4
# rescales it to a single generator.
4 * len(df) / count_instances

0.5823152995927865

In [43]:
def gap_closed(df, col):
    """Return the fraction of the baseline gap closed by the bound in `col`.

    The baseline gap is "primalBound None" minus "lpBound None"; the result
    is (df[col] - "lpBound None") divided by that gap, computed row by row.
    """
    baseline = df["lpBound None"]
    improvement = df[col] - baseline
    total_gap = df['primalBound None'] - baseline
    return improvement / total_gap

In [44]:
# find the optimality gap closed by each generator:
# output column -> bound column it is computed from
gap_sources = {
    "Disjunction (New)": "disjunctiveDualBound New",
    "Disjunction (Old)": "disjunctiveDualBound Old",
    "VPCs (New)": "lpBoundPostVpc New",
    "VPCs (Old)": "lpBoundPostVpc Old",
    "VPCs (Farkas)": "lpBoundPostVpc Farkas",
    "Root Cuts (None)": "rootDualBound None",
    "Root Cuts (New)": "rootDualBound New",
    "Root Cuts (Old)": "rootDualBound Old",
    "Root Cuts (Farkas)": "rootDualBound Farkas",
}
for label, bound_col in gap_sources.items():
    df[label] = gap_closed(df, bound_col)

In [32]:
# core grouping / identifying columns shared by all subsequent dataframes
group_cols = ["instance", "perturbation", "degree", "terms"]
id_cols = ["problem number"]

# base row filter: skip problem number 0 of every series and any row where the
# new disjunction already closes (practically) the entire gap
past_first_problem = df["problem number"] > 0
gap_left_open = df["Disjunction (New)"] < .9999
mask = past_first_problem & gap_left_open

## Make bound table

In [33]:
# columns reported in the bound table, restricted to the rows kept by `mask`
fields = [
    "Disjunction (New)",
    "Disjunction (Old)",
    "VPCs (New)",
    "VPCs (Old)",
    "VPCs (Farkas)",
    "Root Cuts (None)",
    "Root Cuts (New)",
    "Root Cuts (Old)",
    "Root Cuts (Farkas)",
]
bound_df = df.loc[mask, [*group_cols, *id_cols, *fields]]
bound_df

Unnamed: 0,instance,perturbation,degree,terms,problem number,Disjunction (New),Disjunction (Old),VPCs (New),VPCs (Old),VPCs (Farkas),Root Cuts (None),Root Cuts (New),Root Cuts (Old),Root Cuts (Farkas)
1,neos-3610173-itata,rhs,2.0,4,1,0.095260,0.067357,0.000000,0.024850,0.024850,0.658666,0.645240,0.675501,0.669352
2,neos-3610173-itata,rhs,2.0,4,2,0.169384,0.062536,0.000000,0.008756,0.023759,0.579459,0.591980,0.577645,0.700866
3,neos-3610173-itata,rhs,2.0,4,3,0.067271,0.080286,0.000000,0.025113,0.025113,0.542025,0.622162,0.548271,0.569447
4,neos-3610173-itata,rhs,2.0,4,4,0.065977,0.065020,0.003172,0.017348,0.017348,0.557964,0.570849,0.577828,0.575895
5,neos-3610173-itata,rhs,2.0,4,5,0.114146,0.081629,0.013967,0.026777,0.026777,0.677859,0.689113,0.628409,0.629432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,mas76,rhs,1.0,64,6,0.133031,0.133031,0.119257,0.119169,0.131210,0.000000,0.119257,0.119169,0.131210
996,mas76,rhs,1.0,64,7,0.133013,0.133013,0.131128,0.131133,0.131165,0.000000,0.131128,0.131133,0.131165
997,mas76,rhs,1.0,64,8,0.133671,0.021046,0.130681,0.020953,0.000000,0.000000,0.130681,0.020953,0.000000
998,mas76,rhs,1.0,64,9,0.077459,0.000742,0.071608,0.000593,0.000000,0.000000,0.071608,0.000593,0.000000


In [34]:
# average every reported field; also count the distinct instances and the
# number of rows behind each group
aggregations = dict.fromkeys(fields, "mean")
aggregations.update({"instance": "nunique", "problem number": "count"})

In [35]:
# get gap closed by degree and term
out = bound_df.groupby(["degree", "terms"]).agg(aggregations).reset_index()

# to_csv does not create missing directories, so make sure the output folder
# exists before writing
os.makedirs(out_fldr, exist_ok=True)
out.to_csv(os.path.join(out_fldr, "bound_table.csv"), index=False, mode="w")
out

Unnamed: 0,degree,terms,Disjunction (New),Disjunction (Old),VPCs (New),VPCs (Old),VPCs (Farkas),Root Cuts (None),Root Cuts (New),Root Cuts (Old),Root Cuts (Farkas),instance,problem number
0,0.5,4,0.073791,0.060983,0.06278,0.051739,0.040636,0.20907,0.215763,0.21817,0.214809,5,75
1,0.5,16,0.205294,0.154929,0.177432,0.134458,0.142134,0.14835,0.222567,0.203945,0.198334,4,46
2,0.5,64,0.297801,0.161775,0.21531,0.12826,0.129062,0.156051,0.274371,0.243686,0.24048,4,32
3,1.0,4,0.097033,0.078607,0.041203,0.041055,0.036043,0.406271,0.419587,0.41547,0.40931,11,158
4,1.0,16,0.223158,0.165781,0.110703,0.102995,0.094399,0.396335,0.431415,0.427759,0.423479,9,115
5,1.0,64,0.295585,0.202876,0.148824,0.12358,0.106709,0.3518,0.424974,0.418049,0.41257,8,65
6,2.0,4,0.098286,0.08008,0.040835,0.03857,0.030918,0.423796,0.430226,0.437096,0.426722,11,163
7,2.0,16,0.229777,0.170339,0.101361,0.096245,0.082861,0.437452,0.463841,0.452121,0.447142,9,126
8,2.0,64,0.253268,0.152833,0.113004,0.073353,0.037973,0.348343,0.386804,0.377266,0.367054,9,60


In [36]:
# now break it down by type of perturbation
# (same aggregations, grouped one level deeper; the result is only displayed,
# not written to disk)
bound_df.groupby(["degree", "terms", "perturbation"]).agg(aggregations)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Disjunction (New),Disjunction (Old),VPCs (New),VPCs (Old),VPCs (Farkas),Root Cuts (None),Root Cuts (New),Root Cuts (Old),Root Cuts (Farkas),instance,problem number
degree,terms,perturbation,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0.5,4,matrix,0.064312,0.054201,0.065451,0.053346,0.044396,0.179617,0.183975,0.185115,0.178709,5,36
0.5,4,objective,0.161299,0.142478,0.125629,0.10427,0.070231,0.611309,0.588399,0.60744,0.609254,2,12
0.5,4,rhs,0.047538,0.033804,0.031286,0.026249,0.022469,0.069568,0.09253,0.089235,0.087633,4,27
0.5,16,matrix,0.235694,0.200318,0.227046,0.193769,0.185949,0.163773,0.256411,0.226665,0.220466,3,21
0.5,16,objective,0.620113,0.328254,0.424711,0.190914,0.302751,0.674395,0.716434,0.691712,0.673905,1,5
0.5,16,rhs,0.069669,0.063939,0.063517,0.058067,0.055975,0.000644,0.063563,0.058147,0.056203,2,20
0.5,64,matrix,0.26725,0.168922,0.25848,0.164792,0.152089,0.069757,0.261593,0.167739,0.156166,3,5
0.5,64,objective,0.829368,0.332815,0.535242,0.17844,0.184694,0.652244,0.745223,0.68918,0.678265,1,6
0.5,64,rhs,0.153198,0.111205,0.113622,0.105224,0.107685,0.034828,0.142884,0.134485,0.135473,3,21
1.0,4,matrix,0.090236,0.071212,0.033752,0.030098,0.028487,0.438365,0.438947,0.44998,0.440602,10,80


## Make time table

In [37]:
def optimality_gap(df, generator):
    """Return the relative optimality gap |primal - dual| / |primal|.

    Reads the "primalBound <generator>" and "dualBound <generator>" columns of
    `df` and works row by row. The denominator is taken in absolute value so
    that a negative primal bound cannot produce a negative gap, which would
    pass any "gap below threshold" filter regardless of its size.
    A primal bound of exactly 0 still yields inf/NaN, as before.
    """
    primal = df[f"primalBound {generator}"]
    dual = df[f"dualBound {generator}"]
    return abs(primal - dual) / abs(primal)

In [38]:
# additional filtering for dataframe on run time
fields = ["terminationTime New", "terminationTime Old",
          "terminationTime Farkas", "terminationTime None",
          "vpcGenerationTime New", "vpcGenerationTime Old",
          "vpcGenerationTime Farkas"]
# only check the ones that solve to optimality (gap under 2% for the "New" run)
# NOTE: this narrows the shared `mask`, so every later use of `mask` inherits
# the restriction
mask = mask & (optimality_gap(df, "New") < .02)
time_df = df.loc[mask, group_cols + id_cols + fields]

# define aggregating operations
aggregations = {f: "mean" for f in fields}
aggregations["instance"] = "nunique"
aggregations["problem number"] = "count"

# get mean run times by degree and term
out = time_df.groupby(["degree", "terms"]).agg(aggregations).reset_index()

# to_csv does not create missing directories, so make sure the output folder
# exists before writing
os.makedirs(out_fldr, exist_ok=True)
out.to_csv(os.path.join(out_fldr, "time.csv"), index=False, mode="w")
out

Unnamed: 0,degree,terms,terminationTime New,terminationTime Old,terminationTime Farkas,terminationTime None,vpcGenerationTime New,vpcGenerationTime Old,vpcGenerationTime Farkas,instance,problem number
0,0.5,4,16.355662,12.822757,11.038034,13.778225,2.760208,0.687757,0.192316,5,25
1,0.5,16,22.791149,7.971118,4.462865,4.576061,17.250001,3.64591,0.614967,3,16
2,0.5,64,118.49504,73.594722,52.840414,50.518878,82.424231,27.025128,3.577249,2,7
3,1.0,4,91.669882,89.162975,86.463025,82.904498,4.61898,0.644236,0.258162,6,51
4,1.0,16,60.049696,47.124476,43.035859,42.464663,19.521022,3.709673,1.365795,4,32
5,1.0,64,31.664069,14.954202,10.414,5.775945,25.818715,9.426301,1.523261,2,10
6,2.0,4,59.051592,60.142018,64.818682,59.40946,3.63476,0.603724,0.229709,7,48
7,2.0,16,26.833795,10.408978,6.743516,6.089188,17.027402,2.894695,0.664367,2,30
8,2.0,64,21.10995,9.055443,3.47107,2.859631,18.662018,6.749112,1.342633,2,4


## Make node table

In [39]:
# additional filtering for dataframe on nodes processed
# (rows are selected with the current value of `mask`)
fields = ["nodes New", "nodes Old", "nodes Farkas",
          "nodes None"]
node_df = df.loc[mask, group_cols + id_cols + fields]

# define aggregating operations
aggregations = {f: "mean" for f in fields}
aggregations["instance"] = "nunique"
aggregations["problem number"] = "count"

# get mean nodes processed by degree and term
out = node_df.groupby(["degree", "terms"]).agg(aggregations).reset_index()

# to_csv does not create missing directories, so make sure the output folder
# exists before writing
os.makedirs(out_fldr, exist_ok=True)
out.to_csv(os.path.join(out_fldr, "node.csv"), index=False, mode="w")
out

Unnamed: 0,degree,terms,nodes New,nodes Old,nodes Farkas,nodes None,instance,problem number
0,0.5,4,775.48,621.6,495.36,1131.64,5,25
1,0.5,16,176.625,155.625,167.25,178.375,3,16
2,0.5,64,1665.428571,2177.142857,2145.857143,1830.0,2,7
3,1.0,4,3989.254902,3940.745098,3659.784314,3843.137255,6,51
4,1.0,16,1317.75,1565.71875,1566.90625,1535.46875,4,32
5,1.0,64,271.2,287.9,434.6,244.8,2,10
6,2.0,4,2143.541667,2338.75,2621.708333,2573.3125,7,48
7,2.0,16,251.1,307.333333,238.733333,248.733333,2,30
8,2.0,64,205.75,178.5,146.0,170.5,2,4


## Make iteration table

In [40]:
# additional filtering for dataframe on iterations
# (rows are selected with the current value of `mask`)
fields = ["iterations New", "iterations Old",
          "iterations Farkas", "iterations None"]
# NOTE(review): the name `node_df` is kept for compatibility, but from here on
# it holds the iteration columns, not the node columns
node_df = df.loc[mask, group_cols + id_cols + fields]

# define aggregating operations
aggregations = {f: "mean" for f in fields}
aggregations["instance"] = "nunique"
aggregations["problem number"] = "count"

# get mean iterations by degree and term
out = node_df.groupby(["degree", "terms"]).agg(aggregations).reset_index()

# to_csv does not create missing directories, so make sure the output folder
# exists before writing
os.makedirs(out_fldr, exist_ok=True)
out.to_csv(os.path.join(out_fldr, "iteration.csv"), index=False, mode="w")
out

Unnamed: 0,degree,terms,iterations New,iterations Old,iterations Farkas,iterations None,instance,problem number
0,0.5,4,10795.84,9656.16,8158.68,11932.64,5,25
1,0.5,16,3284.9375,3115.625,2957.8125,3321.8125,3,16
2,0.5,64,22263.285714,27118.571429,34794.571429,36691.714286,2,7
3,1.0,4,46896.745098,46816.333333,45516.784314,44171.470588,6,51
4,1.0,16,17698.25,21021.5,18389.875,18939.09375,4,32
5,1.0,64,4438.9,4906.5,7611.8,4616.7,2,10
6,2.0,4,26166.3125,27887.979167,31883.625,30375.229167,7,48
7,2.0,16,5669.166667,6484.066667,4889.333333,5219.433333,2,30
8,2.0,64,2452.75,2301.25,2189.75,2535.0,2,4
