In [96]:
%load_ext autoreload
%autoreload 2

import os
import json
import pandas as pd
import numpy as np
import sys
from pathlib import Path

root_folder = Path('/home/penlu/code/bespoke-gnn4do')
sys.path.insert(0, str(root_folder))

from utils.tabulate import load_datasets, load_train_outputs, load_baseline_outputs

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [97]:
# load datasets and calculate validation slices

import torch

datasets = load_datasets()
indices = {}
for name, dataset in datasets.items():
    torch.manual_seed(0)
    print(f"{name} dataset size: {len(dataset)}")
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])
    indices[name] = val_dataset.indices

dataset_names = list(datasets.keys())
dataset_names.append('RANDOM 500')
dataset_names.append('ForcedRB')

loading RANDOM
loading PROTEINS
loading ENZYMES
loading COLLAB
loading IMDB-BINARY
loading MUTAG
RANDOM dataset size: 1000
PROTEINS dataset size: 1113
ENZYMES dataset size: 600
COLLAB dataset size: 5000
IMDB-BINARY dataset size: 1000
MUTAG dataset size: 188




In [98]:
# loading max cut results from files

print("loading model losses")
maxcut_models = load_train_outputs(root_folder / 'training_runs', '230823_test')
print("loading sdp lift losses")
maxcut_sdp_lift = load_baseline_outputs(root_folder / 'baseline_runs', '230824_sdp', 'sdp', indices)
print("loading sdp proj losses")
maxcut_sdp_proj = load_baseline_outputs(root_folder / 'baseline_runs', '230824_sdp', 'sdp|random_hyperplane', indices)
print("loading gurobi 1s")
maxcut_grb_1s = load_baseline_outputs(root_folder / 'baseline_runs', '230823_gurobi_1s', 'gurobi', indices)
print("loading gurobi 5s")
maxcut_grb_5s = load_baseline_outputs(root_folder / 'baseline_runs', '230823_gurobi_5s', 'gurobi', indices)
print("loading gurobi 20s")
maxcut_grb_20s = load_baseline_outputs(root_folder / 'baseline_runs', '230916_gurobi_max_cut_20s', 'gurobi', indices)

loading model losses
load_train_outputs: got GatedGCNN, PROTEINS (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GAT, MUTAG (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GCNN, RANDOM (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GCNN, ENZYMES (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GCNN, IMDB-BINARY (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GatedGCNN, RANDOM (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GAT, IMDB-BINARY (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got LiftMP, COLLAB (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GCNN, COLLAB (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GIN, IMDB-BINARY (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GIN, PROTEINS (positional encoding=laplacian_eigenvector, d

In [99]:
# putting max cut results in a table
maxcut_scores = pd.DataFrame(columns=dataset_names)

for (model, dataset), (train_losses, valid_scores) in maxcut_models.items():
    maxcut_scores.at[model, dataset] = np.max(valid_scores)
for (model, dataset), (train_losses, valid_scores) in maxcut_models.items():
    print(f"{model} {dataset} loss: {train_losses[-1]}")

for dataset, score in maxcut_sdp_lift.items():
    maxcut_scores.at['SDP lift', dataset] = score

for dataset, score in maxcut_sdp_proj.items():
    maxcut_scores.at['SDP proj', dataset] = score

for dataset, score in maxcut_grb_1s.items():
    maxcut_scores.at['gurobi 1s', dataset] = score

for dataset, score in maxcut_grb_5s.items():
    maxcut_scores.at['gurobi 5s', dataset] = score

for dataset, score in maxcut_grb_20s.items():
    maxcut_scores.at['gurobi 20s', dataset] = score

for dataset in dataset_names:
    edges = 0
    count = 0
    if dataset not in datasets:
        continue
    for example in datasets[dataset]:
        edges += example.edge_index.shape[1]
        count += 1
    maxcut_scores.at['edge count', dataset] = float(edges) / count

#maxcut_scores.style.apply(lambda col: ['font-weight:bold' if x==col.max() else '' for x in col])

GatedGCNN PROTEINS loss: -32.155372619628906
GAT MUTAG loss: -19.13991754705256
GCNN RANDOM loss: -179.67160034179688
GCNN ENZYMES loss: -23.810829162597656
GCNN IMDB-BINARY loss: 43.74989318847656
GatedGCNN RANDOM loss: -284.61016845703125
GAT IMDB-BINARY loss: 60.57119369506836
LiftMP COLLAB loss: -113.91009521484375
GCNN COLLAB loss: 432.3880615234375
GIN IMDB-BINARY loss: 36.690887451171875
GIN PROTEINS loss: -16.147174835205078
GatedGCNN IMDB-BINARY loss: -6.4940290451049805
GAT PROTEINS loss: -25.318279266357422
LiftMP RANDOM loss: -304.903564453125
LiftMP ENZYMES loss: -35.00230026245117
GatedGCNN MUTAG loss: -18.802248868075285
GIN COLLAB loss: 411.0694885253906
GAT ENZYMES loss: -30.625537872314453
LiftMP IMDB-BINARY loss: -14.079669952392578
GIN MUTAG loss: -15.512301358309658
GCNN PROTEINS loss: -29.71967887878418
GIN ENZYMES loss: -22.141845703125
LiftMP MUTAG loss: -18.82797518643466
LiftMP PROTEINS loss: -32.377655029296875
GAT COLLAB loss: 1296.400634765625
GIN RANDOM lo

In [100]:
# loading vertex cover results from files

print("loading model losses")
vc_models = load_train_outputs(root_folder / 'training_runs', '230913_VC') # 230901_VC for without PE
print("loading more model losses")
vc_models_2 = load_train_outputs(root_folder / 'training_runs', '230914_VC')
vc_models.update(vc_models_2)
print("loading model losses, forcedRB")
vc_models_forcedrb = load_train_outputs(root_folder / 'training_runs', '230910_VC_forcedrb')

print("loading sdp lift losses")
vc_sdp_lift = load_baseline_outputs(root_folder / 'baseline_runs', '230902_VC_sdp', 'sdp', indices)
print("loading sdp proj losses")
vc_sdp_proj = load_baseline_outputs(root_folder / 'baseline_runs', '230902_VC_sdp', 'sdp|random_hyperplane', indices)
print("loading gurobi 1s")
vc_grb_1s = load_baseline_outputs(root_folder / 'baseline_runs', '230902_gurobi_1s', 'gurobi', indices)
print("loading gurobi 5s")
vc_grb_5s = load_baseline_outputs(root_folder / 'baseline_runs', '230902_gurobi_5s', 'gurobi', indices)
print("loading gurobi 5s")
vc_grb_5s = load_baseline_outputs(root_folder / 'baseline_runs', '230902_gurobi_5s', 'gurobi', indices)
print("loading gurobi 20s")
vc_grb_20s = load_baseline_outputs(root_folder / 'baseline_runs', '230916_gurobi_vertex_cover_20s', 'gurobi', indices)

print("loading gurobi RANDOM 500")
vc_grb_bigrandom_1 = load_baseline_outputs(root_folder / 'baseline_runs', '230909_gurobi_1s', 'gurobi', indices)
vc_grb_bigrandom_5 = load_baseline_outputs(root_folder / 'baseline_runs', '230909_gurobi_5s', 'gurobi', indices)
print("loading sdp RANDOM 500")
vc_sdp_bigrandom_lift = load_baseline_outputs(root_folder / 'baseline_runs', '230910_VC_sdp_500_RANDOM', 'sdp', indices)
vc_sdp_bigrandom_proj = load_baseline_outputs(root_folder / 'baseline_runs', '230910_VC_sdp_500_RANDOM', 'sdp|random_hyperplane', indices)

print("loading gurobi ForcedRB")
vc_grb_forcedrb_1 = load_baseline_outputs(root_folder / 'baseline_runs', '230913_forcedrb_gurobi_1s', 'gurobi', indices)
vc_grb_forcedrb_5 = load_baseline_outputs(root_folder / 'baseline_runs', '230913_forcedrb_gurobi_5s', 'gurobi', indices)
print("loading SDP ForcedRB")
vc_sdp_forcedrb_lift = load_baseline_outputs(root_folder / 'baseline_runs', '230914_VC_sdp_ForcedRB', 'sdp', indices)
vc_sdp_forcedrb_proj = load_baseline_outputs(root_folder / 'baseline_runs', '230914_VC_sdp_ForcedRB', 'sdp|random_hyperplane', indices)

loading model losses
load_train_outputs: got LiftMP, PROTEINS (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got LiftMP, MUTAG (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got LiftMP, IMDB-BINARY (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got LiftMP, RANDOM (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got LiftMP, COLLAB (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got LiftMP, ENZYMES (positional encoding=laplacian_eigenvector, dim=8
loading more model losses
load_train_outputs: got GAT, IMDB-BINARY (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GatedGCNN, ENZYMES (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GAT, COLLAB (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GIN, COLLAB (positional encoding=laplacian_eigenvector, dim=8
load_train_outputs: got GIN, MUTAG (positional encoding=

In [101]:
# putting vertex cover results in a table
vc_scores = pd.DataFrame(columns=dataset_names)

for (model, dataset), (train_losses, valid_scores) in vc_models.items():
    vc_scores.at[model, dataset] = np.max(valid_scores)

for (model, dataset), (train_losses, valid_scores) in vc_models_forcedrb.items():
    print(dataset)
    vc_scores.at[model, dataset] = np.max(valid_scores)

for dataset, score in vc_sdp_lift.items():
    vc_scores.at['SDP lift', dataset] = score

for dataset, score in vc_sdp_proj.items():
    vc_scores.at['SDP proj', dataset] = score

for dataset, score in vc_grb_1s.items():
    vc_scores.at['gurobi 1s', dataset] = score

for dataset, score in vc_grb_5s.items():
    vc_scores.at['gurobi 5s', dataset] = score

for dataset, score in vc_grb_20s.items():
    vc_scores.at['gurobi 20s', dataset] = score

for dataset, score in vc_grb_bigrandom_1.items():
    vc_scores.at['gurobi 1s', 'RANDOM 500'] = score
for dataset, score in vc_grb_bigrandom_5.items():
    vc_scores.at['gurobi 5s', 'RANDOM 500'] = score

for dataset, score in vc_sdp_bigrandom_lift.items():
    vc_scores.at['SDP lift', 'RANDOM 500'] = score
for dataset, score in vc_sdp_bigrandom_proj.items():
    vc_scores.at['SDP proj', 'RANDOM 500'] = score

for dataset, score in vc_grb_forcedrb_1.items():
    vc_scores.at['gurobi 1s', 'ForcedRB'] = score
for dataset, score in vc_grb_forcedrb_5.items():
    vc_scores.at['gurobi 5s', 'ForcedRB'] = score

for dataset, score in vc_sdp_forcedrb_lift.items():
    vc_scores.at['SDP lift', 'ForcedRB'] = score
for dataset, score in vc_sdp_forcedrb_proj.items():
    vc_scores.at['SDP proj', 'ForcedRB'] = score

for dataset in dataset_names:
    nodes = 0
    count = 0
    if dataset not in datasets:
        continue
    for example in datasets[dataset]:
        nodes += example.num_nodes
        count += 1
    vc_scores.at['vertex count', dataset] = -float(nodes) / count

#vc_scores.style.apply(lambda col: ['font-weight:bold' if x==col.max() else '' for x in col])

ForcedRB
ForcedRB
ForcedRB
ForcedRB
ForcedRB


In [102]:
maxcut_scores

Unnamed: 0,RANDOM,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM 500,ForcedRB
GatedGCNN,867.9425,93.0,79.004167,2131.357,116.3025,28.855263,,
GAT,870.8775,94.376682,76.775,2525.2685,77.755,29.657895,,
GCNN,825.5575,84.309417,63.208333,2077.595,67.045,24.605263,,
LiftMP,871.4125,93.112108,79.204167,2706.7155,116.4325,28.855263,,
GIN,822.6375,92.421525,72.358333,2306.438,74.825,26.842105,,
SDP lift,898.268377,102.596135,79.682816,2534.011559,91.581074,29.823215,,
SDP proj,873.7725,101.049327,78.608333,2530.548,91.225,29.657895,,
gurobi 1s,875.8325,101.134529,78.608333,2530.888,91.225,29.657895,,
gurobi 5s,875.9925,101.134529,78.608333,2530.924,91.225,29.657895,,
gurobi 20s,876.0725,101.134529,78.608333,2530.961,91.225,29.657895,,6341.078


In [103]:
vc_scores

Unnamed: 0,RANDOM,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM 500,ForcedRB
LiftMP,-94.81,-25.376682,-19.458333,-66.572,-16.09,-8.236842,,-205.055
GAT,-80.92,-29.874439,-22.658333,-69.745,-18.245,-8.289474,,-200.75
GatedGCNN,-80.475,-25.90583,-20.008333,-73.101,-18.36,-8.236842,,-248.295
GIN,-89.875,-32.233184,-24.875,-72.053,-18.365,-9.736842,,-204.37
GCNN,-92.46,-32.883408,-27.475,-72.713,-18.515,-11.368421,,-205.81
SDP lift,-73.377361,-25.036219,-19.409491,-56.185211,-16.087033,-8.237257,-434.955242,-196.419424
SDP proj,-81.11,-25.125561,-19.458333,-56.363208,-16.1,-8.236842,-488.485,-197.206
gurobi 1s,-75.935,-25.076233,-19.458333,-66.454,-16.09,-8.236842,-472.62,-197.027
gurobi 5s,-75.935,-25.076233,-19.458333,-66.454,-16.09,-8.236842,-467.315,-196.481
gurobi 20s,-75.935,-25.076233,-19.458333,-66.454,-16.09,-8.236842,,-196.455


In [104]:
# get maxcut, vc scores normalized by gurobi 5s count
maxcut_norms = maxcut_scores.copy()
for dataset in dataset_names:
    maxcut_norms[dataset] = maxcut_scores[dataset] / maxcut_scores.at['gurobi 20s', dataset]
vc_norms = vc_scores.copy()
for dataset in dataset_names:
    vc_norms[dataset] = vc_scores[dataset] / vc_scores.at['gurobi 20s', dataset]

In [105]:
maxcut_norms

Unnamed: 0,RANDOM,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM 500,ForcedRB
GatedGCNN,0.99072,0.919567,1.005036,0.842114,1.274897,0.972937,,
GAT,0.99407,0.93318,0.976678,0.997751,0.852343,1.0,,
GCNN,0.942339,0.833636,0.804092,0.820872,0.734941,0.829636,,
LiftMP,0.994681,0.920676,1.00758,1.069442,1.276322,0.972937,,
GIN,0.939006,0.913847,0.920492,0.911289,0.820225,0.905058,,
SDP lift,1.025336,1.014452,1.013669,1.001205,1.003903,1.005574,,
SDP proj,0.997375,0.999158,1.0,0.999837,1.0,1.0,,
gurobi 1s,0.999726,1.0,1.0,0.999971,1.0,1.0,,
gurobi 5s,0.999909,1.0,1.0,0.999985,1.0,1.0,,
gurobi 20s,1.0,1.0,1.0,1.0,1.0,1.0,,1.0


In [77]:
vc_norms

Unnamed: 0,RANDOM,PROTEINS,ENZYMES,COLLAB,IMDB-BINARY,MUTAG,RANDOM 500,ForcedRB
LiftMP,1.248568,1.011981,1.0,1.001776,1.0,1.0,,
GAT,1.065648,1.191345,1.164454,1.049523,1.133934,1.00639,,
GatedGCNN,1.059788,1.033083,1.028266,1.100024,1.141081,1.0,,
GIN,1.183578,1.285408,1.278373,1.084254,1.141392,1.182109,,
GCNN,1.21762,1.311338,1.411991,1.094185,1.150715,1.380192,,
SDP lift,0.966318,0.998404,0.99749,0.845475,0.999816,1.00005,,
SDP proj,1.06815,1.001967,1.0,0.848154,1.000622,1.0,,
gurobi 1s,1.0,1.0,,1.0,1.0,1.0,,
gurobi 5s,1.0,1.0,,1.0,1.0,1.0,,
gurobi 20s,1.0,1.0,1.0,1.0,1.0,1.0,,
