In [34]:
%load_ext autoreload
%autoreload 2

import os
import json
import pandas as pd
import numpy as np
import sys
from pathlib import Path

# Repository root. Defaults to the original author's checkout; set the
# GNN4DO_ROOT environment variable to point this notebook at another clone
# without editing the cell.
root_folder = Path(os.environ.get('GNN4DO_ROOT', '/home/penlu/code/bespoke-gnn4do'))
# Put the repository first on sys.path so the project-local `utils` package
# imported right after this is resolved from that checkout.
sys.path.insert(0, str(root_folder))

from utils.tabulate import load_datasets, load_train_outputs, load_baseline_outputs

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
# Load every dataset and recompute its validation slice: an 80/20
# random split, with the validation indices stored per dataset name in
# `indices` (later cells pass `indices` to load_baseline_outputs).

import torch

datasets = load_datasets()
indices = {}
for name in datasets:
    dataset = datasets[name]
    n_examples = len(dataset)
    print(f"{name} dataset size: {n_examples}")

    train_size = int(0.8 * n_examples)
    val_size = n_examples - train_size

    # Reseed the global torch RNG before every split so each dataset's split
    # is drawn from the same seed, independent of iteration order.
    # NOTE(review): presumably this mirrors the seed used at training time - confirm.
    torch.manual_seed(0)
    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size]
    )
    indices[name] = val_dataset.indices

RANDOM dataset size: 1000
PROTEINS dataset size: 1113
ENZYMES dataset size: 600
COLLAB dataset size: 5000
IMDB-BINARY dataset size: 1000
MUTAG dataset size: 188


In [36]:
# Max cut: read the trained-model outputs and the SDP / Gurobi baseline
# outputs from their run directories under the repository root.

training_runs_dir = root_folder / 'training_runs'
baseline_runs_dir = root_folder / 'baseline_runs'

print("loading model losses")
maxcut_models = load_train_outputs(training_runs_dir, '230823_test')

# Both SDP variants are read from the same run prefix; only the method key differs.
print("loading sdp lift losses")
maxcut_sdp_lift = load_baseline_outputs(
    baseline_runs_dir, '230824_sdp', 'sdp', indices
)
print("loading sdp proj losses")
maxcut_sdp_proj = load_baseline_outputs(
    baseline_runs_dir, '230824_sdp', 'sdp|random_hyperplane', indices
)

# Gurobi runs are stored under separate prefixes, one per time limit.
print("loading gurobi 1s")
maxcut_grb_1s = load_baseline_outputs(
    baseline_runs_dir, '230823_gurobi_1s', 'gurobi', indices
)
print("loading gurobi 5s")
maxcut_grb_5s = load_baseline_outputs(
    baseline_runs_dir, '230823_gurobi_5s', 'gurobi', indices
)

loading model losses
load_train_outputs: got GatedGCNN, PROTEINS
load_train_outputs: got GAT, MUTAG
load_train_outputs: got GCNN, RANDOM
load_train_outputs: got GCNN, ENZYMES
load_train_outputs: got GCNN, IMDB-BINARY
load_train_outputs: got GatedGCNN, RANDOM
load_train_outputs: got GAT, IMDB-BINARY
load_train_outputs: got LiftMP, COLLAB
load_train_outputs: got GCNN, COLLAB
load_train_outputs: got GIN, IMDB-BINARY
load_train_outputs: got GIN, PROTEINS
load_train_outputs: got GatedGCNN, IMDB-BINARY
load_train_outputs: got GAT, PROTEINS
load_train_outputs: got LiftMP, RANDOM
load_train_outputs: got LiftMP, ENZYMES
load_train_outputs: got GatedGCNN, MUTAG
load_train_outputs: got GIN, COLLAB
load_train_outputs: got GAT, ENZYMES
load_train_outputs: got LiftMP, IMDB-BINARY
load_train_outputs: got GIN, MUTAG
load_train_outputs: got GCNN, PROTEINS
load_train_outputs: got GIN, ENZYMES
load_train_outputs: got LiftMP, MUTAG
load_train_outputs: got LiftMP, PROTEINS
load_train_outputs: got GAT, COLL

In [40]:
# Assemble the max cut results into one table: one row per model or
# baseline, one column per dataset.

dataset_names = ['RANDOM', 'ENZYMES', 'PROTEINS', 'IMDB-BINARY', 'MUTAG', 'COLLAB']
maxcut_scores = pd.DataFrame(columns=dataset_names)

# Trained models: keep the last entry of each run's validation scores.
for key, outputs in maxcut_models.items():
    model, dataset = key
    train_losses, valid_scores = outputs
    maxcut_scores.at[model, dataset] = valid_scores[-1]

# Baselines: each result dict maps dataset name -> score and fills one row.
maxcut_baseline_rows = (
    ('SDP lift', maxcut_sdp_lift),
    ('SDP proj', maxcut_sdp_proj),
    ('gurobi 1s', maxcut_grb_1s),
    ('gurobi 5s', maxcut_grb_5s),
)
for row_label, baseline_scores in maxcut_baseline_rows:
    for dataset, score in baseline_scores.items():
        maxcut_scores.at[row_label, dataset] = score

# Reference row: mean number of edge_index columns per graph in each dataset.
for dataset in dataset_names:
    per_graph_edges = [example.edge_index.shape[1] for example in datasets[dataset]]
    edges = sum(per_graph_edges)
    count = len(per_graph_edges)
    maxcut_scores.at['edge count', dataset] = float(edges) / count

# Optional styling (disabled): bold the per-column maximum.
#maxcut_scores.style.apply(lambda col: ['font-weight:bold' if x==col.max() else '' for x in col])

In [41]:
# Vertex cover: read the trained-model outputs and the SDP / Gurobi baseline
# outputs from their run directories under the repository root.

training_runs_dir = root_folder / 'training_runs'
baseline_runs_dir = root_folder / 'baseline_runs'

print("loading model losses")
vc_models = load_train_outputs(training_runs_dir, '230901_VC')

# Both SDP variants are read from the same run prefix; only the method key differs.
print("loading sdp lift losses")
vc_sdp_lift = load_baseline_outputs(
    baseline_runs_dir, '230902_VC_sdp', 'sdp', indices
)
print("loading sdp proj losses")
vc_sdp_proj = load_baseline_outputs(
    baseline_runs_dir, '230902_VC_sdp', 'sdp|random_hyperplane', indices
)

# Gurobi runs are stored under separate prefixes, one per time limit.
print("loading gurobi 1s")
vc_grb_1s = load_baseline_outputs(
    baseline_runs_dir, '230902_gurobi_1s', 'gurobi', indices
)
print("loading gurobi 5s")
vc_grb_5s = load_baseline_outputs(
    baseline_runs_dir, '230902_gurobi_5s', 'gurobi', indices
)

loading model losses
load_train_outputs: got GAT, PROTEINS
load_train_outputs: got GCNN, PROTEINS
load_train_outputs: got GCNN, RANDOM
load_train_outputs: got NegationGAT, COLLAB
load_train_outputs: got GIN, COLLAB
load_train_outputs: got GatedGCNN, PROTEINS
load_train_outputs: got GatedGCNN, COLLAB
load_train_outputs: got GCNN, COLLAB
load_train_outputs: got GIN, RANDOM
load_train_outputs: got GCNN, MUTAG
load_train_outputs: got GIN, PROTEINS
load_train_outputs: got NegationGAT, MUTAG
load_train_outputs: got NegationGAT, RANDOM
load_train_outputs: got GatedGCNN, RANDOM
load_train_outputs: got LiftMP, ENZYMES
load_train_outputs: got GIN, MUTAG
load_train_outputs: got GAT, RANDOM
load_train_outputs: got GIN, ENZYMES
load_train_outputs: got NegationGAT, PROTEINS
load_train_outputs: got GatedGCNN, MUTAG
load_train_outputs: got LiftMP, MUTAG
load_train_outputs: got NegationGAT, IMDB-BINARY
load_train_outputs: got GCNN, ENZYMES
load_train_outputs: got LiftMP, PROTEINS
load_train_outputs: go

In [42]:
# Assemble the vertex cover results into one table: one row per model or
# baseline, one column per dataset.

dataset_names = ['RANDOM', 'ENZYMES', 'PROTEINS', 'IMDB-BINARY', 'MUTAG', 'COLLAB']
vc_scores = pd.DataFrame(columns=dataset_names)

# Trained models: keep the last entry of each run's validation scores.
for key, outputs in vc_models.items():
    model, dataset = key
    train_losses, valid_scores = outputs
    vc_scores.at[model, dataset] = valid_scores[-1]

# Baselines: each result dict maps dataset name -> score and fills one row.
vc_baseline_rows = (
    ('SDP lift', vc_sdp_lift),
    ('SDP proj', vc_sdp_proj),
    ('gurobi 1s', vc_grb_1s),
    ('gurobi 5s', vc_grb_5s),
)
for row_label, baseline_scores in vc_baseline_rows:
    for dataset, score in baseline_scores.items():
        vc_scores.at[row_label, dataset] = score

# Reference row: mean node count per graph, stored negated.
# NOTE(review): the sign flip presumably matches the sign convention of the
# loaded vertex cover scores - confirm against utils.tabulate.
for dataset in dataset_names:
    per_graph_nodes = [example.num_nodes for example in datasets[dataset]]
    nodes = sum(per_graph_nodes)
    count = len(per_graph_nodes)
    vc_scores.at['vertex count', dataset] = -float(nodes) / count

# Optional styling (disabled): bold the per-column maximum.
#vc_scores.style.apply(lambda col: ['font-weight:bold' if x==col.max() else '' for x in col])

In [43]:
# Display the max cut table (rows: models, baselines and the 'edge count' reference; columns: datasets).
maxcut_scores

Unnamed: 0,RANDOM,ENZYMES,PROTEINS,IMDB-BINARY,MUTAG,COLLAB
GatedGCNN,867.6525,78.879167,92.90583,116.0275,28.855263,1950.527
GAT,870.2025,76.241667,93.887892,73.245,29.605263,2504.0655
GCNN,824.1825,45.641667,72.484305,63.69,20.0,2002.211
LiftMP,871.0525,79.179167,93.049327,116.3825,28.855263,2704.1985
GIN,800.5025,65.5,88.040359,68.495,21.368421,2240.236
SDP lift,898.268377,79.682816,102.596135,91.581074,29.823215,2534.011559
SDP proj,873.7725,78.608333,101.049327,91.225,29.657895,2530.548
gurobi 1s,875.8325,78.608333,101.134529,91.225,29.657895,2530.888
gurobi 5s,875.9925,78.608333,101.134529,91.225,29.657895,2530.924
edge count,1485.718,124.273333,145.631626,193.062,39.585106,4914.4316


In [44]:
# Display the vertex cover table (rows: models, baselines and the 'vertex count' reference; columns: datasets).
vc_scores

Unnamed: 0,RANDOM,ENZYMES,PROTEINS,IMDB-BINARY,MUTAG,COLLAB
GAT,-81.305,-22.683333,-30.587444,-18.28,-8.710526,-69.898
GCNN,-94.525,-32.8,-52.950673,-18.625,-17.184211,-72.944
NegationGAT,-97.6,-28.241667,-34.64574,-19.055,-13.921053,-73.959
GIN,-151.84,-37.941667,-60.941704,-18.4,-10.131579,-72.371
GatedGCNN,-85.985,-20.291667,-26.336323,-18.48,-8.236842,-74.237
LiftMP,-95.225,-19.458333,-25.690583,-18.455,-8.236842,-69.621
SDP lift,-73.377361,-19.409491,-25.036219,-16.087033,-8.237257,-56.185211
SDP proj,-81.11,-19.458333,-25.125561,-16.1,-8.236842,-56.363208
gurobi 1s,-75.935,,-25.076233,-16.09,-8.236842,-66.454
gurobi 5s,-75.935,,-25.076233,-16.09,-8.236842,-66.454


In [20]:
# Express max cut and vertex cover scores relative to the 'gurobi 5s' row.
def _normalized_by_gurobi_5s(scores):
    """Return a copy of `scores` with each dataset column divided by its 'gurobi 5s' entry."""
    norms = scores.copy()
    for column in dataset_names:
        reference = scores.at['gurobi 5s', column]
        norms[column] = scores[column] / reference
    return norms


maxcut_norms = _normalized_by_gurobi_5s(maxcut_scores)
vc_norms = _normalized_by_gurobi_5s(vc_scores)

In [21]:
# Display max cut scores as ratios to the 'gurobi 5s' row of the same column.
maxcut_norms

Unnamed: 0,RANDOM,ENZYMES,PROTEINS,IMDB-BINARY,MUTAG,COLLAB
GatedGCNN,0.992418,0.977316,0.977171,1.124336,0.98391,0.772688
GAT,0.995334,0.944637,0.9875,0.709762,1.009484,0.991968
GCNN,0.942697,0.565502,0.76238,0.617172,0.681962,0.793162
LiftMP,0.996307,0.981033,0.97868,1.127776,0.98391,1.07125
GIN,0.915612,0.811548,0.925996,0.663734,0.728623,0.887454
SDP lift,1.025486,1.013468,1.014488,1.003483,1.005993,1.001136
SDP proj,0.997462,0.999835,0.999102,0.999971,1.0,0.999847
gurobi 1s,0.999823,1.0,1.0,0.99999,1.0,0.999981
gurobi 5s,1.0,1.0,1.0,1.0,1.0,1.0
edge count,1.699359,1.539751,1.531733,1.870819,1.349778,1.946818


In [22]:
# Display vertex cover scores as ratios to the 'gurobi 5s' row of the same column.
# A column whose 'gurobi 5s' entry is missing (NaN) comes out entirely NaN.
vc_norms

Unnamed: 0,RANDOM,ENZYMES,PROTEINS,IMDB-BINARY,MUTAG,COLLAB
GAT,1.070845,,1.29454,1.100343,1.066132,1.068845
GCNN,1.244962,,2.241011,1.12111,2.103276,1.115423
NegationGAT,1.285462,,1.466298,1.146993,1.703879,1.130944
GIN,1.999842,,2.579212,1.107566,1.240063,1.106661
GatedGCNN,1.132484,,1.114622,1.112382,1.008155,1.135195
LiftMP,1.254182,,1.087293,1.110877,1.008155,1.06461
SDP lift,0.966092,,0.998615,0.999818,0.997438,0.831418
SDP proj,1.070779,,1.002662,1.001023,1.0,0.834081
gurobi 1s,1.0,,1.0,1.0,1.0,1.0
gurobi 5s,1.0,,1.0,1.0,1.0,1.0
