In [1]:
%load_ext autoreload
%autoreload 2

import os
import json
import pandas as pd
import numpy as np
import sys
from pathlib import Path

# Absolute path to the local project checkout; the training_runs/ and
# baseline_runs/ directories read in later cells are resolved against it.
# NOTE(review): machine-specific path — adjust when running on another host.
root_folder = Path('/home/bcjexu/maxcut-80/bespoke-gnn4do/')
# Put the checkout first on sys.path, presumably so the `utils.tabulate`
# import below is taken from this checkout.
sys.path.insert(0, str(root_folder))

from utils.tabulate import load_datasets, load_train_outputs, load_baseline_outputs

In [2]:
# Folders (relative to root_folder) holding the baseline runs to tabulate.
# The earlier snapshot folders ('baseline_runs/230927_snapshot' and
# 'baseline_runs/230927_snapshot2') used to be assigned here first, but that
# value was overwritten on the very next line and never read, so only the
# effective assignment is kept.
baseline_folders = ['baseline_runs/230928_gurobi']
baseline_folders_greedy = ['baseline_runs/230928_greedy']


In [3]:
# Load every dataset and record which example indices land in its test split.
import torch

datasets = load_datasets()
indices = {}
for ds_name, ds in datasets.items():
    # Re-seed before each split so every dataset is partitioned from the same RNG state.
    torch.manual_seed(0)
    total = len(ds)
    print(f"{ds_name} dataset size: {total}")
    # 80% train; the remainder is halved between validation and test.
    n_train = int(0.8 * total)
    n_val = (total - n_train) // 2
    n_test = total - n_train - n_val
    splits = torch.utils.data.random_split(ds, [n_train, n_val, n_test])
    # Only the third (test) subset is kept; its indices select the baseline results later.
    indices[ds_name] = splits[2].indices

# Table columns: the loaded datasets plus the ForcedRB results loaded separately.
dataset_names = list(datasets.keys()) + ['ForcedRB']

loading PROTEINS
loading ENZYMES
loading COLLAB
loading IMDB-BINARY
loading MUTAG
PROTEINS dataset size: 1113
ENZYMES dataset size: 600
COLLAB dataset size: 5000
IMDB-BINARY dataset size: 1000
MUTAG dataset size: 188


In [4]:
# Vertex cover (VC): read trained-model outputs and SDP baseline outputs from disk.

training_dir = root_folder / 'training_runs'
baseline_dir = root_folder / 'baseline_runs'

print("loading model losses")
vc_models = load_train_outputs(training_dir, '230913_VC')  # 230901_VC for without PE
print("loading more model losses")
vc_models_2 = load_train_outputs(training_dir, '230914_VC')
# Merge the second batch of runs into the first; entries with equal keys are overwritten.
vc_models.update(vc_models_2)
print("loading model losses, forcedRB")
vc_models_forcedrb = load_train_outputs(training_dir, '230910_VC_forcedrb')

# SDP baselines, restricted to the test indices computed earlier.
print("loading sdp lift losses")
vc_sdp_lift = load_baseline_outputs(baseline_dir, '230902_VC_sdp', 'sdp', indices)
print("loading sdp proj losses")
vc_sdp_proj = load_baseline_outputs(baseline_dir, '230902_VC_sdp', 'sdp|random_hyperplane', indices)

print("loading SDP ForcedRB")
# Same two result keys as above (lift first, then projection), read from the ForcedRB run.
vc_sdp_forcedrb_lift, vc_sdp_forcedrb_proj = (
    load_baseline_outputs(baseline_dir, '230914_VC_sdp_ForcedRB', result_key, indices)
    for result_key in ('sdp', 'sdp|random_hyperplane')
)

loading model losses
loading more model losses
loading model losses, forcedRB
loading sdp lift losses


load_baseline_outputs: ENZYMES length: 60
load_baseline_outputs: ENZYMES sdp: -19.91958220601082
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp: -73.35152729797363
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG sdp: -7.7372643822117855
load_baseline_outputs: COLLAB length: 104
load_baseline_outputs: COLLAB sdp: -56.45677478496845
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY sdp: -16.756892976760863
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PROTEINS sdp: -24.925131029316358
loading sdp proj losses
load_baseline_outputs: ENZYMES length: 60
load_baseline_outputs: ENZYMES sdp|random_hyperplane: -20.0
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp|random_hyperplane: -81.3
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG sdp|random_hyperplane: -7.7368421052631575
load_baseline_outputs: COLLAB length: 104
load_baseline_outputs: COL

In [5]:
# Build the score table from the Gurobi baseline runs:
# one row per Gurobi timeout, one column per dataset (or generator@@size).

vc_scores = pd.DataFrame()

for baseline_folder in baseline_folders:
    run_root = Path(os.path.join(root_folder, baseline_folder))

    for model_folder in os.listdir(run_root):
        # Each run folder carries its arguments as JSON in params.txt.
        with open(run_root / model_folder / 'params.txt', 'r') as f:
            model_args = json.load(f)

        # Only vertex-cover runs belong in this table.
        if model_args['problem_type'] != 'vertex_cover':
            continue

        row = f'gurobi_{model_args["gurobi_timeout"]}'
        gen_n = model_args['gen_n']
        # When gen_n is a list, its first entry is appended to the dataset name
        # to form the column label.
        col = (
            f"{model_args['dataset']}@@{gen_n[0]}"
            if isinstance(gen_n, list)
            else f"{model_args['dataset']}"
        )
        print(row, col, run_root, model_folder)

        val = load_baseline_outputs(run_root, model_folder, 'gurobi')
        # Skip runs that did not yield exactly one score.
        if len(val) != 1:
            continue

        vc_scores.at[row, col] = next(iter(val.values()))

gurobi_4.0 PowerlawCluster@@50 /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230928_gurobi 230927_gurobi_VC_paramhash:0b2b4a0a202778fccc974a970a9041158e25f6830b4462b2b7713926b5ec1f48
load_baseline_outputs: PowerlawCluster length: 1000
load_baseline_outputs: PowerlawCluster gurobi: -42.977
gurobi_2.0 ErdosRenyi@@50 /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230928_gurobi 230927_gurobi_VC_paramhash:2249aca9c3b82e180263fdfc944526ea5d04adac470462ee38924add4d1dfd45
load_baseline_outputs: ErdosRenyi length: 1000
load_baseline_outputs: ErdosRenyi gurobi: -54.672
gurobi_1.0 ErdosRenyi@@400 /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230928_gurobi 230927_gurobi_VC_paramhash:2be8a8c0ef10d8553d326a8890635920ddb3da885a2f5accbdd33ccf6ab7ee32
load_baseline_outputs: ErdosRenyi length: 1000
load_baseline_outputs: ErdosRenyi gurobi: -423.069
gurobi_0.1 ErdosRenyi@@400 /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230928_gurobi 230927_gurobi_VC_paramhash:f05641cf6eb5c755ee10

load_baseline_outputs: ErdosRenyi length: 1000
load_baseline_outputs: ErdosRenyi gurobi: -423.069
gurobi_4.0 BarabasiAlbert@@400 /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230928_gurobi 230927_gurobi_VC_paramhash:511d88c20461b58431e36d6b6c2435a2d9a1f6fecedf254f4b73b5a2dd48ea0e
load_baseline_outputs: BarabasiAlbert length: 1000
load_baseline_outputs: BarabasiAlbert gurobi: -246.465
gurobi_8.0 ErdosRenyi@@400 /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230928_gurobi 230927_gurobi_VC_paramhash:fb36360de9801140b25387d877052d2c21cd1d594bd7f1b2271909db6b2ebc23
load_baseline_outputs: ErdosRenyi length: 1000
load_baseline_outputs: ErdosRenyi gurobi: -415.523
gurobi_1.0 WattsStrogatz@@50 /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230928_gurobi 230927_gurobi_VC_paramhash:1c495d623b9f13dfc260c1bc94b25b96e2995ead78b7675a6c1162557e196f74
load_baseline_outputs: WattsStrogatz length: 1000
load_baseline_outputs: WattsStrogatz gurobi: -45.737
gurobi_0.5 IMDB-BINARY /home/bcjex

In [6]:
# Add the greedy vertex-cover baseline to vc_scores as a single 'greedy' row,
# one column per dataset (or generator@@size).
for baseline_folder in baseline_folders_greedy:
    run_root = root_folder / baseline_folder

    for model_folder in os.listdir(run_root):
        # Each run folder carries its arguments as JSON in params.txt.
        with open(run_root / model_folder / 'params.txt', 'r') as f:
            model_args = json.load(f)

        # Only vertex-cover runs belong in this table.
        if model_args['problem_type'] != 'vertex_cover':
            continue

        row = 'greedy'
        # When gen_n is a list, its first entry is appended to the dataset name
        # to form the column label.
        if isinstance(model_args['gen_n'], list):
            col = f"{model_args['dataset']}@@{model_args['gen_n'][0]}"
        else:
            col = f"{model_args['dataset']}"
        print(row, col)

        val = load_baseline_outputs(run_root, model_folder, 'greedy')
        # Same guard as the Gurobi loop: only a result with exactly one score
        # is tabulated. Without it an empty result raises IndexError on the
        # [0] lookup below and aborts the whole cell.
        if len(val) != 1:
            continue

        vc_scores.at[row, col] = list(val.values())[0]

greedy IMDB-BINARY
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY greedy: -17.24
greedy REDDIT-BINARY
load_baseline_outputs: REDDIT-BINARY length: 100
load_baseline_outputs: REDDIT-BINARY greedy: -117.16
greedy MUTAG
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG greedy: -12.842105263157896
greedy REDDIT-MULTI-12K
load_baseline_outputs: REDDIT-MULTI-12K length: 100
load_baseline_outputs: REDDIT-MULTI-12K greedy: -115.72
greedy WattsStrogatz@@400
load_baseline_outputs: WattsStrogatz length: 1000
load_baseline_outputs: WattsStrogatz greedy: -434.52
greedy ErdosRenyi@@100
load_baseline_outputs: ErdosRenyi length: 1000
load_baseline_outputs: ErdosRenyi greedy: -143.513
greedy REDDIT-MULTI-5K
load_baseline_outputs: REDDIT-MULTI-5K length: 100
load_baseline_outputs: REDDIT-MULTI-5K greedy: -153.24
greedy WattsStrogatz@@100
load_baseline_outputs: WattsStrogatz length: 1000
load_baseline_outputs: WattsStrogatz greedy: -143.698
greedy Wat

In [7]:
# Fill the vertex-cover table with model results, SDP baselines and dataset sizes.

# Trained models: each run contributes its maximum validation score.
# The dataset name is echoed only for the ForcedRB runs.
for run_outputs, echo_dataset in ((vc_models, False), (vc_models_forcedrb, True)):
    for (model, dataset), (train_losses, valid_scores) in run_outputs.items():
        if echo_dataset:
            print(dataset)
        vc_scores.at[model, dataset] = np.max(valid_scores)

# SDP baselines: one row for the lifted solution, one for the projected one.
for sdp_row, sdp_scores in (('SDP lift', vc_sdp_lift), ('SDP proj', vc_sdp_proj)):
    for dataset, score in sdp_scores.items():
        vc_scores.at[sdp_row, dataset] = score

# ForcedRB SDP results all go to the 'ForcedRB' column regardless of their
# own key, so the last score in each mapping is the one that remains.
for sdp_row, sdp_scores in (('SDP lift', vc_sdp_forcedrb_lift), ('SDP proj', vc_sdp_forcedrb_proj)):
    for score in sdp_scores.values():
        vc_scores.at[sdp_row, 'ForcedRB'] = score

# Mean node count per loaded dataset, stored negated (the scores in this
# table are printed as negative values, presumably the same sign convention).
for dataset in dataset_names:
    if dataset not in datasets:
        continue
    node_counts = [example.num_nodes for example in datasets[dataset]]
    vc_scores.at['vertex count', dataset] = -float(sum(node_counts)) / len(node_counts)

In [8]:
# Display the table with rows ordered by label. sort_index() returns a sorted
# copy, so vc_scores itself keeps its insertion order.
vc_scores.sort_index()

Unnamed: 0,PowerlawCluster@@50,ErdosRenyi@@50,ErdosRenyi@@400,IMDB-BINARY,WattsStrogatz@@100,ErdosRenyi@@100,REDDIT-MULTI-5K,BarabasiAlbert@@100,BarabasiAlbert@@400,WattsStrogatz@@50,...,REDDIT-MULTI-12K,MUTAG,BarabasiAlbert@@50,COLLAB,PowerlawCluster@@100,PROTEINS,ENZYMES,WattsStrogatz@@400,RANDOM,ForcedRB
SDP lift,,,,-16.756893,,,,,,,...,,-7.737264,,-56.456775,,-24.925131,-19.919582,,-73.351527,-196.419424
SDP proj,,,,-16.78,,,,,,,...,,-7.736842,,-56.673077,,-24.973214,-20.0,,-81.3,-197.206
greedy,-51.38,-68.847,-442.838,-17.24,-143.698,-143.513,-153.24,-101.417,-302.528,-72.551,...,-115.72,-12.842105,-51.919,-71.742,-100.868,-33.928571,-27.35,-434.52,,
gurobi_0.1,-42.977,-55.062,-423.069,-16.76,-89.798,-127.83,-108.73,-83.192,-256.333,-45.737,...,-81.57,-7.736842,-42.818,-67.466,-84.069,-24.964286,-20.0,-269.582,,
gurobi_0.5,-42.977,-54.679,-423.069,-16.76,-89.798,-124.836,-107.84,-83.189,-246.558,-45.737,...,-81.57,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
gurobi_1.0,-42.977,-54.672,-423.069,-16.76,-89.798,-123.47,-107.32,-83.189,-246.487,-45.737,...,-81.52,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
gurobi_2.0,-42.977,-54.672,-419.997,-16.76,-89.798,-122.982,-107.32,-83.189,-246.466,-45.737,...,-81.52,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
gurobi_4.0,-42.977,-54.672,-417.029,-16.76,-89.798,-122.858,-107.32,-83.189,-246.465,-45.737,...,-81.52,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
gurobi_8.0,-42.977,-54.672,-415.523,-16.76,-89.798,-122.759,-107.32,-83.189,-246.465,-45.737,...,-81.52,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
vertex count,,,,-19.773,,,,,,,...,,-17.930851,,-74.4948,,-39.057502,-32.633333,,,


In [9]:
# Write the assembled table to analysis_ipynb/ inside the checkout, then display it.
# The destination is derived from root_folder instead of repeating the absolute
# path, so the export follows the checkout if root_folder is changed.
# Rows are written in insertion order (the sorted view is never assigned back).
vc_scores.to_csv(root_folder / 'analysis_ipynb' / 'vc_baseline_scores.csv')
vc_scores

Unnamed: 0,PowerlawCluster@@50,ErdosRenyi@@50,ErdosRenyi@@400,IMDB-BINARY,WattsStrogatz@@100,ErdosRenyi@@100,REDDIT-MULTI-5K,BarabasiAlbert@@100,BarabasiAlbert@@400,WattsStrogatz@@50,...,REDDIT-MULTI-12K,MUTAG,BarabasiAlbert@@50,COLLAB,PowerlawCluster@@100,PROTEINS,ENZYMES,WattsStrogatz@@400,RANDOM,ForcedRB
gurobi_4.0,-42.977,-54.672,-417.029,-16.76,-89.798,-122.858,-107.32,-83.189,-246.465,-45.737,...,-81.52,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
gurobi_2.0,-42.977,-54.672,-419.997,-16.76,-89.798,-122.982,-107.32,-83.189,-246.466,-45.737,...,-81.52,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
gurobi_1.0,-42.977,-54.672,-423.069,-16.76,-89.798,-123.47,-107.32,-83.189,-246.487,-45.737,...,-81.52,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
gurobi_0.1,-42.977,-55.062,-423.069,-16.76,-89.798,-127.83,-108.73,-83.192,-256.333,-45.737,...,-81.57,-7.736842,-42.818,-67.466,-84.069,-24.964286,-20.0,-269.582,,
gurobi_0.5,-42.977,-54.679,-423.069,-16.76,-89.798,-124.836,-107.84,-83.189,-246.558,-45.737,...,-81.57,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
gurobi_8.0,-42.977,-54.672,-415.523,-16.76,-89.798,-122.759,-107.32,-83.189,-246.465,-45.737,...,-81.52,-7.736842,-42.818,-67.462,-84.069,-24.964286,-20.0,-269.394,,
greedy,-51.38,-68.847,-442.838,-17.24,-143.698,-143.513,-153.24,-101.417,-302.528,-72.551,...,-115.72,-12.842105,-51.919,-71.742,-100.868,-33.928571,-27.35,-434.52,,
SDP lift,,,,-16.756893,,,,,,,...,,-7.737264,,-56.456775,,-24.925131,-19.919582,,-73.351527,-196.419424
SDP proj,,,,-16.78,,,,,,,...,,-7.736842,,-56.673077,,-24.973214,-20.0,,-81.3,-197.206
vertex count,,,,-19.773,,,,,,,...,,-17.930851,,-74.4948,,-39.057502,-32.633333,,,
