In [1]:
%load_ext autoreload
%autoreload 2

import os
import json
import pandas as pd
import numpy as np
import sys
from pathlib import Path

# Project root folder. NOTE(review): this is a machine-specific absolute path and
# must be edited before running the notebook on another machine.
root_folder = Path('/home/bcjexu/maxcut-80/bespoke-gnn4do/')
# Put the project root first on sys.path; presumably this is what lets the
# `utils.tabulate` import below resolve.
sys.path.insert(0, str(root_folder))

from utils.tabulate import load_datasets, load_train_outputs, load_baseline_outputs

In [2]:
# Folder, relative to root_folder, whose sub-folders are scanned for baseline runs.
baseline_folder = 'baseline_runs/230927_snapshot'


In [3]:
# Load every dataset and remember, per dataset, which examples land in the
# third partition of a seeded 80/10/10 random split.
# NOTE(review): the earlier comment called these "validation slices", but the
# partition kept here is the third one (`test_dataset`) - confirm which is intended.
import torch

datasets = load_datasets()
indices = {}
for name, dataset in datasets.items():
    # Re-seed before each split so a dataset's partition does not depend on
    # how many datasets were split before it.
    torch.manual_seed(0)
    print(f"{name} dataset size: {len(dataset)}")
    train_size = int(0.8 * len(dataset))
    val_size = (len(dataset) - train_size) // 2
    # The third partition absorbs the odd leftover example, if any.
    test_size = len(dataset) - train_size - val_size
    split_sizes = [train_size, val_size, test_size]
    test_dataset = torch.utils.data.random_split(dataset, split_sizes)[2]
    indices[name] = test_dataset.indices

# Table columns: every loaded dataset plus the generated 'ForcedRB' set.
#dataset_names.append('RANDOM')
#dataset_names.append('RANDOM 500')
dataset_names = [*datasets.keys(), 'ForcedRB']

loading PROTEINS
loading ENZYMES
loading COLLAB
loading IMDB-BINARY
loading MUTAG
PROTEINS dataset size: 1113
ENZYMES dataset size: 600
COLLAB dataset size: 5000
IMDB-BINARY dataset size: 1000
MUTAG dataset size: 188


In [4]:
# Vertex cover (VC): read trained-model results and SDP baseline results from disk.

training_runs_dir = root_folder / 'training_runs'
baseline_runs_dir = root_folder / 'baseline_runs'

# Trained models: two run prefixes merged into one mapping; entries from the
# second prefix overwrite entries with the same key from the first.
print("loading model losses")
vc_models = load_train_outputs(training_runs_dir, '230913_VC')  # 230901_VC for without PE
print("loading more model losses")
vc_models_2 = load_train_outputs(training_runs_dir, '230914_VC')
vc_models.update(vc_models_2)
print("loading model losses, forcedRB")
vc_models_forcedrb = load_train_outputs(training_runs_dir, '230910_VC_forcedrb')

# SDP baselines, read under two keys: 'sdp' and 'sdp|random_hyperplane'.
print("loading sdp lift losses")
vc_sdp_lift = load_baseline_outputs(baseline_runs_dir, '230902_VC_sdp', 'sdp', indices)
print("loading sdp proj losses")
vc_sdp_proj = load_baseline_outputs(baseline_runs_dir, '230902_VC_sdp', 'sdp|random_hyperplane', indices)

# Same two keys for the ForcedRB SDP runs.
print("loading SDP ForcedRB")
vc_sdp_forcedrb_lift = load_baseline_outputs(baseline_runs_dir, '230914_VC_sdp_ForcedRB', 'sdp', indices)
vc_sdp_forcedrb_proj = load_baseline_outputs(baseline_runs_dir, '230914_VC_sdp_ForcedRB', 'sdp|random_hyperplane', indices)

loading model losses
loading more model losses
loading model losses, forcedRB
loading sdp lift losses
load_baseline_outputs: ENZYMES length: 60
load_baseline_outputs: ENZYMES sdp: -19.91958220601082
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp: -73.35152729797363
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG sdp: -7.7372643822117855
load_baseline_outputs: COLLAB length: 104
load_baseline_outputs: COLLAB sdp: -56.45677478496845
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY sdp: -16.756892976760863
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PROTEINS sdp: -24.925131029316358
loading sdp proj losses
load_baseline_outputs: ENZYMES length: 60
load_baseline_outputs: ENZYMES sdp|random_hyperplane: -20.0
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp|random_hyperplane: -81.3
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG sdp|rando

In [5]:
# Collect Gurobi vertex-cover baseline scores from the snapshot folder.
#
# Each sub-folder of `root_folder / baseline_folder` that contains a JSON
# `params.txt` is treated as one baseline run. Runs whose `problem_type` is not
# 'vertex_cover' are ignored. The remaining runs fill one cell of `vc_scores`:
#   row    -> 'gurobi_<gurobi_timeout>'
#   column -> '<dataset>' or, when `gen_n` is a list, '<dataset>@@<gen_n[0]>'
vc_scores = pd.DataFrame()

snapshot_dir = Path(root_folder) / baseline_folder

# os.listdir returns entries in arbitrary order; sorting keeps the row/column
# insertion order of the table reproducible across machines and runs.
for model_folder in sorted(os.listdir(snapshot_dir)):
    params_path = snapshot_dir / model_folder / 'params.txt'
    if not params_path.is_file():
        # Stray files or checkpoint folders are not runs; skip them instead of
        # letting open() abort the whole cell.
        print(f"skipping {model_folder}: no params.txt")
        continue
    with open(params_path, 'r') as f:
        model_args = json.load(f)
    if model_args['problem_type'] != 'vertex_cover':
        continue

    row = f'gurobi_{model_args["gurobi_timeout"]}'
    if isinstance(model_args['gen_n'], list):
        col = f"{model_args['dataset']}@@{model_args['gen_n'][0]}"
    else:
        col = f"{model_args['dataset']}"
    print(row, col, snapshot_dir, model_folder)

    val = load_baseline_outputs(snapshot_dir, model_folder, 'gurobi')
    if len(val) != 1:
        # Exactly one score is expected per run; report the skip so a missing
        # table cell can be traced back to its folder.
        print(f"skipping {model_folder}: expected 1 gurobi score, got {len(val)}")
        continue

    vc_scores.at[row, col] = list(val.values())[0]

gurobi_4.0 WattsStrogatz@@100 /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230927_snapshot 230925_gurobi_generated_VC_paramhash:212b31d6ebe8564dc1adae2c7e7c9b0296480453fcac0986f1b4ae0a48376386
load_baseline_outputs: WattsStrogatz length: 1000
load_baseline_outputs: WattsStrogatz gurobi: -89.798
gurobi_4.0 ErdosRenyi@@100 /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230927_snapshot 230925_gurobi_generated_VC_paramhash:29ac124575fd9ac93e52f0fab2f94c2fb29dc8cbb5863ef17fc52f62625fc295
load_baseline_outputs: ErdosRenyi length: 1000
load_baseline_outputs: ErdosRenyi gurobi: -122.771
gurobi_4.0 IMDB-BINARY /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230927_snapshot 230925_gurobi_TU_VC_paramhash:7511e404230dc72a137067047a09e08be01ebb7df76bc75e500aa8c4d45b7e43
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY gurobi: -16.76
gurobi_2.0 REDDIT-MULTI-12K /home/bcjexu/maxcut-80/bespoke-gnn4do/baseline_runs/230927_snapshot 230925_gurobi_TU_VC_par

In [6]:
# Vertex cover: add trained-model, SDP and vertex-count rows to `vc_scores`.

# Trained models: one row per model, holding the maximum validation score.
for (model, dataset), (train_losses, valid_scores) in vc_models.items():
    vc_scores.at[model, dataset] = np.max(valid_scores)

for (model, dataset), (train_losses, valid_scores) in vc_models_forcedrb.items():
    print(dataset)
    vc_scores.at[model, dataset] = np.max(valid_scores)

# SDP baselines: one row per key, one column per dataset.
for row_label, sdp_scores in (('SDP lift', vc_sdp_lift), ('SDP proj', vc_sdp_proj)):
    for dataset, score in sdp_scores.items():
        vc_scores.at[row_label, dataset] = score

# ForcedRB SDP baselines always go to the 'ForcedRB' column; the dataset key is
# ignored. NOTE(review): if a mapping has several entries, the last one wins.
for row_label, sdp_scores in (('SDP lift', vc_sdp_forcedrb_lift), ('SDP proj', vc_sdp_forcedrb_proj)):
    for dataset, score in sdp_scores.items():
        vc_scores.at[row_label, 'ForcedRB'] = score

# 'vertex count' row: negated mean number of nodes per graph.
# NOTE(review): the mean runs over every example in the dataset, not only the
# split stored in `indices` - confirm this is intended.
for dataset in dataset_names:
    nodes, count = 0, 0
    if dataset in datasets:
        for count, example in enumerate(datasets[dataset], start=1):
            nodes += example.num_nodes
        vc_scores.at['vertex count', dataset] = -float(nodes) / count

#vc_scores.style.apply(lambda col: ['font-weight:bold' if x==col.max() else '' for x in col])

In [7]:
# Display the assembled vertex-cover score table.
vc_scores

Unnamed: 0,WattsStrogatz@@100,ErdosRenyi@@100,IMDB-BINARY,REDDIT-MULTI-12K,REDDIT-BINARY,BarabasiAlbert@@400,BarabasiAlbert@@100,PowerlawCluster@@400,WattsStrogatz@@400,BarabasiAlbert@@50,...,MUTAG,WattsStrogatz@@50,REDDIT-MULTI-5K,PowerlawCluster@@100,PROTEINS,COLLAB,ENZYMES,PowerlawCluster@@50,RANDOM,ForcedRB
gurobi_4.0,-89.798,-122.771,-16.76,-90.184409,-77.56,-246.465,-83.189,-247.568,-269.394,-42.818,...,-7.736842,-45.737,-114.062,-84.069,-24.964286,-67.462,-20.0,-42.977,,
gurobi_2.0,-89.798,-122.861,-16.76,-90.184409,-77.56,-246.465,-83.189,-247.568,-269.394,-42.818,...,-7.736842,-45.737,-114.062,-84.069,-24.964286,-67.462,-20.0,-42.977,,
gurobi_8.0,-89.798,-122.704,-16.76,-90.184409,-77.56,-246.465,-83.189,-247.568,-269.394,-42.818,...,-7.736842,-45.737,-114.062,-84.069,-24.964286,-67.462,-20.0,-42.977,,
SDP lift,,,-16.756893,,,,,,,,...,-7.737264,,,,-24.925131,-56.456775,-19.919582,,-73.351527,-196.419424
SDP proj,,,-16.78,,,,,,,,...,-7.736842,,,,-24.973214,-56.673077,-20.0,,-81.3,-197.206
vertex count,,,-19.773,,,,,,,,...,-17.930851,,,,-39.057502,-74.4948,-32.633333,,,


In [8]:
# Write the score table to CSV. The location is derived from `root_folder`
# instead of repeating the absolute project path, so the output follows the
# project root if it is changed; it resolves to the same file as before.
vc_scores.to_csv(root_folder / 'analysis_ipynb' / 'vc_baseline_scores.csv')
vc_scores

Unnamed: 0,WattsStrogatz@@100,ErdosRenyi@@100,IMDB-BINARY,REDDIT-MULTI-12K,REDDIT-BINARY,BarabasiAlbert@@400,BarabasiAlbert@@100,PowerlawCluster@@400,WattsStrogatz@@400,BarabasiAlbert@@50,...,MUTAG,WattsStrogatz@@50,REDDIT-MULTI-5K,PowerlawCluster@@100,PROTEINS,COLLAB,ENZYMES,PowerlawCluster@@50,RANDOM,ForcedRB
gurobi_4.0,-89.798,-122.771,-16.76,-90.184409,-77.56,-246.465,-83.189,-247.568,-269.394,-42.818,...,-7.736842,-45.737,-114.062,-84.069,-24.964286,-67.462,-20.0,-42.977,,
gurobi_2.0,-89.798,-122.861,-16.76,-90.184409,-77.56,-246.465,-83.189,-247.568,-269.394,-42.818,...,-7.736842,-45.737,-114.062,-84.069,-24.964286,-67.462,-20.0,-42.977,,
gurobi_8.0,-89.798,-122.704,-16.76,-90.184409,-77.56,-246.465,-83.189,-247.568,-269.394,-42.818,...,-7.736842,-45.737,-114.062,-84.069,-24.964286,-67.462,-20.0,-42.977,,
SDP lift,,,-16.756893,,,,,,,,...,-7.737264,,,,-24.925131,-56.456775,-19.919582,,-73.351527,-196.419424
SDP proj,,,-16.78,,,,,,,,...,-7.736842,,,,-24.973214,-56.673077,-20.0,,-81.3,-197.206
vertex count,,,-19.773,,,,,,,,...,-17.930851,,,,-39.057502,-74.4948,-32.633333,,,
