In [1]:
%load_ext autoreload
%autoreload 2

import os
import json
import pandas as pd
import numpy as np
import sys
from pathlib import Path

# Project checkout that provides the `utils` package and the run folders.
# The original absolute path stays as the default; set GNN4DO_ROOT to point
# the notebook at a different checkout without editing this cell.
root_folder = Path(os.environ.get('GNN4DO_ROOT', '/home/bcjexu/maxcut-80/bespoke-gnn4do/'))
# Put the checkout first on sys.path so `utils.tabulate` below resolves to it.
sys.path.insert(0, str(root_folder))

from utils.tabulate import load_datasets, load_train_outputs, load_baseline_outputs

In [2]:
# Baseline run folders, relative to root_folder.
# An earlier assignment of the two '230927_snapshot*' folders was immediately
# overwritten by the Gurobi folder below, so only the effective value is kept.
baseline_folders = ['baseline_runs/230928_gurobi']
baseline_folders_greedy = ['baseline_runs/230928_greedy']

In [3]:
# Load every dataset and record the indices of its held-out *test* split
# (the third part of an 80% / ~10% / ~10% random split).
import torch

datasets = load_datasets()
indices = {}
for name, dataset in datasets.items():
    # Re-seed before each split so every dataset is split from the same RNG state.
    torch.manual_seed(0)
    total = len(dataset)
    print(f"{name} dataset size: {total}")
    train_size = int(0.8 * total)
    remainder = total - train_size
    val_size = remainder // 2
    test_size = remainder - val_size  # test takes the odd leftover example, if any
    split_parts = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
    test_dataset = split_parts[2]
    indices[name] = test_dataset.indices

# Table columns: the loaded datasets plus 'ForcedRB', which load_datasets() does not return here.
dataset_names = [*datasets.keys(), 'ForcedRB']

loading PROTEINS
loading ENZYMES
loading COLLAB
loading IMDB-BINARY
loading MUTAG
PROTEINS dataset size: 1113
ENZYMES dataset size: 600
COLLAB dataset size: 5000
IMDB-BINARY dataset size: 1000
MUTAG dataset size: 188


In [4]:
# Read the max-cut results from disk: trained-model outputs and the two SDP baselines.
training_runs_dir = root_folder / 'training_runs'
baseline_runs_dir = root_folder / 'baseline_runs'
sdp_run_name = '230824_sdp'

print("loading model losses")
maxcut_models = load_train_outputs(training_runs_dir, '230823_test')

# Both SDP tables come from the same run folder; only the requested key differs.
print("loading sdp lift losses")
maxcut_sdp_lift = load_baseline_outputs(baseline_runs_dir, sdp_run_name, 'sdp', indices)

print("loading sdp proj losses")
maxcut_sdp_proj = load_baseline_outputs(baseline_runs_dir, sdp_run_name, 'sdp|random_hyperplane', indices)

loading model losses
loading sdp lift losses


load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY sdp: 97.83850940704346
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp: 896.5637869873046
load_baseline_outputs: COLLAB length: 500
load_baseline_outputs: COLLAB sdp: 2627.7631793823243
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PROTEINS sdp: 103.8618523819106
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG sdp: 28.101939753482217
load_baseline_outputs: ENZYMES length: 60
load_baseline_outputs: ENZYMES sdp: 82.51778809229533
loading sdp proj losses
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY sdp|random_hyperplane: 97.495
load_baseline_outputs: RANDOM length: 1000
load_baseline_outputs: RANDOM sdp|random_hyperplane: 872.0625
load_baseline_outputs: COLLAB length: 500
load_baseline_outputs: COLLAB sdp|random_hyperplane: 2624.261
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PRO

In [5]:
# Build the score table from the Gurobi baseline runs.
# Rows are "gurobi_<gurobi_timeout>"; columns are the dataset name, with
# "@@<gen_n[0]>" appended when the run's gen_n parameter is a list.

vc_scores = pd.DataFrame()

for baseline_folder in baseline_folders:
    baseline_dir = root_folder / baseline_folder
    # os.listdir returns entries in arbitrary order; sort so the row/column
    # order of the resulting table is the same on every run.
    for model_folder in sorted(os.listdir(baseline_dir)):
        params_path = baseline_dir / model_folder / 'params.txt'
        if not params_path.is_file():
            # Stray files or run directories without params.txt cannot be classified.
            continue
        with open(params_path, 'r') as f:
            model_args = json.load(f)
        if model_args['problem_type'] != 'max_cut':
            continue

        row = f'gurobi_{model_args["gurobi_timeout"]}'
        if isinstance(model_args['gen_n'], list):
            col = f"{model_args['dataset']}@@{model_args['gen_n'][0]}"
        else:
            col = f"{model_args['dataset']}"
        print(row,col)

        # No `indices` argument is passed here, so the loader's default selection applies.
        val = load_baseline_outputs(baseline_dir, model_folder, 'gurobi')

        # The first value of the returned mapping is stored as this run's score.
        vc_scores.at[row, col] = list(val.values())[0]

gurobi_2.0 PROTEINS
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PROTEINS gurobi: 102.36160714285714
gurobi_8.0 COLLAB
load_baseline_outputs: COLLAB length: 500
load_baseline_outputs: COLLAB gurobi: 2624.617
gurobi_0.1 WattsStrogatz@@50
load_baseline_outputs: WattsStrogatz length: 1000
load_baseline_outputs: WattsStrogatz gurobi: 198.744
gurobi_0.1 REDDIT-BINARY
load_baseline_outputs: REDDIT-BINARY length: 100
load_baseline_outputs: REDDIT-BINARY gurobi: 693.015
gurobi_4.0 ErdosRenyi@@400
load_baseline_outputs: ErdosRenyi length: 1000
load_baseline_outputs: ErdosRenyi gurobi: 16495.059
gurobi_8.0 PROTEINS
load_baseline_outputs: PROTEINS length: 112
load_baseline_outputs: PROTEINS gurobi: 102.36160714285714
gurobi_4.0 PowerlawCluster@@100
load_baseline_outputs: PowerlawCluster length: 1000
load_baseline_outputs: PowerlawCluster gurobi: 712.6245
gurobi_0.5 PowerlawCluster@@400
load_baseline_outputs: PowerlawCluster length: 1000
load_baseline_outputs: PowerlawCluster

In [6]:
# Add the greedy baseline runs to the score table as a single 'greedy' row.
# Columns are the dataset name, with "@@<gen_n[0]>" appended when the run's
# gen_n parameter is a list.
for baseline_folder in baseline_folders_greedy:
    baseline_dir = root_folder / baseline_folder
    # os.listdir returns entries in arbitrary order; sort so the column order
    # of the resulting table is the same on every run.
    for model_folder in sorted(os.listdir(baseline_dir)):
        params_path = baseline_dir / model_folder / 'params.txt'
        if not params_path.is_file():
            # Stray files or run directories without params.txt cannot be classified.
            continue
        with open(params_path, 'r') as f:
            model_args = json.load(f)
        if model_args['problem_type'] != 'max_cut':
            continue

        row = 'greedy'
        if isinstance(model_args['gen_n'], list):
            col = f"{model_args['dataset']}@@{model_args['gen_n'][0]}"
        else:
            col = f"{model_args['dataset']}"
        print(row,col)

        # No `indices` argument is passed here, so the loader's default selection applies.
        val = load_baseline_outputs(baseline_dir, model_folder, 'greedy')

        # The first value of the returned mapping is stored as this run's score.
        vc_scores.at[row, col] = list(val.values())[0]

greedy ErdosRenyi@@100
load_baseline_outputs: ErdosRenyi length: 1000
load_baseline_outputs: ErdosRenyi greedy: 1097.259
greedy IMDB-BINARY
load_baseline_outputs: IMDB-BINARY length: 100
load_baseline_outputs: IMDB-BINARY greedy: 51.85
greedy REDDIT-MULTI-5K
load_baseline_outputs: REDDIT-MULTI-5K length: 100
load_baseline_outputs: REDDIT-MULTI-5K greedy: 495.02
greedy MUTAG
load_baseline_outputs: MUTAG length: 19
load_baseline_outputs: MUTAG greedy: 16.94736842105263
greedy WattsStrogatz@@50
load_baseline_outputs: WattsStrogatz length: 1000
load_baseline_outputs: WattsStrogatz greedy: 116.65
greedy REDDIT-BINARY
load_baseline_outputs: REDDIT-BINARY length: 100
load_baseline_outputs: REDDIT-BINARY greedy: 439.79
greedy PowerlawCluster@@100
load_baseline_outputs: PowerlawCluster length: 1000
load_baseline_outputs: PowerlawCluster greedy: 402.539
greedy ErdosRenyi@@50
load_baseline_outputs: ErdosRenyi length: 1000
load_baseline_outputs: ErdosRenyi greedy: 298.546
greedy BarabasiAlbert@@10

In [7]:
# Merge model scores, SDP baselines and per-dataset edge counts into vc_scores.
# maxcut_scores is created with the dataset columns but is not filled in this cell.
maxcut_scores = pd.DataFrame(columns=dataset_names)

# One row per model: the best validation score seen for each dataset.
for (model, dataset), (train_losses, valid_scores) in maxcut_models.items():
    best_valid = np.max(valid_scores)
    vc_scores.at[model, dataset] = best_valid

# Report the final training loss of every run.
for (model, dataset), (train_losses, valid_scores) in maxcut_models.items():
    final_loss_line = f"{model} {dataset} loss: {train_losses[-1]}"
    print(final_loss_line)

# SDP baselines: one row each for the lifted and the projected score.
for row_label, sdp_scores in (('SDP lift', maxcut_sdp_lift), ('SDP proj', maxcut_sdp_proj)):
    for dataset, score in sdp_scores.items():
        vc_scores.at[row_label, dataset] = score

# Mean of edge_index.shape[1] over the examples of each loaded dataset.
for dataset in dataset_names:
    if dataset not in datasets:
        # e.g. names appended to dataset_names without a loaded dataset
        continue
    edge_counts = [example.edge_index.shape[1] for example in datasets[dataset]]
    vc_scores.at['edge count', dataset] = float(sum(edge_counts)) / len(edge_counts)

In [8]:
# Show the assembled score table (bare last expression -> rich DataFrame display).
vc_scores

Unnamed: 0,PROTEINS,COLLAB,WattsStrogatz@@50,REDDIT-BINARY,ErdosRenyi@@400,PowerlawCluster@@100,PowerlawCluster@@400,ENZYMES,WattsStrogatz@@100,WattsStrogatz@@400,...,ErdosRenyi@@50,ErdosRenyi@@100,BarabasiAlbert@@50,PowerlawCluster@@50,BarabasiAlbert@@100,REDDIT-MULTI-5K,IMDB-BINARY,REDDIT-MULTI-12K,BarabasiAlbert@@400,RANDOM
gurobi_2.0,102.361607,2624.595,198.745,694.135,16494.352,712.4625,2169.86,81.45,392.068,1177.892,...,530.076,2002.8825,352.121,346.424,719.803,787.915,97.495,568.91,2208.624,
gurobi_8.0,102.361607,2624.617,198.745,694.135,16495.307,712.8815,2173.876,81.45,392.068,1179.861,...,530.163,2002.9315,352.121,346.424,720.172,787.915,97.495,568.94,2212.489,
gurobi_0.1,102.28125,2624.321,198.744,693.015,16476.722,711.6795,2169.456,81.45,390.957,1173.45,...,529.931,2002.8815,351.872,346.177,719.406,785.445,97.495,567.71,2208.111,
gurobi_4.0,102.361607,2624.607,198.745,694.135,16495.059,712.6245,2173.765,81.45,392.068,1178.477,...,530.111,2002.8975,352.121,346.424,719.891,787.915,97.495,568.92,2212.424,
gurobi_0.5,102.361607,2624.511,198.745,694.075,16481.573,711.8785,2169.456,81.45,392.058,1173.505,...,529.977,2002.8815,352.105,346.423,719.507,787.135,97.495,568.76,2208.111,
gurobi_1.0,102.361607,2624.569,198.745,694.105,16491.6,712.2625,2169.458,81.45,392.067,1175.974,...,530.026,2002.8815,352.116,346.423,719.718,787.485,97.495,568.91,2208.112,
greedy,60.741071,1345.7,116.65,439.79,8622.336066,402.539,1230.981,48.533333,229.433,690.189,...,298.546,1097.259,200.097,196.234,407.981,495.02,51.85,358.4,1255.219,
SDP lift,103.861852,2627.763179,,,,,,82.517788,,,...,,,,,,,97.838509,,,896.563787
SDP proj,102.299107,2624.261,,,,,,81.45,,,...,,,,,,,97.495,,,872.0625
edge count,145.631626,4914.4316,,,,,,124.273333,,,...,,,,,,,193.062,,,


In [9]:
# Write the score table to <root_folder>/analysis_ipynb/mc_baseline_scores.csv.
# The path is derived from root_folder instead of repeating the absolute
# project path, so the export follows the checkout the notebook is using.
scores_csv_path = root_folder / 'analysis_ipynb' / 'mc_baseline_scores.csv'
vc_scores.to_csv(scores_csv_path)
vc_scores

Unnamed: 0,PROTEINS,COLLAB,WattsStrogatz@@50,REDDIT-BINARY,ErdosRenyi@@400,PowerlawCluster@@100,PowerlawCluster@@400,ENZYMES,WattsStrogatz@@100,WattsStrogatz@@400,...,ErdosRenyi@@50,ErdosRenyi@@100,BarabasiAlbert@@50,PowerlawCluster@@50,BarabasiAlbert@@100,REDDIT-MULTI-5K,IMDB-BINARY,REDDIT-MULTI-12K,BarabasiAlbert@@400,RANDOM
gurobi_2.0,102.361607,2624.595,198.745,694.135,16494.352,712.4625,2169.86,81.45,392.068,1177.892,...,530.076,2002.8825,352.121,346.424,719.803,787.915,97.495,568.91,2208.624,
gurobi_8.0,102.361607,2624.617,198.745,694.135,16495.307,712.8815,2173.876,81.45,392.068,1179.861,...,530.163,2002.9315,352.121,346.424,720.172,787.915,97.495,568.94,2212.489,
gurobi_0.1,102.28125,2624.321,198.744,693.015,16476.722,711.6795,2169.456,81.45,390.957,1173.45,...,529.931,2002.8815,351.872,346.177,719.406,785.445,97.495,567.71,2208.111,
gurobi_4.0,102.361607,2624.607,198.745,694.135,16495.059,712.6245,2173.765,81.45,392.068,1178.477,...,530.111,2002.8975,352.121,346.424,719.891,787.915,97.495,568.92,2212.424,
gurobi_0.5,102.361607,2624.511,198.745,694.075,16481.573,711.8785,2169.456,81.45,392.058,1173.505,...,529.977,2002.8815,352.105,346.423,719.507,787.135,97.495,568.76,2208.111,
gurobi_1.0,102.361607,2624.569,198.745,694.105,16491.6,712.2625,2169.458,81.45,392.067,1175.974,...,530.026,2002.8815,352.116,346.423,719.718,787.485,97.495,568.91,2208.112,
greedy,60.741071,1345.7,116.65,439.79,8622.336066,402.539,1230.981,48.533333,229.433,690.189,...,298.546,1097.259,200.097,196.234,407.981,495.02,51.85,358.4,1255.219,
SDP lift,103.861852,2627.763179,,,,,,82.517788,,,...,,,,,,,97.838509,,,896.563787
SDP proj,102.299107,2624.261,,,,,,81.45,,,...,,,,,,,97.495,,,872.0625
edge count,145.631626,4914.4316,,,,,,124.273333,,,...,,,,,,,193.062,,,
