In [90]:
# Imported in this cell because it uses Path and pd before the notebook's
# main import cell runs; without these a fresh-kernel "Run All" fails with a
# NameError on the lines below.
from pathlib import Path

import pandas as pd

# Model names; not referenced by the other visible cells -- presumably the
# 'model_type' values expected as row labels (TODO confirm).
models = ['LiftMP', 'GIN', 'GAT', 'GCNN', 'GatedGCNN']
# note, no twitter.
dataset_names = ['RANDOM', 'ENZYMES', 'PROTEINS', 'IMDB-BINARY', 'MUTAG', 'COLLAB']

# Project checkout and the directory holding the per-run training folders.
folder_path = Path('/home/penlu/code/bespoke-gnn4do')
training_path = folder_path / "training_runs"

# Result tables: one column per dataset, initially without rows.
# Later cells add rows by label with `.at[row_label, dataset] = value`.
train_df = pd.DataFrame(columns=dataset_names)
valid_df = pd.DataFrame(columns=dataset_names)

In [91]:
from torch_geometric.datasets import TUDataset

# Tally the edge_index columns of every graph in each TU dataset and derive
# the 'bound' rows of both result tables from the per-graph average.
for dataset in ['PROTEINS', 'ENZYMES', 'COLLAB', 'IMDB-BINARY', 'MUTAG']:
    print(dataset)
    loader = TUDataset(root=f'/tmp/{dataset}', name=dataset)

    # `count` ends up as the number of graphs iterated (0 if there are none);
    # `edges` is the sum of edge_index.shape[1] over those graphs.
    edges, count = 0, 0
    for count, example in enumerate(loader, start=1):
        edges += example.edge_index.shape[1]

    # theoretical objective after getting all edges
    train_df.at['bound', dataset] = -edges / 2. / count

    # theoretical score after getting all edges
    valid_df.at['bound', dataset] = edges / count

PROTEINS
ENZYMES
COLLAB
IMDB-BINARY
MUTAG


In [92]:
# Collect training outputs
import os
import json
import pandas as pd
import numpy as np
import sys
from pathlib import Path

train_prefix = "230823_test"
model_list = [training_path / x for x in os.listdir(training_path) if x.startswith(train_prefix)]

# Record the last training loss and last validation score of every finished
# run, keyed by (model_type row, dataset column).
for model_folder in model_list:
    try:
        # Runs without a done.txt marker file are skipped.
        if not (model_folder / 'done.txt').is_file():
            continue
        with open(model_folder / 'params.txt', 'r') as f:
            model_args = json.load(f)
        train_losses = np.load(model_folder / 'train_losses.npy')
        valid_scores = np.load(model_folder / 'valid_scores.npy')

        # Only TU runs are filed under their TUdataset_name; everything else
        # goes in the RANDOM column. The key is read inside the TU branch so
        # that a non-TU run whose params lack 'TUdataset_name' is still
        # recorded instead of being reported as broken.
        if model_args['dataset'] == 'TU':
            dataset = model_args['TUdataset_name']
        else:
            dataset = 'RANDOM'

        # [-1] takes the final entry of each saved array.
        train_df.at[model_args['model_type'], dataset] = train_losses[-1]
        valid_df.at[model_args['model_type'], dataset] = valid_scores[-1]
    except Exception:
        # Best effort: report the broken run folder and continue with the
        # rest. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit stop the cell.
        print(f'something is wrong w/ {model_folder}')
        print(sys.exc_info())

In [93]:
# Gather the SDP baseline results for each TU dataset
sdp_folder = "230821_TU"
runs_folder = folder_path / "baseline_runs" / sdp_folder
for dataset in ['PROTEINS', 'ENZYMES', 'COLLAB', 'IMDB-BINARY', 'MUTAG']:
    # First pass: parse every JSON line of this dataset's results file.
    results = []
    with open(runs_folder / dataset / 'results.jsonl', 'r') as f:
        for line in f:
            results.append(json.loads(line))

    # Second pass: add up scores per method, in file order. Records of any
    # other method only contribute to len(results).
    total_sdp_score = 0.
    total_hyperplane_score = 0.
    total_gurobi_score = 0.
    for res in results:
        method = res['method']
        if method == 'sdp':
            total_sdp_score += res['score']
        elif method == 'sdp|random_hyperplane':
            total_hyperplane_score += res['score']

    # Averaging denominator: a quarter of the record count
    # (presumably four records are written per graph -- TODO confirm).
    length = len(results) / 4

    # Turn the mean SDP score back into an objective and store it.
    # Author's note: score == (2 - obj) / 2, so obj == 2 - score * 2;
    # the code uses the dataset's 'bound' entry where the note has the constant.
    avg_edges = valid_df.at['bound', dataset]
    mean_sdp_score = total_sdp_score / length
    mean_hyperplane_score = total_hyperplane_score / length
    train_df.at['SDP lift', dataset] = avg_edges - mean_sdp_score * 2
    valid_df.at['SDP proj', dataset] = mean_hyperplane_score
    print(f"{dataset} edges: {avg_edges}")

    print(f"{dataset} length: {length}")
    print(f"{dataset} SDP: {mean_sdp_score}")
    print(f"{dataset} rounded: {mean_hyperplane_score}")

PROTEINS edges: 145.6316262353998
PROTEINS length: 1113.0
PROTEINS SDP: 24.638019062866526
PROTEINS rounded: 23.172506738544474
ENZYMES edges: 124.27333333333333
ENZYMES length: 600.0
ENZYMES SDP: 20.660313673814137
ENZYMES rounded: 19.56
COLLAB edges: 4914.4316
COLLAB length: 5000.0
COLLAB SDP: 70.9906649263382
COLLAB rounded: 67.7261
IMDB-BINARY edges: 193.062
IMDB-BINARY length: 1000.0
IMDB-BINARY SDP: 8.02489567041397
IMDB-BINARY rounded: 7.6635
MUTAG edges: 39.58510638297872
MUTAG length: 188.0
MUTAG SDP: 10.710323310912923
MUTAG rounded: 10.534574468085106


In [94]:
# Collect gurobi outputs
baselines_path = folder_path / "baseline_runs"
# List the directory once and split the entries by time-limit tag.
baseline_entries = os.listdir(baselines_path)
gurobi_1s_list = [baselines_path / x for x in baseline_entries if "gurobi_1s" in x]
gurobi_5s_list = [baselines_path / x for x in baseline_entries if "gurobi_5s" in x]


def _collect_gurobi_scores(gurobi_folders, row_label):
    """Average the gurobi scores of each run folder into the result tables.

    For every folder this reads params.txt to pick the dataset column, then
    averages the 'score' field over all lines of results.jsonl. The mean is
    written to valid_df[row_label, dataset]; train_df[row_label, dataset]
    gets the equivalent objective, valid_df['bound', dataset] - 2 * mean.

    Parameters
    ----------
    gurobi_folders : iterable of pathlib.Path
        Run folders, each containing params.txt and results.jsonl.
    row_label : str
        Row of train_df / valid_df to fill (e.g. 'gurobi 1s').

    A folder that fails for any reason (missing file, non-gurobi record,
    empty results file, ...) is reported on stdout and skipped. If several
    folders map to the same dataset, the one processed last wins.
    """
    for gurobi_folder in gurobi_folders:
        try:
            with open(gurobi_folder / 'params.txt', 'r') as f:
                gurobi_args = json.load(f)
            # Decide the column from THIS run's params. The previous code
            # tested `model_args`, a variable left over from the training-run
            # cell, so every folder took whatever branch that stale value
            # selected.
            if gurobi_args['dataset'] == 'TU':
                dataset = gurobi_args['TUdataset_name']
            else:
                dataset = "RANDOM"

            total_score = 0.
            total_count = 0
            with open(gurobi_folder / 'results.jsonl', 'r') as f:
                for line in f:
                    res = json.loads(line)
                    # Every record in a gurobi run folder must be a gurobi result.
                    assert res['method'] == 'gurobi'
                    total_score += res['score']
                    total_count += 1

            # An empty results file raises ZeroDivisionError here and is
            # reported by the handler below.
            mean_score = total_score / total_count

            # store gurobi score and equivalent objective
            train_df.at[row_label, dataset] = valid_df.at['bound', dataset] - mean_score * 2
            valid_df.at[row_label, dataset] = mean_score

            print(f"{dataset} gurobi: {mean_score}")
        except Exception:
            # Best effort: report the folder and move on. `Exception` (not a
            # bare except) lets KeyboardInterrupt / SystemExit stop the cell.
            print(f'something is wrong w/ {gurobi_folder}')
            print(sys.exc_info())


_collect_gurobi_scores(gurobi_1s_list, 'gurobi 1s')
_collect_gurobi_scores(gurobi_5s_list, 'gurobi 5s')

IMDB-BINARY gurobi: 103.1955
MUTAG gurobi: 29.32712765957447
ENZYMES gurobi: 80.71
PROTEINS gurobi: 95.0763701707098
COLLAB gurobi: 944.3480751604033
COLLAB gurobi: 2524.2921
RANDOM gurobi: 874.1265
PROTEINS gurobi: 95.0763701707098
RANDOM gurobi: 874.2815
MUTAG gurobi: 29.32712765957447
IMDB-BINARY gurobi: 103.1965
COLLAB gurobi: 2524.3399
ENZYMES gurobi: 80.71


In [96]:
# Display the objective table: one column per dataset; rows are the 'bound'
# row, one row per collected model type, 'SDP lift', and the gurobi baselines.
train_df

Unnamed: 0,RANDOM,ENZYMES,PROTEINS,IMDB-BINARY,MUTAG,COLLAB
bound,,-62.136667,-72.815813,-96.531,-19.792553,-2457.2158
SDP lift,,82.952706,96.355588,177.012209,18.16446,4772.45027
gurobi 1s,,-37.146667,-44.521114,-13.329,-19.069149,-134.1526
gurobi 5s,,-37.146667,-44.521114,-13.331,-19.069149,-134.2482


In [95]:
# Display the score table: one column per dataset; rows are the 'bound'
# row, one row per collected model type, 'SDP proj', and the gurobi baselines.
valid_df

Unnamed: 0,RANDOM,ENZYMES,PROTEINS,IMDB-BINARY,MUTAG,COLLAB
bound,,124.273333,145.631626,193.062,39.585106,4914.4316
SDP proj,,19.56,23.172507,7.6635,10.534574,67.7261
gurobi 1s,874.1265,80.71,95.07637,103.1955,29.327128,2524.2921
gurobi 5s,874.2815,80.71,95.07637,103.1965,29.327128,2524.3399
