In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
from random import shuffle
from itertools import product
import wandb
from rich.progress import track

In [3]:
# Weights & Biases location used by this notebook: existing runs are looked up
# under "<wandb_user>/<wandb_project>", and the project name is also passed to
# every generated command as `--project`.
wandb_user = 'sisaman'
wandb_project = 'GAPTEST'

In [4]:
class CommandBuilder:
    """Expand a grid of option values into `python train.py ...` command lines."""

    def __init__(self, subcommand):
        # Token placed right after `train.py` in every generated command.
        self.subcommand = subcommand

    def build(self, **params):
        """Return one command string per combination of the given option values.

        Each keyword becomes a `--<keyword> <value>` option. A list or tuple is
        treated as a set of alternatives to sweep over; any other value
        (including a string) is treated as a single fixed choice. Options
        appear in the order the keywords were passed, and runs of whitespace
        in the final command are collapsed to single spaces.
        """
        grid = {}
        for option, choices in params.items():
            if isinstance(choices, (list, tuple)):
                grid[option] = choices
            else:
                grid[option] = (choices,)

        commands = []
        for config in self.product_dict(grid):
            flags = [f'--{option} {value}' for option, value in config.items()]
            raw = ' '.join(['python train.py', f'{self.subcommand}', *flags])
            # Normalise whitespace so commands can be compared as plain strings.
            commands.append(' '.join(raw.split()))
        return commands

    @staticmethod
    def product_dict(params):
        """Lazily yield one dict per element of the Cartesian product of `params` values."""
        names = list(params)
        for combo in product(*params.values()):
            yield dict(zip(names, combo))


# Experiments

In [5]:
# DEFAULT PARAMS
# Shared option values for the generated commands. CommandBuilder.build sweeps
# over every element of a list/tuple and treats any other value as a single
# fixed choice. Dicts keyed by 'edge' / 'node' hold values that differ per
# dp_level (and, where nested further, per dataset).
dataset = ['reddit', 'amazon', 'facebook']
epsilon = {
    'edge': list(range(1, 10, 2)),   # 1, 3, 5, 7, 9
    'node': list(range(5, 30, 5)),   # 5, 10, 15, 20, 25
}
hops = [1, 2, 3, 4, 5]
max_degree = {
    # NOTE(review): -1 presumably means "no degree limit" -- confirm in train.py.
    'edge': -1,
    'node': {
        'facebook': {
            'standard': [10, 20, 50, 100, 200],
            'extended': [10, 20, 50, 100, 200],
        },
        'reddit': {
            'standard': [50, 100, 200, 300, 400],
            'extended': [50, 100, 200, 300, 400],
        },
        'amazon': {
            'standard': [10, 20, 50, 100, 200],
            'extended': [10, 20, 50, 100, 200],
        },
    },
}
hidden_dim = [16]
encoder_layers = 2
pre_layers = 1
post_layers = 1
combine = 'cat'
activation = 'selu'
dropout = 0
batch_norm = True
optimizer = 'adam'
# These two previously ended with a stray comma, which silently made them the
# one-element tuples (0.01,) and (0,). The generated commands are unchanged
# (a one-element tuple is a one-value sweep), but they are meant to be scalars.
learning_rate = 0.01
weight_decay = 0
pre_epochs = {
    'edge': 100,
    'node': 10,
}
epochs = {
    'edge': 100,
    'node': 10,
}
batch_size = {
    # NOTE(review): -1 presumably means full-batch training -- confirm in train.py.
    'edge': -1,
    'node': {
        'facebook': 256,
        'reddit':   2048,
        'amazon':   4096,
    },
}
max_grad_norm = 1
repeats = 10
logger = 'wandb'

def gap_commands(*, name, dataset, dp_level, epsilon, hops, max_degree,
                 encoder_layers, pre_layers, batch_norm, pre_epochs, batch_size):
    """Expand one `gap` experiment family into its list of command lines.

    Only the options that vary between experiment families are parameters;
    they intentionally shadow the notebook-level defaults of the same name, so
    callers must pass them explicitly. All remaining options (hidden_dim,
    post_layers, combine, activation, dropout, optimizer, learning_rate,
    weight_decay, epochs, max_grad_norm, repeats, project, logger) are read
    from the notebook-level defaults; `epochs` is selected by `dp_level`.

    The keyword order below determines the option order inside each generated
    command string. Those strings are later compared verbatim against the
    commands recorded on W&B, so the order must not be changed.
    """
    return CommandBuilder('gap').build(
        name=name,
        dataset=dataset,
        dp_level=dp_level,
        epsilon=epsilon,
        perturbation='aggr',
        hops=hops,
        max_degree=max_degree,
        hidden_dim=hidden_dim,
        encoder_layers=encoder_layers,
        pre_layers=pre_layers,
        post_layers=post_layers,
        combine=combine,
        activation=activation,
        dropout=dropout,
        batch_norm=batch_norm,
        optimizer=optimizer,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        pre_epochs=pre_epochs,
        epochs=epochs[dp_level],
        batch_size=batch_size,
        max_grad_norm=max_grad_norm,
        repeats=repeats,
        project=wandb_project,
        logger=logger
    )


def sage_commands(*, name, dataset, dp_level, epsilon, max_degree,
                  mp_layers, batch_norm, batch_size):
    """Expand one `sage` experiment family into its list of command lines.

    Parameters are the options that vary between families (they shadow the
    notebook-level defaults of the same name). Everything else, including
    `encoder_layers`, comes from the notebook-level defaults; `epochs` is
    selected by `dp_level`. The keyword order below fixes the option order of
    the generated command strings and must not be changed, because the strings
    are later compared verbatim against the commands recorded on W&B.
    """
    return CommandBuilder('sage').build(
        name=name,
        dataset=dataset,
        dp_level=dp_level,
        epsilon=epsilon,
        max_degree=max_degree,
        hidden_dim=hidden_dim,
        encoder_layers=encoder_layers,
        mp_layers=mp_layers,
        post_layers=post_layers,
        activation=activation,
        dropout=dropout,
        batch_norm=batch_norm,
        optimizer=optimizer,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        epochs=epochs[dp_level],
        batch_size=batch_size,
        max_grad_norm=max_grad_norm,
        repeats=repeats,
        project=wandb_project,
        logger=logger
    )


cmd = []

# ---------------------------------------------------------------------------
# Edge-level experiments: all datasets are swept inside a single build call.
# ---------------------------------------------------------------------------
edge_common = dict(
    dataset=dataset,
    dp_level='edge',
    max_degree=max_degree['edge'],
    batch_size=batch_size['edge'],
)

cmd += gap_commands(
    name='GAP-INF',
    epsilon='inf',
    hops=hops,
    encoder_layers=encoder_layers,
    pre_layers=pre_layers,
    batch_norm=batch_norm,
    pre_epochs=pre_epochs['edge'],
    **edge_common
)

cmd += gap_commands(
    name='GAP-EDP',
    epsilon=epsilon['edge'],
    hops=hops,
    encoder_layers=encoder_layers,
    pre_layers=pre_layers,
    batch_norm=batch_norm,
    pre_epochs=pre_epochs['edge'],
    **edge_common
)

# GAP-E W/O EM (same run name as above; no encoder layers, no pre-training epochs)
cmd += gap_commands(
    name='GAP-EDP',
    epsilon=epsilon['edge'],
    hops=hops,
    encoder_layers=0,
    pre_layers=pre_layers,
    batch_norm=batch_norm,
    pre_epochs=0,
    **edge_common
)

# MLP baseline: zero hops, and the default encoder depth is used for pre_layers.
cmd += gap_commands(
    name='MLP',
    epsilon=0,
    hops=0,
    encoder_layers=0,
    pre_layers=encoder_layers,
    batch_norm=batch_norm,
    pre_epochs=0,
    **edge_common
)

cmd += sage_commands(
    name='SAGE-EDP',
    epsilon=epsilon['edge'],
    mp_layers=hops,
    batch_norm=batch_norm,
    **edge_common
)

cmd += sage_commands(
    name='SAGE-INF',
    epsilon='inf',
    mp_layers=hops,
    batch_norm=batch_norm,
    **edge_common
)

# ---------------------------------------------------------------------------
# Node-level experiments: built per dataset because max_degree and batch_size
# are dataset-specific. Batch norm is disabled for all of them.
# ---------------------------------------------------------------------------
for dataset_name in dataset:
    node_common = dict(
        dataset=dataset_name,
        dp_level='node',
        epsilon=epsilon['node'],
        batch_norm=False,
        batch_size=batch_size['node'][dataset_name],
    )
    node_max_degree = max_degree['node'][dataset_name]

    cmd += gap_commands(
        name='GAP-NDP',
        hops=hops,
        max_degree=node_max_degree['extended'],
        encoder_layers=encoder_layers,
        pre_layers=pre_layers,
        pre_epochs=pre_epochs['node'],
        **node_common
    )

    # GAP-N W/O EM (same run name as above; no encoder layers, no pre-training epochs)
    cmd += gap_commands(
        name='GAP-NDP',
        hops=hops,
        max_degree=node_max_degree['standard'],
        encoder_layers=0,
        pre_layers=pre_layers,
        pre_epochs=0,
        **node_common
    )

    cmd += sage_commands(
        name='SAGE-NDP',
        max_degree=node_max_degree['standard'],
        mp_layers=1,
        **node_common
    )

    # MLP baseline: zero hops, fixed max_degree of -1.
    cmd += gap_commands(
        name='MLP-DP',
        hops=0,
        max_degree=-1,
        encoder_layers=0,
        pre_layers=encoder_layers,
        pre_epochs=0,
        **node_common
    )

# Randomise the order in which the jobs are written to the job file.
shuffle(cmd)

api = wandb.Api()
runs = api.runs(f"{wandb_user}/{wandb_project}")
run_cmds = []

try:
    for run in track(runs, description='Fetching finished runs...'):
        recorded = run.config.get('cmd')
        if not isinstance(recorded, str):
            # This run did not log its command line, so it cannot be matched
            # against the generated commands; skip it instead of raising.
            continue
        command = 'python ' + recorded
        # Same whitespace normalisation as CommandBuilder.build, so the two
        # sides can be compared as plain strings.
        command = ' '.join(command.split())
        run_cmds.append(command)
except ValueError as e:
    # Raised while listing runs when the project cannot be found (see the
    # saved output below); in that case every generated command counts as new.
    print(e)

# Keep the shuffled order (and drop duplicates) while removing commands that
# already have a run. A plain set difference would discard the shuffle.
finished_cmds = set(run_cmds)
new_cmd = [command for command in dict.fromkeys(cmd) if command not in finished_cmds]
num_total = len(cmd)
num_finished = len(run_cmds)
num_new = len(new_cmd)

filename = 'jobs/gap.jobs'
os.makedirs(os.path.dirname(filename), exist_ok=True)
with open(filename, 'w') as file:
    for item in track(new_cmd):
        print(item, file=file)

print('total runs: ', num_total)
print('finished:   ', num_finished)
print('new:        ', num_new)

Output()

Output()

Could not find project GAPTEST


total runs:  1098
finished:    0
new:         1098
