# Setup

In [10]:
import sys
from pathlib import Path
from glob import glob
from typing import Union, List
from pprint import pprint
from subprocess import check_output
from multiprocessing import Pool

import yaml
from tqdm import tqdm

# Root of the local GraphGym checkout; the three entry-point scripts used by
# this notebook all live in its "run" sub-directory.
GRAPHGYM_ROOT = Path("~/Documents").expanduser() / "GraphGym"
main_py, configs_gen_py, agg_batch_py = (
    GRAPHGYM_ROOT / "run" / script_name
    for script_name in ("main.py", "configs_gen.py", "agg_batch.py")
)

# Fail fast if the checkout is missing or incomplete.
assert main_py.exists()
assert configs_gen_py.exists()
assert agg_batch_py.exists()

# Default number of worker processes used when running several configs at once.
CONCURRENT_JOBS = 2


def call_python(path: Union[str, Path], args: List[str]) -> str:
    """Run a Python script with the current interpreter and return its stdout.

    Args:
        path: Script to execute.
        args: Command-line arguments passed to the script, in order.

    Returns:
        The script's standard output decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero status.
    """
    command = [sys.executable, str(path)]
    command.extend(args)
    raw_stdout = check_output(command)
    return raw_stdout.decode("utf8")


def run_config(config_path: Union[str, Path], repeats: int = 1) -> str:
    """Run GraphGym's ``main.py`` on a single config file.

    Args:
        config_path: Config file passed to ``main.py`` via ``--cfg``.
        repeats: Value passed via ``--repeat``.

    Returns:
        The stdout of the ``main.py`` subprocess.
    """
    cli_args = ["--cfg", str(config_path), "--repeat", str(repeats)]
    return call_python(main_py, cli_args)


def run_config_pool(args) -> str:
    """Single-argument adapter around ``run_config`` for use with ``Pool.imap``.

    ``args`` is unpacked positionally into ``run_config``; ``run_configs``
    passes ``(config_path, repeats)`` tuples.
    """
    stdout = run_config(*args)
    return stdout


def gen_configs(
    config_path: Union[str, Path], grid_path: Union[str, Path], sample_num: int = -1
) -> str:
    """Generate experiment configs by running GraphGym's ``configs_gen.py``.

    Args:
        config_path: Base config file, passed via ``--config``.
        grid_path: Grid-search file, passed via ``--grid``.
        sample_num: When positive, random sampling is enabled with this many
            samples (``--sample --sample_num N``); otherwise no sampling flags
            are passed.

    Returns:
        The stdout of the ``configs_gen.py`` subprocess.
    """
    sampling_flags = ["--sample", "--sample_num", str(sample_num)] if sample_num > 0 else []
    cli_args = ["--config", str(config_path), "--grid", str(grid_path), *sampling_flags]
    return call_python(configs_gen_py, cli_args)


def run_configs(
    config_paths: Union[str, Path, List[Union[str, Path]]],
    repeats: int = 1,
    concurrent_jobs: int = CONCURRENT_JOBS,
) -> List[str]:
    """Run several config files in parallel, then aggregate their results.

    Each config is run through ``run_config`` in a pool of worker processes
    (with a progress bar). Afterwards ``agg_batch.py`` is called once on
    ``<cwd>/results/<name>``, where ``<name>`` is the name of the directory
    containing the first config.

    Args:
        config_paths: Config files to run. A single ``str``/``Path`` is treated
            as a one-element list; any other iterable is materialised to a list.
        repeats: Number of repeats forwarded to every run.
        concurrent_jobs: Number of worker processes in the pool.

    Returns:
        The stdout of each run, in the same order as ``config_paths``. An empty
        list is returned (and nothing is run or aggregated) when there are no
        configs.

    Raises:
        subprocess.CalledProcessError: If any run or the aggregation step exits
            with a non-zero status.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(config_paths, (str, Path)):
        config_paths = [config_paths]
    else:
        # Materialise so that len() and [0] below also work for generators.
        config_paths = list(config_paths)

    # Nothing to run: avoid starting a pool and the IndexError on config_paths[0].
    if not config_paths:
        return []

    jobs = [(config_path, repeats) for config_path in config_paths]
    with Pool(concurrent_jobs) as pool:
        outputs = list(tqdm(pool.imap(run_config_pool, jobs), total=len(jobs)))

    # NOTE(review): assumes every config lives in the same directory and that
    # results were written under ./results/<that directory name> — confirm
    # against the configs' `out_dir` setting.
    output_dir_name = Path(config_paths[0]).parent.name
    print(f"output_dir_name = {output_dir_name}")
    call_python(agg_batch_py, ["--dir", str(Path("").absolute() / "results" / output_dir_name)])
    return outputs


def gen_and_run_configs(
    config_path: Union[str, Path], grid_path: Union[str, Path], repeats: int = 1
):
    """Generate a grid of configs from a base config and run all of them.

    Args:
        config_path: Base config file.
        grid_path: Grid-search file.
        repeats: Number of repeats forwarded to every run.

    Returns:
        A dict with ``gen_output`` (stdout of the generation step) and
        ``run_output`` (the value returned by ``run_configs``).

    Raises:
        FileNotFoundError: If no generated ``*.yaml`` config is found.
    """
    # Accept plain strings too: `.stem` below needs Path objects.
    config_path, grid_path = Path(config_path), Path(grid_path)

    gen_output = gen_configs(config_path, grid_path)

    # The generated configs are looked up in ./configs/<config>_grid_<grid>/.
    output_dir_name = f"{config_path.stem}_grid_{grid_path.stem}"
    pattern = str(Path("").absolute() / "configs" / output_dir_name / "*.yaml")
    # `glob` is the function imported via `from glob import glob`, so it is
    # called directly; sorting makes the run order deterministic.
    all_configs = sorted(glob(pattern))
    if not all_configs:
        raise FileNotFoundError(f"no generated configs match {pattern}")

    return dict(gen_output=gen_output, run_output=run_configs(all_configs, repeats=repeats))


# Get help on our 3 main scripts

In [3]:
# Show the command-line usage of the experiment runner script.
print(call_python(main_py, args=["--help"]))

usage: main.py [-h] --cfg CFG_FILE [--repeat REPEAT] [--mark_done] ...

GraphGym

positional arguments:
  opts             See graphgym/config.py for remaining options.

optional arguments:
  -h, --help       show this help message and exit
  --cfg CFG_FILE   The configuration file path.
  --repeat REPEAT  The number of repeated jobs.
  --mark_done      Mark yaml as done after a job has finished.



In [4]:
# Show the command-line usage of the config-generation script.
print(call_python(configs_gen_py, args=["--help"]))

usage: configs_gen.py [-h] [--config CONFIG] --grid GRID [--sample]
                      [--sample_alias SAMPLE_ALIAS] [--sample_num SAMPLE_NUM]
                      [--out_dir OUT_DIR] [--config_budget CONFIG_BUDGET]

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       the base configuration file used for edit
  --grid GRID           configuration file for grid search
  --sample              whether perform random sampling
  --sample_alias SAMPLE_ALIAS
                        configuration file for sample alias
  --sample_num SAMPLE_NUM
                        Number of random samples in the space
  --out_dir OUT_DIR     output directory for generated config files
  --config_budget CONFIG_BUDGET
                        the base configuration file used for matching
                        computation



In [5]:
# Show the command-line usage of the result-aggregation script.
print(call_python(agg_batch_py, args=["--help"]))

usage: agg_batch.py [-h] --dir DIR [--metric METRIC]

Train a classification model

optional arguments:
  -h, --help       show this help message and exit
  --dir DIR        Dir for batch of results
  --metric METRIC  metric to select best epoch



# Run a simple single-config experiment

In [96]:
# Load the baseline config from ./configs (relative to the current working
# directory) and pretty-print it for inspection.
classic_config = Path.cwd().joinpath("configs", "classic.yaml")
config = yaml.safe_load(classic_config.read_text())
pprint(config)

{'dataset': {'name': 'Cora',
             'split': [0.8, 0.1, 0.1],
             'task': 'node',
             'task_type': 'classification'},
 'gnn': {'dim_inner': 256,
         'layer_type': 'generalconv',
         'layers_mp': 2,
         'layers_post_mp': 1,
         'layers_pre_mp': 1},
 'model': {'loss_fun': 'cross_entropy', 'type': 'gnn'},
 'optim': {'base_lr': 0.003, 'max_epoch': 400, 'optimizer': 'adam'},
 'out_dir': 'results',
 'train': {'batch_size': 32, 'ckpt_period': 100, 'eval_period': 20}}


In [97]:
# Run the baseline config three times and keep the runner's stdout.
output = run_config(config_path=classic_config, repeats=3)

# Run a simple grid experiment

In [99]:
# Base config and grid file, both under ./configs.
classic_config = Path.cwd() / "configs" / "classic.yaml"
classic_grid = classic_config.with_name("classic_grid.txt")
assert classic_config.exists()
assert classic_grid.exists()

# Expand the grid into one config file per combination.
print(gen_configs(classic_config, classic_grid))

# The generated configs are expected in ./configs/<config stem>_grid_<grid stem>/.
config_dir = classic_config.parent / f"{classic_config.stem}_grid_{classic_grid.stem}"
assert config_dir.exists()
config_paths = glob(str(config_dir / "*.yaml"))
assert config_paths

Variable label: [['gnn', 'dim_inner'], ['gnn', 'act']]
Variable alias: ['dim_inner', 'activation']
18 configurations saved to: configs/classic_grid_classic_grid



In [101]:
# Run every generated grid config five times, four configs at a time.
outputs = run_configs(config_paths, concurrent_jobs=4, repeats=5)

100%|██████████| 18/18 [04:27<00:00, 14.88s/it]


output_dir_name = classic_grid_classic_grid


# Run a NAS grid experiment

In [12]:
# Write the base NAS config into ./configs (overwritten on every run of this cell).
nas_config = Path.cwd() / "configs" / "nas.yaml"
nas_config.write_text(
    """
# a simple template file
out_dir: results
dataset:
  name: Cora
  task: node
  task_type: classification
  split: [0.8, 0.2]
train:
  batch_size: 32
model:
  type: nasgnn
  loss_fun: cross_entropy
gnn:
  layers_pre_mp: 1
  layers_mp: 2
  layers_post_mp: 1
# nas config
nas:
  node01_act: tanh
  node02_act: tanh
  node03_act: tanh
  node12_act: tanh
  node13_act: tanh
  node23_act: tanh
  node0: GCN
  node1: GCN
  node2: GCN
  node3: GCN
optim:
  base_lr: 0.003
  max_epoch: 400
"""
)

# Write the search grid: one line per variable as "<config key> <alias> <choices>".
# NOTE(review): the recorded output of this cell lists these variables under
# 'gnn' rather than 'nas' — it may predate this grid; re-run to confirm.
nas_grid = nas_config.with_name("nas_grid2.txt")
nas_grid.write_text(
    """
nas.node01_act node01_act ["relu","prelu","tanh","identity"]
nas.node02_act node02_act ["relu","prelu","tanh","identity"]
nas.node03_act node03_act ["relu","prelu","tanh","identity"]
nas.node12_act node12_act ["relu","prelu","tanh","identity"]
nas.node13_act node13_act ["relu","prelu","tanh","identity"]
nas.node23_act node23_act ["relu","prelu","tanh","identity"]
nas.node0 node0 ["GCN","GAT","GraphSage","Identity"]
nas.node1 node1 ["GCN","GAT","GraphSage","Identity"]
nas.node2 node2 ["GCN","GAT","GraphSage","Identity"]
nas.node3 node3 ["GCN","GAT","GraphSage","Identity"] 
    """
)

# Randomly sample 200 configurations from the grid instead of enumerating it.
print(gen_configs(nas_config, nas_grid, sample_num=200))

# The generated configs are expected in ./configs/<config stem>_grid_<grid stem>/.
nas_dir = nas_config.parent / f"{nas_config.stem}_grid_{nas_grid.stem}"
assert nas_dir.exists()
config_paths = glob(str(nas_dir / "*.yaml"))
assert config_paths

  warn(f"Failed to load image Python extension: {e}")


Total sample size of each chunk of experiment space: [200]
Variable label: [['gnn', 'node01_act'], ['gnn', 'node02_act'], ['gnn', 'node03_act'], ['gnn', 'node12_act'], ['gnn', 'node13_act'], ['gnn', 'node23_act'], ['gnn', 'node0'], ['gnn', 'node1'], ['gnn', 'node2'], ['gnn', 'node3']]
Variable alias: ['node01_act', 'node02_act', 'node03_act', 'node12_act', 'node13_act', 'node23_act', 'node0', 'node1', 'node2', 'node3']
200 configurations saved to: configs/nas_grid_nas_grid2



In [13]:
# Run only the first two sampled NAS configs (five repeats each, two at a time).
outputs = run_configs(config_paths[:2], concurrent_jobs=2, repeats=5)

  warn(f"Failed to load image Python extension: {e}")
  warn(f"Failed to load image Python extension: {e}")
Traceback (most recent call last):
  File "/home/rdyro/Documents/GraphGym/run/main.py", line 23, in <module>
    load_cfg(cfg, args)
  File "/home/rdyro/.pyenv/versions/3.9.13/envs/devel/lib/python3.9/site-packages/graphgym/config.py", line 517, in load_cfg
    cfg.merge_from_file(args.cfg_file)
  File "/home/rdyro/.pyenv/versions/3.9.13/envs/devel/lib/python3.9/site-packages/yacs/config.py", line 213, in merge_from_file
    self.merge_from_other_cfg(cfg)
  File "/home/rdyro/.pyenv/versions/3.9.13/envs/devel/lib/python3.9/site-packages/yacs/config.py", line 217, in merge_from_other_cfg
    _merge_a_into_b(cfg_other, self, self, [])
  File "/home/rdyro/.pyenv/versions/3.9.13/envs/devel/lib/python3.9/site-packages/yacs/config.py", line 478, in _merge_a_into_b
    _merge_a_into_b(v, b[k], root, key_list + [k])
  File "/home/rdyro/.pyenv/versions/3.9.13/envs/devel/lib/python3.9/site-

CalledProcessError: Command '['/home/rdyro/.pyenv/versions/3.9.13/envs/devel/bin/python', '/home/rdyro/Documents/GraphGym/run/main.py', '--cfg', '/home/rdyro/Documents/CS224W-Final-Project-GraphGym/configs/nas_grid_nas_grid2/nas-node01_act=identity-node02_act=prelu-node03_act=prelu-node12_act=identity-node13_act=identity-node23_act=prelu-node0=Identity-node1=GCN-node2=GraphSage-node3=GAT.yaml', '--repeat', '5']' returned non-zero exit status 1.