In [None]:
import wandb
import pandas as pd

def get_run_ids_with_filter(entity, project, filter_dict):
    """
    Retrieve the IDs and names of all runs in a project that match the given filter.

    Args:
        entity (str): The W&B entity (username or team).
        project (str): The name of the W&B project.
        filter_dict (dict): A dictionary defining the filter criteria.
            For example: {"config.learning_rate": 0.001}

    Returns:
        tuple: ``(run_ids, run_names)`` -- two lists of equal length where
        ``run_names[i]`` is the display name of the run with ID ``run_ids[i]``.
    """
    api = wandb.Api()
    runs = api.runs(f"{entity}/{project}", filters=filter_dict)

    # Collect both attributes in a single pass so IDs and names are guaranteed
    # to stay paired, even if `runs` can only be iterated once.
    run_ids = []
    run_names = []
    for run in runs:
        run_ids.append(run.id)
        run_names.append(run.name)
    return run_ids, run_names

def pull_run_data(run_id, entity="confopt-team", project="ConfoptAutoML25"):
    """
    Fetch the summary, config and name of a single W&B run.

    Args:
        run_id (str): The run identifier.
        entity (str): The W&B entity (username or team).
        project (str): The name of the W&B project.

    Returns:
        dict | None: A dictionary with the keys "summary", "config", "history"
        and "run_name", or None if the run could not be retrieved (the error
        is printed). "history" is always an empty dict because the history
        download is currently disabled.
    """
    client = wandb.Api()
    path = f"{entity}/{project}/{run_id}"

    try:
        run = client.run(path)
    except Exception as err:
        # Best effort: report the failure and let the caller decide what to do.
        print(f"Error retrieving run: {err}")
        return None

    return {
        # Summary metrics as a plain dict
        "summary": run.summary._json_dict,
        # Configuration the run was launched with
        "config": run.config,
        # Logged metrics are not downloaded (run.history(pandas=True) is disabled)
        "history": {},
        "run_name": run.name,
    }

In [None]:
# W&B entity (user or team) and project that the runs are queried from
entity = "confopt-team"
project = "ConfoptAutoML25"

# Filter handed to api.runs() via get_run_ids_with_filter(). The active criteria
# select runs with the given tag, one of the listed batch sizes, that are not
# debug runs and whose state is "finished". Commented-out entries are
# alternative criteria that are currently disabled.
filter_dict = {
    # "config.benchmark": "deep-regular",
    # "config.dataset": "cifar10",
    # "config.sampler_type": "darts",
    "config.tag": "first-full-run",
    # "benchmark": "single_cell-no_skip",
    "config.trainer.batch_size": {"$in": [64, 96, 480, 320]},
    "config.is_debug_run": False,
    "state": "finished",
}

# Retrieve the IDs and names of the runs matching the filter (index-aligned lists)
matching_run_ids, matching_run_names = get_run_ids_with_filter(entity, project, filter_dict)
print(f"Matching run IDs: {matching_run_ids}")
print(f"Matching run names: {matching_run_names}")


In [None]:
# Number of runs that matched the filter
len(matching_run_ids)

In [None]:
# Maps run ID -> dict returned by pull_run_data (summary, config, history, run_name)
datas = dict()

for run_id, run_name in zip(matching_run_ids, matching_run_names):
    print("Loading data for run:", run_name)
    # Query the same entity/project the run IDs were listed from instead of
    # relying on pull_run_data's defaults happening to match.
    run_data = pull_run_data(run_id, entity=entity, project=project)
    if run_data is None:
        # pull_run_data already printed the error; storing None would make the
        # later cells fail when they index into the entry.
        print("Skipping run (could not be loaded):", run_name)
        continue
    datas[run_id] = run_data


In [None]:
# Pick one specific run (hard-coded ID) and its config, presumably as a sample
# for inspecting the config layout. This raises KeyError if that run is not
# among the entries of `datas`.
# NOTE: get_row_data reads the module-level `data` bound here for "run_name".
data = datas["8wovcbhs"]
config = data["config"]


def get_row_data(config, run_name=None):
    """
    Flatten the relevant parts of a run config into one table row.

    Args:
        config (dict): Run configuration. Must contain the keys "sampler_type",
            "space", "opset", "partial_connector", "oles" (with nested "oles"),
            "sampler" (with nested "arch_combine_fn"), "perturbator",
            "trainer" (with nested "seed" and "batch_size"), "weight_type"
            and "is_debug_run".
        run_name (str, optional): Name of the run this config belongs to.
            If omitted, the name is read from the module-level ``data`` dict
            (legacy behaviour). That is only correct when ``data`` is the very
            run whose config is passed in, so callers processing several runs
            should always pass ``run_name`` explicitly.

    Returns:
        dict: One record with the run name, the optimizer/space/opset, boolean
        flags for the pcdarts/oles/fairdarts/sdarts variants, the seed, the
        batch size, the weight type ("ws" or "we") and the debug flag.

    Raises:
        KeyError: If one of the expected config keys is missing.
    """
    if run_name is None:
        # Legacy fallback: whatever run the notebook-global `data` points at.
        run_name = data["run_name"]

    optimizer = config["sampler_type"]
    space = config["space"]
    opset = config["opset"]

    # Variant flags are derived from which optional components are configured.
    pcdarts = config["partial_connector"] is not None
    oles = config["oles"]["oles"] is True
    fairdarts = config["sampler"]["arch_combine_fn"] == "sigmoid"
    sdarts = config["perturbator"] is not None

    seed = config["trainer"]["seed"]
    batch_size = config["trainer"]["batch_size"]
    # "ws" = weight sharing; every other weight type is reported as "we".
    weight_type = "ws" if config["weight_type"] == "weight_sharing" else "we"
    is_debug_run = config["is_debug_run"]

    return {
        "run_name": run_name,
        "optimizer": optimizer,
        "space": space,
        "opset": opset,
        "pcdarts": pcdarts,
        "oles": oles,
        "fairdarts": fairdarts,
        "sdarts": sdarts,
        "seed": seed,
        "batch_size": batch_size,
        "weight_type": weight_type,
        "is_debug_run": is_debug_run,
    }

In [None]:
# One row per loaded run. "run_name" is set explicitly from each run's own
# entry: a comprehension variable does not rebind the notebook-global `data`,
# so relying on a global lookup would label every row with the same run name.
df = pd.DataFrame(
    [
        {**get_row_data(run_data["config"]), "run_name": run_data["run_name"]}
        for run_data in datas.values()
        if run_data is not None  # pull_run_data returns None for runs that failed to load
    ]
)
df.head()

In [None]:
def get_results(df, optimizer, space, opset, oles, sdarts, fairdarts, pcdarts, weight_type, is_debug_run=False):
    """
    Select the rows of `df` that belong to one experiment configuration.

    Args:
        df (pd.DataFrame): Table with the columns "oles", "sdarts",
            "fairdarts", "pcdarts", "space", "opset", "optimizer",
            "weight_type" and "is_debug_run".
        optimizer, space, opset, weight_type: Required values of the
            columns of the same name.
        oles, sdarts, fairdarts, pcdarts: Required values of the variant flags.
        is_debug_run: Required value of the "is_debug_run" column.

    Returns:
        pd.DataFrame: The rows for which every listed column equals its
        required value.
    """
    required = (
        ("oles", oles),
        ("sdarts", sdarts),
        ("fairdarts", fairdarts),
        ("pcdarts", pcdarts),
        ("space", space),
        ("opset", opset),
        ("optimizer", optimizer),
        ("weight_type", weight_type),
        ("is_debug_run", is_debug_run),
    )

    # AND together one equality mask per column.
    conditions = [df[column] == value for column, value in required]
    mask = conditions[0]
    for condition in conditions[1:]:
        mask = mask & condition
    return df[mask]

In [None]:
# Launch commands for the missing seeds, filled by print_if_incomplete
script_cmds = []

def print_if_incomplete(results, optimizer, space, opset, extra, expected_seeds=(0, 1, 2)):
    """
    Report an experiment whose seeds are not all present and queue relaunches.

    An experiment is incomplete when at least one of `expected_seeds` does not
    occur in ``results["seed"]``. The decision is based on the seeds rather
    than on the number of rows, so duplicate runs of one seed cannot hide a
    missing seed. For every missing seed one launch command is appended to the
    module-level ``script_cmds`` list.

    Args:
        results (pd.DataFrame): Rows of the experiment; needs a "seed" column.
        optimizer (str): Value for the ``--optimizer`` flag.
        space (str): Value for the ``--subspace`` flag.
        opset (str): Value for the ``--ops`` flag.
        extra (str): Variant name. "baseline" adds no flag, "sdarts" adds
            ``--sdarts random``, anything else adds ``--<extra> True``.
        expected_seeds (iterable): Seeds every experiment should have.

    Returns:
        None
    """
    missing_seeds = set(expected_seeds) - set(results["seed"])
    if not missing_seeds:
        return

    print(f"{optimizer} {space} {opset} {extra}", len(results), "\t\t\t\tMissing seeds:", missing_seeds)

    # The variant flag is the same for every seed, so build it once.
    if extra == "baseline":
        extra_str = ""
    elif extra == "sdarts":
        # sdarts takes a perturbation type instead of a boolean
        extra_str = " --sdarts random"
    else:
        extra_str = f" --{extra} True"

    # Sorted so the generated script is deterministic.
    for seed in sorted(missing_seeds):
        cmd = f"python launch_supernet_search.py --seeds {seed} --dataset cifar10_supernet --tag first-full-run --optimizer {optimizer} --subspace {space} --ops {opset}{extra_str} & sleep 5"
        script_cmds.append(cmd)

def print_if_complete(results, optimizer, space, opset, extra, expected_seeds=(0, 1, 2)):
    """
    Report an experiment for which every expected seed is present.

    Completeness is decided by the seeds found in ``results["seed"]`` rather
    than by the row count, so three rows with a duplicated seed are not
    reported as complete.

    Args:
        results (pd.DataFrame): Rows of the experiment; needs a "seed" column.
        optimizer (str): Optimizer name, used for the printed label.
        space (str): Search space name, used for the printed label.
        opset (str): Operation set name, used for the printed label.
        extra (str): Variant name, used for the printed label.
        expected_seeds (iterable): Seeds every experiment should have.

    Returns:
        None
    """
    if set(expected_seeds) <= set(results["seed"]):
        print(f"{optimizer} {space} {opset} {extra}", len(results), "\t\t\t\tAll seeds complete!")


# Toggle: True lists incomplete experiments (and emits a relaunch script),
# False lists the complete ones.
show_incomplete_jobs = True
print_fn = print_if_incomplete if show_incomplete_jobs is True else print_if_complete

# Variants that are only evaluated on top of the darts optimizer; each variant
# switches on exactly one of the flags of the same name.
darts_variants = ("oles", "sdarts", "fairdarts", "pcdarts")

for optimizer in ("darts", "drnas", "gdas"):
    for space in ("deep", "wide", "single_cell"):
        for opset in ("no_skip", "all_skip", "regular"):
            # Every optimizer has a baseline; darts additionally has its variants.
            variants = ("baseline",) + (darts_variants if optimizer == "darts" else ())

            for variant in variants:
                flags = {name: name == variant for name in darts_variants}
                results = get_results(df, optimizer, space, opset, weight_type="ws", **flags)
                print_fn(results, optimizer, space, opset, variant)

if show_incomplete_jobs is True:
    # Two blank lines, then the collected commands as a shell script
    print("\n")
    print("#!/bin/bash")
    print()
    for cmd in script_cmds:
        print(cmd)
                    


In [None]:
# Wall-clock budget a run has to finish within, in seconds (24 hours)
time_budget = 24 * 3600

# For every loaded run, extrapolate the remaining time from the average time
# per logged step and report whether the run fits into the budget.
for run_id, data in datas.items():
    if data is None:
        # Run could not be loaded by pull_run_data; nothing to report.
        print(f"Run ID: {run_id}, no data available")
        continue

    run_name = data["run_name"]
    summary = data["summary"]
    config = data["config"]

    runtime = summary["_runtime"]
    epoch = summary["_step"]
    if not epoch:
        # No step logged yet: time per epoch is undefined (would divide by zero).
        print(f"Run ID: {run_id}, no completed epoch yet \trun_name: {run_name}")
        continue
    time_per_epoch = runtime / epoch

    total_epochs = config["trainer"]["epochs"]
    time_left = (total_epochs - epoch) * time_per_epoch
    will_finish = time_left + runtime < time_budget

    print(f"Run ID: {run_id}, time per epoch: {time_per_epoch:7.2f} \truntime: {runtime:8.02f}\tepoch: {epoch:3}/{total_epochs:3} \ttime left: {time_left/3600:6.2f} \twill finish: {will_finish} \trun_name: {run_name}")