In [1]:
import torch
import pprint as pp

from src.options import get_options
from src.utils import load_env
from src.agents import Agent
from src.utils.hyperparameter_config import config

import ray
from ray import tune, air
from ray.tune.search import ConcurrencyLimiter
from ray.tune.search.hyperopt import HyperOptSearch
from ray.air import session

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def run(config: dict) -> None:
    """Train one Agent using the hyperparameters of a single tuning trial.

    Each ``config`` entry is rendered as a ``--key=value`` command-line
    argument, the list is parsed with ``get_options``, and the resulting
    options are used to seed torch, load the environment and train an
    ``Agent``.

    Args:
        config: Mapping of option name to value for this trial. The mapping
            is not modified; ``output_dir`` is forced to ``"runs"`` on a
            private copy (overriding any value supplied by the caller).
    """
    # Work on a copy so the caller's dict is never mutated.
    trial_config = {**config, "output_dir": "runs"}

    args_list = [f"--{key}={value}" for key, value in trial_config.items()]
    # These two switches are appended for every trial.
    args_list += ["--no_tensorboard", "--no_cuda"]
    opts = get_options(args_list)

    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Initialize the Environment
    env = load_env(opts.problem)

    # Train the Agent; `session` is handed to the Agent as its third argument
    agent = Agent(opts, env, session)
    agent.train()

In [3]:
# Tuning budget and resources; edit these to scale the experiment.
N_ITER = 3  # number of hyperparameter samples (trials) to run
NUM_CPUS = 5  # CPUs for the local Ray instance; also the concurrent-trial cap
TRIAL_MEMORY_BYTES = 400 * 1000000  # memory requested per trial (Ray counts bytes)

# ignore_reinit_error lets this cell be re-run without restarting the kernel;
# a plain ray.init() raises if Ray is already initialised.
ray.init(num_cpus=NUM_CPUS, ignore_reinit_error=True)

searcher = HyperOptSearch(
    space=config,
    metric="loss",
    mode="min",
    # int(N_ITER / 10) truncates to 0 for N_ITER < 10, leaving the search
    # with no random initial points; always keep at least one.
    n_initial_points=max(1, N_ITER // 10),
)
algo = ConcurrencyLimiter(searcher, max_concurrent=NUM_CPUS)
objective = tune.with_resources(
    tune.with_parameters(run),
    resources={"cpu": 1, "memory": TRIAL_MEMORY_BYTES},
)

tuner = tune.Tuner(
    trainable=objective,
    run_config=air.RunConfig(local_dir="./ray_results"),
    tune_config=tune.TuneConfig(
        metric="loss",
        mode="min",
        search_alg=algo,
        num_samples=N_ITER,
    ),
)

# Runs all trials; blocks until the experiment finishes.
results = tuner.fit()

2023-07-19 03:48:13,679	INFO worker.py:1636 -- Started a local Ray instance.


0,1
Current time:,2023-07-19 03:48:37
Running for:,00:00:23.40
Memory:,9.6/16.0 GiB

Trial name,status,loc,batch_size,hyperparameter_tuning,lr_model,n_encode_layers,n_epochs,optimizer_class,iter,total time (s),loss
run_984d8b74,TERMINATED,127.0.0.1:69934,512,True,0.000499017,4,30,Adam,2,10.6087,2.40595
run_903cbd52,TERMINATED,127.0.0.1:69944,512,True,0.00050903,4,118,Adam,2,10.5658,2.40595
run_7fa8e274,TERMINATED,127.0.0.1:69958,128,True,8.10994e-05,3,125,Adam,2,9.85945,2.46589


[2m[36m(run pid=69934)[0m {'baseline': 'rollout',
[2m[36m(run pid=69934)[0m  'batch_size': 16,
[2m[36m(run pid=69934)[0m  'battery_limit': 0.6,
[2m[36m(run pid=69934)[0m  'bl_alpha': 0.05,
[2m[36m(run pid=69934)[0m  'bl_warmup_epochs': 1,
[2m[36m(run pid=69934)[0m  'checkpoint_encoder': False,
[2m[36m(run pid=69934)[0m  'checkpoint_epochs': 1,
[2m[36m(run pid=69934)[0m  'dataParallel': False,
[2m[36m(run pid=69934)[0m  'data_distribution': None,
[2m[36m(run pid=69934)[0m  'device': device(type='cpu'),
[2m[36m(run pid=69934)[0m  'display_graphs': None,
[2m[36m(run pid=69934)[0m  'early_stopping_delta': 10.0,
[2m[36m(run pid=69934)[0m  'early_stopping_patience': 5,
[2m[36m(run pid=69934)[0m  'embedding_dim': 128,
[2m[36m(run pid=69934)[0m  'epoch_size': 16,
[2m[36m(run pid=69934)[0m  'epoch_start': 0,
[2m[36m(run pid=69934)[0m  'eval_batch_size': 16,
[2m[36m(run pid=69934)[0m  'eval_only': False,
[2m[36m(run pid=69934)[0m  'exp_bet

100%|██████████| 1/1 [00:00<00:00, 141.18it/s]
  0%|          | 0/1 [00:00<?, ?it/s]


[2m[36m(run pid=69934)[0m epoch: 0, train_batch_id: 0, avg_cost: 2.60872745513916
[2m[36m(run pid=69934)[0m grad_norm: 17.145753860473633, clipped: 1.0


100%|██████████| 1/1 [00:04<00:00,  4.43s/it]


[2m[36m(run pid=69934)[0m Finished epoch 0, took 00:00:05 s
[2m[36m(run pid=69934)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[2m[36m(run pid=69944)[0m Saving model and state...
[

100%|██████████| 1/1 [00:05<00:00,  5.04s/it]
  0%|          | 0/1 [00:00<?, ?it/s][32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m


Trial name,date,done,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
run_7fa8e274,2023-07-19_03-48-37,True,"3_batch_size=128,hyperparameter_tuning=True,lr_model=0.0001,n_encode_layers=3,n_epochs=125,optimizer_class=Adam",Kleios-MBP,2,2.46589,127.0.0.1,69958,9.85945,4.8465,9.85945,1689731317,2,7fa8e274
run_903cbd52,2023-07-19_03-48-33,True,"2_batch_size=512,hyperparameter_tuning=True,lr_model=0.0005,n_encode_layers=4,n_epochs=118,optimizer_class=Adam",Kleios-MBP,2,2.40595,127.0.0.1,69944,10.5658,5.07756,10.5658,1689731313,2,903cbd52
run_984d8b74,2023-07-19_03-48-28,True,"1_batch_size=512,hyperparameter_tuning=True,lr_model=0.0005,n_encode_layers=4,n_epochs=30,optimizer_class=Adam",Kleios-MBP,2,2.40595,127.0.0.1,69934,10.6087,5.48884,10.6087,1689731308,2,984d8b74


[2m[36m(run pid=69934)[0m Validating...
[2m[36m(run pid=69934)[0m Validation overall avg_cost: 2.463257312774658 +- 0.18309058248996735
[2m[36m(run pid=69934)[0m Evaluating candidate model on evaluation dataset
[2m[36m(run pid=69934)[0m Epoch 0 candidate mean 2.518484354019165, baseline epoch 0 mean 2.4411473274230957, difference 0.07733702659606934
[2m[36m(run pid=69934)[0m Set warmup alpha = 1.0
[2m[36m(run pid=69934)[0m Start train epoch 1, lr=0.0004990169467384537 for run rollout_20230719T034818
[2m[36m(run pid=69934)[0m Evaluating baseline on dataset...
[2m[36m(run pid=69958)[0m {'baseline': 'rollout',
[2m[36m(run pid=69958)[0m  'batch_size': 16,
[2m[36m(run pid=69958)[0m  'battery_limit': 0.6,
[2m[36m(run pid=69958)[0m  'bl_alpha': 0.05,
[2m[36m(run pid=69958)[0m  'bl_warmup_epochs': 1,
[2m[36m(run pid=69958)[0m  'checkpoint_encoder': False,
[2m[36m(run pid=69958)[0m  'checkpoint_epochs': 1,
[2m[36m(run pid=69958)[0m  'dataParallel': F

100%|██████████| 1/1 [00:05<00:00,  5.37s/it][32m [repeated 2x across cluster][0m


[2m[36m(run pid=69944)[0m Evaluating baseline on dataset...
[2m[36m(run pid=69944)[0m Evaluating baseline on dataset...
[2m[36m(run pid=69934)[0m Finished epoch 1, took 00:00:05 s[32m [repeated 2x across cluster][0m
[2m[36m(run pid=69934)[0m Saving model and state...[32m [repeated 2x across cluster][0m
[2m[36m(run pid=69934)[0m Validating...[32m [repeated 2x across cluster][0m
[2m[36m(run pid=69934)[0m Validation overall avg_cost: 2.405951499938965 +- 0.12876993417739868[32m [repeated 2x across cluster][0m
[2m[36m(run pid=69934)[0m Evaluating candidate model on evaluation dataset[32m [repeated 2x across cluster][0m
[2m[36m(run pid=69934)[0m Epoch 1 candidate mean 2.4411473274230957, baseline epoch 0 mean 2.4411473274230957, difference 0.0[32m [repeated 2x across cluster][0m
[2m[36m(run pid=69944)[0m Epoch 1 candidate mean 2.4411473274230957, baseline epoch 0 mean 2.4411473274230957, difference 0.0
[2m[36m(run pid=69944)[0m Start train epoch 1, 

100%|██████████| 1/1 [00:00<00:00, 235.89it/s][32m [repeated 12x across cluster][0m


[2m[36m(run pid=69958)[0m epoch: 0, train_batch_id: 0, avg_cost: 2.5773630142211914
[2m[36m(run pid=69958)[0m grad_norm: 25.235061645507812, clipped: 1.0
[2m[36m(run pid=69958)[0m p-value: 0.11460422181120809
[2m[36m(run pid=69958)[0m Set warmup alpha = 1.0
[2m[36m(run pid=69958)[0m Evaluating baseline on dataset...


100%|██████████| 1/1 [00:05<00:00,  5.03s/it][32m [repeated 6x across cluster][0m
100%|██████████| 1/1 [00:00<00:00, 252.21it/s][32m [repeated 6x across cluster][0m
100%|██████████| 1/1 [00:04<00:00,  4.34s/it]
100%|██████████| 1/1 [00:04<00:00,  4.34s/it]
2023-07-19 03:48:37,451	INFO tune.py:1111 -- Total run time: 23.41 seconds (23.40 seconds for the tuning loop).




[2m[36m(run pid=69958)[0m p-value: 0.1450604741367071
[2m[36m(run pid=69958)[0m Finished epoch 1, took 00:00:04 s[32m [repeated 3x across cluster][0m
[2m[36m(run pid=69958)[0m Saving model and state...[32m [repeated 3x across cluster][0m
[2m[36m(run pid=69958)[0m Validating...[32m [repeated 3x across cluster][0m
[2m[36m(run pid=69958)[0m Validation overall avg_cost: 2.465888261795044 +- 0.1560748815536499[32m [repeated 3x across cluster][0m
[2m[36m(run pid=69958)[0m Evaluating candidate model on evaluation dataset[32m [repeated 3x across cluster][0m
[2m[36m(run pid=69958)[0m Epoch 1 candidate mean 2.485405921936035, baseline epoch 0 mean 2.606360912322998, difference -0.12095499038696289[32m [repeated 3x across cluster][0m
[2m[36m(run pid=69958)[0m p-value: 0.1450604741367071
