In [1]:
from rl4co.envs.routing import TSPEnv, TSPGenerator
from rl4co.models import DeepACOPolicy, DeepACO
from rl4co.utils import RL4COTrainer


from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary
from lightning.pytorch.loggers import WandbLogger

from rl4co.models.zoo.deepaco.antsystem import AntSystem
from rl4co.models.zoo.hdgaco.FocusedACO import FocusedACO

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Problem setup: the generator is configured for `num_loc` locations with the
# "uniform" location distribution, and the TSP environment is built on top of it.
# `num_loc` is reused by later cells, so change the problem size here only.
num_loc = 50
generator = TSPGenerator(
    loc_distribution="uniform",
    num_loc=num_loc,
)
env = TSPEnv(generator)

In [None]:

# Policy: DeepACO heatmap policy driven by the FocusedACO colony implementation.
# NOTE(review): `k_sparse=num_loc` presumably disables graph sparsification
# (every node keeps all neighbours) -- confirm against DeepACOPolicy's docs.
policy = DeepACOPolicy(
    env_name=env.name,
    aco_class=FocusedACO,
    k_sparse=num_loc,
    train_with_local_search=True,
    # Forwarded to the ACO class when it is instantiated.
    aco_kwargs={"use_local_search": True},
)

# Lightning module wrapping environment + policy; dataset sizes are per epoch.
model = DeepACO(
    env,
    policy,
    batch_size=512,
    train_data_size=10_000,
    val_data_size=1_000,
    val_batch_size=512,
    test_data_size=1_000,
    optimizer_kwargs={"lr": 1e-4},
)

# Derive the run name from `num_loc` instead of hard-coding "50" so the W&B
# run label cannot drift from the actual problem size configured above.
logger = WandbLogger(project="hdgaco", name=f"tsp_{num_loc}_deepaco_faco")

/home/shora/Research/rl4co/.venv/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.
/home/shora/Research/rl4co/.venv/lib/python3.12/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.


In [4]:
# Checkpointing: keep the best model (highest validation reward) plus the last one.
checkpoint_callback = ModelCheckpoint(
    # Must be an f-string: without the `f` prefix the directory was literally
    # named "gfacs_mmas_{num_loc}" instead of e.g. "gfacs_mmas_50".
    dirpath=f"checkpoints/gfacs_mmas_{num_loc}",
    # Deliberately NOT an f-string: Lightning substitutes `{epoch}` at save time.
    # NOTE(review): with Lightning's default `auto_insert_metric_name=True` the
    # resulting file is expected to look like "epoch_epoch=019.ckpt" -- confirm.
    filename="epoch_{epoch:03d}",
    save_top_k=1,  # save only the best model
    save_last=True,  # save the last model
    monitor="val/reward",  # monitor validation reward
    mode="max",  # maximize validation reward
)

# Print a layer-by-layer model summary (up to 3 levels deep) when fitting starts.
rich_model_summary = RichModelSummary(max_depth=3)

callbacks = [checkpoint_callback, rich_model_summary]


In [5]:
# Trainer: 20 epochs on a single GPU, logging to W&B and using the
# checkpoint / model-summary callbacks defined in the previous cell.
# NOTE: accelerator="gpu" requires a CUDA-capable machine to run this cell.
trainer = RL4COTrainer(
    accelerator="gpu",
    devices=1,
    max_epochs=20,
    callbacks=callbacks,
    logger=logger,
)

Using 16bit Automatic Mixed Precision (AMP)
Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.rich_model_summary.RichModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [6]:
# Train the model; no dataloaders are passed to `fit` here.
trainer.fit(model=model)


[34m[1mwandb[0m: Currently logged in as: [33mshoraaa[0m ([33mshoraaa-vnu[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


/home/shora/Research/rl4co/.venv/lib/python3.12/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:658: Checkpoint directory /home/shora/Research/rl4co/checkpoints/gfacs_mmas_{num_loc} exists and is not empty.
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/home/shora/Research/rl4co/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


                                                                           

/home/shora/Research/rl4co/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
/home/shora/Research/rl4co/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (20) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 19: 100%|██████████| 20/20 [00:31<00:00,  0.64it/s, v_num=37pl, train/reward=-7.22, train/loss=-0.19, val/reward=-5.67] 

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 20/20 [00:31<00:00,  0.64it/s, v_num=37pl, train/reward=-7.22, train/loss=-0.19, val/reward=-5.67]


In [7]:
# Evaluate on the test set; the returned list of metric dicts is displayed as
# the cell output.
# NOTE(review): passing the in-memory `model` presumably evaluates the weights
# from the final epoch rather than the best checkpoint -- confirm if that matters.
trainer.test(model=model)

val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/shora/Research/rl4co/.venv/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 2/2 [00:10<00:00,  0.19it/s]


[{'test/reward': -5.68765115737915}]