In [1]:
%load_ext autoreload
%autoreload 2

import torch
import numpy as np

from rl4co.envs import CVRPTWEnv, EVRPTWEnv 
from rl4co.models import AttentionModelPolicy, REINFORCE, SymNCO, PPO, POMO, RewardConstrainedPOMO
from rl4co.utils.trainer import RL4COTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sanity check: print the path of the rl4co package that was actually imported,
# so it is clear which installation this kernel is using.
import rl4co
print(rl4co.__file__)

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\rl4co\__init__.py


In [3]:
import sys

# List the module search path, one entry per line, to see where imports are
# resolved from. (A previously used `sys.path.remove(...)` of a local rl4co
# checkout was left disabled in the original cell and is not executed.)
for search_entry in sys.path:
    print(search_entry)

c:\Users\hyosi\anaconda3\envs\rl4co\python311.zip
c:\Users\hyosi\anaconda3\envs\rl4co\DLLs
c:\Users\hyosi\anaconda3\envs\rl4co\Lib
c:\Users\hyosi\anaconda3\envs\rl4co

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\win32
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\win32\lib
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\Pythonwin
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\setuptools\_vendor


In [4]:
# Device that test batches and trained models are moved to.
device = torch.device("cuda")


def enforce_reproducibility(seed):
    """Seed NumPy and PyTorch so that subsequent random draws are repeatable.

    Args:
        seed: Integer seed passed to `np.random.seed`, `torch.manual_seed` and,
            when CUDA is available, `torch.cuda.manual_seed_all`.

    Returns:
        None. The function only mutates global RNG / cuDNN state.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False      # disable cuDNN autotuning
    cudnn.deterministic = True   # request deterministic cuDNN kernels

# One entry per problem scale: [num_loc, num_station, num_ev]
settings = [[10, 3, 3], [20, 3, 3], [50, 6, 6], [100, 12, 12]]


def _evrptw_generator_params(num_loc, num_station, num_ev):
    """Return the `generator_params` dict used for every EVRPTWEnv in this notebook.

    Only the three size arguments vary between scales; all other values are
    the fixed constants shared by the hard and soft environments.
    """
    return {
        'num_loc': num_loc,
        'num_station': num_station,
        'vehicle_limit': num_ev,
        'vehicle_speed': 5,
        'vehicle_capacity': 1.5,
        'max_time': 1,
        'horizon': 1,
        'fuel_consumption_rate': 0.25,
        'inverse_recharge_rate': 0.25,
    }


# Hard environments, plus one fixed test batch of 100 instances per scale.
# Original note: the hard env setting is used for testing because
# get_action_mask() differs between the hard and soft settings.
hard_envs = []
td_tests = []
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)  # reseed before building each environment
    env = EVRPTWEnv(generator_params=_evrptw_generator_params(num_loc, num_station, num_ev))
    hard_envs.append(env)
    td_init = env.reset(batch_size=[100]).to(device)
    td_tests.append(td_init)

# Soft environments: identical generator parameters, with `soft` switched on.
soft_envs = []
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params=_evrptw_generator_params(num_loc, num_station, num_ev))
    env.soft = True
    soft_envs.append(env)

# Training budget. The original cell also carried a commented-out variant with
# MAX_EPOCH = 50 (same batch and dataset sizes).
MAX_EPOCH = 5
BATCH_SIZE = 512
TRAIN_DATA_SIZE = 200 * BATCH_SIZE
VAL_DATA_SIZE = 50 * BATCH_SIZE



In [4]:
# One attention policy per soft environment; all policies are created first,
# then wrapped, so the construction order matches the original two-pass loops.
soft_policies = [
    AttentionModelPolicy(
        env_name=soft_env.name,
        embed_dim=256,
        num_encoder_layers=6,
        num_heads=8,
    )
    for soft_env in soft_envs
]

# Pair each soft environment with its policy inside a RewardConstrainedPOMO model.
soft_models = [
    RewardConstrainedPOMO(
        soft_env,
        soft_policy,
        batch_size=BATCH_SIZE,
        train_data_size=TRAIN_DATA_SIZE,
        val_data_size=VAL_DATA_SIZE,
        optimizer_kwargs={"lr": 1e-4, "weight_decay": 1e-6},
    )
    for soft_env, soft_policy in zip(soft_envs, soft_policies)
]


c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.


In [2]:
def validity_check(rewards, infeasibility, soft=False):
    """Keep only the rewards that count as valid solutions.

    A reward is discarded when its cost (`-reward`) is greater than 1000.
    When `soft` is true, a reward is additionally discarded if the matching
    entry of `infeasibility` is truthy; otherwise `infeasibility` is ignored.

    Args:
        rewards: Iterable of per-instance rewards (negative costs).
        infeasibility: Iterable of per-instance flags, paired with `rewards`
            by position. Only read when `soft` is true.
        soft: Whether to also filter on the infeasibility flags.

    Returns:
        A NumPy array holding the retained rewards in their original order.
    """
    if soft:
        kept = [
            reward
            for infeasible, reward in zip(infeasibility, rewards)
            if not (-reward > 1000 or infeasible)
        ]
    else:
        kept = [reward for reward in rewards if not (-reward > 1000)]
    return np.array(kept)


def get_reward_and_check(policy, test_data, env_scale, soft=False):
    """Greedily evaluate `policy` on each test batch and collect the valid rewards.

    For every `(batch, env)` pair the policy is called in test phase with
    greedy decoding and feasibility checking enabled. The resulting rewards
    are filtered through `validity_check`.

    Args:
        policy: Callable invoked as `policy(td, env=..., phase="test", ...)`
            that returns a mapping with `'reward'` and `'infeasibility'` entries.
        test_data: Iterable of test batches; each must support `.clone()`.
        env_scale: Iterable of environments, paired with `test_data` by position.
        soft: Forwarded to `validity_check`; when true, instances flagged as
            infeasible are filtered out as well.

    Returns:
        A tuple `(rewards_trained, num_valids)`: a list with one array of valid
        rewards per batch, and a list with the number of valid rewards per batch.
    """
    rewards_trained = []
    num_valids = []
    for td_i, env_i in zip(test_data, env_scale):
        # Pure evaluation: skip building an autograd graph for the rollout.
        with torch.no_grad():
            out = policy(
                td_i.clone(),
                env=env_i,
                phase="test",
                feasibility_check=True,
                decode_type="greedy",
                return_actions=True,
            )
        valid_out = validity_check(
            out['reward'].detach().cpu().numpy(), out["infeasibility"], soft=soft
        )
        rewards_trained.append(valid_out)
        # `validity_check` has already removed infeasible instances in soft
        # mode, so its length is the valid count. The previous soft branch
        # added the feasible count on top, counting those instances twice.
        num_valids.append(len(valid_out))
    return rewards_trained, num_valids


In [7]:
# Stage 1 - soft training (w/o masking): fit the RewardConstrainedPOMO model
# that wraps the first soft environment.
trainer_C_STEP = RL4COTrainer(max_epochs=MAX_EPOCH, accelerator="gpu", devices=1, logger=None)
trainer_C_STEP.fit(soft_models[0])

# Stage 2 - hard training (w/ masking): wrap the same policy object that
# stage 1 trained in a POMO model on the matching hard environment.
hard_model = POMO(
    hard_envs[0],
    soft_policies[0],
    batch_size=BATCH_SIZE,
    train_data_size=TRAIN_DATA_SIZE,
    val_data_size=VAL_DATA_SIZE,
    optimizer_kwargs={"lr": 1e-4, "weight_decay": 1e-6},
)

trainer_STEP = RL4COTrainer(max_epochs=MAX_EPOCH, accelerator="gpu", devices=1, logger=None)
trainer_STEP.fit(hard_model)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 4: 100%|██████████| 200/200 [00:21<00:00,  9.34it/s, v_num=340, train/reward=-3.66, train/loss=-0.178, val/reward=-3.65]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 200/200 [00:21<00:00,  9.28it/s, v_num=340, train/reward=-3.66, train/loss=-0.178, val/reward=-3.65]

Using 16bit Automatic Mixed Precision (AMP)





GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|██████████| 200/200 [00:19<00:00, 10.31it/s, v_num=341, train/reward=-4.61, train/loss=-0.0642, val/reward=-4.54]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 200/200 [00:19<00:00, 10.25it/s, v_num=341, train/reward=-4.61, train/loss=-0.0642, val/reward=-4.54]


In [8]:
# Labels for the four test batches; they match the num_loc values in `settings`.
scale = [10, 20, 50, 100]
# Note: despite the name, `policy` is the whole POMO model (hard_model), moved to `device`.
policy = hard_model.to(device)
# Evaluate on every hard test batch; with soft=False only rewards whose cost
# exceeds 1000 are filtered out by validity_check.
rewards_c_trained, num_c_valid = get_reward_and_check(policy, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=10, S=3, EV=3 (Soft -> Hard)")
for i, s in enumerate(scale):
    # NOTE(review): `:3f` is a minimum field width of 3 with the default
    # 6-digit precision; `.3f` may have been intended - confirm before changing.
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=10, S=3, EV=3 (Soft -> Hard)
Scale: 10 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 4.238647
Scale: 20 | FeasibleCounts: 96 | Mean Trained Test Cost (Constrained): 6.536129
Scale: 50 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 13.090445
Scale: 100 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 23.495939


In [9]:
# Drop the references created by the training and evaluation cells, then ask
# PyTorch to release cached GPU memory before the next scale is trained.
del trainer_C_STEP, trainer_STEP
del hard_model, policy
del rewards_c_trained, num_c_valid
torch.cuda.empty_cache()

In [10]:
# Stage 1 - soft training (w/o masking): fit the RewardConstrainedPOMO model
# that wraps the second soft environment.
trainer_C_STEP = RL4COTrainer(max_epochs=MAX_EPOCH, accelerator="gpu", devices=1, logger=None)
trainer_C_STEP.fit(soft_models[1])

# Stage 2 - hard training (w/ masking): wrap the same policy object that
# stage 1 trained in a POMO model on the matching hard environment.
hard_model = POMO(
    hard_envs[1],
    soft_policies[1],
    batch_size=BATCH_SIZE,
    train_data_size=TRAIN_DATA_SIZE,
    val_data_size=VAL_DATA_SIZE,
    optimizer_kwargs={"lr": 1e-4, "weight_decay": 1e-6},
)

trainer_STEP = RL4COTrainer(max_epochs=MAX_EPOCH, accelerator="gpu", devices=1, logger=None)
trainer_STEP.fit(hard_model)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|██████████| 200/200 [00:37<00:00,  5.34it/s, v_num=342, train/reward=-5.64, train/loss=-0.527, val/reward=-5.46]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 200/200 [00:37<00:00,  5.32it/s, v_num=342, train/reward=-5.64, train/loss=-0.527, val/reward=-5.46]

Using 16bit Automatic Mixed Precision (AMP)





GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|██████████| 200/200 [00:32<00:00,  6.12it/s, v_num=343, train/reward=-7.07, train/loss=-0.163, val/reward=-6.92]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 200/200 [00:32<00:00,  6.10it/s, v_num=343, train/reward=-7.07, train/loss=-0.163, val/reward=-6.92]


In [11]:
# Labels for the four test batches; they match the num_loc values in `settings`.
scale = [10, 20, 50, 100]
# Note: despite the name, `policy` is the whole POMO model (hard_model), moved to `device`.
policy = hard_model.to(device)
# Evaluate on every hard test batch; with soft=False only rewards whose cost
# exceeds 1000 are filtered out by validity_check.
rewards_c_trained, num_c_valid = get_reward_and_check(policy, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=20, S=3, EV=3 (Soft -> Hard)")
for i, s in enumerate(scale):
    # NOTE(review): `:3f` is a minimum field width of 3 with the default
    # 6-digit precision; `.3f` may have been intended - confirm before changing.
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=20, S=3, EV=3 (Soft -> Hard)
Scale: 10 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 4.296908
Scale: 20 | FeasibleCounts: 98 | Mean Trained Test Cost (Constrained): 6.431041
Scale: 50 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 12.404896
Scale: 100 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 25.360874


In [12]:
# Drop the references created by the training and evaluation cells, then ask
# PyTorch to release cached GPU memory before the next scale is trained.
del trainer_C_STEP, trainer_STEP
del hard_model, policy
del rewards_c_trained, num_c_valid
torch.cuda.empty_cache()

In [13]:
# Stage 1 - soft training (w/o masking): fit the RewardConstrainedPOMO model
# that wraps the third soft environment.
trainer_C_STEP = RL4COTrainer(max_epochs=MAX_EPOCH, accelerator="gpu", devices=1, logger=None)
trainer_C_STEP.fit(soft_models[2])

# Stage 2 - hard training (w/ masking): wrap the same policy object that
# stage 1 trained in a POMO model on the matching hard environment.
hard_model = POMO(
    hard_envs[2],
    soft_policies[2],
    batch_size=BATCH_SIZE,
    train_data_size=TRAIN_DATA_SIZE,
    val_data_size=VAL_DATA_SIZE,
    optimizer_kwargs={"lr": 1e-4, "weight_decay": 1e-6},
)

trainer_STEP = RL4COTrainer(max_epochs=MAX_EPOCH, accelerator="gpu", devices=1, logger=None)
trainer_STEP.fit(hard_model)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|██████████| 200/200 [02:00<00:00,  1.66it/s, v_num=344, train/reward=-10.5, train/loss=-1.42, val/reward=-10.2] 

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 200/200 [02:00<00:00,  1.66it/s, v_num=344, train/reward=-10.5, train/loss=-1.42, val/reward=-10.2]


Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|██████████| 200/200 [01:28<00:00,  2.26it/s, v_num=345, train/reward=-12.3, train/loss=-0.544, val/reward=-12.1]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 200/200 [01:28<00:00,  2.26it/s, v_num=345, train/reward=-12.3, train/loss=-0.544, val/reward=-12.1]


In [14]:
# Labels for the four test batches; they match the num_loc values in `settings`.
scale = [10, 20, 50, 100]
# Note: despite the name, `policy` is the whole POMO model (hard_model), moved to `device`.
policy = hard_model.to(device)
# Evaluate on every hard test batch; with soft=False only rewards whose cost
# exceeds 1000 are filtered out by validity_check.
rewards_c_trained, num_c_valid = get_reward_and_check(policy, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=50, S=6, EV=6 (Soft -> Hard)")
for i, s in enumerate(scale):
    # NOTE(review): `:3f` is a minimum field width of 3 with the default
    # 6-digit precision; `.3f` may have been intended - confirm before changing.
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=50, S=6, EV=6 (Soft -> Hard)
Scale: 10 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 4.565583
Scale: 20 | FeasibleCounts: 86 | Mean Trained Test Cost (Constrained): 6.509837
Scale: 50 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 11.726592
Scale: 100 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 20.970861


In [15]:
# Drop the training/evaluation references as well as the environments and
# test batches, then ask PyTorch to release cached GPU memory.
del trainer_C_STEP, trainer_STEP
del hard_model, policy
del rewards_c_trained, num_c_valid
del td_tests, soft_envs, hard_envs
torch.cuda.empty_cache()

In [3]:
# Device that test batches and trained models are moved to.
device = torch.device("cuda")


def enforce_reproducibility(seed):
    """Seed NumPy and PyTorch so that subsequent random draws are repeatable.

    This repeats the definition from the earlier setup cell of the notebook.

    Args:
        seed: Integer seed passed to `np.random.seed`, `torch.manual_seed` and,
            when CUDA is available, `torch.cuda.manual_seed_all`.

    Returns:
        None. The function only mutates global RNG / cuDNN state.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False      # disable cuDNN autotuning
    cudnn.deterministic = True   # request deterministic cuDNN kernels

# One entry per problem scale: [num_loc, num_station, num_ev]
settings = [[10, 3, 3], [20, 3, 3], [50, 6, 6], [100, 12, 12]]


def _evrptw_generator_params(num_loc, num_station, num_ev):
    """Return the `generator_params` dict used for every EVRPTWEnv in this notebook.

    Only the three size arguments vary between scales; all other values are
    the fixed constants shared by the hard and soft environments.
    """
    return {
        'num_loc': num_loc,
        'num_station': num_station,
        'vehicle_limit': num_ev,
        'vehicle_speed': 5,
        'vehicle_capacity': 1.5,
        'max_time': 1,
        'horizon': 1,
        'fuel_consumption_rate': 0.25,
        'inverse_recharge_rate': 0.25,
    }


# Hard environments, plus one fixed test batch of 100 instances per scale.
# Original note: the hard env setting is used for testing because
# get_action_mask() differs between the hard and soft settings.
hard_envs = []
td_tests = []
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)  # reseed before building each environment
    env = EVRPTWEnv(generator_params=_evrptw_generator_params(num_loc, num_station, num_ev))
    hard_envs.append(env)
    td_init = env.reset(batch_size=[100]).to(device)
    td_tests.append(td_init)

# Soft environments: identical generator parameters, with `soft` switched on.
soft_envs = []
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params=_evrptw_generator_params(num_loc, num_station, num_ev))
    env.soft = True
    soft_envs.append(env)

# Training budget for this cell: same epoch count as the earlier setup cell,
# but BATCH_SIZE is 128 here instead of 512 (dataset sizes scale with it).
# The original cell also carried a commented-out variant with MAX_EPOCH = 50
# and BATCH_SIZE = 512.
MAX_EPOCH = 5
BATCH_SIZE = 128
TRAIN_DATA_SIZE = 200 * BATCH_SIZE
VAL_DATA_SIZE = 50 * BATCH_SIZE



In [5]:
# Stage 1 - soft training (w/o masking): fit the RewardConstrainedPOMO model
# that wraps the fourth soft environment.
trainer_C_STEP = RL4COTrainer(max_epochs=MAX_EPOCH, accelerator="gpu", devices=1, logger=None)
trainer_C_STEP.fit(soft_models[3])

# Stage 2 - hard training (w/ masking): wrap the same policy object that
# stage 1 trained in a POMO model on the matching hard environment.
hard_model = POMO(
    hard_envs[3],
    soft_policies[3],
    batch_size=BATCH_SIZE,
    train_data_size=TRAIN_DATA_SIZE,
    val_data_size=VAL_DATA_SIZE,
    optimizer_kwargs={"lr": 1e-4, "weight_decay": 1e-6},
)

trainer_STEP = RL4COTrainer(max_epochs=MAX_EPOCH, accelerator="gpu", devices=1, logger=None)
trainer_STEP.fit(hard_model)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 4: 100%|██████████| 200/200 [03:03<00:00,  1.09it/s, v_num=347, train/reward=-18.1, train/loss=-3.22, val/reward=-17.3]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 200/200 [03:03<00:00,  1.09it/s, v_num=347, train/reward=-18.1, train/loss=-3.22, val/reward=-17.3]


Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 4: 100%|██████████| 200/200 [02:22<00:00,  1.40it/s, v_num=348, train/reward=-20.0, train/loss=-1.76, val/reward=-19.5]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 200/200 [02:23<00:00,  1.40it/s, v_num=348, train/reward=-20.0, train/loss=-1.76, val/reward=-19.5]


In [6]:
# Labels for the four test batches; they match the num_loc values in `settings`.
scale = [10, 20, 50, 100]
# Note: despite the name, `policy` is the whole POMO model (hard_model), moved to `device`.
policy = hard_model.to(device)
# Evaluate on every hard test batch; with soft=False only rewards whose cost
# exceeds 1000 are filtered out by validity_check.
rewards_c_trained, num_c_valid = get_reward_and_check(policy, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=100, S=12, EV=12 (Soft -> Hard)")
for i, s in enumerate(scale):
    # NOTE(review): `:3f` is a minimum field width of 3 with the default
    # 6-digit precision; `.3f` may have been intended - confirm before changing.
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=100, S=12, EV=12 (Soft -> Hard)
Scale: 10 | FeasibleCounts: 99 | Mean Trained Test Cost (Constrained): 4.621468
Scale: 20 | FeasibleCounts: 77 | Mean Trained Test Cost (Constrained): 6.787745
Scale: 50 | FeasibleCounts: 99 | Mean Trained Test Cost (Constrained): 12.109081
Scale: 100 | FeasibleCounts: 100 | Mean Trained Test Cost (Constrained): 19.127737
