In [1]:
%load_ext autoreload
%autoreload 2

import torch
import numpy as np

from rl4co.envs import CVRPTWEnv, EVRPTWEnv 
from rl4co.models import AttentionModelPolicy, REINFORCE, SymNCO, PPO, POMO, RewardConstrainedPOMO
from rl4co.utils.trainer import RL4COTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import rl4co
print(rl4co.__file__)

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\rl4co\__init__.py


In [3]:
import sys
# sys.path.remove(r"c:\users\hyosi\onedrive\ut\2024 fall\mie1666\project\code\rl4evrptw\rl4co")

for path in sys.path:
    print(path)

c:\Users\hyosi\anaconda3\envs\rl4co\python311.zip
c:\Users\hyosi\anaconda3\envs\rl4co\DLLs
c:\Users\hyosi\anaconda3\envs\rl4co\Lib
c:\Users\hyosi\anaconda3\envs\rl4co

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\win32
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\win32\lib
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\Pythonwin
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\setuptools\_vendor


In [4]:
device = torch.device("cuda")
def enforce_reproducibility(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# [num_loc, num_station, num_ev]
settings =[[10, 3, 3], [20, 3, 3], [50, 6, 6], [100, 12,12]]
hard_envs = []
td_tests = []   # Hard env setting for test (cf. get_action_mask() is different)
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params={'num_loc': num_loc, 
                                        'num_station': num_station,
                                        'vehicle_limit': num_ev,
                                        'vehicle_speed': 5,
                                        'vehicle_capacity': 1.5,
                                        'max_time': 1,
                                        'horizon': 1,
                                        'fuel_consumption_rate': 0.25,
                                        'inverse_recharge_rate': 0.25})
    hard_envs.append(env)
    td_init = env.reset(batch_size=[100]).to(device)
    td_tests.append(td_init)
# hard_env_10, hard_env_20, hard_env_50, hard_env_100 = hard_envs[0], hard_envs[1], hard_envs[2], hard_envs[3]
# td_10_TEST, td_20_TEST, td_50_TEST, td_100_TEST = td_tests[0], td_tests[1], td_tests[2], td_tests[3]

soft_envs = []
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params={'num_loc': num_loc, 
                                        'num_station': num_station,
                                        'vehicle_limit': num_ev,
                                        'vehicle_speed': 5,
                                        'vehicle_capacity': 1.5,
                                        'max_time': 1,
                                        'horizon': 1,
                                        'fuel_consumption_rate': 0.25,
                                        'inverse_recharge_rate': 0.25})
    env.soft = True ## Soft setting
    soft_envs.append(env)
# soft_env_10, soft_env_20, soft_env_50, soft_env_100 = soft_envs[0], soft_envs[1], soft_envs[2], soft_envs[3]


# MAX_EPOCH = 50
# BATCH_SIZE = 512
# TRAIN_DATA_SIZE = BATCH_SIZE * 200
# VAL_DATA_SIZE = BATCH_SIZE * 50
MAX_EPOCH = 10
BATCH_SIZE = 512
TRAIN_DATA_SIZE = BATCH_SIZE * 200
VAL_DATA_SIZE = BATCH_SIZE * 50



In [7]:
soft_policies = []
for soft_env in soft_envs:
    soft_policy = AttentionModelPolicy(env_name=soft_env.name,
                              embed_dim=256,
                              num_encoder_layers=6,
                              num_heads=8,)
    soft_policies.append(soft_policy)

soft_models = []
for soft_env, soft_policy in zip(soft_envs, soft_policies):
    soft_model = RewardConstrainedPOMO(soft_env,
                soft_policy,
                 # baseline="rollout",
                batch_size=BATCH_SIZE,
                train_data_size=TRAIN_DATA_SIZE,
                val_data_size=VAL_DATA_SIZE,
                optimizer_kwargs={"lr": 1e-4, 
                                  "weight_decay": 1e-6})
    soft_models.append(soft_model)


c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.


In [8]:
def validity_check(rewards, infeasibility, soft=False):
    valid_rewards = []
    if soft:
        for infeasible, reward in zip(infeasibility, rewards):
            if -reward > 1000 or infeasible:
                pass
            else:
                valid_rewards.append(reward)
    else:
        for reward in  rewards:
            if -reward > 1000:
                pass
            else:
                valid_rewards.append(reward)
    return np.array(valid_rewards)


def get_reward_and_check(policy, test_data, env_scale, soft=False):
    rewards_trained = []
    num_valids = []
    for td_i, env_i in zip(test_data, env_scale):
        out = policy(td_i.clone(), 
                    env=env_i, 
                    phase="test", 
                    feasibility_check=True, 
                    decode_type="greedy", 
                    return_actions=True)
        valid_out = validity_check(out['reward'].cpu().numpy(), out["infeasibility"], soft=soft)
        rewards_trained.append(valid_out) 
        if soft:
            num_feasible = td_i.batch_size - sum(out["infeasibility"]).cpu().numpy()
            num_valids.append(len(valid_out)+num_feasible)
        else:
            num_valids.append(len(valid_out))       
    return rewards_trained, num_valids


In [9]:
trainer_C_STEP1 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_C_STEP1.fit(soft_models[0])

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 200/200 [00:22<00:00,  9.00it/s, v_num=362, train/reward=-3.59, train/loss=-0.126, val/reward=-3.60]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [00:22<00:00,  8.95it/s, v_num=362, train/reward=-3.59, train/loss=-0.126, val/reward=-3.60]


In [10]:
scale = [10, 20, 50, 100]
policy_c0 = soft_models[0].to(device)
# Validity check is done with hard envs
rewards_c_trained, num_c_valid = get_reward_and_check(policy_c0, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=10, S=3, EV=3 (Constrained)")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=10, S=3, EV=3 (Constrained)
Scale: 10 | FeasibleCounts: 70 | Mean Trained Test Cost (Constrained): 6.631802
Scale: 20 | FeasibleCounts: 13 | Mean Trained Test Cost (Constrained): 7.999508
Scale: 50 | FeasibleCounts: 19 | Mean Trained Test Cost (Constrained): 15.868690
Scale: 100 | FeasibleCounts: 49 | Mean Trained Test Cost (Constrained): 29.106329


In [11]:
del rewards_c_trained, num_c_valid 
torch.cuda.empty_cache() 

In [12]:
trainer_C_STEP1 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_C_STEP1.fit(soft_models[1])

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 9: 100%|██████████| 200/200 [00:38<00:00,  5.26it/s, v_num=363, train/reward=-5.40, train/loss=-0.392, val/reward=-5.31]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [00:38<00:00,  5.24it/s, v_num=363, train/reward=-5.40, train/loss=-0.392, val/reward=-5.31]


In [21]:
scale = [10, 20, 50, 100]
policy_c1 = soft_models[1].to(device)
rewards_c_trained, num_c_valid = get_reward_and_check(policy_c1, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=20, S=3, EV=3 (Constrained)")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=20, S=3, EV=3 (Constrained)
Scale: 10 | FeasibleCounts: 45 | Mean Trained Test Cost (Constrained): 6.912762
Scale: 20 | FeasibleCounts: 1 | Mean Trained Test Cost (Constrained): 8.723574
Scale: 50 | FeasibleCounts: 0 | Mean Trained Test Cost (Constrained): nan
Scale: 100 | FeasibleCounts: 0 | Mean Trained Test Cost (Constrained): nan


  print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")
  ret = ret.dtype.type(ret / rcount)


In [14]:
del rewards_c_trained, num_c_valid 
torch.cuda.empty_cache() 

In [8]:
trainer_C_STEP2 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_C_STEP2.fit(soft_models[2])

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 200/200 [01:56<00:00,  1.72it/s, v_num=323, train/reward=-10.3, train/loss=-1.81, val/reward=-9.97]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [01:56<00:00,  1.71it/s, v_num=323, train/reward=-10.3, train/loss=-1.81, val/reward=-9.97]


In [20]:
scale = [10, 20, 50, 100]
policy_c2 = soft_models[2].to(device)
rewards_c_trained, num_c_valid = get_reward_and_check(policy_c2, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=50, S=6, EV=6 (Constrained)")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=50, S=6, EV=6 (Constrained)
Scale: 10 | FeasibleCounts: 86 | Mean Trained Test Cost (Constrained): 5.373333
Scale: 20 | FeasibleCounts: 23 | Mean Trained Test Cost (Constrained): 8.215921
Scale: 50 | FeasibleCounts: 14 | Mean Trained Test Cost (Constrained): 15.220946
Scale: 100 | FeasibleCounts: 18 | Mean Trained Test Cost (Constrained): 31.732924


In [5]:
device = torch.device("cuda")
def enforce_reproducibility(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# [num_loc, num_station, num_ev]
settings =[[10, 3, 3], [20, 3, 3], [50, 6, 6], [100, 12,12]]
hard_envs = []
td_tests = []   # Hard env setting for test (cf. get_action_mask() is different)
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params={'num_loc': num_loc, 
                                        'num_station': num_station,
                                        'vehicle_limit': num_ev,
                                        'vehicle_speed': 5,
                                        'vehicle_capacity': 1.5,
                                        'max_time': 1,
                                        'horizon': 1,
                                        'fuel_consumption_rate': 0.25,
                                        'inverse_recharge_rate': 0.25})
    hard_envs.append(env)
    td_init = env.reset(batch_size=[100]).to(device)
    td_tests.append(td_init)
# hard_env_10, hard_env_20, hard_env_50, hard_env_100 = hard_envs[0], hard_envs[1], hard_envs[2], hard_envs[3]
# td_10_TEST, td_20_TEST, td_50_TEST, td_100_TEST = td_tests[0], td_tests[1], td_tests[2], td_tests[3]

soft_envs = []
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params={'num_loc': num_loc, 
                                        'num_station': num_station,
                                        'vehicle_limit': num_ev,
                                        'vehicle_speed': 5,
                                        'vehicle_capacity': 1.5,
                                        'max_time': 1,
                                        'horizon': 1,
                                        'fuel_consumption_rate': 0.25,
                                        'inverse_recharge_rate': 0.25})
    env.soft = True ## Soft setting
    soft_envs.append(env)
# soft_env_10, soft_env_20, soft_env_50, soft_env_100 = soft_envs[0], soft_envs[1], soft_envs[2], soft_envs[3]


# MAX_EPOCH = 50
# BATCH_SIZE = 512
# TRAIN_DATA_SIZE = BATCH_SIZE * 200
# VAL_DATA_SIZE = BATCH_SIZE * 50
MAX_EPOCH = 10
BATCH_SIZE = 128
TRAIN_DATA_SIZE = BATCH_SIZE * 200
VAL_DATA_SIZE = BATCH_SIZE * 50

In [6]:
trainer_C_STEP3 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_C_STEP3.fit(soft_models[3])

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 200/200 [03:10<00:00,  1.05it/s, v_num=334, train/reward=-19.0, train/loss=-6.29, val/reward=-17.8]   

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [03:10<00:00,  1.05it/s, v_num=334, train/reward=-19.0, train/loss=-6.29, val/reward=-17.8]


In [7]:
scale = [10, 20, 50, 100]
policy_c3 = soft_models[3].to(device)
rewards_c_trained, num_c_valid = get_reward_and_check(policy_c3, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=100, S=12, EV=12 (Constrained)")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=100, S=12, EV=12 (Constrained)
Scale: 10 | FeasibleCounts: 83 | Mean Trained Test Cost (Constrained): 5.607179
Scale: 20 | FeasibleCounts: 26 | Mean Trained Test Cost (Constrained): 7.925522
Scale: 50 | FeasibleCounts: 46 | Mean Trained Test Cost (Constrained): 15.370482
Scale: 100 | FeasibleCounts: 78 | Mean Trained Test Cost (Constrained): 29.496206


In [8]:
del hard_envs, soft_envs, soft_models, soft_policies
torch.cuda.empty_cache()

In [9]:
device = torch.device("cuda")
def enforce_reproducibility(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# [num_loc, num_station, num_ev]
settings =[[10, 3, 3], [20, 3, 3], [50, 6, 6], [100, 12,12]]
hard_envs = []
td_tests = []   # Hard env setting for test (cf. get_action_mask() is different)
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params={'num_loc': num_loc, 
                                        'num_station': num_station,
                                        'vehicle_limit': num_ev,
                                        'vehicle_speed': 5,
                                        'vehicle_capacity': 1.5,
                                        'max_time': 1,
                                        'horizon': 1,
                                        'fuel_consumption_rate': 0.25,
                                        'inverse_recharge_rate': 0.25})
    hard_envs.append(env)
    td_init = env.reset(batch_size=[100]).to(device)
    td_tests.append(td_init)
# hard_env_10, hard_env_20, hard_env_50, hard_env_100 = hard_envs[0], hard_envs[1], hard_envs[2], hard_envs[3]
# td_10_TEST, td_20_TEST, td_50_TEST, td_100_TEST = td_tests[0], td_tests[1], td_tests[2], td_tests[3]

soft_envs = []
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params={'num_loc': num_loc, 
                                        'num_station': num_station,
                                        'vehicle_limit': num_ev,
                                        'vehicle_speed': 5,
                                        'vehicle_capacity': 1.5,
                                        'max_time': 1,
                                        'horizon': 1,
                                        'fuel_consumption_rate': 0.25,
                                        'inverse_recharge_rate': 0.25})
    env.soft = True ## Soft setting
    soft_envs.append(env)
# soft_env_10, soft_env_20, soft_env_50, soft_env_100 = soft_envs[0], soft_envs[1], soft_envs[2], soft_envs[3]


# MAX_EPOCH = 50
# BATCH_SIZE = 512
# TRAIN_DATA_SIZE = BATCH_SIZE * 200
# VAL_DATA_SIZE = BATCH_SIZE * 50
MAX_EPOCH = 10
BATCH_SIZE = 512
TRAIN_DATA_SIZE = BATCH_SIZE * 200
VAL_DATA_SIZE = BATCH_SIZE * 50

In [6]:
soft_policies = []
for soft_env in soft_envs:
    soft_policy = AttentionModelPolicy(env_name=soft_env.name,
                              embed_dim=256,
                              num_encoder_layers=6,
                              num_heads=8,)
    soft_policies.append(soft_policy)

soft_models = []
for soft_env, soft_policy in zip(soft_envs, soft_policies):
    soft_model = POMO(soft_env,
                soft_policy,
                 # baseline="rollout",
                batch_size=BATCH_SIZE,
                train_data_size=TRAIN_DATA_SIZE,
                val_data_size=VAL_DATA_SIZE,
                optimizer_kwargs={"lr": 1e-4, 
                                  "weight_decay": 1e-6})
    soft_models.append(soft_model)


c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.


In [11]:
def validity_check(rewards, infeasibility, soft=False):
    valid_rewards = []
    if soft:
        for infeasible, reward in zip(infeasibility, rewards):
            if -reward > 1000 or infeasible:
                pass
            else:
                valid_rewards.append(reward)
    else:
        for reward in  rewards:
            if -reward > 1000:
                pass
            else:
                valid_rewards.append(reward)
    return np.array(valid_rewards)


def get_reward_and_check(policy, test_data, env_scale, soft=False):
    rewards_trained = []
    num_valids = []
    for td_i, env_i in zip(test_data, env_scale):
        out = policy(td_i.clone(), 
                    env=env_i, 
                    phase="test", 
                    feasibility_check=True, 
                    decode_type="greedy", 
                    return_actions=True)
        valid_out = validity_check(out['reward'].cpu().numpy(), out["infeasibility"], soft=soft)
        rewards_trained.append(valid_out) 
        if soft:
            num_feasible = td_i.batch_size - sum(out["infeasibility"]).cpu().numpy()
            num_valids.append(len(valid_out)+num_feasible)
        else:
            num_valids.append(len(valid_out))       
    return rewards_trained, num_valids


In [12]:
trainer_C_STEP0 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_C_STEP0.fit(soft_models[0])

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 9: 100%|██████████| 200/200 [00:18<00:00, 10.98it/s, v_num=335, train/reward=-3.27, train/loss=-0.0774, val/reward=-3.27]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [00:18<00:00, 10.90it/s, v_num=335, train/reward=-3.27, train/loss=-0.0774, val/reward=-3.27]


In [13]:
scale = [10, 20, 50, 100]
policy_c0 = soft_models[0].to(device)
# Validity check is done with hard envs
rewards_c_trained, num_c_valid = get_reward_and_check(policy_c0, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=10, S=3, EV=3 (Constrained)")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=10, S=3, EV=3 (Constrained)
Scale: 10 | FeasibleCounts: 82 | Mean Trained Test Cost (Constrained): 5.027235
Scale: 20 | FeasibleCounts: 29 | Mean Trained Test Cost (Constrained): 7.132036
Scale: 50 | FeasibleCounts: 16 | Mean Trained Test Cost (Constrained): 14.244226
Scale: 100 | FeasibleCounts: 45 | Mean Trained Test Cost (Constrained): 27.166672


In [14]:
del rewards_c_trained, num_c_valid 
torch.cuda.empty_cache() 

In [15]:
trainer_C_STEP1 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_C_STEP1.fit(soft_models[1])

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 9: 100%|██████████| 200/200 [00:35<00:00,  5.70it/s, v_num=336, train/reward=-4.39, train/loss=-0.169, val/reward=-4.38]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [00:35<00:00,  5.68it/s, v_num=336, train/reward=-4.39, train/loss=-0.169, val/reward=-4.38]


In [16]:
scale = [10, 20, 50, 100]
policy_c1 = soft_models[1].to(device)
rewards_c_trained, num_c_valid = get_reward_and_check(policy_c1, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=20, S=3, EV=3 (Constrained)")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=20, S=3, EV=3 (Constrained)
Scale: 10 | FeasibleCounts: 59 | Mean Trained Test Cost (Constrained): 6.807777
Scale: 20 | FeasibleCounts: 2 | Mean Trained Test Cost (Constrained): 9.660480
Scale: 50 | FeasibleCounts: 0 | Mean Trained Test Cost (Constrained): nan
Scale: 100 | FeasibleCounts: 1 | Mean Trained Test Cost (Constrained): 31.319660


  print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")
  ret = ret.dtype.type(ret / rcount)


In [17]:
del rewards_c_trained, num_c_valid 
torch.cuda.empty_cache() 

In [18]:
trainer_C_STEP2 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_C_STEP2.fit(soft_models[2])

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Epoch 9: 100%|██████████| 200/200 [01:56<00:00,  1.71it/s, v_num=337, train/reward=-7.30, train/loss=-1.48, val/reward=-6.89] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [01:56<00:00,  1.71it/s, v_num=337, train/reward=-7.30, train/loss=-1.48, val/reward=-6.89]


In [19]:
scale = [10, 20, 50, 100]
policy_c2 = soft_models[2].to(device)
rewards_c_trained, num_c_valid = get_reward_and_check(policy_c2, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=50, S=6, EV=6 (Constrained)")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=50, S=6, EV=6 (Constrained)
Scale: 10 | FeasibleCounts: 61 | Mean Trained Test Cost (Constrained): 6.242512
Scale: 20 | FeasibleCounts: 5 | Mean Trained Test Cost (Constrained): 8.292636
Scale: 50 | FeasibleCounts: 1 | Mean Trained Test Cost (Constrained): 15.748408
Scale: 100 | FeasibleCounts: 13 | Mean Trained Test Cost (Constrained): 32.167564


In [20]:
del rewards_c_trained, num_c_valid 
torch.cuda.empty_cache() 

In [7]:
device = torch.device("cuda")
def enforce_reproducibility(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# [num_loc, num_station, num_ev]
settings =[[10, 3, 3], [20, 3, 3], [50, 6, 6], [100, 12,12]]
hard_envs = []
td_tests = []   # Hard env setting for test (cf. get_action_mask() is different)
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params={'num_loc': num_loc, 
                                        'num_station': num_station,
                                        'vehicle_limit': num_ev,
                                        'vehicle_speed': 5,
                                        'vehicle_capacity': 1.5,
                                        'max_time': 1,
                                        'horizon': 1,
                                        'fuel_consumption_rate': 0.25,
                                        'inverse_recharge_rate': 0.25})
    hard_envs.append(env)
    td_init = env.reset(batch_size=[100]).to(device)
    td_tests.append(td_init)
# hard_env_10, hard_env_20, hard_env_50, hard_env_100 = hard_envs[0], hard_envs[1], hard_envs[2], hard_envs[3]
# td_10_TEST, td_20_TEST, td_50_TEST, td_100_TEST = td_tests[0], td_tests[1], td_tests[2], td_tests[3]

soft_envs = []
for num_loc, num_station, num_ev in settings:
    enforce_reproducibility(0)
    env = EVRPTWEnv(generator_params={'num_loc': num_loc, 
                                        'num_station': num_station,
                                        'vehicle_limit': num_ev,
                                        'vehicle_speed': 5,
                                        'vehicle_capacity': 1.5,
                                        'max_time': 1,
                                        'horizon': 1,
                                        'fuel_consumption_rate': 0.25,
                                        'inverse_recharge_rate': 0.25})
    env.soft = True ## Soft setting
    soft_envs.append(env)
# soft_env_10, soft_env_20, soft_env_50, soft_env_100 = soft_envs[0], soft_envs[1], soft_envs[2], soft_envs[3]


# MAX_EPOCH = 50
# BATCH_SIZE = 512
# TRAIN_DATA_SIZE = BATCH_SIZE * 200
# VAL_DATA_SIZE = BATCH_SIZE * 50
MAX_EPOCH = 10
BATCH_SIZE = 128
TRAIN_DATA_SIZE = BATCH_SIZE * 200
VAL_DATA_SIZE = BATCH_SIZE * 50

In [8]:
trainer_C_STEP3 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_C_STEP3.fit(soft_models[3])

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | SharedBaseline       | 0      | train
----------------------------------------------------------
3.6 M     Trainable params
0         Non-trainable params
3.6 M     Total params
14.241    Total estimated model params size (MB)
126       Modules in train mode
0         Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 200/200 [02:54<00:00,  1.15it/s, v_num=339, train/reward=-10.2, train/loss=-0.869, val/reward=-10.3]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [02:54<00:00,  1.14it/s, v_num=339, train/reward=-10.2, train/loss=-0.869, val/reward=-10.3]


In [9]:
scale = [10, 20, 50, 100]
policy_c3 = soft_models[3].to(device)
rewards_c_trained, num_c_valid = get_reward_and_check(policy_c3, td_tests, hard_envs, soft=False)

print("\nTrained with Environment of C=100, S=12, EV=12 (Constrained)")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")


Trained with Environment of C=100, S=12, EV=12 (Constrained)
Scale: 10 | FeasibleCounts: 42 | Mean Trained Test Cost (Constrained): 6.956683
Scale: 20 | FeasibleCounts: 1 | Mean Trained Test Cost (Constrained): 9.733961
Scale: 50 | FeasibleCounts: 0 | Mean Trained Test Cost (Constrained): nan
Scale: 100 | FeasibleCounts: 0 | Mean Trained Test Cost (Constrained): nan


  print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")
  ret = ret.dtype.type(ret / rcount)
