In [1]:
%load_ext autoreload
%autoreload 2

import torch
import numpy as np

from rl4co.envs import CVRPTWEnv, EVRPTWEnv 
from rl4co.models import AttentionModelPolicy, REINFORCE, SymNCO, PPO, POMO, RewardConstrainedPOMO
from rl4co.utils.trainer import RL4COTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import rl4co
print(rl4co.__file__)

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\rl4co\__init__.py


In [3]:
import sys
# sys.path.remove(r"c:\users\hyosi\onedrive\ut\2024 fall\mie1666\project\code\rl4evrptw\rl4co")

for path in sys.path:
    print(path)

c:\Users\hyosi\anaconda3\envs\rl4co\python311.zip
c:\Users\hyosi\anaconda3\envs\rl4co\DLLs
c:\Users\hyosi\anaconda3\envs\rl4co\Lib
c:\Users\hyosi\anaconda3\envs\rl4co

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\win32
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\win32\lib
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\Pythonwin
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\setuptools\_vendor


In [4]:
device = torch.device("cuda")
def enforce_reproducibility(seed):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

enforce_reproducibility(0)
env_10 = EVRPTWEnv(generator_params={'num_loc': 10, 
                                    'num_station': 3,
                                    'vehicle_limit': 3,
                                    'vehicle_speed': 5,
                                    'vehicle_capacity': 1.5,
                                    'max_time': 1,
                                    'horizon': 1,
                                    'fuel_consumption_rate': 0.25,
                                    'inverse_recharge_rate': 0.25})
td_10_TEST = env_10.reset(batch_size=[100]).to(device)


enforce_reproducibility(0)
env_20 = EVRPTWEnv(generator_params={'num_loc': 20, 
                                     'num_station': 3,
                                     'vehicle_limit': 3,
                                     'vehicle_speed': 5,
                                     'vehicle_capacity': 1.5,
                                     'max_time': 1,
                                     'horizon': 1,
                                     'fuel_consumption_rate': 0.25,
                                     'inverse_recharge_rate': 0.25})
td_20_TEST = env_20.reset(batch_size=[100]).to(device)


enforce_reproducibility(0)
env_50 = EVRPTWEnv(generator_params={'num_loc': 50, 
                                     'num_station': 6,
                                     'vehicle_limit': 6,
                                     'vehicle_speed': 5,
                                     'vehicle_capacity': 1.5,
                                     'max_time': 1,
                                     'horizon': 1,
                                     'fuel_consumption_rate': 0.25,
                                     'inverse_recharge_rate': 0.25})
td_50_TEST = env_50.reset(batch_size=[100]).to(device)


enforce_reproducibility(0)
env_100 = EVRPTWEnv(generator_params={'num_loc': 100,
                                    'num_station': 12,
                                    'vehicle_limit': 12,
                                    'vehicle_speed': 5,
                                    'vehicle_capacity': 1.5,
                                    'max_time': 1,
                                    'horizon': 1,
                                    'fuel_consumption_rate': 0.25,
                                    'inverse_recharge_rate': 0.25})
td_100_TEST = env_100.reset(batch_size=[100]).to(device)

MAX_EPOCH = 10
BATCH_SIZE = 512
TRAIN_DATA_SIZE = BATCH_SIZE * 200
VAL_DATA_SIZE = BATCH_SIZE * 50
# MAX_EPOCH = 1
# BATCH_SIZE = 512
# TRAIN_DATA_SIZE = BATCH_SIZE * 50
# VAL_DATA_SIZE = BATCH_SIZE * 10



In [5]:
# Policy: neural network, in this case with encoder-decoder architecture
policy1 = AttentionModelPolicy(env_name=env_10.name,
                              embed_dim=256,
                              num_encoder_layers=6,
                              num_heads=8,)

policy2 = AttentionModelPolicy(env_name=env_20.name,
                              embed_dim=256,
                              num_encoder_layers=6,
                              num_heads=8,)

policy5 = AttentionModelPolicy(env_name=env_50.name,
                              embed_dim=256,
                              num_encoder_layers=6,
                              num_heads=8,)

model_10 = REINFORCE(env_10,
                policy1,
                 baseline="rollout",
                batch_size=BATCH_SIZE,
                train_data_size=TRAIN_DATA_SIZE,
                val_data_size=VAL_DATA_SIZE,
                optimizer_kwargs={"lr": 1e-4, 
                                  "weight_decay": 1e-6})

model_20 = REINFORCE(env_20,
                policy2,
                baseline="rollout",
                batch_size=BATCH_SIZE,
                train_data_size=TRAIN_DATA_SIZE,
                val_data_size=VAL_DATA_SIZE,
                optimizer_kwargs={"lr": 1e-4, 
                                "weight_decay": 1e-6})

model_50 = REINFORCE(env_50,
                policy5,
                baseline="rollout",
                batch_size=BATCH_SIZE,
                train_data_size=TRAIN_DATA_SIZE,
                val_data_size=VAL_DATA_SIZE,
                optimizer_kwargs={"lr": 1e-4, 
                                "weight_decay": 1e-6})




c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.


In [6]:
# Greedy rollouts over untrained policy
device = torch.device("cuda")

policy = policy1.to(device)
out_10 = policy(td_10_TEST.clone(), env=env_10, phase="test", decode_type="greedy", return_actions=True)
out_20 = policy(td_20_TEST.clone(), env=env_20, phase="test", decode_type="greedy", return_actions=True)
out_50 = policy(td_50_TEST.clone(), env=env_50, phase="test", decode_type="greedy", return_actions=True)
out_100 = policy(td_100_TEST.clone(), env=env_100, phase="test", decode_type="greedy", return_actions=True)

rewards_untrained_10 = out_10['reward'].cpu().numpy()
rewards_untrained_20 = out_20['reward'].cpu().numpy()
rewards_untrained_50 = out_50['reward'].cpu().numpy()
rewards_untrained_100 = out_100['reward'].cpu().numpy()
rewards = [rewards_untrained_10, rewards_untrained_20, rewards_untrained_50, rewards_untrained_100]
# print(f"Scale: 10 | Mean Untrained Test Cost: {-rewards_untrained_10.mean():3f}")
scale = [10, 20, 50, 100]
for i in range(4):
    print(f"Scale: {scale[i]} | Mean Untrained Test Cost: {-rewards[i].mean():3f}")

Scale: 10 | Mean Untrained Test Cost: 9.408985
Scale: 20 | Mean Untrained Test Cost: 17.372381
Scale: 50 | Mean Untrained Test Cost: 40.112305
Scale: 100 | Mean Untrained Test Cost: 75.287361


In [7]:
del out_10, out_20, out_50, out_100
torch.cuda.empty_cache() 

In [3]:
def validity_check(rewards, raw=False):
    valid_rewards = []
    for reward in rewards:
        if -reward > 1000:
            pass
        else:
            valid_rewards.append(reward)
    return np.array(valid_rewards)

def get_reward_and_check(policy, test_data, env_scale):
    rewards_trained = []
    rewards_trained_for_fesibility = []
    num_valids = []
    for td_i, env_i in zip(test_data, env_scale):
        out = policy(td_i.clone(), 
                    env=env_i, 
                    phase="test", 
                    feasibility_check=True, 
                    decode_type="greedy", 
                    return_actions=True)
        valid_out = validity_check(out['reward'].cpu().numpy())
        rewards_trained.append(valid_out)
        num_valids.append(len(valid_out))

    return rewards_trained, num_valids

In [9]:
trainer_STEP1 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_STEP1.fit(model_10)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | WarmupBaseline       | 3.6 M  | train
----------------------------------------------------------
7.1 M     Trainable params
0         Non-trainable params
7.1 M     Total params
28.482    Total estimated model params size (MB)
128       Modules in train mode
124       Modules in eval mode


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 200/200 [00:19<00:00, 10.28it/s, v_num=357, train/reward=-4.24, train/loss=-0.152, val/reward=-4.21]   

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [00:24<00:00,  8.07it/s, v_num=357, train/reward=-4.24, train/loss=-0.152, val/reward=-4.21]


In [10]:
td_scale_test = [td_10_TEST, td_20_TEST, td_50_TEST, td_100_TEST]
env_scale = [env_10, env_20, env_50, env_100]
scale = [10, 20, 50, 100]

policy1 = policy1.to(device)
rewards_trained, num_valid = get_reward_and_check(policy1, td_scale_test, env_scale)
# print(rewards_trained)
print("Trained with Environment of C=10, S=3, EV=3")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_valid[i]} | Mean Trained Test Cost: {-rewards_trained[i].mean():3f}")


# policy_c1 = policy_c1.to(device)
# rewards_c_trained, num_c_valid = get_reward_and_check(policy_c1, td_scale_test, env_scale)
# print("\nTrained with Environment of C=10, S=3, EV=3 (Constrained)")
# for i, s in enumerate(scale):
#     print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")

Trained with Environment of C=10, S=3, EV=3
Scale: 10 | FeasibleCounts: 100 | Mean Trained Test Cost: 4.357929
Scale: 20 | FeasibleCounts: 95 | Mean Trained Test Cost: 6.851532
Scale: 50 | FeasibleCounts: 100 | Mean Trained Test Cost: 13.800399
Scale: 100 | FeasibleCounts: 100 | Mean Trained Test Cost: 24.466507


In [11]:
# del rewards_trained, rewards_c_trained, num_valid
del rewards_trained, num_valid
torch.cuda.empty_cache()

In [12]:
trainer_STEP2 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_STEP2.fit(model_20)

# trainer_C_STEP2 = RL4COTrainer(
#     max_epochs=MAX_EPOCH,
#     accelerator="gpu",
#     devices=1,
#     logger=None,
# )
# trainer_C_STEP2.fit(model_constrained_20)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | WarmupBaseline       | 3.6 M  | train
----------------------------------------------------------
7.1 M     Trainable params
0         Non-trainable params
7.1 M     Total params
28.482    Total estimated model params size (MB)
128       Modules in train mode
124       Modules in eval mode


Epoch 9: 100%|██████████| 200/200 [00:29<00:00,  6.82it/s, v_num=358, train/reward=-6.93, train/loss=-0.668, val/reward=-6.71]  

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [00:37<00:00,  5.28it/s, v_num=358, train/reward=-6.93, train/loss=-0.668, val/reward=-6.71]


In [13]:
td_scale_test = [td_10_TEST, td_20_TEST, td_50_TEST, td_100_TEST]
env_scale = [env_10, env_20, env_50, env_100]
scale = [10, 20, 50, 100]


policy2 = policy2.to(device)
rewards_trained, num_valid = get_reward_and_check(policy2, td_scale_test, env_scale)
print("Trained with Environment of C=20, S=3, EV=3")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_valid[i]} | Mean Trained Test Cost: {-rewards_trained[i].mean():3f}")


# policy_c2 = policy_c2.to(device)
# rewards_c_trained, num_c_valid = get_reward_and_check(policy_c2, td_scale_test, env_scale)
# print("\nTrained with Environment of C=20, S=3, EV=3 (Constrained)")
# for i, s in enumerate(scale):
#     print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")

Trained with Environment of C=20, S=3, EV=3
Scale: 10 | FeasibleCounts: 100 | Mean Trained Test Cost: 4.389651
Scale: 20 | FeasibleCounts: 95 | Mean Trained Test Cost: 6.696898
Scale: 50 | FeasibleCounts: 100 | Mean Trained Test Cost: 12.970035
Scale: 100 | FeasibleCounts: 100 | Mean Trained Test Cost: 22.611994


In [14]:
del rewards_trained, num_valid
torch.cuda.empty_cache()

In [15]:
trainer_STEP3 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_STEP3.fit(model_50)

# trainer_c_STEP3 = RL4COTrainer(
#     max_epochs=MAX_EPOCH,
#     accelerator="gpu",
#     devices=1,
#     logger=None,
# )
# trainer_c_STEP3.fit(model_constrained_50)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      | train
1 | policy   | AttentionModelPolicy | 3.6 M  | train
2 | baseline | WarmupBaseline       | 3.6 M  | train
----------------------------------------------------------
7.1 M     Trainable params
0         Non-trainable params
7.1 M     Total params
28.482    Total estimated model params size (MB)
128       Modules in train mode
124       Modules in eval mode


Epoch 9: 100%|██████████| 200/200 [00:52<00:00,  3.80it/s, v_num=359, train/reward=-12.8, train/loss=-5.50, val/reward=-12.6] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [01:00<00:00,  3.28it/s, v_num=359, train/reward=-12.8, train/loss=-5.50, val/reward=-12.6]


In [16]:
td_scale_test = [td_10_TEST, td_20_TEST, td_50_TEST, td_100_TEST]
env_scale = [env_10, env_20, env_50, env_100]
scale = [10, 20, 50, 100]


policy5 = policy5.to(device)
rewards_trained, num_valid = get_reward_and_check(policy5, td_scale_test, env_scale)
print("Trained with Environment of C=50, S=6, EV=6")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_valid[i]} | Mean Trained Test Cost: {-rewards_trained[i].mean():3f}")


# policy_c5 = policy_c5.to(device)
# rewards_c_trained, num_c_valid = get_reward_and_check(policy_c5, td_scale_test, env_scale)
# print("\nTrained with Environment of C=50, S=6, EV=6")
# for i, s in enumerate(scale):
#     print(f"Scale: {s} | FeasibleCounts: {num_c_valid[i]} | Mean Trained Test Cost (Constrained): {-rewards_c_trained[i].mean():3f}")

Trained with Environment of C=50, S=6, EV=6
Scale: 10 | FeasibleCounts: 100 | Mean Trained Test Cost: 4.580757
Scale: 20 | FeasibleCounts: 83 | Mean Trained Test Cost: 6.744152
Scale: 50 | FeasibleCounts: 99 | Mean Trained Test Cost: 12.632970
Scale: 100 | FeasibleCounts: 100 | Mean Trained Test Cost: 20.994280


In [17]:
del rewards_trained, num_valid
torch.cuda.empty_cache()

In [5]:
# For C=100

MAX_EPOCH = 10
BATCH_SIZE_100 = 128
TRAIN_DATA_SIZE_100 = BATCH_SIZE_100 * 200
VAL_DATA_SIZE_100 = BATCH_SIZE_100 * 50


policy100 = AttentionModelPolicy(env_name=env_100.name,
                              embed_dim=256,
                              num_encoder_layers=6,
                              num_heads=8,)

model_100 = REINFORCE(env_100,
                policy100,
                baseline="rollout",
                batch_size=BATCH_SIZE,
                train_data_size=TRAIN_DATA_SIZE,
                val_data_size=VAL_DATA_SIZE,
                optimizer_kwargs={"lr": 1e-4, 
                                  "weight_decay": 1e-6})

trainer_STEP4 = RL4COTrainer(
    max_epochs=MAX_EPOCH,
    accelerator="gpu",
    devices=1,
    logger=None,
)
trainer_STEP4.fit(model_100)

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.
c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\utilities\parsing.py:208: Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | env      | EVRPTWEnv            | 0      |

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


                                                                           

c:\Users\hyosi\anaconda3\envs\rl4co\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 200/200 [01:51<00:00,  1.80it/s, v_num=361, train/reward=-21.5, train/loss=-24.0, val/reward=-20.8] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 200/200 [02:27<00:00,  1.35it/s, v_num=361, train/reward=-21.5, train/loss=-24.0, val/reward=-20.8]


In [6]:
td_scale_test = [td_10_TEST, td_20_TEST, td_50_TEST, td_100_TEST]
env_scale = [env_10, env_20, env_50, env_100]
scale = [10, 20, 50, 100]


policy100 = policy100.to(device)
rewards_trained, num_valid = get_reward_and_check(policy100, td_scale_test, env_scale)
print("Trained with Environment of C=100, S=12, EV=12")
for i, s in enumerate(scale):
    print(f"Scale: {s} | FeasibleCounts: {num_valid[i]} | Mean Trained Test Cost: {-rewards_trained[i].mean():3f}")


Trained with Environment of C=100, S=12, EV=12
Scale: 10 | FeasibleCounts: 100 | Mean Trained Test Cost: 4.901434
Scale: 20 | FeasibleCounts: 57 | Mean Trained Test Cost: 7.132657
Scale: 50 | FeasibleCounts: 94 | Mean Trained Test Cost: 12.950029
Scale: 100 | FeasibleCounts: 100 | Mean Trained Test Cost: 20.851795
