In [1]:
import json
import os
import sys
import time

from alpyne.client.alpyne_client import AlpyneClient
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed

sys.path.append("../..")
from thesis.envs.matrix_routing_centralized import MatrixRoutingCentral
from thesis.policies.routing_attention import RoutingFE
from thesis.policies.ppo_ac_attention import AttentionACPolicy

# Single seed for the whole notebook; the same value is appended to `run_name`
# in the configuration cell so a saved run can be traced back to its seed.
seed = 42
# Seeds whatever random generators stable_baselines3's helper covers.
# NOTE(review): whether the Alpyne/AnyLogic simulation itself is seeded is not
# visible from this notebook - confirm before relying on exact reproducibility.
set_random_seed(seed)

ModuleNotFoundError: No module named 'thesis.policies.routing_attention'

In [None]:
# Fleet sizes for this experiment. Both are forwarded to the environment;
# max_fleetsize is additionally forwarded to the features extractor.
fleetsize, max_fleetsize = 6, 10

# Output locations: model checkpoints / hyperparameter dumps and TensorBoard logs.
models_dir = "../../models/MiniMatrix_Routing_Attn"
logdir = "../../logs/MiniMatrix_Routing_Attn"

# Run identifier: algorithm, fleet sizes, wall-clock start (day_month-hour_min_sec)
# and the seed. It is reused for checkpoint names and the TensorBoard run name.
run_started_at = time.strftime('%d_%m-%H_%M_%S')
run_name = f"PPO-{fleetsize}-{max_fleetsize}-{run_started_at}-{seed}"

In [None]:
# Hyperparameters for this run. Everything defined here is collected in
# `hparams` and written next to the checkpoints so a saved model can be
# matched with the exact settings that produced it.

# Environment settings; passed to MatrixRoutingCentral as `config_args`.
# NOTE(review): the meaning/units of the individual keys are defined by the
# simulation model and are not visible in this notebook.
env_args = dict(
    reward_target=1,
    reward_distance=0.05,
    reward_block=-0.5,
    dispatchinginterval=30,
    routinginterval=2,
    withCollisions=True,
    blockTimeout=5,
    routingOnNode=False,
    # coordinates=False,
    includeNodesInReach=True,
)

# Keyword arguments unpacked into the PPO constructor.
# With the 8 environments created in the model cell, n_steps=64 gives
# 64 * 8 = 512 transitions per rollout, which matches batch_size.
ppo_args = dict(
    # learning_rate=3e-3,
    n_steps=64,
    batch_size=512,
    # ent_coef=0.2,
    target_kl=0.003,
    gamma=0.7,
    clip_range=0.3,
)

# Keyword arguments for the RoutingFE features extractor.
fe_args = dict(
    max_fleetsize=max_fleetsize,
    embed_dim=64,
    n_heads=8,
    depth=8,
)

# Empty policy ("pi") and value ("vf") layer lists.
# NOTE(review): presumably the heads sit directly on the extractor output -
# confirm against AttentionACPolicy.
net_arch = [dict(pi=[], vf=[])]

hparams = dict(
    fleetsize=fleetsize,
    max_fleetsize=max_fleetsize,
    env_args=env_args,
    ppo_args=ppo_args,
    fe_args=fe_args,
    net_arch=net_arch,
)

# Create the output directory first: on a fresh checkout it does not exist and
# open(..., "w") would raise FileNotFoundError before any training starts.
os.makedirs(models_dir, exist_ok=True)
with open(f"{models_dir}/{run_name}.json", "w") as outfile:
    json.dump(hparams, outfile, indent=3)

In [None]:
# One-element list handed to the environments as `counter`.
# NOTE: the training cells later reuse the name `i` as a loop index; the
# environments keep their reference to this list object regardless.
i = [0]

# Alpyne client for the zipped MiniMatrix simulation model; the same client
# object is passed to every environment instance through env_kwargs.
client = AlpyneClient("../../envs/MiniMatrix.zip", port=51150)

# Constructor arguments for MatrixRoutingCentral.
routing_env_kwargs = {
    "max_seconds": 5 * 60,
    "fleetsize": fleetsize,
    "max_fleetsize": max_fleetsize,
    "config_args": env_args,
    "counter": i,
    "client": client,
}

# Vectorised environment made of 8 MatrixRoutingCentral copies.
env = make_vec_env(MatrixRoutingCentral, n_envs=8, env_kwargs=routing_env_kwargs)

# The custom actor-critic policy receives the network layout and the
# attention-based features extractor together with its arguments.
attention_policy_kwargs = {
    "net_arch": net_arch,
    "features_extractor_class": RoutingFE,
    "features_extractor_kwargs": fe_args,
}

model = PPO(
    AttentionACPolicy,
    env,
    tensorboard_log=logdir,
    device="cuda",
    policy_kwargs=attention_policy_kwargs,
    **ppo_args,
)

  warn(f"Unzipping to temporary directory ({tmp_dir})")


In [None]:
# Nominal number of environment steps requested per learn() call.
TIMESTEPS = 50000

# Train in 14 consecutive chunks and checkpoint after each one.
# reset_num_timesteps=False keeps the model's step counter running across
# calls, and the shared tb_log_name keeps all chunks under one run name.
# NOTE(review): the checkpoint suffix is the nominal count TIMESTEPS * chunk;
# model.num_timesteps may differ slightly - confirm if exact counts matter.
for chunk in range(1, 15):
    model.learn(
        total_timesteps=TIMESTEPS,
        reset_num_timesteps=False,
        tb_log_name=run_name,
        # optional, currently disabled: callback=MiniMatrixCallback()
    )
    nominal_steps = TIMESTEPS * chunk
    model.save(f"{models_dir}/{run_name}-{nominal_steps}")

In [None]:
import torch

# Scratch check: max(dim=0) returns a (values, indices) pair; display only the
# maximum value itself.
torch.Tensor([1, 2, 3]).max(dim=0).values

tensor(3.)

In [None]:
# Debug peek at entry 3 of the features extractor's `mask` attribute.
# The saved output shows a 10x1 tensor holding a single 1 at row 3.
# NOTE(review): what the mask encodes is defined in RoutingFE, not visible here.
model.policy.features_extractor.mask[3]

tensor([[0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])

In [None]:
# Scratch check of boolean-mask indexing: an (8, 10) boolean mask applied to
# an (8, 10, 15) tensor keeps one 15-wide row per True entry.
demo_batch = torch.rand(8, 10, 15)
demo_keep = torch.rand(8, 10) < 0.2
demo_batch[demo_keep].shape

torch.Size([16, 15])

In [None]:
import torch

# Scratch check: select rows of a random (8, 10, 15) tensor using the
# features extractor's mask.
# NOTE(review): assumes the squeezed mask has shape (8, 10) so it can index
# the first two dimensions - confirm against RoutingFE.
reshaped = torch.rand(8, 10, 15)
fe_mask = model.policy.features_extractor.mask
selected_entries = fe_mask.squeeze().detach() == 1
reshaped[selected_entries]

tensor([[4.6802e-01, 4.1489e-01, 1.1272e-01, 9.0595e-01, 5.7178e-01, 9.3568e-01,
         9.8407e-01, 2.0722e-01, 5.0167e-01, 9.6639e-02, 9.8368e-02, 9.3464e-01,
         6.1793e-01, 7.2499e-02, 2.1527e-01],
        [1.6276e-01, 2.5012e-01, 4.0997e-01, 5.0386e-01, 6.8810e-01, 6.3436e-01,
         5.5789e-01, 7.5241e-01, 7.3755e-01, 8.2399e-01, 1.3555e-02, 8.8460e-02,
         5.8754e-01, 8.7182e-02, 4.0445e-01],
        [6.1244e-01, 3.5204e-02, 2.7557e-01, 1.6139e-01, 2.2698e-01, 4.8263e-01,
         6.5265e-01, 8.8682e-01, 4.5719e-01, 3.3674e-01, 5.2818e-01, 4.6900e-01,
         4.9338e-01, 3.8698e-01, 5.6798e-01],
        [3.1807e-01, 9.3433e-01, 6.1566e-01, 6.4365e-01, 8.3279e-02, 7.8307e-01,
         8.2076e-01, 7.2095e-01, 6.6044e-02, 7.9812e-01, 6.4318e-01, 1.6442e-01,
         3.1523e-01, 7.0239e-01, 3.7467e-01],
        [6.3371e-02, 5.1225e-01, 6.8430e-01, 2.0919e-01, 2.1251e-01, 7.9288e-01,
         5.9515e-01, 1.0767e-01, 3.7688e-01, 3.8917e-01, 8.6497e-01, 2.4291e-01,
      

In [None]:
# Continue training for 15 more chunks (indices 15..29), checkpointing after
# each. The indices continue the earlier loop so the checkpoint suffixes keep
# increasing; TIMESTEPS comes from the first training cell.
for chunk in range(15, 30):
    model.learn(
        total_timesteps=TIMESTEPS,
        reset_num_timesteps=False,
        tb_log_name=run_name,
        # optional, currently disabled: callback=MiniMatrixCallback()
    )
    nominal_steps = TIMESTEPS * chunk
    model.save(f"{models_dir}/{run_name}-{nominal_steps}")