In [1]:
import json
import os
import sys
import time

from alpyne.client.alpyne_client import AlpyneClient
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed

# The project package lives two directories up; extend the path before importing it.
sys.path.append("../..")
from thesis.envs.matrix_routing_multiagent import MatrixRoutingMA
from thesis.policies.routing_attention import RoutingFE_offPolicy
from thesis.policies.ppo_ac_attention import AttentionACPolicy
# Single seed for the whole run; it is also embedded in `run_name` below so
# that saved artifacts can be traced back to the seed that produced them.
seed = 42
# Hand the seed to Stable-Baselines3's global seeding helper.
set_random_seed(seed)

In [2]:
# Output locations, relative to this notebook.
models_dir = "../../models/MiniMatrix_Routing_MA"
logdir = "../../logs/MiniMatrix_Routing_MA"

# Fleet sizing passed to both the environment and the feature extractor.
fleetsize = 6
max_fleetsize = 10

# Unique run identifier: algorithm, fleet sizes, start time (day_month-H_M_S) and seed.
_run_started_at = time.strftime("%d_%m-%H_%M_%S")
run_name = "-".join(
    ["DQN", str(fleetsize), str(max_fleetsize), _run_started_at, str(seed)]
)

In [3]:
# Environment configuration; forwarded to MatrixRoutingMA as `config_args`
# when the vectorised environment is built.
env_args = dict(
    reward_target=1,
    reward_distance=0.05,
    reward_block=-0.5,
    dispatchinginterval=120,
    routinginterval=2,
    withCollisions=False,
    blockTimeout=5,
    includeNodesInReach=True,
)

# Extra keyword arguments splatted into the DQN constructor.
# Currently empty, i.e. the library defaults are used.
dqn_args = dict(
    #learning_rate = 3e-3,
    #buffer_size = 100000
)

# Keyword arguments for the RoutingFE_offPolicy features extractor.
fe_args = dict(
    max_fleetsize=max_fleetsize,
    embed_dim=64,
    n_heads=8,
    depth=8,
)

# Recorded with the run for reference; the model cell currently has the
# corresponding `net_arch` policy kwarg commented out.
net_arch = dict(qf=[], pi=[])

# Snapshot of every hyperparameter of this run.
# The algorithm arguments are stored under `dqn_args`. Previously this slot was
# named `ppo_args` and mistakenly held a second copy of `env_args`, so the
# arguments actually passed to the model were never written out.
hparams = dict(
    fleetsize=fleetsize,
    max_fleetsize=max_fleetsize,
    env_args=env_args,
    dqn_args=dqn_args,
    fe_args=fe_args,
    net_arch=net_arch,
)

# Make sure the target directory exists; on a fresh checkout the open() below
# would otherwise raise FileNotFoundError before any model has been saved.
os.makedirs(models_dir, exist_ok=True)
with open(f"{models_dir}/{run_name}.json", 'w') as outfile:
    json.dump(hparams, outfile, indent=3)

In [4]:
# One-element list handed to the environments as `counter`.
# NOTE(review): presumably a mutable counter shared by all env copies — confirm
# against MatrixRoutingMA.
i = [0]

# Client for the exported simulation model archive.
client = AlpyneClient("../../envs/MiniMatrix.zip", port=51151)

# Constructor arguments applied to every environment copy.
routing_env_kwargs = {
    "max_seconds": 60 * 60,
    "fleetsize": fleetsize,
    "max_fleetsize": max_fleetsize,
    "config_args": env_args,
    "counter": i,
    "client": client,
}

# Four copies of the routing environment wrapped into a single vectorised env.
env = make_vec_env(MatrixRoutingMA, n_envs=4, env_kwargs=routing_env_kwargs)

  warn(f"Unzipping to temporary directory ({tmp_dir})")


In [5]:

# Policy settings: plug the custom features extractor into the MLP policy.
# (`net_arch` is deliberately not passed here; the original had it commented out.)
dqn_policy_kwargs = dict(
    features_extractor_class=RoutingFE_offPolicy,
    features_extractor_kwargs=fe_args,
)

# DQN agent on the vectorised routing environment, logging to TensorBoard.
# Any overrides collected in `dqn_args` are applied on top of these settings.
model = DQN(
    "MlpPolicy",
    env,
    tensorboard_log=logdir,
    device="cuda",
    policy_kwargs=dqn_policy_kwargs,
    optimize_memory_usage=True,
    **dqn_args,
)

In [6]:
# Train in fixed-size rounds and write a checkpoint after each one.
TIMESTEPS = 50000   # environment steps per round
N_ROUNDS = 14       # number of train/save rounds (previously the bare `range(1, 15)`)

# The loop variable is intentionally NOT called `i`: that name is already bound
# to the counter list passed to the environments, and rebinding it to an int
# here would leave any later cell that reads `i` with the wrong object.
for round_idx in range(1, N_ROUNDS + 1):
    # reset_num_timesteps=False keeps the step counter running across rounds,
    # so all rounds are logged under the same `run_name`.
    model.learn(
        total_timesteps=TIMESTEPS,
        reset_num_timesteps=False,
        tb_log_name=run_name,
    )
    # Checkpoint name carries the nominal number of steps trained so far.
    model.save(f"{models_dir}/{run_name}-{TIMESTEPS * round_idx}")