In [1]:
import sys
import time

from stable_baselines3 import PPO
from stable_baselines3.common.utils import set_random_seed

sys.path.append("../..")
from thesis.envs.minimatrix import MiniMatrix
from thesis.policies.agv_routing import AgvRoutingFE

# Single seed for this run: it is handed to Stable-Baselines3's
# set_random_seed here and is also embedded in `run_name` in a later cell,
# so saved models and logs record which seed produced them.
seed = 42
set_random_seed(seed)

In [2]:
# Where checkpoints and TensorBoard logs for this experiment are written
# (paths are relative to the notebook's working directory).
models_dir = "../../models/MiniMatrix_Routing"
logdir = "../../logs/MiniMatrix_Routing"

# Fleet settings; both values are forwarded to the environment and the
# features extractor in later cells.
fleetsize, max_fleetsize = 5, 5

# Run identifier: PPO-<fleetsize>-<max_fleetsize>-<day_month-hour_min_sec>-<seed>.
# The timestamp is taken once, when this cell executes.
launch_stamp = time.strftime("%d_%m-%H_%M_%S")
run_name = f"PPO-{fleetsize}-{max_fleetsize}-{launch_stamp}-{seed}"

In [3]:
# Options forwarded to the environment through `config_args`.
# NOTE(review): the meaning of each key is defined by the MiniMatrix model,
# which is not visible from this notebook.
env_config = {
    "reward_target": 10,
    "reward_block": -1,
    "dispatchinginterval": 120,
    "reward_distance": 0.01,
    "withCollisions": False,
}

# Build the environment from the zipped model; max_seconds evaluates to 3600.
env = MiniMatrix(
    "../../envs/MiniMatrix.zip",
    max_seconds=60 * 60,
    fleetsize=fleetsize,
    max_fleetsize=max_fleetsize,
    config_args=env_config,
)

  warn(f"Unzipping to temporary directory ({tmp_dir})")


In [None]:
# Keyword arguments for the custom features extractor.
fe_kwargs = dict(max_fleetsize=max_fleetsize, with_transformer=True)

# Policy settings: empty `pi` / `vf` layer lists (no extra hidden layers are
# requested after the features extractor) and AgvRoutingFE as the extractor.
policy_kwargs = dict(
    net_arch=[dict(pi=[], vf=[])],
    features_extractor_class=AgvRoutingFE,
    features_extractor_kwargs=fe_kwargs,
)

# Fresh PPO agent on `env`, logging to TensorBoard under `logdir`.
model = PPO(
    "MlpPolicy",
    env,
    learning_rate=1e-3,
    policy_kwargs=policy_kwargs,
    tensorboard_log=logdir,
    device="cuda",
)


In [8]:
# SAVE FE
# Optional step: store only the policy's features extractor, using its
# project-defined `_save` helper, under <models_dir>/FE/<run_name>.
# NOTE(review): the on-disk format is decided by `_save`, which is not
# visible in this notebook.
model.policy.features_extractor._save(f"{models_dir}/FE/{run_name}")

In [12]:
# LOAD FE
# Optional step: restore the features extractor of the current `model` from an
# earlier run via the project-defined `_load` helper. The run name is
# hard-coded; going by this notebook's run_name pattern its "1-1" prefix
# presumably means fleetsize=1 / max_fleetsize=1, whereas this notebook uses
# 5 / 5 — TODO confirm that the stored extractor is compatible.
model.policy.features_extractor._load(f"{models_dir}/FE/PPO-1-1-18_07-19_46_23-42")

In [None]:
# LOAD
# Optional step: rebind `model` to a PPO checkpoint read from a hard-coded
# path under `models_dir`, attaching the current `env`. Any `model` built in
# an earlier cell is replaced by the loaded one.
model = PPO.load(f"{models_dir}/PPO-1-1-07_11_19_19_02-42-200000.zip", env =env)

In [5]:
# TRAIN ONLY FE
import torch
def net_arch_to_eye(model):
    """Turn the linear layers of the policy head into frozen identity maps.

    Every ``torch.nn.Linear`` found in ``model.action_net`` and
    ``model.mlp_extractor`` (including nested sub-modules) gets:

    * its weight overwritten with an identity pattern (ones on the main
      diagonal, zeros elsewhere; rectangular weights are supported),
    * its bias reset to zero, when the layer has a bias,
    * ``requires_grad = False`` on all of its parameters, so gradient-based
      updates no longer change them.

    Other sub-modules of ``model`` (for example a features extractor or a
    value head) are not touched by this function.

    Args:
        model: Object exposing ``action_net`` and ``mlp_extractor`` attributes
            that are ``torch.nn.Module`` instances. In this notebook it is
            called with ``model.policy``.

    Returns:
        None. The modules are modified in place.
    """
    def init_weights(m):
        # Only linear layers are rewritten; activations, containers, etc. are skipped.
        if not isinstance(m, torch.nn.Linear):
            return
        torch.nn.init.eye_(m.weight)
        # Layers created with bias=False have `bias is None`; nothing to reset then.
        if m.bias is not None:
            torch.nn.init.zeros_(m.bias)
        for param in m.parameters():
            param.requires_grad = False

    model.action_net.apply(init_weights)
    model.mlp_extractor.apply(init_weights)
# Apply the identity-and-freeze helper to the PPO policy object (the policy
# itself, not the PPO wrapper), so it acts on its action_net and mlp_extractor.
net_arch_to_eye(model.policy)

In [None]:
# Nominal number of environment steps requested per training round.
TIMESTEPS = 100000

# Four training rounds, each followed by a checkpoint.
# reset_num_timesteps=False is passed so the consecutive learn() calls are
# logged under the same `run_name` as one continuing run.
for round_idx in (1, 2, 3, 4):
    model.learn(
        total_timesteps=TIMESTEPS,
        reset_num_timesteps=False,
        tb_log_name=run_name,
    )
    # The file-name suffix is the nominal cumulative step count
    # (TIMESTEPS * round); it is not read back from the model.
    steps_so_far = TIMESTEPS * round_idx
    model.save(f"{models_dir}/{run_name}-{steps_so_far}")