# Runtime Comparison - Door Corridor

Compare the run time of different models: Q table, MLP, NDNF-MT and logic-based
programs.

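The number of episodes per run is set in the code cells below: `NUM_EPISODES`
(10000) for the non-parallel MLP and NDNF-MT runs, and `N_ASP_EVAL_EPISODES`
(1000) for the ASP program.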


In [1]:
from datetime import datetime
import sys
import time

sys.path.append("..")

In [2]:
import gymnasium as gym
from hydra import compose, initialize
import torch

In [3]:
from corridor_grid.envs import DoorCorridorEnv

from door_corridor_ppo import construct_model, make_env
from eval.door_corridor_asp_rules_inference import rule_eval
from eval.door_corridor_ppo_multirun_rl_performance_eval import simulate

In [4]:
# Benchmark configuration shared by every timing cell below.

# All models are loaded onto and evaluated on the CPU.
DEVICE = torch.device("cpu")

# Episodes rolled out by the non-parallel loops; also the divisor used for
# every "Avg time per episode" figure of the MLP / NDNF-MT cells.
NUM_EPISODES = 10_000

# Number of sub-environments placed in the vectorized `envs` object.
NUM_PROCESSES = 8

In [5]:
# Single, non-rendering environment used by the episode-by-episode
# (non-parallel) benchmark loops.
dc_env = DoorCorridorEnv(render_mode=None)

# Vectorized environment handed to `simulate` in the parallel benchmarks:
# one sub-environment per index in range(NUM_PROCESSES).
# NOTE(review): the meaning of make_env's three arguments is defined in
# door_corridor_ppo — presumably (seed, index, capture-video flag); confirm.
envs = gym.vector.SyncVectorEnv(
    [make_env(env_idx, env_idx, False) for env_idx in range(NUM_PROCESSES)]
)

## MLP


In [6]:
# MLP: build the agent from the Hydra eval config, then restore its trained
# weights from disk.
with initialize(version_base=None, config_path="../conf"):
    cfg = compose(
        config_name="config",
        overrides=["+eval=door_corridor_ppo_mlp_multirun_eval"],
    )

mlp_model = construct_model(
    cfg=cfg["eval"],
    num_actions=dc_env.get_num_actions(),
    use_ndnf=False,
    img_obs_space=dc_env.observation_space["image"],  # type: ignore
)
mlp_model.to(DEVICE)

# weights_only=True restricts unpickling to tensors and plain containers.
# The loaded object is only ever passed to load_state_dict, so nothing else
# is needed, and it avoids executing arbitrary pickled code from the file.
sd = torch.load(
    "../dc_ppo_storage/dc5_ppo_mlp_k1eoc4_tanh_exl16_3e5_2457/model.pth",
    map_location=DEVICE,
    weights_only=True,
)
mlp_model.load_state_dict(sd)

# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
mlp_model.eval()

  sd = torch.load(


DCPPOMLPAgent(
  (image_encoder): Sequential(
    (0): Conv2d(2, 4, kernel_size=(1, 1), stride=(1, 1))
    (1): Tanh()
  )
  (extra_layer): Sequential(
    (0): Linear(in_features=36, out_features=16, bias=True)
    (1): Tanh()
  )
  (actor): Sequential(
    (0): Linear(in_features=16, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=4, bias=True)
  )
  (critic): Sequential(
    (0): Linear(in_features=16, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [7]:
# Non-parallel: roll out the MLP agent one episode at a time on the single
# environment `dc_env` and time the whole loop.

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; wall-clock timestamps can jump if the system time is adjusted.
start_time = time.perf_counter()

for _ in range(NUM_EPISODES):
    obs, _ = dc_env.reset()
    terminated, truncated = False, False
    # An episode ends as soon as the env reports termination or truncation.
    while not (terminated or truncated):
        with torch.no_grad():
            # The image observation is copied, moved to DEVICE, given a
            # leading batch dimension of 1 and cast to float before being
            # passed to the agent. [0] then selects the first item of what
            # get_actions returns.
            action = mlp_model.get_actions(
                preprocessed_obs={
                    "image": torch.tensor(obs["image"].copy(), device=DEVICE)
                    .unsqueeze(0)
                    .float()
                },
                use_argmax=True,
            )[0]
        obs, _, terminated, truncated, _ = dc_env.step(action)

end_time = time.perf_counter()
elapsed = end_time - start_time
print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_EPISODES}")

Time taken: 10.17816710472107
Avg time per episode: 0.0010178167104721069


In [8]:
# Parallel: time one call of `simulate` on the vectorized environments with
# the MLP agent.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals such as this one.
start_time = time.perf_counter()
simulate(envs, mlp_model)
end_time = time.perf_counter()
elapsed = end_time - start_time

# NOTE(review): simulate() is not given NUM_EPISODES, so the number of
# episodes it runs is decided inside simulate. Confirm it matches
# NUM_EPISODES; otherwise the per-episode average below is mis-scaled.
print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_EPISODES}")

Time taken: 0.07258200645446777
Avg time per episode: 7.258200645446778e-06


## NDNF-MT


In [9]:
# NDNF-MT: build the agent from the Hydra eval config, then restore its
# trained weights from disk.
with initialize(version_base=None, config_path="../conf"):
    cfg = compose(
        config_name="config",
        overrides=["+eval=door_corridor_ppo_ndnf_mt_multirun_eval"],
    )
ndnf_mt_model = construct_model(
    cfg=cfg["eval"],
    num_actions=dc_env.get_num_actions(),
    use_ndnf=True,
    img_obs_space=dc_env.observation_space["image"],  # type: ignore
)
ndnf_mt_model.to(DEVICE)

# weights_only=True restricts unpickling to tensors and plain containers.
# The loaded object is only ever passed to load_state_dict, so nothing else
# is needed, and it avoids executing arbitrary pickled code from the file.
sd = torch.load(
    "../dc_ppo_storage/dc5_ppo_ndnf_mt_k1eoc4_tanh_exl16_3e5_aux_6731/model_2nd_mr_pruned.pth",
    map_location=DEVICE,
    weights_only=True,
)
ndnf_mt_model.load_state_dict(sd)

# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
ndnf_mt_model.eval()

  sd = torch.load(


DCPPONDNFMutexTanhAgent(
  (image_encoder): Sequential(
    (0): Conv2d(2, 4, kernel_size=(1, 1), stride=(1, 1))
    (1): Tanh()
  )
  (extra_layer): Sequential(
    (0): Linear(in_features=36, out_features=16, bias=True)
    (1): Tanh()
  )
  (actor): NeuralDNFMutexTanh(
    (conjunctions): SemiSymbolic(in_features=16, out_features=12, layer_type=SemiSymbolicLayerType.CONJUNCTION,current_delta=1.00)
    (disjunctions): SemiSymbolicMutexTanh(in_features=12, out_features=4, layer_type=SemiSymbolicLayerType.DISJUNCTION,current_delta=1.00)
  )
  (critic): Sequential(
    (0): Linear(in_features=16, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [10]:
# Non-parallel: roll out the NDNF-MT agent one episode at a time on the
# single environment `dc_env` and time the whole loop.

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; wall-clock timestamps can jump if the system time is adjusted.
start_time = time.perf_counter()

for _ in range(NUM_EPISODES):
    obs, _ = dc_env.reset()
    terminated, truncated = False, False
    # An episode ends as soon as the env reports termination or truncation.
    while not (terminated or truncated):
        with torch.no_grad():
            # The image observation is copied, moved to DEVICE, given a
            # leading batch dimension of 1 and cast to float before being
            # passed to the agent.
            # NOTE(review): the result is indexed with [0][0] here, whereas
            # the MLP cell uses a single [0]. Presumably this agent's
            # get_actions returns a tuple whose first item is the action
            # batch — confirm against the agent implementation.
            action = ndnf_mt_model.get_actions(
                preprocessed_obs={
                    "image": torch.tensor(obs["image"].copy(), device=DEVICE)
                    .unsqueeze(0)
                    .float()
                },
                use_argmax=True,
            )[0][0]
        obs, _, terminated, truncated, _ = dc_env.step(action)


end_time = time.perf_counter()
elapsed = end_time - start_time
print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_EPISODES}")

Time taken: 36.72632598876953
Avg time per episode: 0.003672632598876953


In [11]:
# Parallel: time one call of `simulate` on the vectorized environments with
# the NDNF-MT agent.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals such as this one.
start_time = time.perf_counter()
simulate(envs, ndnf_mt_model)
end_time = time.perf_counter()
elapsed = end_time - start_time

# NOTE(review): simulate() is not given NUM_EPISODES, so the number of
# episodes it runs is decided inside simulate. Confirm it matches
# NUM_EPISODES; otherwise the per-episode average below is mis-scaled.
print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_EPISODES}")

Time taken: 0.11115288734436035
Avg time per episode: 1.1115288734436035e-05


## ASP


In [12]:
# ASP: read the rule file stored alongside the NDNF-MT checkpoint into a list
# of strings, one entry per line, with surrounding whitespace (including the
# trailing newline) removed.

with open(
    "../dc_ppo_storage/dc5_ppo_ndnf_mt_k1eoc4_tanh_exl16_3e5_aux_6731/asp_rules.lp",
    "r",
) as rules_file:
    asp_rules = [line.strip() for line in rules_file]

In [14]:
# Time repeated evaluation of the ASP rules via `rule_eval`.
# NOTE(review): each rule_eval call presumably corresponds to one episode —
# confirm against eval.door_corridor_asp_rules_inference.
N_ASP_EVAL_EPISODES = 1000

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; wall-clock timestamps can jump if the system time is adjusted.
start_time = time.perf_counter()

for _ in range(N_ASP_EVAL_EPISODES):
    rule_eval(ndnf_mt_model, asp_rules)  # type: ignore

end_time = time.perf_counter()
elapsed = end_time - start_time

print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / N_ASP_EVAL_EPISODES}")

Time taken: 25.296003103256226
Avg time per episode: 0.025296003103256224
