# Runtime Comparison - Blackjack

Compare the run time of different models: Q table, MLP, NDNF-MT and logic-based
programs.

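Each model is run for 10,000 episodes (`NUM_EPISODES`), except the ProbLog program, which is run for only 10 episodes because its inference is much slower.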

In [1]:
from datetime import datetime
from pathlib import Path
import sys
import time

sys.path.append("..")

In [2]:
import gymnasium as gym
import numpy as np
import torch

In [3]:
from blackjack_common import get_target_policy, construct_model, blackjack_env_preprocess_obss
from eval.blackjack_ppo_rl_eval_common import eval_on_environments
from eval.blackjack_problog_rules_inference import blackjack_problog_context_gen_fn
from eval.problog_inference_common import prolog_inference_in_env_single_run

In [4]:
# Number of episodes timed for every model that uses NUM_EPISODES below.
NUM_EPISODES = 10_000
# Device that the models and their checkpoints in this notebook are mapped onto.
DEVICE = torch.device("cpu")

In [5]:
# Single Blackjack environment reused by the sequential timing loops in this
# notebook; no render mode is requested.
blackjack_env = gym.make(
    "Blackjack-v1",
    render_mode=None,
)

## Q table

In [6]:
# Q table
# Baseline: actions are looked up directly in the tabular target policy that
# is loaded from the CSV file. Loading is done before the timer starts.
target_policy = get_target_policy(Path("../results/BJ-TAB/BLACKJACK-TAB-snb/blackjack_tab_snb.csv"))

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed time; datetime.now() follows the system wall clock, which
# can be adjusted while the benchmark is running.
start_time = time.perf_counter()

for _ in range(NUM_EPISODES):
    obs, _ = blackjack_env.reset()
    terminated, truncated = False, False
    # Play one episode until the environment reports termination or truncation.
    while not terminated and not truncated:
        action = target_policy[obs]
        obs, _, terminated, truncated, _ = blackjack_env.step(action)

elapsed = time.perf_counter() - start_time

print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_EPISODES}")

Time taken: 0.41561198234558105
Avg time per episode: 4.1561198234558106e-05


## MLP

In [7]:
# MLP
# Build the agent with a plain MLP actor (use_ndnf=False) and a separate
# critic (share_layer_with_critic=False), then restore the trained weights.
mlp_model = construct_model(
    num_latent=64,
    use_ndnf=False,
    use_decode_obs=True,
    use_eo=False,
    use_mt=True,
    share_layer_with_critic=False,
)
mlp_model.to(DEVICE)
# The checkpoint is fed straight into load_state_dict(), so only tensors and
# plain containers are expected. weights_only=True restricts unpickling to
# those types, which avoids executing arbitrary pickled code and silences the
# torch.load warning about the default value.
mlp_state_dict = torch.load(
    "../blackjack_ppo_storage/blackjack_ppo_mlp_al64_3e5_3802/model.pth",
    map_location=DEVICE,
    weights_only=True,
)
mlp_model.load_state_dict(mlp_state_dict)
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
mlp_model.eval()


  sd = torch.load(


BlackjackMLPAgent(
  (actor): Sequential(
    (0): Linear(in_features=44, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=2, bias=True)
  )
  (critic): Sequential(
    (0): Linear(in_features=44, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [8]:
# Non-parallel
# Time the MLP agent acting in a single environment, one step at a time.

# time.perf_counter() is monotonic and meant for measuring elapsed time,
# unlike the adjustable wall clock behind datetime.now().
start_time = time.perf_counter()

for _ in range(NUM_EPISODES):
    obs, _ = blackjack_env.reset()
    terminated, truncated = False, False
    while not terminated and not truncated:
        # Inference only: keep both the preprocessing and the forward pass
        # under no_grad so no autograd bookkeeping is included in the timing.
        # (Harmless if get_actions already disables gradients internally.)
        with torch.no_grad():
            # The preprocessing helper takes one array per observation
            # component, so wrap each scalar in a length-1 array.
            obs_tensor = blackjack_env_preprocess_obss(
                obs_tuple=(
                    np.array([obs[0]]),
                    np.array([obs[1]]),
                    np.array([obs[2]]),
                ),
                use_ndnf=False,
                device=DEVICE,
            )
            # Index 0 selects the action for the single observation passed in.
            action = mlp_model.get_actions(obs_tensor, use_argmax=False)[0]
        obs, _, terminated, truncated, _ = blackjack_env.step(action)

elapsed = time.perf_counter() - start_time
print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_EPISODES}")

Time taken: 2.2830190658569336
Avg time per episode: 0.00022830190658569335


In [9]:
# Parallel
# Time the project's evaluation helper for the MLP agent over NUM_EPISODES
# runs. The return value of the helper is intentionally discarded.

# time.perf_counter() is monotonic and meant for measuring elapsed time,
# unlike the adjustable wall clock behind datetime.now().
start_time = time.perf_counter()
eval_on_environments(
    model=mlp_model,
    device=DEVICE,
    use_argmax=False,
    eval_num_runs=NUM_EPISODES,
)
elapsed = time.perf_counter() - start_time

print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_EPISODES}")

Time taken: 1.1566901206970215
Avg time per episode: 0.00011566901206970215


## NDNF-MT

In [10]:
# NDNF-MT
# Build the agent with a neural DNF actor (use_ndnf=True, use_mt=True) and a
# separate critic, then restore the pruned checkpoint.
ndnf_mt_model = construct_model(
    num_latent=64,
    use_ndnf=True,
    use_decode_obs=True,
    use_eo=False,
    use_mt=True,
    share_layer_with_critic=False,
)
ndnf_mt_model.to(DEVICE)
# The checkpoint is fed straight into load_state_dict(), so only tensors and
# plain containers are expected. weights_only=True restricts unpickling to
# those types, which avoids executing arbitrary pickled code and silences the
# torch.load warning about the default value.
ndnf_mt_state_dict = torch.load(
    "../blackjack_ppo_storage/blackjack_ppo_ndnf_mt_al64_3e5_aux10_7142/model_soft_2nd_mr_pruned.pth",
    map_location=DEVICE,
    weights_only=True,
)
ndnf_mt_model.load_state_dict(ndnf_mt_state_dict)
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
ndnf_mt_model.eval()


  sd = torch.load(


BlackjackNDNFMutexTanhAgent(
  (actor): NeuralDNFMutexTanh(
    (conjunctions): SemiSymbolic(in_features=44, out_features=64, layer_type=SemiSymbolicLayerType.CONJUNCTION,current_delta=1.00)
    (disjunctions): SemiSymbolicMutexTanh(in_features=64, out_features=2, layer_type=SemiSymbolicLayerType.DISJUNCTION,current_delta=1.00)
  )
  (critic): Sequential(
    (0): Linear(in_features=44, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [11]:
# Non-parallel
# Time the NDNF-MT agent acting in a single environment, one step at a time.

# time.perf_counter() is monotonic and meant for measuring elapsed time,
# unlike the adjustable wall clock behind datetime.now().
start_time = time.perf_counter()

for _ in range(NUM_EPISODES):
    obs, _ = blackjack_env.reset()
    terminated, truncated = False, False
    while not terminated and not truncated:
        # Inference only: keep both the preprocessing and the forward pass
        # under no_grad so no autograd bookkeeping is included in the timing.
        # (Harmless if get_actions already disables gradients internally.)
        with torch.no_grad():
            # The preprocessing helper takes one array per observation
            # component, so wrap each scalar in a length-1 array.
            obs_tensor = blackjack_env_preprocess_obss(
                obs_tuple=(
                    np.array([obs[0]]),
                    np.array([obs[1]]),
                    np.array([obs[2]]),
                ),
                # This model was constructed with use_ndnf=True, so the
                # observations are preprocessed with the matching flag
                # (the original passed False, as in the MLP cell).
                # NOTE(review): confirm against blackjack_env_preprocess_obss.
                use_ndnf=True,
                device=DEVICE,
            )
            # The action for the single observation is taken from the first
            # element of the first returned item (indexing kept as in the
            # original cell).
            action = ndnf_mt_model.get_actions(obs_tensor, use_argmax=False)[0][0]
        obs, _, terminated, truncated, _ = blackjack_env.step(action)

elapsed = time.perf_counter() - start_time
print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_EPISODES}")

Time taken: 5.563965082168579
Avg time per episode: 0.0005563965082168579


In [12]:
# Parallel
# Time the project's evaluation helper for the NDNF-MT agent over
# NUM_EPISODES runs. The return value of the helper is intentionally discarded.

# time.perf_counter() is monotonic and meant for measuring elapsed time,
# unlike the adjustable wall clock behind datetime.now().
start_time = time.perf_counter()
eval_on_environments(
    model=ndnf_mt_model,
    device=DEVICE,
    use_argmax=False,
    eval_num_runs=NUM_EPISODES,
)
elapsed = time.perf_counter() - start_time

print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_EPISODES}")

Time taken: 1.573606014251709
Avg time per episode: 0.0001573606014251709


## ProbLog

In [13]:
# ProbLog

# Read the ProbLog program one line at a time, removing leading/trailing
# whitespace (including the newline) from every line.
# NOTE(review): these rules come from run ..._aux10_1681, while the NDNF-MT
# cell loads the checkpoint of run ..._aux10_7142 - confirm this is intended.
with open("../blackjack_ppo_storage/blackjack_ppo_ndnf_mt_al64_3e5_aux10_1681/problog_rules.pl", "r") as f:
    problog_rules = [raw_line.strip() for raw_line in f]

In [14]:
# Number of ProbLog runs to time. Kept as a single named constant so the loop
# count and the divisor of the average can never drift apart. It is much
# smaller than NUM_EPISODES because each ProbLog run takes seconds.
NUM_PROBLOG_EPISODES = 10

# time.perf_counter() is monotonic and meant for measuring elapsed time,
# unlike the adjustable wall clock behind datetime.now().
start_time = time.perf_counter()

for _ in range(NUM_PROBLOG_EPISODES):
    # Presumably plays one episode per call (hence "per episode" below) -
    # TODO confirm against prolog_inference_in_env_single_run.
    prolog_inference_in_env_single_run(
        env=blackjack_env,
        problog_rules=problog_rules,
        num_actions=2,
        context_problog_gen_fn=blackjack_problog_context_gen_fn,
        use_argmax=False,
    )

elapsed = time.perf_counter() - start_time

print(f"Time taken: {elapsed}")
print(f"Avg time per episode: {elapsed / NUM_PROBLOG_EPISODES}")

Time taken: 21.583281993865967
Avg time per episode: 2.1583281993865966
