In [None]:
import os
from typing import *

import numpy as np
import torch

from game.api import BlackjackWrapper
from game.models.model import *
from training.agent import BlackjackDQN

In [None]:
# Run on the first CUDA device when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Bare expression so the notebook displays the selected device.
device

In [None]:
def evaluate_agent(
    game_wrapper: BlackjackWrapper,
    dqn_model: BlackjackDQN,
    num_eps: int,
    max_steps: int = 10,
    add_steps: bool = False,
    add_card_counting: bool = True,
) -> Tuple[float, float, float]:
    """Play ``num_eps`` episodes with the model and summarise the episode rewards.

    Every episode begins with ``game_wrapper.reset()``. On the first step the
    model is asked for a bet (``get_bet_percent``) which is played through
    ``bet_step``; on every later step it is asked for a card decision
    (``get_card_action``) which is played through ``card_step``. Both queries
    are made with ``allow_explore=False`` and ``num_steps=0``. An episode ends
    when the returned outcome is flagged as terminated, or after ``max_steps``
    steps, whichever comes first.

    Args:
        game_wrapper: Environment the episodes are played on.
        dqn_model: Agent queried for the bet and card decisions.
        num_eps: Number of episodes to play.
        max_steps: Maximum number of steps per episode, the bet step included.
        add_steps: When True, ``step index / max_steps`` is passed to
            ``torch_flatten`` as ``step_num``; otherwise ``None`` is passed.
        add_card_counting: Forwarded to ``torch_flatten`` as
            ``include_discarded``.

    Returns:
        ``(mean, median, std)`` of the summed reward of each episode.

    Note:
        Reads the notebook-level ``device`` variable when flattening states.
    """
    episode_returns: List[float] = []

    for _episode in range(num_eps):
        game_wrapper = game_wrapper.reset()
        state = game_wrapper.get_state()
        episode_total = 0.0

        for step_idx in range(max_steps):
            if add_steps:
                progress = step_idx / max_steps
            else:
                progress = None
            features = state.torch_flatten(
                device=device,
                include_discarded=add_card_counting,
                step_num=progress,
            )

            if step_idx > 0:
                # Every step after the first is a card decision.
                take_card, _, _ = dqn_model.get_card_action(
                    features, allow_explore=False, num_steps=0
                )
                outcome = game_wrapper.card_step(take_card=take_card)
            else:
                # The opening step of an episode places the bet.
                bet_fraction, _, _ = dqn_model.get_bet_percent(
                    features, allow_explore=False, num_steps=0
                )
                outcome = game_wrapper.bet_step(bet_fraction)

            state = outcome.new_state
            episode_over = outcome.terminated
            episode_total += outcome.reward
            if episode_over:
                break

        episode_returns.append(episode_total)

    # .item() turns each numpy scalar into a plain Python number.
    mean_reward, median_reward, std_reward = (
        statistic(episode_returns).item()
        for statistic in (np.mean, np.median, np.std)
    )
    return mean_reward, median_reward, std_reward

In [None]:
# The project root is taken to be the parent of the current working directory.
# If the kernel is started from the project root itself, use
# `os.getcwd()` for `proj_path` instead.
proj_path = os.path.join(
    os.getcwd(),
    "..",
)
# Directory the model checkpoint files are read from.
model_dir = os.path.join(
    proj_path,
    "models",
)

In [None]:
# Number of episodes intended for each evaluation run.
num_eps = 1_000

# Settings passed to BlackjackWrapper when a game is created.
initial_cash, deck_nums, min_bet = 10_000, 8, 10

In [None]:
# (path, file name) pairs for the checkpoints to evaluate: regular files in
# `model_dir` whose name starts with "dqn" and does not end with ".txt".
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the evaluation (and printed) order the same from run to run.
dqn_model_paths = [
    (os.path.join(model_dir, f), f) for f in sorted(os.listdir(model_dir))
    if os.path.isfile(os.path.join(model_dir, f)) and f.startswith("dqn") and not f.endswith(".txt")
]
def _make_eval_model(in_features: int) -> BlackjackDQN:
    """Build a BlackjackDQN for evaluation.

    The model is created with ``epsilon`` and ``min_epsilon`` set to 0 and
    ``.eval()`` is called on it, so both evaluation models are configured the
    same way. Each call passes fresh choice lists to the constructor.
    """
    return BlackjackDQN(
        in_features=in_features,
        bet_choices=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
        card_choices=[True, False],
        epsilon=0,
        min_epsilon=0,
    ).eval()


# NOTE(review): neither model is moved to `device`, while state features are
# created on `device` during evaluation — confirm BlackjackDQN handles device
# placement itself, otherwise this may fail on CUDA hosts.

# Model whose input is the plain game state.
eval_model = _make_eval_model(GameState.get_state_size())
# Model whose input has one extra feature (used for checkpoints with "steps"
# in their file name).
eval_model_with_steps = _make_eval_model(GameState.get_state_size() + 1)


# Evaluate every checkpoint and print mean +/- std and median episode reward.
for model_path, model_name in dqn_model_paths:
    game_wrapper = BlackjackWrapper(
        initial_cash=initial_cash,
        deck_nums=deck_nums,
        min_bet=min_bet,
    )
    # The evaluation flags are derived from substrings of the file name.
    add_step = "steps" in model_name
    has_card_counting = "no_counting" not in model_name
    model = eval_model_with_steps if add_step else eval_model
    # torch.load unpickles the file: only load checkpoints from trusted sources.
    model.load_state_dict(torch.load(model_path, map_location=device))

    # Every model is evaluated without card counting; models that are not
    # marked "no_counting" are additionally evaluated with it.
    counting_modes = [(False, "without card counting")]
    if has_card_counting:
        counting_modes.append((True, "with card counting"))

    for use_counting, mode_label in counting_modes:
        mean_reward, median_reward, std_reward = evaluate_agent(
            game_wrapper=game_wrapper,
            dqn_model=model,
            # Use the configured episode count instead of a hard-coded 1000.
            num_eps=num_eps,
            add_steps=add_step,
            add_card_counting=use_counting,
        )
        print(
            f"{model_name} ({mode_label}): "
            f"{round(mean_reward, 3)} "
            f"+/- {round(std_reward, 3)}, "
            f"median: {round(median_reward, 3)}"
        )