# Continuous Blackjack Experiment

Run this notebook with the project's virtual-environment interpreter:
`/Users/muzhao/Documents/Workspace/Python/Continuous-Blackjack/.venv/bin/python`

In [None]:
from pathlib import Path
import re

import pandas as pd

from continuous_blackjack.core import ContinuousBlackjackGame
from continuous_blackjack.strategies import (
    AdaptiveNashEquilibriumStrategy,
    AdaptiveStrategy,
    EpsilonGreedyBanditStrategy,
    NaiveStrategy,
    NashEquilibriumStrategy,
    PolicyGradientBanditStrategy,
    StatisticalStrategy,
    UCBBanditStrategy,
    UniformStrategy,
    ZeroIntelligenceStrategy,
)

# Optional RL strategies: HAS_RL records whether they could be imported, so
# code further down can skip anything that needs them.
try:
    from continuous_blackjack.rl import ActorCriticStrategy, DQNStrategy
    HAS_RL = True
except Exception:
    # NOTE(review): any import-time failure (not only ImportError) silently
    # disables the RL strategies. Presumably deliberate because the RL module
    # depends on a heavy optional package -- confirm.
    HAS_RL = False


def _safe_name(name):
    return re.sub(r"[^a-zA-Z0-9._-]+", "_", name).strip("_")


def _checkpoint_path(model_dir, player_id, strategy_name):
    """Return the checkpoint file path for one player inside *model_dir*.

    The file is named ``player_<id>_<strategy>.pt``: the id is zero-padded
    to two digits and the strategy name is passed through ``_safe_name``.
    """
    directory = Path(model_dir)
    filename = f"player_{player_id:02d}_{_safe_name(strategy_name)}.pt"
    return directory.joinpath(filename)


def maybe_load_models(game, model_dir):
    """Load existing checkpoints for the players of *game*.

    Players without a ``load_checkpoint`` attribute, and players whose
    checkpoint file does not exist under *model_dir*, are skipped.

    Returns the number of checkpoints loaded; 0 when *model_dir* is falsy.
    """
    if not model_dir:
        return 0

    restored = 0
    for seat, player in enumerate(game.players):
        if hasattr(player, "load_checkpoint"):
            checkpoint = _checkpoint_path(model_dir, seat, player.name)
            if checkpoint.exists():
                player.load_checkpoint(checkpoint)
                restored += 1
    return restored


def maybe_save_models(game, model_dir):
    """Save a checkpoint for every player of *game* that supports it.

    Players without a ``save_checkpoint`` attribute are skipped. The
    checkpoint directory (including missing parents) is created before any
    player is asked to save, so a first run into a fresh directory does not
    depend on each strategy's ``save_checkpoint`` creating it.

    Returns the number of checkpoints saved; 0 when *model_dir* is falsy.
    """
    if not model_dir:
        return 0

    # The save_checkpoint implementations are not visible here, so make sure
    # the destination exists rather than assuming they create it.
    Path(model_dir).mkdir(parents=True, exist_ok=True)

    saved = 0
    for player_id, strategy in enumerate(game.players):
        if not hasattr(strategy, "save_checkpoint"):
            continue
        path = _checkpoint_path(model_dir, player_id, strategy.name)
        strategy.save_checkpoint(path)
        saved += 1
    return saved


def build_strategy_sets():
    """Build the candidate player lineups for the experiment.

    Returns a list of lineups, each a list of five strategy instances.
    Seven lineups are always present; four more, each ending in a DQN or
    Actor-Critic player, are appended when ``HAS_RL`` is true.

    Each strategy is instantiated once, so the same object is reused across
    lineups and sometimes occupies several seats within one lineup.
    NOTE(review): that sharing matters only if a strategy keeps per-player
    state -- confirm it is intended.
    """
    nash = NashEquilibriumStrategy()
    zero_intel = ZeroIntelligenceStrategy()
    naive_player = NaiveStrategy()
    adaptive_nash = AdaptiveNashEquilibriumStrategy()
    uniform_uninformed = UniformStrategy("uninformed")
    uniform_informed = UniformStrategy("informed")
    adaptive_player = AdaptiveStrategy()
    stat_player = StatisticalStrategy()
    ucb_bandit = UCBBanditStrategy(confidence_level=3)
    eps_greedy = EpsilonGreedyBanditStrategy(
        exploration_rate=0.1,
        exploration_decay=0.99,
        exploration_decay_rounds=10_000,
    )
    pg_bandit = PolicyGradientBanditStrategy(baseline=1.0, learning_rate=0.01)

    lineups = [
        [nash, naive_player, uniform_uninformed, uniform_informed, stat_player],
        [nash, uniform_uninformed, uniform_uninformed, nash, adaptive_nash],
        [nash, uniform_uninformed, uniform_informed, nash, eps_greedy],
        [nash, zero_intel, zero_intel, nash, adaptive_nash],
        [nash, zero_intel, naive_player, uniform_uninformed, ucb_bandit],
        [nash, uniform_uninformed, uniform_informed, naive_player, pg_bandit],
        [nash, uniform_uninformed, zero_intel, eps_greedy, adaptive_player],
    ]

    # Without the optional RL module only the classical lineups are offered.
    if not HAS_RL:
        return lineups

    dqn_agent = DQNStrategy(action_bins=1024, lr=0.001)
    ac_agent = ActorCriticStrategy(
        action_bins=1024,
        actor_lr=0.001,
        critic_lr=0.001,
    )
    lineups += [
        [nash, uniform_uninformed, zero_intel, ucb_bandit, dqn_agent],
        [nash, uniform_uninformed, zero_intel, eps_greedy, ac_agent],
        [nash, zero_intel, zero_intel, zero_intel, ac_agent],
        [nash, uniform_informed, ucb_bandit, stat_player, dqn_agent],
    ]
    return lineups


In [None]:
# Parameters
# `blocks` and `rounds_per_block` are forwarded to game.run() below as
# num_blocks / rounds_per_block.
blocks = 2
rounds_per_block = 2000
enable_log = True  # forwarded to game.run() as log=
# Checkpoint directories; a falsy value (None) skips loading / saving.
load_model_dir = None  # e.g. "/private/tmp/cb_ckpt"
save_model_dir = None  # e.g. "/private/tmp/cb_ckpt"

strategy_sets = build_strategy_sets()
# Index of the first lineup containing an ActorCriticStrategy, matched by
# class name; None when no lineup has one.
actor_critic_index = next(
    (
        i
        for i, strategies in enumerate(strategy_sets)
        if any(s.__class__.__name__ == "ActorCriticStrategy" for s in strategies)
    ),
    None,
)
# Fall back to the second lineup (index 1) when no Actor-Critic lineup exists.
set_index = actor_critic_index if actor_critic_index is not None else 1
print(f"Using set_index={set_index} (Actor-Critic ready={actor_critic_index is not None})")

game = ContinuousBlackjackGame(strategy_sets[set_index])
# Restore any existing checkpoints before playing.
loaded = maybe_load_models(game, load_model_dir)
if load_model_dir:
    print(f"loaded checkpoints: {loaded} from {load_model_dir}")

game.run(num_blocks=blocks, rounds_per_block=rounds_per_block, log=enable_log)

# Persist checkpoints after the run.
saved = maybe_save_models(game, save_model_dir)
if save_model_dir:
    print(f"saved checkpoints: {saved} to {save_model_dir}")

# `summary` is consumed by the results cell that follows.
summary = game.summary()


In [None]:
# Final results: single dataframe
# Per-player totals, indexed by player label, with display-friendly headers.
player_df = (
    pd.DataFrame(summary["player_summary"])
    .set_index("label")
    .rename(
        columns={
            "player_id": "Player ID",
            "strategy": "Strategy",
            "wins": "Wins",
            "win_rate": "Win Rate",
        }
    )
)
player_df.index.name = "Player"

# One row per player, one "Win Share from Position k" column per position.
position_mix_df = pd.DataFrame(
    summary["position_given_win"],
    index=summary["player_labels"],
    columns=[
        f"Win Share from Position {position}"
        for position in range(1, len(summary["player_labels"]) + 1)
    ],
)
position_mix_df.index.name = "Player"

# Merge both views, express the win rate as a percentage, and order the rows
# best-first; the raw "Win Rate" column is only needed as the sort tiebreaker.
final_results_df = (
    player_df.join(position_mix_df, how="left")
    .assign(**{"Win Rate (%)": lambda frame: frame["Win Rate"] * 100})
    .sort_values(["Wins", "Win Rate"], ascending=False)
    .drop(columns=["Win Rate"])
)
final_results_df.insert(0, "Rank", range(1, len(final_results_df) + 1))
final_results_df
