In [3]:
from __future__ import annotations

from typing import ClassVar
from dataclasses import dataclass
import random
import enum

from zerosum.game import Player, Game


class Action(enum.IntEnum):
    """Every move that can appear in a Bluff history: chance draws and player decisions."""
    # Chance outcomes (the keys of Bluff.chances()). At showdown Bluff.payoff()
    # makes DRAW1 the losing draw for player 0.
    DRAW1 = 1
    DRAW2 = 2
    # Player decisions. BET/CHECK open the betting; CALL/FOLD answer a BET
    # (see InfoSet.actions()). The value 3 is not used.
    BET = 4
    CHECK = 5
    CALL = 6
    FOLD = 7


# Alias for a dealt card. Bluff.infoset() stores the chance Action (an IntEnum,
# hence an int) in fields of this type.
Card = int


@dataclass(slots=True, frozen=True)
class InfoSet:
    card: Card | None
    history: tuple[Action, ...]

    def actions(self) -> tuple[Action, ...]:
        if len(self.history) == 0:
            return (Action.CHECK, Action.BET)
        elif len(self.history) == 1:
            last = self.history[-1]
            if last == Action.BET:
                return (Action.CALL, Action.FOLD)
            elif last == Action.CHECK:
                return (Action.CHECK,)
        raise ValueError


@dataclass(slots=True, frozen=True)
class Bluff:
    players: ClassVar[int] = 2

    history: tuple[Action, ...] = ()

    @classmethod
    def default(cls):
        return cls()

    @property
    def terminal(self):
        return len(self.history) == 3

    def payoff(self, player: Player):
        value = 1
        if Action.BET in self.history and Action.CALL in self.history:
            value = 2

        if Action.FOLD in self.history:
            return value if 0 == player else -value

        if self.history[0] == Action.DRAW1:
            value = -value

        if player == 1:
            value = -value

        return value

    @property
    def chance(self):
        return len(self.history) == 0

    def chances(self) -> dict[Action, float]:
        chances = {Action.DRAW1: 2 / 3, Action.DRAW2: 1 / 3}
        return chances

    def sample(self) -> Action:
        return random.choice(list(self.chances().keys()))

    @property
    def active(self) -> Player:
        if len(self.history) <= 1:
            return 0
        return 1

    def infoset(self, player: Player) -> InfoSet:
        return InfoSet(
            card=self.history[0] if player == 0 else None,
            history=self.history[1:],
        )

    def apply(self, action: Action):
        return self.__class__(self.history + (action,))

In [4]:
import zerosum as zs

In [32]:
# Wrap the Bluff class defined above in a zerosum Algorithm driven by ESLCFR.
# NOTE(review): the meaning of ESLCFR's argument (1000) is not visible in this
# notebook — confirm against the zerosum package.
algo = zs.Algorithm(zs.ESLCFR(1000), Bluff)

In [35]:
# Call algo.once() 100,000 times; presumably each call is one training
# iteration — TODO confirm.
for _ in range(100000):
    algo.once()

In [34]:
# Show the strategy table: the output below maps each InfoSet to a float weight
# per action (the weights are not normalised to probabilities).
# NOTE(review): this cell's execution count (34) precedes the loop cell's (35),
# so the output shown was produced before the loop's most recent run.
algo.impl.strategies

{InfoSet(card=<Action.DRAW2: 2>, history=()): {<Action.CHECK: 5>: 0.0,
  <Action.BET: 4>: 2547.7570093457944},
 InfoSet(card=None, history=(<Action.CHECK: 5>,)): {<Action.CHECK: 5>: 5001.149532710281},
 InfoSet(card=None, history=(<Action.BET: 4>,)): {<Action.CALL: 6>: 3374.860472539911,
  <Action.FOLD: 7>: 1626.2890601703557},
 InfoSet(card=<Action.DRAW1: 1>, history=()): {<Action.CHECK: 5>: 1594.8886924208896,
  <Action.BET: 4>: 858.5038309435939}}

In [1]:
from zerosum.normal import zerosum
import zerosum as zs

In [2]:
# 2x2 payoff matrix matching the Bluff class's payoffs in expectation over
# chances() = {DRAW1: 2/3, DRAW2: 1/3}, from player 0's side and with player 0
# always betting on DRAW2: rows = player 0 bets / checks on DRAW1, columns =
# player 1 calls / folds.
# NOTE(review): assumes zerosum() takes the row player's payoffs — confirm.
game = zerosum((
    (-2 / 3, 1),
    (0, -1 / 3),
))

In [9]:
# Wrap the normal-form game in a zerosum Algorithm driven by ESCFR.
# NOTE(review): this rebinds `algo`, and the execution counts restart at 1
# from the imports above — this part looks like a separate kernel session.
algo = zs.Algorithm(zs.ESCFR(), game)

In [12]:
# Call algo.once() 50,000 times; presumably each call is one training
# iteration — TODO confirm.
for _ in range(50000):
    algo.once()

In [13]:
# Show the strategy table. The output below is keyed by an InfoSet with fields
# (player, n), i.e. not the InfoSet dataclass defined in this notebook, and
# holds unnormalised float weights per strategy index.
algo.impl.strategies

{InfoSet(player=0, n=2): {0: 5022.254280359624, 1: 25017.7457196403},
 InfoSet(player=1, n=2): {0: 40386.74249959822, 1: 19693.257500401713}}