In [28]:
from __future__ import annotations

from dataclasses import dataclass
import enum


class Event(enum.IntEnum):
    DRAW1 = 1
    DRAW2 = 2
    DRAW3 = 3
    BET = 4
    CHECK = 5
    CALL = 6
    FOLD = 7


Player = int
Card = int


@dataclass(slots=True, frozen=True)
class InfoSet:
    card: Card
    history: tuple[Event, ...]

    def actions(self):
        if len(self.history) == 0:
            return (Event.CHECK, Event.BET, Event.FOLD)
        elif len(self.history) == 1:
            last = self.history[-1]
            if last == Event.BET:
                return (Event.CALL, Event.FOLD)
            elif last == Event.CHECK:
                return (Event.BET, Event.CHECK)
            raise ValueError
        else:
            assert len(self.history) == 2 and self.history == (Event.CHECK, Event.BET), self.history
            return (Event.CALL, Event.FOLD)


@dataclass(slots=True, frozen=True)
class OCP:
    history: tuple[Event, ...] = ()

    @property
    def terminal(self):
        if not self.history:
            return False
        
        last = self.history[-1]
        if last == Event.FOLD:
            return True
        elif last == Event.CALL:
            return len(self.history) >= 4
        elif last == Event.CHECK:
            return len(self.history) >= 4

        return False
    
    def payoff(self, player: Player):
        value = 1
        if Event.BET in self.history:
            value = 2
        
        if self.history[0] < self.history[1]:
            value = -value
        
        if player == 1:
            value = -value
        
        return value

    @property
    def chance(self):
        return len(self.history) <= 1
    
    def chances(self) -> dict[Event, float]:
        chances = {Event.DRAW1: 1 / 3, Event.DRAW2: 1 / 3, Event.DRAW3: 1 / 3}
        if not self.history:
            return chances
        
        chances.pop(self.history[0])
        for k, v in chances.items():
            chances[k] = 1 / 2
        
        return chances

    @property
    def active(self) -> Player:
        return len(self.history) % 2

    def infoset(self, player: Player) -> InfoSet:
        return InfoSet(
            card=int(self.history[player]),
            history=self.history[2:],
        )
    
    def apply(self, event: Event) -> OCP:
        return OCP(self.history + (event,))

In [32]:
def matching(regrets):
    regrets = [max(0, r) for r in regrets]
    denom = sum(regrets)

    if denom > 0:
        return [r / denom for r in regrets]

    return [1 / len(regrets)] * len(regrets)


def walk(game, player, p0, p1, regrets, strategies):
    if game.terminal:
        return game.payoff(player)

    if game.chance:
        value = 0
        for action, p in game.chances().items():
            p0p = p0 if player == 0 else p0 * p
            p1p = p1 if player == 1 else p1 * p
            value += p * walk(game.apply(action), player, p0p, p1p, regrets, strategies)
        return value

    infoset = game.infoset(player)
    actions = infoset.actions()

    if infoset not in regrets:
        regrets[infoset] = {action: 0 for action in actions}
    if infoset not in strategies:
        strategies[infoset] = {action: 0 for action in actions}

    R = regrets[infoset]
    S = strategies[infoset]

    strategy = matching(R)
    cfs = {action: 0 for action in actions}
    value = 0

    for action, p in zip(actions, strategy):
        p0p = p0 * p if game.active == 0 else p0
        p1p = p1 * p if game.active == 1 else p1
        cf = walk(game.apply(action), player, p0p, p1p, regrets, strategies)

        cfs[action] = cf
        value += p * cf

    pi, pmi = p0, p1
    if game.active == 1:
        pi, pmi = p1, p0

    if game.active == player:
        for action, p in zip(actions, strategy):
            R[action] += pmi * (cfs[action] - value)
            S[action] += pi * p

    return value

In [35]:
from collections import defaultdict


regrets = defaultdict(lambda: defaultdict(int))
strategies = defaultdict(lambda: defaultdict(int))

for _ in range(10000):
    for player in range(2):
        walk(OCP(), player, 1, 1, regrets, strategies)

In [36]:
strategies

defaultdict(<function __main__.<lambda>()>,
            {InfoSet(card=1, history=()): {<Event.CHECK: 5>: 6250.0,
              <Event.BET: 4>: 5000.0,
              <Event.FOLD: 7>: 8750.0},
             InfoSet(card=1, history=(<Event.CHECK: 5>,)): {<Event.BET: 4>: 8888.888888888552,
              <Event.CHECK: 5>: 11111.111111108146},
             InfoSet(card=1, history=(<Event.CHECK: 5>, <Event.BET: 4>)): {<Event.CALL: 6>: 2884.6153846158536,
              <Event.FOLD: 7>: 3365.3846153837135},
             InfoSet(card=1, history=(<Event.BET: 4>,)): {<Event.CALL: 6>: 9230.769230768068,
              <Event.FOLD: 7>: 10769.230769230167},
             InfoSet(card=2, history=()): {<Event.CHECK: 5>: 6250.0,
              <Event.BET: 4>: 5000.0,
              <Event.FOLD: 7>: 8750.0},
             InfoSet(card=2, history=(<Event.CHECK: 5>,)): {<Event.BET: 4>: 8888.888888888552,
              <Event.CHECK: 5>: 11111.111111108146},
             InfoSet(card=2, history=(<Event.CHECK: 5>, 