In [25]:
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [31]:
%%cython -a
cimport numpy as cnp
import numpy as np
import cython

# Number of faces on each die; dice are rolled as integers 1..NUM_SIDES.
NUM_SIDES = 6
# One bid per (count, face) pair with count in {1, 2}, plus the DUDO challenge.
NUM_ACTIONS = NUM_SIDES * 2 + 1
# DUDO is encoded as the highest action index.
DUDO = NUM_ACTIONS - 1


class Node:
    """Per-information-set storage for regret matching.

    Attributes:
        info_set: The key this node was created for (kept for display only).
        regret_sum: float32 array of length NUM_ACTIONS, initialised to zero
            and updated by callers with accumulated regret.
        strategy: The strategy produced by the latest ``get_strategy`` call.
        strategy_sum: float32 array accumulating the realization-weighted
            strategies; ``get_avg_strategy`` normalises it.
    """

    def __init__(self, info_set):
        self.info_set = info_set
        self.regret_sum = np.zeros(NUM_ACTIONS, dtype=np.float32)
        self.strategy = np.zeros(NUM_ACTIONS, dtype=np.float32)
        self.strategy_sum = np.zeros(NUM_ACTIONS, dtype=np.float32)

    def get_strategy(self, realization_weight: cython.float):
        """Compute the current regret-matching strategy and accumulate it.

        Args:
            realization_weight: Weight applied to the strategy before it is
                added to ``strategy_sum``.

        Returns:
            Array of length NUM_ACTIONS: the positive part of ``regret_sum``
            normalised to sum to 1, or the uniform distribution when no
            regret is positive.  The same array is stored in ``self.strategy``.
        """
        self.strategy = np.maximum(self.regret_sum, 0)
        normalizing_sum = np.sum(self.strategy)
        if normalizing_sum > 0:
            self.strategy /= normalizing_sum
        else:
            # No positive regret: fall back to the uniform distribution.
            # np.ones defaults to float64, unlike the float32 arrays above.
            self.strategy = np.ones(NUM_ACTIONS) / NUM_ACTIONS
        self.strategy_sum += realization_weight * self.strategy
        return self.strategy

    def get_avg_strategy(self):
        """Return the average strategy as a list of built-in floats.

        ``strategy_sum`` is normalised to sum to 1.  When nothing has been
        accumulated yet the uniform distribution is returned.  Plain floats
        are returned in both cases so the result prints cleanly and has a
        consistent element type.
        """
        total = float(np.sum(self.strategy_sum))
        if total > 0:
            return [float(weight) / total for weight in self.strategy_sum]
        return [1.0 / NUM_ACTIONS] * NUM_ACTIONS

    def __str__(self):
        return f"{self.info_set} {self.get_avg_strategy()}"



def action_num_to_str(action_num):
    """Render an action index as text.

    Returns ``"DUDO"`` for the challenge action, otherwise ``"<count>x<face>"``
    using the pair produced by ``action_num_to_ints``.
    """
    if action_num != DUDO:
        count, face = action_num_to_ints(action_num)
        return f"{count}x{face}"
    return "DUDO"


def action_num_to_ints(action_num):
    """Decode a bid index into its ``(count, face)`` pair.

    Each run of NUM_SIDES consecutive indices shares one count (starting at 1).
    Within a run the faces go 2, 3, ..., NUM_SIDES and finally 1.
    """
    count = 1 + action_num // NUM_SIDES
    face = 1 + (action_num + 1) % NUM_SIDES
    return count, face


class History:
    """The set of actions played so far in one round.

    ``history[a]`` is True once action ``a`` has been played; the last slot is
    the DUDO challenge.  Instances compare and hash by value, so two histories
    that contain the same actions are the same dictionary key.  Do not mutate
    ``history`` in place after an instance has been used as a key; use
    ``next_history`` to derive a new instance instead.
    """

    def __init__(self):
        self.history = [False] * (NUM_SIDES * 2 + 1)

    def active_player(self):
        """Return the index (0 or 1) of the player to act.

        Computed as the parity of the number of unplayed slots.  The list has
        odd length, so the player to act on an empty history is 1 and the
        value alternates after every action.
        """
        return self.history.count(False) % 2

    def __str__(self):
        return ",".join(action_num_to_str(i) for i, v in enumerate(self.history) if v)

    def __repr__(self) -> str:
        return self.__str__()

    def __eq__(self, other):
        # Value equality is required for dictionary lookups: without it every
        # freshly built History would be a distinct key even when it holds
        # exactly the same actions.
        if not isinstance(other, History):
            return NotImplemented
        return self.history == other.history

    def __hash__(self):
        # Must agree with __eq__: equal flag lists give equal hashes.
        return hash(tuple(self.history))

    def next_history(self, action):
        """Return a new History with ``action`` marked as played.

        The receiver is left untouched.
        """
        new_history = History()
        new_history.history = self.history.copy()
        new_history.history[action] = True
        return new_history

    def is_terminal(self):
        """Return True once the last slot (the DUDO challenge) has been played."""
        return self.history[-1]


# Expected label for every action index (for NUM_SIDES == 6).  The table is
# exhaustive so that a mistake anywhere in the index -> (count, face) encoding
# is caught when the cell is compiled.
test_map = {
    0: "1x2",
    1: "1x3",
    2: "1x4",
    3: "1x5",
    4: "1x6",
    5: "1x1",
    6: "2x2",
    7: "2x3",
    8: "2x4",
    9: "2x5",
    10: "2x6",
    NUM_ACTIONS - 2: "2x1",
    NUM_ACTIONS - 1: "DUDO",
}

for k, v in test_map.items():
    # The failure message carries (index, expected, actual).
    assert action_num_to_str(k) == v, (k, v, action_num_to_str(k))


import random
import tqdm

# Maps an information set -- the (active player's die, History) tuple built in
# cfr() -- to its Node.  Module-level state: train() never clears it.
node_map = {}


def train(iterations):
    """Run ``iterations`` rounds of CFR on random dice and report the results.

    Each round rolls two dice, starts from an empty History and adds the
    value returned by ``cfr`` to a running total.  Afterwards the average
    value and every entry of ``node_map`` are printed.

    Args:
        iterations: Number of rounds to run.

    Returns:
        The average value per round as a built-in ``float``.  Each round's
        value is converted with ``float()`` because ``cfr`` may return a
        NumPy scalar, and types such as ``np.float32`` are rejected by
        consumers like ``fractions.Fraction`` on Python 3.12.

    Raises:
        ZeroDivisionError: If ``iterations`` is 0.
    """
    util = 0.0

    for _ in tqdm.tqdm(range(iterations)):
        dice = [random.randint(1, NUM_SIDES), random.randint(1, NUM_SIDES)]
        util += float(cfr(dice, History(), 1, 1))

    average = util / iterations
    print(f"Average game value: {average}")
    # Sort by die value first, then by the textual form of the history.
    for info_set in sorted(node_map, key=lambda x: (x[0], str(x[1]))):
        print(f"{info_set}: {node_map[info_set]}")
    return average


def get_last_bid_action(history):
    """Return the index of the highest bid played so far, or -1 if none.

    Only the bid slots are inspected; the final slot (the DUDO challenge) is
    ignored, so a history that contains nothing but the challenge also
    yields -1 instead of raising.

    Args:
        history: Object whose ``history`` attribute is the list of
            per-action flags.
    """
    bid_flags = history.history[:-1]
    return max((i for i, played in enumerate(bid_flags) if played), default=-1)

def payoff(dice, history):
    """Score the last bid in ``history`` against the two dice.

    A die matches the bid when it shows the bid's face or a 1.  A bid with
    count 1 holds when at least one of the two dice matches; any other count
    is treated as 2 and needs both dice to match.

    Returns:
        1 when the bid holds, -1 otherwise.
    """
    count, face = action_num_to_ints(get_last_bid_action(history))
    matching_faces = {face, 1}
    if count == 1:
        bid_holds = dice[0] in matching_faces or dice[1] in matching_faces
    else:
        # count is 2
        bid_holds = dice[0] in matching_faces and dice[1] in matching_faces
    return 1 if bid_holds else -1


def _legal_actions(last_bid_action):
    """Return the action indices that may follow ``last_bid_action``.

    Every index above the previous bid is allowed.  The DUDO challenge is
    excluded while there is no bid to challenge (``last_bid_action == -1``).
    """
    stop = DUDO if last_bid_action == -1 else NUM_ACTIONS
    return list(range(last_bid_action + 1, stop))


def _legal_strategy(node, legal_actions, realization_weight):
    """Regret-match over ``legal_actions`` only and record the result on ``node``.

    ``Node.get_strategy`` normalises over every action index, which would hand
    probability to moves that cannot be played, so legality is enforced here.
    Illegal actions always receive probability 0.  The strategy is stored in
    ``node.strategy`` and added, scaled by ``realization_weight``, to
    ``node.strategy_sum``.
    """
    positive_regret = np.maximum(node.regret_sum[legal_actions], 0)
    total = positive_regret.sum()
    strategy = np.zeros(NUM_ACTIONS, dtype=np.float32)
    if total > 0:
        strategy[legal_actions] = positive_regret / total
    else:
        # No positive regret yet: play uniformly over the legal moves.
        strategy[legal_actions] = 1.0 / len(legal_actions)
    node.strategy = strategy
    node.strategy_sum += realization_weight * strategy
    return strategy


def cfr(dice, history, p0, p1):
    """Run one counterfactual-regret pass below ``history``.

    Args:
        dice: The two dice, indexed by player.
        history: The History reached so far.
        p0: Reach weight contributed by player 0.
        p1: Reach weight contributed by player 1.

    Returns:
        The value of ``history`` from the point of view of the player whose
        turn it is there: ``payoff`` at a terminal history, otherwise the
        strategy-weighted sum of the negated child values, as a built-in
        number.

    Side effects:
        Creates missing entries in ``node_map`` and updates the visited
        nodes' ``strategy``, ``strategy_sum`` and ``regret_sum``.
    """
    if history.is_terminal():
        return payoff(dice, history)

    active_player = history.active_player()
    info_set = (dice[active_player], history)
    if info_set not in node_map:
        node_map[info_set] = Node(info_set)
    node = node_map[info_set]

    if active_player == 0:
        own_reach, opponent_reach = p0, p1
    else:
        own_reach, opponent_reach = p1, p0

    legal_actions = _legal_actions(get_last_bid_action(history))
    strategy = _legal_strategy(node, legal_actions, own_reach)

    util = {}
    node_util = 0.0
    for a in legal_actions:
        # Work with built-in floats so the value returned to the caller is a
        # plain Python number rather than a NumPy scalar.
        prob = float(strategy[a])
        next_history = history.next_history(a)

        # The child value is from the opponent's point of view, hence the sign.
        if active_player == 0:
            util[a] = -cfr(dice, next_history, p0 * prob, p1)
        else:
            util[a] = -cfr(dice, next_history, p0, p1 * prob)

        node_util += prob * util[a]

    # Only legal actions accumulate regret, weighted by the opponent's reach.
    # Illegal actions stay at zero and can never enter the strategy.
    for a in legal_actions:
        node.regret_sum[a] += opponent_reach * (util[a] - node_util)

    return node_util

Content of stderr:
In file included from /mnt/data/cfr-neller-paper/.env/lib/python3.12/site-packages/numpy/_core/include/numpy/ndarraytypes.h:1909,
                 from /mnt/data/cfr-neller-paper/.env/lib/python3.12/site-packages/numpy/_core/include/numpy/ndarrayobject.h:12,
                 from /mnt/data/cfr-neller-paper/.env/lib/python3.12/site-packages/numpy/_core/include/numpy/arrayobject.h:5,
                 from /home/ngarvey/.cache/ipython/cython/_cython_magic_54dc16b2f0ed5cc48f6239e8d81fa5b127b7a3e3.c:1255:
      |  ^~~~~~~

In [30]:
import fractions
# Run 5000 training iterations, then approximate the returned average game
# value by the closest fraction whose denominator is at most 50.
fractions.Fraction(train(5000)).limit_denominator(50)

 17%|█▋        | 854/5000 [04:56<23:59,  2.88it/s]  


KeyboardInterrupt: 