In [1]:
from typing import Dict

import numpy as np
import random

In [11]:
import numpy as np
import random

class Node:
	def __init__(self, num_actions):
		self.regret_sum = np.zeros(num_actions)
		self.strategy = np.zeros(num_actions)
		self.strategy_sum = np.zeros(num_actions)
		self.num_actions = num_actions

	def get_strategy(self):
		normalizing_sum = 0
		for a in range(self.num_actions):
			if self.regret_sum[a] > 0:
				self.strategy[a] = self.regret_sum[a]
			else:
				self.strategy[a] = 0
			normalizing_sum += self.strategy[a]

		for a in range(self.num_actions):
			if normalizing_sum > 0:
				self.strategy[a] /= normalizing_sum
			else:
				self.strategy[a] = 1.0/self.num_actions

		return self.strategy

	def get_average_strategy(self):
		avg_strategy = np.zeros(self.num_actions)
		normalizing_sum = 0
		
		for a in range(self.num_actions):
			normalizing_sum += self.strategy_sum[a]
		for a in range(self.num_actions):
			if normalizing_sum > 0:
				avg_strategy[a] = self.strategy_sum[a] / normalizing_sum
			else:
				avg_strategy[a] = 1.0 / self.num_actions
		
		return avg_strategy

class KuhnCFR:
	def __init__(self, iterations, decksize):
		self.nbets = 2
		self.iterations = iterations
		self.decksize = decksize
		self.cards = np.arange(decksize)
		self.bet_options = 2
		self.nodes = {}

	def cfr_iterations_external(self):
		util = np.zeros(2)
		for t in range(1, self.iterations + 1): 
			for i in range(2):
				random.shuffle(self.cards)
				util[i] += self.external_cfr(self.cards[:2], [], 2, 0, i, t)
				# print(i, util[i])
		print('Average game value: {}'.format(util[0]/(self.iterations)))
		for i in sorted(self.nodes):
			print(i, self.nodes[i].get_average_strategy())

	def external_cfr(self, cards, history, pot, nodes_touched, traversing_player, t):
		# print('THIS IS ITERATION', t)
		# print(cards, history, pot)
		plays = len(history)
		acting_player = plays % 2
		opponent_player = 1 - acting_player

        # Is terminal? Return payoffs.
		if plays >= 2:
			if history[-1] == 0 and history[-2] == 1: #bet fold
				if acting_player == traversing_player:
					return 1
				else:
					return -1
			if (history[-1] == 0 and history[-2] == 0) or (history[-1] == 1 and history[-2] == 1): # Check-Check or Bet-Call, go to showdown
				if acting_player == traversing_player:
					if cards[acting_player] > cards[opponent_player]:
						return pot/2 #profit
					else:
						return -pot/2
				else:
					if cards[acting_player] > cards[opponent_player]:
						return -pot/2
					else:
						return pot/2

		infoset = str(cards[acting_player]) + str(history)
		if infoset not in self.nodes:
			self.nodes[infoset] = Node(self.bet_options)

		nodes_touched += 1

		if acting_player == traversing_player:
			util = np.zeros(self.bet_options) #2 actions
			node_util = 0
			strategy = self.nodes[infoset].get_strategy()
			for a in range(self.bet_options):
				next_history = history + [a]
				pot += a
				util[a] = self.external_cfr(cards, next_history, pot, nodes_touched, traversing_player, t)
				node_util += strategy[a] * util[a]

			for a in range(self.bet_options):
				regret = util[a] - node_util
				self.nodes[infoset].regret_sum[a] += regret
			return node_util

		else: #acting_player != traversing_player
			strategy = self.nodes[infoset].get_strategy()
			util = 0
			if random.random() < strategy[0]:
				next_history = history + [0]
			else: 
				next_history = history + [1]
				pot += 1
			util = self.external_cfr(cards, next_history, pot, nodes_touched, traversing_player, t)
			for a in range(self.bet_options):
				self.nodes[infoset].strategy_sum[a] += strategy[a]
			return util

k = KuhnCFR(100000, 3)
k.cfr_iterations_external()

Average game value: -0.058046546251819646
0[0, 1] [9.99981252e-01 1.87476565e-05]
0[0] [0.66707662 0.33292338]
0[1] [9.99954983e-01 4.50166562e-05]
0[] [0.7981129 0.2018871]
1[0, 1] [0.45458968 0.54541032]
1[0] [9.99846427e-01 1.53572964e-04]
1[1] [0.66196807 0.33803193]
1[] [9.99567149e-01 4.32851393e-04]
2[0, 1] [1.28865979e-04 9.99871134e-01]
2[0] [2.99886043e-05 9.99970011e-01]
2[1] [1.49943022e-05 9.99985006e-01]
2[] [0.35010448 0.64989552]


In [2]:
CHECK = 0
BET = 1
NUM_ACTIONS = 2

def sample_action(strategy):
    return 0 if random.random() < strategy[0] else 1

class Node(object):
    """Store the current game state.
    """

    def __init__(self, pot=0, deciding_player=-1, history=None, deal=False, cards=None, total_cards=3):
        self._history = history if history is not None else []
        self._cards = cards if cards is not None else [None, None]
        self._pot = pot
        self._deciding_player = deciding_player
        self._deal = deal
        self._total_cards = total_cards

    @property
    def deciding_player_cards(self):
        return self._cards[self._deciding_player]

    @property
    def deciding_player(self):
        return self._deciding_player

    @property
    def chance_node(self):
        return self._deal

    @property
    def history(self):
        return self._history

    def sample_chance(self):
        assert self.chance_node
        deck = list(range(self._total_cards))
        random.shuffle(deck)

        return Node(
            pot=2,
            deciding_player=0,
            history=[],
            deal=False,
            cards=[deck[0], deck[1]],
            total_cards=self._total_cards
        )

    def terminal(self, perspective_player):
        """Return (is_terminal, utility).
        """

        is_terminal = len(self._history) >= 2

        if not is_terminal:
            return False, None

        if self._history[-1] == 0 and self._history[-2] == 1: #bet fold
            if self._deciding_player == perspective_player:
                return True, 1
            else:
                return True, -1
        if (self._history[-1] == 0 and self._history[-2] == 0) or (self._history[-1] == 1 and self._history[-2] == 1): #check check or bet call, go to showdown
            if self._deciding_player == perspective_player:
                if self._cards[self._deciding_player] > self._cards[1 - self._deciding_player]:
                    return True, self._pot/2 #profit
                else:
                    return True, -self._pot/2
            else:
                if self._cards[self._deciding_player] > self._cards[1 - self._deciding_player]:
                    return True, -self._pot/2
                else:
                    return True, self._pot/2

        return False, None

    def take_action(self, action):
        assert not self.chance_node

        new_pot = self._pot + (1 if action == BET else 0)

        return Node(
            pot=new_pot,
            deciding_player=1 - self._deciding_player,
            history=self._history + [action],
            deal=False,
            cards=self._cards,
            total_cards=self._total_cards
        )


class InfoSet(object):
    def __init__(self, num_actions):
        self.regret_sum = np.zeros(num_actions)
        self.strategy = np.zeros(num_actions)
        self.strategy_sum = np.zeros(num_actions)
        self.num_actions = num_actions

    def get_strategy(self):
        normalizing_sum = 0
        for a in range(self.num_actions):
            if self.regret_sum[a] > 0:
                self.strategy[a] = self.regret_sum[a]
            else:
                self.strategy[a] = 0
            normalizing_sum += self.strategy[a]

        for a in range(self.num_actions):
            if normalizing_sum > 0:
                self.strategy[a] /= normalizing_sum
            else:
                self.strategy[a] = 1.0/self.num_actions

        return self.strategy

    def get_average_strategy(self):
        avg_strategy = np.zeros(self.num_actions)
        normalizing_sum = 0
        
        for a in range(self.num_actions):
            normalizing_sum += self.strategy_sum[a]
        for a in range(self.num_actions):
            if normalizing_sum > 0:
                avg_strategy[a] = self.strategy_sum[a] / normalizing_sum
            else:
                avg_strategy[a] = 1.0 / self.num_actions
        
        return avg_strategy


def mccfr_iteration(node: Node, traversing_player: int, infosets: Dict[int, InfoSet]):
    if node.chance_node:
        return mccfr_iteration(node.sample_chance(), traversing_player, infosets)

    is_terminal, utility = node.terminal(traversing_player)
    if is_terminal:
        return utility

    info_set_key = (node.deciding_player, node.deciding_player_cards, tuple(node.history))
    if info_set_key not in infosets:
        infosets[info_set_key] = InfoSet(NUM_ACTIONS)
    infoset = infosets[info_set_key]

    if node.deciding_player == traversing_player:
        util = np.zeros(NUM_ACTIONS)
        infoset_util = 0
        strategy = infoset.get_strategy()

        for a in range(NUM_ACTIONS):
            next_node = node.take_action(a)
            util[a] = mccfr_iteration(next_node, traversing_player, infosets)
            infoset_util += strategy[a] * util[a]

        for a in range(NUM_ACTIONS):
            regret = util[a] - infoset_util
            infoset.regret_sum[a] += regret

        return infoset_util

    strategy = infoset.get_strategy()
    action = sample_action(strategy)
    next_node = node.take_action(action)
    util = mccfr_iteration(next_node, traversing_player, infosets)

    for a in range(NUM_ACTIONS):
        infoset.strategy_sum[a] += strategy[a]

    return util

def mccfr(iterations):
    infosets = {}

    util = np.zeros(2)
    for t in range(1, iterations + 1): 
        for traversing_player in range(2):
            node = Node(pot=2, deal=True)
            util[traversing_player] += mccfr_iteration(node, traversing_player, infosets)

    print('Average game value: {}'.format(util[0]/(iterations)))
    for i in sorted(infosets.keys()):
        print(i, infosets[i].get_average_strategy())

    return infosets

infosets = mccfr(100000)


Average game value: -0.047773684566606786
(0, 0, ()) [0.68582453 0.31417547]
(0, 0, (0, 1)) [1. 0.]
(0, 1, ()) [9.99790282e-01 2.09717782e-04]
(0, 1, (0, 1)) [0.35494489 0.64505511]
(0, 2, ()) [0.07216767 0.92783233]
(0, 2, (0, 1)) [2.08333333e-04 9.99791667e-01]
(1, 0, (0,)) [0.65714453 0.34285547]
(1, 0, (1,)) [9.99985071e-01 1.49289383e-05]
(1, 1, (0,)) [9.99880067e-01 1.19932838e-04]
(1, 1, (1,)) [0.64901079 0.35098921]
(1, 2, (0,)) [1.50802268e-04 9.99849198e-01]
(1, 2, (1,)) [3.01604536e-05 9.99969840e-01]


In [8]:
def play_against_random():
    root_chance_node = Node(deal=True)
    current_node = root_chance_node.sample_chance()
    turn = "BOT"

    while True:
        term, util = current_node.terminal(0)

        if term:
            return util

        valid_actions = [0, 1]

        if turn == "BOT":
            k = (current_node.deciding_player, current_node.deciding_player_cards, tuple(current_node.history))
            if k in infosets:
                bot_strategy = infosets[k].get_average_strategy()
            else:
                bot_strategy = np.ones(len(valid_actions)) / len(valid_actions)

            bot_move = sample_action(bot_strategy)
            current_node = current_node.take_action(bot_move)

            turn = "RAND"
        else:
            rand_strat = np.ones(len(valid_actions)) / len(valid_actions)
            rand_move = sample_action(rand_strat)

            current_node = current_node.take_action(rand_move)
            turn = "BOT"

wins = 0
games = 10000

for _ in range(games):
    wins += play_against_random()

print(f"Win percentage: {wins / games * 100:.2f}%")

Win percentage: 16.20%


In [23]:
# Start the game.
deck = list(range(3))
random.shuffle(deck)
player_card = deck[0]
bot_card = deck[1]
pot = 2

history = []

print("Your card is: {}".format(player_card), ", Pot is", pot)

Your card is: 2 , Pot is 2


In [24]:
my_action = BET
history = history + [my_action]
bot_strat = infosets[(1, bot_card, tuple(history))].get_average_strategy()
bot_action = sample_action(bot_strat)
history = history + [bot_action]

print("Bot did", "check" if bot_action == CHECK else "bet")

Bot did bet


In [22]:
bot_card

1