In [16]:
# Imports
import random
import numpy as np

In [17]:
# Data Definitions
# Action constants. The bet size is fixed for now.
CHECK, BET = 0, 1
# Number of actions available at a decision point: Check, Bet.
NUM_ACTIONS = 2

In [18]:
class PokerNode:
    """Per-information-set bookkeeping for regret matching.

    Attributes:
        infoSet: Key identifying the information set (assigned by the code
            that creates the node); used as the label when printing.
        regretSum: Accumulated regret per action.
        strategy: Most recently computed strategy (one probability per action).
        strategySum: Sum of the computed strategies, each weighted by the
            realization weight passed to getStrategy().
    """

    def __init__(self, infoSet="", numActions=None):
        """Create a node with zeroed accumulators.

        Args:
            infoSet: Optional information-set key; defaults to "".
            numActions: Number of actions tracked by this node. Defaults to
                the module-level NUM_ACTIONS.
        """
        if numActions is None:
            numActions = NUM_ACTIONS
        # Stored as `infoSet` so it matches what __str__ and the trainer use.
        self.infoSet = infoSet
        self.regretSum = [0.0] * numActions
        self.strategy = [0.0] * numActions
        self.strategySum = [0.0] * numActions

    @property
    def infoset(self):
        """Alias of `infoSet`, kept for the attribute's earlier spelling."""
        return self.infoSet

    @infoset.setter
    def infoset(self, value):
        self.infoSet = value

    def getStrategy(self, realizationWeight):
        """Compute the current strategy by regret matching and accumulate it.

        Each action's probability is proportional to its positive accumulated
        regret; when no action has positive regret the strategy is uniform.
        The result is also added to `strategySum`, scaled by
        `realizationWeight`.

        Args:
            realizationWeight: Weight applied to this strategy when adding it
                to `strategySum`.

        Returns:
            `self.strategy` (the same list object, updated in place).
        """
        numActions = len(self.regretSum)
        positiveRegrets = [r if r > 0 else 0.0 for r in self.regretSum]
        normalizingSum = sum(positiveRegrets)
        for action in range(numActions):
            if normalizingSum > 0:
                self.strategy[action] = positiveRegrets[action] / normalizingSum
            else:
                # No positive regret anywhere: fall back to a uniform strategy.
                self.strategy[action] = 1.0 / numActions
            self.strategySum[action] += realizationWeight * self.strategy[action]
        return self.strategy

    def getAverageStrategy(self):
        """Return `strategySum` normalized to sum to 1.

        Returns:
            A new list of probabilities; uniform if nothing has been
            accumulated yet.
        """
        numActions = len(self.strategySum)
        normalizingSum = sum(self.strategySum)
        if normalizingSum > 0:
            return [total / normalizingSum for total in self.strategySum]
        return [1.0 / numActions] * numActions

    def __str__(self):
        """Format the info-set key and the rounded average strategy.

        Only the first two actions are shown, so the node needs at least two.
        """
        avgStratsNice = [round(num, 4) for num in self.getAverageStrategy()]
        return f"{self.infoSet:>4}: Pass: {avgStratsNice[0]} Bet: {avgStratsNice[1]}"

In [19]:
# Table of information-set nodes used by the free-standing cfr() below,
# keyed by the info-set string (acting player's card + action history).
nodeMap = dict()


def cfr(cards, history, p0, p1):
    """Run one counterfactual-regret pass from `history` and return its utility.

    Nodes are looked up in (and added to) the module-level `nodeMap`.

    Args:
        cards: Sequence indexed by player number (0 and 1) giving each
            player's card; cards are compared with `>`.
        history: String of actions taken so far, 'p' for pass and 'b' for bet.
        p0: Reach probability contributed by player 0's strategy.
        p1: Reach probability contributed by player 1's strategy.

    Returns:
        The utility of this state from the point of view of the player whose
        turn it is (the payoff itself at terminal states).
    """
    plays = len(history)
    player = plays % 2
    opponent = 1 - player

    # Getting return payoffs for terminal states
    if plays > 1:
        terminalPass = history[plays - 1] == 'p'
        doubleBet = history[plays - 2 : plays] == "bb"
        isPlayerCardHigher = cards[player] > cards[opponent]
        if terminalPass:
            if history == "pp":
                # Both passed: showdown for 1.
                return 1 if isPlayerCardHigher else -1
            # A pass after a bet: the player now "to act" wins 1.
            return 1
        if doubleBet:
            # Bet and call: showdown for 2.
            return 2 if isPlayerCardHigher else -2

    infoSet = str(cards[player]) + history

    # Get info set node, or create it
    node = nodeMap.get(infoSet)
    if node is None:
        node = PokerNode()
        node.infoSet = infoSet
        nodeMap[infoSet] = node

    # Recursively call cfr with additional history and probability for each action
    strategy = node.getStrategy(p0 if player == 0 else p1)
    util = [0.0] * NUM_ACTIONS
    nodeUtil = 0
    for action in range(NUM_ACTIONS):
        nextHistory = history + ("p" if action == 0 else "b")
        # The child's value is from the opponent's perspective, hence the negation.
        if player == 0:
            util[action] = -cfr(cards, nextHistory, p0 * strategy[action], p1)
        else:
            util[action] = -cfr(cards, nextHistory, p0, p1 * strategy[action])
        nodeUtil += strategy[action] * util[action]

    # Compute and accumulate cfr for each action, weighted by the
    # opponent's reach probability.
    opponentReach = p1 if player == 0 else p0
    for action in range(NUM_ACTIONS):
        regret = util[action] - nodeUtil
        node.regretSum[action] += opponentReach * regret

    return nodeUtil

In [26]:
class PokerTrainer:
    """Trains a strategy for a three-card pass/bet poker game using
    counterfactual regret minimization (CFR).

    Attributes:
        nodeMap: Maps an information-set key (acting player's card followed
            by the action history) to its PokerNode.
    """

    def __init__(self):
        self.nodeMap = dict()

    def train(self, iterations, seed=None):
        """Run CFR for `iterations` random deals and print the results.

        Prints the average utility returned by cfr() from the root, followed
        by every node sorted by its info-set key.

        Args:
            iterations: Number of shuffled deals to train on. With zero (or
                fewer) iterations the average is reported as 0.0.
            seed: Optional seed. When given, the shuffles come from a private
                `random.Random(seed)` so the run is reproducible; when
                omitted, the module-level `random.shuffle` is used.
        """
        shuffle = random.shuffle if seed is None else random.Random(seed).shuffle
        cards = list(range(1, 4))
        util = 0.0
        for _ in range(iterations):
            shuffle(cards)
            util += self.cfr(cards, "", 1, 1)
        # Avoid dividing by zero when no iterations were run.
        averageUtil = util / iterations if iterations > 0 else 0.0
        print("Average game value: ", averageUtil)
        for node in sorted(self.nodeMap.values(), key=(lambda node: node.infoSet)):
            print(node)

    @staticmethod
    def _terminalUtility(cards, history):
        """Return the payoff for the player to act if `history` is terminal, else None."""
        plays = len(history)
        if plays <= 1:
            return None
        player = plays % 2
        opponent = 1 - player
        isPlayerCardHigher = cards[player] > cards[opponent]
        if history[-1] == 'p':
            if history == "pp":
                # Both passed: showdown for 1.
                return 1 if isPlayerCardHigher else -1
            # A pass after a bet: the player now "to act" wins 1.
            return 1
        if history[-2:] == "bb":
            # Bet and call: showdown for 2.
            return 2 if isPlayerCardHigher else -2
        return None

    def cfr(self, cards, history, p0, p1):
        """Run one counterfactual-regret pass from `history` and return its utility.

        Args:
            cards: Sequence indexed by player number (0 and 1) giving each
                player's card; cards are compared with `>`.
            history: String of actions so far, 'p' for pass and 'b' for bet.
            p0: Reach probability contributed by player 0's strategy.
            p1: Reach probability contributed by player 1's strategy.

        Returns:
            The utility of this state from the point of view of the player
            whose turn it is (the payoff itself at terminal states).
        """
        # Getting return payoffs for terminal states
        terminalUtil = self._terminalUtility(cards, history)
        if terminalUtil is not None:
            return terminalUtil

        player = len(history) % 2
        infoSet = str(cards[player]) + history

        # Get info set node, or create it
        node = self.nodeMap.get(infoSet)
        if node is None:
            node = PokerNode()
            node.infoSet = infoSet
            self.nodeMap[infoSet] = node

        # Recursively call cfr with additional history and probability for each action
        strategy = node.getStrategy(p0 if player == 0 else p1)
        util = [0.0] * NUM_ACTIONS
        nodeUtil = 0
        for action in range(NUM_ACTIONS):
            nextHistory = history + ("p" if action == 0 else "b")
            # The child's value is from the opponent's perspective, hence the negation.
            if player == 0:
                util[action] = -self.cfr(cards, nextHistory, p0 * strategy[action], p1)
            else:
                util[action] = -self.cfr(cards, nextHistory, p0, p1 * strategy[action])
            nodeUtil += strategy[action] * util[action]

        # Compute and accumulate cfr for each action, weighted by the
        # opponent's reach probability.
        opponentReach = p1 if player == 0 else p0
        for action in range(NUM_ACTIONS):
            regret = util[action] - nodeUtil
            node.regretSum[action] += opponentReach * regret

        return nodeUtil


In [27]:
# Build a fresh trainer and run 1000 CFR iterations; train() prints the
# average game value followed by the average strategy of every info set.
trainer = PokerTrainer()
trainer.train(iterations=1000)

Average game value:  -0.059772321804762725
   1: Pass: 0.7688 Bet: 0.2312
  1b: Pass: 0.9985 Bet: 0.0015
  1p: Pass: 0.6737 Bet: 0.3263
 1pb: Pass: 0.999 Bet: 0.001
   2: Pass: 0.9744 Bet: 0.0256
  2b: Pass: 0.516 Bet: 0.484
  2p: Pass: 0.9877 Bet: 0.0123
 2pb: Pass: 0.376 Bet: 0.624
   3: Pass: 0.2605 Bet: 0.7395
  3b: Pass: 0.0015 Bet: 0.9985
  3p: Pass: 0.0015 Bet: 0.9985
 3pb: Pass: 0.0029 Bet: 0.9971
