- Reference 1 : https://medium.com/ai-in-plain-english/building-a-poker-ai-part-8-leduc-holdem-and-a-more-generic-cfr-algorithm-in-python-9dd036bf9a30
- Reference 2 : https://github.com/int8/counterfactual-regret-minimization
- Reference 3 : https://github.com/scfenton6/leduc-cfr-poker-bot/blob/main/cfr_vs_others.py

In [None]:
import numpy as np
import math
import json
from random import shuffle
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')

## 1 Basic CFR

In [None]:
# Action codes used in history strings --
# card dealt: d, check: x, fold: f, call: c, bet: b, raise: r
#
# Betting sequences that close a betting round without a fold. A history that
# equals one of these is treated as a chance node (the flop is dealt next);
# a post-flop segment that equals one of these is treated as terminal.
chance_nodes = {'bc','xx', 'xbc', 'brc', 'xbrc'}

def rank(cards):
    """Return the strength rank of a two-card hand string.

    Pairs rank 1-3 (KK, QQ, JJ) and unpaired hands rank 4-6 regardless of
    card order; callers treat a lower number as the stronger hand.

    Raises:
        KeyError: if ``cards`` is not one of the known two-card hands.
    """
    strength = {'KK': 1, 'QQ': 2, 'JJ': 3}
    # Unpaired hands are order-insensitive, so register both spellings.
    for value, (high, low) in enumerate(('KQ', 'KJ', 'QJ'), start=4):
        strength[high + low] = value
        strength[low + high] = value
    return strength[cards]

def is_terminal(history):
    """Tell whether ``history`` ends the hand.

    A hand ends when the last action is a fold, or when the betting segment
    after the deal marker ``'d'`` is one of the round-closing sequences in
    ``chance_nodes`` (showdown).
    """
    if history[-1:] == 'f':
        return True
    if 'd' not in history:
        # Pre-flop rounds only end through a fold.
        return False
    return history.split('d')[1] in chance_nodes

def is_chance_node(history):
    """Tell whether the full ``history`` is a sequence after which a card is dealt."""
    deal_comes_next = history in chance_nodes
    return deal_comes_next

def terminal_util(history, card_player, card_opponent, card_flop):
    """Return the utility at a terminal node for the player passed as ``card_player``.

    Args:
        history: full action string; a ``'d'`` separates pre-flop and flop.
        card_player: private card of the player whose utility is returned.
        card_opponent: private card of the other player.
        card_flop: community card, only used at showdown.

    Returns:
        The pot when the hand ended with a fold, otherwise ``+pot``, ``-pot``
        or ``0`` depending on the showdown comparison (lower rank wins).
    """
    ante = 0.1
    # Amount each betting sequence adds to the pot on top of the ante.
    round_stakes = {'xx':0, 'bf':0, 'xbf':0, 'brf':0.1, 'xbrf':0.1, 'bc':0.1, 'xbc':0.1, 'brc':0.2, 'xbrc':0.2}

    if 'd' not in history:
        # No flop was dealt: the hand ended during the first round.
        return ante + round_stakes[history]

    preflop, flop = history.split('d')
    pot = ante + round_stakes[preflop] + round_stakes[flop]

    if history[-1:] == 'f':
        return pot

    # Showdown: compare both hands completed with the flop card.
    own_strength = rank(card_player + card_flop)
    other_strength = rank(card_opponent + card_flop)
    if own_strength < other_strength:
        return pot
    if own_strength > other_strength:
        return -pot
    return 0

def valid_actions(history):
    """List the actions available after ``history``.

    Action codes -- card dealt: d, check: x, fold: f, call: c, bet: b, raise: r.
    Returns ``None`` when the last action is none of ``''``, d, x, b or r.
    """
    last = history[-1:]
    if last in ('', 'd', 'x'):
        # Start of a round, or facing a check: check or bet.
        return ['x', 'b']
    if last == 'b':
        return ['f', 'c', 'r']
    if last == 'r':
        # No re-raise is offered after a raise.
        return ['f', 'c']
    return None

def get_active_player(history):
    """Return the index (0 or 1) of the player to act after ``history``.

    Players alternate within a betting round; after the deal marker ``'d'``
    the count restarts, so player 0 opens the flop round as well.
    """
    if 'd' in history:
        current_round = history.split('d')[1]
    else:
        current_round = history
    return len(current_round) % 2

In [None]:
class Leduc:
    """Leduc hold'em tree walker that accumulates log-ratio regrets.

    ``deck`` is read by position: index 0 is player 0's private card,
    index 1 is player 1's private card and index 2 is the flop card.
    """

    def __init__(self):
        self.deck = np.array(list('KKQQJJ'))

    def cfr(self, i_map, history="", pr_1=1, pr_2=1):
        """Walk the game tree below ``history`` and update regrets in ``i_map``.

        Args:
            i_map: dict mapping information-set keys to ``InformationSet``.
            history: action string played so far.
            pr_1: product of player 0's strategy probabilities along the path.
            pr_2: product of player 1's strategy probabilities along the path.

        Returns:
            At a terminal node, the value from ``terminal_util`` for the
            player indexed by ``get_active_player(history)``. At a decision
            node, the strategy-weighted geometric mean of ``1 - child value``
            minus one.
        """
        mover = get_active_player(history)
        own_card = self.deck[mover]
        other_card = self.deck[1 - mover]

        if is_terminal(history):
            return terminal_util(history, own_card, other_card, self.deck[2])

        # Pre-flop betting is closed: deal the flop and continue.
        if is_chance_node(history):
            after_deal = self.cfr(i_map, history + 'd', pr_1, pr_2)
            # After these sequences player 1 would be next, but player 0 opens
            # the flop round, so the child's value is flipped back.
            return -after_deal if history in {'xbc', 'brc'} else after_deal

        node = self.get_info_set(i_map, history, own_card, self.deck[2])
        sigma = node.strategy

        growth = np.zeros(node.n_actions)
        for idx, move in enumerate(valid_actions(history)):
            if mover == 0:
                child = self.cfr(i_map, history + move, pr_1 * sigma[idx], pr_2)
            else:
                child = self.cfr(i_map, history + move, pr_1, pr_2 * sigma[idx])
            # The child value belongs to the other player; negate and shift by
            # one so every entry is a growth factor.
            growth[idx] = -1 * child + 1

        # Strategy-weighted geometric mean of the per-action growth factors.
        geo_mean = np.power(growth, sigma).prod()
        log_regret = np.log(growth / geo_mean)

        # Own reach feeds the strategy average; the opponent's reach weights
        # the regret update.
        own_reach, other_reach = (pr_1, pr_2) if mover == 0 else (pr_2, pr_1)
        node.reach_pr += own_reach
        node.regret_sum += other_reach * log_regret

        return geo_mean - 1

    def get_info_set(self, i_map, history, card, flop):
        """Fetch the information set for (card[, flop], history), creating it on a miss.

        The key is ``"<card> <history>"`` before the flop and
        ``"<card><flop> <history>"`` once ``'d'`` appears in ``history``.
        """
        visible_cards = card + flop if 'd' in history else card
        key = visible_cards + " " + history
        if key not in i_map:
            # Three actions (fold/call/raise) only when facing a bet.
            action_count = 3 if history[-1:] == 'b' else 2
            i_map[key] = InformationSet(key, action_count)
        return i_map[key]

class InformationSet():
    """Regret and strategy accumulators for one information set.

    Attributes are plain NumPy vectors of length ``n_actions``; the current
    strategy starts uniform and is refreshed by ``update_strategy``.
    """

    def __init__(self, key, n_actions):
        self.key = key
        self.n_actions = n_actions
        self.regret_sum = np.zeros(self.n_actions)
        self.strategy_sum = np.zeros(self.n_actions)
        # Uniform start, e.g. [0.5, 0.5] or [0.33, 0.33, 0.33].
        self.strategy = np.repeat(1/self.n_actions, self.n_actions)
        # Reach probability collected during the current iteration.
        self.reach_pr = 0
        # Total reach probability over all iterations; must exist before
        # update_strategy() adds to it.
        self.reach_pr_sum = 0

    def update_strategy(self):
        """Fold this iteration's reach-weighted strategy into the running sums
        and recompute the current strategy from the accumulated regrets."""
        self.strategy_sum += self.reach_pr * self.strategy
        self.reach_pr_sum += self.reach_pr
        self.strategy = self.get_strategy()
        self.reach_pr = 0

    def get_strategy(self):
        """Return the regret-matching strategy: positive regrets normalised to
        sum to one, or the uniform strategy when no regret is positive."""
        positive = self.to_nonnegative(self.regret_sum)
        total = sum(positive)
        if total > 0:
            return positive / total
        return np.repeat(1/self.n_actions, self.n_actions)

    def get_average_strategy(self):
        """Return the normalised average strategy without altering ``strategy_sum``.

        Falls back to the uniform strategy while nothing has been accumulated.
        """
        total = sum(self.strategy_sum)
        if total > 0:
            # Divide into a new array so the accumulator keeps its raw sums.
            return self.strategy_sum / total
        return np.repeat(1/self.n_actions, self.n_actions)

    def __str__(self):
        strategies = ['{:03.2f}'.format(x)
                      for x in self.get_average_strategy()]
        return '{} {}'.format(self.key.ljust(6), strategies)

    def to_nonnegative(self, val):
        """Return a copy of ``val`` with non-positive entries replaced by 0."""
        return np.where(val > 0, val, 0)


def display_results(ev, i_map):
    """Print the value ``ev`` and every information set, grouped by acting player.

    Information sets are printed in key order, player 1 (index 0) first and
    player 2 (index 1) second.

    Returns:
        The information sets in the order they were printed.
    """
    print('player 1 expected value: {}'.format(ev))

    ordered = [(key, i_map[key]) for key in sorted(i_map)]
    shown = []
    sections = (('player 1 strategies:', 0), ('player 2 strategies:', 1))
    for title, seat in sections:
        print()
        print(title)
        for key, info_set in ordered:
            # Keys carry the history, so the acting player is derived from them.
            if get_active_player(key) == seat:
                shown.append(info_set)
                print(info_set)
    return shown

def train(n_iterations = 100000):
    """Run ``n_iterations`` CFR iterations on freshly shuffled decks.

    Each iteration yields a rate ``r`` for player 1 (and ``-r`` for player 2).
    The growth factors ``1 + r`` and ``1 - r`` are accumulated as sums of
    logarithms rather than as a running product, so long runs do not overflow
    to ``inf`` or underflow to ``0`` before the geometric mean is taken.

    Args:
        n_iterations: number of training iterations; must be positive.

    Returns:
        ``(avg_growth_rate, i_map)`` where ``i_map`` maps information-set keys
        to their ``InformationSet``. NOTE(review): ``avg_growth_rate`` is the
        value computed last, i.e. player 2's average growth rate -- kept for
        backward compatibility; confirm this is the intended one.

    Raises:
        ValueError: if ``n_iterations`` is not positive.
    """
    if n_iterations <= 0:
        raise ValueError('n_iterations must be a positive integer')

    leduc = Leduc()
    i_map = {}
    log_growth_p1 = 0.0
    log_growth_p2 = 0.0

    for _ in range(n_iterations):

        shuffle(leduc.deck)
        rate_minus_1 = leduc.cfr(i_map)
        p2_rate = -rate_minus_1

        log_growth_p1 += np.log(rate_minus_1 + 1)
        log_growth_p2 += np.log(p2_rate + 1)

        # Refresh every information set once the whole tree has been walked.
        for info_set in i_map.values():
            info_set.update_strategy()

    print('************')
    print('expected_game_value_p1')
    # The total product may still be out of float range; the average is not.
    print(np.exp(log_growth_p1))
    avg_growth_rate = np.exp(log_growth_p1 / n_iterations)
    print(avg_growth_rate)
    print('expected_game_value_p2')
    print(np.exp(log_growth_p2))
    avg_growth_rate = np.exp(log_growth_p2 / n_iterations)
    print(avg_growth_rate)

    return avg_growth_rate, i_map

# Train with the default iteration count when this file is the entry point;
# the results are kept in module-level names for the cells that follow.
if __name__ == "__main__":
    avg_growth_rate, i_map = train()

In [None]:
# Print the trained strategies and export them to a spreadsheet.
# The training results live in `avg_growth_rate` and `i_map`; the previous
# names `a` and `b` were never defined.
strategy_list = display_results(avg_growth_rate, i_map)
df_tempo = pd.DataFrame(strategy_list)
file_path = '/content/drive/MyDrive/Thesis/Second-Round-Update/df_tempo.xlsx'  # Specify the desired file path
df_tempo.to_excel(file_path, index=False)

## 2 CFR Based on Time-Average Growth Rate Maximization

In [None]:
class Leduc:
    """Leduc hold'em tree walker whose regrets are centred log growth rates.

    ``deck`` is read by position: index 0 is player 0's private card,
    index 1 is player 1's private card and index 2 is the flop card.
    """

    def __init__(self):
        self.deck = np.array(['K', 'K', 'Q', 'Q', 'J', 'J'])

    def cfr(self, i_map, history="", pr_1=1, pr_2=1):
        """Walk the game tree below ``history`` and update regrets in ``i_map``.

        Args:
            i_map: dict mapping information-set keys to ``InformationSet``.
            history: action string played so far.
            pr_1: product of player 0's strategy probabilities along the path.
            pr_2: product of player 1's strategy probabilities along the path.

        Returns:
            The ``terminal_util`` value at a terminal node; at a decision node
            the strategy-weighted geometric mean of the per-action growth
            factors, minus one.
        """
        curr_player = get_active_player(history)
        flop_card = self.deck[2]
        card_player = self.deck[curr_player]
        card_opponent = self.deck[1 - curr_player]

        if is_terminal(history):
            return terminal_util(history, card_player, card_opponent, flop_card)

        # Post-flop: the card is dealt and play continues.
        if is_chance_node(history):
            dealt_value = self.cfr(i_map, history + 'd', pr_1, pr_2)
            if history in {'xbc', 'brc'}:
                # Player 0 acts first after the deal although player 1 was
                # next in the pre-flop order, so the sign is flipped.
                return -dealt_value
            return dealt_value

        info_set = self.get_info_set(i_map, history, card_player, flop_card)
        strategy = info_set.strategy
        is_first_player = curr_player == 0

        action_utils = np.zeros(info_set.n_actions)
        for slot, action in enumerate(valid_actions(history)):
            # Only the acting player's reach is scaled by the action probability.
            if is_first_player:
                reach = (pr_1 * strategy[slot], pr_2)
            else:
                reach = (pr_1, pr_2 * strategy[slot])
            rate_minus_1 = self.cfr(i_map, history + action, *reach)
            action_utils[slot] = -1 * rate_minus_1 + 1

        ## the growth factor r is used here so that cfr keeps returning rate_minus_1
        util = np.power(action_utils, strategy).prod()

        ## ln(r) is the quantity being maximised
        log_r = np.log(action_utils)
        avg_log_r = sum(log_r * strategy)
        regrets = log_r - avg_log_r

        # Regrets are weighted by the opponent's reach,
        # eq.(4) in "Introduction to Counterfactual Regret Minimisation".
        if is_first_player:
            info_set.reach_pr += pr_1
            info_set.regret_sum += pr_2 * regrets
        else:
            info_set.reach_pr += pr_2
            info_set.regret_sum += pr_1 * regrets

        return util - 1

    def get_info_set(self, i_map, history, card, flop):
        """
        Retrieve the information set from the dictionary, creating it on first use.

        The flop card is part of the key only once 'd' appears in the history.
        """
        if 'd' in history:
            key = card + flop + " " + history
        else:
            key = card + " " + history

        known = i_map.get(key)
        if known is not None:
            return known

        # Facing a bet offers fold/call/raise; every other spot offers two actions.
        n_actions = 3 if history[-1:] == 'b' else 2
        i_map[key] = InformationSet(key, n_actions)
        return i_map[key]

In [None]:
# Retrain so that the Leduc class redefined in this section is the one being
# evaluated (train() looks the class up when it is called), then print and
# export the strategies. The previous names `a` and `b` were never defined.
avg_growth_rate, i_map = train()
strategy_list = display_results(avg_growth_rate, i_map)
df_tempo = pd.DataFrame(strategy_list)
# NOTE(review): same path as the section 1 export, so that file is overwritten.
file_path = '/content/drive/MyDrive/Thesis/Second-Round-Update/df_tempo.xlsx'  # Specify the desired file path
df_tempo.to_excel(file_path, index=False)