- Reference 1 : https://medium.com/ai-in-plain-english/building-a-poker-ai-part-8-leduc-holdem-and-a-more-generic-cfr-algorithm-in-python-9dd036bf9a30
- Reference 2 : https://github.com/scfenton6/leduc-cfr-poker-bot/blob/main/cfr_vs_others.py


In [None]:
import json
import matplotlib.pyplot as plt
from numpy.random import choice
import numpy as np
import pandas as pd
from random import shuffle
import ast

In [None]:
# Mount Google Drive inside the Colab runtime; the strategy workbooks read
# further down are addressed by paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

## Read the strategy file

In [None]:
def transform_column(row):
    """Split a comma-separated string and convert every field to float."""
    fields = row.split(',')
    return list(map(float, fields))

def strategy_dict(file_path):
    """Load a strategy workbook into a ``{key: [probabilities]}`` dictionary.

    The workbook is read without a header row. The first column holds the
    lookup key (private card, optionally the public card, then the betting
    history) and the second column holds the action probabilities as
    comma-separated text.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        dict mapping each key to the list parsed from its probability text.
        When a key occurs more than once, the last row wins.
    """
    df_strategy = pd.read_excel(file_path, header=None)
    # A flat list of labels: wrapping it in a second list would create a
    # one-level MultiIndex instead of two plain column names.
    df_strategy.columns = ['Hand+History', 'Prob']

    # Whole-cell replacement (not substring): a cell holding only the card
    # gets a trailing space so it matches keys built as card + " " + history
    # when the history is still empty.
    keys = df_strategy['Hand+History'].replace({'J': 'J ', 'Q': 'Q ', 'K': 'K '})

    strategies = {}
    for key, prob_text in zip(keys, df_strategy['Prob']):
        # literal_eval only accepts Python literals, so the cell text cannot
        # execute code; the brackets turn "a,b,c" into a list literal.
        strategies[key] = ast.literal_eval(f'[{prob_text}]')
    return strategies

# Locations of the two strategy workbooks on the mounted drive.
file_path_cfr_log = '/content/drive/MyDrive/Thesis/Second-Round-Update/Leduc_CFR_Log_1.xlsx' 
file_path_cfr = '/content/drive/MyDrive/Thesis/Second-Round-Update/Leduc_CFR_6.xlsx'  # Specify the desired file path

# Parse each workbook into a {key: probability list} dictionary.
cfr_log = strategy_dict(file_path_cfr_log)
cfr = strategy_dict(file_path_cfr)

## Game Settings

In [None]:
# Betting sequences that close a round without a fold. Reached before the
# public card they trigger the deal ('d'); reached after it they end the hand.
chance_nodes = set('xx bc xbc brc xbrc'.split())

def rank(cards):
    """Return the strength rank of a two-card hand (1 is best, 6 is worst).

    ``cards`` is the private card followed by the public card, e.g. ``'KQ'``.
    The order of the two cards does not matter. An unknown hand raises
    ``KeyError``.
    """
    strength = {}
    ordered_hands = ('KK', 'QQ', 'JJ', 'KQ', 'KJ', 'QJ')  # strongest first
    for position, hand in enumerate(ordered_hands, start=1):
        strength[hand] = position
        strength[hand[::-1]] = position  # same rank with the cards swapped
    return strength[cards]

def is_terminal(history):
    """Tell whether ``history`` ends the hand.

    A hand is over after any fold, or once the betting that follows the
    public-card marker ``'d'`` is one of the sequences in ``chance_nodes``.
    """
    if history.endswith('f'):
        return True
    if 'd' not in history:
        return False
    after_deal = history.split('d')[1]
    return after_deal in chance_nodes

def is_chance_node(history):
    """Tell whether ``history`` is exactly one of the ``chance_nodes`` sequences."""
    awaiting_deal = history in chance_nodes
    return awaiting_deal

def terminal_util(history, card_player, card_opponent, card_flop):
    '''Return the player's utility at a terminal node.

    Without a 'd' marker the history is looked up directly and the ante plus
    that round's contribution is returned. With a 'd' marker the pot is the
    ante plus the contributions of both rounds: a history ending in a fold
    returns the pot as a positive amount (no sign adjustment is made here),
    otherwise the hands are compared with ``rank`` (lower is stronger) and
    the result is +pot, -pot or 0 on a tie, seen from ``card_player``.
    '''
    ante = 0.1
    # Extra amount added to the pot by each complete betting sequence.
    round_extra = {
        'xx': 0, 'bf': 0, 'xbf': 0,
        'brf': 0.1, 'xbrf': 0.1, 'bc': 0.1, 'xbc': 0.1,
        'brc': 0.2, 'xbrc': 0.2,
    }

    if 'd' not in history:
        return ante + round_extra[history]

    preflop, flop = history.split('d')
    pot = ante + round_extra[preflop] + round_extra[flop]
    if history.endswith('f'):
        return pot

    # Showdown: each hand is the private card plus the shared public card.
    own_strength = rank(card_player + card_flop)
    other_strength = rank(card_opponent + card_flop)
    if own_strength < other_strength:
        return pot
    if own_strength > other_strength:
        return -pot
    return 0

def valid_actions(history):
    '''List the actions available after ``history``.

    Symbols -- card dealt: d, check: x, fold: f, call: c, bet: b, raise: r.
    The answer depends only on the last symbol; after any other symbol
    (for example a call or a fold) the function returns None.
    '''
    # Each value spells out the allowed action symbols in order.
    replies_by_last_symbol = {
        '': 'xb',
        'd': 'xb',
        'x': 'xb',
        'b': 'fcr',
        'r': 'fc',
    }
    replies = replies_by_last_symbol.get(history[-1:])
    if replies is None:
        return None
    return list(replies)

def get_active_player(history):
    """Return the seat (0 or 1) whose turn it is after ``history``.

    Seats alternate with every action of the current round, and the count
    restarts after the public-card marker ``'d'``.
    """
    current_round = history.split('d')[1] if 'd' in history else history
    return len(current_round) % 2

In [None]:
# Six-card deck, shuffled in place at the start of every hand; after the
# shuffle, positions 0 and 1 are the private cards and position 2 the public card.
deck = np.array(list('KKQQJJ'))

def leduc_round(iter_number, hero_strat, villain_strat):
    """Play one hand and return the hero's utility.

    The global ``deck`` is shuffled in place. The hero sits in seat
    ``iter_number % 2``, so seats alternate between consecutive hands.
    Seat ``s`` holds ``deck[s]`` and the public card is ``deck[2]``.

    Args:
        iter_number: Index of the hand; its parity selects the hero's seat.
        hero_strat: Callable ``(history, card[, flop]) -> action`` for the hero.
        villain_strat: Callable with the same signature for the opponent.

    Returns:
        The value of ``terminal_util`` from the hero's point of view.
    """
    shuffle(deck)
    hero = iter_number % 2
    villain = 1 - hero

    history = ""
    while not is_terminal(history):
        if is_chance_node(history):
            history += 'd'  # reveal the public card

        seat = get_active_player(history)
        policy = hero_strat if seat == hero else villain_strat
        # The public card is passed to the strategy only once it is dealt.
        public_card = (deck[2],) if 'd' in history else ()
        history += policy(history, deck[seat], *public_card)

    # terminal_util is evaluated for the seat returned by get_active_player
    # at the terminal history; when that seat is the villain, negate the
    # value to express it for the hero.
    if get_active_player(history) == hero:
        return terminal_util(history, deck[hero], deck[villain], deck[2])
    return -1 * terminal_util(history, deck[villain], deck[hero], deck[2])


def simulate_poker_game(CFR_strats, OPPO_strats, n_rounds = 5000):
    """Play ``n_rounds`` hands and track winnings and stacks for both sides.

    The value returned by ``leduc_round`` is applied as a fraction of each
    side's own current stack. Both stacks start at 1000; before a hand is
    played, if either stack is at or below 100, both are reset to 1000 and
    the corresponding bust counter is incremented (the CFR side is checked
    first).

    Args:
        CFR_strats: Strategy callable for the CFR side (the hero).
        OPPO_strats: Strategy callable for the opponent.
        n_rounds: Number of hands to play.

    Returns:
        Tuple ``(cfr_busts, oppo_busts, cfr_wins, oppo_wins,
        cfr_accum_util_record, oppo_accum_util_record,
        stack_cfr_record, stack_oppo_record)``. The two win counters are
        never incremented here and are always 0; the four records are arrays
        of length ``n_rounds`` with one entry per hand.
    """
    starting_stack = 1000
    bust_level = 100

    cfr_accum_util_record = np.zeros(n_rounds)
    oppo_accum_util_record = np.zeros(n_rounds)
    stack_cfr_record = np.zeros(n_rounds)
    stack_oppo_record = np.zeros(n_rounds)

    cfr_busts = oppo_busts = 0
    cfr_wins = oppo_wins = 0  # kept for the return shape; never updated
    cfr_total = oppo_total = 0
    cfr_stack = oppo_stack = starting_stack

    for hand in range(n_rounds):
        cfr_broke = cfr_stack <= bust_level
        if cfr_broke or oppo_stack <= bust_level:
            if cfr_broke:
                cfr_busts += 1
            else:
                oppo_busts += 1
            cfr_stack = oppo_stack = starting_stack

        fraction = leduc_round(hand, CFR_strats, OPPO_strats)

        # Each side wins or loses the same fraction of its own stack.
        cfr_delta = fraction * cfr_stack
        oppo_delta = -fraction * oppo_stack

        cfr_stack += cfr_delta
        oppo_stack += oppo_delta
        cfr_total += cfr_delta
        oppo_total += oppo_delta

        cfr_accum_util_record[hand] = cfr_total
        oppo_accum_util_record[hand] = oppo_total
        stack_cfr_record[hand] = cfr_stack
        stack_oppo_record[hand] = oppo_stack

    return (cfr_busts, oppo_busts, cfr_wins, oppo_wins,
            cfr_accum_util_record, oppo_accum_util_record,
            stack_cfr_record, stack_oppo_record)

In [None]:
# Strategy tables looked up by cfr_strat_as_function (leduc_strats) and
# cfr_log_strat_as_function (leduc_strats_log); rebind to evaluate other tables.
leduc_strats= cfr
leduc_strats_log = cfr_log

def normalizer(list_):
    """Scale a sequence of weights so that it sums to 1.

    Args:
        list_: Iterable of non-negative numbers.

    Returns:
        A new list with each weight divided by the total. If the total is
        zero (e.g. an all-zero row), a uniform distribution of the same
        length is returned instead of dividing by zero; an empty input gives
        an empty list.
    """
    weights = list(list_)  # materialise once so generators work too
    total = sum(weights)   # computed once instead of once per element
    if total == 0:
        count = len(weights)
        return [1 / count] * count if count else []
    return [w / total for w in weights]

def cfr_strat_as_function(history, card, flop=None):
    """Sample an action from the ``leduc_strats`` table.

    The table key is the private card, followed by the public card when one
    is given, then a space and the betting history. The stored weights are
    normalised and used as sampling probabilities over ``valid_actions``.
    """
    cards = card + flop if flop else card
    return choice(
        valid_actions(history),
        p=normalizer(leduc_strats[cards + " " + history]),
    )

def cfr_log_strat_as_function(history, card, flop=None):
    """Sample an action from the ``leduc_strats_log`` table.

    The table key is the private card, followed by the public card when one
    is given, then a space and the betting history. The stored weights are
    normalised and used as sampling probabilities over ``valid_actions``.
    """
    cards = card + flop if flop else card
    return choice(
        valid_actions(history),
        p=normalizer(leduc_strats_log[cards + " " + history]),
    )

def random_strat(history, card, flop=None):
    """Pick one of the valid actions uniformly at random; the cards are ignored."""
    return choice(valid_actions(history))

def honest_strat(history, card, flop=None):
    """Deterministic rule-based strategy driven by the cards held.

    Without a public card: 'J' takes the first valid action, 'K' the last
    valid action, 'Q' the passive reply; any other card yields None.
    With a public card: hands ranked 4-5 take the passive reply, every other
    rank takes the last valid action.

    The passive reply is check when nothing has been bet in the round, call
    when facing a bet, and fold otherwise.
    """
    options = valid_actions(history)

    def passive_reply():
        previous = history[-1] if history else ''
        if previous in ('', 'x', 'd'):
            return 'x'
        return 'c' if previous == 'b' else 'f'

    if flop:
        strength = rank(card + flop)
        if 4 <= strength <= 5:
            return passive_reply()
        # NOTE(review): the weakest hands (rank 6) take the same last-listed
        # action as pairs (rank 1-3) -- confirm this is intended and not
        # meant to be the first-listed action.
        return options[-1]

    if card == "J":
        return options[0]
    if card == "K":
        return options[-1]
    if card == "Q":
        return passive_reply()
    return None

# Number of hands passed to simulate_poker_game in each experiment below.
n_rounds = 5000

## 1 Basic CFR v. Random

In [None]:
# Experiment 1: the basic CFR table against the uniformly random opponent.
leduc_strats = cfr
(
    i, j, z, f,
    cfr_accum_util_record_Agg, oppo_accum_util_record,
    stack_cfr_record, stack_oppo_record,
) = simulate_poker_game(cfr_strat_as_function, random_strat, n_rounds)

# One label per line followed by its counter, as separate lines.
print(
    'cfr lose times', i,
    'oppo lose times', j,
    'cfr win times', z,
    'oppo win times', f,
    sep='\n',
)

# Accumulated winnings of both sides over the hands played.
xx = np.arange(n_rounds, dtype=int)
plt.plot(xx, cfr_accum_util_record_Agg, label='CFR_accumulated_util')
plt.plot(xx, oppo_accum_util_record, label='Opponent_accumulated_util')
plt.legend(fontsize=7)
plt.show()

## 2 Basic CFR v. Aggressive/Passive

In [None]:
# Experiment 2: the basic CFR table against the rule-based honest_strat.
leduc_strats = cfr
(
    i, j, z, f,
    cfr_accum_util_record_Agg, oppo_accum_util_record,
    stack_cfr_record, stack_oppo_record,
) = simulate_poker_game(cfr_strat_as_function, honest_strat, n_rounds)

# One label per line followed by its counter, as separate lines.
print(
    'cfr lose times', i,
    'oppo lose times', j,
    'cfr win times', z,
    'oppo win times', f,
    sep='\n',
)

# Accumulated winnings of both sides over the hands played.
xx = np.arange(n_rounds, dtype=int)
plt.plot(xx, cfr_accum_util_record_Agg, label='CFR_accumulated_util')
plt.plot(xx, oppo_accum_util_record, label='Opponent_accumulated_util')
plt.legend(fontsize=7)
plt.show()

## 3 Basic CFR v. CFR maximizing time average growth rate

In [None]:
# Experiment 3: the basic CFR table (hero) against the CFR table trained to
# maximise the time-average growth rate (opponent).
leduc_strats= cfr
leduc_strats_log = cfr_log

# The opponent must be cfr_log_strat_as_function, which samples from
# leduc_strats_log. Passing honest_strat here would only repeat experiment 2
# and leave the table bound above unused.
i,j,z,f,cfr_accum_util_record_Agg,oppo_accum_util_record,stack_cfr_record, stack_oppo_record = simulate_poker_game(
    cfr_strat_as_function,
    cfr_log_strat_as_function,
    n_rounds
    )

# i / j count stack resets caused by the CFR side / the opponent dropping to
# the bust level; z / f are returned as 0 by simulate_poker_game.
print('cfr lose times')
print(i)
print('oppo lose times')
print(j)
print('cfr win times')
print(z)
print('oppo win times')
print(f)

# Accumulated winnings of both sides over the hands played.
xx = np.arange(0, n_rounds, dtype=int)
plt.plot(xx, cfr_accum_util_record_Agg, label='CFR_accumulated_util')
plt.plot(xx, oppo_accum_util_record, label='Opponent_accumulated_util')
plt.legend(fontsize=7)
plt.show()