## 1 Poker Cards Generator

In [None]:
import random
import numpy as np
from itertools import combinations
import pandas as pd

from treys import Card
from treys import Evaluator
# Module-level treys evaluator instance; deusces_value() calls its evaluate() method.
evaluator = Evaluator()


##################################################################################
################################ Card Rank ######################################
##################################################################################

# A card is a (rank, suit) tuple of one-character strings.
suits = list('SHDC')
ranks = list('23456789TJQKA')
# Full 52-card deck, grouped by rank: every suit of '2', then every suit of '3', ...
deck = [(r, s) for r in ranks for s in suits]

# Hand categories ordered from strongest to weakest, each paired with the
# predicate that recognises it. The predicates are wrapped in lambdas so the
# checker functions are looked up at call time (they are defined further down).
# The last entry always matches, so it acts as the fallback category.
hand_rank = [
    ("Royal flush", lambda cards: is_royal_flush(cards)),
    ("Straight flush", lambda cards: is_straight_flush(cards)),
    ("Four of a kind", lambda cards: is_four_of_a_kind(cards)),
    ("Full house", lambda cards: is_full_house(cards)),
    ("Flush", lambda cards: is_flush(cards)),
    ("Straight", lambda cards: is_straight(cards)),
    ("Three of a kind", lambda cards: is_three_of_a_kind(cards)),
    ("Two pair", lambda cards: is_two_pair(cards)),
    ("Pair", lambda cards: is_pair(cards)),
    ("High card", lambda cards: True),
]

# Define the functions to check for each hand rank
def is_royal_flush(hand):
    """Return True when the hand is a flush whose ranks are exactly T, J, Q, K, A.

    Args:
        hand: iterable of (rank, suit) tuples.
    """
    if not is_flush(hand):
        return False
    royal_ranks = {"T", "J", "Q", "K", "A"}
    return {card_rank for card_rank, _ in hand} == royal_ranks

def is_straight_flush(hand):
    """Return True when the hand is both a flush and a straight.

    Args:
        hand: iterable of (rank, suit) tuples.
    """
    if not is_flush(hand):
        return False
    return is_straight(hand)

def is_four_of_a_kind(hand):
    """Return True when some rank appears exactly four times in the hand.

    Args:
        hand: iterable of (rank, suit) tuples.
    """
    rank_values = [card_rank for card_rank, _ in hand]
    return any(rank_values.count(value) == 4 for value in rank_values)

def is_full_house(hand):
    """Return True when the hand holds a rank appearing three times and a rank appearing twice.

    Args:
        hand: iterable of (rank, suit) tuples.
    """
    if not is_three_of_a_kind(hand):
        return False
    return is_pair(hand)


def is_flush(hand):
    """Return True when every card in the hand shares one suit.

    An empty hand has no suit at all and therefore yields False.

    Args:
        hand: iterable of (rank, suit) tuples.
    """
    distinct_suits = {card_suit for _, card_suit in hand}
    return len(distinct_suits) == 1


def is_straight(hand):
    """Return True when the hand consists of five consecutive distinct ranks.

    The ace plays both high (T-J-Q-K-A) and low (A-2-3-4-5); sequences that
    wrap around the ace, such as Q-K-A-2-3, are not straights.

    Args:
        hand: iterable of (rank, suit) tuples whose rank is one of
            '2'..'9', 'T', 'J', 'Q', 'K', 'A'.

    Returns:
        bool: True for a straight, False otherwise (including hands that do
        not contain exactly five distinct ranks).
    """
    face_values = {'T': 10, 'J': 11, 'Q': 12, 'K': 13, 'A': 14}
    # Parse digits with int() rather than eval(): rank strings are data, not code.
    values = {
        face_values[card_rank] if card_rank in face_values else int(card_rank)
        for card_rank, _ in hand
    }

    # A straight needs five different ranks; any pair rules it out.
    if len(values) != 5:
        return False

    # Ace counted high: five distinct values spanning exactly four steps.
    if max(values) - min(values) == 4:
        return True

    # Ace counted low: the only remaining straight is the wheel A-2-3-4-5.
    return values == {14, 2, 3, 4, 5}

def is_three_of_a_kind(hand):
    """Return True when some rank appears exactly three times in the hand.

    Args:
        hand: iterable of (rank, suit) tuples.
    """
    rank_values = [card_rank for card_rank, _ in hand]
    return any(rank_values.count(value) == 3 for value in rank_values)

def is_two_pair(hand):
    """Return True when exactly two different ranks each appear exactly twice.

    Args:
        hand: iterable of (rank, suit) tuples.
    """
    rank_values = [card_rank for card_rank, _ in hand]
    paired_ranks = {value for value in rank_values if rank_values.count(value) == 2}
    return len(paired_ranks) == 2

def is_pair(hand):
    """Return True when at least one rank appears exactly twice in the hand.

    Args:
        hand: iterable of (rank, suit) tuples.
    """
    rank_values = [card_rank for card_rank, _ in hand]
    return any(rank_values.count(value) == 2 for value in rank_values)

def rank_hand(hand):
    """Return the name of the first category in ``hand_rank`` whose predicate accepts the hand.

    ``hand_rank`` is scanned in its listed order, so the first match wins.
    None is returned if no predicate matches.

    Args:
        hand: iterable of (rank, suit) tuples.
    """
    matching_names = (label for label, matches in hand_rank if matches(hand))
    return next(matching_names, None)

def cards_dealt():
    """Shuffle a fresh 52-card deck and deal a two-player hand with five shared cards.

    Returns:
        tuple: ``(player1_full_hand, player2_full_hand)``. Each is a list of
        seven (rank, suit) tuples: the player's two private cards followed by
        the five community cards, which are identical in both lists.
    """
    fresh_deck = [(r, s) for r in '23456789TJQKA' for s in 'SHDC']
    random.shuffle(fresh_deck)

    # Cards are taken from the end of the shuffled deck; reading the last nine
    # entries back to front reproduces that draw order.
    drawn = fresh_deck[:-10:-1]
    hole_cards_1 = drawn[0:2]
    hole_cards_2 = drawn[2:4]
    board = drawn[4:9]

    return hole_cards_1 + board, hole_cards_2 + board


## 2 Hand Strength Evaluator

In [None]:
##################################################################################
################################ Hand Strength ###################################
##################################################################################


# Hand strength on a 0-1 scale: each hand category maps to a fixed score,
# from 0.1 for the weakest category up to 1 for the strongest.
map_dict = dict([
    ('Royal flush', 1),
    ('Straight flush', 0.9),
    ('Four of a kind', 0.8),
    ('Full house', 0.7),
    ('Flush', 0.6),
    ('Straight', 0.5),
    ('Three of a kind', 0.4),
    ('Two pair', 0.3),
    ('Pair', 0.2),
    ('High card', 0.1),
])

def hand_strength(full_hand):
    """Return the best ``map_dict`` score over all five-card subsets of the hand.

    Every 5-card combination of ``full_hand`` is classified with ``rank_hand``
    and converted to its numeric score; the highest score is returned. A hand
    with fewer than five cards has no combinations and scores 0.

    Args:
        full_hand: sequence of (rank, suit) tuples.
    """
    combo_scores = (
        map_dict[rank_hand(five_cards)]
        for five_cards in combinations(full_hand, 5)
    )
    return max(combo_scores, default=0)

def deusces_value(cards):
    """Score a full hand with the treys evaluator.

    Each (rank, suit) tuple is turned into a two-character string with a
    lower-case suit (for example ('A', 'S') becomes 'As') and converted with
    ``Card.new``. The first two cards are passed as the private hand and the
    remaining cards as the board.

    Args:
        cards: sequence of (rank, suit) tuples, private cards first.

    Returns:
        The value returned by ``evaluator.evaluate``. get_payoff() treats a
        smaller value as the winning hand.
    """
    lower_suit = {'H': 'h', 'D': "d", 'S': "s", 'C': 'c'}
    labels = [
        card_rank + lower_suit.get(card_suit, card_suit)
        for card_rank, card_suit in cards
    ]
    board_cards = [Card.new(label) for label in labels[2:]]
    hole_cards = [Card.new(label) for label in labels[:2]]
    return evaluator.evaluate(board_cards, hole_cards)

## 3 CFR Design and Training

In [None]:
# Information-set table: maps a "<hand level> <history>" key to its CFR node dict.
nodes_matrix = dict()

def hand_level(hs):
    """Bucket a hand-strength score into 'Weak', 'Medium', 'Strong' or 'Top'.

    Args:
        hs: numeric hand strength, as produced by hand_strength().

    Returns:
        'Weak' below 0.4, 'Medium' for exactly 0.4 or 0.5, 'Top' from 0.8
        upwards, and 'Strong' for everything else (0.6 and 0.7 in practice).
    """
    if hs < 0.4:
        return 'Weak'
    if hs in (0.4, 0.5):
        return "Medium"
    return 'Top' if hs >= 0.8 else "Strong"

def get_node(card, history):
    """Fetch, creating it on first use, the node for this hand level and history.

    The lookup key is ``"<hand level> <history>"``, so all hands that fall in
    the same strength bucket share one node per betting history.

    Args:
        card: the acting player's full hand, a sequence of (rank, suit) tuples.
        history: string of actions taken so far ('C' and 'B' characters).

    Returns:
        dict: the node stored in ``nodes_matrix`` under the key. A new node
        starts with a uniform two-action strategy and zeroed accumulators.
    """
    key = hand_level(hand_strength(card)) + " " + history

    if key in nodes_matrix:
        return nodes_matrix[key]

    fresh_node = {
        'key': key,
        'action_dict': {0: 'C', 1: 'B'},
        'n_actions': 2,
        'regret_sum': np.zeros(2),
        'strategy_sum': np.zeros(2),
        'strategy': np.repeat(1/2, 2),
        'reach_pr': 0,
        'reach_pr_sum': 0
    }
    nodes_matrix[key] = fresh_node
    return fresh_node

# Payoffs are expressed as a fraction of a stack: one unit is 1/W.
W = 50
def get_payoff(history, card_player, card_opponent):
    """Return the terminal payoff for the player whose turn it would be.

    Terminal histories and their payoffs:
      * ends in 'BC' ('BC', 'CBC'): a bet was not matched, the player
        collects one unit without a showdown.
      * ends in 'CC': showdown for one unit.
      * ends in 'BB' ('BB', 'CBB'): showdown for two units.

    At showdown the hand with the lower ``deusces_value`` wins; equal values
    split the pot and pay 0.

    Args:
        history: string of 'C'/'B' actions ending in a terminal sequence.
        card_player: full hand of the player the payoff is computed for.
        card_opponent: full hand of the other player.

    Returns:
        float: payoff as a fraction of a stack (a multiple of ``1 / W``).

    Raises:
        ValueError: if ``history`` does not end in 'CC', 'BB' or 'BC'.
    """
    last_two = history[-2:]

    # No showdown: skip hand evaluation entirely.
    if last_two == 'BC':
        return 1 / W

    if last_two == 'CC':
        stake = 1 / W
    elif last_two == 'BB':
        stake = 2 / W
    else:
        raise ValueError(
            "get_payoff called with non-terminal history: {!r}".format(history)
        )

    player_score = deusces_value(card_player)
    opponent_score = deusces_value(card_opponent)

    # Equal scores are a split pot, not a loss for the player.
    if player_score == opponent_score:
        return 0.0
    return stake if player_score < opponent_score else -stake


def cfr(history, pr_1, pr_2, cards, stack_a, stack_b):
    """Run one CFR step for the learning player and recurse into cfr_B.

    The acting player is derived from the history length (even length means
    the first player, who holds ``cards[0]``). At a terminal history the
    payoff from get_payoff() is scaled by ``stack_a`` and returned. Otherwise
    both actions are evaluated through ``cfr_B``, and the node's reach
    probability and regret sums are updated.

    Args:
        history: string of 'C'/'B' actions taken so far.
        pr_1: reach probability contributed by the first player.
        pr_2: reach probability contributed by the second player.
        cards: two-element sequence of full hands, one per player.
        stack_a: stack size used to scale terminal payoffs.
        stack_b: passed through to recursive calls; not otherwise used here.

    Returns:
        The value of this history for the acting player.
    """
    is_player_1 = len(history) % 2 == 0
    own_idx, other_idx = (0, 1) if is_player_1 else (1, 0)

    # Terminal histories: showdown after check-check or bet-bet, or a bet
    # answered by 'C'.
    if history[-2:] in ('CC', "BB", 'BC'):
        return get_payoff(history, cards[own_idx], cards[other_idx]) * stack_a

    node = get_node(cards[own_idx], history)
    strategy = node['strategy']
    action_values = np.zeros(2)

    for act in range(2):
        next_history = history + node['action_dict'][act]
        # Only the acting player's reach probability is weighted by the action.
        if is_player_1:
            child_value = cfr_B(next_history, pr_1 * strategy[act], pr_2, cards, stack_a, stack_b)
        else:
            child_value = cfr_B(next_history, pr_1, pr_2 * strategy[act], cards, stack_a, stack_b)
        # The child value belongs to the other player, hence the sign flip.
        action_values[act] = -1 * child_value

    node_value = action_values.dot(strategy)
    regrets = action_values - node_value

    # Own reach feeds the strategy average; regrets are weighted by the
    # other player's reach.
    own_reach, opponent_reach = (pr_1, pr_2) if is_player_1 else (pr_2, pr_1)
    node['reach_pr'] += own_reach
    node['regret_sum'] += opponent_reach * regrets

    return node_value


def cfr_B(history, pr_1, pr_2, cards, stack_a, stack_b):
    """Evaluate a history where the fixed-strategy player acts, recursing into cfr.

    Mirrors ``cfr`` but takes the action probabilities from
    ``get_strategy_B`` and keeps no node: nothing is learned or accumulated
    for this player.

    Args:
        history: string of 'C'/'B' actions taken so far.
        pr_1: reach probability contributed by the first player.
        pr_2: reach probability contributed by the second player.
        cards: two-element sequence of full hands, one per player.
        stack_a: stack size used to scale terminal payoffs.
        stack_b: passed through to recursive calls; not otherwise used here.

    Returns:
        The value of this history for the acting player.
    """
    is_player_1 = len(history) % 2 == 0
    own_idx, other_idx = (0, 1) if is_player_1 else (1, 0)

    if history[-2:] in ('CC', "BB", 'BC'):
        return get_payoff(history, cards[own_idx], cards[other_idx]) * stack_a

    strategy = get_strategy_B(cards[own_idx])
    action_values = np.zeros(2)

    for act, symbol in enumerate(('C', 'B')):
        next_history = history + symbol
        if is_player_1:
            child_value = cfr(next_history, pr_1 * strategy[act], pr_2, cards, stack_a, stack_b)
        else:
            child_value = cfr(next_history, pr_1, pr_2 * strategy[act], cards, stack_a, stack_b)
        # The child value belongs to the other player, hence the sign flip.
        action_values[act] = -1 * child_value

    return action_values.dot(strategy)

def get_strategy(node):
    """Derive the next strategy from the node's accumulated regrets.

    Negative entries of ``node['regret_sum']`` are set to zero IN PLACE, so
    the stored regret sums are clipped as a side effect of this call. The
    strategy is the clipped regrets normalised to sum to one, or uniform
    when no regret is positive.

    Args:
        node: node dict with 'regret_sum' (numpy array) and 'n_actions'.

    Returns:
        numpy array of action probabilities.
    """
    clipped = node['regret_sum']
    # Boolean-mask assignment writes straight into the node's own array.
    clipped[clipped < 0] = 0
    total = sum(clipped)
    if not total > 0:
        n_actions = node['n_actions']
        return np.repeat(1/n_actions, n_actions)
    return clipped / total

def get_strategy_B(cards):
    """Return the fixed action probabilities used by the non-learning player.

    The hand is bucketed with hand_strength() and hand_level(); each bucket
    has a hard-coded ``[P('C'), P('B')]`` pair. Any level other than 'Weak',
    'Top' or 'Strong' (i.e. 'Medium') uses ``[0.3, 0.7]``.

    Args:
        cards: full hand of the acting player, a sequence of (rank, suit) tuples.

    Returns:
        numpy array with two entries.
    """
    level = hand_level(hand_strength(cards))
    fixed_probabilities = {
        'Weak': [0.7, 0.3],
        'Top': [0, 1],
        'Strong': [0.1, 0.9],  # strengths 0.6 and 0.7
    }
    # Fallback covers 'Medium' (strengths 0.4 and 0.5).
    return np.array(fixed_probabilities.get(level, [0.3, 0.7]))

def update_strategy(node):
    """Fold the node's current strategy into its running totals and refresh it.

    The current strategy, weighted by the reach probability accumulated since
    the last update, is added to 'strategy_sum'; that reach is added to
    'reach_pr_sum'; a new strategy is computed with get_strategy(); and
    'reach_pr' is reset to 0.

    Args:
        node: node dict as created by get_node(); modified in place.
    """
    accumulated_reach = node['reach_pr']
    node['strategy_sum'] += accumulated_reach * node['strategy']
    node['reach_pr_sum'] += accumulated_reach
    node['strategy'] = get_strategy(node)
    node['reach_pr'] = 0

def get_average_strategy(node):
    """Return the node's reach-weighted average strategy.

    Each entry of 'strategy_sum' is divided by 'reach_pr_sum'. When the
    accumulated reach is not positive, a uniform distribution over the
    node's actions is returned instead.

    Args:
        node: node dict with 'strategy_sum', 'reach_pr_sum' and 'n_actions'.

    Returns:
        numpy float array with one probability per action.
    """
    n_actions = node['n_actions']
    weighted_sum = node['strategy_sum']
    total_reach = node['reach_pr_sum']

    if not total_reach > 0:
        return np.repeat(1/n_actions, n_actions)

    return np.array(
        [weighted_sum[i] / total_reach for i in range(n_actions)],
        dtype=float,
    )



def train_take_turns(iterations, stack_a, stack_b):
    """Train player A with CFR against the fixed-strategy player B.

    The players alternate who acts first: A on even rounds (via ``cfr``) and
    B on odd rounds (via ``cfr_B``, whose value is negated to express it from
    A's side). After every round each node in ``nodes_matrix`` is refreshed
    with update_strategy().

    Args:
        iterations: number of hands to play; must be positive.
        stack_a: starting stack of player A.
        stack_b: starting stack of player B.

    Returns:
        tuple: ``(rate, nodes)`` where ``rate`` is
        ``((total_value_a + 1000) / 1000) ** (1 / iterations)`` and ``nodes``
        is ``nodes_matrix.items()``.

    Raises:
        ValueError: if ``iterations`` is not positive (the rate is undefined).
    """
    if iterations <= 0:
        raise ValueError("iterations must be a positive integer")

    expected_game_value_a = 0

    for round_index in range(iterations):
        # Start a fresh match once either player is broke or holds everything.
        if (stack_a * stack_b <= 0) or stack_a >= 2000 or stack_b >= 2000:
            stack_a = 1000
            stack_b = 1000

        full_hand_1, full_hand_2 = cards_dealt()
        cards = [full_hand_1, full_hand_2]

        if round_index % 2 == 0:
            # A acts first.
            round_value_a = cfr('', 1, 1, cards, stack_a, stack_b)
        else:
            # B acts first; cfr_B reports B's value, so flip the sign for A.
            round_value_a = -cfr_B('', 1, 1, cards, stack_a, stack_b)

        expected_game_value_a += round_value_a
        # Move only this round's result between the stacks, not the running total.
        stack_a += round_value_a
        stack_b -= round_value_a

        for node in nodes_matrix.values():
            update_strategy(node)

    # NOTE(review): if the cumulative value drops below -1000 the base is
    # negative and the fractional power yields a complex number.
    rate = ((expected_game_value_a + 1000) / 1000) ** (1 / iterations)
    return rate, nodes_matrix.items()
    


def display_results(ev, nodes_matrix):
    """Print the expected rates and tabulate each node's average strategy.

    Args:
        ev: rate value to report for player 1; its negation is printed for
            player 2.
        nodes_matrix: iterable of ``(key, node)`` pairs, e.g. the items of
            the node table.

    Returns:
        pandas.DataFrame with a 'History' column (node keys, sorted) and a
        'Strategy' column (average probabilities formatted to two decimals).
    """
    print('player 1 expected rate: {}'.format(ev))
    print('player 2 expected rate: {}'.format(-1 * ev))
    print('-------------------------------------------------')

    ordered_nodes = sorted(nodes_matrix, key=lambda item: item[0])
    histories = [key for key, _ in ordered_nodes]
    formatted_strategies = [
        ['{:03.2f}'.format(p) for p in get_average_strategy(node)]
        for _, node in ordered_nodes
    ]

    df_tracker = pd.DataFrame(columns=['History', 'Strategy'])
    df_tracker['History'] = histories
    df_tracker['Strategy'] = formatted_strategies
    return df_tracker

In [None]:
# Driver cell: reset the node table, train, and show the learned strategies.
nodes_matrix={}
iterations = 200000
stack_a = 1000
stack_b = 1000
# train_take_turns returns nodes_matrix.items(), so this assignment rebinds the
# global nodes_matrix from the dict to its items view; display_results below
# expects exactly that iterable of (key, node) pairs.
expected_game_value, nodes_matrix= train_take_turns(iterations,stack_a,stack_b)
strategy_form = display_results(expected_game_value, nodes_matrix)
# Bare expression so the notebook renders the resulting DataFrame.
strategy_form