In [21]:
using StatsBase

### In this notebook I will implement a simple MCTS algorithm to play poker

In [2]:
"""
    Node

A node of the search tree. `children` holds the node's child nodes; an empty
vector means the node has no children.
"""
struct Node
    # `Vector{Node}` is a concrete type (unlike `Array{Node}`, whose dimension is
    # left open), so field access is type-stable.
    children::Vector{Node}
end

In [3]:
"""
    MCTSTree

Wrapper around a search tree; stores only the tree's `root` node.
"""
struct MCTSTree
    root::Node
end

In [None]:
"""
    is_leaf(node::Node)

Report whether `node` is a leaf. Placeholder implementation: every node is
currently reported as a leaf, regardless of its contents.
"""
is_leaf(node::Node) = true

In [4]:
"""
    selection(current_game_state::Node)

Selection step placeholder. Checks `is_leaf(current_game_state)` but performs no
work for either outcome yet, and always returns `nothing`.
"""
function selection(current_game_state::Node)
    # Leaf nodes need no descent.
    is_leaf(current_game_state) && return nothing
    # Descent into the children is not implemented yet.
    return nothing
end

selection (generic function with 1 method)

In [5]:
"""
    simulate(action, computation_budget::Int)

Apply `action` through `next_stage`, then call `select()` once per unit of
`computation_budget`. Returns `nothing`.

NOTE(review): neither `next_stage` nor `select` is defined in the visible part of
this notebook; confirm they exist (or whether `selection` was intended) before use.
"""
function simulate(action, computation_budget::Int)
    next_stage(action)
    foreach(_ -> select(), 1:computation_budget)
    return nothing
end

simulate (generic function with 1 method)

How to compute the policy at each DecisionNode.<br>
We want to compute the policy that maximizes the expected reward.<br>
At each DecisionNode we expand every action and simulate its effect on the reward, estimated as rewards / num_of_visits.<br>
When the player needs to take an action, it builds the whole computation tree and samples an action from the resulting policy vector.<br>

In [7]:
# Placeholder value written into history records for slots that carry no real
# card or move. Declared `const` so functions reading it stay type-stable.
const INVALID_VALUE = -10

-10

In [9]:
# Card ranks; a larger value beats a smaller one in comparisons.
# Declared `const` so functions reading these globals stay type-stable.
const JACK = 1
const QUEEN = 2
const KING = 3
# The complete three-card deck.
const FULL_DECK = [JACK, QUEEN, KING]

3-element Array{Int64,1}:
 1
 2
 3

In [10]:
# Action codes. Declared `const` so functions reading these globals stay type-stable.
const FOLD = -1
const CHECK = 0
const BET = 1
# All action codes.
const ACTIONS = [FOLD, CHECK, BET]

3-element Array{Int64,1}:
 -1
  0
  1

In [93]:
"""
    simulate_check(card, num_of_simulations)

Estimate the total reward of checking while holding `card`.

Half of the simulations (rounded up) assume the opponent checks as well: the
opponent's card is sampled from the rest of the deck and each showdown is worth
+1 or -1. The remaining simulations assume the opponent bets: folding costs 1 per
simulation, weighted by the fold probability from `compute_policy_fold_or_call`,
and the calling share is evaluated with `simulate_call`.

Returns the sum of the rewards of all three branches.
"""
function simulate_check(card, num_of_simulations)
    n_check = ceil(Int, num_of_simulations / 2)
    n_bet = num_of_simulations - n_check

    # Opponent checks too: plain showdown, +1 per win and -1 per loss.
    remaining_cards = setdiff(FULL_DECK, [card])
    opponent_cards = sample(remaining_cards, n_check)
    wins = count(card .> opponent_cards)
    showdown_reward = 2 * wins - length(opponent_cards)

    # Opponent bets: we either fold (lose 1) or call.
    p_fold, p_call = compute_policy_fold_or_call(card)
    fold_reward = -(n_bet * p_fold)
    call_reward = simulate_call(card, ceil(Int, n_bet * p_call))

    return showdown_reward + fold_reward + call_reward
end

simulate_check (generic function with 1 method)

In [119]:
# Total simulated reward of checking while holding the KING, over 10000 simulations.
simulate_check(KING, 10000)

14288.870634112165

In [62]:
"""
    simulate_bet(card, num_of_simulations)

Estimate the total reward of betting while holding `card`.

The simulations are split evenly: the lower half is evaluated as the opponent
calling (via `simulate_call`), the upper half as the opponent folding, which is
worth +1 each. Returns the sum of both parts as a floating-point number.
"""
function simulate_bet(card, num_of_simulations)
    # TODO - the split should follow the opponent's probability to call or fold
    half = num_of_simulations / 2
    call_reward = simulate_call(card, floor(Int, half))
    fold_reward = ceil(half)
    return call_reward + fold_reward
end

simulate_bet (generic function with 1 method)

In [64]:
"""
    simulate_call(card, num_of_simulations)

Simulate `num_of_simulations` showdowns for `card` against an opponent card
sampled from the rest of the deck. Each showdown is worth +2 when `card` is
higher and -2 otherwise (the stake is doubled because a bet is on). Returns the
summed reward.
"""
function simulate_call(card, num_of_simulations)
    remaining_cards = setdiff(FULL_DECK, [card])
    opponent_cards = sample(remaining_cards, num_of_simulations)
    wins = count(card .> opponent_cards)
    losses = length(opponent_cards) - wins
    # The bet is on, so every showdown counts double.
    return 2 * (wins - losses)
end

simulate_call (generic function with 1 method)

In [147]:
"""
    compute_policy_check_or_bet(card)

Return the two-element policy `[p_check, p_bet]` for a player holding `card`.

Both actions are evaluated with 1000 simulations each; the average rewards are
turned into probabilities with a softmax (exponentiate, then normalise).
"""
function compute_policy_check_or_bet(card)
    num_of_simulations = 1000
    bet_total = simulate_bet(card, num_of_simulations)
    check_total = simulate_check(card, num_of_simulations)

    # Softmax over the average reward of [check, bet].
    weights = exp.([check_total / num_of_simulations, bet_total / num_of_simulations])
    return weights ./ sum(weights)
end

compute_policy_check_or_bet (generic function with 1 method)

In [148]:
"""
    compute_policy_fold_or_call(card)

Return the two-element policy `[p_fold, p_call]` for a player holding `card`.

Folding is valued at -1 per simulation; calling is evaluated with `simulate_call`
over 1000 simulations. The average rewards are turned into probabilities with a
softmax (exponentiate, then normalise).
"""
function compute_policy_fold_or_call(card)
    num_of_simulations = 1000
    fold_total = -1 * num_of_simulations
    call_total = simulate_call(card, num_of_simulations)

    # Softmax over the average reward of [fold, call].
    weights = exp.([fold_total / num_of_simulations, call_total / num_of_simulations])
    return weights ./ sum(weights)
end

compute_policy_fold_or_call (generic function with 1 method)

In [13]:
"""
    compute_policy(card, previous_player_betted=false)

Return the action policy for a player holding `card`: the fold/call policy when
`previous_player_betted` is `true`, otherwise the check/bet policy.
"""
function compute_policy(card, previous_player_betted=false)
    return previous_player_betted ? compute_policy_fold_or_call(card) :
           compute_policy_check_or_bet(card)
end

compute_policy (generic function with 2 methods)

In [125]:
"""
    sample_check_bet(policy)

Draw `CHECK` or `BET` at random, using the two entries of `policy` as the
respective sampling weights.
"""
function sample_check_bet(policy)
    candidate_moves = [CHECK, BET]
    return sample(candidate_moves, Weights(policy))
end

sample_check_bet (generic function with 1 method)

In [150]:
"""
    sample_fold_call(policy)

Draw `FOLD` or `CHECK` at random, using the two entries of `policy` as the
respective sampling weights.

NOTE(review): the second outcome is encoded as `CHECK`; presumably it stands for
"call" here - confirm against the callers.
"""
function sample_fold_call(policy)
    candidate_moves = [FOLD, CHECK]
    return sample(candidate_moves, Weights(policy))
end

sample_fold_call (generic function with 1 method)

In [138]:
"""
    sample_action(policy, previous_player_betted=false)

Sample a move from `policy`: a fold/call decision when `previous_player_betted`
is `true`, otherwise a check/bet decision.
"""
function sample_action(policy, previous_player_betted=false)
    return previous_player_betted ? sample_fold_call(policy) : sample_check_bet(policy)
end

sample_action (generic function with 2 methods)

In [142]:
"""
    round(first_player_card, second_player_card, history)

Play a single round between two players and return its score: positive when the
first player wins, negative when the second player wins.

The first player checks or bets. The second player then folds/calls if the first
player bet, or checks/bets otherwise. If the second player bets, the first player
gets one more fold/call decision. A fold is worth 1 to the other player; a
showdown is worth 1, or 2 if any bet was made.

A record `(round_history, score)` is pushed onto `history` on every exit path,
including the one where the first player folds to the second player's bet.

NOTE(review): in the calling module this name takes precedence over `Base.round`.
"""
function round(first_player_card, second_player_card, history)
    first_player_policy = compute_policy(first_player_card)
    first_player_move = sample_action(first_player_policy)
    first_player_betted = first_player_move == BET

    second_player_policy = compute_policy(second_player_card, first_player_betted)
    second_player_move = sample_action(second_player_policy, first_player_betted)

    round_history = [(
        (first_player_card, first_player_move, false),
        (second_player_card, second_player_move, first_player_betted),
    )]

    # Must be computed before `first_player_move` can be overwritten below.
    doubled_pot = first_player_betted || second_player_move == BET

    if second_player_move == FOLD
        push!(history, (round_history, 1))
        return 1
    end

    if second_player_move == BET
        # The first player has to answer the second player's bet.
        first_player_policy = compute_policy(first_player_card, true)
        first_player_move = sample_action(first_player_policy, true)

        # NOTE(review): the third slot of the first tuple stays `false` as in the
        # original record layout, although the first player is facing a bet here.
        push!(
            round_history,
            (
                (first_player_card, first_player_move, false),
                (INVALID_VALUE, INVALID_VALUE, false),
            ),
        )
        if first_player_move == FOLD
            # Record the round here as well, so `history` covers every round played.
            push!(history, (round_history, -1))
            return -1
        end
    end

    # Showdown: the higher card wins 1, or 2 when the pot was doubled by a bet.
    stake = 1 + doubled_pot
    score = first_player_card > second_player_card ? stake : -stake
    push!(history, (round_history, score))
    return score
end

round (generic function with 2 methods)

In [152]:
"""
    game(num_of_rounds = 10)

Play `num_of_rounds` rounds and return the average score per round.

Each round deals two distinct cards from `FULL_DECK` (sampled without
replacement) to the first and second player and is scored by `round`.
"""
function game(num_of_rounds = 10)
    history = []
    total_score = 0
    for _ in 1:num_of_rounds
        dealt_cards = sample(FULL_DECK, 2; replace=false)
        total_score += round(dealt_cards[1], dealt_cards[2], history)
    end
    return total_score / num_of_rounds
end

game (generic function with 2 methods)

In [154]:
# Average per-round score returned by `game` over 10000 simulated rounds.
game(10000)

-0.0314