In [56]:
using StatsBase

### In this notebook I will implement a simple simulation-based algorithm to play poker

In [57]:
# Sentinel written into a history record for a player slot that made no move.
# Declared `const` so functions reading it see a concretely typed global.
const INVALID_VALUE = -10

-10

In [58]:
# Card ranks: a larger number is the stronger card when two cards are compared.
# Declared `const` so functions reading them see concretely typed globals.
const JACK = 1
const QUEEN = 2
const KING = 3
# Every card in the deck, in ascending rank order.
const FULL_DECK = [JACK, QUEEN, KING]

3-element Array{Int64,1}:
 1
 2
 3

In [59]:
# Action codes. Declared `const` so functions reading them see concretely
# typed globals.
const FOLD = -1
const CHECK = 0
const BET = 1
# All action codes, in ascending numeric order.
const ACTIONS = [FOLD, CHECK, BET]

3-element Array{Int64,1}:
 -1
  0
  1

In [60]:
"""
    simulate_bet(card, num_of_simulations)

Return the total simulated reward of betting while holding `card`.

The simulation budget is split in two: the rounded-down half is played out as
called bets through `simulate_call`, and the rounded-up half is credited as
opponent folds worth +1 each. The result is a floating-point total.
"""
function simulate_bet(card, num_of_simulations)
    half_budget = num_of_simulations / 2
    called_total = simulate_call(card, floor(Int, half_budget))
    # Each fold is worth +1, so the fold total equals the number of folds.
    folded_total = ceil(half_budget)
    return called_total + folded_total
end

simulate_bet (generic function with 1 method)

In [61]:
"""
    simulate_call(card, num_of_simulations)

Return the total reward of `num_of_simulations` showdowns played after a bet.

Each opponent card is sampled from `FULL_DECK` with `card` removed. A showdown
is worth +2 when `card` is the higher card and -2 otherwise.
"""
function simulate_call(card, num_of_simulations)
    remaining_deck = setdiff(FULL_DECK, [card])
    opp_cards = sample(remaining_deck, num_of_simulations)
    wins = count(opp -> card > opp, opp_cards)
    losses = length(opp_cards) - wins
    # After a bet the stake is doubled, so every showdown counts twice.
    return 2 * (wins - losses)
end

simulate_call (generic function with 1 method)

In [62]:
"""
    compute_policy_check_or_bet(card; num_of_simulations=1000)

Return `[p_check, p_bet]`, the probabilities of checking and betting for a
player holding `card`.

Each action is scored by its average simulated reward (`simulate_check` and
`simulate_bet`), and the two averages are turned into probabilities with a
softmax.

# Keywords
- `num_of_simulations`: simulation budget passed to each simulator; must be
  positive.

# Throws
- `ArgumentError` if `num_of_simulations` is not positive (the averages would
  otherwise divide by zero and yield `NaN` probabilities).
"""
function compute_policy_check_or_bet(card; num_of_simulations=1000)
    num_of_simulations > 0 || throw(
        ArgumentError("num_of_simulations must be positive, got $num_of_simulations"),
    )
    total_reward_bet = simulate_bet(card, num_of_simulations)
    total_reward_check = simulate_check(card, num_of_simulations)
    r_check = total_reward_check / num_of_simulations
    r_bet = total_reward_bet / num_of_simulations
    # Softmax over the two average rewards.
    q_check, q_bet = exp(r_check), exp(r_bet)
    z = q_check + q_bet
    return [q_check / z, q_bet / z]
end

compute_policy_check_or_bet (generic function with 1 method)

In [63]:
"""
    compute_policy_fold_or_call(card; num_of_simulations=1000)

Return `[p_fold, p_call]`, the probabilities of folding and calling for a
player holding `card`.

Folding is scored at a fixed reward of -1; calling is scored by the average
reward of `simulate_call`. The two scores are turned into probabilities with a
softmax.

# Keywords
- `num_of_simulations`: number of simulated calls; must be positive.

# Throws
- `ArgumentError` if `num_of_simulations` is not positive (the average would
  otherwise divide by zero and yield `NaN` probabilities).
"""
function compute_policy_fold_or_call(card; num_of_simulations=1000)
    num_of_simulations > 0 || throw(
        ArgumentError("num_of_simulations must be positive, got $num_of_simulations"),
    )
    # Folding always costs exactly 1, so its average needs no simulation.
    r_fold = -1.0
    r_call = simulate_call(card, num_of_simulations) / num_of_simulations
    # Softmax over the two average rewards.
    q_fold, q_call = exp(r_fold), exp(r_call)
    z = q_fold + q_call
    return [q_fold / z, q_call / z]
end

compute_policy_fold_or_call (generic function with 1 method)

In [64]:
"""
    compute_policy(card, previous_player_betted=false)

Return the two-element action distribution for a player holding `card`.

When `previous_player_betted` is `true` the result comes from
`compute_policy_fold_or_call`; otherwise from `compute_policy_check_or_bet`.
"""
function compute_policy(card, previous_player_betted=false)
    return previous_player_betted ? compute_policy_fold_or_call(card) :
           compute_policy_check_or_bet(card)
end

compute_policy (generic function with 2 methods)

In [65]:
"""
    sample_check_bet(policy)

Draw `CHECK` or `BET` at random, using `policy` as the weights of
`[CHECK, BET]` in that order.
"""
function sample_check_bet(policy)
    candidate_moves = [CHECK, BET]
    return sample(candidate_moves, Weights(policy))
end

sample_check_bet (generic function with 1 method)

In [66]:
"""
    sample_fold_call(policy)

Draw `FOLD` or `CHECK` at random, using `policy` as the weights of
`[FOLD, CHECK]` in that order. In this function `CHECK` is the code returned
for the non-fold (call) outcome.
"""
function sample_fold_call(policy)
    candidate_moves = [FOLD, CHECK]
    return sample(candidate_moves, Weights(policy))
end

sample_fold_call (generic function with 1 method)

In [67]:
"""
    sample_action(policy, previous_player_betted=false)

Draw one action according to `policy`.

Uses `sample_fold_call` when `previous_player_betted` is `true` and
`sample_check_bet` otherwise.
"""
function sample_action(policy, previous_player_betted=false)
    sampler = previous_player_betted ? sample_fold_call : sample_check_bet
    return sampler(policy)
end

sample_action (generic function with 2 methods)

In [68]:
"""
    simulate_check(card, num_of_simulations)

Return the total simulated reward of checking while holding `card`.

The rounded-up half of the budget is played as "opponent checks too"
showdowns worth +1 or -1. The remaining budget is treated as "opponent bets":
a `p_fold` share of it is charged -1 each, and a `p_call` share (rounded up to
a whole number of rounds) is played out with `simulate_call`. `p_fold` and
`p_call` come from `compute_policy_fold_or_call(card)`.
"""
function simulate_check(card, num_of_simulations)
    check_rounds = ceil(Int, num_of_simulations / 2)
    bet_rounds = num_of_simulations - check_rounds

    # Opponent checks as well: showdown for the undoubled stake.
    opp_cards = sample(setdiff(FULL_DECK, [card]), check_rounds)
    wins = count(opp -> card > opp, opp_cards)
    showdown_total = wins - (length(opp_cards) - wins)

    # Opponent bets: weigh folding against calling with our own policy.
    p_fold, p_call = compute_policy_fold_or_call(card)
    fold_total = bet_rounds * p_fold * -1
    call_total = simulate_call(card, ceil(Int, bet_rounds * p_call))

    return showdown_total + fold_total + call_total
end

simulate_check (generic function with 1 method)

In [69]:
"""
    round(first_player_card, second_player_card, history)

Play one round between two players and return the score from the first
player's point of view.

The first player acts, then the second player replies. If the second player
bets, the first player gets one more fold-or-call decision. A fold ends the
round at +1 or -1; otherwise the higher card wins 1, or 2 when either player
bet.

Every exit pushes a `(round_history, score)` record onto `history`.
`round_history` is a vector of `(first_player_entry, second_player_entry)`
pairs, where each entry is `(card, move, was_facing_a_bet)`. In the extra
entry added when the first player replies to a bet, the second-player slot is
filled with `INVALID_VALUE`.

Note: this definition uses the name `round`, so inside the module where it is
defined `round` refers to this function and not to `Base.round`.
"""
function round(first_player_card, second_player_card, history)
    first_player_policy = compute_policy(first_player_card)
    first_player_move = sample_action(first_player_policy)
    first_player_betted = first_player_move == BET

    second_player_policy = compute_policy(second_player_card, first_player_betted)
    second_player_move = sample_action(second_player_policy, first_player_betted)
    second_player_betted = second_player_move == BET

    round_history = [(
        (first_player_card, first_player_move, false),
        (second_player_card, second_player_move, first_player_betted)
    )]

    # The second player gave up: the first player wins 1.
    if second_player_move == FOLD
        push!(history, (round_history, 1))
        return 1
    end

    if second_player_betted
        # The first player now has to answer the bet.
        first_player_policy = compute_policy(first_player_card, true)
        first_player_move = sample_action(first_player_policy, true)

        # This decision was made facing a bet, so its flag is `true`.
        push!(
            round_history,
            (
                (first_player_card, first_player_move, true),
                (INVALID_VALUE, INVALID_VALUE, false)
            ),
        )
        if first_player_move == FOLD
            # Record the folded round too, so `history` covers every round.
            push!(history, (round_history, -1))
            return -1
        end
    end

    # Showdown: the stake is doubled when either player bet.
    doubled_pot = first_player_betted || second_player_betted
    stake = 1 + doubled_pot
    score = first_player_card > second_player_card ? stake : -stake
    push!(history, (round_history, score))
    return score
end

round (generic function with 1 method)

In [70]:
# Smoke test: total reward of checking with a KING using a single simulation.
simulate_check(KING, 1)

1.0

In [71]:
"""
    game(num_of_rounds = 10)

Play `num_of_rounds` rounds and return the average score per round.

For every round two distinct cards are drawn from `FULL_DECK` without
replacement; the first goes to the first player, the second to the second
player. The per-round records are collected in a local `history` that is
passed to `round`.
"""
function game(num_of_rounds = 10)
    history = []
    score_sum = 0
    for _ in 1:num_of_rounds
        dealt = sample(FULL_DECK, 2; replace=false)
        score_sum += round(dealt[1], dealt[2], history)
    end
    return score_sum / num_of_rounds
end

game (generic function with 2 methods)

In [87]:
# Play 1000 rounds and show the average score per round.
game(1000)

-0.173

In [73]:
# Check/bet probabilities when holding a JACK.
compute_policy_check_or_bet(JACK)

2-element Array{Float64,1}:
 0.34625784765015727
 0.6537421523498427

In [74]:
# Check/bet probabilities when holding a QUEEN.
compute_policy_check_or_bet(QUEEN)

2-element Array{Float64,1}:
 0.36749898541023246
 0.6325010145897675

In [75]:
# Total simulated reward of checking vs. betting with a KING (1000 simulations each).
simulate_check(KING, 1000), simulate_bet(KING, 1000)

(1430.2870634112166, 1500.0)

In [76]:
# Total simulated reward of checking vs. betting with a JACK (1000 simulations each).
simulate_check(JACK, 1000), simulate_bet(JACK, 1000)

(-1135.5292893150024, -500.0)

In [77]:
# Total simulated reward of checking vs. betting with a QUEEN (1000 simulations each).
simulate_check(QUEEN, 1000), simulate_bet(QUEEN, 1000)

(-159.29431499804704, 468.0)

In [78]:
# Check/bet probabilities when holding a KING.
compute_policy_check_or_bet(KING)

2-element Array{Float64,1}:
 0.48257882070443564
 0.5174211792955642

In [79]:
# Fold/call probabilities when holding a JACK.
compute_policy_fold_or_call(JACK)

2-element Array{Float64,1}:
 0.7310585786300049
 0.2689414213699951

In [80]:
# Fold/call probabilities when holding a QUEEN.
compute_policy_fold_or_call(QUEEN)

2-element Array{Float64,1}:
 0.26580728400862635
 0.7341927159913737

In [81]:
# Fold/call probabilities when holding a KING.
compute_policy_fold_or_call(KING)

2-element Array{Float64,1}:
 0.04742587317756678
 0.9525741268224331