In [10]:
# Import libraries.
using Turing, StatsPlots, Random

In [19]:
# Declare our Turing model.
# Turing model of one player that can reason recursively about its opponent.
#
# Arguments:
#   opponent_agent  - model constructor used to simulate the opponent when depth > 0.
#   my_action       - this player's action; left-hand side of the final `~ Categorical(...)`.
#   opponent_action - the opponent's action; only forwarded to the nested opponent model.
#   depth           - recursion depth; 0 means the opponent is not simulated.
#   discrete_sampler, discrete_sampler_hyper_param, num_of_iterations
#                   - sampler constructor, its single argument, and the number of samples
#                     drawn for the nested opponent chain.
#
# NOTE(review): this notebook defines `agent` with the same positional signature in more
# than one cell; whichever cell was evaluated last is the definition in effect.
@model function agent(opponent_agent, my_action, opponent_action, depth=0, discrete_sampler = PG, discrete_sampler_hyper_param=10, num_of_iterations=10)
    if depth > 0 
        # Simulate the opponent one level shallower, with the two players' actions swapped
        # and this same `agent` model passed in as the opponent's opponent.
        opp_action_chain = sample(opponent_agent(agent, opponent_action, my_action, depth-1), discrete_sampler(discrete_sampler_hyper_param), num_of_iterations, progress=false);
        # Average the first two alpha components over the nested chain.
        opp_alpha_1 = mean(opp_action_chain[:"alpha[1]"])
        opp_alpha_2 = mean(opp_action_chain[:"alpha[2]"])
        # The third component is taken as the remainder instead of being read from the chain.
        opp_alpha_3 = 1 - opp_alpha_1 - opp_alpha_2
        # The opponent's means, rotated by one position, become the Dirichlet parameters.
        # NOTE(review): presumably the rotation maps each opponent move to the move that
        # beats it - confirm against the intended action encoding.
        alpha ~ Dirichlet([opp_alpha_3, opp_alpha_1 , opp_alpha_2])
    else
        # Our prior belief about the probability of RPS.
        alpha ~ Dirichlet(ones(3))
    end
    my_action ~ Categorical(vec(alpha))
end

agent (generic function with 6 methods)

### model
1) prior on opponents <br>
2) history <br>
3) observation - history + prior => posterior <br>
4) counter policy - 1) beating the next round 2) confuse the opponent (noise) <br>
5) depth > 0 -> no history <br>
6) sample action from the counter policy<br>
7) optimal noise parameter <br>
8) implement Bob and Alice <br>
9) conditioning as rejection sampling

In [36]:
"""
    compute_counter_policy(opp_alpha_1, opp_alpha_2, opp_alpha_3)

Normalise the three weights so they sum to one and return them rotated by one
position, i.e. `[opp_alpha_3, opp_alpha_1, opp_alpha_2] / total`.

The resulting vector is also printed to standard output before it is returned.
"""
function compute_counter_policy(opp_alpha_1, opp_alpha_2, opp_alpha_3)
    total = opp_alpha_1 + opp_alpha_2 + opp_alpha_3
    # Build the rotated, normalised vector once and reuse it for printing and returning.
    policy = [opp_alpha_3 / total, opp_alpha_1 / total, opp_alpha_2 / total]
    println(policy)
    return policy
end

compute_counter_policy (generic function with 1 method)

In [25]:
# Map an action label to its integer index; histories may hold integers or their
# string form ("1", "2", "3").
_action_index(action::Integer) = Int(action)
_action_index(action::AbstractString) = parse(Int, action)

"""
    compute_distribution_from_history(actions_history)

Count how often each of the actions 1, 2 and 3 occurs in `actions_history`,
ignoring its last entry, and return the counts as `[count_1, count_2, count_3]`.

Every count starts at 1, so an empty or single-entry history yields `[1, 1, 1]`.
Entries may be integers or strings holding an integer.

# Throws
- `ArgumentError` if an entry does not denote one of the actions 1, 2 or 3.
"""
function compute_distribution_from_history(actions_history)
    # A vector indexed by action keeps the result in a fixed 1, 2, 3 order; iterating
    # a Dict would return the counts in an unspecified order.
    counts = ones(Int, 3)
    # The final entry is deliberately excluded, as in the original loop bound.
    considered = max(length(actions_history) - 1, 0)
    for raw_action in Iterators.take(actions_history, considered)
        action = _action_index(raw_action)
        action in 1:3 ||
            throw(ArgumentError("action must be 1, 2 or 3, got $(repr(raw_action))"))
        counts[action] += 1
    end
    return counts
end

compute_distribution_from_history (generic function with 1 method)

In [35]:
# Turing model of one player that derives a counter policy from the opponent's behaviour.
#
# Arguments:
#   opponent_agent   - model constructor used to simulate the opponent when depth != 0.
#   my_history       - this player's actions; its last entry is the left-hand side of the
#                      `~ Categorical(counter_policy)` statement.
#   opponent_history - the opponent's actions.
#   depth            - 0 uses the opponent's history directly; otherwise the opponent is
#                      simulated at depth-1.
#   discrete_sampler, discrete_sampler_hyper_param, num_of_iterations
#                    - sampler constructor, its single argument, and the number of samples
#                      drawn for the nested opponent chain.
#
# NOTE(review): this notebook defines `agent` with the same positional signature in more
# than one cell; whichever cell was evaluated last is the definition in effect.
@model function agent(opponent_agent, my_history, opponent_history, depth = 0, discrete_sampler = PG, discrete_sampler_hyper_param=1, num_of_iterations=1)
        if depth == 0
            # Base case: weight the three actions by the counts computed from the opponent's history.
            opponent_history_distribution = compute_distribution_from_history(opponent_history)
            opp_alpha_1, opp_alpha_2, opp_alpha_3 = opponent_history_distribution
            println("depth 0: 1: $opp_alpha_1, 2: $opp_alpha_2, 3: $opp_alpha_3")
        else
            # Simulate the opponent one level shallower, with the two histories swapped
            # and this same `agent` model passed in as the opponent's opponent.
            opp_action_chain = sample(opponent_agent(agent, opponent_history, my_history, depth-1), discrete_sampler(discrete_sampler_hyper_param), num_of_iterations, progress=true);
            # NOTE(review): no statement in this model has `alpha` on the left of `~`, yet
            # the lines below read "alpha[1]".."alpha[3]" from the nested chain. When
            # `opponent_agent` is this same model there may be nothing to average - verify
            # (the recorded run in this notebook ends with "reducing over an empty collection").
            opp_alpha_1 = mean(opp_action_chain[:"alpha[1]"])
            opp_alpha_2 = mean(opp_action_chain[:"alpha[2]"])
            opp_alpha_3 = mean(opp_action_chain[:"alpha[3]"])
            println("depth $depth: 1: $opp_alpha_1, 2: $opp_alpha_2, 3: $opp_alpha_3")
        end
        # Turn the opponent weights into a normalised, rotated probability vector.
        counter_policy = compute_counter_policy(opp_alpha_1, opp_alpha_2, opp_alpha_3)
        println("counter_policy: $counter_policy")
        # Only the most recent entry of my_history is tied to the counter policy.
        my_history[length(my_history)] ~ Categorical(counter_policy)
        println("ended computation")
end

agent (generic function with 5 methods)

In [15]:
# Declare our Turing model.
# Turing model of one player with a single `alpha` shared across every round of the history.
#
# Arguments:
#   opponent_agent   - model constructor used to simulate the opponent when depth > 0.
#   my_history       - this player's actions, one per round.
#   opponent_history - the opponent's actions; indexed with the same `i` as my_history.
#   depth            - recursion depth; 0 means the opponent is not simulated.
#   discrete_sampler, discrete_sampler_hyper_param, num_of_iterations
#                    - sampler constructor, its single argument, and the number of samples
#                      drawn for each nested opponent chain.
#
# NOTE(review): this notebook defines `agent` with the same positional signature in more
# than one cell; whichever cell was evaluated last is the definition in effect.
@model function agent(opponent_agent, my_history, opponent_history, depth=0, discrete_sampler = PG, discrete_sampler_hyper_param=1, num_of_iterations=1)
    # Our prior belief about the probability of RPS.
    alpha ~ Dirichlet(ones(3))
    for i in 1:length(my_history)
        println("$(i): depth: $(depth)")
        my_action = my_history[i]
        # NOTE(review): opponent_action is assigned here but not used later in this body.
        opponent_action = opponent_history[i]
        if depth > 0
            println("before sample i is $(i) depth is $(depth)")
            # TODO - call only in the end
            # The nested simulation sits inside the loop, so it is repeated for every round.
            opp_action_chain = sample(opponent_agent(agent, opponent_history, my_history, depth-1), discrete_sampler(discrete_sampler_hyper_param), num_of_iterations, progress=true);
            println("after sample i is $(i) depth is $(depth)")
            # Average the first two alpha components; the third is taken as the remainder.
            opp_alpha_1 = mean(opp_action_chain[:"alpha[1]"])
            opp_alpha_2 = mean(opp_action_chain[:"alpha[2]"])
            opp_alpha_3 = 1 - opp_alpha_1 - opp_alpha_2
            # Opponent means rotated by one position.
            counter_opponent_policy = [opp_alpha_3, opp_alpha_1 , opp_alpha_2]
            # NOTE(review): counter_opponent_policy is a local, not a model argument; confirm
            # whether Turing treats this `~` as an observation or as a fresh random variable.
            counter_opponent_policy ~ Dirichlet(alpha)
        end
        println("ended i: $(i) observation depth is $(depth)")
        my_action ~ Categorical(alpha)
    end
    println("ended computation on $(length(my_history)) and $(length(opponent_history))")
end

agent (generic function with 5 methods)

In [33]:
# Inputs for the single-round `sample(agent(...))` call that follows.
my_action = 1
opponent_action = 1
my_depth = 1

1

In [34]:
# Draw one PG(1) sample from `agent` playing against itself, with single-entry histories.
chain = sample(agent(agent, [my_action], [opponent_action], my_depth), PG(1), 1 , progress = true)
# Show the resulting chain as the cell's value.
chain

depth 0: 1: 1, 2: 1, 3: 1
[0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
counter_policy: [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
ended computation
depth 0: 1: 1, 2: 1, 3: 1
[0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
counter_policy: [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
ended computation


LoadError: ArgumentError: reducing over an empty collection is not allowed

In [14]:
"""
    maximum_likelihood_action(list_of_pairs)

Return the `(key, value)` pair with the largest value among `list_of_pairs`, where each
element is indexable as `element[1]` (key) and `element[2]` (value).

On ties the earliest element wins. If no value exceeds `-1` (including the empty
case) the sentinel `(-1, -1)` is returned. The chosen pair is printed before returning.
"""
function maximum_likelihood_action(list_of_pairs)
    # Fold from the (-1, -1) sentinel; the strict `>` keeps the first maximum on ties.
    keep_larger(best, element) = element[2] > best[2] ? (element[1], element[2]) : best
    max_key, max_value = foldl(keep_larger, list_of_pairs; init = (-1, -1))
    println("maximum likelihood is $((max_key, max_value))")
    return (max_key, max_value)
end

maximum_likelihood_action (generic function with 1 method)

In [16]:
"""
    move(agent, other_agent, my_history, other_agent_history, my_depth=1)

Pick this player's next action: draw one `PG(1)` sample from
`agent(other_agent, my_history, other_agent_history, my_depth)`, average the chain's
"alpha[1]" and "alpha[2]" entries, take the third weight as the remainder, and return
the action (1, 2 or 3) with the largest weight.

An empty history is replaced by `[1]` before the model is built.
"""
function move(agent, other_agent, my_history, other_agent_history, my_depth=1)
    # The model is given at least one entry per history.
    if !(length(other_agent_history) > 0)
        other_agent_history = [1]
    end
    if !(length(my_history) > 0)
        my_history = [1]
    end

    model = agent(other_agent, my_history, other_agent_history, my_depth)
    chain = sample(model, PG(1), 1, progress = false)
    println("chain computation is ended")

    weight_1 = mean(chain[:"alpha[1]"])
    weight_2 = mean(chain[:"alpha[2]"])
    # The third weight is derived as the remainder rather than read from the chain.
    candidates = [(1, weight_1), (2, weight_2), (3, 1 - weight_1 - weight_2)]
    best_action, _ = maximum_likelihood_action(candidates)
    return best_action
end

move (generic function with 2 methods)

In [7]:
# Ask `agent` for a single move against itself, starting from empty histories.
move(agent, agent, [], [])

1: depth: 1
before sample i is 1 depth is 1
1: depth: 0
ended i: 1 observation depth is 0
ended computation on 1 and 1
1: depth: 0
ended i: 1 observation depth is 0
ended computation on 1 and 1
after sample i is 1 depth is 1
ended i: 1 observation depth is 1
ended computation on 1 and 1
1: depth: 1
before sample i is 1 depth is 1
1: depth: 0
ended i: 1 observation depth is 0
ended computation on 1 and 1
1: depth: 0
ended i: 1 observation depth is 0
ended computation on 1 and 1
after sample i is 1 depth is 1
ended i: 1 observation depth is 1
ended computation on 1 and 1
chain computation is ended
maximum likelihood is (3, 0.9666186395714202)


3

In [17]:
"""
    game()

Play 10 rounds between two players that both use the `agent` model, the first with
depth 1 and the second with depth 0, and return
`(first_player_history, second_player_history)`.

In every round the first player's move is appended to its history before the second
player chooses, so the second player's call already sees that move.
"""
function game()
    first_player, second_player = agent, agent
    first_player_depth, second_player_depth = 1, 0
    num_of_simulations = 10
    history_one, history_two = [], []

    for i in 1:num_of_simulations
        # First player chooses and records its move.
        m1 = move(first_player, second_player, history_one, history_two, first_player_depth)
        println("player1 choose $m1")
        push!(history_one, m1)

        # Second player chooses with the first player's new move already recorded.
        m2 = move(second_player, first_player, history_two, history_one, second_player_depth)
        println("player2 choose $m2")
        push!(history_two, m2)

        println("in simulation $i first player chose $m1 second player chose $m2")
    end

    return history_one, history_two
end

game (generic function with 1 method)

In [31]:
# Run the full simulation; the call returns both players' move histories.
game()

depth 0: 1: 1, 2: 1, 3: 1
[0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
counter_policy: [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
ended computation
depth 0: 1: 1, 2: 1, 3: 1
[0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
counter_policy: [0.3333333333333333, 0.3333333333333333, 0.3333333333333333]
ended computation


LoadError: ArgumentError: reducing over an empty collection is not allowed