In [1]:
# Import libraries.
using Turing, StatsPlots, Random

## Questions:
1) now it looks like mixture model <br>
2) how to use the history wisely <br>
3) how to use the deepening of computation <br>

In [50]:
# Declare our Turing model.
@model function agent(opponent_agent, my_action, opponent_action, opponent_agents_depth=0, depth=0, discrete_sampler = PG, discrete_sampler_hyper_param=10, num_of_iterations=10)
    if depth > 0 
        opp_action_chain = sample(opponent_agent(agent, opponent_action, my_action, depth-1, opponent_agents_depth), discrete_sampler(discrete_sampler_hyper_param), num_of_iterations, progress=false);
        opp_alpha_1 = mean(opp_action_chain[:"alpha[1]"])
        opp_alpha_2 = mean(opp_action_chain[:"alpha[2]"])
        opp_alpha_3 = 1 - opp_alpha_1 - opp_alpha_2
        alpha ~ Dirichlet([opp_alpha_3, opp_alpha_1 , opp_alpha_2])
    else
        # Our prior belief about the probability of RPS.
        alpha ~ Dirichlet(ones(3)/3)
    end
    my_action ~ Categorical(vec(alpha))
    return my_action
end

agent (generic function with 6 methods)

In [138]:
print(typeof(ones(3)/3))
Dirichlet(ones(3)/3)

Array{Float64,1}

Dirichlet{Float64}(alpha=[0.3333333333333333, 0.3333333333333333, 0.3333333333333333])

In [175]:
# Declare our Turing model.
@model function agent(opponent_agent, my_history, opponent_history, opponent_agents_depth=0, depth=0, discrete_sampler = PG, discrete_sampler_hyper_param=1, num_of_iterations=1)
    # Our prior belief about the probability of RPS.
    alpha ~ Dirichlet(ones(3)/3)
    for i in 1:length(my_history)
        my_action = my_history[i]
        opponent_action = opponent_history[i]
        if depth > 0 
            opp_action_chain = sample(opponent_agent(agent, opponent_history, my_history, depth-1, opponent_agents_depth), discrete_sampler(discrete_sampler_hyper_param), num_of_iterations, progress=true);
            opp_alpha_1 = mean(opp_action_chain[:"alpha[1]"])
            opp_alpha_2 = mean(opp_action_chain[:"alpha[2]"])
            opp_alpha_3 = 1 - opp_alpha_1 - opp_alpha_2
            counter_opponent_policy = [opp_alpha_3, opp_alpha_1 , opp_alpha_2]
            counter_opponent_policy ~ Dirichlet(alpha)
        end
        my_action ~ Categorical(alpha)
    end
end

agent (generic function with 6 methods)

In [176]:
my_action = 1
opponent_action = 0
opponent_depth = 0
my_depth = 1

1

In [177]:
chain = sample(agent(agent, [my_action], [opponent_action], opponent_depth, my_depth), PG(1), 1 , progress = true)
chain

[0.97282613276506, 0.027028725942920457, 0.00014514129201946064]
[0.01595130312069748, 0.24812294418883182, 0.7359257526904707]


Chains MCMC chain (1×9×1 Array{Float64,3}):

Log evidence      = 0.0
Iterations        = 1:1
Thinning interval = 1
Chains            = 1
Samples per chain = 1
parameters        = alpha[1], alpha[2], alpha[3], counter_opponent_policy[1], counter_opponent_policy[2], counter_opponent_policy[3], my_action
internals         = logevidence, lp

Summary Statistics
 [1m                 parameters [0m [1m    mean [0m [1m     std [0m [1m naive_se [0m [1m    mcse [0m [1m     es[0m ⋯
 [90m                     Symbol [0m [90m Float64 [0m [90m Float64 [0m [90m  Float64 [0m [90m Missing [0m [90m Missin[0m ⋯

                    alpha[1]    0.0020       NaN        NaN   missing   missin ⋯
                    alpha[2]    0.0072       NaN        NaN   missing   missin ⋯
                    alpha[3]    0.9907       NaN        NaN   missing   missin ⋯
  counter_opponent_policy[1]    0.0000       NaN        NaN   missing   missin ⋯
  counter_opponent_policy[2]    0.0000       NaN    

In [178]:
function maximum_likelihood_action(list_of_pairs)
    max_key = -1
    max_value = -1
    for element in list_of_pairs
        key = element[1]
        value = element[2]
        if value > max_value
            max_value = value
            max_key = key
        end
    end
    return (max_key, max_value)
end

maximum_likelihood_action (generic function with 1 method)

In [179]:
function move(agent, other_agent, my_history, other_agent_history)
    other_agent_history = length(other_agent_history) > 0 ? other_agent_history : [1]
    my_history = length(my_history) > 0 ? my_history : [1]
    chain = sample(agent(other_agent, my_history, other_agent_history, opponent_depth, my_depth), PG(1), 1, progress = false)
    alpha_1 = (1, mean(chain[:"alpha[1]"]))
    alpha_2 = (2, mean(chain[:"alpha[2]"]))
    alpha_3 = (3, 1 - alpha_1[2] - alpha_2[2])
    return maximum_likelihood_action([alpha_1, alpha_2, alpha_3])[1]
end

move (generic function with 3 methods)

In [180]:
move(agent, agent, [])

opponent_action=1 my_action=1[0.03259416503137236, 0.0015435567346529198, 0.9658622782339747]
[0.0003339011871190678, 0.038475085677245106, 0.9611910131356358]
[0.9495548929438405, 0.03951151722810654, 0.010933589828052896]
[0.48682616180124977, 0.3400327286673455, 0.17314110953140477]
[0.9876466143571176, 0.012347239511008497, 6.146131873904474e-6]
[0.8734759919582086, 0.12545934029380482, 0.0010646677479866585]
[0.03384295952750915, 0.18364298691312295, 0.7825140535593679]
[0.007700460852531843, 0.01208703910481812, 0.98021250004265]
[0.021976384873515928, 0.9545229269925619, 0.023500688133922157]
[0.14437211888930931, 0.8531669019530347, 0.0024609791576559966]
[0.006338731549530352, 0.8566043444433438, 0.13705692400712588

Chains MCMC chain (5×9×1 Array{Float64,3}):

Log evidence      = 0.0
Iterations        = 1:5
Thinning interval = 1
Chains            = 1
Samples per chain = 5
parameters        = alpha[1], alpha[2], alpha[3], counter_opponent_policy[1], counter_opponent_policy[2], counter_opponent_policy[3], my_action
internals         = logevidence, lp

Summary Statistics
 [1m                 parameters [0m [1m    mean [0m [1m     std [0m [1m naive_se [0m [1m    mcse [0m [1m       [0m ⋯
 [90m                     Symbol [0m [90m Float64 [0m [90m Float64 [0m [90m  Float64 [0m [90m Missing [0m [90m    Flo[0m ⋯

                    alpha[1]    0.2871    0.3824     0.1710   missing      -1. ⋯
                    alpha[2]    0.2832    0.3920     0.1753   missing     505. ⋯
                    alpha[3]    0.4296    0.5132     0.2295   missing      -9. ⋯
  counter_opponent_policy[1]    0.1692    0.2747     0.1228   missing     -29. ⋯
  counter_opponent_policy[2]    0.5608    0.5142    

]
[0.8732690935041916, 0.0027448111061910115, 0.12398609538961748]
[0.8213027437537287, 0.004975916173594137, 0.17372134007267712]
[0.6045235315870854, 0.04859137014467549, 0.34688509826823904]
[0.0149973895041553, 0.4452494910117894, 0.5397531194840552]
[0.0937109423142517, 0.00072040912128857, 0.9055686485644597]
[0.09693568716892655, 0.8172949331755782, 0.08576937965549523]
[0.3041915152116673, 0.03718163072519757, 0.6586268540631351]
[0.4919452714916164, 0.04545036875870278, 0.46260435974968084]
[0.035560407574087335, 0.9349752358333671, 0.02946435659254561]
[0.49031258443578357, 0.5085345755366688, 0.0011528400275476142]
[0.6074917324210467, 0.03021397308297532, 0.3622942944959779]
[0.4399650721947491, 0.559936627811487, 9.829999376386258e-5]
[0.5748882373415314, 0.11864352348188566, 0.30646823917658295]
[0.17104688951575658, 0.773860858606539, 0.055092251877704446]
[0.12740454915026878, 0.07308407301409053, 0.7995113778356407]
[0.03192626704526813, 0.27781540386684683, 0.69025832

3

In [181]:
function game()
    first_player = agent
    second_player = agent
    num_of_simulations = 10
    first_player_history = []
    second_player_history = []
    for i in 1:num_of_simulations
        m1 = move(first_player, second_player, first_player_history, second_player_history)
        push!(first_player_history, m1)
        m2 = move(second_player, first_player, second_player_history, first_player_history)
        push!(second_player_history, m2)
        println("in simulation $i first player chose $m1 second player chose $m2")
    end
    return first_player_history, second_player_history
end

game (generic function with 1 method)

In [None]:
game()

[0.0690367856699109, 0.16010280160831017, 0.770860412721779]
[0.053281834614139045, 0.4671278541305357, 0.47959031125532525]
[0.12137428024809593, 0.24251633201242087, 0.6361093877394832]
