# This Notebook presents an expriment of Theory Of Mind on Rock Paper Scissors
### The goal is to research and devlop a method for generic multi-agent games using Theory of Mind modeling.
### We will introduce the tradeoff between choosing optimal move and staying unpredictable.

In [10]:
# Import libraries.
using Turing, StatsPlots, Random, Memoization

## The model of agent as universal Probalistic Model
### gets some parmeters:
<ul>
    <li> opponent agent - representation of our belief on the opponent model (Turing.jl model)
    <li> my history - list of all moves this player done (At this point unused)
    <li> opponent history - list of all moves the opponent done, used to estimate a prior on the moves
    <li> depth - the depth we want the agent will dive modeling the mind of opponent agent
</ul>        

In [8]:
function best_move(opp_move)
    best_moves = Dict(1 => 2, 2 => 3, 3 => 1)
    move = best_moves[opp_move]
#     println(move)
#     p_deceive = 0.001
#     print(p_deceive)
#     deceive ~ Bernoulli(p_deceive)
#     print(deceive)
#     if deceive
#         move = opp_move
#     end
    return move
end

best_move (generic function with 1 method)

In [9]:
@memoize memoized_sample(args...) = sample(args..., progress = false)

memoized_sample (generic function with 1 method)

In [4]:
function get_random_sample(samples)
    return Random.rand(samples)
end

get_random_sample (generic function with 1 method)

In [5]:
function most_common(samples)
    count = Dict(1 => 0, 2 => 0, 3 => 0)
    for i in 1:length(samples)
        count[samples[i]] += 1
    end
    max_k, max_v = -1 , -1
    for (k, v) in count
        if v > max_v
            max_k , max_v = k, v
        end
    end
    return max_k
end

most_common (generic function with 1 method)

In [11]:
@model function agent(opponent_agent, opponent_history, opp_prior_policy = [1,1,1], depth = 1, discrete_sampler = PG, discrete_sampler_hyper_param=3, num_of_iterations=100)
    if depth == 0
        beta_opp ~ Dirichlet(opp_prior_policy)
        for opp_move in opponent_history
            opp_move ~ Categorical(beta_opp) # observe
        end
        next_opp_move ~ Categorical(beta_opp) # sample
    else
        opponent_model = opponent_agent(agent, opponent_history, opp_prior_policy, depth-1)
        next_opp_move_chain  = memoized_sample(opponent_model, IS(), num_of_iterations); # if you use MH - you need to burn samples in the chain first
        result = generated_quantities(opponent_model, next_opp_move_chain)
        next_opp_move_from_chain = get_random_sample(result)
        next_opp_move ~ Dirac(next_opp_move_from_chain)
    end
    move = best_move(next_opp_move)
    return move
end

agent (generic function with 6 methods)

the infece is about the opponent model and not my model

### This method is used for the simulation, each model make a move at the end of this function, as a result of reasoning of the model

In [12]:
function move(agent, other_agent, other_agent_history, other_player_prior, my_depth=1)
    other_agent_history = length(other_agent_history) > 0 ? other_agent_history : [1]
    other_agent_history = Array{Int}(other_agent_history)
    m = agent(other_agent, other_agent_history, other_player_prior, my_depth)
    chain = memoized_sample(m, IS(), 10)
    moves = generated_quantities(m, chain)
    return most_common(moves)
end

move (generic function with 2 methods)

### Game simulation , given two agent models , depth params, let them play num of simulation times

In [13]:
function game(first_player_depth = 1, second_player_depth = 1, first_player_prior_policy = [1,1,1] , second_player_prior_policy = [1, 1, 1])
    first_player = agent
    second_player = agent
#     first_player_prior_policy = [1, 1, 1]
#     second_player_prior_policy = [1, 1, 1]
    num_of_simulations = 10
    first_player_history = []
    second_player_history = []
    for i in 1:num_of_simulations
        m1 = move(first_player, second_player, second_player_history, second_player_prior_policy, first_player_depth)
        push!(first_player_history, m1)
        m2 = move(second_player, first_player, first_player_history, first_player_prior_policy, second_player_depth)
        push!(second_player_history, m2)
    end
    return first_player_history, second_player_history
 end

game (generic function with 5 methods)

In [14]:
function score(history)
    first_player_history, second_player_history = history
    first_wins = 0
    ties = 0
    second_wins = 0
    wins = Dict(1 => 3, 2 => 1, 3 => 2)
    for i in 1:length(first_player_history)
        if wins[first_player_history[i]] == second_player_history[i]
            second_wins += 1
        elseif wins[second_player_history[i]] == first_player_history[i]
            first_wins += 1
        else
            ties += 1
        end
    end
    return first_wins, ties, second_wins
end

score (generic function with 1 method)

In [15]:
score([[1, 2], [3, 1]])

(0, 0, 2)

In [16]:
function display_score(score)
    num_of_wins_first, num_of_ties, num_of_wins_second = score
    println("first player won: $num_of_wins_first")
    println("second player won: $num_of_wins_second") 
    println("ties: $num_of_ties") 
end

display_score (generic function with 1 method)

both player plays without theory of mind - equivilent

In [17]:
display_score(score(game(0, 0)))

first player won: 6
second player won: 0
ties: 4


2nd player plays with one level of theory of mind -> 2nd wins

In [18]:
display_score(score(game(0, 1)))

first player won: 1
second player won: 7
ties: 2


In [19]:
display_score(score(game(0, 1)))

first player won: 1
second player won: 5
ties: 4


1st player plays with one level of theory of mind -> 1st wins

In [20]:
@time begin
    display_score(score(game(1, 0)))
end

first player won: 4
second player won: 3
ties: 3
  0.506312 seconds (1.68 M allocations: 100.754 MiB, 6.40% gc time)


In [21]:
display_score(score(game(1, 0)))

first player won: 6
second player won: 1
ties: 3


both players play with one level of theory of mind -> preety much equivelent

In [22]:
@time begin
    display_score(score(game(1, 1)))
end

first player won: 2
second player won: 5
ties: 3
  0.916748 seconds (3.25 M allocations: 193.958 MiB, 6.95% gc time)


In [23]:
@time begin
    display_score(score(game(1, 1)))
end

first player won: 2
second player won: 3
ties: 5
  0.822252 seconds (3.25 M allocations: 193.957 MiB, 6.66% gc time)


In [24]:
@time begin
    display_score(score(game(0, 2)))
end

first player won: 3
second player won: 5
ties: 2
  8.216833 seconds (34.57 M allocations: 2.013 GiB, 6.78% gc time)


In [25]:
display_score(score(game(0, 2)))

first player won: 3
second player won: 3
ties: 4


In [26]:
@time begin
    display_score(score(game(2, 0)))
end

first player won: 1
second player won: 4
ties: 5
  8.563095 seconds (33.84 M allocations: 1.977 GiB, 6.86% gc time)


In [27]:
@time begin
    display_score(score(game(2, 0)))
end

first player won: 4
second player won: 1
ties: 5
  8.210171 seconds (33.84 M allocations: 1.976 GiB, 6.80% gc time)


In [28]:
@time begin
    display_score(score(game(1, 2)))
end

first player won: 0
second player won: 9
ties: 1
  9.099315 seconds (36.10 M allocations: 2.102 GiB, 6.62% gc time)


In [29]:
@time begin
    display_score(score(game(1, 2)))
end

first player won: 5
second player won: 5
ties: 0
  8.801835 seconds (36.10 M allocations: 2.102 GiB, 6.62% gc time)


In [30]:
@time begin
    display_score(score(game(2, 1)))
end

first player won: 4
second player won: 1
ties: 5
  8.733024 seconds (35.41 M allocations: 2.068 GiB, 6.82% gc time)


In [31]:
display_score(score(game(2, 1)))

first player won: 2
second player won: 4
ties: 4


In [32]:
@time begin
    display_score(score(game(2, 2)))
end

first player won: 3
second player won: 4
ties: 3
 17.299565 seconds (68.26 M allocations: 3.980 GiB, 6.71% gc time)


In [33]:
@time begin 
    display_score(score(game(0, 3)))
end

first player won: 4
second player won: 3
ties: 3
170.879199 seconds (724.47 M allocations: 42.183 GiB, 6.91% gc time)


In [34]:
@time begin
    display_score(score(game(3, 0)))
end

first player won: 4
second player won: 3
ties: 3
169.748290 seconds (709.14 M allocations: 41.417 GiB, 6.57% gc time)


In [35]:
display_score(score(game(0, 0, [20,1,1],[1,1,1])))

first player won: 0
second player won: 7
ties: 3


In [36]:
display_score(score(game(0, 0, [20,1,1],[1,1,1])))

first player won: 5
second player won: 3
ties: 2


In [37]:
display_score(score(game(0, 1, [20,1,1],[1,1,1])))

first player won: 2
second player won: 5
ties: 3


In [38]:
display_score(score(game(0, 1, [20,1,1],[1,1,1])))

first player won: 4
second player won: 4
ties: 2


In [39]:
display_score(score(game(1, 0, [20,1,1],[1,1,1])))

first player won: 5
second player won: 2
ties: 3


In [40]:
display_score(score(game(1, 0, [20,1,1],[1,1,1])))

first player won: 0
second player won: 3
ties: 7


In [41]:
display_score(score(game(1, 1, [20,1,1],[1,1,1])))

first player won: 5
second player won: 2
ties: 3


In [42]:
display_score(score(game(1, 1, [20,1,1],[1,1,1])))

first player won: 5
second player won: 3
ties: 2


In [43]:
display_score(score(game(0, 0,[1,1,1], [20,1,1])))

first player won: 3
second player won: 4
ties: 3


In [44]:
display_score(score(game(0, 0,[1,1,1], [20,1,1])))

first player won: 4
second player won: 1
ties: 5


In [45]:
display_score(score(game(0, 1,[1,1,1], [20,1,1])))

first player won: 4
second player won: 4
ties: 2


In [46]:
display_score(score(game(0, 1,[1,1,1], [20,1,1])))

first player won: 1
second player won: 3
ties: 6


In [47]:
display_score(score(game(1, 0,[1,1,1], [20,1,1])))

first player won: 3
second player won: 5
ties: 2


In [48]:
display_score(score(game(1, 0,[1,1,1], [20,1,1])))

first player won: 1
second player won: 2
ties: 7


In [49]:
display_score(score(game(1, 1,[1,1,1], [20,1,1])))

first player won: 0
second player won: 6
ties: 4


In [50]:
display_score(score(game(1, 1,[1,1,1], [20,1,1])))

first player won: 5
second player won: 4
ties: 1


In [51]:
display_score(score(game(0, 2,[1,1,1], [20,1,1])))

first player won: 1
second player won: 3
ties: 6


In [52]:
display_score(score(game(0, 2,[20,1,1], [1,1,1])))

first player won: 4
second player won: 4
ties: 2


In [53]:
display_score(score(game(2, 0,[1,1,1], [20,1,1])))

first player won: 7
second player won: 2
ties: 1


In [54]:
display_score(score(game(2, 0, [20,1,1],[1,1,1])))

first player won: 5
second player won: 3
ties: 2


In [55]:
display_score(score(game(1, 2, [20,1,1] ,[1,1,1])))

first player won: 1
second player won: 7
ties: 2


In [56]:
display_score(score(game(1, 2,[1,1,1], [20,1,1])))

first player won: 2
second player won: 2
ties: 6


In [57]:
display_score(score(game(2, 1, [20,1,1] ,[1,1,1])))

first player won: 3
second player won: 5
ties: 2


In [58]:
display_score(score(game(2, 1 ,[1,1,1], [20,1,1])))

first player won: 5
second player won: 1
ties: 4


In [59]:
display_score(score(game(2, 2, [20,1,1] ,[1,1,1])))

first player won: 0
second player won: 6
ties: 4


In [60]:
display_score(score(game(2, 2, [1,1,1], [20,1,1])))

first player won: 7
second player won: 3
ties: 0


In [61]:
@time begin
    display_score(score(game(4, 0)))
end

first player won: 4
second player won: 2
ties: 4
3968.815856 seconds (14.89 G allocations: 869.774 GiB, 25.62% gc time)


In [None]:
@time begin
    display_score(score(game(0, 4)))
end

In [None]:
@time begin
    display_score(score(game(5, 0)))
end

In [None]:
@time begin
    display_score(score(game(0, 5)))
end

In [None]:
@time begin
    display_score(score(game(5, 4)))
end

In [None]:
@time begin
    display_score(score(game(6, 0)))
end