# This Notebook presents an expriment of Theory Of Mind on Rock Paper Scissors
### The goal is to research and devlop a method for generic multi-agent games using Theory of Mind modeling.
### We will introduce the tradeoff between choosing optimal move and staying unpredictable.

In [10]:
# Import libraries.
using Turing, StatsPlots, Random, Memoization

## The model of agent as universal Probalistic Model
### gets some parmeters:
<ul>
    <li> opponent agent - representation of our belief on the opponent model (Turing.jl model)
    <li> my history - list of all moves this player done (At this point unused)
    <li> opponent history - list of all moves the opponent done, used to estimate a prior on the moves
    <li> depth - the depth we want the agent will dive modeling the mind of opponent agent
</ul>        

In [8]:
function best_move(opp_move)
    best_moves = Dict(1 => 2, 2 => 3, 3 => 1)
    move = best_moves[opp_move]
#     println(move)
#     p_deceive = 0.001
#     print(p_deceive)
#     deceive ~ Bernoulli(p_deceive)
#     print(deceive)
#     if deceive
#         move = opp_move
#     end
    return move
end

best_move (generic function with 1 method)

In [9]:
@memoize memoized_sample(args...) = sample(args..., progress = false)

memoized_sample (generic function with 1 method)

In [4]:
function get_random_sample(samples)
    return Random.rand(samples)
end

get_random_sample (generic function with 1 method)

In [5]:
function most_common(samples)
    count = Dict(1 => 0, 2 => 0, 3 => 0)
    for i in 1:length(samples)
        count[samples[i]] += 1
    end
    max_k, max_v = -1 , -1
    for (k, v) in count
        if v > max_v
            max_k , max_v = k, v
        end
    end
    return max_k
end

most_common (generic function with 1 method)

In [11]:
@model function agent(opponent_agent, opponent_history, opp_prior_policy = [1,1,1], depth = 1, discrete_sampler = PG, discrete_sampler_hyper_param=3, num_of_iterations=10)
    if depth == 0
        beta_opp ~ Dirichlet(opp_prior_policy)
        for opp_move in opponent_history
            opp_move ~ Categorical(beta_opp) # observe
        end
        next_opp_move ~ Categorical(beta_opp) # sample
    else
        opponent_model = opponent_agent(agent, opponent_history, opp_prior_policy, depth-1)
        next_opp_move_chain  = memoized_sample(opponent_model, IS(), num_of_iterations); # if you use MH - you need to burn samples in the chain first
        result = generated_quantities(opponent_model, next_opp_move_chain)
        next_opp_move_from_chain = get_random_sample(result)
        next_opp_move ~ Dirac(next_opp_move_from_chain)
    end
    move = best_move(next_opp_move)
    return move
end

agent (generic function with 6 methods)

the infece is about the opponent model and not my model

### This method is used for the simulation, each model make a move at the end of this function, as a result of reasoning of the model

In [12]:
function move(agent, other_agent, other_agent_history, other_player_prior, my_depth=1)
    other_agent_history = length(other_agent_history) > 0 ? other_agent_history : [1]
    other_agent_history = Array{Int}(other_agent_history)
    m = agent(other_agent, other_agent_history, other_player_prior, my_depth)
    chain = memoized_sample(m, IS(), 10)
    moves = generated_quantities(m, chain)
    return most_common(moves)
end

move (generic function with 2 methods)

### Game simulation , given two agent models , depth params, let them play num of simulation times

In [8]:
function game(first_player_depth = 1, second_player_depth = 1, first_player_prior_policy = [1,1,1] , second_player_prior_policy = [1, 1, 1])
    first_player = agent
    second_player = agent
#     first_player_prior_policy = [1, 1, 1]
#     second_player_prior_policy = [1, 1, 1]
    num_of_simulations = 10
    first_player_history = []
    second_player_history = []
    for i in 1:num_of_simulations
        m1 = move(first_player, second_player, second_player_history, second_player_prior_policy, first_player_depth)
        push!(first_player_history, m1)
        m2 = move(second_player, first_player, first_player_history, first_player_prior_policy, second_player_depth)
        push!(second_player_history, m2)
    end
    return first_player_history, second_player_history
 end

game (generic function with 5 methods)

In [9]:
function score(history)
    first_player_history, second_player_history = history
    first_wins = 0
    ties = 0
    second_wins = 0
    wins = Dict(1 => 3, 2 => 1, 3 => 2)
    for i in 1:length(first_player_history)
        if wins[first_player_history[i]] == second_player_history[i]
            second_wins += 1
        elseif wins[second_player_history[i]] == first_player_history[i]
            first_wins += 1
        else
            ties += 1
        end
    end
    return first_wins, ties, second_wins
end

score (generic function with 1 method)

In [10]:
score([[1, 2], [3, 1]])

(0, 0, 2)

In [11]:
function display_score(score)
    num_of_wins_first, num_of_ties, num_of_wins_second = score
    println("first player won: $num_of_wins_first")
    println("second player won: $num_of_wins_second") 
    println("ties: $num_of_ties") 
end

display_score (generic function with 1 method)

both player plays without theory of mind - equivilent

In [12]:
display_score(score(game(0, 0)))

first player won: 33
second player won: 36
ties: 31


2nd player plays with one level of theory of mind -> 2nd wins

In [13]:
display_score(score(game(0, 1)))

first player won: 24
second player won: 42
ties: 34


In [14]:
display_score(score(game(0, 1)))

first player won: 38
second player won: 30
ties: 32


1st player plays with one level of theory of mind -> 1st wins

In [15]:
@time begin
    display_score(score(game(1, 0)))
end

first player won: 39
second player won: 32
ties: 29
543.965242 seconds (2.94 G allocations: 159.021 GiB, 5.44% gc time)


In [16]:
display_score(score(game(1, 0)))

first player won: 41
second player won: 26
ties: 33


both players play with one level of theory of mind -> preety much equivelent

In [17]:
@time begin
    display_score(score(game(1, 1)))
end

first player won: 27
second player won: 45
ties: 28
1146.733107 seconds (5.89 G allocations: 318.147 GiB, 5.25% gc time)


In [18]:
@time begin
    display_score(score(game(1, 1)))
end

first player won: 36
second player won: 34
ties: 30
1184.320170 seconds (5.89 G allocations: 318.147 GiB, 5.34% gc time)


In [None]:
@time begin
    display_score(score(game(0, 2)))
end

In [None]:
display_score(score(game(0, 2)))

In [None]:
@time begin
    display_score(score(game(2, 0)))
end

In [None]:
@time begin
    display_score(score(game(2, 0)))
end

In [None]:
@time begin
    display_score(score(game(1, 2)))
end

In [None]:
@time begin
    display_score(score(game(1, 2)))
end

In [None]:
@time begin
    display_score(score(game(2, 1)))
end

In [None]:
display_score(score(game(2, 1)))

In [None]:
@time begin
    display_score(score(game(2, 2)))
end

In [None]:
@time begin 
    display_score(score(game(0, 3)))
end

In [None]:
@time begin
    display_score(score(game(3, 0)))
end

In [None]:
display_score(score(game(0, 0, [20,1,1],[1,1,1])))

In [None]:
display_score(score(game(0, 0, [20,1,1],[1,1,1])))

In [None]:
display_score(score(game(0, 1, [20,1,1],[1,1,1])))

In [None]:
display_score(score(game(0, 1, [20,1,1],[1,1,1])))

In [None]:
display_score(score(game(1, 0, [20,1,1],[1,1,1])))

In [None]:
display_score(score(game(1, 0, [20,1,1],[1,1,1])))

In [None]:
display_score(score(game(1, 1, [20,1,1],[1,1,1])))

In [None]:
display_score(score(game(1, 1, [20,1,1],[1,1,1])))

In [None]:
display_score(score(game(0, 0,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(0, 0,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(0, 1,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(0, 1,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(1, 0,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(1, 0,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(1, 1,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(1, 1,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(0, 2,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(0, 2,[20,1,1], [1,1,1])))

In [None]:
display_score(score(game(2, 0,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(2, 0, [20,1,1],[1,1,1])))

In [None]:
display_score(score(game(1, 2, [20,1,1] ,[1,1,1])))

In [None]:
display_score(score(game(1, 2,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(2, 1, [20,1,1] ,[1,1,1])))

In [None]:
display_score(score(game(2, 1 ,[1,1,1], [20,1,1])))

In [None]:
display_score(score(game(2, 2, [20,1,1] ,[1,1,1])))

In [None]:
display_score(score(game(2, 2, [1,1,1], [20,1,1])))