# This notebook presents an experiment in Theory of Mind on Rock Paper Scissors
### The goal is to research and develop a method for generic multi-agent games using Theory of Mind modeling.
### We will introduce the tradeoff between choosing the optimal move and staying unpredictable.

In [271]:
# Import libraries.
using Turing, StatsPlots, Random

### model
1) prior on opponents <br>
2) history <br>
3) observation - history + prior => posterior <br>
4) counter policy - 1) beat the opponent in the next round 2) confuse the opponent (noise) <br>
5) depth > 0 -> no history <br>
6) sample action from the counter policy<br>
7) optimal noise parameter <br>
8) implement Bob and Alice <br>
9) conditioning as rejection sampling

### compute_counter_policy
Given inferred information about the opponent,<br>
decide how to exploit it.<br>
In the future we will optimize these parameters to make the agent's moves less predictable. <br>

In [272]:
"""
    compute_counter_policy(opp_next_move, noise_factor = 0.1)

Return a 3-element probability vector over the moves `1`, `2`, `3` that
concentrates on the move beating `opp_next_move`.

The winning reply (`1 -> 2`, `2 -> 3`, `3 -> 1`) receives probability
`1 - noise_factor`; the remaining `noise_factor` is split evenly between the two
other moves. A move outside `1:3` raises a `KeyError`.
"""
function compute_counter_policy(opp_next_move, noise_factor = 0.1)
    # reply_to[m] is the move that wins against m.
    reply_to = Dict(1 => 2, 2 => 3, 3 => 1)
    best_reply = reply_to[opp_next_move]

    # Start with every move at the "noise" level, then overwrite the winning one.
    leftover_share = noise_factor / 2
    policy = [1.0 * leftover_share for _ in 1:3]
    policy[best_reply] = 1 - noise_factor
    return policy
end

compute_counter_policy (generic function with 3 methods)

A different way of computing the counter policy.
Not optimal! Will be changed.

In [273]:
"""
    compute_counter_policy(opp_alpha_1, opp_alpha_2, opp_alpha_3)

Turn the opponent's per-move weights into a normalized counter policy.

The weight the opponent puts on move `m` is assigned to the move that beats `m`:
the result is `[opp_alpha_3, opp_alpha_1, opp_alpha_2]` divided by the sum of the
three weights.
"""
function compute_counter_policy(opp_alpha_1, opp_alpha_2, opp_alpha_3)
    total = opp_alpha_1 + opp_alpha_2 + opp_alpha_3
    # Position k holds the weight of the opponent move that move k defeats.
    shifted_weights = (opp_alpha_3, opp_alpha_1, opp_alpha_2)
    return [weight / total for weight in shifted_weights]
end

compute_counter_policy (generic function with 3 methods)

Given a history, we want to know the distribution of the moves.

In [274]:
"""
    compute_distribution_from_history(actions_history)

Count how often each move (`1`, `2`, `3`) occurs in `actions_history` and return
the counts as `[count_of_1, count_of_2, count_of_3]`.

Every count starts at 1, so the result never contains a zero. A move outside
`1:3` raises a `KeyError`.

NOTE(review): the last element of `actions_history` is deliberately left out of
the tally, exactly as in the original implementation. Callers appear to rely on
this (a placeholder entry for an empty history, or an entry for the round still
being played) — confirm before changing the loop bound.
"""
function compute_distribution_from_history(actions_history)
    move_counts = Dict(1 => 1, 2 => 1, 3 => 1)
    for i in 1:(length(actions_history) - 1)
        move_counts[actions_history[i]] += 1
    end
    # Read the counts out in explicit key order. Iterating the Dict directly
    # yields its entries in an unspecified order, which scrambled the result that
    # callers destructure positionally as (move 1, move 2, move 3).
    return [move_counts[move] for move in 1:3]
end

compute_distribution_from_history (generic function with 1 method)

## The core of the notebook
### Modeling Theory of Mind in Rock Paper Scissors

## The model of an agent as a universal probabilistic model
### It takes the following parameters:
<ul>
    <li> opponent agent - representation of our belief about the opponent's model (Turing.jl model)
    <li> my history - list of all moves this player has made (unused at this point)
    <li> opponent history - list of all moves the opponent has made, used to estimate a prior on the moves
    <li> depth - how many levels deep the agent goes when modeling the mind of the opponent agent
</ul>        

In [305]:
# Probabilistic model of one Rock-Paper-Scissors player.
#
# Arguments:
#   opponent_agent               - model constructor used to simulate the opponent
#                                  (only used when depth > 0)
#   my_history                   - this player's past moves; only forwarded to the
#                                  simulated opponent
#   opponent_history             - the opponent's past moves
#   depth                        - 0: counter the opponent's empirical move counts;
#                                  > 0: simulate the opponent at depth - 1 and
#                                  counter its predicted move
#   discrete_sampler             - sampler constructor for the nested inference
#   discrete_sampler_hyper_param - argument passed to `discrete_sampler`
#   num_of_iterations            - number of samples drawn from the simulated opponent
#
# The counter policy is used as the parameter vector of a Dirichlet prior, and the
# move is drawn from a Categorical over the sampled policy.
@model function agent(opponent_agent, my_history, opponent_history, depth = 0, discrete_sampler = PG, discrete_sampler_hyper_param=5, num_of_iterations=5)
    if depth == 0
        opponent_history_distribution = compute_distribution_from_history(opponent_history)
        opp_alpha_1, opp_alpha_2, opp_alpha_3 = opponent_history_distribution
        counter_policy_prior_params = TArray(compute_counter_policy(opp_alpha_1, opp_alpha_2, opp_alpha_3))
    else
        # Simulate the opponent with the roles swapped: its "own" history is our
        # opponent_history and it sees our history as its opponent's.
        opp_action_chain = sample(opponent_agent(agent, opponent_history, my_history, depth-1), discrete_sampler(discrete_sampler_hyper_param), num_of_iterations, progress=true);
        opp_samples = opp_action_chain[:"next_move"]
        # Moves are categorical labels, so predict the most frequently sampled one.
        # Averaging the labels (as was done before) is meaningless: samples of 1
        # and 3 would "predict" move 2 even if it was never sampled.
        opp_move_counts = [count(s -> s == candidate, opp_samples) for candidate in 1:3]
        opp_next_move = findmax(opp_move_counts)[2]
        counter_policy_prior_params = TArray(compute_counter_policy(opp_next_move))
    end
    counter_policy ~ Dirichlet(counter_policy_prior_params)
    next_move ~ Categorical(counter_policy)
    return round(Int64, next_move)
end

agent (generic function with 5 methods)

In [356]:
# Smoke test: draw one PG(1) sample from an agent at the default depth (0) whose
# own history is ten 1s and whose opponent history is ten 2s, then show the
# sampled next move.
chain = sample(agent(agent, ones(10) , 2* ones(10)), PG(1), 1)
chain[:"next_move"]

2-dimensional AxisArray{Float64,2,...} with axes:
    :iter, 1:1:1
    :chain, 1:1
And data, a 1×1 Array{Float64,2}:
 2.0

### Simple auxiliary function that returns the most common sample (majority vote)

In [298]:
"""
    most_common(samples)

Return the move (`1`, `2` or `3`) that occurs most often in `samples`
(majority vote).

`samples` may be any iterable of integer-valued numbers, e.g. `2` or `2.0`.
Ties are broken deterministically in favour of the smallest move; an empty
`samples` therefore yields `1`.

# Throws
- `ArgumentError` if a sample is not one of `1`, `2`, `3`.
"""
function most_common(samples)
    # A fixed-size tally indexed by move replaces the Dict: iterating a Dict has
    # no guaranteed order, which made the winner of a tie implementation-defined.
    tally = zeros(Int, 3)
    for sample in samples
        (isinteger(sample) && 1 <= sample <= 3) ||
            throw(ArgumentError("samples must be moves 1, 2 or 3, got $sample"))
        tally[Int(sample)] += 1
    end
    # argmax returns the first maximal index, i.e. the smallest move on ties.
    return argmax(tally)
end




most_common (generic function with 1 method)

[35mSampling:  30%|█████████████                            |  ETA: 0:00:01[39m

### This method is used for the simulation: each model makes a move at the end of this function, as a result of the model's reasoning

In [306]:
"""
    move(agent, other_agent, my_history, other_agent_history, my_depth=1)

Choose this player's next move by sampling the `agent` model and taking the most
common sampled `next_move`.

An empty history is replaced by the placeholder `[1]`, and both histories are
converted to `Array{Int}` before being handed to the model. Inference runs
`PG(5)` for 5 iterations.
"""
function move(agent, other_agent, my_history, other_agent_history, my_depth=1)
    # Substitute the placeholder for empty histories, then force an Int array.
    rival_moves = Array{Int}(isempty(other_agent_history) ? [1] : other_agent_history)
    own_moves = Array{Int}(isempty(my_history) ? [1] : my_history)

    model = agent(other_agent, own_moves, rival_moves, my_depth)
    chain = sample(model, PG(5), 5, progress = true)
    return most_common(chain[:"next_move"])
end

move (generic function with 6 methods)

In [331]:
# Example calls: ask an agent at the default depth (1) for its next move given
# progressively longer histories. Only the value of the last call is displayed.
move(agent, agent, [1, 2], [2 , 3])
move(agent, agent, [1, 2, 3], [2 , 3, 2])
move(agent, agent, [1, 2, 2, 1], [2 , 3, 2, 3])

[1, 2]
[2, 3]
[1, 2, 3]
[2, 3, 2]
[1, 2, 2, 1]
[2, 3, 2, 3]


3

### Game simulation: given two agent models and their depth parameters, let them play for a number of simulated rounds

In [343]:
"""
    game(first_player_depth = 1, second_player_depth = 1, num_of_simulations = 100)

Let two `agent` players play `num_of_simulations` rounds against each other and
return `(first_player_history, second_player_history)`, the moves each player
made, as `Vector{Int}`s.

# Arguments
- `first_player_depth`: reasoning depth passed to `move` for the first player.
- `second_player_depth`: reasoning depth passed to `move` for the second player.
- `num_of_simulations`: number of rounds to play (defaults to the previously
  hard-coded 100).

NOTE(review): within a round the first player's move is appended to its history
before the second player decides, so the history handed to the second player
already contains the current round's move. This ordering is kept unchanged —
confirm whether it is intended.
"""
function game(first_player_depth = 1, second_player_depth = 1, num_of_simulations = 100)
    first_player = agent
    second_player = agent
    # Typed containers: moves are always Ints, so avoid Vector{Any}.
    first_player_history = Int[]
    second_player_history = Int[]
    for _ in 1:num_of_simulations
        m1 = move(first_player, second_player, first_player_history, second_player_history, first_player_depth)
        push!(first_player_history, m1)
        m2 = move(second_player, first_player, second_player_history, first_player_history, second_player_depth)
        push!(second_player_history, m2)
    end
    return first_player_history, second_player_history
end

game (generic function with 3 methods)

In [344]:
"""
    score(history)

Tally the outcome of a game.

`history` is a pair `(first_player_history, second_player_history)` of equally
indexed move lists. Returns `(first_wins, ties, second_wins)`, where move `1`
beats `3`, `2` beats `1` and `3` beats `2`; every other round counts as a tie.
"""
function score(history)
    first_moves, second_moves = history
    # beats[m] is the move that m defeats.
    beats = Dict(1 => 3, 2 => 1, 3 => 2)
    # Slots: 1 = first player wins, 2 = ties, 3 = second player wins.
    tally = [0, 0, 0]
    for (round, first_move) in enumerate(first_moves)
        slot = if beats[first_move] == second_moves[round]
            1
        elseif beats[second_moves[round]] == first_move
            3
        else
            2
        end
        tally[slot] += 1
    end
    return tally[1], tally[2], tally[3]
end

score (generic function with 2 methods)

In [345]:
"""
    display_score(score)

Print a game result to standard output, one line per outcome.

`score` is a `(first_wins, ties, second_wins)` triple; the lines are printed in
the order first player, second player, ties.
"""
function display_score(score)
    first_wins, ties, second_wins = score
    println("first player won: ", first_wins)
    println("second player won: ", second_wins)
    println("ties: ", ties)
end

display_score (generic function with 2 methods)

Both players play without theory of mind - equivalent

In [350]:
# Play a full game with both players at depth 0 and print the tally.
display_score(score(game(0, 0)))

first player won: 1
second player won: 1
ties: 98


2nd player plays with one level of theory of mind -> 2nd wins

In [351]:
# Play a full game with the first player at depth 0 and the second at depth 1,
# then print the tally.
display_score(score(game(0, 1)))

[32mSampling: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


first player won: 15
second player won: 28
ties: 57


1st player plays with one level of theory of mind -> 1st wins

In [352]:
# Play a full game with the first player at depth 1 and the second at depth 0,
# then print the tally.
display_score(score(game(1, 0)))

[32mSampling: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


first player won: 27
second player won: 17
ties: 56


Both players play with one level of theory of mind -> pretty much equivalent

In [353]:
# Play a full game with both players at depth 1 and print the tally.
display_score(score(game(1, 1)))

[32mSampling: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


first player won: 23
second player won: 20
ties: 57
