## Rock-Paper-Scissors

Simple Game

Import library

In [None]:
import Pkg

# Install the solver stack only when it is missing from the active project.
# Pkg.dependencies() is keyed by package UUID (not by name), so a
# `haskey(..., "Ipopt")` lookup never matches; compare against the recorded
# package names instead, and only count direct dependencies because those are
# the ones `using` can load from the active project.
for pkg in ("Ipopt", "JuMP")
    installed = any(values(Pkg.dependencies())) do info
        info.name == pkg && info.is_direct_dep
    end
    if !installed
        Pkg.add(pkg)
    end
end

using JuMP, Ipopt

Struct Simple Game

(Source: *Algorithms for Decision Making*, Kochenderfer, Wheeler, and Wray)

In [2]:
# Description of a simple game. All fields are untyped; the notes below reflect
# how the rest of this file uses them.
struct SimpleGame
    γ  # discount factor
    ℐ  # agents
    𝒜  # joint action space (indexed per agent: 𝒜[i] is agent i's action collection)
    R  # joint reward function (R[a][i] is agent i's reward for joint action a)
end

Simple game policy struct and utility calculation function

(Source: Algorithms for decision making, modified)

In [3]:
# Stochastic policy of a single agent in a simple game.
#
# Constructors:
#   SimpleGamePolicy(p::Base.Generator)  - generator of `action => weight` pairs
#   SimpleGamePolicy(p::Dict)            - dictionary of `action => weight`
#   SimpleGamePolicy(ai)                 - deterministic policy that always plays `ai`
#
# Weights are normalized so the stored probabilities sum to one. Any real weights
# are accepted (including integers); an ArgumentError is raised when the weights
# do not add up to a positive total, because no distribution can be formed.
struct SimpleGamePolicy
    p # dictionary mapping actions to probabilities

    function SimpleGamePolicy(p::Base.Generator)
        return SimpleGamePolicy(Dict(p))
    end

    function SimpleGamePolicy(p::Dict)
        weights = collect(values(p))
        total = sum(weights)
        total > 0 || throw(ArgumentError("policy weights must sum to a positive value, got $total"))
        # Divide out-of-place: an in-place `./=` on integer weights cannot store
        # the fractional results and throws InexactError.
        return new(Dict(k => w / total for (k, w) in zip(keys(p), weights)))
    end

    SimpleGamePolicy(ai) = new(Dict(ai => 1.0))
end

# Probability that policy πi chooses action ai; actions that are not stored in
# the policy's dictionary have probability 0.0.
function (πi::SimpleGamePolicy)(ai)
    return get(πi.p, ai, 0.0)
end

# Draw a random action according to policy πi.
#
# One uniform draw from `rand()` is compared against the running sum of the
# action probabilities, and the first action whose cumulative probability exceeds
# the draw is returned. Floating-point rounding can leave the final cumulative
# sum at or below the draw, in which case no action passes the comparison; rather
# than failing on a `nothing` index, the last action with positive probability
# is returned. Throws ArgumentError when the policy holds no actions.
function (πi::SimpleGamePolicy)()
    isempty(πi.p) && throw(ArgumentError("cannot sample from a policy with no actions"))
    u = rand()
    acc = 0.0
    fallback = first(keys(πi.p))
    # Walk the dictionary once instead of materializing key/value/cumsum arrays.
    for (action, prob) in πi.p
        acc += prob
        acc > u && return action
        prob > 0 && (fallback = action)
    end
    return fallback
end

# Build the joint space: a flat vector holding every tuple that takes one element
# from each collection in X (the first collection varies fastest).
function joint(X)
    grid = Iterators.product(X...)
    return grid |> collect |> vec
end

# Expected reward of agent i in simple game 𝒫 when the agents follow joint policy π.
# Each joint action is weighted by the product of the probabilities that the
# individual policies assign to their own component of that joint action.
function utility(𝒫::SimpleGame, π, i)
    jointprob = a -> prod(πj(aj) for (πj, aj) in zip(π, a))
    payoff = a -> 𝒫.R[a][i]
    return sum(payoff(a) * jointprob(a) for a in joint(𝒫.𝒜))
end

utility (generic function with 1 method)

### Rock-Paper-Scissors game

Constants

In [4]:
# number of players; used below to build the agent list and one action set per agent
const N_AGENTS = 2
# actions available to each player
const ACTIONS = [:rock, :paper, :scissors]

3-element Vector{Symbol}:
 :rock
 :paper
 :scissors

struct RockPaperScissors

In [5]:
# Rock-Paper-Scissors wrapped as a SimpleGame.
#
# RockPaperScissors(γ, rewards)
#   γ        discount factor
#   rewards  maps each action (:rock, :paper, :scissors) to the reward earned for
#            winning with it; the loser receives the negated value and ties pay
#            (0, 0). Any dictionary with Symbol keys and real values is accepted
#            (e.g. Dict(:rock => 1.0, ...)), not only Dict{Symbol, Real}.
#
# A missing action key raises KeyError when the reward table is built.
struct RockPaperScissors
    simpleGame::SimpleGame

    function RockPaperScissors(
        γ::Real,                                # discount factor
        rewards::AbstractDict{Symbol,<:Real},   # reward for winning with an action; the loser gets the negated reward
        )

        # construct joint rewards
        joint_rewards = construct_joint_rewards(rewards)

        simpleGame = SimpleGame(γ, collect(1:N_AGENTS), [ACTIONS for _ in 1:N_AGENTS], joint_rewards)

        return new(simpleGame)
    end

    # Build the table mapping every joint action to the pair of rewards
    # (player 1, player 2).
    function construct_joint_rewards(rewards::AbstractDict{Symbol,<:Real})
        joint_rewards = Dict{Tuple{Symbol, Symbol}, Tuple{Real, Real}}()

        # tie
        for action in (:rock, :paper, :scissors)
            joint_rewards[(action, action)] = (0, 0)
        end

        # rock beats scissors, scissors beats paper, paper beats rock
        for (winner, loser) in ((:rock, :scissors), (:scissors, :paper), (:paper, :rock))
            reward = rewards[winner]
            joint_rewards[(winner, loser)] = (reward, -reward)
            joint_rewards[(loser, winner)] = (-reward, reward)
        end

        return joint_rewards
    end
end

### Nash Equilibrium

(Source: Algorithms for decision making)

In [6]:
# Field-less marker type; `solve` dispatches on it to select the Nash equilibrium computation.
struct NashEquilibrium end

# Re-express a simple game with integer indices.
# Returns a 3-tuple:
#   1. the indices of the agent collection,
#   2. for each agent, the indices of that agent's action collection,
#   3. the rewards R[a] listed in the order that joint(𝒜) enumerates joint actions.
function tensorform(𝒫::SimpleGame)
    agent_ids = 𝒫.ℐ
    agent_idx = eachindex(agent_ids)
    action_idx = [eachindex(𝒫.𝒜[i]) for i in agent_ids]
    payoffs = map(a -> 𝒫.R[a], joint(𝒫.𝒜))
    return agent_idx, action_idx, payoffs
end

# Compute a joint policy for simple game 𝒫 by solving a nonlinear program with
# JuMP and Ipopt. Returns a vector with one SimpleGamePolicy per agent.
# The argument M is only used for dispatch.
# NOTE(review): the solver's termination status is not inspected before the
# variable values are read — confirm callers are fine with that.
function solve(M::NashEquilibrium, 𝒫::SimpleGame)
    # index-based view of the game: agent indices, per-agent action indices, reward list
    ℐ, 𝒜, R = tensorform(𝒫)
    model = Model(Ipopt.Optimizer)
    # U[i]: one free variable per agent, bounded below by the constraints further down
    @variable(model, U[ℐ])
    # π[i, ai]: non-negative weight that agent i puts on action index ai
    @variable(model, π[i=ℐ, 𝒜[i]] ≥ 0)
    # minimize, summed over agents, the gap between U[i] and agent i's expected
    # reward under π (joint-action probability = product of the agents' weights)
    @NLobjective(model, Min,
        sum(U[i] - sum(prod(π[j,a[j]] for j in ℐ) * R[y][i]
            for (y,a) in enumerate(joint(𝒜))) for i in ℐ))
    # for every agent i and action ai: U[i] is at least the expected reward agent i
    # gets by playing ai with probability 1 while all other agents follow π
    @NLconstraint(model, [i=ℐ, ai=𝒜[i]],
        U[i] ≥ sum(
            prod(j==i ? (a[j]==ai ? 1.0 : 0.0) : π[j,a[j]] for j in ℐ)
            * R[y][i] for (y,a) in enumerate(joint(𝒜))))
    # each agent's action weights sum to one
    @constraint(model, [i=ℐ], sum(π[i,ai] for ai in 𝒜[i]) == 1)
    optimize!(model)
    # map action indices back to the original actions and wrap the solved weights in a policy
    πi′(i) = SimpleGamePolicy(𝒫.𝒜[i][ai] => value(π[i,ai]) for ai in 𝒜[i])
    return [πi′(i) for i in ℐ]
end


solve (generic function with 1 method)

Problem configuration

In [7]:
# reward earned for winning with each action (every win is worth 1.0 here)
rewards = Dict{Symbol, Real}(
    :rock => 1.0, 
    :paper => 1.0,
    :scissors => 1.0,
    )

# Rock-Paper-Scissors instance with discount factor 0.9
rps = RockPaperScissors(0.9, rewards)

RockPaperScissors(SimpleGame(0.9, [1, 2], [[:rock, :paper, :scissors], [:rock, :paper, :scissors]], Dict{Tuple{Symbol, Symbol}, Tuple{Real, Real}}((:scissors, :rock) => (-1.0, 1.0), (:rock, :rock) => (0, 0), (:paper, :rock) => (1.0, -1.0), (:scissors, :paper) => (1.0, -1.0), (:rock, :paper) => (-1.0, 1.0), (:paper, :paper) => (0, 0), (:scissors, :scissors) => (0, 0), (:paper, :scissors) => (-1.0, 1.0), (:rock, :scissors) => (1.0, -1.0))))

Find a Nash equilibrium for Rock-Paper-Scissors

In [8]:
# joint policy (one SimpleGamePolicy per agent) returned by the Nash equilibrium solver
# NOTE(review): this binds the global name `π`, which otherwise refers to Julia's
# built-in constant — confirm nothing later in the session expects the number.
π = solve(NashEquilibrium(), rps.simpleGame)


******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

This is Ipopt version 3.14.4, running with linear solver MUMPS 5.4.1.

Number of nonzeros in equality constraint Jacobian...:        6
Number of nonzeros in inequality constraint Jacobian.:       24
Number of nonzeros in Lagrangian Hessian.............:       15

Total number of variables............................:        8
                     variables with only lower bounds:        6
                variables with lower and upper bounds:        0
                     variables with only upper bounds:        0
Total number of equality constraints.................:        2
Total number of inequality co

2-element Vector{SimpleGamePolicy}:
 SimpleGamePolicy(Dict(:scissors => 0.33333333333333337, :rock => 0.33333333333333337, :paper => 0.33333333333333337))
 SimpleGamePolicy(Dict(:scissors => 0.33333333333333337, :rock => 0.33333333333333337, :paper => 0.33333333333333337))

## Simulation

Simulate one match of a simple game

In [9]:
# simulate one match of a simple game: keep playing rounds until the discounted
# rewards of a round are negligible
δ = 1e-16 # threshold

# Play one match of simple game 𝒫 with joint policy π and return the vector of
# accumulated discounted rewards, one entry per agent.
#
# Every round each agent samples an action from its policy; the resulting rewards
# are scaled by the current discount rate (1, γ, γ², ...) and added to the totals.
# The match ends after the first round in which every agent's discounted reward
# has magnitude below `threshold` (default: the global δ). Magnitudes are
# compared so that negative rewards do not end the match prematurely.
# NOTE: a round in which every reward is exactly zero (e.g. a tie) also meets
# this condition and therefore ends the match, as it always has.
function simulation(𝒫::SimpleGame, π::Vector{SimpleGamePolicy}; threshold=δ)
    γ, ℐ, R = 𝒫.γ, 𝒫.ℐ, 𝒫.R
    rate = 1.0 # reward rate decrease after each turn
    total = zeros(length(ℐ)) # total rewards, kept as floats from the start
    # start playing
    while true
        a = [πi() for πi in π] # random joint action
        rw = [r * rate for r in R[a...]]
        total = total .+ rw
        rate *= γ

        # stop once every discounted reward of this round is negligible
        if all(r -> abs(r) < threshold, rw)
            break
        end
    end
    return total
end

simulation (generic function with 1 method)

Monte Carlo Simulation

In [10]:
# Run `num_iter` simulated matches of simple game 𝒫 under joint policy π, then
# print the per-agent average of the match totals next to the per-agent expected
# utility computed by `utility`. Returns nothing.
function MonteCarloSimulation(𝒫::SimpleGame, π::Vector{SimpleGamePolicy}, num_iter::Int)
    agents = 𝒫.ℐ
    acc = zeros(Int, length(agents)) # running sum of match totals
    # play num_iter matches
    for _ in 1:num_iter
        acc = acc .+ simulation(𝒫, π)
    end

    println("Mean = ", acc / num_iter)
    println("Expected utility = ", [utility(𝒫, π, i) for i in agents])
    return nothing
end

MonteCarloSimulation (generic function with 1 method)

In [11]:
# run one million simulated matches under the computed policies and time the call
@time MonteCarloSimulation(rps.simpleGame, π, 1000000)

Mean = 

[0.0005446716660989412, -0.0005446716660989412]


Expected utility = [0.0, 0.0]
 16.558196 seconds (122.19 M allocations: 5.311 GiB, 6.72% gc time, 13.57% compilation time)
