In [1]:
using Match

In [2]:
"""
    Player(name, previousMove, valueFunction, symbol, totalWins)

Mutable state of one tic-tac-toe learner.

# Fields
- `name`: label shown in the printed game messages.
- `previousMove`: board produced by this player's latest move (a vector of
  strings, one per cell).
- `valueFunction`: estimated value of every board this player has evaluated.
- `symbol`: mark this player writes on the board.
- `totalWins`: number of games won so far.
"""
mutable struct Player
    name::String
    previousMove::Vector{String}
    # Concrete key/value types: a bare `Dict` field is abstract, which makes
    # every value read from it dynamically typed.
    valueFunction::Dict{Vector{String},Float64}
    symbol::String
    totalWins::Int32
end

In [3]:
# Empty board: nine cells in a flat vector, where "-" marks a free cell.
initialState = fill("-", 9)
# Both learners start out knowing only the empty board, valued at 0.5.
player1 = Player("player1", initialState, Dict(initialState => 0.5), "o", 0)
player2 = Player("player2", initialState, Dict(initialState => 0.5), "x", 0)

Player("player2", ["-", "-", "-", "-", "-", "-", "-", "-", "-"], Dict(["-", "-", "-", "-", "-", "-", "-", "-", "-"]=>0.5), "x", 0)

In [4]:
"""
    possibleMoves(state, player)

Return every board that can be reached from `state` by writing `player.symbol`
into one free cell (a cell equal to `"-"`), ordered by increasing cell index.

`state` itself is left untouched. The result is empty when no cell is free.
"""
function possibleMoves(state, player)
    # `map` over the free indices yields a concretely typed vector of boards,
    # unlike pushing onto an untyped `[]`, which produced a `Vector{Any}`.
    return map(findall(==("-"), state)) do i
        board = copy(state)
        board[i] = player.symbol
        board
    end
end

possibleMoves (generic function with 1 method)

In [5]:
"""
    isWinningMove(state, player)

Return `true` when `player.symbol` occupies all three cells of some row, column
or diagonal of the 3x3 board stored in the 9-element vector `state`, and
`false` otherwise.

# Throws
- `DimensionMismatch`: if `state` does not contain exactly nine cells.
"""
function isWinningMove(state, player)
    length(state) == 9 ||
        throw(DimensionMismatch("expected a 9-cell board, got $(length(state)) cells"))
    symbol = player.symbol
    # Linear indices of the eight lines of a 3x3 board. Listing them explicitly
    # needs nothing beyond Base and allocates no intermediate matrix.
    lines = (
        (1, 2, 3), (4, 5, 6), (7, 8, 9),   # one axis of the board
        (1, 4, 7), (2, 5, 8), (3, 6, 9),   # the other axis
        (1, 5, 9), (3, 5, 7),              # both diagonals
    )
    return any(line -> all(i -> state[i] == symbol, line), lines)
end

isWinningMove (generic function with 1 method)

In [6]:
"""
    playRound(state, player, exploration=0.05)

Pick the next board for `player` starting from `state`.

Every board reachable in one move is looked up in `player.valueFunction`;
boards seen for the first time are registered with value 1.0 when they win the
game for `player` and 0.5 otherwise. With probability `exploration` a random
candidate is returned, otherwise the first candidate with the highest value.
The chosen board is printed.

Returns the chosen board, or `nothing` when `state` has no free cell.
"""
function playRound(state, player, exploration=0.05)
    candidates = possibleMoves(state, player)
    isempty(candidates) && return nothing

    # `get!` returns the stored estimate, inserting the initial one on a miss.
    estimates = map(candidates) do board
        get!(player.valueFunction, board) do
            isWinningMove(board, player) ? 1.0 : 0.5
        end
    end

    chosen = if rand() < exploration
        println("Exploring...")
        rand(candidates)
    else
        candidates[argmax(estimates)]
    end
    println("It's playing ", player.name, " the move ", chosen)
    return chosen
end

playRound (generic function with 2 methods)

In [14]:
# Move `player`'s estimate for its previous board a fraction `alpha` of the way
# toward its estimate for `target`.
function nudgePreviousValue!(player, target, alpha)
    estimates = player.valueFunction
    previous = player.previousMove
    estimates[previous] += alpha * (estimates[target] - estimates[previous])
    return nothing
end

"""
    play(player1, player2, alpha=0.005, exploration=0.5, player1Starts=true)

Play one game between `player1` and `player2`, updating both value functions.

The players alternate calls to `playRound`, starting with `player1` when
`player1Starts` is `true`. After each move the mover's estimate for its previous
board is moved a fraction `alpha` toward its estimate for the new board. When a
move wins the game, the winner's `totalWins` is incremented, the loser values
the final board at 0.0 and moves its own previous board toward that value. The
game is a draw when the player to move has no free cell left.

# Arguments
- `alpha`: step size of the value updates.
- `exploration`: probability, passed to `playRound`, of playing a random move.

Returns `nothing`; the outcome is printed.
"""
function play(player1, player2, alpha=0.005, exploration=0.5, player1Starts=true)
    # Every game starts from a clean slate. Without this reset the first update
    # of a game would pull the value of the board that ended the *previous* game
    # toward the value of this game's opening move.
    for p in (player1, player2)
        p.previousMove = copy(initialState)
        get!(p.valueFunction, p.previousMove, 0.5)
    end

    current = copy(initialState)
    mover, waiting = player1Starts ? (player1, player2) : (player2, player1)
    while true
        next = playRound(current, mover, exploration)
        if next === nothing
            println("No one wins!\n")
            break
        end

        nudgePreviousValue!(mover, next, alpha)
        mover.previousMove = next
        current = next

        # Only the player who just moved can have completed a line.
        if isWinningMove(current, mover)
            mover.totalWins += 1
            waiting.valueFunction[current] = 0.0
            nudgePreviousValue!(waiting, current, alpha)
            println("Player ", mover.name, " wins!\n")
            break
        end

        mover, waiting = waiting, mover
    end
    return nothing
end

play (generic function with 4 methods)

In [38]:
# Let the two learners play 1000 games against each other, printing the running
# score after each one.
for game in 1:1000
    play(player1, player2, 0.25, 0.25, true)
    println(
        "Total wins ", player1.name, ": ", player1.totalWins, " | ",
        "Total wins ", player2.name, ": ", player2.totalWins,
    )
    # Clear the notebook cell output between games.
    IJulia.clear_output(true)
end

Exploring...
It's playing player1 the move ["-", "-", "-", "-", "o", "-", "-", "-", "-"]
It's playing player2 the move ["-", "-", "x", "-", "o", "-", "-", "-", "-"]
It's playing player1 the move ["-", "o", "x", "-", "o", "-", "-", "-", "-"]
It's playing player2 the move ["-", "o", "x", "-", "o", "-", "-", "x", "-"]
Exploring...
It's playing player1 the move ["-", "o", "x", "-", "o", "-", "o", "x", "-"]
It's playing player2 the move ["-", "o", "x", "-", "o", "x", "o", "x", "-"]
It's playing player1 the move ["-", "o", "x", "-", "o", "x", "o", "x", "o"]
It's playing player2 the move ["x", "o", "x", "-", "o", "x", "o", "x", "o"]
Exploring...
It's playing player1 the move ["x", "o", "x", "o", "o", "x", "o", "x", "o"]
No one wins!

Total wins player1: 529 | Total wins player2: 186


In [37]:
# Reset both win counters.
player1.totalWins = player2.totalWins = 0

0

In [42]:
# One game with exploration 0.0: `rand() < 0.0` never holds, so both players
# always take their highest-valued move.
play(player1, player2, 0.1, 0.0, true)

It's playing player1 the move ["-", "-", "-", "-", "o", "-", "-", "-", "-"]
It's playing player2 the move ["-", "-", "x", "-", "o", "-", "-", "-", "-"]
It's playing player1 the move ["-", "o", "x", "-", "o", "-", "-", "-", "-"]
It's playing player2 the move ["-", "o", "x", "-", "o", "-", "-", "x", "-"]
It's playing player1 the move ["-", "o", "x", "-", "o", "o", "-", "x", "-"]
It's playing player2 the move ["-", "o", "x", "x", "o", "o", "-", "x", "-"]
It's playing player1 the move ["-", "o", "x", "x", "o", "o", "-", "x", "o"]
It's playing player2 the move ["x", "o", "x", "x", "o", "o", "-", "x", "o"]
It's playing player1 the move ["x", "o", "x", "x", "o", "o", "o", "x", "o"]
No one wins!

