In [324]:
include("chess.jl")  # This handles the board position and moves

makeMove! (generic function with 1 method)

In [61]:
# initialize a position
pos = Board(["King" => (3,3), "Rook" => (3,2)], ["King" => (4,5)])
wMoves = moves(pos, 1)

println(wMoves[1])
makeMove!(pos,wMoves[1],1)
pos

("Rook",(4,2))


Board(["Rook"=>(4,2),"King"=>(3,3)],["King"=>(4,5)])

In [88]:
iter = 0;
pos = Board(["King" => (3,3), "Rook" => (3,2)], ["King" => (4,7)])
while(iter < 15)
    moveChoicesW = moves(pos,1)
    m = moveChoicesW[rand(1:end)]
    println("Move W: $m")
    makeMove!(pos,m,1)

    moveChoicesB = moves(pos,2)
    m2 = moveChoicesB[rand(1:end)]
    println("Move B: $m2")
    makeMove!(pos,m2,2)
    iter += 1
    
    if(!validPos(pos))
        println("ERROR")
    end 
end

pos

Move W: ("Rook",(3,1))
Move B: ("King",(5,8))
Move W: ("King",(3,2))
Move B: ("King",(6,7))
Move W: ("King",(4,2))
Move B: ("King",(7,8))
Move W: ("Rook",(1,1))
Move B: ("King",(6,8))
Move W: ("Rook",(1,2))
Move B: ("King",(6,7))
Move W: ("King",(3,3))
Move B: ("King",(7,7))
Move W: ("Rook",(6,2))
Move B: ("King",(7,8))
Move W: ("King",(4,4))
Move B: ("King",(8,7))
Move W: ("Rook",(1,2))
Move B: ("King",(8,8))
Move W: ("King",(5,5))
Move B: ("King",(8,7))
Move W: ("Rook",(7,2))
Move B: ("King",(8,8))
Move W: ("Rook",(7,3))
Move B: ("King",(8,7))
Move W: ("King",(5,6))
Move B: ("King",(8,8))
Move W: ("Rook",(8,3))
Move B: ("King",(7,8))
Move W: ("King",(5,7))
Move B: ("King",(7,7))


Board(["Rook"=>(8,3),"King"=>(5,7)],["King"=>(7,7)])

In [433]:
# Reward function
# Gives reward for checkmate +20, penalty for stalemate -10, small gradient on black king to sides of board
function reward(state::Board, gameOver::Bool)
    WIN = 50
    DRAW = -10    
    if(length(state.piecesWhite) == 1) #draw
        return DRAW
    end
    if(gameOver == true)
        if(blackInCheck(state))
            return WIN
        else
            return DRAW
        end
    end
    
    kingPos = state.piecesBlack["King"]
    return 0 #max(abs(kingPos[1]-4.5),abs(kingPos[2]-4.5)) # encourage pushing king away from center
end

reward (generic function with 1 method)

In [451]:
# Outputs the state as string for hashmap to work properly
function board2str(state::Board)
    string = "K$(state.piecesWhite["King"])R$(state.piecesWhite["Rook"])K$(state.piecesBlack["King"])"
end


moveCount = 0
α = .1 # learning rate
γ = .9 # horizon
λ = .9 # eligibility trace

testState =  randomState() #Board(["King" => (3,3), "Rook" => (8,2)], ["King" => (1,1)])

# state = randomState();
state = deepcopy(testState)


WHITE = 1;
BLACK = 2;

# Initialize data-structures
Q = Dict{String,Dict{(String,(Int64,Int64)),Float64}}()
stateHistory = Board[]
actionHistory = (String,(Int64,Int64))[]
gameCount = 0;
while (gameCount < 1000)
    ϵ = .4
    movesWhite = moves(state,WHITE)
    state_str = board2str(state)
    # Add state/action pairs to Q(s,a) dictionary
    if(!haskey(Q,state_str))
        Q[state_str] = [a => 0.0 for a in movesWhite]
    end
    
    if(gameCount > 600)
        ϵ = .7
    end
    # Exploration strategy: optimal choice for now ...
    if(rand() < ϵ) # Optimal Q
        action = reduce((x, y) -> Q[state_str][x] >= Q[state_str][y] ? x : y, keys(Q[state_str]))
    else # random explore
        action = movesWhite[rand(1:end)]
    end
    value = Q[state_str][action]
    # Save state/action pair for eligibility traces
    push!(stateHistory,deepcopy(state))
    push!(actionHistory,deepcopy(action))
    # Updates the game state
    makeMove!(state, action, WHITE)
    
    # Opponent moves
    opponentMoves = moves(state,BLACK)
    
    # opponent strategy -- random
    gameOver = false
    if(length(opponentMoves)>0)
        responseAction = opponentMoves[rand(1:end)]
        makeMove!(state, responseAction, BLACK)
    end
    if(length(opponentMoves) == 0 || length(state.piecesWhite) == 1)
        gameOver = true
    end
    # state = s_{t+1}
    # reward = r_{t}
    r = reward(state, gameOver)

    if(gameOver || moveCount > 200)
#         println("--------- GAME OVER ------- moves: $moveCount")
        δ = (r - Q[state_str][action])
        Q[state_str][action] += α * δ
        
        # Eligibility trace update when game is won
        if(r >= 20)
            for i = 1:length(stateHistory)
                s_old = pop!(stateHistory) 
                a_old = pop!(actionHistory)            
                Q[board2str(s_old)][a_old] += α * λ^i * δ
            end
            println("Won in $moveCount moves")
        end
        state = deepcopy(testState)
        moveCount = 0
        gameCount +=1
        empty!(stateHistory)
        empty!(actionHistory)
        continue
    end

    # Q-learning update
    s_t_next = board2str(state)
    val = 0;
    if(haskey(Q,s_t_next))
        val = maximum(values(Q[s_t_next]))
    end
    
    Q[state_str][action] +=  α *(r + γ * val - Q[state_str][action])
    
    moveCount += 1    
end

Won in 81 moves
Won in 53 moves
Won in 41 moves
Won in 15 moves
Won in 32 moves
Won in 51 moves
Won in 44 moves
Won in 49 moves


In [453]:
state = deepcopy(testState)
println(testState)
Q[board2str(state)]
#rand()

Board(["Rook"=>(5,1),"King"=>(2,4)],["King"=>(2,7)])


Dict{(String,(Int64,Int64)),Float64} with 22 entries:
  ("King",(1,3)) => 0.14436240871396208
  ("King",(3,4)) => 6.805893702263946e-6
  ("King",(3,5)) => 1.913781602160708e-5
  ("King",(2,5)) => 0.0002691784435611119
  ("King",(3,3)) => 2.952393625058695e-6
  ("Rook",(1,1)) => 0.0
  ("Rook",(4,1)) => 0.0
  ("Rook",(5,2)) => 0.0
  ("Rook",(6,1)) => 0.0
  ("Rook",(5,4)) => 0.0
  ("Rook",(7,1)) => 8.532315112685395e-6
  ("Rook",(5,8)) => 0.0
  ("King",(1,4)) => 0.027484531397566937
  ("Rook",(5,5)) => 0.0
  ("Rook",(5,7)) => 0.0
  ("Rook",(5,6)) => 0.0
  ("Rook",(3,1)) => 9.831352523777668e-9
  ("Rook",(8,1)) => 0.0
  ("Rook",(2,1)) => 0.0
  ("Rook",(5,3)) => 0.0
  ("King",(2,3)) => 0.0013951666786317105
  ("King",(1,5)) => 1.2137472251577367e-10