In [1]:
import plaidml.keras
import os
plaidml.keras.install_backend()
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"

from MKAgent.RandomAgent import RandomAgent
from MKAgent.Agent import Agent
from DQNAgent.RLAgent import DQNAgent
from DQNAgent.DQNGame import DQNGame
from DQNAgent.Model import Model
from Game.Board import Board
from Game.Side import Side

import numpy as np

In [2]:
# Smoke test: play one complete game between two random agents.
# NOTE(review): the In [4] cell further down repeats these exact statements and
# overwrites every name bound here before anything reads them.
# Board(7,7) is presumably "7 holes per side, 7 seeds per hole" — TODO confirm against Game.Board.
board = Board(7,7)
agent1 = RandomAgent()
agent2 = RandomAgent()
game = DQNGame(board, agent1, agent2)
# play() hands back three values: the moves made, the recorded boards, and the winning side.
moves, boards, winningSide = game.play()

In [3]:
def getReward(currentBoard, nextBoard, playingSide, didResultInPlayingAgain, winningSide):
    """Return the scalar reward for one move from ``playingSide``'s point of view.

    Args:
        currentBoard: Board the move was played on; must provide
            ``getSeedsInStore(side)``.
        nextBoard: Board ``playingSide`` sees on its next turn, or ``None`` when
            ``playingSide`` never moves again (the move was its last of the game).
        playingSide: Side that made the move; must provide ``opposite()``.
        didResultInPlayingAgain: Truthy when the move earned an immediate extra turn.
        winningSide: Side that won the game. Only consulted when ``nextBoard`` is
            ``None``; any value that is neither ``playingSide`` nor its opposite
            is scored as a draw.

    Returns:
        Terminal move: ``+100`` for a win, ``-100`` for a loss, ``-50`` otherwise.
        Non-terminal move: ``10 * (own store gain - opponent store gain)``,
        plus ``20`` if the move granted another turn.
    """
    WINNING = 100
    DRAW = -50
    SCORING = 10
    PLAY_AGAIN = 20

    # Identity test on purpose: `== None` would be routed through the board's
    # own __eq__, which may raise or return an array for array-backed boards.
    if nextBoard is None:
        if winningSide == playingSide:
            return WINNING
        if winningSide == playingSide.opposite():
            return -WINNING
        return DRAW

    opponentSide = playingSide.opposite()
    ownGain = (nextBoard.getSeedsInStore(playingSide)
               - currentBoard.getSeedsInStore(playingSide))
    opponentGain = (nextBoard.getSeedsInStore(opponentSide)
                    - currentBoard.getSeedsInStore(opponentSide))

    # Seeds banked by the mover count for it, seeds banked by the opponent against it.
    reward = (ownGain * SCORING) + (opponentGain * -SCORING)

    if didResultInPlayingAgain:
        reward += PLAY_AGAIN

    return reward

def getStateActionNextStateReward(boards, moves, winningSide):
    """Convert one recorded game into per-move replay-memory tuples.

    ``boards`` and ``moves`` are paired positionally (``zip`` stops at the
    shorter of the two): ``boards[i]`` is treated as the position on which
    ``moves[i]`` was played.

    For each move, the "next board" is the board attached to the next move made
    by the *same* side. That is the very next entry when the mover earned
    another turn, otherwise the first later entry where that side moves again.
    If the side never moves again the transition is terminal.

    Args:
        boards: Sequence of board objects, one per move.
        moves: Sequence of move objects providing ``getSide()`` and ``getHole()``.
        winningSide: Winner of the game, forwarded to ``getReward``.

    Returns:
        A list of ``(currentState, action, reward, nextState, done)`` tuples in
        move order. ``nextState`` is ``None`` and ``done`` is ``True`` for a
        side's final move. States come from ``Model.boardToStateStatic`` using
        the mover's side.
    """
    boardMoves = list(zip(boards, moves))
    lastIndex = len(boardMoves) - 1

    memory = []
    for index, (currentBoard, move) in enumerate(boardMoves):
        side = move.getSide()

        # The mover played again iff the very next recorded move is also theirs.
        # Decided by position in the list rather than by comparing a board to
        # None, so a Board with a custom __eq__ is never compared against None.
        playedAgain = (index < lastIndex
                       and side == boardMoves[index + 1][1].getSide())

        # First later position where the same side is to move. When the mover
        # played again this is simply the next entry.
        nextBoard = next(
            (laterBoard for laterBoard, laterMove in boardMoves[index + 1:]
             if laterMove.getSide() == side),
            None,
        )

        reward = getReward(currentBoard, nextBoard, side, playedAgain, winningSide)
        done = nextBoard is None

        currentState = Model.boardToStateStatic(side, currentBoard)
        action = move.getHole()

        nextState = None
        if not done:
            nextState = Model.boardToStateStatic(side, nextBoard)

        memory.append((currentState, action, reward, nextState, done))

    return memory

In [4]:
# Play one random-vs-random game, then dump the whole trajectory for inspection.
board = Board(7, 7)
agent1, agent2 = RandomAgent(), RandomAgent()
game = DQNGame(board, agent1, agent2)
moves, boards, winningSide = game.play()

# For every move: the board array recorded at that step, then the hole and side played.
for step, move in enumerate(moves):
    print(boards[step].board)
    print(f"Move made: {move.getHole()}, Side: {move.getSide()}")
    print("\n")

print(f"Winning Side: {winningSide}")

[[0 7 7 7 7 7 7 7]
 [0 7 7 7 7 7 7 7]]
Move made: 6, Side: Side.SOUTH


[[0 8 8 8 8 8 7 7]
 [1 7 7 7 7 7 0 8]]
Move made: 6, Side: Side.NORTH


[[1 8 8 8 8 8 0 8]
 [1 8 8 8 8 8 0 8]]
Move made: 4, Side: Side.SOUTH


[[1 9 9 9 9 8 0 8]
 [2 8 8 8 0 9 1 9]]
Move made: 3, Side: Side.NORTH


[[ 2  9  9  0 10  9  1  9]
 [ 2  9  9  9  1  9  1  9]]
Move made: 1, Side: Side.SOUTH


[[ 2 10 10  0 10  9  1  9]
 [ 3  0 10 10  2 10  2 10]]
Move made: 6, Side: Side.NORTH


[[ 2 10 10  0 10  9  0 10]
 [ 3  0 10 10  2 10  2 10]]
Move made: 6, Side: Side.SOUTH


[[ 2 10 10  0 10  9  0 10]
 [ 4  0 10 10  2 10  0 11]]
Move made: 4, Side: Side.SOUTH


[[ 2 10  0  0 10  9  0 10]
 [15  0 10 10  0 11  0 11]]
Move made: 4, Side: Side.NORTH


[[ 3 10  0  0  0 10  1 11]
 [15  1 11 11  1 12  1 11]]
Move made: 4, Side: Side.SOUTH


[[ 3 10  0  0  0 10  1 11]
 [15  1 11 11  0 13  1 11]]
Move made: 6, Side: Side.NORTH


[[ 3 10  0  0  0 10  0 12]
 [15  1 11 11  0 13  1 11]]
Move made: 3, Side: Side.SOUTH


[[ 3 11 

In [5]:
# Convert the game recorded above into replay-memory tuples and show them,
# one field per line with a blank line between transitions.
memories = getStateActionNextStateReward(boards, moves, winningSide)
for state, action, reward, nextState, done in memories:
    print(state, action, reward, nextState, done, sep="\n", end="\n\n")

[[0 7 7 7 7 7 7 7 0 7 7 7 7 7 7 7]]
6
0
[[1 8 8 8 8 8 0 8 1 8 8 8 8 8 0 8]]
False

[[0 8 8 8 8 8 7 7 1 7 7 7 7 7 0 8]]
6
0
[[1 9 9 9 9 8 0 8 2 8 8 8 0 9 1 9]]
False

[[1 8 8 8 8 8 0 8 1 8 8 8 8 8 0 8]]
4
0
[[ 2  9  9  9  1  9  1  9  2  9  9  0 10  9  1  9]]
False

[[1 9 9 9 9 8 0 8 2 8 8 8 0 9 1 9]]
3
0
[[ 2 10 10  0 10  9  1  9  3  0 10 10  2 10  2 10]]
False

[[ 2  9  9  9  1  9  1  9  2  9  9  0 10  9  1  9]]
1
10
[[ 3  0 10 10  2 10  2 10  2 10 10  0 10  9  0 10]]
False

[[ 2 10 10  0 10  9  1  9  3  0 10 10  2 10  2 10]]
6
-120
[[ 2 10  0  0 10  9  0 10 15  0 10 10  0 11  0 11]]
False

[[ 3  0 10 10  2 10  2 10  2 10 10  0 10  9  0 10]]
6
30
[[ 4  0 10 10  2 10  0 11  2 10 10  0 10  9  0 10]]
False

[[ 4  0 10 10  2 10  0 11  2 10 10  0 10  9  0 10]]
4
100
[[15  1 11 11  1 12  1 11  3 10  0  0  0 10  1 11]]
False

[[ 2 10  0  0 10  9  0 10 15  0 10 10  0 11  0 11]]
4
10
[[ 3 10  0  0  0 10  1 11 15  1 11 11  0 13  1 11]]
False

[[15  1 11 11  1 12  1 11  3 10  0  0  0 10  1 11]]
4

In [6]:
# Build a fresh Model wrapper and load saved weights into it.
# NOTE(review): the path is relative, so this only works when the notebook's
# working directory is the project root; "0250" is presumably a training
# checkpoint index — confirm against the training script.
model = Model()
model.load("DQNAgent/model_output/weights_0250.hdf5")

INFO:plaidml:Opening device "metal_amd_radeon_pro_460.0"


In [11]:
# Probe the loaded network on a hand-built position, seen from NORTH's side.
board = Board(7,7)
# Replace the fresh board's array with a custom 2x8 layout.
# Judging by the game dumps earlier in the notebook, entry 0 of each row looks
# like that side's store and entries 1-7 the holes — TODO confirm in Game.Board.
board.board = np.array([[48,0,0,4,0,0,0,0],[41,0,2,1,1,0,1,0]])
state = Model.boardToStateStatic(Side.NORTH, board)
print(f"State: {state}")
# Per-action mask for this state. In the recorded output exactly one entry is 0
# and the rest are -1000000000 (presumably 0 marks the only legal hole — verify in Model).
print(Model.getNextValidActionArrayStatic(state))
# Raw network output for the first (only) row of the prediction.
qValues = model.model.predict(state)[0]
print(qValues)
print()
# NOTE(review): in the recorded output the raw Q-values sit around -1.7e10 and
# the "legal" values equal raw + mask. A -1e9 penalty is too small to dominate
# numbers of that magnitude, so the largest "legal" value belongs to a masked
# entry and act() prints 6 although the mask allows only index 2. This looks
# like diverged Q-values plus an additive mask that cannot enforce legality;
# the fix belongs in Model (not visible here) — confirm how act() indexes holes.
print(model.predictLegalQValues(state))
# Meaning of the second positional argument to act() is not visible here — TODO confirm.
print(model.act(state, True))

State: [[48  0  0  4  0  0  0  0 41  0  2  1  1  0  1  0]]
[-1000000000 -1000000000           0 -1000000000 -1000000000 -1000000000
 -1000000000]
[-1.7728274e+10 -1.8139314e+10 -1.8483263e+10 -1.7350328e+10
 -1.7311179e+10 -1.7164944e+10 -1.7306841e+10]

[-1.87282744e+10 -1.91393142e+10 -1.84832635e+10 -1.83503283e+10
 -1.83111788e+10 -1.81649444e+10 -1.83068411e+10]
6


In [9]:
# Sanity check: building a DQNAgent yields an object whose exact class is DQNAgent.
probeAgent = DQNAgent(model)
print(type(probeAgent) is DQNAgent)

True


In [None]:
# Play the trained DQN agent (as agent1) against a random opponent.
# NOTE(review): this cell carries no execution count (In [None]), so it was not
# run in the saved notebook. It also reloads the same checkpoint an earlier
# cell already loaded into `model`.
model = Model()
model.load("DQNAgent/model_output/weights_0250.hdf5")
# Meaning of the second positional argument is not visible here — TODO confirm
# against DQNAgent.__init__.
agent1 = DQNAgent(model, True)
agent2 = RandomAgent()
game = DQNGame(Board(7,7), agent1, agent2)
# Rebinds moves / boards / winningSide, replacing the earlier random-vs-random game.
moves, boards, winningSide = game.play()