# Starting by building the game and the user interface

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop
import keras

Using TensorFlow backend.


In [2]:
def showBoard(board):
    '''Print the board to stdout, one row per line.

    board: any iterable of rows (in this notebook a 3x3 numpy array).
    '''
    for row in board:
        print(row)

def checkFree(x,board):
    '''Report whether a cell of the board is still empty.

    x: flat (row-major) index of the cell, 0..8.
    board: 9-element numpy array (3x3 in this notebook); an empty cell holds 0.

    Returns True when the cell holds 0, False otherwise.
    '''
    cells = board.reshape(9)
    return bool(cells[x] == 0)
def checkWin(toggle, board):
    '''Check whether any row, column or diagonal sums to the winning total.

    toggle: selects the total to look for - 6 when truthy, 15 otherwise.
            (The game loops in this notebook place 5s and 2s, so 15 is a
            line of three 5s and 6 is a line of three 2s.)
    board: 3x3 numpy array.

    Returns True if at least one of the eight lines sums to the total,
    False otherwise.
    '''
    target = 6 if toggle else 15

    # Both diagonals first, then the three rows, then the three columns.
    lines = [board.diagonal(), board[:, ::-1].diagonal()]
    lines.extend(board[idx, :] for idx in range(3))
    lines.extend(board[:, idx] for idx in range(3))

    return any(line.sum() == target for line in lines)

def getAvailablePositions(board):
    '''Return the flat indices (0..8, ascending) of every cell that checkFree reports as empty.'''
    return [cell for cell in range(9) if checkFree(cell, board)]

def placePiece(x,y,nought_or_cross,board):
    '''Return a copy of the board with one cell overwritten.

    x, y: row and column of the cell to set.
    nought_or_cross: numeric marker to store in that cell (the game loops in
                     this notebook pass 5 for the first player and 2 for the
                     second).
    board: 3x3 board; it is read but never modified.

    Returns a new 3x3 float array.
    '''
    # Copy cell by cell into a fresh float array so the caller's board is untouched.
    new_board = np.array(
        [[board[row][col] for col in range(3)] for row in range(3)],
        dtype=float,
    )
    new_board[x,y] = nought_or_cross
    return new_board

def getReward(result,num_moves):
    '''Reward for the outcome of a move, minus a per-move penalty.

    result: outcome label. 'win' earns +10 and a loss earns -10; any other
            label (e.g. 'draw' or 'play') earns 0.
            Both 'lose' and 'lost' are treated as a loss: the game loops in
            this notebook set result to 'lose', while this function
            originally only recognised 'lost', so the penalty never applied.
    num_moves: number of moves already made; subtracted from the reward.

    Returns the numeric reward.
    '''
    if result in ('lose', 'lost'):
        return -10 - num_moves
    elif result == 'win':
        return 10 - num_moves
    else:
        return -num_moves
    
def getMove(action):
    '''Translate a flat action number (0..8) into a [row, column] pair.

    Actions are numbered row-major: 0 -> [0, 0], 1 -> [0, 1], ..., 8 -> [2, 2].
    '''
    coordinates = [[row, col] for row in range(3) for col in range(3)]
    return coordinates[action]

def index_map(index, positions):
    '''Return the element of positions found at the given index.'''
    selected = positions[index]
    return selected

In [3]:
def createBrain():
    '''Build and compile the network that estimates Q-values for one player.

    Architecture: 9 inputs -> Dense(164) -> ReLU -> Dropout(0.2)
                           -> Dense(150) -> ReLU -> Dropout(0.2)
                           -> Dense(9)   -> linear.
    All Dense layers use the 'lecun_uniform' initializer. The model is
    compiled with mean-squared-error loss and the Adam optimizer.

    Returns the compiled Sequential model.
    '''
    layers = [
        Dense(164, kernel_initializer='lecun_uniform', input_shape=(9,)),
        Activation('relu'),
        Dropout(0.2),  # note: dropout IS applied after each hidden layer

        Dense(150, kernel_initializer='lecun_uniform'),
        Activation('relu'),
        Dropout(0.2),

        Dense(9, kernel_initializer='lecun_uniform'),
        Activation('linear'),  # linear output so we can have a range of real-valued outputs
    ]
    model = Sequential(layers)

    optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss='mse', optimizer=optimizer)

    return model

In [4]:
import random
#run function

# Self-play Q-learning: two independent networks are trained against each other.
# model1 always moves first and marks its cells with 5; model2 marks with 2.

epsilon = 1      # probability of picking a random move; decays linearly down to 0.1
gamma = 0.9      # discount applied to the best Q-value of the next state
epochs = 100000  # number of self-play games

model1 = createBrain()
model2 = createBrain()

for epoch in range(0,epochs):

    toggle = False           # flipped before every move; True means model1 is moving
    board = np.zeros((3,3))
    count1 = 0               # moves model1 has made so far in this game
    count2 = 0               # moves model2 has made so far in this game
    result = 'play'
    if epoch%100 == 0:
        print("Game #: %s" % (epoch,))

    # result stays 'play' until a move ends the game with 'lose', 'win' or 'draw'.
    while result == 'play':
        toggle = not toggle
        if toggle:
            model, n_or_c, num_moves = model1, 5, count1
        else:
            model, n_or_c, num_moves = model2, 2, count2

        state = board.reshape(1,9)
        qval = model.predict(state, batch_size=1)
        available = getAvailablePositions(board)
        if epsilon > random.random():
            # explore: uniformly random legal move
            action = np.random.choice(available)
        else:
            # exploit: legal move with the highest predicted Q-value
            action = available[np.argmax(qval[0][available])]
        x, y = getMove(action)

        new_board = placePiece(int(x),int(y),n_or_c, board)

        # Outcome from the mover's point of view. checkWin(True, ...) looks for a
        # line of 2s and checkWin(False, ...) for a line of 5s, so checkWin(toggle, ...)
        # tests the opponent's marker and checkWin(not toggle, ...) the mover's own.
        if checkWin(toggle, new_board):
            result = 'lose'
        elif checkWin(not toggle, new_board):
            result = 'win'
        elif not getAvailablePositions(new_board):
            result = 'draw'

        reward = getReward(result, num_moves)

        # Training target: the current predictions with only the chosen action replaced.
        target = np.zeros((1,9))
        target[:] = qval[:]
        if result == 'play': #non-terminal state
            # Bootstrap from the best Q-value of the resulting position.
            # (The original test chained `!=` comparisons with `or`, which is
            # always True, so terminal states were never handled separately.)
            newQ = model.predict(new_board.reshape(1,9), batch_size=1)
            maxQ = np.max(newQ)
            update = (reward + (gamma * maxQ))
        else: #terminal state
            # No successor state, so the target is the reward alone.
            update = reward
        target[0][action] = update #target output
        model.fit(state, target, batch_size=1, epochs=1, verbose=0)

        if toggle:
            count1 = count1 + 1
        else:
            count2 = count2 + 1

        board = new_board
        # NOTE: epsilon decays once per move (not once per game), as in the original.
        if epsilon > 0.1:
            epsilon -= (1/epochs)


  app.launch_new_instance()


Game #: 0
Game #: 100
Game #: 200
Game #: 300
Game #: 400
Game #: 500
Game #: 600
Game #: 700
Game #: 800
Game #: 900
Game #: 1000
Game #: 1100
Game #: 1200
Game #: 1300
Game #: 1400
Game #: 1500
Game #: 1600
Game #: 1700
Game #: 1800
Game #: 1900
Game #: 2000
Game #: 2100
Game #: 2200
Game #: 2300
Game #: 2400
Game #: 2500
Game #: 2600
Game #: 2700
Game #: 2800
Game #: 2900
Game #: 3000
Game #: 3100
Game #: 3200
Game #: 3300
Game #: 3400
Game #: 3500
Game #: 3600
Game #: 3700
Game #: 3800
Game #: 3900
Game #: 4000
Game #: 4100
Game #: 4200
Game #: 4300
Game #: 4400
Game #: 4500
Game #: 4600
Game #: 4700
Game #: 4800
Game #: 4900
Game #: 5000
Game #: 5100
Game #: 5200
Game #: 5300
Game #: 5400
Game #: 5500
Game #: 5600
Game #: 5700
Game #: 5800
Game #: 5900
Game #: 6000
Game #: 6100
Game #: 6200
Game #: 6300
Game #: 6400
Game #: 6500
Game #: 6600
Game #: 6700
Game #: 6800
Game #: 6900
Game #: 7000
Game #: 7100
Game #: 7200
Game #: 7300
Game #: 7400
Game #: 7500
Game #: 7600
Game #: 770

In [5]:
# Play one game with both trained networks choosing greedily (no exploration),
# printing the board after every move.
toggle = False
board = np.zeros((3,3))
result = 'play'

while result not in ('lose', 'win', 'draw'):
    toggle = not toggle

    # model1 moves (marking 5) when toggle is True, otherwise model2 (marking 2).
    if toggle == True:
        mover, n_or_c = model1, 5
    else:
        mover, n_or_c = model2, 2

    qval = mover.predict(board.reshape(1,9), batch_size=1)
    open_cells = getAvailablePositions(board)
    # Highest predicted Q-value among the cells that are still empty.
    action = open_cells[np.argmax(qval[0][open_cells])]
    move = getMove(action)
    x, y = move

    new_board = placePiece(int(x),int(y),n_or_c, board)

    if checkWin(toggle, new_board):
        result = 'lose'
    elif checkWin(not toggle, new_board):
        result = 'win'
    elif not getAvailablePositions(new_board):
        result = 'draw'

    board = new_board
    showBoard(board)
    print('--------------')


[ 0.  0.  5.]
[ 0.  0.  0.]
[ 0.  0.  0.]
--------------
[ 0.  0.  5.]
[ 0.  2.  0.]
[ 0.  0.  0.]
--------------
[ 0.  5.  5.]
[ 0.  2.  0.]
[ 0.  0.  0.]
--------------
[ 2.  5.  5.]
[ 0.  2.  0.]
[ 0.  0.  0.]
--------------
[ 2.  5.  5.]
[ 0.  2.  5.]
[ 0.  0.  0.]
--------------
[ 2.  5.  5.]
[ 0.  2.  5.]
[ 0.  0.  2.]
--------------


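## As can be seen, the game is played to a draw. However, there are some small issues with the players not capitalizing on the errors of the opposing player. This could be due to insufficient training, as we only ran 100 games. (Note: the board printed above ends with a completed diagonal of 2s, so that particular run was not a draw.)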

## Ideally we want the game to end in a draw, which is the outcome of the solved game when both players play optimally.

## After 1000 games, player 2 is winning, which is not ideal.

## After 10000 games, the other player (player 1) is winning. I wonder whether there is an issue with how the model interprets the actions depending on whose turn it is.

In [98]:
# Ask player 1's network for its Q-values on an empty board.
board = np.zeros((3, 3))
qval = model1.predict(np.zeros((1, 9)), batch_size=1)

In [99]:
# Flat index of the highest Q-value, i.e. the opening move the network prefers.
qval.argmax()

2