In [None]:
import random
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

In [None]:
# Symbols that can occupy a board cell.
BLANK = ' '
AI_PLAYER = 'X'
HUMAN_PLAYER = 'O'

# Training settings.
# NOTE(review): TRAINING_EPOCHS is not referenced by any cell visible in this notebook.
TRAINING_EPOCHS = 10000
TRAINING_EPSILON = 0.4

# Values handed to Player.reward() when a game finishes.
REWARD_WIN = 10
REWARD_LOSS = -100
REWARD_TIE = -10

In [None]:
class Player:
    """Common base for all players; it only provides board printing."""

    @staticmethod
    def showBoard(board):
        """Print the board as three lines of three cells joined by '|'.

        board: sequence of strings read in row-major order, i.e. cells 0-2
        form the first printed line, 3-5 the second and 6-8 the third.
        """
        for rowStart in (0, 3, 6):
            row = board[rowStart:rowStart + 3]
            print('|'.join(row))


In [None]:
class HumanPlayer(Player):
    """Player whose moves are typed in at the console."""

    def reward(self, value, board):
        """Accept and ignore the end-of-game reward (nothing is learned here)."""
        pass

    def makeMove(self, board):
        """Ask for a move until a valid one is entered and return its 0-based index.

        The board is printed before every prompt. The user types a cell
        number from 1 to 9; the prompt is repeated when the input is not an
        integer, is outside 1-9, or names a cell that is already occupied.

        board: sequence of 9 cell strings, where ' ' marks an empty cell.
        Returns: int in 0..8, the index of an empty cell.
        """
        while True:
            self.showBoard(board)
            try:
                move = int(input("Enter Next Move [Cell 1-9]")) - 1
            except ValueError:
                print("Invalid Move, Try Again\n")
                continue

            # Reject occupied cells too; otherwise the caller would overwrite
            # a mark that is already on the board.
            if 0 <= move < 9 and board[move] == ' ':
                return move
            print("Invalid Move, Try Again\n")



In [None]:
class AIPlayer(Player):
    """Epsilon-greedy player that scores (board, move) pairs with a small Keras network.

    The network takes a 36-element vector (27 values one-hot encoding the
    nine cells as X / O / blank, followed by 9 values one-hot encoding the
    move) and outputs a single number used as the Q-value of that move.
    """

    def __init__(self, epsilon=0.4, alpha=0.3, gamma=0.9):
        """Build the network and store the learning parameters.

        epsilon: probability that makeMove() picks a random available cell.
        alpha:   step size applied to the temporal-difference error in reward().
        gamma:   weight of the best next-state Q-value in reward().
        """
        self.EPSILON=epsilon
        self.ALPHA=alpha
        self.GAMMA=gamma

        self.q=Sequential()
        self.q.add(Dense(32, input_dim=9+27, activation='relu'))
        # The output must be linear: the training targets include negative
        # rewards, which a ReLU output unit can never produce.
        self.q.add(Dense(1, activation='linear'))
        self.q.compile(optimizer='adam', loss='mean_squared_error')

        # Last move made and the board it was made on; consumed by reward().
        self.move=None
        self.board=[' ']*9

    def encodeInput(self, board, action):
        """Return a (1, 36) integer array encoding `board` and `action`.

        Each cell contributes three values: (1,0,0) for 'X', (0,1,0) for 'O'
        and (0,0,1) for ' '. The last nine values are 1 at index `action`
        and 0 elsewhere, e.g. [0,1,0,0,0,0,0,0,0] means the second cell.
        """
        cellEncoding=[int(cell==ticker) for cell in board for ticker in ('X','O',' ')]
        actionEncoding=[int(action==move) for move in range(9)]
        return np.array([cellEncoding+actionEncoding])

    def availableMoves(self, board):
        """Return the indices (0-8) of the cells in `board` that hold ' '."""
        return [i for i in range(9) if board[i]==' ']

    def getQ(self, state, action):
        """Return the network's prediction for (state, action) as returned by predict()."""
        # verbose=0 keeps Keras from printing a progress bar for every single prediction.
        return self.q.predict(self.encodeInput(state, action), batch_size=1, verbose=0)

    def _qValue(self, state, action):
        """Return the predicted Q-value for (state, action) as a plain float."""
        return float(self.getQ(state, action)[0][0])

    def makeMove(self, board):
        """Choose a move for `board`, remember it for reward(), and return its index.

        With probability EPSILON a random available cell is chosen; otherwise
        the available cell with the highest predicted Q-value is chosen, with
        ties broken at random.
        """
        self.board=tuple(board)
        actions=self.availableMoves(board)

        # Exploration: random action with probability epsilon
        if random.random()<self.EPSILON:
            self.move=random.choice(actions)
            return self.move

        # Exploitation: action with the best Q-value (random among equals)
        qValues=[self._qValue(self.board, a) for a in actions]
        maxQValue=max(qValues)
        bestActions=[a for a, qValue in zip(actions, qValues) if qValue==maxQValue]
        self.move=random.choice(bestActions)
        return self.move

    def reward(self, reward, board):
        """Fit the network on the last remembered move, then forget that move.

        The target is Q + ALPHA * (reward + GAMMA * QPrime - Q), where Q is
        the current prediction for the remembered (board, move) and QPrime is
        the best prediction over the moves still available on `board`
        (0 when no cell is empty). Nothing is trained if no move is remembered.

        reward: numeric reward for the remembered move.
        board:  board after the move, used as the next state.
        """
        # Compare against None explicitly: cell 0 is a valid move but is falsy.
        if self.move is not None:
            Q=self._qValue(self.board, self.move)
            # NOTE(review): the calls visible in this notebook only happen once
            # the game is over; a terminal-aware target would drop the QPrime
            # term entirely - confirm which is intended.
            nextState=tuple(board)
            QPrime=max((self._qValue(nextState, a) for a in self.availableMoves(nextState)), default=0.0)
            target=Q+self.ALPHA*(reward + self.GAMMA*QPrime - Q)
            self.q.fit(self.encodeInput(self.board, self.move), np.array([[target]]), epochs=3, verbose=0)
        self.move=None
        self.board=None


In [None]:
class TicTacToe:
    """A single game of tic-tac-toe between two player objects.

    Players must provide makeMove(board), reward(value, board) and
    showBoard(board). player1 always plays the AI_PLAYER symbol and player2
    the HUMAN_PLAYER symbol, whatever their classes are.
    """

    # Cell indices of the three rows, the three columns and the two diagonals.
    _WINNING_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self, player1, player2):
        """Store the players, pick the starting side at random, and clear the board."""
        self.player1 = player1
        self.player2 = player2
        self.firstPlayerTurn = random.choice([True, False])
        self.board = [' '] * 9

    def isGameOver(self, playerTickers):
        """Report whether the game has ended and who won.

        playerTickers: iterable of symbols to check, in priority order.
        Returns: (True, ticker) if that ticker fills a complete line,
                 (True, None) if the board is full without a winner,
                 (False, None) otherwise.
        """
        for playerTicker in playerTickers:
            for line in self._WINNING_LINES:
                if all(self.board[cell] == playerTicker for cell in line):
                    return True, playerTicker

        # No winner: the game is over only when no blank cell is left (a draw).
        return ' ' not in self.board, None

    def play(self):
        """Alternate moves until the game ends, then print the result and reward both players."""
        while True:
            if self.firstPlayerTurn:
                player, otherPlayer = self.player1, self.player2
                playerTickers = (AI_PLAYER, HUMAN_PLAYER)  # (mover's symbol, opponent's symbol)
            else:
                player, otherPlayer = self.player2, self.player1
                playerTickers = (HUMAN_PLAYER, AI_PLAYER)  # (mover's symbol, opponent's symbol)

            gameOver, winner = self.isGameOver(playerTickers)
            if gameOver:
                player.showBoard(self.board)
                # The three outcomes are mutually exclusive; an if/elif/else chain
                # keeps a win from also being reported and rewarded as a tie.
                if winner == playerTickers[0]:
                    print("\n %s WON "% player.__class__.__name__)
                    player.reward(REWARD_WIN, self.board)
                    otherPlayer.reward(REWARD_LOSS, self.board)
                elif winner == playerTickers[1]:
                    print("\n %s WON "% otherPlayer.__class__.__name__)
                    otherPlayer.reward(REWARD_WIN, self.board)
                    player.reward(REWARD_LOSS, self.board)
                else:
                    print("\nTIE")
                    otherPlayer.reward(REWARD_TIE, self.board)
                    player.reward(REWARD_TIE, self.board)
                break

            # Hand the turn over for the next iteration, then let the current player move.
            self.firstPlayerTurn = not self.firstPlayerTurn

            move = player.makeMove(self.board)
            self.board[move] = playerTickers[0]




In [None]:
if __name__=='__main__':
    # Two learners that play against each other; both explore with the
    # training epsilon, handed straight to the constructor.
    AIPlayer1=AIPlayer(epsilon=TRAINING_EPSILON)
    AIPlayer2=AIPlayer(epsilon=TRAINING_EPSILON)
    print("TRAINING AIPLAYER1\n")

    # Self-play: every iteration is one complete game on a fresh board.
    # NOTE(review): the game count is hard-coded here; TRAINING_EPOCHS is
    # defined in the configuration cell but is not used.
    for i in range(500):
        print("Iteration ",i)
        game=TicTacToe(AIPlayer1, AIPlayer2)
        game.play()

    print('\nTraining is Done')



In [None]:
# Play one game against the trained AIPlayer1 (created by the training cell above).
# With EPSILON at 0 the random-move branch in AIPlayer.makeMove is never taken,
# so the AI always picks the move with the highest predicted Q-value.
AIPlayer1.EPSILON = 0
HumanPlayer1= HumanPlayer()
# player1 (the AI) is given the 'X' symbol and player2 (the human) the 'O' symbol by TicTacToe.play.
game = TicTacToe(AIPlayer1, HumanPlayer1)
game.play()

 | | 
 | | 
 | | 
Enter Next Move [Cell 1-9]1
O| | 
 | | 
 |X| 
Enter Next Move [Cell 1-9]2
O|O| 
 |X| 
 |X| 
Enter Next Move [Cell 1-9]3
O|O|O
 |X| 
 |X| 

 HumanPlayer WON 
