In [2]:
import random

class TicTacToe:
    """Console tic-tac-toe game played between two player objects.

    Each player object must expose ``name`` and ``char`` attributes and the
    methods ``new_game()``, ``make_move(board)``, ``reward(score, board)``
    and ``winner()``. The board is a flat list of nine cells, where ``' '``
    marks an empty cell and any other value is the mark of a player.
    """

    # Every index triple that forms a winning line on the 3x3 board.
    WINNING_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
        (0, 4, 8), (2, 4, 6),              # diagonals
    )

    def __init__(self, player1, player2):
        """Store both players, pick the starting player at random, and clear the board."""
        self.player1 = player1
        self.player2 = player2
        self.player_1_turn = random.choice([True, False])  # True -> player1 moves next
        self.board = [' '] * 9

    def print_board(self, ):
        """Print the current board as a 3x3 grid."""
        b = self.board
        pad = ' ' * 8
        rows = [f'{pad}{b[i]} | {b[i + 1]} | {b[i + 2]}' for i in (0, 3, 6)]
        separator = f'\n{pad}----------\n'
        # Leading blank line and two trailing padded lines keep the layout
        # produced by the original triple-quoted template.
        print('\n' + separator.join(rows) + f'\n{pad}\n{pad}')

    def check_winner(self, char):
        """Return True if ``char`` occupies all three cells of any winning line."""
        b = self.board
        return any(
            char == b[p1] == b[p2] == b[p3]
            for p1, p2, p3 in self.WINNING_LINES
        )

    def play(self, ):
        """Run the game loop until one player wins or the board is full.

        Rewards handed to the players: +0.25 for a valid move, -0.25 for an
        invalid one, +5 to the winner and -5 to the loser.

        A draw is detected by checking for a full board before asking for a
        move, so a player is never asked to move when no cell is free. A
        ``ValueError`` raised by ``make_move`` (for example non-numeric
        console input) is treated as an invalid move and the same player is
        asked again, instead of ending the game as a draw.
        """
        self.player1.new_game()
        self.player2.new_game()
        self.print_board()

        while True:
            if ' ' not in self.board:
                print('Its a draw')
                break

            if self.player_1_turn:
                player, other_player = self.player1, self.player2
            else:
                player, other_player = self.player2, self.player1
            print(f'{player.name} turn')

            try:
                move = player.make_move(self.board)
            except ValueError:
                move = None  # unparsable input: handled as an invalid move below

            # Reject unparsable input, out-of-range indices and occupied cells.
            if move is None or not 0 <= move <= 8 or self.board[move] != ' ':
                print('Invalid move, Enter again')
                player.reward(-0.25, self.board)
                continue

            char = player.char
            self.board[move] = char

            if self.check_winner(char):
                player.winner()
                player.reward(5, self.board)
                other_player.reward(-5, self.board)
                self.print_board()
                break

            player.reward(0.25, self.board)
            self.player_1_turn = not self.player_1_turn
            self.print_board()

In [3]:
class Player:
    """Human player who enters moves on the console.

    Attributes:
        name: Display name used in prompts and messages.
        char: Mark this player places on the board.
    """

    def __init__(self, name, char):
        self.name = name
        self.char = char

    def make_move(self, board):
        """Prompt for a move and return it as an int.

        ``board`` is accepted but not read by the human player.

        Raises:
            ValueError: if the entered text cannot be parsed as an integer.
        """
        ans = input(f"{self.name}'s Move: ")
        return int(ans)

    def new_game(self, ):
        """Announce which mark this player uses."""
        print(f'{self.name} is {self.char}')

    def winner(self,):
        """Announce that this player has won."""
        print(f'{self.name} is the winner')

    def reward(self, score, board):
        """Print the score this player received; ``board`` is not used."""
        print(f'{self.name} gets {score}')

    def available_moves(self, board):
        """Return the indices of all empty (``' '``) cells in ``board``."""
        # Iterate over the board itself rather than a hard-coded range(9).
        return [index for index, cell in enumerate(board) if cell == ' ']

In [3]:
# Two console players, each given a display name and the mark they place.
player1, player2 = Player('Hari', 'H'), Player('Ram', 'R')

In [4]:
# Set up a game between the two players and run it interactively.
game = TicTacToe(player1=player1, player2=player2)
game.play()

Hari is H
Ram is R

          |   |  
        ----------
          |   |  
        ----------
          |   |  
        
        
Hari turn
Hari's Move: 0
Hari gets 0.25

        H |   |  
        ----------
          |   |  
        ----------
          |   |  
        
        
Ram turn
Ram's Move: 3
Ram gets 0.25

        H |   |  
        ----------
        R |   |  
        ----------
          |   |  
        
        
Hari turn
Hari's Move: 4
Hari gets 0.25

        H |   |  
        ----------
        R | H |  
        ----------
          |   |  
        
        
Ram turn
Ram's Move: 8
Ram gets 0.25

        H |   |  
        ----------
        R | H |  
        ----------
          |   | R
        
        
Hari turn
Hari's Move: 9
Invalid move, Enter again
Hari gets -0.25
Hari turn
Hari's Move: 2
Hari gets 0.25

        H |   | H
        ----------
        R | H |  
        ----------
          |   | R
        
        
Ram turn
Ram's Move: 1
Ram gets 0.25

        H | R | 

In [4]:
class AI(Player):
    """Tic-tac-toe player that learns with tabular Q-learning.

    Moves are chosen epsilon-greedily: with probability ``epsilon`` a random
    legal move is played, otherwise the legal move with the highest stored
    Q-value. Q-values are kept in ``q_table``, keyed by ``(state, action)``
    where ``state`` is the board as a tuple and ``action`` is a cell index.
    """

    # Starting value for (state, action) pairs that have not been seen yet.
    INITIAL_Q = 5

    def __init__(self, name, char, epsilon, gamma, alpha):
        super().__init__(name, char)
        self.epsilon = epsilon  # probability of playing a random move
        self.gamma = gamma      # discount factor
        self.alpha = alpha      # learning rate
        self.q_table = {}
        # Defined here so reward() is safe even before new_game() is called.
        self.prev_board = ()
        self.prev_action = None

    def new_game(self, ):
        """Forget the previous state/action pair at the start of a game."""
        self.prev_board = ()
        self.prev_action = None

    def winner(self):
        """No-op: the AI does not announce its wins."""
        pass

    def get_Q(self, state, action):
        """Return the Q-value for ``(state, action)``, creating it at ``INITIAL_Q`` if absent."""
        return self.q_table.setdefault((state, action), self.INITIAL_Q)

    def make_move(self, board):
        """Choose a move for ``board`` and remember it for the next update.

        Returns:
            The index of the chosen empty cell.

        Raises:
            ValueError: if ``board`` has no empty cell.
        """
        # Remember the state the decision is made in.
        self.prev_board = tuple(board)

        available_action = self.available_moves(board)
        if not available_action:
            raise ValueError('no available moves: the board is full')

        if random.random() < self.epsilon:
            # Explore: uniformly random legal move.
            action_taken = random.choice(available_action)
        else:
            # Exploit: best known legal move; ties go to the lowest index.
            action_taken = max(
                available_action,
                key=lambda candidate: self.get_Q(self.prev_board, candidate),
            )

        self.prev_action = action_taken
        return action_taken

    def reward(self, value, board):
        """Apply a Q-learning update for the last move, if one was made."""
        if self.prev_action is not None:
            self.q_learn(self.prev_board, self.prev_action, value, tuple(board))

    def q_learn(self, state, action, reward, new_state):
        """Update ``Q(state, action)`` from ``reward`` and the best value in ``new_state``.

        Implements
        ``Q <- Q + alpha * (reward + gamma * max_a Q(new_state, a) - Q)``,
        where the maximum runs over the moves that are legal in
        ``new_state``. When ``new_state`` has no legal move the future
        value is taken as 0.
        """
        prev_q_val = self.get_Q(state, action)
        # The look-ahead must use the legal moves of the NEW state, not the old one.
        next_actions = self.available_moves(new_state)
        max_q = max(
            (self.get_Q(new_state, candidate) for candidate in next_actions),
            default=0,
        )

        self.q_table[(state, action)] = prev_q_val + self.alpha * (
            reward + self.gamma * max_q - prev_q_val
        )