In [None]:
# import the required modules
import numpy as np

In [None]:
# a class built to handle playing tic-tac-toe
class TicTacToe:
    """Tic-tac-toe game whose board is a 9-character string, read row by row.

    Each character is '0' for an empty cell, '1' for an 'X' and '2' for an 'O'.
    """

    # every winning line (rows, columns, diagonals) as 0-based board indices
    _WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        # the state of the game board, '0' means empty, '1' means 'X', '2' means 'O'
        self.state = '000000000'
        # whose turn it is (player 2, drawn as 'O', moves first)
        self.turn = 2
        # who won: 0 for in progress, -1 for tie, 1 and 2 for a player 1 or 2 victory
        self.winner = 0
        # keep track of which number corresponds to which letter, for drawing purposes
        self.mark_dict = {'0': ' ', '1': 'X', '2': 'O'}

    # build the printable text for one row of the game board
    def print_helper(self, string):
        """Return the cells of ``string`` drawn as marks and joined with '|'."""
        return '|'.join([self.mark_dict[char] for char in string])

    # a function to print the entire game board
    def print_state(self):
        """Print the three board rows separated by dashed lines, then a blank gap."""
        print(self.print_helper(self.state[:3]))
        print("-----")
        print(self.print_helper(self.state[3:6]))
        print("-----")
        print(self.print_helper(self.state[6:]))
        print("\n")

    # make a move at the given position
    def make_move(self, idx):
        """Place the current player's mark at ``idx`` and hand the turn over.

        ``idx`` follows normal Python indexing, so negative values count from
        the end of the board (-1 is the last cell); an index outside the board
        raises IndexError.

        Returns -1 without changing anything when the game is already over or
        the cell is occupied. Otherwise returns the result of check_win():
        0 while in progress, 1 or 2 for a winner, -1 for a tie.
        """
        if self.winner != 0:
            return -1
        # Work on a list of cells so every valid index (including -1) replaces
        # exactly one cell; slicing with idx + 1 breaks for idx == -1 because
        # state[0:] is the whole board, which would make the board grow.
        cells = list(self.state)
        if cells[idx] != '0':
            return -1
        cells[idx] = str(self.turn)
        self.state = ''.join(cells)
        self.turn = 1 if self.turn == 2 else 2
        return self.check_win()

    # returns the possible moves for the current player and the boards they lead to
    def generate_possible_moves(self):
        """Return ``(empty_idxs, new_states)`` for the player whose turn it is.

        ``empty_idxs`` lists the indices of the empty cells and ``new_states``
        holds, in the same order, the board string that results from playing
        each of them. The game itself is not modified.
        """
        empty_idxs = [idx for idx, char in enumerate(self.state) if char == '0']
        mark = str(self.turn)
        new_states = [
            self.state[:idx] + mark + self.state[idx + 1:] for idx in empty_idxs
        ]
        return empty_idxs, new_states

    # check if the game has been won and update the winner accordingly
    def check_win(self):
        """Evaluate the current board, record a finished result in ``self.winner``.

        Returns the same code as game_status(); ``self.winner`` is only
        touched when that code is non-zero (a win or a tie).
        """
        status = self.game_status(self.state)
        if status != 0:
            self.winner = status
        return status

    # a static method to check the status of a game
    @staticmethod
    def game_status(state):
        """Return the status of ``state`` without needing a game object.

        1 or 2 when that player holds a complete line, -1 when no line is
        complete and no '0' cell remains (tie), 0 otherwise.
        """
        for first, second, third in TicTacToe._WIN_LINES:
            owner = int(state[first])
            if (
                owner in (1, 2)
                and int(state[second]) == owner
                and int(state[third]) == owner
            ):
                return owner
        if '0' not in state:
            return -1
        return 0

# quick demo: show the empty board, then the board after each of two opening moves
new_game = TicTacToe()
new_game.print_state()
for opening_idx in (0, 1):
    new_game.make_move(opening_idx)
    new_game.print_state()

In [None]:
# the ML agent that will learn how to play Tic-Tac-Toe
class Agent:
    """Skeleton of the learning agent; the learning logic is still to be written.

    The constructor only sets up the bookkeeping attributes. ``get_value`` and
    both branches of ``make_move`` are placeholders that do nothing yet.
    """

    def __init__(self, learning_rate):
        # game states it has seen before and their corresponding values
        self.values = {}

        # the last state, so we can update the values appropriately after we make choices
        self.prev_state = None

        # how much the Agent should adjust values when finding good / bad states
        self.learning_rate = learning_rate

    def get_value(self, state):
        # placeholder: no value lookup yet, so this returns None for every state
        pass

    def make_move(self, game, explore_prob=0.1):
        """Choose a move for ``game``; placeholder that currently plays nothing.

        ``explore_prob`` is the probability of taking the exploratory branch
        instead of the exploitative one. Returns None in both cases.
        """
        # check game state, and then update as needed

        # Looking at possible moves
        possible_moves, new_states = game.generate_possible_moves()
        vals = [self.get_value(state) for state in new_states]

        # Add some randomness to balance explore / exploit
        if np.random.random() < explore_prob:
            # Make exploratory move
            pass
        else:
            # Make exploitative move
            pass

    def new_game(self):
        """Forget the previous state so a fresh game starts with no history."""
        self.prev_state = None

In [None]:
# a basic, hard-coded opponent to test our bot against
class Opponent:
    """Hard-coded opponent skeleton used to test the bot.

    ``level`` selects the intended strategy: 0 = random, 1 = win if possible,
    otherwise random, 2 = win and block losses, otherwise random. The
    strategies are still placeholders, so no move is actually played yet.
    """

    def __init__(self, level=0):
        self.level = level

    def make_random_move(self, game):
        """Placeholder for playing a random move; does nothing and returns None."""
        return None

    def make_move(self, game):
        """Dispatch on ``self.level``; every path currently ends up returning None."""
        candidate_idxs, candidate_states = game.generate_possible_moves()

        difficulty = self.level
        if difficulty == 0:
            # Random move (placeholder: nothing is played for this level yet)
            return None
        elif difficulty == 1:
            # Get a win if directly possible, otherwise random

            # random
            return self.make_random_move(game)
        elif difficulty == 2:
            # Get a win if possible, block an immediate loss, otherwise random

            # random
            return self.make_random_move(game)
        # any other level: nothing to do
        return None

In [None]:
# score counters for the benchmark run (no training involved)
total_games = 1000
wins = ties = losses = 0

# benchmark loop: one pass per game, WITHOUT training / learning
# (the body is still a placeholder, so the counters above stay at zero)
for _ in range(total_games):
    pass

# report the benchmark record as wins-losses-ties, then the win percentage
print(f"Record: {wins}-{losses}-{ties}")
print(f"Win Percentage: {100 * wins / total_games}")

In [None]:
# training setup: how many games to train on and the Agent that will be trained
training_games = 10_000
trained_agent = Agent(learning_rate=0.05)

# training loop: one pass per training game, always with the same Agent
# (the body is still a placeholder)
for _ in range(training_games):
    pass

In [None]:
# score counters for the evaluation run of the trained Agent
total_games = 1000
wins = ties = losses = 0

# evaluation loop: one pass per test game for the trained Agent
# (the body is still a placeholder, so the counters above stay at zero)
for _ in range(total_games):
    pass

# report the trained record as wins-losses-ties, then the win percentage,
# for comparison against the benchmark output
print(f"Record: {wins}-{losses}-{ties}")
print(f"Win Percentage: {100 * wins / total_games}")