In [1]:
import numpy as np

class TicTacToe:
    def __init__(self):
        self.board = np.zeros((3, 3))  # Initialize a 3x3 board
        self.current_player = 1  # Player 1 starts (X)

    def reset(self):
        self.board = np.zeros((3, 3))
        self.current_player = 1

    def make_move(self, row, col):
        if self.board[row, col] == 0:  # Check if the cell is empty
            self.board[row, col] = self.current_player
            return True
        return False

    def check_winner(self):
        for i in range(3):
            if abs(sum(self.board[i, :])) == 3:  # Check rows
                return self.current_player
            if abs(sum(self.board[:, i])) == 3:  # Check columns
                return self.current_player
        if abs(sum(np.diag(self.board))) == 3:  # Check diagonal
            return self.current_player
        if abs(sum(np.diag(np.fliplr(self.board)))) == 3:  # Check anti-diagonal
            return self.current_player
        return 0 if np.all(self.board != 0) else None  # Draw or game not finished

    def switch_player(self):
        self.current_player = 2 if self.current_player == 1 else 1

    def print_board(self):
        print(self.board)


In [2]:
class QLearningAgent:
    def __init__(self):
        self.q_table = {}  # Dictionary to hold the Q-values
        self.learning_rate = 0.1
        self.discount_factor = 0.9
        self.exploration_prob = 1.0  # Start with exploration
        self.exploration_decay = 0.995
        self.min_exploration_prob = 0.1

    def get_state_key(self, state):
        return str(state.reshape(9))  # Convert board to a tuple key

    def choose_action(self, state):
        if np.random.rand() < self.exploration_prob:
            return np.random.choice(np.argwhere(state.flatten() == 0).flatten())  # Random action
        else:
            state_key = self.get_state_key(state)
            return np.argmax(self.q_table.get(state_key, np.zeros(9)))  # Best action

    def update_q_table(self, state, action, reward, next_state):
        state_key = self.get_state_key(state)
        next_state_key = self.get_state_key(next_state)
        current_q = self.q_table.get(state_key, np.zeros(9))[action]
        max_next_q = np.max(self.q_table.get(next_state_key, np.zeros(9)))

        # Q-learning update rule
        new_q = current_q + self.learning_rate * (reward + self.discount_factor * max_next_q - current_q)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(9)
        self.q_table[state_key][action] = new_q

    def decay_exploration(self):
        if self.exploration_prob > self.min_exploration_prob:
            self.exploration_prob *= self.exploration_decay


In [3]:
def test(agent, games=100):
    agent.exploration_prob = 0.0
    wins = 0

    for _ in range(games):
        game = TicTacToe()
        state = game.board.copy()

        while True:
            action = agent.choose_action(state)
            row, col = divmod(action, 3)
            if game.make_move(row, col):
                winner = game.check_winner()
                if winner is not None:
                    if winner == 1:
                        wins += 1
                    break

                # Random move for the opponent
                available_actions = np.argwhere(game.board.flatten() == 0).flatten()
                opponent_action = np.random.choice(available_actions)
                opponent_row, opponent_col = divmod(opponent_action, 3)
                game.make_move(opponent_row, opponent_col)

                winner = game.check_winner()
                if winner is not None:
                    if winner == 2:
                        break
                    # Continue the game if it's a draw

                state = game.board.copy()

    print(f'Win rate: {wins / games * 100:.2f}%')

# Create the agent and train it
agent = QLearningAgent()
train(agent, episodes=10000)

# Test the trained model
test(agent, games=100)


NameError: name 'train' is not defined