In [None]:
import random

: 

In [None]:
# Side length of the square board; cells are numbered 0..8, row by row.
BOARD_SIZE = 3

# Every line of cell indices that counts as a win, in the order
# rows, columns, main diagonal, anti-diagonal.
WINNING_COMBOS = (
    # Rows
    [[row * BOARD_SIZE + col for col in range(BOARD_SIZE)] for row in range(BOARD_SIZE)]
    # Columns
    + [[row * BOARD_SIZE + col for row in range(BOARD_SIZE)] for col in range(BOARD_SIZE)]
    # Diagonals
    + [
        [step * (BOARD_SIZE + 1) for step in range(BOARD_SIZE)],
        [(step + 1) * (BOARD_SIZE - 1) for step in range(BOARD_SIZE)],
    ]
)

In [None]:
def print_board(board):
    """Print the board to stdout, one text row per BOARD_SIZE cells.

    Cells equal to 1 are shown as 'X', cells equal to -1 as 'O', and
    anything else as '.'. Cells within a row are separated by one space.
    Indices 0 through 8 of ``board`` are read.
    """
    def symbol(cell):
        # Map a cell value to its display character.
        if cell == 1:
            return 'X'
        if cell == -1:
            return 'O'
        return '.'

    for row_start in range(0, 9, BOARD_SIZE):
        row_cells = (board[row_start + offset] for offset in range(BOARD_SIZE))
        print(' '.join(symbol(cell) for cell in row_cells))
        

def is_winner(board, player):
    """Return True if ``player`` occupies every cell of some winning line.

    A winning line is any entry of WINNING_COMBOS; ``player`` is compared
    against the board cells with ``==``.
    """
    return any(
        all(board[cell] == player for cell in line)
        for line in WINNING_COMBOS
    )

def is_full(board):
    """Return True when no cell of ``board`` equals 0 (i.e. none is empty)."""
    for cell in board:
        if cell == 0:
            # Found an empty cell, so the board still has room.
            return False
    return True

def get_empty_positions(board):
    """Return the indices of all cells equal to 0, in ascending order."""
    return [index for index, cell in enumerate(board) if cell == 0]

def make_move(board, position, player):
    """Write ``player`` into ``board[position]``, mutating ``board`` in place.

    No check is made that the cell was empty; any existing value is overwritten.
    Returns None.
    """
    board[position] = player

def get_opponent(player):
    """Return the other side's marker: the sign-flipped value of ``player``."""
    return player * -1

In [None]:
class TicTacToeAgent:
    """Tabular Q-learning agent for tic-tac-toe.

    The board is a flat sequence of 9 cells holding 1 (X), -1 (O) or 0 (empty).
    Q-values live in ``q_table`` keyed by ``(tuple(state), action)``, where
    ``state`` is the board *before* the agent moves and ``action`` is the index
    of the cell it plays. Pairs that were never updated read as 0.
    """

    def __init__(self, player, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.2):
        """Create an untrained agent.

        Args:
            player: The marker this agent plays, 1 (X) or -1 (O).
            learning_rate: Step size applied to each Q-value update.
            discount_factor: Weight given to the best future Q-value.
            exploration_rate: Probability that ``choose_action`` plays a
                uniformly random empty cell instead of the greedy one.
        """
        self.player = player
        self.q_table = {}
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate

    def get_q_value(self, state, action):
        """Return the stored Q-value for ``(state, action)``, or 0 if unseen."""
        return self.q_table.get((tuple(state), action), 0)

    def update_q_value(self, state, action, reward, next_state, next_action=None):
        """Apply one Q-learning update to ``(state, action)``.

        Args:
            state: Board before the agent's move.
            action: Cell index the agent played in ``state``.
            reward: Reward observed for that move.
            next_state: Board the agent faces next (or the final board).
            next_action: Unused; Q-learning bootstraps from the best next
                action rather than the one actually taken. Kept so existing
                five-argument calls keep working.
        """
        if is_winner(next_state, 1) or is_winner(next_state, -1):
            # The game is over: there is no future value to bootstrap from.
            max_future_q = 0
        else:
            # default=0 covers a full (drawn) board, which has no actions left.
            max_future_q = max(
                (self.get_q_value(next_state, a) for a in get_empty_positions(next_state)),
                default=0,
            )
        current_q = self.get_q_value(state, action)
        new_q = current_q + self.learning_rate * (
            reward + self.discount_factor * max_future_q - current_q
        )
        self.q_table[(tuple(state), action)] = new_q

    def choose_action(self, state):
        """Pick a cell to play in ``state`` using an epsilon-greedy policy.

        With probability ``exploration_rate`` a random empty cell is returned;
        otherwise the empty cell with the highest Q-value is returned, ties
        going to the highest cell index.

        Raises:
            ValueError: If ``state`` has no empty positions.
        """
        empty_positions = get_empty_positions(state)
        if not empty_positions:
            raise ValueError("choose_action called on a board with no empty positions")
        if random.random() < self.exploration_rate:
            return random.choice(empty_positions)
        # Comparing (q, pos) tuples keeps tie-breaking deterministic.
        return max(empty_positions, key=lambda pos: (self.get_q_value(state, pos), pos))

    def train(self, games=1000):
        """Play ``games`` training games against a uniformly random opponent.

        X (player 1) always moves first. Only the agent's own moves are
        learned: each is updated once, either when the game ends or after the
        opponent's reply, so that the next state is a position in which the
        agent is to move. The reward is 1 for an agent win, -1 for a loss and
        0 otherwise.
        """
        for _ in range(games):
            board = [0] * 9
            current_player = 1
            # The agent's most recent (pre-move board, action), pending update.
            last_state = None
            last_action = None
            while True:
                if current_player == self.player:
                    # Snapshot before make_move mutates the board, so the
                    # Q-table key matches what choose_action looks up.
                    last_state = board.copy()
                    last_action = self.choose_action(board)
                    action = last_action
                else:
                    action = random.choice(get_empty_positions(board))
                make_move(board, action, current_player)

                if is_winner(board, current_player):
                    reward = 1 if current_player == self.player else -1
                    game_over = True
                else:
                    reward = 0
                    game_over = is_full(board)

                agent_has_moved = last_state is not None
                # Update when the outcome of the agent's move is known: the
                # game ended, or the opponent has just replied.
                if agent_has_moved and (game_over or current_player != self.player):
                    self.update_q_value(last_state, last_action, reward, board, None)
                if game_over:
                    break
                current_player = get_opponent(current_player)


In [None]:
def play_game(agent_x, agent_o):
    """Play one game between two agents and print the final board.

    ``agent_x`` moves as player 1 and goes first; ``agent_o`` moves as
    player -1. Each agent is asked for a move via ``choose_action(board)``.

    Returns:
        1 if X has a winning line, -1 if O has one, 0 otherwise (draw).
    """
    board = [0] * 9
    mover = 1  # X starts first
    while True:
        if is_full(board) or is_winner(board, 1) or is_winner(board, -1):
            break
        active_agent = agent_x if mover == 1 else agent_o
        chosen_cell = active_agent.choose_action(board)
        make_move(board, chosen_cell, mover)
        mover = get_opponent(mover)

    print_board(board)
    for side in (1, -1):
        if is_winner(board, side):
            return side
    return 0

# Seed the RNG so that exploration during training and play gives the same
# result on every Restart & Run All.
random.seed(42)

# Training the agent
# NOTE: only agent_x is trained; agent_o plays from an empty Q-table.
agent_x = TicTacToeAgent(1)
agent_o = TicTacToeAgent(-1)
agent_x.train(games=1000)

# Test the trained agent by playing a game
result = play_game(agent_x, agent_o)
print(f"Game result: {'X wins' if result == 1 else 'O wins' if result == -1 else 'Draw'}")
