In [24]:
import numpy as np

# Constants
PLAYER_X = 1
PLAYER_O = -1
EMPTY = 0
BOARD_SIZE = 3
Q_LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.9
EPSILON = 0.1
EPISODES = 10000

# Initialize the Q-table
q_table = np.zeros((3 ** (BOARD_SIZE ** 2), BOARD_SIZE ** 2))

# Convert the state to a unique integer for indexing the Q-table
def state_to_index(state):
    index = 0
    for i in range(BOARD_SIZE):
        for j in range(BOARD_SIZE):
            index = index * 3 + state[i][j] + 1
    return index - 1

# Generate all possible board states
def generate_all_states():
    states = []
    for _ in range(3 ** (BOARD_SIZE ** 2)):
        state = []
        num = _
        for _ in range(BOARD_SIZE):
            row = []
            for _ in range(BOARD_SIZE):
                row.append(num % 3 - 1)
                num //= 3
            state.append(row)
        states.append(state)
    return states

# Check if the game is over
def is_game_over(state):
    for i in range(BOARD_SIZE):
        for j in range(BOARD_SIZE):
            if state[i][j] == EMPTY:
                return False
    return True

# Check if a player has won
def check_winner(state, player):
    for i in range(BOARD_SIZE):
        if all(state[i][j] == player for j in range(BOARD_SIZE)):
            return True
        if all(state[j][i] == player for j in range(BOARD_SIZE)):
            return True
    if all(state[i][i] == player for i in range(BOARD_SIZE)) or all(state[i][BOARD_SIZE - i - 1] == player for i in range(BOARD_SIZE)):
        return True
    return False

# Make a random valid move
def make_random_move(state):
    empty_cells = [(i, j) for i in range(BOARD_SIZE) for j in range(BOARD_SIZE) if state[i][j] == EMPTY]
    return empty_cells[np.random.randint(len(empty_cells))]

# Choose an action using epsilon-greedy policy
def choose_action(state, q_table, epsilon):
    if np.random.rand() < epsilon:
        return make_random_move(state)
    else:
        index = state_to_index(state)
        return divmod(np.argmax(q_table[index]), BOARD_SIZE)

# Update the Q-table
def update_q_table(q_table, state, action, reward, next_state):
    index = state_to_index(state)
    next_index = state_to_index(next_state)
    max_q_value = np.max(q_table[next_index])
    q_table[index][action[0] * BOARD_SIZE + action[1]] += Q_LEARNING_RATE * (reward + DISCOUNT_FACTOR * max_q_value - q_table[index][action[0] * BOARD_SIZE + action[1]])

# Train the AI player using Q-learning
def train_q_learning():
    states = generate_all_states()
    for episode in range(EPISODES):
        state = [[EMPTY] * BOARD_SIZE for _ in range(BOARD_SIZE)]
        player = PLAYER_X
        while not is_game_over(state):
            action = choose_action(state, q_table, EPSILON)
            next_state = [row[:] for row in state]
            next_state[action[0]][action[1]] = player
            if check_winner(next_state, player):
                reward = 1  # Player wins
            elif is_game_over(next_state):
                reward = 0  # Draw
            else:
                reward = 0.5  # Intermediate state
            update_q_table(q_table, state, action, reward, next_state)
            state = next_state
            player *= -1

# Play a game against the AI player
def play_game():
    state = [[EMPTY] * BOARD_SIZE for _ in range(BOARD_SIZE)]
    player = PLAYER_X
    while not is_game_over(state):
        print("Current board:")
        for row in state:
            print(" ".join(["X" if cell == PLAYER_X else "O" if cell == PLAYER_O else "-" for cell in row]))

        if player == PLAYER_X:
            try:
                row, col = map(int, input("Enter your move (row and column) Number: ").split())
                if 0 <= row < BOARD_SIZE and 0 <= col < BOARD_SIZE and state[row][col] == EMPTY:
                    state[row][col] = PLAYER_X
                else:
                    print("Invalid move. Try again.")
                    continue
            except ValueError:
                print("Invalid input. Enter two integers separated by a space.")
        else:
            action = choose_action(state, q_table, 0)  # Choose the best action
            state[action[0]][action[1]] = PLAYER_O

        player *= -1

    for row in state:
        print(" ".join(["X" if cell == PLAYER_X else "O" if cell == PLAYER_O else "-" for cell in row]))
    if check_winner(state, PLAYER_X):
        print("You win!")
    elif check_winner(state, PLAYER_O):
        print("AI wins!")
    else:
        print("It's a draw!")

if __name__ == "__main__":
    train_q_learning()
    play_game()


Current board:
- - -
- - -
- - -
Enter your move (row and column) Number: 2 2
Current board:
- - -
- - -
- - X
Current board:
- O -
- - -
- - X
Enter your move (row and column) Number: 1 1
Current board:
- O -
- X -
- - X
Current board:
- O -
- X -
O - X
Enter your move (row and column) Number: 1 2
Current board:
- O -
- X X
O - X
Current board:
O O -
- X X
O - X
Enter your move (row and column) Number: 0 2
Current board:
O O X
- X X
O - X
Current board:
O O X
- X X
O - X
Enter your move (row and column) Number: 1 0
Current board:
O O X
X X X
O - X
Current board:
O O X
X X X
O - X
Enter your move (row and column) Number: 2 1
O O X
X X X
O X X
You win!
