In [3]:
import numpy as np

In [5]:
# Define the Tic-Tac-Toe board size (3x3)
BOARD_SIZE = 3

In [6]:
# Create an empty Tic-Tac-Toe board
board = np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)

In [7]:
# Function to check if a player has won
def check_win(player):
    # Check rows, columns, and diagonals
    for i in range(BOARD_SIZE):
        if np.all(board[i] == player) or np.all(board[:, i] == player):
            return True
    if np.all(np.diag(board) == player) or np.all(np.diag(np.fliplr(board)) == player):
        return True
    return False

In [8]:
# Function to check if the game is a draw
def check_draw():
    return np.all(board != 0)

In [9]:
# Function to make a move
def make_move(player, row, col):
    if board[row, col] == 0:
        board[row, col] = player

In [10]:
# Q-learning parameters
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.9
EPSILON = 0.1
NUM_EPISODES = 10000

In [11]:
# Q-table initialization
q_table = np.zeros((3**9, BOARD_SIZE**2))

In [12]:
# Function to convert the board state to an integer
def state_to_int(state):
    return sum(state.reshape(-1) * (3 ** np.arange(BOARD_SIZE**2)))

In [13]:
# Q-learning training
for episode in range(NUM_EPISODES):
    board = np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)
    state = state_to_int(board)
    done = False

    while not done:
        if np.random.rand() < EPSILON:
            action = np.random.choice(BOARD_SIZE**2)
        else:
            action = np.argmax(q_table[state])

        row = action // BOARD_SIZE
        col = action % BOARD_SIZE
        make_move(1, row, col)
        new_state = state_to_int(board)

        if check_win(1):
            reward = 1
            done = True
        elif check_draw():
            reward = 0
            done = True
        else:
            reward = 0

        q_table[state, action] += LEARNING_RATE * (
            reward + DISCOUNT_FACTOR * np.max(q_table[new_state]) - q_table[state, action]
        )
        state = new_state

In [14]:
# Function to let the AI play against a human
def play_ai():
    board = np.zeros((BOARD_SIZE, BOARD_SIZE), dtype=int)
    done = False

    while not done:
        print(board)
        print("Your move (row and column, e.g., 0 1):")
        row, col = map(int, input().split())
        make_move(-1, row, col)

        if check_win(-1):
            print("You win!")
            done = True
        elif check_draw():
            print("It's a draw!")
            done = True
        else:
            ai_action = np.argmax(q_table[state_to_int(board)])
            row = ai_action // BOARD_SIZE
            col = ai_action % BOARD_SIZE
            make_move(1, row, col)

            if check_win(1):
                print("AI wins!")
                done = True

In [15]:
play_ai()

[[0 0 0]
 [0 0 0]
 [0 0 0]]
Your move (row and column, e.g., 0 1):
0 1
AI wins!


In [17]:
play_ai()

[[0 0 0]
 [0 0 0]
 [0 0 0]]
Your move (row and column, e.g., 0 1):
1 2
AI wins!
