In [1]:
import numpy as np
import random

# Create empty board
def create_board():
    """Return a new empty board: a flat NumPy array of nine zeros.

    Cells are indexed 0-8; a value of 0 marks a cell nobody has played in.
    """
    empty_board = np.zeros(shape=(9,))
    return empty_board

# Make a move on the board
def make_move(board, position, player):
    """Write ``player``'s mark into ``board[position]`` and return the board.

    The board is modified in place; the returned object is the same one that
    was passed in. No check is made that the cell was empty beforehand.
    """
    board[position] = player
    return board

# Check if a player has won
def check_winner(board, player):
    """Return True if ``player`` occupies all three cells of any line.

    ``board`` is indexed 0-8 (row-major 3x3); a cell belongs to ``player``
    when its value compares equal to ``player``.
    """
    winning_lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    )
    return any(
        all(board[cell] == player for cell in line)
        for line in winning_lines
    )

# Check if game is a draw
def check_draw(board):
    """Return True when the board has no empty (0) cell left.

    This only tests for a full board; callers check for a winner first.
    """
    has_empty_cell = 0 in board
    return not has_empty_cell

# Get list of empty positions
def get_empty_positions(board):
    """Return the indices of all cells whose value is 0, in ascending order."""
    open_cells = []
    for index, value in enumerate(board):
        if value == 0:
            open_cells.append(index)
    return open_cells

# Q-learning functions
def get_state_key(board):
    """Return a string key for ``board``, usable as a dictionary key.

    The key is the textual form of the board's cells as a Python list, so
    two boards with equal cell values produce the same key.
    """
    cell_reprs = ", ".join(map(repr, board))
    return "[" + cell_reprs + "]"

# Initialize Q-table: maps a board-state key (the string returned by
# get_state_key) to a dict of {position: value} for moves tried from that
# state. train_ai fills it in; choose_action reads it, treating any position
# missing from a state's dict as having value 0.
q_table = {}

# Choose action based on Q-values or random exploration
def choose_action(board, epsilon=0.1):
    """Pick a move for the current board using an epsilon-greedy rule.

    With probability ``epsilon`` a random empty position is returned. The
    same happens when the board's state has no entry in ``q_table``.
    Otherwise the empty position with the highest stored value is returned;
    positions with no stored value count as 0.
    """
    explore = random.random() < epsilon
    if explore:
        return random.choice(get_empty_positions(board))

    action_values = q_table.get(get_state_key(board))
    if action_values is None:
        # Nothing recorded for this state yet, so there is nothing to exploit.
        return random.choice(get_empty_positions(board))

    def learned_value(position):
        return action_values.get(position, 0)

    # max() returns the first of several equally-valued positions, so ties
    # resolve to the lowest-numbered empty cell.
    return max(get_empty_positions(board), key=learned_value)

# Train the AI
def _q_update(state, action, target, alpha):
    """Move the stored value of (state, action) a fraction alpha toward target."""
    action_values = q_table.setdefault(state, {})
    old_value = action_values.get(action, 0.0)
    action_values[action] = old_value + alpha * (target - old_value)


def train_ai(episodes=5000, alpha=0.3, gamma=0.9, epsilon=0.1):
    """Train the AI (player 1) by self-play against a uniformly random opponent.

    Every AI move is followed by an incremental Q-learning update of
    ``q_table``, so values reach early moves as well as the final one and
    repeated outcomes are averaged rather than overwritten.

    Args:
        episodes: Number of complete games to play.
        alpha: Learning rate, the fraction of the gap between the stored
            value and the new target that is closed on each update.
        gamma: Discount applied to the best value available in the next
            state when the game continues.
        epsilon: Exploration probability passed to ``choose_action``.

    Rewards: 1 for an AI win, 0.5 for a draw, -1 for an opponent win.
    """
    for _ in range(episodes):
        board = create_board()

        while True:
            # AI move (Player 1). The state is registered before choosing so
            # that choose_action takes its greedy branch for known states.
            state = get_state_key(board)
            q_table.setdefault(state, {})

            pos = choose_action(board, epsilon)
            board = make_move(board, pos, 1)

            # Terminal outcomes right after the AI's move
            if check_winner(board, 1):
                _q_update(state, pos, 1.0, alpha)
                break
            if check_draw(board):
                _q_update(state, pos, 0.5, alpha)
                break

            # Random opponent move (Player -1)
            opponent_pos = random.choice(get_empty_positions(board))
            board = make_move(board, opponent_pos, -1)

            # Terminal outcomes after the opponent's reply; the loss is
            # credited to the AI move that allowed it.
            if check_winner(board, -1):
                _q_update(state, pos, -1.0, alpha)
                break
            if check_draw(board):
                _q_update(state, pos, 0.5, alpha)
                break

            # Game continues: no immediate reward, so the target is the
            # discounted best value currently known for the next AI state.
            # Unseen moves count as 0, matching choose_action's default.
            next_values = q_table.get(get_state_key(board), {})
            best_next = max(
                next_values.get(next_pos, 0.0)
                for next_pos in get_empty_positions(board)
            )
            _q_update(state, pos, gamma * best_next, alpha)

# Play game against AI
def play_game():
    """Play one interactive game on the console: AI (1) versus human (-1).

    The AI moves first and always plays greedily (``epsilon=0``). The human
    is prompted for a cell number 0-8 and re-prompted until a free cell is
    entered. The function returns when either side wins or the board is full.

    ``KeyboardInterrupt`` and ``EOFError`` raised by ``input`` are not
    caught, so the game can be interrupted and does not loop forever when
    standard input is closed.
    """
    board = create_board()

    while True:
        # Print current board
        print("\nCurrent board:")
        print(board.reshape(3,3))

        # AI move
        ai_pos = choose_action(board, epsilon=0)
        board = make_move(board, ai_pos, 1)
        print("\nAI moved at position:", ai_pos)
        print(board.reshape(3,3))

        # Check if AI wins
        if check_winner(board, 1):
            print("AI wins!")
            break

        # Check draw
        if check_draw(board):
            print("It's a draw!")
            break

        # Player move: keep asking until a free cell number is entered
        while True:
            try:
                pos = int(input("\nEnter your move (0-8): "))
            except ValueError:
                # Only non-numeric text is a recoverable input error.
                print("Invalid input, try again")
                continue
            if pos in get_empty_positions(board):
                break
            print("Invalid move, try again")

        board = make_move(board, pos, -1)

        # Check if player wins
        if check_winner(board, -1):
            print("\nYou win!")
            print(board.reshape(3,3))
            break

        # Check draw
        if check_draw(board):
            print("\nIt's a draw!")
            print(board.reshape(3,3))
            break

# Main game
# Train first so the Q-table is populated before the interactive game starts.
print("Training AI...")
train_ai()
print("Training complete!")

# Tell the human which symbol they play and how the cells are numbered.
print(
    "\nGame starts!",
    "You are O (player -1)",
    "AI is X (player 1)",
    "Positions are numbered 0-8, left to right, top to bottom",
    "Example board with positions:",
    sep="\n",
)
print(np.arange(9).reshape((3, 3)))

play_game()

Training AI...
Training complete!

Game starts!
You are O (player -1)
AI is X (player 1)
Positions are numbered 0-8, left to right, top to bottom
Example board with positions:
[[0 1 2]
 [3 4 5]
 [6 7 8]]

Current board:
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]

AI moved at position: 0
[[1. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]

Enter your move (0-8): 2

Current board:
[[ 1.  0. -1.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]

AI moved at position: 1
[[ 1.  1. -1.]
 [ 0.  0.  0.]
 [ 0.  0.  0.]]

Enter your move (0-8): 6

Current board:
[[ 1.  1. -1.]
 [ 0.  0.  0.]
 [-1.  0.  0.]]

AI moved at position: 4
[[ 1.  1. -1.]
 [ 0.  1.  0.]
 [-1.  0.  0.]]

Enter your move (0-8): 8

Current board:
[[ 1.  1. -1.]
 [ 0.  1.  0.]
 [-1.  0. -1.]]

AI moved at position: 7
[[ 1.  1. -1.]
 [ 0.  1.  0.]
 [-1.  1. -1.]]
AI wins!
