In [None]:
import random
from collections import defaultdict

# Q-Learning Agent
# Value table shared by the functions below: Q[state][action] -> float.
# `state` is the 9 board cells joined into one string and `action` is the
# index of the cell to play. Both levels are defaultdicts, so an unseen
# state/action reads as 0.0 (and is inserted into the table by that read).
Q = defaultdict(lambda: defaultdict(float))

def get_action(board, eps=0.2):
    """Choose the agent's next cell with an epsilon-greedy policy.

    Args:
        board: Sequence of 9 one-character cells; ' ' marks a free cell.
        eps: Probability of exploring, i.e. returning a uniformly random
            free cell instead of the best-valued one.

    Returns:
        Index of the selected free cell.
    """
    free_cells = [idx for idx, cell in enumerate(board) if cell == ' ']
    key = ''.join(board)

    explore = random.random() < eps
    if explore:
        return random.choice(free_cells)

    def learned_value(cell):
        # Looked up lazily so the table is only touched when exploiting.
        return Q[key][cell]

    # max() keeps the first of several equally valued cells.
    return max(free_cells, key=learned_value)

def check_win(board):
    """Report the outcome of a tic-tac-toe position.

    Args:
        board: Sequence of 9 cells, where ' ' marks an empty cell.

    Returns:
        The mark filling a completed line, 'D' when no line is complete
        and no empty cell remains, or None while the game is still open.
    """
    rows = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    columns = [(0, 3, 6), (1, 4, 7), (2, 5, 8)]
    diagonals = [(0, 4, 8), (2, 4, 6)]

    for a, b, c in rows + columns + diagonals:
        mark = board[a]
        if mark != ' ' and mark == board[b] and mark == board[c]:
            return mark

    if ' ' in board:
        return None
    return 'D'

def update_q(states, actions, reward, alpha=0.1, q_table=None):
    """Move every visited (state, action) value toward the episode reward.

    Each entry is updated as ``value += alpha * (reward - value)``; the same
    final reward is applied to all pairs, with no discounting between steps.

    Args:
        states: Board-state keys visited during the episode, in order.
        actions: Cell indices played from the corresponding states. Pairs
            are formed with zip(), so extra items in the longer sequence
            are ignored.
        reward: Final outcome credited to every visited pair.
        alpha: Learning rate (step size); defaults to the previously
            hard-coded 0.1.
        q_table: Two-level mapping ``table[state][action] -> float`` to
            update in place. Defaults to the module-level ``Q``.

    Returns:
        None. The table is modified in place.
    """
    table = Q if q_table is None else q_table
    for s, a in zip(states, actions):
        table[s][a] += alpha * (reward - table[s][a])

# Train
# Self-play against a uniformly random opponent: the agent is always 'X' and
# moves first; every episode ends with one update_q() call that credits the
# final result to all (state, action) pairs the agent visited.
print("Training...")
for ep in range(50000):
    board = [' '] * 9
    states = []
    actions = []

    while True:
        # Agent's turn: record the position it moved from and the cell chosen.
        position = ''.join(board)
        action = get_action(board)
        states.append(position)
        actions.append(action)
        board[action] = 'X'

        outcome = check_win(board)
        if outcome:
            # +1 for an 'X' line, -1 for an 'O' line, 0 for anything else
            # (a draw).
            reward = {'X': 1, 'O': -1}.get(outcome, 0)
            update_q(states, actions, reward)
            break

        # Opponent's turn: a random empty cell.
        empty = [idx for idx, cell in enumerate(board) if cell == ' ']
        board[random.choice(empty)] = 'O'
        if check_win(board):
            # The game ended on the opponent's move; scored as a loss.
            update_q(states, actions, -1)
            break

    # Progress report every 10000 episodes.
    if not (ep + 1) % 10000:
        print(f"Episode {ep + 1}/50000")

# Play
# Interactive game: the trained agent ('X') moves first and plays greedily
# (eps=0); the human ('O') enters a cell index on stdin.
def _render_board(cells):
    """Return the 3x3 grid as the multi-line string printed to the player."""
    rows = (f" {cells[r]}|{cells[r + 1]}|{cells[r + 2]}" for r in (0, 3, 6))
    return "\n" + "\n-+-+-\n".join(rows) + "\n"


def _announce(result):
    """Print the end-of-game message for a check_win() result."""
    print("AI wins!" if result=='X' else "You win!" if result=='O' else "Draw!")


print("\nLet's play! You are O, AI is X. Positions 0-8:")
print("0|1|2\n-+-+-\n3|4|5\n-+-+-\n6|7|8\n")
board = [' ']*9

while True:
    # AI
    action = get_action(board, 0)
    board[action] = 'X'
    print(f"AI plays {action}")
    print(_render_board(board))

    w = check_win(board)
    if w:
        _announce(w)
        break

    # You
    while True:
        # Only a non-integer entry is treated as bad input. KeyboardInterrupt
        # and EOFError propagate so the prompt can be interrupted and cannot
        # loop forever on a closed stdin.
        try:
            m = int(input("Your move (0-8): "))
        except ValueError:
            print("Invalid input!")
            continue
        # Explicit range check: a negative index would otherwise wrap around
        # (e.g. -1 would silently play cell 8).
        if not 0 <= m < len(board):
            print("Invalid input!")
            continue
        if board[m] != ' ':
            print("Spot taken!")
            continue
        board[m] = 'O'
        break

    w = check_win(board)
    if w:
        print(_render_board(board))
        _announce(w)
        break

Training...
Episode 10000/50000
Episode 20000/50000
Episode 30000/50000
Episode 40000/50000
Episode 50000/50000

Let's play! You are O, AI is X. Positions 0-8:
0|1|2
-+-+-
3|4|5
-+-+-
6|7|8

AI plays 0

 X| | 
-+-+-
  | | 
-+-+-
  | | 



Your move (0-8):  2


AI plays 6

 X| |O
-+-+-
  | | 
-+-+-
 X| | 



Your move (0-8):  5


AI plays 3

 X| |O
-+-+-
 X| |O
-+-+-
 X| | 

AI wins!
