In [1]:
import numpy as np
import random
import pickle

In [2]:
class TicTacToeQLearning:
    """Tabular value store with epsilon-greedy move selection.

    Attributes:
        q_table: dict mapping ``(tuple(state), action)`` to a float value.
            Pairs that were never written read as ``0.0``.
        alpha: step size used when moving a stored value toward a reward.
        gamma: factor applied to the reward for every step further back
            in ``state_history``.
        epsilon: probability that ``choose_action`` picks a random move.
        state_history: list of ``(state, action)`` pairs that the next
            ``update_q_table`` call consumes.
    """

    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.2):
        """Start with an empty table and an empty move history."""
        self.q_table = {}
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.state_history = []

    def get_q_value(self, state, action):
        """Return the stored value for ``(state, action)``, or 0.0 if absent."""
        key = (tuple(state), action)
        return self.q_table.get(key, 0.0)

    def choose_action(self, board, available_moves):
        """Pick a move: random with probability ``epsilon``, else the best known.

        Ties between equally valued moves go to whichever comes first in
        ``available_moves``.
        """
        explore = random.uniform(0, 1) < self.epsilon
        if explore:
            return random.choice(available_moves)
        return max(available_moves, key=lambda move: self.get_q_value(board, move))

    def update_q_table(self, reward):
        """Pull every recorded pair toward ``reward``, newest first.

        The most recent pair is moved toward the full reward; each earlier
        pair sees the reward multiplied by ``gamma`` once more. The history
        is cleared afterwards.
        """
        target = reward
        for state, action in reversed(self.state_history):
            current = self.get_q_value(state, action)
            self.q_table[(tuple(state), action)] = current + self.alpha * (target - current)
            target *= self.gamma
        self.state_history = []

    def save_q_table(self, filename='q_table.pkl'):
        """Pickle the table to ``filename``."""
        with open(filename, 'wb') as out_file:
            pickle.dump(self.q_table, out_file)

    def load_q_table(self, filename='q_table.pkl'):
        """Replace the table with the one pickled in ``filename``.

        Unpickling can execute arbitrary code, so only load files that this
        notebook (or another trusted source) produced.
        """
        with open(filename, 'rb') as in_file:
            self.q_table = pickle.load(in_file)

In [3]:
def train_agent(episodes=10000):
    """Train one agent by self-play and save its table.

    The same agent chooses the moves for both sides. Two details matter for
    the learned values to be usable afterwards:

    * Each ``(state, action)`` pair is recorded with the board as it was
      *before* the move, because that is the board ``choose_action`` uses
      when it looks values up.
    * Each side keeps its own history and is rewarded from its own point of
      view (+1 for a win, -1 for a loss, 0 for a draw), so a high value
      always means "good for whoever is about to move".

    Args:
        episodes: number of self-play games to run.

    Returns:
        The trained ``TicTacToeQLearning`` agent. Its table is also written
        to disk through ``agent.save_q_table()`` (default file name).
    """
    agent = TicTacToeQLearning()
    for _ in range(episodes):
        board, available_moves, player = [0] * 9, list(range(9)), 1
        histories = {1: [], -1: []}
        winner = 0  # stays 0 when the board fills up without a winner
        while available_moves:
            action = agent.choose_action(board, available_moves)
            # Snapshot the board before it is modified so the stored key
            # matches what choose_action will query later.
            histories[player].append((board[:], action))
            board[action] = player
            available_moves.remove(action)
            if check_winner(board, player):
                winner = player
                break
            player *= -1
        # update_q_table consumes and clears agent.state_history, so feed it
        # one side at a time with that side's own reward.
        for mover, moves in histories.items():
            agent.state_history = moves
            agent.update_q_table(winner * mover)
    agent.save_q_table()
    return agent

In [4]:
def check_winner(board, player):
    """Return True when ``player`` holds all three cells of a winning line.

    ``board`` is indexed 0-8; the lines below are the index triples that
    count as a win.
    """
    winning_lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )
    for first, second, third in winning_lines:
        if board[first] == player and board[second] == player and board[third] == player:
            return True
    return False

In [5]:
def play_interactive_game():
    """Play one console game: the human is player 1 (X), the AI is player -1 (O).

    The AI's table is read from disk with ``load_q_table()`` (default file
    name), so a saved table must already exist. The human is asked again
    whenever the input is not a number or names a cell that is not free.
    """
    # Random exploration only makes sense while training; during a real game
    # the AI should always take its best-known move.
    agent = TicTacToeQLearning(epsilon=0.0)
    # NOTE: load_q_table unpickles the file; only use a table you created yourself.
    agent.load_q_table()
    board, available_moves, player = [0] * 9, list(range(9)), 1
    while available_moves:
        print_board(board)
        if player == 1:
            try:
                move = int(input("Enter your move (0-8): "))
            except ValueError:
                # Non-numeric input used to abort the whole game.
                print("Please enter a number from 0 to 8.")
                continue
            if move not in available_moves:
                print("That square is not available, try again.")
                continue
            print("")
        else:
            move = agent.choose_action(board, available_moves)
            print(f"AI chooses: {move}")
        board[move] = player
        available_moves.remove(move)
        if check_winner(board, player):
            print_board(board)
            print("Player 1 wins!" if player == 1 else "AI wins!")
            return
        player *= -1
    print_board(board)
    print("It's a draw!")

In [6]:
def print_board(board):
    """Print the 9-cell board as three rows of ``X`` / ``O`` / blank cells.

    Cells holding 1 are shown as ``X``, -1 as ``O`` and 0 as a space; the
    grid is followed by a blank line.
    """
    symbols = {1: 'X', -1: 'O', 0: ' '}
    rows = []
    for start in range(0, 9, 3):
        cells = [symbols[board[idx]] for idx in range(start, start + 3)]
        rows.append(" | ".join(cells))
    print("\n".join(rows), "\n")

In [7]:
# Entry point: train a fresh agent, then play one console game against it.
if __name__ == '__main__':
    # train_agent() saves the learned table via save_q_table(), which is the
    # file play_interactive_game() reads back with load_q_table().
    train_agent()
    print("Training Complete! Q-Table Saved.")
    # Blocks on input() until the game is over.
    play_interactive_game()

Training Complete! Q-Table Saved.
  |   |  
  |   |  
  |   |   

Enter your move (0-8): 2

  |   | X
  |   |  
  |   |   

AI chooses: 0
O |   | X
  |   |  
  |   |   

Enter your move (0-8): 4

O |   | X
  | X |  
  |   |   

AI chooses: 1
O | O | X
  | X |  
  |   |   

Enter your move (0-8): 8

O | O | X
  | X |  
  |   | X 

AI chooses: 3
O | O | X
O | X |  
  |   | X 

Enter your move (0-8): 7

O | O | X
O | X |  
  | X | X 

AI chooses: 6
O | O | X
O | X |  
O | X | X 

AI wins!
