# Tic-Tac-Toe

Tic Tac Toe game implemented with Q-learning in Python

**Imports and Constants**

In [None]:
import pygame
import numpy as np
import random
import pickle

**Game Settings**
*   WIDTH, HEIGHT: Dimensions of the game window.
*   LINE_WIDTH: Width of the lines drawn for the grid.
*   BOARD_ROWS, BOARD_COLS: Tic Tac Toe is 3x3, so both are set to 3.
*   SQUARE_SIZE: Size of each cell in the grid.

In [None]:
# Window size in pixels and grid geometry.
WIDTH, HEIGHT = 300, 300
LINE_WIDTH = 10  # stroke width used for the grid lines and the X marks
BOARD_ROWS, BOARD_COLS = 3, 3
SQUARE_SIZE = WIDTH // BOARD_COLS  # side length of one cell, derived from the window width

# RGB colour tuples used when drawing.
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)

**Q-learning Settings**

In [None]:
# Hyper-parameters read as module-level globals by QLearningAgent.
epsilon = 0.9  # Exploration rate: probability that choose_action picks a random move
alpha = 0.2    # Learning rate: step size of each Q-value update
gamma = 0.9    # Discount factor applied to the best next-state Q-value

**Game State Constants**

Constants representing empty cells, player X, and player O.

In [None]:
# Values stored in the cells of the board array.
EMPTY = 0
X = 1  # TicTacToe always gives the first move to X
O = -1

**TicTacToe Class**
\
Initializes the game board as a 3x3 grid of zeros (empty). Sets the current player to X.


Key Methods:
\
reset: Resets the board and current player.
\
is_winner: Checks if the given player has won by checking rows, columns, and diagonals.
\
is_full: Checks if the board is full (no empty cells).
\
available_actions: Returns a list of coordinates for empty cells.
\
make_move: Updates the board with the current player's move and switches to the other player.

In [None]:
class TicTacToe:
    """Board state and rules of a Tic Tac Toe game.

    ``board`` is a BOARD_ROWS x BOARD_COLS NumPy array whose cells hold
    EMPTY, X or O; ``current_player`` is the mark that moves next.
    """

    def __init__(self):
        # A new game starts exactly like a reset one: empty board, X to move.
        self.reset()

    def reset(self):
        """Clear the board and hand the first move back to X."""
        self.current_player = X
        self.board = np.zeros((BOARD_ROWS, BOARD_COLS))

    def is_winner(self, player):
        """Return True if ``player`` fills a whole row, column or diagonal."""
        grid = self.board
        lines = [grid[r, :] for r in range(BOARD_ROWS)]
        lines += [grid[:, c] for c in range(BOARD_COLS)]
        lines.append(np.diag(grid))
        lines.append(np.diag(np.fliplr(grid)))
        return any(bool(np.all(line == player)) for line in lines)

    def is_full(self):
        """Report whether no EMPTY cell is left on the board."""
        return (self.board != EMPTY).all()

    def available_actions(self):
        """List the (row, col) coordinates of every empty cell, row by row."""
        free_cells = []
        for r in range(BOARD_ROWS):
            for c in range(BOARD_COLS):
                if self.board[r, c] == EMPTY:
                    free_cells.append((r, c))
        return free_cells

    def make_move(self, row, col):
        """Place the current player's mark at (row, col).

        Returns True and passes the turn to the other player when the cell
        was empty; returns False and changes nothing when it was occupied.
        """
        if self.board[row, col] != EMPTY:
            return False
        self.board[row, col] = self.current_player
        if self.current_player == X:
            self.current_player = O
        else:
            self.current_player = X
        return True

**QLearningAgent Class**
\
Initializes an empty Q-table to store state-action values.

Key Methods
\
save_q_table: Saves the Q-table to a file using pickle.
\
load_q_table: Loads the Q-table from a file, initializing it if the file does not exist.
\
get_state_key: Converts the board state to a string key for the Q-table.
\
choose_action: Chooses an action using the epsilon-greedy strategy:
*   With probability epsilon, it explores by selecting a random action.
*   Otherwise, it selects the action with the highest Q-value from the Q-table.

learn: Updates the Q-value for the taken action using the Q-learning formula based on the received reward and estimated future rewards.

In [None]:
class QLearningAgent:
    """Tabular Q-learning agent.

    Q-values live in ``q_table``, a dict keyed by ``(state_key, action)``;
    missing entries count as 0.  The module-level ``epsilon``, ``alpha``
    and ``gamma`` supply the hyper-parameters.
    """

    def __init__(self):
        self.q_table = {}  # (state_key, (row, col)) -> Q-value

    def save_q_table(self, filename='q_table.pkl'):
        """Pickle the Q-table to ``filename``; failures are printed, not raised."""
        try:
            with open(filename, 'wb') as out_file:
                pickle.dump(self.q_table, out_file)
            print(f"Q-table saved to {filename}")
        except Exception as err:
            print(f"Error saving Q-table: {err}")

    def load_q_table(self, filename='q_table.pkl'):
        """Replace the Q-table with the one pickled in ``filename``.

        A missing file resets the table to empty; any other failure is
        printed and leaves the current table untouched.
        """
        # NOTE: unpickling can execute arbitrary code - only load trusted files.
        try:
            with open(filename, 'rb') as in_file:
                self.q_table = pickle.load(in_file)
                print("Q-table loaded successfully.")
        except FileNotFoundError:
            print("Q-table file not found, starting fresh.")
            self.q_table = {}
        except Exception as err:
            print(f"Error loading Q-table: {err}")

    def get_state_key(self, board):
        """Return the string form of ``board`` flattened to 9 cells (the state key)."""
        flat = board.reshape(9)
        return str(flat)

    def choose_action(self, state, available_actions):
        """Pick an action for the state key ``state`` with an epsilon-greedy rule.

        With probability ``epsilon`` a random available action is returned;
        otherwise the first action with the highest stored Q-value.
        """
        if random.uniform(0, 1) < epsilon:
            return random.choice(available_actions)  # explore

        scores = [self.q_table.get((state, (r, c)), 0) for r, c in available_actions]
        return available_actions[scores.index(max(scores))]  # exploit

    def learn(self, state, action, reward, next_state, available_actions):
        """Apply one Q-learning update for taking ``action`` in board ``state``.

        ``available_actions`` are the actions whose Q-values in ``next_state``
        are maximised over; an empty list gives a future value of 0.
        """
        entry = (self.get_state_key(state), action)
        next_key = self.get_state_key(next_state)
        old_q = self.q_table.get(entry, 0)

        best_next = max(
            (self.q_table.get((next_key, a), 0) for a in available_actions),
            default=0,
        )

        # Move the stored value towards reward + discounted best future value.
        target = reward + gamma * best_next
        self.q_table[entry] = old_q + alpha * (target - old_q)

        print(f"Updated Q-value for {entry[0]}, {action}: {self.q_table[entry]}")

**Drawing the Board**

This function draws the game grid and the current state of the board, rendering Xs and Os as per the current game state.

In [None]:
def draw_board(board):
    """Render the grid and every mark of ``board`` onto the global ``screen``.

    X cells are drawn as two green diagonal strokes and O cells as a red
    circle.  The function neither clears the screen nor flips the display.
    """
    # Inner grid lines: horizontal first, then vertical.
    for r in range(1, BOARD_ROWS):
        y_pos = r * SQUARE_SIZE
        pygame.draw.line(screen, BLACK, (0, y_pos), (WIDTH, y_pos), LINE_WIDTH)
    for c in range(1, BOARD_COLS):
        x_pos = c * SQUARE_SIZE
        pygame.draw.line(screen, BLACK, (x_pos, 0), (x_pos, HEIGHT), LINE_WIDTH)

    inset = 10  # gap between the ends of an X stroke and the cell border
    for r in range(BOARD_ROWS):
        for c in range(BOARD_COLS):
            mark = board[r, c]
            left, top = c * SQUARE_SIZE, r * SQUARE_SIZE
            if mark == X:
                near_x, far_x = left + inset, left + SQUARE_SIZE - inset
                near_y, far_y = top + inset, top + SQUARE_SIZE - inset
                pygame.draw.line(screen, GREEN, (near_x, near_y), (far_x, far_y), LINE_WIDTH)
                pygame.draw.line(screen, GREEN, (far_x, near_y), (near_x, far_y), LINE_WIDTH)
            elif mark == O:
                centre = (left + SQUARE_SIZE // 2, top + SQUARE_SIZE // 2)
                pygame.draw.circle(screen, RED, centre, SQUARE_SIZE // 3)

**Main Game Loop**

Game Initialization: Creates instances of TicTacToe and QLearningAgent, and loads the Q-table.

Game Loop: Continues while the game is running:
*   Clears the screen and draws the board.
*   Checks for a win or draw condition.
*   If the game continues, the AI makes a move based on the current state and updates the Q-table.

End of Game: Saves the Q-table before quitting.

In [None]:
# Initialize Pygame and open the game window.
# These statements run as soon as the cell/module is executed; draw_board()
# and main() draw on the module-level `screen` surface created here.
pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption('Tic Tac Toe with Q-Learning')

def main():
    """Run the visual self-play loop until the window is closed.

    One agent plays both X and O, one move per second.  After every move
    the resulting board is scored from O's point of view (X wins: -1,
    O wins: +1, draw: 0, game still running: -0.1) and that reward is
    passed to ``agent.learn`` for the move that produced it.  The Q-table
    is saved after every finished game and again when the loop ends, even
    if it ends because of an error or Ctrl-C.
    """
    game = TicTacToe()  # Create a TicTacToe instance
    agent = QLearningAgent()  # Create a QLearningAgent instance

    # Load Q-table
    agent.load_q_table()

    clock = pygame.time.Clock()
    running = True

    try:
        while running:
            # Pump the event queue: without this the window stops responding
            # and there is no way to leave the loop by closing it.
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
            if not running:
                break

            # The board is never terminal here (it is reset right after a
            # game ends), so there is always at least one legal action.
            state = game.board.copy()
            available_actions = game.available_actions()
            action = agent.choose_action(agent.get_state_key(state), available_actions)
            game.make_move(action[0], action[1])
            next_state = game.board.copy()

            # Score the move by the board it produced, so terminal rewards
            # actually reach the Q-table update.
            if game.is_winner(X):
                result, reward = "X wins!", -1  # Punish AI if X wins
            elif game.is_winner(O):
                result, reward = "O wins!", 1  # Reward AI if O wins
            elif game.is_full():
                result, reward = "It's a draw!", 0  # No reward for a draw
            else:
                result, reward = None, -0.1  # Small penalty for continuing the game

            # Only moves legal in the *next* state may contribute a future
            # value; a finished game has none.
            next_actions = [] if result is not None else game.available_actions()
            agent.learn(state, action, reward, next_state, next_actions)

            screen.fill(WHITE)  # Clear the screen
            draw_board(game.board)  # Draw the game board (including a final position)
            pygame.display.flip()  # Update the display
            clock.tick(1)  # Control the game frame rate

            if result is not None:
                print(result)
                game.reset()  # Reset the game
                agent.save_q_table()  # Save Q-table
    finally:
        # Save Q-table after the game ends, however the loop was left
        agent.save_q_table()
        pygame.quit()  # Quit Pygame

**Exception Handling**

Handles graceful termination and saves the Q-table if an error occurs or if the game is interrupted.

In [None]:
if __name__ == "__main__":
    # `agent` is a local variable of main(), so it cannot be saved from here:
    # referencing it raised NameError and masked the original error.  main()
    # saves the Q-table itself after every finished game.
    try:
        main()  # Run the main function
    except KeyboardInterrupt:
        print("Training interrupted.")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        pygame.quit()  # Safe to call even if main() already shut Pygame down

# AI self-training code

This code file contains the training part of the Q-learning algorithm for the Tic Tac Toe game. It simulates an agent playing against itself for 3000 rounds, optimizing its strategy by continuously updating the Q value. The code includes the following elements:

*   Initialize the Q table and related parameters (such as epsilon, alpha, gamma).
*   The main loop of the game, where the agent selects actions based on the current strategy and updates the Q value.
*   The result of each training round (X wins, O wins, or draw) is printed to the console.

In [None]:
import pygame
import numpy as np
import random
import pickle

# Game settings: window size in pixels and grid geometry
WIDTH, HEIGHT = 300, 300
LINE_WIDTH = 10  # stroke width used for the grid lines and the X marks
BOARD_ROWS, BOARD_COLS = 3, 3
SQUARE_SIZE = WIDTH // BOARD_COLS  # side length of one cell, derived from the window width

# Colors (RGB tuples)
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)

# Q-learning settings (read as module-level globals by QLearningAgent)
epsilon = 0.9  # Exploration rate: probability that choose_action picks a random move
alpha = 0.2    # Learning rate: step size of each Q-value update
gamma = 0.9    # Discount factor applied to the best next-state Q-value

# Game state: values stored in the cells of the board array
EMPTY = 0
X = 1  # TicTacToe always gives the first move to X
O = -1

class TicTacToe:
    """Board state and rules of a Tic Tac Toe game.

    ``board`` is a BOARD_ROWS x BOARD_COLS NumPy array whose cells hold
    EMPTY, X or O; ``current_player`` is the mark that moves next.
    """

    def __init__(self):
        # A new game starts exactly like a reset one: empty board, X to move.
        self.reset()

    def reset(self):
        """Clear the board and hand the first move back to X."""
        self.current_player = X
        self.board = np.zeros((BOARD_ROWS, BOARD_COLS))

    def is_winner(self, player):
        """Return True if ``player`` fills a whole row, column or diagonal."""
        grid = self.board
        lines = [grid[r, :] for r in range(BOARD_ROWS)]
        lines += [grid[:, c] for c in range(BOARD_COLS)]
        lines.append(np.diag(grid))
        lines.append(np.diag(np.fliplr(grid)))
        return any(bool(np.all(line == player)) for line in lines)

    def is_full(self):
        """Report whether no EMPTY cell is left on the board."""
        return (self.board != EMPTY).all()

    def available_actions(self):
        """List the (row, col) coordinates of every empty cell, row by row."""
        free_cells = []
        for r in range(BOARD_ROWS):
            for c in range(BOARD_COLS):
                if self.board[r, c] == EMPTY:
                    free_cells.append((r, c))
        return free_cells

    def make_move(self, row, col):
        """Place the current player's mark at (row, col).

        Returns True and passes the turn to the other player when the cell
        was empty; returns False and changes nothing when it was occupied.
        """
        if self.board[row, col] != EMPTY:
            return False
        self.board[row, col] = self.current_player
        if self.current_player == X:
            self.current_player = O
        else:
            self.current_player = X
        return True

class QLearningAgent:
    """Tabular Q-learning agent.

    Q-values live in ``q_table``, a dict keyed by ``(state_key, action)``;
    missing entries count as 0.  The module-level ``epsilon``, ``alpha``
    and ``gamma`` supply the hyper-parameters.
    """

    def __init__(self):
        self.q_table = {}  # (state_key, (row, col)) -> Q-value

    def save_q_table(self, filename='q_table.pkl'):
        """Pickle the Q-table to ``filename``; failures are printed, not raised."""
        try:
            with open(filename, 'wb') as out_file:
                pickle.dump(self.q_table, out_file)
            print(f"Q-table saved to {filename}")
        except Exception as err:
            print(f"Error saving Q-table: {err}")

    def load_q_table(self, filename='q_table.pkl'):
        """Replace the Q-table with the one pickled in ``filename``.

        A missing file resets the table to empty; any other failure is
        printed and leaves the current table untouched.
        """
        # NOTE: unpickling can execute arbitrary code - only load trusted files.
        try:
            with open(filename, 'rb') as in_file:
                self.q_table = pickle.load(in_file)
                print("Q-table loaded successfully.")
        except FileNotFoundError:
            print("Q-table file not found, starting fresh.")
            self.q_table = {}
        except Exception as err:
            print(f"Error loading Q-table: {err}")

    def get_state_key(self, board):
        """Return the string form of ``board`` flattened to 9 cells (the state key)."""
        flat = board.reshape(9)
        return str(flat)

    def choose_action(self, state, available_actions):
        """Pick an action for the state key ``state`` with an epsilon-greedy rule.

        With probability ``epsilon`` a random available action is returned;
        otherwise the first action with the highest stored Q-value.
        """
        if random.uniform(0, 1) < epsilon:
            return random.choice(available_actions)  # explore

        scores = [self.q_table.get((state, (r, c)), 0) for r, c in available_actions]
        return available_actions[scores.index(max(scores))]  # exploit

    def learn(self, state, action, reward, next_state, available_actions):
        """Apply one Q-learning update for taking ``action`` in board ``state``.

        ``available_actions`` are the actions whose Q-values in ``next_state``
        are maximised over; an empty list gives a future value of 0.
        """
        entry = (self.get_state_key(state), action)
        next_key = self.get_state_key(next_state)
        old_q = self.q_table.get(entry, 0)

        best_next = max(
            (self.q_table.get((next_key, a), 0) for a in available_actions),
            default=0,
        )

        # Move the stored value towards reward + discounted best future value.
        target = reward + gamma * best_next
        self.q_table[entry] = old_q + alpha * (target - old_q)

def draw_board(board):
    """Render the grid and every mark of ``board`` onto the global ``screen``.

    X cells are drawn as two green diagonal strokes and O cells as a red
    circle.  The function neither clears the screen nor flips the display.
    """
    # Inner grid lines: horizontal first, then vertical.
    for r in range(1, BOARD_ROWS):
        y_pos = r * SQUARE_SIZE
        pygame.draw.line(screen, BLACK, (0, y_pos), (WIDTH, y_pos), LINE_WIDTH)
    for c in range(1, BOARD_COLS):
        x_pos = c * SQUARE_SIZE
        pygame.draw.line(screen, BLACK, (x_pos, 0), (x_pos, HEIGHT), LINE_WIDTH)

    inset = 10  # gap between the ends of an X stroke and the cell border
    for r in range(BOARD_ROWS):
        for c in range(BOARD_COLS):
            mark = board[r, c]
            left, top = c * SQUARE_SIZE, r * SQUARE_SIZE
            if mark == X:
                near_x, far_x = left + inset, left + SQUARE_SIZE - inset
                near_y, far_y = top + inset, top + SQUARE_SIZE - inset
                pygame.draw.line(screen, GREEN, (near_x, near_y), (far_x, far_y), LINE_WIDTH)
                pygame.draw.line(screen, GREEN, (far_x, near_y), (near_x, far_y), LINE_WIDTH)
            elif mark == O:
                centre = (left + SQUARE_SIZE // 2, top + SQUARE_SIZE // 2)
                pygame.draw.circle(screen, RED, centre, SQUARE_SIZE // 3)

# Initialize Pygame and open the game window.
# These statements run as soon as the cell/module is executed; draw_board()
# and main() draw on the module-level `screen` surface created here.
pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption('Tic Tac Toe with Q-Learning')

def main():
    """Train the agent by self-play for a fixed number of games, then save.

    One agent plays both X and O.  After every move the resulting board is
    scored from O's point of view (X wins: -1, O wins: +1, draw: 0, game
    still running: -0.1) and that reward is passed to ``agent.learn`` for
    the move that produced it.  Training stops after ``train_rounds``
    finished games or when the window is closed; the Q-table is saved in
    either case, and also if the loop is left through an error or Ctrl-C.
    """
    game = TicTacToe()
    agent = QLearningAgent()

    # Load Q-table
    agent.load_q_table()

    clock = pygame.time.Clock()
    running = True

    train_rounds = 3000  # Maximum number of training rounds
    current_round = 0  # Current training round

    try:
        while running and current_round < train_rounds:
            # Pump the event queue: without this the window stops responding
            # and training cannot be stopped by closing it.
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
            if not running:
                break

            # The board is never terminal here (it is reset right after a
            # game ends), so there is always at least one legal action.
            state = game.board.copy()
            available_actions = game.available_actions()
            action = agent.choose_action(agent.get_state_key(state), available_actions)
            game.make_move(action[0], action[1])
            next_state = game.board.copy()

            # Score the move by the board it produced.  Scoring before the
            # move credited a finished game's reward to the opening move of
            # the *next* game instead of the move that ended it.
            if game.is_winner(X):
                result, reward = "X wins!", -1  # Penalize AI if X wins
            elif game.is_winner(O):
                result, reward = "O wins!", 1  # Reward AI if O wins
            elif game.is_full():
                result, reward = "It's a draw!", 0  # Zero reward for a draw
            else:
                result, reward = None, -0.1  # Small penalty if the game is ongoing

            # Only moves legal in the *next* state may contribute a future
            # value; a finished game has none.
            next_actions = [] if result is not None else game.available_actions()
            agent.learn(state, action, reward, next_state, next_actions)

            screen.fill(WHITE)
            draw_board(game.board)
            pygame.display.flip()
            clock.tick(30)  # Control the frame rate of the game

            if result is not None:
                print(result)
                game.reset()  # Reset the game
                current_round += 1  # Increase training rounds after game ends

        if current_round >= train_rounds:
            print(f"Training finished after {train_rounds} rounds.")
    finally:
        # Save Q-table after training finishes, however the loop was left
        agent.save_q_table()
        pygame.quit()

# Start training only when this file is executed as a script; note that the
# module-level Pygame initialisation above still runs on import.
if __name__ == "__main__":
    main()

# Code for the player to play against the agent

Explanation:
This code file allows the player to play Tic Tac Toe against a trained Q-learning agent. The player can input their actions, and the agent responds based on the trained Q values. The code includes the following elements:

*   Load the trained Q table.
*   Provide a user interface that allows the player to select actions and displays the current game state.
*   Implement the game win and loss logic and provide result feedback after the game is over.

In [None]:
import pygame
import numpy as np
import random
import pickle

# Game settings: window size in pixels and grid geometry
WIDTH, HEIGHT = 300, 300
LINE_WIDTH = 10  # stroke width used for the grid lines and the X marks
BOARD_ROWS, BOARD_COLS = 3, 3
SQUARE_SIZE = WIDTH // BOARD_COLS  # side length of one cell, derived from the window width

# Colors (RGB tuples)
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)  # buttons and result banner background

# Q-learning settings (read as module-level globals by QLearningAgent)
# NOTE(review): with epsilon = 0.5 the agent still picks a random move half of
# the time while playing a human - confirm this is intended for play mode.
epsilon = 0.5  # Exploration rate
alpha = 0.5    # Learning rate
gamma = 0.9    # Discount factor

# Game state: values stored in the cells of the board array
EMPTY = 0
X = 1  # TicTacToe always gives the first move to X
O = -1

class TicTacToe:
    """Board state and rules of a Tic Tac Toe game.

    ``board`` is a BOARD_ROWS x BOARD_COLS NumPy array whose cells hold
    EMPTY, X or O; ``current_player`` is the mark that moves next.
    """

    def __init__(self):
        # A new game starts exactly like a reset one: empty board, X to move.
        self.reset()

    def reset(self):
        """Clear the board and hand the first move back to X."""
        self.current_player = X
        self.board = np.zeros((BOARD_ROWS, BOARD_COLS))

    def is_winner(self, player):
        """Return True if ``player`` fills a whole row, column or diagonal."""
        grid = self.board
        lines = [grid[r, :] for r in range(BOARD_ROWS)]
        lines += [grid[:, c] for c in range(BOARD_COLS)]
        lines.append(np.diag(grid))
        lines.append(np.diag(np.fliplr(grid)))
        return any(bool(np.all(line == player)) for line in lines)

    def is_full(self):
        """Report whether no EMPTY cell is left on the board."""
        return (self.board != EMPTY).all()

    def available_actions(self):
        """List the (row, col) coordinates of every empty cell, row by row."""
        free_cells = []
        for r in range(BOARD_ROWS):
            for c in range(BOARD_COLS):
                if self.board[r, c] == EMPTY:
                    free_cells.append((r, c))
        return free_cells

    def make_move(self, row, col):
        """Place the current player's mark at (row, col).

        Returns True and passes the turn to the other player when the cell
        was empty; returns False and changes nothing when it was occupied.
        """
        if self.board[row, col] != EMPTY:
            return False
        self.board[row, col] = self.current_player
        if self.current_player == X:
            self.current_player = O
        else:
            self.current_player = X
        return True

class QLearningAgent:
    """Tabular Q-learning agent.

    Q-values live in ``q_table``, a dict keyed by ``(state_key, action)``;
    missing entries count as 0.  The module-level ``epsilon``, ``alpha``
    and ``gamma`` supply the hyper-parameters.
    """

    def __init__(self):
        self.q_table = {}  # (state_key, (row, col)) -> Q-value

    def save_q_table(self, filename='q_table.pkl'):
        """Pickle the Q-table to ``filename``.

        Saving is best-effort: a failure (for example an unwritable path) is
        printed instead of raised, so it cannot crash the game on exit.
        """
        try:
            with open(filename, 'wb') as f:
                pickle.dump(self.q_table, f)  # Save Q-table to file
            print(f"Q-table saved to {filename}")
        except Exception as e:
            print(f"Error saving Q-table: {e}")

    def load_q_table(self, filename='q_table.pkl'):
        """Replace the Q-table with the one pickled in ``filename``.

        A missing file resets the table to empty.  Any other failure (for
        example a truncated or corrupt file) is printed and leaves the
        current table untouched instead of crashing at start-up.
        """
        # NOTE: unpickling can execute arbitrary code - only load trusted files.
        try:
            with open(filename, 'rb') as f:
                self.q_table = pickle.load(f)  # Load Q-table from file
                print("Q-table loaded successfully.")
        except FileNotFoundError:
            print("Q-table file not found, starting fresh.")
            self.q_table = {}  # Initialize Q-table if file not found
        except Exception as e:
            # Unpickling bad data can raise many exception types, not only
            # pickle.UnpicklingError, hence the broad handler.
            print(f"Error loading Q-table: {e}")

    def get_state_key(self, board):
        """Return the string form of ``board`` flattened to 9 cells (the state key)."""
        return str(board.reshape(9))

    def choose_action(self, state, available_actions):
        """Pick an action for the state key ``state`` with an epsilon-greedy rule.

        With probability ``epsilon`` a random available action is returned;
        otherwise the first action with the highest stored Q-value.
        """
        if random.uniform(0, 1) < epsilon:
            return random.choice(available_actions)
        q_values = [self.q_table.get((state, (r, c)), 0) for r, c in available_actions]
        return available_actions[q_values.index(max(q_values))]

    def learn(self, state, action, reward, next_state, available_actions):
        """Apply one Q-learning update for taking ``action`` in board ``state``.

        ``available_actions`` are the actions whose Q-values in ``next_state``
        are maximised over; an empty list gives a future value of 0.
        """
        state_key = self.get_state_key(state)
        next_state_key = self.get_state_key(next_state)
        current_q = self.q_table.get((state_key, action), 0)

        # Get maximum Q-value of the next state
        future_q = max(
            (self.q_table.get((next_state_key, a), 0) for a in available_actions),
            default=0,
        )

        # Update the Q-value for the state-action pair
        self.q_table[(state_key, action)] = current_q + alpha * (reward + gamma * future_q - current_q)

def draw_board(board):
    """Render the grid and every mark of ``board`` onto the global ``screen``.

    X cells are drawn as two green diagonal strokes and O cells as a red
    circle.  The function neither clears the screen nor flips the display.
    """
    # Inner grid lines: horizontal first, then vertical.
    for r in range(1, BOARD_ROWS):
        y_pos = r * SQUARE_SIZE
        pygame.draw.line(screen, BLACK, (0, y_pos), (WIDTH, y_pos), LINE_WIDTH)
    for c in range(1, BOARD_COLS):
        x_pos = c * SQUARE_SIZE
        pygame.draw.line(screen, BLACK, (x_pos, 0), (x_pos, HEIGHT), LINE_WIDTH)

    inset = 10  # gap between the ends of an X stroke and the cell border
    for r in range(BOARD_ROWS):
        for c in range(BOARD_COLS):
            mark = board[r, c]
            left, top = c * SQUARE_SIZE, r * SQUARE_SIZE
            if mark == X:
                near_x, far_x = left + inset, left + SQUARE_SIZE - inset
                near_y, far_y = top + inset, top + SQUARE_SIZE - inset
                pygame.draw.line(screen, GREEN, (near_x, near_y), (far_x, far_y), LINE_WIDTH)
                pygame.draw.line(screen, GREEN, (far_x, near_y), (near_x, far_y), LINE_WIDTH)
            elif mark == O:
                centre = (left + SQUARE_SIZE // 2, top + SQUARE_SIZE // 2)
                pygame.draw.circle(screen, RED, centre, SQUARE_SIZE // 3)

def draw_text(text, y_offset, size, color, background_color=None):
    """Blit ``text`` onto ``screen``, centred horizontally and on ``y_offset``.

    ``size`` is the size passed to pygame's default font; ``background_color``
    is forwarded to the renderer as the text background.
    """
    rendered = pygame.font.Font(None, size).render(text, True, color, background_color)
    placement = rendered.get_rect(center=(WIDTH // 2, y_offset))
    screen.blit(rendered, placement)

def draw_buttons():
    """Draw the stacked Reset and Quit buttons with their labels."""
    # (label, button rectangle, y of the label centre).  Each label y is the
    # vertical centre of its 30-pixel-high button.
    buttons = (
        ("Reset", (100, 200, 100, 30), 215),
        ("Quit", (100, 230, 100, 30), 245),
    )
    for _label, rect, _label_y in buttons:
        pygame.draw.rect(screen, BLUE, rect)
    for label, _rect, label_y in buttons:
        draw_text(label, label_y, 30, WHITE)

# Initialize Pygame and open the game window.
# These statements run as soon as the cell/module is executed; the drawing
# helpers and main() draw on the module-level `screen` surface created here.
pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption('Tic Tac Toe with Q-Learning')

def _game_result(game):
    """Classify the current board from the agent's (O's) point of view.

    Returns ``(message, reward)``.  ``message`` is None while the game is
    still running, otherwise the text shown to the player.
    """
    if game.is_winner(X):
        return "X wins!", -1  # Penalize AI if X wins
    if game.is_winner(O):
        return "O wins!", 1  # Reward AI if O wins
    if game.is_full():
        return "It's a draw!", 0  # Zero reward for a draw
    return None, -0.1  # Small penalty if the game is ongoing


def _play_turn(game, agent, row, col, last_agent_step):
    """Apply the player's click at (row, col) and the agent's reply.

    Returns ``(message, last_agent_step)``: ``message`` is the result text
    when the game just ended (else None) and ``last_agent_step`` is the
    ``(state, action)`` pair of the agent's most recent move.
    """
    if not game.make_move(row, col):
        return None, last_agent_step  # occupied cell: ignore the click

    message, reward = _game_result(game)
    if message is not None:
        # The player's move ended the game: credit that outcome to the
        # agent's previous move, otherwise a loss is never learned.
        if last_agent_step is not None:
            prev_state, prev_action = last_agent_step
            agent.learn(prev_state, prev_action, reward, game.board.copy(), [])
        return message, last_agent_step

    # Agent's reply; the board is neither won nor full, so a move exists.
    state = game.board.copy()
    action = agent.choose_action(agent.get_state_key(state), game.available_actions())
    game.make_move(action[0], action[1])

    # Score the reply by the board it produced; a finished game has no
    # future actions to bootstrap from.
    message, reward = _game_result(game)
    next_actions = [] if message is not None else game.available_actions()
    agent.learn(state, action, reward, game.board.copy(), next_actions)
    return message, (state, action)


def main():
    """Let a human (X, mouse clicks) play against the Q-learning agent (O).

    While a game is running, a click on an empty cell places the player's
    mark and the agent answers immediately.  When a game ends, the result
    and the Reset/Quit buttons are shown from the next frame on.  The agent
    keeps learning during play and the Q-table is saved when the loop ends,
    even if it ends because of an error.
    """
    game = TicTacToe()
    agent = QLearningAgent()
    agent.load_q_table()
    clock = pygame.time.Clock()
    running = True
    game_over = False
    result_message = ""
    last_agent_step = None  # (state, action) of the agent's latest move

    try:
        while running:
            screen.fill(WHITE)
            draw_board(game.board)

            # Buttons only react in frames where they are actually drawn.
            showing_result = game_over
            if showing_result:
                draw_text(result_message, 100, 30, WHITE, BLUE)
                draw_buttons()

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                    continue
                if event.type != pygame.MOUSEBUTTONDOWN:
                    continue
                mouse_x, mouse_y = event.pos

                if showing_result:
                    if 100 <= mouse_x <= 200 and 200 <= mouse_y <= 230:
                        # Reset button
                        game.reset()
                        game_over = False
                        last_agent_step = None
                    elif 100 <= mouse_x <= 200 and 230 <= mouse_y <= 260:
                        # Quit button
                        running = False
                    continue

                if game_over:
                    continue  # game ended earlier in this batch of events

                message, last_agent_step = _play_turn(
                    game, agent, mouse_y // SQUARE_SIZE, mouse_x // SQUARE_SIZE, last_agent_step
                )
                if message is not None:
                    result_message = message
                    game_over = True

            pygame.display.flip()
            clock.tick(30)  # Control the frame rate of the game
    finally:
        # Save Q-table after the game ends, however the loop was left
        agent.save_q_table()
        pygame.quit()

# Start the game only when this file is executed as a script; note that the
# module-level Pygame initialisation above still runs on import.
if __name__ == "__main__":
    main()