<h3 style="text-align: center;">Tic-Tac-Toe Game with Reinforcement Learning</h3>

<p style="text-align: center;">This project implements a Tic-Tac-Toe game with a reinforcement learning agent using Deep Q-Learning (DQN). The game features a dark mode theme and allows the agent to continuously learn and improve its strategy during gameplay. Built with Pygame for the interface and TensorFlow for the neural network.</p>

<div style="text-align: center;">
  <img src="./Assets/Tic-tac-toe.png" alt="tic tac toe game image" width="500"/>
</div>

<h4>Import libraries</h4>

In [None]:
import numpy as np
import random
import pygame
import sys
import tensorflow as tf
from keras import layers

<h4>Define the Tic-Tac-Toe Environment</h4>

In [2]:
# Initialize pygame before the font and display calls below
pygame.init()

# Constants
WIDTH, HEIGHT = 600, 600  # window size in pixels
LINE_WIDTH = 15  # thickness of the grid lines drawn by draw_lines()
BOARD_ROWS = 3
BOARD_COLS = 3
SQUARE_SIZE = WIDTH // BOARD_COLS  # side length of one board cell in pixels
LINE_COLOR = (50, 50, 50)  # RGB colour of the grid lines
BG_COLOR = (30, 30, 30)  # RGB background colour
CIRCLE_COLOR = (242, 235, 211)  # RGB colour of player 1's circles
CROSS_COLOR = (84, 84, 84)  # RGB colour of player 2's crosses
FONT = pygame.font.SysFont('Arial', 40)  # used for the reset label and the end-of-game message
# Clickable reset button, horizontally centred near the bottom edge of the window
RESET_BUTTON_RECT = pygame.Rect(WIDTH // 2 - 50, HEIGHT - 50, 100, 40)

# Initialize screen
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption('Tic Tac Toe')
screen.fill(BG_COLOR)

# Initialize board: 0 = empty cell, 1 = player 1 (circle), 2 = player 2 (cross)
board = np.zeros((BOARD_ROWS, BOARD_COLS))

def draw_lines():
    """Draw the two horizontal and two vertical grid lines onto the screen."""
    offsets = (SQUARE_SIZE, 2 * SQUARE_SIZE)
    # Horizontal separators span the full window width.
    for y in offsets:
        pygame.draw.line(screen, LINE_COLOR, (0, y), (WIDTH, y), LINE_WIDTH)
    # Vertical separators span the full window height.
    for x in offsets:
        pygame.draw.line(screen, LINE_COLOR, (x, 0), (x, HEIGHT), LINE_WIDTH)

def draw_board():
    """Draw the empty board; currently this is just the grid lines."""
    draw_lines()

def reset_board():
    """Start a fresh game: empty the board and repaint the bare grid."""
    global board
    shape = (BOARD_ROWS, BOARD_COLS)
    # Rebind the global to a brand-new all-zero (all-empty) array.
    board = np.zeros(shape)
    # Wipe any previously drawn figures, then redraw the grid on top.
    screen.fill(BG_COLOR)
    draw_lines()

def make_move(row, col, player):
    """Place ``player``'s mark at (row, col) if that cell is empty.

    Returns True when the mark was placed, False when the cell was
    already occupied (the board is left untouched in that case).
    """
    if board[row][col] != 0:
        return False
    board[row][col] = player
    return True

def check_win(player):
    """Return True if ``player`` owns a complete row, column or diagonal."""
    owned = board == player  # boolean mask of the cells held by this player

    # Any fully owned row?
    if any(np.all(owned[r, :]) for r in range(BOARD_ROWS)):
        return True
    # Any fully owned column?
    if any(np.all(owned[:, c]) for c in range(BOARD_COLS)):
        return True
    # Main diagonal, then the anti-diagonal (main diagonal of the mirrored board).
    if np.all(np.diag(owned)):
        return True
    if np.all(np.diag(np.fliplr(owned))):
        return True
    return False

def get_state():
    """Return a snapshot of the current board.

    A copy is returned rather than the live ``board`` array. Callers keep
    the value as the "before" state while ``make_move`` mutates the board
    in place; returning the live array would make the stored state and the
    next state the same object, so the two could never differ.
    """
    return board.copy()

def draw_figures():
    """Draw a circle for every cell holding 1 and a cross for every cell holding 2."""
    stroke = 15
    near = SQUARE_SIZE // 4  # inset of a cross arm from the cell's top/left edge
    far = 3 * SQUARE_SIZE // 4  # inset of the opposite end of the arm
    for row in range(BOARD_ROWS):
        top = row * SQUARE_SIZE
        for col in range(BOARD_COLS):
            left = col * SQUARE_SIZE
            mark = board[row][col]
            if mark == 1:
                centre = (int(left + SQUARE_SIZE // 2), int(top + SQUARE_SIZE // 2))
                pygame.draw.circle(screen, CIRCLE_COLOR, centre, SQUARE_SIZE // 3, stroke)
            elif mark == 2:
                # Two diagonals: top-left to bottom-right, then top-right to bottom-left.
                pygame.draw.line(screen, CROSS_COLOR, (left + near, top + near), (left + far, top + far), stroke)
                pygame.draw.line(screen, CROSS_COLOR, (left + far, top + near), (left + near, top + far), stroke)

def draw_reset_button():
    """Draw the rounded grey reset button with a centred white 'Reset' label."""
    button = RESET_BUTTON_RECT
    pygame.draw.rect(screen, (100, 100, 100), button, border_radius=10)
    label = FONT.render('Reset', True, (255, 255, 255))
    # Offsets that centre the label inside the button rectangle.
    pad_x = (button.width - label.get_width()) // 2
    pad_y = (button.height - label.get_height()) // 2
    screen.blit(label, (button.x + pad_x, button.y + pad_y))


def display_message(message):
    """Render ``message`` in red, centred in the window."""
    red = (255, 0, 0)
    rendered = FONT.render(message, True, red)
    left = WIDTH // 2 - rendered.get_width() // 2
    top = HEIGHT // 2 - rendered.get_height() // 2
    screen.blit(rendered, (left, top))

<h4>Reinforcement learning Agent</h4>

In [3]:
class DQNAgent:
    """Q-learning agent whose Q-function is a small dense neural network.

    The board passed to ``get_action`` and ``train`` must be a 2-D array in
    which ``0`` marks an empty cell; actions are ``(row, col)`` pairs.
    """

    def __init__(self, state_shape, action_shape, epsilon=0.1, alpha=0.001, gamma=0.95):
        """Store the hyper-parameters and build the network.

        Args:
            state_shape: Input shape of the network (shape of a flattened board).
            action_shape: Number of network outputs, one Q-value per cell.
            epsilon: Probability of choosing a random legal move.
            alpha: Learning rate handed to the Adam optimizer.
            gamma: Discount factor applied to the best next-state Q-value.
        """
        self.state_shape = state_shape
        self.action_shape = action_shape
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network.

        Two hidden ReLU layers of 24 units feed a linear output layer with
        one unit per action; trained with Adam on mean squared error.
        """
        model = tf.keras.Sequential()
        model.add(layers.Dense(24, input_shape=self.state_shape, activation='relu'))
        model.add(layers.Dense(24, activation='relu'))
        model.add(layers.Dense(self.action_shape, activation='linear'))
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.alpha), loss='mse')
        return model

    def get_action(self, state):
        """Pick a legal move for ``state`` using an epsilon-greedy policy.

        Args:
            state: 2-D board array; cells equal to 0 are empty.

        Returns:
            A ``(row, col)`` tuple of ints that always refers to an empty cell.

        Raises:
            ValueError: If the board has no empty cell left.
        """
        free_cells = np.argwhere(state == 0)
        if len(free_cells) == 0:
            raise ValueError("No legal moves available: the board is full")

        # Explore: uniformly random empty cell.
        if np.random.rand() < self.epsilon:
            row, col = random.choice(free_cells)
            return int(row), int(col)

        # Exploit: best Q-value among the empty cells only. Without the mask
        # the arg-max can land on an occupied cell, which the environment
        # rejects, so the caller would keep asking for the same illegal move.
        q_values = self.model.predict(state.reshape(1, -1), verbose=0)[0]
        masked = np.where(state.reshape(-1) == 0, q_values, -np.inf)
        row, col = np.unravel_index(int(np.argmax(masked)), state.shape)
        return int(row), int(col)

    def train(self, state, action, reward, next_state, done):
        """Run one Q-learning update for a single transition.

        Args:
            state: Board before the move.
            action: ``(row, col)`` of the move that was played.
            reward: Immediate reward for the move.
            next_state: Board after the move.
            done: True if the move ended the episode; the target is then
                the bare reward with no bootstrapped future value.
        """
        target = reward
        if not done:
            next_q = self.model.predict(next_state.reshape(1, -1), verbose=0)
            target += self.gamma * np.amax(next_q)
        # Only the Q-value of the played action is moved towards the target;
        # the other outputs keep their current predictions.
        target_f = self.model.predict(state.reshape(1, -1), verbose=0)
        target_f[0][np.ravel_multi_index(action, state.shape)] = target
        self.model.fit(state.reshape(1, -1), target_f, epochs=1, verbose=0)

<h4>Training</h4>

In [4]:
def train_dqn_agent(episodes=50000):
    """Train a new DQN agent (player 2) against a uniformly random player 1.

    Every accepted move of either player is fed to ``agent.train``. A win
    is rewarded from the agent's point of view (+1 if player 2 won, -1 if
    player 1 won); every other move gets a small penalty of -0.01.

    Args:
        episodes: Number of games to play.

    Returns:
        The trained ``DQNAgent``.
    """
    agent = DQNAgent(state_shape=(BOARD_ROWS * BOARD_COLS,), action_shape=BOARD_ROWS * BOARD_COLS)
    for episode in range(episodes):
        reset_board()
        # Work on a private copy so that later moves on the live board can
        # never rewrite the "before" state handed to agent.train().
        state = get_state().copy()
        player = 1

        while True:
            available_actions = np.argwhere(state == 0)
            # Draw check for BOTH players: the ninth move is made by player 1,
            # so a full board is first seen on the agent's turn.
            if len(available_actions) == 0:
                break
            if player == 1:
                action = random.choice(available_actions)
            else:
                action = agent.get_action(state)

            row, col = action
            if not make_move(row, col, player):
                continue  # occupied cell was chosen; ask again

            next_state = get_state().copy()
            if check_win(player):
                reward = 1 if player == 2 else -1
                agent.train(state, (row, col), reward, next_state, done=True)
                break

            # A move that fills the board ends the game in a draw, so it is
            # terminal and must not bootstrap from a non-existent next move.
            board_full = not np.any(next_state == 0)
            reward = -0.01  # Small negative reward for each move
            agent.train(state, (row, col), reward, next_state, done=board_full)
            if board_full:
                break
            state = next_state
            player = 3 - player  # Switch player

        if episode % 1000 == 0:
            print(f"Episode {episode} completed")

    return agent

<h4>Evaluation</h4>

In [5]:
def evaluate_agent(agent, episodes=1000):
    """Play ``agent`` (player 2) against a random player 1 and print its win count.

    Args:
        agent: Object exposing ``get_action(state)`` that returns ``(row, col)``.
        episodes: Number of games to play.
    """
    wins = 0

    for _ in range(episodes):
        reset_board()
        state = get_state()
        player = 1

        while True:
            available_actions = np.argwhere(state == 0)
            # Draw check for BOTH players: the ninth move is made by player 1,
            # so a full board is first seen on the agent's turn.
            if len(available_actions) == 0:
                break
            if player == 1:
                action = random.choice(available_actions)
            else:
                action = agent.get_action(state)

            row, col = action
            if not make_move(row, col, player):
                continue  # occupied cell was chosen; ask again

            state = get_state()
            if check_win(player):
                if player == 2:
                    wins += 1
                break
            player = 3 - player  # Switch player

    print(f"Agent won {wins} out of {episodes} games")

<h4>The game (Enjoy😊)</h4>

In [None]:
def main():
    """Run the interactive game: the human is player 1, a DQN agent is player 2.

    A freshly constructed agent is used and keeps training on every move
    played. The loop handles window close, the reset button and board
    clicks, lets the agent answer, and redraws the window each frame.
    The game ends on a win or when the board is full (draw).
    """
    reset_board()
    player = 1
    agent = DQNAgent(state_shape=(BOARD_ROWS * BOARD_COLS,), action_shape=BOARD_ROWS * BOARD_COLS)
    game_over = False
    message = ""
    # Keep private copies of the board so the "before" state given to
    # agent.train() is not rewritten by later moves on the live board.
    state = get_state().copy()
    running = True

    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
            if event.type == pygame.MOUSEBUTTONDOWN:
                if RESET_BUTTON_RECT.collidepoint(event.pos):
                    reset_board()
                    player = 1
                    game_over = False
                    message = ""
                    state = get_state().copy()
                elif not game_over and player == 1:
                    mouseX = event.pos[0]  # x
                    mouseY = event.pos[1]  # y

                    # Translate the pixel position into a board cell.
                    clicked_row = int(mouseY // SQUARE_SIZE)
                    clicked_col = int(mouseX // SQUARE_SIZE)

                    if make_move(clicked_row, clicked_col, player):
                        next_state = get_state().copy()
                        if check_win(player):
                            message = f"Player {player} wins!"
                            game_over = True
                            agent.train(state, (clicked_row, clicked_col), -1, next_state, done=True)
                        else:
                            # A full board with no winner is a draw. Without this
                            # check the agent would be asked for a move on a full
                            # board every frame.
                            board_full = not np.any(next_state == 0)
                            agent.train(state, (clicked_row, clicked_col), -0.01, next_state, done=board_full)
                            if board_full:
                                message = "It's a draw!"
                                game_over = True
                        state = next_state
                        player = 3 - player  # Switch player

        if not game_over and player == 2:
            row, col = agent.get_action(state)
            if make_move(row, col, player):
                next_state = get_state().copy()
                if check_win(player):
                    message = f"Player {player} wins!"
                    game_over = True
                    agent.train(state, (row, col), 1, next_state, done=True)
                else:
                    board_full = not np.any(next_state == 0)
                    agent.train(state, (row, col), -0.01, next_state, done=board_full)
                    if board_full:
                        message = "It's a draw!"
                        game_over = True
                state = next_state
                player = 3 - player  # Switch player

        # Repaint the whole frame from the current board.
        screen.fill(BG_COLOR)
        draw_lines()
        draw_figures()
        draw_reset_button()
        if game_over:
            display_message(message)
        pygame.display.update()

    pygame.quit()
    sys.exit()

# Start the interactive game only when this code runs as the main module.
if __name__ == "__main__":
    main()

<h3 style="text-align: center;">Thank you</h3>