<a href="https://colab.research.google.com/github/kozoB/TicTacToeRL/blob/main/TicTacToeRL_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reinforcement Learning

## Module Installations And Imports

In [42]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random

## Define Labels

In [43]:
# Define state labels for tic tac toe game cells ('-' (empty), 'X' and 'O')
mark_labels = ['-', 'X', 'O']
# Define game result labels (0 - game ongoing, 1 - 'X' won, 2 - 'O' won, 3 - draw)
game_result_labels = ['ongoing', 'X-won', 'O-won', 'draw']

## Create Game Environment And Rules

In [44]:
class TicTacToeEnv(Env):
  def __init__(self):
    # Actions we can take - Square in grid to mark (1-9)
    self.action_space = Discrete(9)
    # Observation space: 3x3 grid with 3 possible values (-, X, O) encoded as integers (0, 1, 2)
    self.observation_space = Box(low=0, high=2, shape=(3, 3), dtype=np.int32)
    # Initialize the game grid
    self.game_grid = np.full((3, 3), 0, dtype=np.int32)
    # Set starting player as the index of 'X' (1)
    self.current_player = 1 # 1 for 'X', 2 for 'O'
    # Episode status
    self.done = False

  def step(self, action):
    # Convert action (0-8) to row and column indices (0-2)
    row, col = divmod(action, 3)

    # Check if the chosen sqaure is empty
    if self.game_grid[row, col] != 0:
      # Invalid action, return a large negative reward
      return self.game_grid, -100, False, {}

    # Mark the chosen square with the current player's mark
    self.game_grid[row, col] = self.current_player

    # Check the game result
    game_result = self.check_game_result()

    # Determine the reward based on the game result
    if game_result == 1: # Assuming the agent is 'X'
        reward = +1 if self.current_player == 1 else -1  # 'X' wins
        self.done = True
    elif game_result == 2:
        reward = -1 if self.current_player == 1 else +1  # 'O' wins
        self.done = True
    elif game_result == 3:
        reward = +0.1  # Draw
        self.done = True
    else:
        reward = 0  # Game ongoing

    # Switch to the other player
    self.current_player = 2 if self.current_player == 1 else 1

    # Return the updated state, reward, done flag, and additional info
    return self.game_grid.copy(), reward, self.done, {}


  def reset(self):
    # Reset the game grid and player
    self.game_grid = np.full((3, 3), 0, dtype=np.int32)
    self.current_player = 1  # 'X' goes first
    self.done = False

    # Return the initial observation
    return self.game_grid.copy()

  def render(self):
    print('\nGrid state:\n***********************************************')

    # Create a 3x3 array for rendering the grid with the appropriate symbols
    grid_drawing = np.full((3, 3), '-', dtype=str)

    # Loop through each cell in the game grid
    for row in range(3):
      for col in range(3):
        # Get the value in the current cell of the game grid
        square = self.game_grid[row, col]
        # Convert the numerical value to the corresponding mark ('-', 'X', 'O')
        grid_drawing[row, col] = mark_labels[square]

    # Print the rendered game grid
    for row in grid_drawing:
        print(' '.join(row))
    print('***********************************************\n')

  def check_game_result(self):
    # Check rows, columns, and diagonals for a win condition
    for i in range(3):
        # Check rows
        if self.game_grid[i, 0] == self.game_grid[i, 1] == self.game_grid[i, 2] and self.game_grid[i, 0] != 0:
            return self.game_grid[i, 0]
        # Check columns
        if self.game_grid[0, i] == self.game_grid[1, i] == self.game_grid[2, i] and self.game_grid[0, i] != 0:
            return self.game_grid[0, i]

    # Check diagonals
    if self.game_grid[0, 0] == self.game_grid[1, 1] == self.game_grid[2, 2] and self.game_grid[0, 0] != 0:
        return self.game_grid[0, 0]
    if self.game_grid[0, 2] == self.game_grid[1, 1] == self.game_grid[2, 0] and self.game_grid[0, 2] != 0:
        return self.game_grid[0, 2]

    # Check for draw (grid is full)
    if not np.any(self.game_grid == 0):
        return 3  # Draw

    # Game ongoing
    return 0


In [45]:
env = TicTacToeEnv()

In [46]:
action = env.action_space.sample()
print(f"marked square idx: {action}")
env.render()

marked square idx: 2

Grid state:
***********************************************
- - -
- - -
- - -
***********************************************



In [47]:
episodes = 10

for episode in range(1, episodes+1):
  state = env.reset()
  done = False
  score = 0

  while not done:
    env.render()
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)
    score += reward

    # Update the current state for the next iteration
    state = next_state

  env.render()
  print(f'Episode: {episode} Score: {score}\n')


Grid state:
***********************************************
- - -
- - -
- - -
***********************************************


Grid state:
***********************************************
- - -
- - -
X - -
***********************************************


Grid state:
***********************************************
- - -
- - O
X - -
***********************************************


Grid state:
***********************************************
- - -
- X O
X - -
***********************************************


Grid state:
***********************************************
- - O
- X O
X - -
***********************************************


Grid state:
***********************************************
- X O
- X O
X - -
***********************************************


Grid state:
***********************************************
- X O
- X O
X - O
***********************************************

Episode: 1 Score: 1


Grid state:
***********************************************
- - -
- - -
- - -
****

# Create Deep Learning Model

## Imports

In [48]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

In [49]:
states = env.observation_space.shape
actions = env.action_space.n

print(states)
print(actions)

(3, 3)
9


## Create DL Model

In [50]:
from keras import __version__
import tensorflow as tf
tf.keras.__version__ = __version__

In [51]:
!pip install keras-rl2



In [52]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
from tensorflow.keras.optimizers.legacy import Adam

In [53]:
def build_model(actions):
    model = Sequential()
    # Flatten the 3x3 grid to a 1D array
    model.add(Flatten(input_shape=(1, 3, 3)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model


In [54]:
model = build_model(actions)

In [55]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 9)                 0         
                                                                 
 dense_3 (Dense)             (None, 24)                240       
                                                                 
 dense_4 (Dense)             (None, 24)                600       
                                                                 
 dense_5 (Dense)             (None, 9)                 225       
                                                                 
Total params: 1065 (4.16 KB)
Trainable params: 1065 (4.16 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Build Agent With Keras-RL

In [56]:
def build_agent(model, actions):
  #policy = BoltzmannQPolicy()
  policy = EpsGreedyQPolicy(eps=0.6)
  memory = SequentialMemory(limit=100000, window_length=1)
  dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=5000, target_model_update=1e-2)
  return dqn

In [57]:
optimizer = Adam(learning_rate=1e-2)

In [58]:
dqn = build_agent(model, actions)
dqn.compile(optimizer=optimizer, metrics=['mae'])
dqn.fit(env, nb_steps=1000000, visualize=False, verbose=1)

Training for 1000000 steps ...
Interval 1 (0 steps performed)
586 episodes - episode_reward: -959.160 [-9099.900, 1.000] - loss: 1467.696 - mae: 162.128 - mean_q: -125.792

Interval 2 (10000 steps performed)
858 episodes - episode_reward: -398.325 [-6199.000, 1.000] - loss: 529.134 - mae: 76.447 - mean_q: -28.412

Interval 3 (20000 steps performed)
902 episodes - episode_reward: -351.543 [-3199.000, 1.000] - loss: 359.114 - mae: 54.222 - mean_q: -3.808

Interval 4 (30000 steps performed)
899 episodes - episode_reward: -352.822 [-2999.000, 1.000] - loss: 318.457 - mae: 51.610 - mean_q: 2.717

Interval 5 (40000 steps performed)
881 episodes - episode_reward: -371.542 [-2699.000, 1.000] - loss: 343.046 - mae: 51.666 - mean_q: 3.951

Interval 6 (50000 steps performed)
911 episodes - episode_reward: -337.515 [-2699.000, 1.000] - loss: 331.226 - mae: 49.939 - mean_q: 5.632

Interval 7 (60000 steps performed)
924 episodes - episode_reward: -338.290 [-2999.000, 1.000] - loss: 309.414 - mae: 48

<keras.src.callbacks.History at 0x7bf09961ff70>

In [59]:
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: 1.000, steps: 7
Episode 2: reward: 1.000, steps: 7
Episode 3: reward: 1.000, steps: 7
Episode 4: reward: 1.000, steps: 7
Episode 5: reward: 1.000, steps: 7
Episode 6: reward: 1.000, steps: 7
Episode 7: reward: 1.000, steps: 7
Episode 8: reward: 1.000, steps: 7
Episode 9: reward: 1.000, steps: 7
Episode 10: reward: 1.000, steps: 7
Episode 11: reward: 1.000, steps: 7
Episode 12: reward: 1.000, steps: 7
Episode 13: reward: 1.000, steps: 7
Episode 14: reward: 1.000, steps: 7
Episode 15: reward: 1.000, steps: 7
Episode 16: reward: 1.000, steps: 7
Episode 17: reward: 1.000, steps: 7
Episode 18: reward: 1.000, steps: 7
Episode 19: reward: 1.000, steps: 7
Episode 20: reward: 1.000, steps: 7
Episode 21: reward: 1.000, steps: 7
Episode 22: reward: 1.000, steps: 7
Episode 23: reward: 1.000, steps: 7
Episode 24: reward: 1.000, steps: 7
Episode 25: reward: 1.000, steps: 7
Episode 26: reward: 1.000, steps: 7
Episode 27: reward: 1.000, steps: 7
Episode 

# Save The Model And Agent

In [60]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [61]:
path = "/content/drive/MyDrive/Machine Learning Projects/DL Practice/Tic Tac Toe RL"

## Save Model

In [62]:
import os

# Create directory if it doesn't exist
os.makedirs(f"{path}/tic_tac_toe_model", exist_ok=True)

model.save(f"{path}/tic_tac_toe_model/tic_tac_toe_model.h5")

## Save Agent

In [63]:
import json

# Get the agent's configuration dictionary
agent_config = dqn.get_config()

# Save the agent's configuration to a JSON file
with open(f"{path}/tic_tac_toe_agent_weights/tic_tac_toe_agent_config.json", "w") as config_out:
    json.dump(agent_config, config_out)

# Create directory if it doesn't exist
os.makedirs(f"{path}/tic_tac_toe_agent_weights", exist_ok=True)

# Save the agent's weights
dqn.save_weights(f"{path}/tic_tac_toe_agent_weights/tic_tac_toe_agent_weights.h5f", overwrite=True)

## Load Model And Agent

# Prepare Environment For Model And Agent Testing

In [12]:
from keras import __version__
import tensorflow as tf
tf.keras.__version__ = __version__

In [2]:
!pip install keras-rl2

Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.1/52.1 kB[0m [31m998.1 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: keras-rl2
Successfully installed keras-rl2-1.0.5


In [43]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
import json

In [44]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [45]:
path = "/content/drive/MyDrive/Machine Learning Projects/DL Practice/Tic Tac Toe RL"

In [46]:
def build_agent(model, actions):
  #policy = BoltzmannQPolicy()
  policy = EpsGreedyQPolicy(eps=0.6)
  memory = SequentialMemory(limit=50000, window_length=1)
  dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
  return dqn

In [47]:
from keras.models import model_from_config
from rl.policy import BoltzmannQPolicy
from rl.policy import EpsGreedyQPolicy

# Load the agent's configuration from the JSON file
with open(f"{path}/tic_tac_toe_agent_weights/tic_tac_toe_agent_config.json", "r") as config_in:
    config = json.load(config_in)

# Load the agent's model
model = model_from_config(config["model"])

# Load the agent's weights
model.load_weights(f"{path}/tic_tac_toe_agent_weights/tic_tac_toe_agent_weights.h5f")

# Rebuild the agent with the loaded model and its configuration
dqn = build_agent(model, actions)

In [48]:
states = env.observation_space.shape
actions = env.action_space.n

In [49]:
from keras.models import model_from_config

# Load the agent's configuration from the JSON file
with open(f"{path}/tic_tac_toe_agent_weights/tic_tac_toe_agent_config.json", "r") as config_in:
    config = json.load(config_in)

# Load the agent's model
model = model_from_config(config["model"])

# Load the agent's weights
model.load_weights(f"{path}/tic_tac_toe_agent_weights/tic_tac_toe_agent_weights.h5f")

# Rebuild the agent with the loaded model and its configuration
dqn = build_agent(model, actions)

In [50]:
# Define state labels for tic tac toe game cells ('-' (empty), 'X' and 'O')
mark_labels = ['-', 'X', 'O']
# Define game result labels (0 - game ongoing, 1 - 'X' won, 2 - 'O' won, 3 - draw)
game_result_labels = ['ongoing', 'X-won', 'O-won', 'draw']

In [51]:
def build_model(actions):
    model = Sequential()
    # Flatten the 3x3 grid to a 1D array
    model.add(Flatten(input_shape=(1, 3, 3)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model


## Agent VS Agent Environment

In [52]:
class AgentVSAgentEnv(Env):
  def __init__(self):
    # Actions we can take - Square in grid to mark (1-9)
    self.action_space = Discrete(9)
    # Observation space: 3x3 grid with 3 possible values (-, X, O) encoded as integers (0, 1, 2)
    self.observation_space = Box(low=0, high=2, shape=(3, 3), dtype=np.int32)
    # Initialize the game grid
    self.game_grid = np.full((3, 3), 0, dtype=np.int32)
    # Set starting player as the index of 'X' (1)
    self.current_player = 1 # 1 for 'X', 2 for 'O'
    # Episode status
    self.done = False

  def step(self, action):
    # Convert action (0-8) to row and column indices (0-2)
    row, col = divmod(action, 3)

    # Check if the chosen sqaure is empty
    if self.game_grid[row, col] != 0:
      # Invalid action, return a large negative reward
      return self.game_grid, -10, False, {}

    # Mark the chosen square with the current player's mark
    self.game_grid[row, col] = self.current_player

    # Check the game result
    game_result = self.check_game_result()

    # Determine the reward based on the game result
    if game_result == 1:
        reward = 1 if self.current_player == 1 else -2  # 'X' wins
        self.done = True
    elif game_result == 2:
        reward = -2 if self.current_player == 1 else 1  # 'O' wins
        self.done = True
    elif game_result == 3:
        reward = -1  # Draw
        self.done = True
    else:
        reward = 0  # Game ongoing

    # Switch to the other player
    self.current_player = 2 if self.current_player == 1 else 1

    # Return the updated state, reward, done flag, and additional info
    return self.game_grid.copy(), reward, self.done, {}


  def reset(self):
    # Reset the game grid and player
    self.game_grid = np.full((3, 3), 0, dtype=np.int32)
    self.current_player = 1  # 'X' goes first
    self.done = False

    # Return the initial observation
    return self.game_grid.copy()

  def render(self):
    print('\nGrid state:\n******')

    # Create a 3x3 array for rendering the grid with the appropriate symbols
    grid_drawing = np.full((3, 3), '-', dtype=str)

    # Loop through each cell in the game grid
    for row in range(3):
      for col in range(3):
        # Get the value in the current cell of the game grid
        square = self.game_grid[row, col]
        # Convert the numerical value to the corresponding mark ('-', 'X', 'O')
        grid_drawing[row, col] = mark_labels[square]

    # Print the rendered game grid
    for row in grid_drawing:
        print(' '.join(row))
    print('******\n')

  def check_game_result(self):
    # Check rows, columns, and diagonals for a win condition
    for i in range(3):
        # Check rows
        if self.game_grid[i, 0] == self.game_grid[i, 1] == self.game_grid[i, 2] and self.game_grid[i, 0] != 0:
            return self.game_grid[i, 0]
        # Check columns
        if self.game_grid[0, i] == self.game_grid[1, i] == self.game_grid[2, i] and self.game_grid[0, i] != 0:
            return self.game_grid[0, i]

    # Check diagonals
    if self.game_grid[0, 0] == self.game_grid[1, 1] == self.game_grid[2, 2] and self.game_grid[0, 0] != 0:
        return self.game_grid[0, 0]
    if self.game_grid[0, 2] == self.game_grid[1, 1] == self.game_grid[2, 0] and self.game_grid[0, 2] != 0:
        return self.game_grid[0, 2]

    # Check for draw (grid is full)
    if not np.any(self.game_grid == 0):
        return 3  # Draw

    # Game ongoing
    return 0


In [53]:
env = AgentVSAgentEnv()

In [54]:
state = env.reset()
done = False
score = 0

while not done:
    env.render()
    action = dqn.forward(state)  # Use forward method instead of act
    next_state, reward, done, info = env.step(action)
    score += reward

    # Update the current state for the next iteration
    state = next_state

env.render()
print(f'Score: {score}\n')


Grid state:
******
- - -
- - -
- - -
******


Grid state:
******
- X -
- - -
- - -
******


Grid state:
******
- X -
- - -
- - O
******


Grid state:
******
- X -
X - -
- - O
******


Grid state:
******
- X O
X - -
- - O
******


Grid state:
******
- X O
X X -
- - O
******


Grid state:
******
O X O
X X -
- - O
******


Grid state:
******
O X O
X X -
- X O
******

Score: 1



## Agent VS Human Environment

In [55]:
class AgentVSHumanEnv(Env):
    def __init__(self):
        # Actions we can take - Square in grid to mark (1-9)
        self.action_space = Discrete(9)
        # Observation space: 3x3 grid with 3 possible values (-, X, O) encoded as integers (0, 1, 2)
        self.observation_space = Box(low=0, high=2, shape=(3, 3), dtype=np.int32)
        # Initialize the game grid
        self.game_grid = np.full((3, 3), 0, dtype=np.int32)
        # Set starting player as the index of 'X' (1)
        self.current_player = 1 # 1 for 'X', 2 for 'O'
        # Episode status
        self.done = False

    def step(self, action):
        # Convert action (0-8) to row and column indices (0-2)
        row, col = divmod(action, 3)

        # Check if the chosen square is empty
        if self.game_grid[row, col] != 0:
            # Invalid action, return a large negative reward
            return self.game_grid, -10, False, {}

        # Mark the chosen square with the current player's mark
        self.game_grid[row, col] = self.current_player

        # Check the game result
        game_result = self.check_game_result()

        # Determine the reward based on the game result
        if game_result == 1:
            reward = 1 if self.current_player == 1 else -2  # 'X' wins
            self.done = True
        elif game_result == 2:
            reward = -2 if self.current_player == 1 else 1  # 'O' wins
            self.done = True
        elif game_result == 3:
            reward = -1  # Draw
            self.done = True
        else:
            reward = 0  # Game ongoing

        # Switch to the other player
        self.current_player = 2 if self.current_player == 1 else 1

        # Return the updated state, reward, done flag, and additional info
        return self.game_grid.copy(), reward, self.done, {}

    def reset(self):
        # Reset the game grid and player
        self.game_grid = np.full((3, 3), 0, dtype=np.int32)
        self.current_player = 1  # 'X' goes first
        self.done = False

        # Return the initial observation
        return self.game_grid.copy()

    def render(self):
        print('\nGrid state:\n******')

        # Create a 3x3 array for rendering the grid with the appropriate symbols
        grid_drawing = np.full((3, 3), '-', dtype=str)

        # Loop through each cell in the game grid
        for row in range(3):
            for col in range(3):
                # Get the value in the current cell of the game grid
                square = self.game_grid[row, col]
                # Convert the numerical value to the corresponding mark ('-', 'X', 'O')
                grid_drawing[row, col] = mark_labels[square]

        # Print the rendered game grid
        for row in grid_drawing:
            print(' '.join(row))
        print('******\n')

    def check_game_result(self):
        # Check rows, columns, and diagonals for a win condition
        for i in range(3):
            # Check rows
            if self.game_grid[i, 0] == self.game_grid[i, 1] == self.game_grid[i, 2] and self.game_grid[i, 0] != 0:
                return self.game_grid[i, 0]
            # Check columns
            if self.game_grid[0, i] == self.game_grid[1, i] == self.game_grid[2, i] and self.game_grid[0, i] != 0:
                return self.game_grid[0, i]

        # Check diagonals
        if self.game_grid[0, 0] == self.game_grid[1, 1] == self.game_grid[2, 2] and self.game_grid[0, 0] != 0:
            return self.game_grid[0, 0]
        if self.game_grid[0, 2] == self.game_grid[1, 1] == self.game_grid[2, 0] and self.game_grid[0, 2] != 0:
            return self.game_grid[0, 2]

        # Check for draw (grid is full)
        if not np.any(self.game_grid == 0):
            return 3  # Draw

        # Game ongoing
        return 0


In [56]:
env = AgentVSHumanEnv()

In [57]:
# Reset the environment
state = env.reset()
done = False
score = 0

# Choose player symbol (X or O)
player_symbol = input("Choose your symbol:\n1 for X\n2 for O\n").upper()

# Determine the agent's symbol
agent_symbol = '1' if player_symbol == '2' else '2'

if player_symbol == '1':
  env.render()

# Game loop
while not done:
  # Your turn (if applicable)
  if env.current_player == int(player_symbol):  # Your turn
    print(f"Your turn! ({mark_labels[env.current_player]})")
    # Allow the palyer to choose only valid input
    while True:
      player_selected_square = int(input("\nChoose Square (0-8): ").upper()) # (0-8)
      if player_selected_square < 0 or player_selected_square > 8:
        print("Invalid Square number. Valid squares are 0-8. try another square!")
        continue

      row, col = divmod(player_selected_square, 3)
      # try:
      if env.game_grid[row][col] == 0:
        break
      else:
        print("Square already marked, try another square!")

    env.game_grid[row][col] = player_symbol
    env.render()
    result = env.check_game_result()
    # Check if turn ended in a draw/win
    if result == 3:
      done = True
      break
    elif result != 0:
      winner = int(player_symbol)
      done = True
      break

    # Change player to agent
    env.current_player = int(agent_symbol)


  # Agent's turn
  else:
    print("Agent's turn!")
    action = dqn.forward(state)

    # Perform the action
    next_state, reward, done, _ = env.step(action)
    score += reward
    state = next_state
    env.render()
    result = env.check_game_result()
    if result == 3:
      done = True
      break
    elif result != 0:
      winner = int(agent_symbol)
      done = True
      break

if env.check_game_result() == 3:
  print(f"\nGame ended in a Draw!")
else:
  print(f"\nGame ended in a Victory for {mark_labels[winner]}!")
  if winner == int(player_symbol):
    print("You won against the AI!")
  else:
    print("You lost against the AI!")

print(f'\nAI Score: {score}\n')


Choose your symbol:
1 for X
2 for O
1

Grid state:
******
- - -
- - -
- - -
******

Your turn! (X)

Choose Square (0-8): 4

Grid state:
******
- - -
- X -
- - -
******

Agent's turn!

Grid state:
******
- O -
- X -
- - -
******

Your turn! (X)

Choose Square (0-8): 8

Grid state:
******
- O -
- X -
- - X
******

Agent's turn!

Grid state:
******
- O -
- X -
- - X
******

Agent's turn!

Grid state:
******
- O O
- X -
- - X
******

Your turn! (X)

Choose Square (0-8): 5

Grid state:
******
- O O
- X X
- - X
******

Agent's turn!

Grid state:
******
- O O
O X X
- - X
******

Your turn! (X)

Choose Square (0-8): 0

Grid state:
******
X O O
O X X
- - X
******


Game ended in a Victory for X!
You won against the AI!

AI Score: -10

