In [None]:
import numpy as np
np.__version__

'1.23.2'

In [None]:
class TicTacToe:
    """Rules engine for a 3x3 game of tic-tac-toe.

    The board is a ``(row_count, column_count)`` NumPy array in which ``0``
    marks an empty cell and a non-zero value marks the player occupying it.
    ``get_opponent`` negates the player id, so the two players are expected
    to be ``1`` and ``-1``. Actions are flat cell indices counted row by row,
    from ``0`` to ``action_size - 1``.
    """

    def __init__(self):
        self.row_count = 3
        self.column_count = 3
        # One action per board cell.
        self.action_size = self.row_count * self.column_count

    def get_initial_state(self):
        """Return a new, empty board (all zeros)."""
        return np.zeros((self.row_count, self.column_count))

    def get_next_state(self, state, action, player):
        """Place ``player`` on the cell addressed by ``action``.

        Note:
            ``state`` is modified in place and the same array is returned.
            Pass ``state.copy()`` if the previous position must be preserved.
        """
        row = action // self.column_count
        column = action % self.column_count
        state[row, column] = player
        return state

    def get_valid_moves(self, state):
        """Return a flat ``uint8`` mask with 1 for every empty cell, else 0."""
        return (state.reshape(-1) == 0).astype(np.uint8)

    def check_win(self, state, action):
        """Return whether the piece on the ``action`` cell completes a line.

        The row and column through that cell and both board diagonals are
        compared against ``player * line_length``; this relies on cells
        holding only ``-1``, ``0`` or ``1``.

        Returns ``False`` when ``action`` is ``None`` or when the addressed
        cell is empty (no piece there, so nobody can have won with it).
        """
        if action is None:
            return False

        row = action // self.column_count
        column = action % self.column_count
        player = state[row, column]

        # An empty cell gives player == 0, and 0 * n == 0 would match any line
        # whose values happen to sum to zero (e.g. every line of an empty
        # board), producing a bogus win.
        if player == 0:
            return False

        return (
            np.sum(state[row, :]) == player * self.column_count
            or np.sum(state[:, column]) == player * self.row_count
            or np.sum(np.diag(state)) == player * self.row_count
            or np.sum(np.diag(np.flip(state, axis=0))) == player * self.row_count
        )

    def get_value_and_terminated(self, state, action):
        """Evaluate the position reached after ``action`` was played.

        Returns:
            ``(1, True)`` if the move won the game, ``(0, True)`` if no empty
            cells remain (draw), otherwise ``(0, False)``.
        """
        if self.check_win(state, action):
            return 1, True
        if np.sum(self.get_valid_moves(state)) == 0:
            return 0, True
        return 0, False

    def get_opponent(self, player):
        """Return the other player's id (the negation of ``player``)."""
        return -player

In [None]:
import numpy as np
import matplotlib.pyplot as plt

class TicTacToe:
    """Rules engine for a 3x3 game of tic-tac-toe.

    The board is a ``(row_count, column_count)`` NumPy array in which ``0``
    marks an empty cell and a non-zero value marks the player occupying it.
    ``get_opponent`` negates the player id, so the two players are expected
    to be ``1`` and ``-1``. Actions are flat cell indices counted row by row,
    from ``0`` to ``action_size - 1``.
    """

    def __init__(self):
        self.row_count = 3
        self.column_count = 3
        # One action per board cell.
        self.action_size = self.row_count * self.column_count

    def get_initial_state(self):
        """Return a new, empty board (all zeros)."""
        return np.zeros((self.row_count, self.column_count))

    def get_next_state(self, state, action, player):
        """Place ``player`` on the cell addressed by ``action``.

        Note:
            ``state`` is modified in place and the same array is returned.
            Pass ``state.copy()`` if the previous position must be preserved.
        """
        row = action // self.column_count
        column = action % self.column_count
        state[row, column] = player
        return state

    def get_valid_moves(self, state):
        """Return a flat ``uint8`` mask with 1 for every empty cell, else 0."""
        return (state.reshape(-1) == 0).astype(np.uint8)

    def check_win(self, state, action):
        """Return whether the piece on the ``action`` cell completes a line.

        The row and column through that cell and both board diagonals are
        compared against ``player * line_length``; this relies on cells
        holding only ``-1``, ``0`` or ``1``.

        Returns ``False`` when ``action`` is ``None`` or when the addressed
        cell is empty (no piece there, so nobody can have won with it).
        """
        if action is None:
            return False

        row = action // self.column_count
        column = action % self.column_count
        player = state[row, column]

        # An empty cell gives player == 0, and 0 * n == 0 would match any line
        # whose values happen to sum to zero (e.g. every line of an empty
        # board), producing a bogus win.
        if player == 0:
            return False

        return (
            np.sum(state[row, :]) == player * self.column_count
            or np.sum(state[:, column]) == player * self.row_count
            or np.sum(np.diag(state)) == player * self.row_count
            or np.sum(np.diag(np.flip(state, axis=0))) == player * self.row_count
        )

    def get_value_and_terminated(self, state, action):
        """Evaluate the position reached after ``action`` was played.

        Returns:
            ``(1, True)`` if the move won the game, ``(0, True)`` if no empty
            cells remain (draw), otherwise ``(0, False)``.
        """
        if self.check_win(state, action):
            return 1, True
        if np.sum(self.get_valid_moves(state)) == 0:
            return 0, True
        return 0, False

    def get_opponent(self, player):
        """Return the other player's id (the negation of ``player``)."""
        return -player

def plot_tictactoe(state):
    """Draw a tic-tac-toe board with matplotlib and show it.

    Cells equal to ``1`` are rendered as a blue "X" and cells equal to ``-1``
    as a red "O"; any other value is left blank.

    Args:
        state: 2-D board array. Grid lines and axis limits are derived from
            ``state.shape`` so they always agree with the cells being drawn.
    """
    n_rows, n_cols = state.shape

    fig, ax = plt.subplots(figsize=(3, 3))
    # Blank background image; it only establishes the cell coordinate system
    # (cell (r, c) is centred at x=c, y=r).
    ax.imshow(np.zeros_like(state), cmap='Greys', alpha=0.5)

    # Draw the grid lines on the boundaries between neighbouring cells.
    for i in range(1, n_rows):
        ax.axhline(i - 0.5, color='black', linewidth=2)
    for j in range(1, n_cols):
        ax.axvline(j - 0.5, color='black', linewidth=2)

    # Plot X's and O's
    for r in range(n_rows):
        for c in range(n_cols):
            if state[r, c] == 1:
                ax.text(c, r, 'X', ha='center', va='center', fontsize=40, color='blue')
            elif state[r, c] == -1:
                ax.text(c, r, 'O', ha='center', va='center', fontsize=40, color='red')

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(-0.5, n_cols - 0.5)
    # Inverted y-limits keep row 0 at the top, matching the printed array.
    ax.set_ylim(n_rows - 0.5, -0.5)
    ax.set_aspect('equal', adjustable='box')
    plt.show()


# Interactive two-player game: players 1 and -1 alternate typing a cell index
# until one of them wins or the board is full.
tictactoe = TicTacToe()
player = 1
state = tictactoe.get_initial_state()

while True:
    print(state)
    valid_moves = tictactoe.get_valid_moves(state)
    print("valid_moves", [i for i in range(tictactoe.action_size) if valid_moves[i] == 1])

    # Non-numeric input would otherwise abort the game with a ValueError.
    try:
        action = int(input(f"{player}:"))
    except ValueError:
        print("action not valid")
        continue

    # Range check first: an index past the end raises IndexError, and a
    # negative one would silently wrap around to a cell at the end of the board.
    if not 0 <= action < tictactoe.action_size or valid_moves[action] == 0:
        print("action not valid")
        continue

    state = tictactoe.get_next_state(state, action, player)

    value, is_terminal = tictactoe.get_value_and_terminated(state, action)

    if is_terminal:
        print(state)
        if value == 1:
            print(player, "won")
        else:
            print("draw")
        plot_tictactoe(state)
        break

    player = tictactoe.get_opponent(player)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
valid_moves [0, 1, 2, 3, 4, 5, 6, 7, 8]


In [2]:
import matplotlib.pyplot as plt
import numpy as np

def plot_tictactoe(state):
    """Draw a tic-tac-toe board with matplotlib and show it.

    Cells equal to ``1`` are rendered as a blue "X" and cells equal to ``-1``
    as a red "O"; any other value is left blank.

    Args:
        state: 2-D board array. Grid lines and axis limits are derived from
            ``state.shape`` so they always agree with the cells being drawn.
    """
    n_rows, n_cols = state.shape

    fig, ax = plt.subplots(figsize=(3, 3))
    # Blank background image; it only establishes the cell coordinate system
    # (cell (r, c) is centred at x=c, y=r).
    ax.imshow(np.zeros_like(state), cmap='Greys', alpha=0.5)

    # Draw the grid lines on the boundaries between neighbouring cells.
    for i in range(1, n_rows):
        ax.axhline(i - 0.5, color='black', linewidth=2)
    for j in range(1, n_cols):
        ax.axvline(j - 0.5, color='black', linewidth=2)

    # Plot X's and O's
    for r in range(n_rows):
        for c in range(n_cols):
            if state[r, c] == 1:
                ax.text(c, r, 'X', ha='center', va='center', fontsize=40, color='blue')
            elif state[r, c] == -1:
                ax.text(c, r, 'O', ha='center', va='center', fontsize=40, color='red')

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(-0.5, n_cols - 0.5)
    # Inverted y-limits keep row 0 at the top, matching the printed array.
    ax.set_ylim(n_rows - 0.5, -0.5)
    ax.set_aspect('equal', adjustable='box')
    plt.show()

In [3]:
# Interactive two-player game: players 1 and -1 alternate typing a cell index
# until one of them wins or the board is full.
# NOTE: the cell defining the TicTacToe class must have been executed in the
# current kernel first; otherwise this cell fails with a NameError.
tictactoe = TicTacToe()
player = 1

state = tictactoe.get_initial_state()


while True:
    print(state)
    valid_moves = tictactoe.get_valid_moves(state)
    print("valid_moves", [i for i in range(tictactoe.action_size) if valid_moves[i] == 1])

    # Non-numeric input would otherwise abort the game with a ValueError.
    try:
        action = int(input(f"{player}:"))
    except ValueError:
        print("action not valid")
        continue

    # Range check first: an index past the end raises IndexError, and a
    # negative one would silently wrap around to a cell at the end of the board.
    if not 0 <= action < tictactoe.action_size or valid_moves[action] == 0:
        print("action not valid")
        continue

    state = tictactoe.get_next_state(state, action, player)

    value, is_terminal = tictactoe.get_value_and_terminated(state, action)

    if is_terminal:
        print(state)
        if value == 1:
            print(player, "won")
        else:
            print("draw")
        plot_tictactoe(state)  # Show the final position graphically
        break


    player = tictactoe.get_opponent(player)

NameError: name 'TicTacToe' is not defined