In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [1]:

class TicTacToe:
    """Minimal two-player Tic Tac Toe environment on a flat 9-cell board.

    Cells are indexed 0-8 in row-major order. A cell holds 0 while empty and
    -1 or 1 once the corresponding player has marked it. Player -1 moves first.
    """

    # Every index triple that forms a line: rows, then columns, then diagonals.
    WIN_CONDITIONS = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        """Start with an empty board and player -1 to move."""
        self.board = [0 for _ in range(9)]
        self.current_player = -1

    def print_board(self):
        """Print the board as three `a|b|c` rows separated by dashed lines."""
        for start in range(0, 9, 3):
            print("|".join(str(cell) for cell in self.board[start:start + 3]))
            # Separator goes between rows only, not after the last one.
            if start < 6:
                print("-" * 5)

        print()

    def check_win(self, player):
        """Return True if `player` occupies all three cells of any line."""
        return any(
            all(self.board[i] == player for i in line)
            for line in self.WIN_CONDITIONS
        )

    def step(self, position):
        """Place the current player's mark at `position` and advance the game.

        Args:
            position: Cell index in the range 0-8.

        Returns:
            A tuple `(board, player, terminated)`. `board` is the environment's
            own list (not a copy). The meaning of `player` depends on the outcome:
              * win:  the winner (the player who just moved), terminated=True
              * draw: 0, terminated=True
              * game continues: the player who moves next, terminated=False
              * cell already occupied: the unchanged current player,
                terminated=False (a message is printed, the board is untouched)

        Raises:
            IndexError: If `position` is outside 0-8.

        Note:
            This method does not itself refuse moves after a terminal state;
            callers are expected to stop (or call `reset`) once `terminated`
            is True.
        """
        # Reject out-of-range indices explicitly: plain list indexing would let
        # a negative position wrap around and silently mark the wrong cell.
        if not 0 <= position < len(self.board):
            raise IndexError(
                f"position must be in the range 0-8, got {position}"
            )

        if self.board[position] != 0:
            print("Cell already occupied. Try again.")
            return self.board, self.current_player, False

        self.board[position] = self.current_player
        if self.check_win(self.current_player):
            return self.board, self.current_player, True
        if 0 not in self.board:
            return self.board, 0, True

        # Hand the turn to the other player.
        self.current_player = 1 if self.current_player == -1 else -1
        return self.board, self.current_player, False

    def reset(self):
        """Restore the initial state by re-running `__init__`."""
        self.__init__()


In [None]:
# Interactive console game: prompt the current player for a move until the
# environment reports a win or a draw.
env = TicTacToe()
while True:
    env.print_board() # You can comment this part out if you don't want to see the board
    raw_move = input(f"Player {env.current_player}, enter your move (0-8): ")
    try:
        position = int(raw_move)
    except ValueError:
        # Non-numeric input would otherwise crash the loop; treat it like any
        # other invalid move and ask again.
        print("Invalid position. Please try again.")
        continue
    if not (0 <= position < 9):
        print("Invalid position. Please try again.")
        continue

    # On an occupied cell, step() prints a message and returns
    # terminated=False, so the same player is simply prompted again.
    board, player, terminated = env.step(position)

    if terminated:
        # On termination `player` is the winner (-1 or 1), or 0 for a draw.
        if player == -1:
            print("Player -1 wins")
        elif player == 1:
            print("Player 1 wins")
        elif player == 0:
            print("It's a draw")
        break

In [None]:
# Using the GUI version is optional. When testing your algorithm, we will use code similar to the above.

import tkinter as tk
from tkinter import messagebox

class TicTacToeGUI:
    """Tkinter front end that drives a `TicTacToe` game through a 3x3 button grid."""

    def __init__(self, root):
        """Attach to the given Tk root window, create a fresh game and build the grid."""
        self.root = root
        self.root.title("Tic Tac Toe")
        self.game = TicTacToe()
        self.buttons = []
        self.create_board()

    def create_board(self):
        """Create the nine buttons in row-major order and lay them out on a grid."""
        for index in range(9):
            row, col = divmod(index, 3)
            # Bind row/col as lambda defaults so each button keeps its own
            # coordinates instead of sharing the loop's final values.
            cell = tk.Button(
                self.root,
                text=" ",
                font=('Arial', 20),
                width=5,
                height=2,
                command=lambda r=row, c=col: self.on_click(r, c),
            )
            cell.grid(row=row, column=col)
            self.buttons.append(cell)

    def update_board(self):
        """Refresh every button label from the game board (-1 -> X, 1 -> O, else blank)."""
        symbols = {-1: 'X', 1: 'O'}
        for cell, mark in zip(self.buttons, self.game.board):
            cell['text'] = symbols.get(mark, ' ')

    def on_click(self, row, col):
        """Play the clicked cell, redraw the board and announce the result if the game ended."""
        _, player, terminated = self.game.step(row * 3 + col)
        self.update_board()

        if not terminated:
            return

        # On termination `player` is the winner (-1 or 1), or 0 for a draw.
        outcomes = {
            -1: "Player -1 wins",
            1: "Player 1 wins",
            0: "It's a draw",
        }
        message = outcomes.get(player)
        if message is not None:
            messagebox.showinfo("Game Over", message)
        self.root.quit()

# Launch the GUI only when this code is executed as the main module.
if __name__ == '__main__':
    root = tk.Tk()
    tic_tac_toe_gui = TicTacToeGUI(root)
    # Hand control to Tk's event loop; on_click() calls root.quit() once the game ends.
    root.mainloop()


You are required to solve Tic Tac Toe using **Value Iteration**. The resulting policy should never lose: at worst it should draw, no matter how the opponent plays.  
It should work whether you are player -1 or player 1, i.e. regardless of whether you move first or second.

It also follows that your code should win from any board state where a win is possible and should make optimal decisions in every other state.

Our final goal is to solve 3D Tic Tac Toe (4 × 4 × 4) using reinforcement learning. We will start by solving the 2D case and then gradually build up to the 3D case.

There is no starter code available. You are free to choose your implementation. One suggestion is to give +1 reward for a win, 0 for a draw and -1 for a loss.