In [1]:
import numpy as np
import matplotlib.pyplot as plt
import math
import random

In [2]:
class TicTacToe4x4x4:
    """Environment for 4x4x4 (three-dimensional) tic-tac-toe.

    The board is a nested list indexed as ``board[layer][row][col]`` whose
    cells hold ``" "`` (empty), ``"X"`` or ``"O"``.  ``current_player`` is
    ``-1`` while "X" is to move and ``1`` while "O" is to move; "X" always
    moves first.  Actions are flat cell indices computed as
    ``layer * 16 + row * 4 + col``.
    """

    SIZE = 4  # edge length of the cube; a winning line spans the whole edge

    # Lazily built cache of every winning line (tuples of (layer, row, col)).
    _win_lines = None

    def __init__(self):
        # 3D board: 4 layers of 4x4 grids
        n = self.SIZE
        self.board = [[[" " for _ in range(n)] for _ in range(n)] for _ in range(n)]
        self.current_player = -1

    @classmethod
    def _all_win_lines(cls):
        """Return all 76 winning lines of the cube as coordinate tuples.

        A line is built by walking from a start cell along one of the 13
        canonical directions (each direction and its opposite describe the
        same line, so only one of the pair is kept).  Because a line must be
        exactly ``SIZE`` long, a start cell is valid only when the cell
        ``SIZE - 1`` steps away is still inside the board.
        """
        if cls._win_lines is None:
            n = cls.SIZE
            span = n - 1
            # Keep the direction whose first non-zero component is positive.
            directions = [
                (dl, dr, dc)
                for dl in (-1, 0, 1)
                for dr in (-1, 0, 1)
                for dc in (-1, 0, 1)
                if (dl, dr, dc) > (0, 0, 0)
            ]
            lines = []
            for layer in range(n):
                for row in range(n):
                    for col in range(n):
                        for dl, dr, dc in directions:
                            end = (layer + span * dl, row + span * dr, col + span * dc)
                            if all(0 <= coord < n for coord in end):
                                lines.append(
                                    tuple(
                                        (layer + k * dl, row + k * dr, col + k * dc)
                                        for k in range(n)
                                    )
                                )
            cls._win_lines = tuple(lines)
        return cls._win_lines

    def check_draw(self):
        """Return True when no empty cell is left anywhere on the board."""
        return not any(
            cell == " " for layer in self.board for row in layer for cell in row
        )

    def print_board(self):
        """Print every layer of the board as a box-drawn 4x4 grid."""
        last = self.SIZE - 1
        for layer in range(self.SIZE):
            print(f"Layer {layer + 1}:")
            print("┌───┬───┬───┬───┐")
            for i, row in enumerate(self.board[layer]):
                print("│ " + " │ ".join(row) + " │")
                if i < last:
                    print("├───┼───┼───┼───┤")
            print("└───┴───┴───┴───┘")
            if layer < last:
                print()

    def check_winner(self, player):
        """Return True if ``player`` ("X" or "O") owns any complete line.

        Covers rows, columns and pillars as well as the diagonals inside
        each layer, the diagonals across layers, and the four space
        diagonals of the cube.
        """
        board = self.board
        return any(
            all(board[layer][row][col] == player for layer, row, col in line)
            for line in self._all_win_lines()
        )

    def step(self, state):
        """Place the current player's symbol on the cell encoded by ``state``.

        Args:
            state: flat cell index in ``0..63``
                (``layer * 16 + row * 4 + col``).

        Returns:
            ``(board, current_player, terminated, reward)``.  A move onto an
            occupied cell is rejected: nothing changes and the same player
            stays on turn.  On a win ``terminated`` is True and ``reward``
            is ``1`` if "X" won or ``-1`` if "O" won; on a draw
            ``terminated`` is True and ``reward`` is ``0``.  On a terminal
            move ``current_player`` is left at the player who just moved.

        Raises:
            IndexError: if ``state`` is outside ``0..63``.
        """
        n = self.SIZE
        state = int(state)
        if not 0 <= state < n * n * n:
            # Negative values would otherwise wrap around to the wrong cell.
            raise IndexError(f"action {state} is outside the board (0..{n ** 3 - 1})")

        layer, remainder = divmod(state, n * n)
        row, col = divmod(remainder, n)

        current_player_symbol = "X" if self.current_player == -1 else "O"

        if self.board[layer][row][col] != " ":
            # Occupied cell: reject the move without switching players.
            return self.board, self.current_player, False, 0

        self.board[layer][row][col] = current_player_symbol

        if self.check_winner(current_player_symbol):
            return (
                self.board,
                self.current_player,
                True,
                -1 if current_player_symbol == "O" else 1,
            )
        if self.check_draw():
            return self.board, self.current_player, True, 0

        self.current_player *= -1
        return self.board, self.current_player, False, 0

In [4]:
# Initialized as a random policy for player 1


def policy_player1(board):
    """Return the flat index of a uniformly random empty cell.

    ``board`` is indexed as ``board[layer][row][col]`` and an empty cell
    holds ``" "``.  The returned action is ``layer * 16 + row * 4 + col``.
    ``random.choice`` raises ``IndexError`` when the board has no empty cell.
    """
    open_cells = [
        layer * 16 + row * 4 + col
        for layer in range(4)
        for row in range(4)
        for col in range(4)
        if board[layer][row][col] == " "
    ]
    return random.choice(open_cells)


# Initialized as a random policy for player 2
def policy_player2(board):
    """Choose uniformly at random among the cells that are still empty.

    Walks the 64 flat cell indices in ascending order, decodes each one to
    ``(layer, row, col)`` and keeps those whose cell equals ``" "``.
    ``random.choice`` raises ``IndexError`` if no cell is empty.
    """
    free_actions = []
    for flat_index in range(64):
        layer, remainder = divmod(flat_index, 16)
        row, col = divmod(remainder, 4)
        if board[layer][row][col] == " ":
            free_actions.append(flat_index)

    return random.choice(free_actions)

In [6]:
def play_one_game(policy_player1, policy_player2, verbose=True):
    """Play a single game between two policies and report who won.

    Args:
        policy_player1: callable ``board -> action`` that moves first ("X").
        policy_player2: callable ``board -> action`` that moves second ("O").
        verbose: when True (the default, matching the previous behaviour)
            the board is printed after every move; pass False to keep the
            notebook output short.

    Returns:
        ``-1`` if the first policy won, ``1`` if the second policy won and
        ``0`` for a draw (or if the move budget runs out).
    """
    tictactoe = TicTacToe4x4x4()
    board = tictactoe.board
    reward = 0

    # The cube has 64 cells, so a game never needs more than 64 moves.
    for _ in range(64):
        # Ask the environment whose turn it is: a rejected move leaves the
        # same player on turn, so a fixed alternation could hand the move
        # to the wrong policy.
        if tictactoe.current_player == -1:
            action = policy_player1(board)
        else:
            action = policy_player2(board)

        board, _player, terminated, reward = tictactoe.step(action)

        if verbose:
            tictactoe.print_board()

        # Stop the whole game here.  Previously only the inner per-turn loop
        # was exited, so play continued on a finished board and `reward`
        # was overwritten by later moves.
        if terminated:
            break

    return -1 * reward  # This is the player who won

In [7]:
def run_alternating_games(games=10):
    """Play ``games`` rounds of two games each, swapping who moves first.

    In every round the first game has ``policy_player1`` moving first and
    the second game has ``policy_player2`` moving first.

    Returns:
        A list with one entry per game: ``1`` if player 1 won, ``2`` if
        player 2 won, ``0`` for a draw.
    """
    outcomes = []
    for _ in range(games):
        for swapped in (False, True):
            if swapped:
                winner = play_one_game(policy_player2, policy_player1)
                first_mover, second_mover = 2, 1
            else:
                winner = play_one_game(policy_player1, policy_player2)
                first_mover, second_mover = 1, 2

            # play_one_game reports -1 for the first mover, 1 for the second.
            if winner == -1:
                outcomes.append(first_mover)
            elif winner == 1:
                outcomes.append(second_mover)
            elif winner == 0:
                outcomes.append(0)

    return outcomes

In [9]:
# Two rounds (four games in total); tally each outcome code before printing.
results = run_alternating_games(2)
draws, player1_wins, player2_wins = (results.count(code) for code in (0, 1, 2))
print("Draws: ", draws)
print("Player 1 Wins:", player1_wins)
print("Player 2 Wins:", player2_wins)

Layer 1:
┌───┬───┬───┬───┐
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
└───┴───┴───┴───┘

Layer 2:
┌───┬───┬───┬───┐
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │ X │   │
├───┼───┼───┼───┤
│   │   │   │   │
└───┴───┴───┴───┘

Layer 3:
┌───┬───┬───┬───┐
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
└───┴───┴───┴───┘

Layer 4:
┌───┬───┬───┬───┐
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
└───┴───┴───┴───┘
Layer 1:
┌───┬───┬───┬───┐
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
└───┴───┴───┴───┘

Layer 2:
┌───┬───┬───┬───┐
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │   │   │
├───┼───┼───┼───┤
│   │   │ X │   │
├───┼───┼───┼───┤
│   │ 

The TicTacToe game is not complete. It does not check for diagonals. You have to write the code for the diagonal winning conditions.  

You can use any approach to solve this problem. Your solution will be used to compete against other players. Of course, you need to store the weights of the neural network / Q-table in a separate file.  

You will submit that file and a function that loads those weights and gives the optimal actions given a board state. Your function should input the board state and output an action which will be used to get the next state of the game.  

If you are unable to do anything else, just complete the TicTacToe code and use a random action policy. This simple policy might get you pretty good results while competing! You could also make it avoid obvious mistakes, such as failing to block an opponent who is about to win. The possibilities are endless! Good luck!  

Also, write a report describing the algorithms you used in all phases of the project. There is no fixed length. It must be in PDF format. Explain your ideas as simply as possible, without jargon.  