In [1]:
# List the contents of the current working directory via the shell.
!dir

README.md    data	      game.py  self_play.py
__pycache__  dual_network.py  mcts.py  term.ipynb


In [75]:
import pickle

def visualize_board(board_state, step):
    """Print a 3x3 text rendering of a board position.

    Args:
        board_state: A pair ``(player_pieces, enemy_pieces)``. Each element is
            indexed at positions 0..8 (row-major, three cells per row) and a
            cell counts as occupied when its value equals 1.
        step: Integer whose parity decides which glyph each side gets. When
            ``step`` is odd, ``player_pieces`` are drawn as ``X`` and
            ``enemy_pieces`` as ``O``; when it is even the glyphs are swapped.

    Returns:
        None. The three rows are written to standard output, each prefixed
        with four spaces of indentation.
    """
    player_pieces, enemy_pieces = board_state

    # Pick the glyph pair once instead of re-testing the parity for each cell.
    if step % 2 != 0:
        own_glyph, other_glyph = ' X ', ' O '
    else:
        own_glyph, other_glyph = ' O ', ' X '

    def render_cell(position):
        # player_pieces wins if both sequences mark the same cell.
        if player_pieces[position] == 1:
            return own_glyph
        if enemy_pieces[position] == 1:
            return other_glyph
        return ' · '

    for row_start in range(0, 9, 3):
        cells = [render_cell(row_start + column) for column in range(3)]
        print('    ' + ''.join(cells))

def read_history(path):
    """Unpickle a history file and print each recorded entry.

    Args:
        path: Location of the pickled history file, opened in binary mode.

    The unpickled object is iterated and every entry is unpacked into
    ``(board, policies, value)``. For each entry this prints the 1-based step
    number, the raw board, the board drawn by ``visualize_board`` (called with
    the 0-based index), the policies formatted to two decimals, and the value.

    Returns:
        None. Everything is written to standard output.
    """
    # CAUTION: pickle.load can execute arbitrary code embedded in the file;
    # only open history files that come from a trusted source.
    with open(path, 'rb') as history_file:
        records = pickle.load(history_file)

    for position, record in enumerate(records):
        board, policies, value = record
        print(f'Step: {position + 1}')
        print(f'    Board State: {board}')
        visualize_board(board, position)
        # Format after drawing the board so a formatting error surfaces at
        # the same point in the output as before.
        formatted_policies = ', '.join([f'{p:.2f}' for p in policies])
        print(f'    Policies: [{formatted_policies}]')
        print(f'    Value: {value}')


In [76]:
# Print every entry stored in one history file from the data directory.
path = './data/20231117155837.history'
read_history(path)

Step: 1
    Board State: [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]]
     ·  ·  · 
     ·  ·  · 
     ·  ·  · 
    Policies: [0.14, 0.12, 0.10, 0.10, 0.10, 0.10, 0.10, 0.12, 0.10]
    Value: -1
Step: 2
    Board State: [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0]]
     ·  O  · 
     ·  ·  · 
     ·  ·  · 
    Policies: [0.16, 0.00, 0.12, 0.12, 0.12, 0.12, 0.10, 0.12, 0.12]
    Value: 1
Step: 3
    Board State: [[0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0]]
     ·  O  · 
     ·  ·  X 
     ·  ·  · 
    Policies: [0.33, 0.00, 0.12, 0.10, 0.12, 0.00, 0.10, 0.12, 0.10]
    Value: -1
Step: 4
    Board State: [[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0, 1, 0, 0]]
     ·  O  · 
     ·  ·  X 
     O  ·  · 
    Policies: [0.22, 0.00, 0.14, 0.16, 0.14, 0.00, 0.00, 0.14, 0.18]
    Value: 1
Step: 5
    Board State: [[0, 1, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 1, 0, 0, 0]]
     ·  O  · 
     ·  X  X 
     O  ·  · 
    Policies: [0.08, 0.00, 0.10, 0.61, 0

In [3]:
import torch
from torchsummary import summary
from dual_network import DualNetwork

# Instantiate the network with num_residual_block=16 and num_filters=128.
model = DualNetwork(num_residual_block=16, num_filters=128)
# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Print a per-layer table of output shapes and parameter counts.
# NOTE(review): the captured output reports feature maps of shape
# [-1, 128, 3, 2] and 3,456 parameters for the first Conv2d, i.e. (3, 3, 2)
# is being interpreted as 3 channels of size 3x2. Confirm this is the layout
# DualNetwork expects; a 3x3 board with 2 planes in channels-first order
# would be (2, 3, 3). DualNetwork is not visible here, so left unchanged.
summary(model, input_size = (3, 3, 2))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 128, 3, 2]           3,456
       BatchNorm2d-2            [-1, 128, 3, 2]             256
              ReLU-3            [-1, 128, 3, 2]               0
            Conv2d-4            [-1, 128, 3, 2]         147,456
       BatchNorm2d-5            [-1, 128, 3, 2]             256
              ReLU-6            [-1, 128, 3, 2]               0
            Conv2d-7            [-1, 128, 3, 2]         147,456
       BatchNorm2d-8            [-1, 128, 3, 2]             256
              ReLU-9            [-1, 128, 3, 2]               0
    ResidualBlock-10            [-1, 128, 3, 2]               0
           Conv2d-11            [-1, 128, 3, 2]         147,456
      BatchNorm2d-12            [-1, 128, 3, 2]             256
             ReLU-13            [-1, 128, 3, 2]               0
           Conv2d-14            [-1, 12