In [1]:
import numpy as np
import torch
from torchinfo import summary

from AlphaZero.AlphaZero import AlphaZero
from AlphaZero.model import ResNet_C4
from Game.C4 import Game_C4

pygame 2.5.2 (SDL 2.28.3, Python 3.10.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


# Train AlphaZero on Connect4

In [3]:
# Game environment whose board dimensions and action count size the network.
game_c4 = Game_C4()

# Hyperparameter dictionary: the network constructor below reads the first four
# entries, and the whole dict is handed to the AlphaZero driver.
# NOTE(review): meanings of the search/training keys are defined by the
# AlphaZero package, not here — confirm there before tuning.
args = dict(
    board_dim=(game_c4.rows, game_c4.columns),
    n_actions=game_c4.action_size,
    n_res_blocks=9,
    n_hidden=128,
    C=2,
    num_searches=10,
    n_iters=2,
    n_selfPlay=10,
    epochs=2,
    batch_size=2,
    temperature=1.25,
    dirichlet_epsilon=0.25,
    dirichlet_alpha=0.03,
)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = ResNet_C4(
    device=device,
    n_hidden=args['n_hidden'],
    n_res=args['n_res_blocks'],
    n_actions=args['n_actions'],
    board_dim=args['board_dim'],
)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-4)

# Dummy input shape for the layer summary: (batch, planes, rows, columns).
# The 3 matches the three planes of the encoded board fed to the network.
summary_input_shape = (args['batch_size'], 3, game_c4.rows, game_c4.columns)
architecture = summary(model, input_size=summary_input_shape, verbose=0)
print(architecture)

Layer (type:depth-idx)                   Output Shape              Param #
ResNet_C4                                [2, 1]                    --
├─Sequential: 1-1                        [2, 128, 6, 7]            --
│    └─Conv2d: 2-1                       [2, 128, 6, 7]            3,584
│    └─BatchNorm2d: 2-2                  [2, 128, 6, 7]            256
│    └─ReLU: 2-3                         [2, 128, 6, 7]            --
├─ModuleList: 1-2                        --                        --
│    └─ResBlock: 2-4                     [2, 128, 6, 7]            --
│    │    └─Conv2d: 3-1                  [2, 128, 6, 7]            147,584
│    │    └─BatchNorm2d: 3-2             [2, 128, 6, 7]            256
│    │    └─Conv2d: 3-3                  [2, 128, 6, 7]            147,584
│    │    └─BatchNorm2d: 3-4             [2, 128, 6, 7]            256
│    └─ResBlock: 2-5                     [2, 128, 6, 7]            --
│    │    └─Conv2d: 3-5                  [2, 128, 6, 7]            14

In [None]:
# Hand the network, optimizer, game and hyperparameter dict to the AlphaZero
# driver, then start training.
# NOTE(review): presumably learn() runs args['n_iters'] rounds of self-play
# followed by training — confirm in AlphaZero.AlphaZero.
# NOTE(review): this cell has no execution count, so the saved notebook does not
# contain output from a training run.
alphazero = AlphaZero(model, optimizer, game_c4, args)
alphazero.learn()

In [17]:
# Build the network input for the game's initial position.
# Each stage has its own name so the raw board and the encoded planes stay
# inspectable; `state` is still bound to the final batched tensor.
state_raw = np.array(game_c4.initialize())
state_encoded = game_c4.get_encoded_state(state_raw)
# unsqueeze(0) prepends a batch axis of size 1 in front of the encoded planes.
state = torch.tensor(state_encoded, dtype=torch.float32, device=device).unsqueeze(0)
state

tensor([[[[0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.]],

         [[1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1., 1., 1.]],

         [[0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0.]]]])