In [6]:
!git clone https://github.com/nishantkushwaha-1999/AlphaZero_C4.git
!pip install torchinfo
!pip install pygame

Cloning into 'AlphaZero_C4'...
remote: Enumerating objects: 33, done.[K
remote: Counting objects: 100% (33/33), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 33 (delta 7), reused 25 (delta 4), pack-reused 0[K
Receiving objects: 100% (33/33), 39.27 KiB | 3.57 MiB/s, done.
Resolving deltas: 100% (7/7), done.


In [3]:
import os

# Make the cloned repository the working directory.
# The basename check keeps the cell idempotent: when the kernel is already
# inside AlphaZero_C4 (e.g. the cell is run a second time) the path is left
# unchanged instead of pointing at a nested AlphaZero_C4/AlphaZero_C4.
REPO_NAME = "AlphaZero_C4"
cwd = os.getcwd()
path = cwd if os.path.basename(cwd) == REPO_NAME else os.path.join(cwd, REPO_NAME)
os.chdir(path)
print(path)

In [None]:
from google.colab import drive

# Mount Google Drive under ./drive (relative to the current working directory),
# forcing a fresh mount even if one is already present.
DRIVE_MOUNT_POINT = './drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

In [1]:
import numpy as np
import torch
from Game.C4 import Game_C4
from AlphaZero.model import ResNet_C4
from AlphaZero.AlphaZero import AlphaZero
from torchinfo import summary

pygame-ce 2.4.1 (SDL 2.28.5, Python 3.10.11)


# Train AlphaZero_C4

### Train from Scratch

In [2]:
# Seed NumPy's and PyTorch's global RNGs so that anything drawing from them
# (e.g. weight initialisation) is repeatable between runs.
# NOTE(review): if the project also uses Python's `random` module, seed that too.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

game_c4 = Game_C4()

# Hyperparameters. The network-size, optimizer and batch-size entries are read
# below in this cell; the whole dict is also handed to AlphaZero.learn, whose
# implementation defines the exact meaning of the remaining keys.
args = {
    # Board geometry and action count come from the game object.
    'board_dim': (game_c4.rows, game_c4.columns),
    'n_actions': game_c4.action_size,
    # Network size (passed to ResNet_C4 as n_res / n_hidden).
    'n_res_blocks': 10,
    'n_hidden': 64,
    # Presumably the search exploration constant — confirm in the MCTS code.
    'C': 3,
    # Adam settings.
    'lr': 0.01,
    'weight_decay': 0.001,
    # Search / self-play / training-loop settings (not read in this cell).
    'num_searches': 800,
    'n_iters': 50,
    'n_selfPlay': 150,
    # NOTE(review): boolean despite the `n_` prefix; forwarded as learn(n_parallel=...).
    'n_parallel': True,
    'n_parallel_games': 50,
    'epochs': 4,
    'batch_size': 128,
    'temperature': 1.2,
    'dirichlet_epsilon': 0.25,
    'dirichlet_alpha': 0.65
}

# Prefer the GPU when one is visible to PyTorch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ResNet_C4(
    board_dim=args['board_dim'],
    n_actions=args['n_actions'],
    n_res=args['n_res_blocks'],
    n_hidden=args['n_hidden'],
    device=device
)
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'], weight_decay=args['weight_decay'])
# The summary is built for one training-sized batch with 3 input planes.
# NOTE(review): the 3 presumably matches the board encoding used by Game_C4 — confirm.
architecture = summary(model, input_size=(args['batch_size'], 3, game_c4.rows, game_c4.columns), verbose=0)
print(architecture)

Layer (type:depth-idx)                   Output Shape              Param #
ResNet_C4                                [128, 1]                  --
├─Sequential: 1-1                        [128, 16, 6, 7]           --
│    └─Conv2d: 2-1                       [128, 16, 6, 7]           448
│    └─BatchNorm2d: 2-2                  [128, 16, 6, 7]           32
│    └─ReLU: 2-3                         [128, 16, 6, 7]           --
├─ModuleList: 1-2                        --                        --
│    └─ResBlock: 2-4                     [128, 16, 6, 7]           --
│    │    └─Conv2d: 3-1                  [128, 16, 6, 7]           2,320
│    │    └─BatchNorm2d: 3-2             [128, 16, 6, 7]           32
│    │    └─Conv2d: 3-3                  [128, 16, 6, 7]           2,320
│    │    └─BatchNorm2d: 3-4             [128, 16, 6, 7]           32
├─Sequential: 1-3                        [128, 1]                  --
│    └─Conv2d: 2-5                       [128, 32, 6, 7]           4,640
│    

In [4]:
# Run self-play and training with the model/optimizer built above; models are
# saved under the Drive folder given as save_path.
alphazero = AlphaZero()
alphazero.learn(
    model,
    optimizer,
    game_c4,
    args,
    save_path="/content/AlphaZero_C4/drive/MyDrive/AlphaZero_C4/saved_models",
    n_parallel=args['n_parallel'],
)

Initializing...
Iter 1 of 1
Self Play: Playing 1 games parallely


21 fills remaining: : 100%|██████████| 1/1 [00:02<00:00,  2.09s/it]


Model Train:
Training on 22 game states


Epoch 0/1 - batch_loss: 3.8608224391937256: 100%|██████████| 1/1 [00:00<00:00, 71.73it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_1_Connect_4.pt


### Load & Train

In [2]:
# Resume training from previously saved models.
# NOTE(review): the directory below is an absolute path on one specific machine
# and differs from the save_path used by the "Train from Scratch" cell — point
# it at wherever the saved models actually live before running.
game_c4 = Game_C4()
alphazero = AlphaZero()
alphazero.load_and_resume(
    game_c4,
    "/Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/",
    train=True,
)

Resuming Training:
Iter 2 of 10
Self Play: Playing 1 games parallely


30 fills remaining: : 100%|██████████| 1/1 [00:01<00:00,  1.39s/it]


Model Train:
Training on 15 game states


Epoch 0/1 - batch_loss: 6.415152072906494: 100%|██████████| 1/1 [00:00<00:00, 37.10it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_2_Connect_4.pt
Iter 3 of 10
Self Play: Playing 1 games parallely


26 fills remaining: : 100%|██████████| 1/1 [00:01<00:00,  1.47s/it]


Model Train:
Training on 17 game states


Epoch 0/1 - batch_loss: 7.175382137298584: 100%|██████████| 1/1 [00:00<00:00, 67.52it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_3_Connect_4.pt
Iter 4 of 10
Self Play: Playing 1 games parallely


26 fills remaining: : 100%|██████████| 1/1 [00:01<00:00,  1.46s/it]


Model Train:
Training on 17 game states


Epoch 0/1 - batch_loss: 5.056169509887695: 100%|██████████| 1/1 [00:00<00:00, 81.18it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_4_Connect_4.pt
Iter 5 of 10
Self Play: Playing 1 games parallely


25 fills remaining: : 100%|██████████| 1/1 [00:02<00:00,  2.52s/it]


Model Train:
Training on 26 game states


Epoch 0/1 - batch_loss: 5.144381046295166: 100%|██████████| 1/1 [00:00<00:00, 61.80it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_5_Connect_4.pt
Iter 6 of 10
Self Play: Playing 1 games parallely


29 fills remaining: : 100%|██████████| 1/1 [00:01<00:00,  1.88s/it]


Model Train:
Training on 18 game states


Epoch 0/1 - batch_loss: 5.300870895385742: 100%|██████████| 1/1 [00:00<00:00, 76.39it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_6_Connect_4.pt
Iter 7 of 10
Self Play: Playing 1 games parallely


26 fills remaining: : 100%|██████████| 1/1 [00:01<00:00,  1.95s/it]


Model Train:
Training on 20 game states


Epoch 0/1 - batch_loss: 4.695318698883057: 100%|██████████| 1/1 [00:00<00:00, 74.85it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_7_Connect_4.pt
Iter 8 of 10
Self Play: Playing 1 games parallely


31 fills remaining: : 100%|██████████| 1/1 [00:01<00:00,  1.41s/it]


Model Train:
Training on 13 game states


Epoch 0/1 - batch_loss: 4.594738483428955: 100%|██████████| 1/1 [00:00<00:00, 144.57it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_8_Connect_4.pt
Iter 9 of 10
Self Play: Playing 1 games parallely


25 fills remaining: : 100%|██████████| 1/1 [00:01<00:00,  1.76s/it]


Model Train:
Training on 18 game states


Epoch 0/1 - batch_loss: 4.268261909484863: 100%|██████████| 1/1 [00:00<00:00, 81.70it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_9_Connect_4.pt
Iter 10 of 10
Self Play: Playing 1 games parallely


23 fills remaining: : 100%|██████████| 1/1 [00:02<00:00,  2.04s/it]


Model Train:
Training on 20 game states


Epoch 0/1 - batch_loss: 4.010201930999756: 100%|██████████| 1/1 [00:00<00:00, 70.23it/s]


Model saved at: /Volumes/Storage/Git Repos/AlphaZero_C4/saved_models/****_10_Connect_4.pt
