In [None]:
## only run if using google Colab
# Mount Google Drive and switch into the project folder when running on Colab.
# Outside Colab the `google.colab` package is not importable; in that case the
# cell does nothing, so "Restart & Run All" also works on a local kernel.
try:
    from google.colab import drive
except ImportError:
    drive = None

import os

if drive is not None:
    drive.mount('/content/drive')
    os.chdir('/content/drive/MyDrive/UCLA/ECE239/Gomoku-project/')

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import gomoku4, player_basic, player_zero
from time import time

# for auto-reloading external modules
%load_ext autoreload
%autoreload 2

In [None]:
## check running GPU
# Displays the GPU device name reported by TensorFlow as the cell output
# (per the TensorFlow docs this is an empty string when no GPU is available).
tf.test.gpu_device_name()

In [2]:
# Build the two-headed network for board size 6 and compile it with one loss
# per output head ('policy' and 'value').
model = player_zero.net(6)

opt = keras.optimizers.Adam(learning_rate=1e-3)
policy_loss = keras.losses.BinaryCrossentropy(name='cross_entropy')
value_loss = keras.losses.MeanSquaredError(name='mse')

# Keys must match the names of the model's output layers.
head_losses = {
    'policy': policy_loss,
    'value': value_loss,
}
model.compile(optimizer=opt, loss=head_losses)

In [3]:
# Print the layer-by-layer structure and parameter counts of the network.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 6, 6, 1)]    0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 6, 6, 64)     640         input[0][0]                      
__________________________________________________________________________________________________
conv2 (Conv2D)                  (None, 6, 6, 64)     36928       conv1[0][0]                      
__________________________________________________________________________________________________
conv3 (Conv2D)                  (None, 6, 6, 64)     36928       conv2[0][0]                      
______________________________________________________________________________________________

In [27]:
#model.save_weights('day0.h5')

In [4]:
# Load saved weights from 'day1.h5' into the network built above.
# NOTE(review): the final cell of this notebook writes this same file, so on a
# fresh run the file must already exist before this cell can succeed.
model.load_weights('day1.h5')

# Playtest

In [18]:
## playtest against human
game = gomoku4.Gomoku4(6)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)

In [19]:
# One round of the human-vs-network playtest: the human move is hard-coded
# here, then the network replies (unless the game is already over).
game.play(1, 2)

# Timing starts after the human move, so it covers the network's reply and
# the board rendering only.
t = time()
if not game.finished:
    reply = p2.play(game)
    game.play(*reply)
game.show()
print("time: {0:.3f}".format(time() - t))

white played (4, 1).
black played (1, 2).
game has ended. winner: black
   0 1 2 3 4 5
 0 . ○ . . . . 
 1 . . . . ○ ○ 
 2 . ● ● ● ● . 
 3 . . . . . . 
 4 . . . . . . 
 5 . . . . . . 
time: 0.004


In [19]:
# Play a fixed sequence of moves by hand. After each move the player's tree is
# advanced with the flat index first*6 + second (presumably to keep the search
# tree in step with the board -- confirm in player_zero).
for move in [(2, 2), (2, 3), (3, 2), (3, 3)]:
    game.play(*move)
    p2.tree.updateHead(move[0] * 6 + move[1])

# Optional third pair of moves, currently disabled:
# game.play(4, 2)
# p2.tree.updateHead(4*6+2)
# game.play(4, 3)
# p2.tree.updateHead(4*6+3)

game.show()

black played (4, 2).
white played (4, 3).
black's turn.
   0 1 2 3 4 5
 0 . . . . . . 
 1 . . . . . . 
 2 . . ● ● ● . 
 3 . . ○ ○ ○ . 
 4 . . . . . . 
 5 . . . . . . 


In [20]:
# Ask the network player for its move in the current position; the returned
# pair is displayed but NOT applied to the game here.
p2.play(game)

(5, 2)

In [21]:
# Inspect the P array stored on the tree's previous head node
# (presumably the per-cell prior probabilities, 36 entries -- confirm in player_zero).
p2.tree.prev_head.P

array([0.00624203, 0.00266773, 0.02310409, 0.00296424, 0.00310789,
       0.00491623, 0.00463357, 0.01427532, 0.33325308, 0.01495194,
       0.01600388, 0.01115462, 0.00199637, 0.00263356, 0.00800716,
       0.00556969, 0.01367661, 0.00409513, 0.0110047 , 0.00835468,
       0.00426882, 0.00153794, 0.0024741 , 0.01886619, 0.00280085,
       0.00454124, 0.00381445, 0.00438922, 0.00835988, 0.01443459,
       0.01301124, 0.00666254, 0.4126034 , 0.0063967 , 0.00113718,
       0.0020892 ])

In [57]:
# Inspect the N array stored on the tree's previous head node
# (presumably per-cell visit counts from the search -- confirm in player_zero).
p2.tree.prev_head.N

array([31., 44., 24., 30., 37., 32., 28., 37., 33., 29., 25., 26., 27.,
       33.,  0.,  0., 28., 42., 33., 25.,  0.,  0., 29., 26., 35., 29.,
       27., 45., 28., 49., 30., 26., 33., 24., 29., 26.], dtype=float32)

In [64]:
## playtest against self
game = gomoku4.Gomoku4(6)
p1 = player_zero.ZeroPlayer('p1', +1, game, model)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)
pa, pb = p1, p2

In [65]:
# Run the self-play game to completion, alternating the two players, and
# report the total wall-clock time (including the final board rendering).
t = time()
while not game.finished:
    move = pa.play(game, n_iter=1000)
    game.play(*move)
    # hand the turn to the other player
    pa, pb = pb, pa
game.show()
print("time: {0:.3f}".format(time() - t))

black played (0, 5).
white played (4, 2).
game has ended. winner: white
   0 1 2 3 4 5
 0 . . . . . . 
 1 ● . ● . . ○ 
 2 . . . . ○ . 
 3 ● . ● . ○ . 
 4 . . . . ○ . 
 5 ● . . . ○ . 
time: 37.953


# Self-play games

In [38]:
## play 1000 games

# Generate self-play data: 10 record files ('<day>_<i>.tfrecords') with 100
# games each. Both players are handed the same recorder for every game.

day = 'day0'

for i in range(10):
    suffix = '_' + str(i)
    recorder = player_zero.GameRecorder(day+suffix+'.tfrecords', size=6)
    recorder.open()
    # try/finally closes the record file exactly once whether the batch
    # completes, a game raises, or the kernel is interrupted. Unlike a bare
    # `except:`, the error then propagates instead of being silently dropped
    # while the loop carries on with the next batch.
    try:
        print(i, end=': ')
        for j in range(100):
            print(j, end=',')
            game = gomoku4.Gomoku4(6)
            p1 = player_zero.ZeroPlayer('p1', +1, game, model, recorder)
            p2 = player_zero.ZeroPlayer('p2', -1, game, model, recorder)

            # pa moves next, pb waits; swap after every move
            pa, pb = p1, p2
            while not game.finished:
                game.play(*pa.play(game))
                pa, pb = pb, pa
        print()
    finally:
        recorder.close()

0: 0,


In [37]:
# Manually close whichever recorder is currently bound to `recorder`
# (e.g. after a self-play run was stopped part-way).
recorder.close()

# Augment dataset

In [103]:
## merge into one dataset
datasets = []
day = 'day0'

for i in range(12):
    suffix = '_' + str(i)
    recorder = player_zero.GameRecorder(day+suffix+'.tfrecords', size=6)
    datasets.append(recorder.fetch())

In [128]:
# Write every fetched sample into one combined record file, together with the
# seven other rotations/reflections of the 6x6 board. The policy vector is
# reshaped to 6x6 and transformed the same way so it stays aligned with the
# board; the value is passed through unchanged for every variant.
recorder = player_zero.GameRecorder(day+'.tfrecords', size=6)
recorder.open(overwrite=True)
# try/finally: the output file is closed even if a read or write fails part-way.
try:
    for ds in datasets:
        for x in ds:
            board = x[0].numpy()
            policy = x[1]['policy'].numpy()
            value = x[1]['value']
            recorder.write(board, policy, value)
            ## add rotated and flipped
            policy_square = policy.reshape(6, 6)
            # rotations of the original position by 90, 180, 270 degrees
            for k in (1, 2, 3):
                recorder.write(np.rot90(board, k=k), np.rot90(policy_square, k=k).flatten(), value)
            # vertical flip, followed by the same three rotations of the flipped position
            board_flipped = np.flipud(board)
            policy_flipped = np.flipud(policy_square)
            recorder.write(board_flipped, policy_flipped.flatten(), value)
            for k in (1, 2, 3):
                recorder.write(np.rot90(board_flipped, k=k), np.rot90(policy_flipped, k=k).flatten(), value)
finally:
    recorder.close()

In [129]:
# Re-open the merged, augmented record file and fetch it as a dataset.
recorder = player_zero.GameRecorder('day0.tfrecords', size=6)
data = recorder.fetch()

In [130]:
# Count the records by iterating the dataset once, without keeping them in memory.
sum(1 for _ in data)

136760

In [123]:
# Materialise the dataset as three NumPy arrays in a single pass. Reading
# once (instead of once per array) is cheaper on a large record file and
# guarantees that row k of board, policy and value come from the same sample.
boards, policies, values = [], [], []
for sample in data:
    boards.append(sample[0])
    policies.append(sample[1]['policy'])
    values.append(sample[1]['value'])

board = np.array(boards)
policy = np.array(policies)
value = np.array(values)

In [125]:
# View the first sample's 36-entry policy vector as a 6x6 grid.
policy_square = policy[0].reshape(6, 6)

In [127]:
# Flatten the 6x6 grid back to a 1-D vector and display it.
policy_square.flatten()

array([0.03 , 0.022, 0.03 , 0.024, 0.03 , 0.03 , 0.032, 0.024, 0.028,
       0.026, 0.028, 0.026, 0.024, 0.046, 0.022, 0.028, 0.022, 0.026,
       0.034, 0.026, 0.032, 0.022, 0.026, 0.024, 0.024, 0.026, 0.042,
       0.026, 0.022, 0.024, 0.03 , 0.03 , 0.022, 0.026, 0.03 , 0.036],
      dtype=float32)

In [102]:
# Display channel 0 of `x` after rotating by k*90 degrees (k=0 leaves it unrotated).
# NOTE(review): `x` is not defined in this cell; it relies on a value left over
# from an earlier cell (hidden kernel state) -- confirm which one before re-running.
np.rot90(x, k=0)[..., 0]

array([[ 0,  0, -1,  0,  0, -1],
       [ 0, -1,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [-1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  1,  1,  0,  1,  0]], dtype=int8)

# Train NN

In [104]:
# Load the merged training set. size=6 is passed explicitly, as for every
# other GameRecorder in this notebook, so the records are read with the same
# board size they were written with rather than whatever the default is.
recorder = player_zero.GameRecorder('day0.tfrecords', size=6)
data = recorder.fetch()

In [105]:
# Shuffle with a 1000-element buffer, then group into batches of 10.
# NOTE(review): the augmentation cell writes the 8 symmetric variants of a
# position back-to-back, and the merged file was counted at ~137k records, so
# a 1000-element buffer only mixes samples locally -- consider a larger buffer.
train = data.shuffle(1000).batch(10)

In [None]:
# Train for 30 epochs over the batched dataset; the object returned by fit is kept in `history`.
history = model.fit(train, epochs=30)

In [79]:
# Save the trained weights; this is the file loaded near the top of the notebook.
model.save_weights('day1.h5')