In [None]:
## Google Colab only: mount Drive and switch to the project directory.
## Outside Colab this cell is a no-op, so "Restart & Run All" also works locally.
import os

try:
    from google.colab import drive
except ImportError:
    # google.colab is not installed, i.e. we are not running on Colab.
    drive = None

if drive is not None:
    drive.mount('/content/drive')
    # The project modules and the .h5/.tfrecords files are addressed
    # relative to this directory by the cells below.
    os.chdir('/content/drive/MyDrive/UCLA/ECE239/Gomoku-project/')

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import gomoku, player_basic, player_zero
from time import time

# Auto-reload external modules before each cell executes, so edits to the
# imported project modules take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
## check running GPU
# Displays the GPU device name TensorFlow sees; an empty string means no GPU.
tf.test.gpu_device_name()

In [2]:
# Build the two-headed network for a 9x9 board and attach one loss per head.
model = player_zero.net(9)

# Per-head losses; the dict keys below must match the model's output names.
# NOTE(review): binary cross-entropy is used for the policy head — confirm this
# matches the head's output activation (a softmax head would normally pair with
# categorical cross-entropy).
policy_loss = keras.losses.BinaryCrossentropy(name='cross_entropy')
value_loss = keras.losses.MeanSquaredError(name='mse')

opt = keras.optimizers.Adam(learning_rate=1e-3)

losses = {'policy': policy_loss, 'value': value_loss}
model.compile(optimizer=opt, loss=losses)

In [3]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 9, 9, 1)]    0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 9, 9, 64)     640         input[0][0]                      
__________________________________________________________________________________________________
conv2 (Conv2D)                  (None, 9, 9, 64)     36928       conv1[0][0]                      
__________________________________________________________________________________________________
conv3 (Conv2D)                  (None, 9, 9, 64)     36928       conv2[0][0]                      
______________________________________________________________________________________________

In [4]:
# Disabled on purpose: uncomment to snapshot the current weights as 'day0.h5'.
#model.save_weights('day0.h5')

In [5]:
# Restore previously trained weights into the compiled model.
# NOTE(review): 'day1.h5' is written by the save_weights cell at the end of this
# notebook, so on a fresh run this file must already exist on disk.
model.load_weights('day1.h5')

# Playtest

In [107]:
## playtest against human
# Fresh 9x9 game; the network-backed player 'p2' is created with side -1.
# NOTE(review): -1 looks like white (in the self-play cell the +1 player moves
# first) — confirm against player_zero.ZeroPlayer.
game = gomoku.Gomoku(9)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)

In [94]:
# Human move: edit the coordinates and re-run this cell for each turn.
game.play(4, 4)
# Time only the engine's reply (search + board display), not the human move.
# NOTE(review): the tree head is not advanced for the human move here, unlike
# the scripted-position cell — presumably p2.play syncs with `game`; confirm.
t = time()
if not game.finished:
    game.play(*p2.play(game))
game.show()
print("time: {0:.3f}".format(time() - t))

black played (4, 4).
white played (0, 1).
black's turn.
   0 1 2 3 4 5 6 7 8
 0 . . . . . . . . . 
 1 ○ . . . . . . . . 
 2 . . . . . . . . . 
 3 . . . . . . . . . 
 4 . . . . ● . . . . 
 5 . . . . . . . . . 
 6 . . . . . . . . . 
 7 . . . . . . . . . 
 8 . . . . . . . . . 
time: 2.224


In [108]:
# Build a fixed test position by replaying a scripted move list, alternating
# sides, and keep p2's search tree in step with the board after every move.
board_size = 9  # same size that was passed to gomoku.Gomoku(9)

scripted_moves = [
    (3, 3), (3, 4),
    (4, 3), (4, 4),
    (5, 3), (5, 4),
    (6, 3), (6, 4),
    # (8, 8),  # optional extra move, left disabled
]

for i, j in scripted_moves:
    game.play(i, j)
    # updateHead takes the flattened move index: first coordinate * size + second.
    p2.tree.updateHead(i * board_size + j)

game.show()

black played (6, 3).
white played (6, 4).
black's turn.
   0 1 2 3 4 5 6 7 8
 0 . . . . . . . . . 
 1 . . . . . . . . . 
 2 . . . . . . . . . 
 3 . . . ● ● ● ● . . 
 4 . . . ○ ○ ○ ○ . . 
 5 . . . . . . . . . 
 6 . . . . . . . . . 
 7 . . . . . . . . . 
 8 . . . . . . . . . 


In [109]:
# Ask the engine for its move in the scripted position (displayed, not played).
# NOTE(review): the board printout above says it is black's turn while p2 was
# created with side -1 — confirm ZeroPlayer.play is meant to be called here.
p2.play(game)

(3, 0)

In [111]:
# Rank flattened move indices by the P array of the previous tree head, in
# ascending order (the last index has the largest P).
# NOTE(review): P is presumably the network's prior over the 81 cells — confirm.
np.argsort(p2.tree.prev_head.P)

array([39, 29, 74, 72, 28, 41, 65, 34, 32,  2,  7, 62, 33, 15, 50, 77,  3,
       25, 64, 71, 63, 45,  5, 61,  8, 60, 78, 59, 31, 55, 38, 73, 69, 54,
       26, 24, 80, 46, 36, 35,  0, 11, 17, 23, 10, 49, 53, 20, 52, 75, 79,
       30, 40,  9, 22, 68, 76, 21, 12, 43, 27,  4, 16, 56, 57, 18, 14, 67,
       66, 47, 13, 58, 42, 48, 51, 19, 37,  1, 70,  6, 44])

In [6]:
## playtest against self
# Fresh 9x9 game with two players sharing the same network: p1 has side +1,
# p2 has side -1.
game = gomoku.Gomoku(9)
p1 = player_zero.ZeroPlayer('p1', +1, game, model)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)
# pa is the player to move next, pb the one waiting; they swap after each move.
pa, pb = p1, p2

In [7]:
# Play the self-play game to completion and report the total wall-clock time.
# (Swap the `while` for an `if` to step through one move per cell run.)
t = time()
while not game.finished:
    move = pa.play(game)
    game.play(*move)
    # hand the turn to the other player
    pa, pb = pb, pa
game.show()
elapsed = time() - t
print("time: {0:.3f}".format(elapsed))

black played (0, 4).
white played (6, 0).
game has ended. winner: white
   0 1 2 3 4 5 6 7 8
 0 . ● ○ ○ ○ ○ ○ . . 
 1 ● . . . ○ ● . ○ ● 
 2 ○ ● . . ● . . . . 
 3 . ● . . . . . . . 
 4 ● . ○ ○ . ● . . . 
 5 ● . . . . . . . . 
 6 . . ○ . . . . ○ . 
 7 ● . ● . ○ . ○ . . 
 8 . ● ○ ● ○ . ● ● . 
time: 59.261


# Self-play games

In [None]:
## play 1000 games: 10 record files x 100 self-play games each

day = 'day0'

for i in range(10):
    suffix = '_' + str(i)
    recorder = player_zero.GameRecorder(day+suffix+'.tfrecords')
    recorder.open()
    # Self-play runs for a long time and is often interrupted by hand; the
    # try/finally guarantees the record file is closed even if the loop raises
    # or the kernel is interrupted.
    try:
        print(i, end=': ')
        for j in range(100):
            print(j, end=',')
            game = gomoku.Gomoku(9)
            # Both players share the network and are handed the same recorder.
            p1 = player_zero.ZeroPlayer('p1', +1, game, model, recorder)
            p2 = player_zero.ZeroPlayer('p2', -1, game, model, recorder)

            # pa moves next, pb waits; swap after every move.
            pa, pb = p1, p2
            while not game.finished:
                game.play(*pa.play(game))
                pa, pb = pb, pa
        print()
    finally:
        recorder.close()

In [112]:
## merge into one dataset
# Collect one fetched dataset per shard file '<day>_<i>.tfrecords'.
datasets = []
day = 'day0'

# NOTE(review): this reads shards _0 .. _10 (11 files), while the generation
# cell writes _0 .. _9 (10 files) — confirm an eleventh shard exists.
for i in range(11):
    suffix = '_' + str(i)
    recorder = player_zero.GameRecorder(day+suffix+'.tfrecords')
    datasets.append(recorder.fetch())

In [113]:
# Re-write every example from the shard datasets into a single
# '<day>.tfrecords' file. Relies on `day` and `datasets` from the previous cell.
recorder = player_zero.GameRecorder(day+'.tfrecords')
recorder.open(overwrite=True)
# try/finally so the output file is closed even if reading a shard fails
# part-way through the merge.
try:
    for ds in datasets:
        for x in ds:
            # Each element is (board, {'policy': ..., 'value': ...}).
            board = x[0].numpy()
            policy = x[1]['policy'].numpy()
            # Passed through as fetched (no .numpy()), exactly as before.
            value = x[1]['value']
            recorder.write(board, policy, value)
finally:
    recorder.close()

In [8]:
# Open the merged record file and fetch it as an iterable dataset of
# (board, {'policy': ..., 'value': ...}) examples.
recorder = player_zero.GameRecorder('day0.tfrecords')
data = recorder.fetch()

In [9]:
# Count the examples by materialising the whole dataset once.
len(list(data))

36929

In [10]:
# Materialise the dataset into three parallel arrays in a SINGLE pass.
# Iterating `data` once guarantees that board[k], policy[k] and value[k] come
# from the same example, and reads the record file once instead of three times.
boards, policies, values = [], [], []
for example in data:
    # Each example is (board, {'policy': ..., 'value': ...}).
    targets = example[1]
    boards.append(example[0])
    policies.append(targets['policy'])
    values.append(targets['value'])

board = np.array(boards)
policy = np.array(policies)
value = np.array(values)

# Train NN

In [104]:
# Fetch the merged self-play examples again as the training source.
recorder = player_zero.GameRecorder('day0.tfrecords')
data = recorder.fetch()

In [105]:
# The record file holds 36929 examples (see the len(list(data)) output) written
# shard by shard, so neighbouring examples tend to come from the same games.
# tf.data only shuffles uniformly when the buffer is at least as large as the
# dataset; a buffer of 1000 mixes just a small sliding window.
# Raise this value if the dataset grows beyond it.
shuffle_buffer = 50000
train = data.shuffle(shuffle_buffer).batch(10)

In [None]:
# Train both heads for 30 passes over the shuffled, batched dataset; `history`
# keeps the object returned by fit for later inspection.
history = model.fit(train, epochs=30)

In [79]:
# Persist the current weights; this is the file that the
# model.load_weights('day1.h5') cell near the top of the notebook reads.
model.save_weights('day1.h5')