In [None]:
## Colab-only setup: mount Google Drive and switch to the project folder.
## Outside Colab the `google.colab` package cannot be imported, so this cell
## becomes a no-op instead of raising and breaking "Restart & Run All".
import os

try:
    from google.colab import drive
except ImportError:
    drive = None  # not running on Colab

if drive is not None:
    drive.mount('/content/drive')
    # Later cells open weight/record files by relative path, so the working
    # directory must be the project folder on the mounted Drive.
    os.chdir('/content/drive/MyDrive/UCLA/ECE239/Gomoku-project/')

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import gomoku4, player_basic, player_zero
from time import time

# for auto-reloading external modules
%load_ext autoreload
%autoreload 2

In [None]:
## check running GPU
# Displays the name of the GPU device TensorFlow can use; an empty string
# means no GPU is visible and everything below will run on the CPU.
tf.test.gpu_device_name()

In [2]:
# One loss object per named output head of the network.
# NOTE(review): binary cross-entropy scores every policy output independently;
# if the policy head is a softmax over moves, categorical cross-entropy may be
# the intended loss -- confirm against player_zero.net before changing it.
policy_loss = keras.losses.BinaryCrossentropy(name='cross_entropy')
value_loss = keras.losses.MeanSquaredError(name='mse')
losses = {'policy': policy_loss,
          'value': value_loss}

# Adam with a fixed learning rate of 1e-3.
opt = keras.optimizers.Adam(learning_rate=1e-3)

# Build the network (argument 6 matches the 6x6 input shown by model.summary())
# and attach the optimizer and the per-head losses.
model = player_zero.net(6)
model.compile(optimizer=opt, loss=losses)

In [3]:
# Print the layer-by-layer architecture, output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 6, 6, 1)]    0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 6, 6, 64)     640         input[0][0]                      
__________________________________________________________________________________________________
conv2 (Conv2D)                  (None, 6, 6, 64)     36928       conv1[0][0]                      
__________________________________________________________________________________________________
conv3 (Conv2D)                  (None, 6, 6, 64)     36928       conv2[0][0]                      
______________________________________________________________________________________________

In [27]:
# Uncomment to write the current model weights to 'day0.h5' (the file that the
# load_weights cell reads). Left disabled so a re-run does not overwrite it.
#model.save_weights('day0.h5')

In [5]:
# Load previously saved weights from 'day0.h5' (path is relative to the
# current working directory) into the model built above.
model.load_weights('day0.h5')

# Playtest

In [54]:
## playtest against human
# Start a fresh game; the argument 6 is the same size passed to player_zero.net.
game = gomoku4.Gomoku4(6)
# Network-backed opponent playing side -1 (presumably white, the second
# player -- confirm in player_zero). The human enters moves by calling
# game.play(...) directly in the following cells.
p2 = player_zero.ZeroPlayer('p2', -1, game, model)

In [19]:
# Human turn: edit these coordinates before each run of this cell.
game.play(1, 2)

# Engine reply, timed. Skipped when the human move already ended the game.
t = time()
if not game.finished:
    reply = p2.play(game)
    game.play(*reply)

game.show()
elapsed = time() - t
print("time: {0:.3f}".format(elapsed))

white played (4, 1).
black played (1, 2).
game has ended. winner: black
   0 1 2 3 4 5
 0 . ○ . . . . 
 1 . . . . ○ ○ 
 2 . ● ● ● ● . 
 3 . . . . . . 
 4 . . . . . . 
 5 . . . . . . 
time: 0.004


In [55]:
# Scripted opening: play each move on the board and move the head of p2's
# search tree to the same move (presumably to keep the tree in sync with the
# board -- confirm in player_zero). The tree is addressed by a flat index:
# first coordinate * 6 + second coordinate.
opening = [
    (2, 2), (2, 3),
    (3, 2), (3, 3),
    # (4, 2), (4, 3),   # optional third pair, currently disabled
]

for a, b in opening:
    game.play(a, b)
    p2.tree.updateHead(a*6 + b)

game.show()

black played (3, 2).
white played (3, 3).
black's turn.
   0 1 2 3 4 5
 0 . . . . . . 
 1 . . . . . . 
 2 . . ● ● . . 
 3 . . ○ ○ . . 
 4 . . . . . . 
 5 . . . . . . 


In [56]:
# Run p2's search on the current position and display the move it picks.
# `n_iter` was never assigned anywhere in this notebook, so the original call
# raised NameError on a fresh kernel. 1000 is the value the self-play cell
# passes via the same keyword, and the N array printed below sums to 1000.
p2.play(game, n_iter=1000)

(2, 5)

In [57]:
# Inspect the N array stored on the tree's previous head node: 36 entries,
# i.e. one per cell of the 6x6 board (presumably per-move visit counts from
# the search that was just run -- confirm in player_zero).
p2.tree.prev_head.N

array([31., 44., 24., 30., 37., 32., 28., 37., 33., 29., 25., 26., 27.,
       33.,  0.,  0., 28., 42., 33., 25.,  0.,  0., 29., 26., 35., 29.,
       27., 45., 28., 49., 30., 26., 33., 24., 29., 26.], dtype=float32)

In [64]:
## playtest against self
game = gomoku4.Gomoku4(6)
p1 = player_zero.ZeroPlayer('p1', +1, game, model)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)
pa, pb = p1, p2

In [65]:
# Play the whole game out, timing it. (Swap the `while` for
# `if not game.finished:` to step through a single move per run.)
t = time()
while not game.finished:
    # The side to move searches with 1000 iterations, then the roles swap.
    move = pa.play(game, n_iter=1000)
    game.play(*move)
    pa, pb = pb, pa

game.show()
elapsed = time() - t
print("time: {0:.3f}".format(elapsed))

black played (0, 5).
white played (4, 2).
game has ended. winner: white
   0 1 2 3 4 5
 0 . . . . . . 
 1 ● . ● . . ○ 
 2 . . . . ○ . 
 3 ● . ● . ○ . 
 4 . . . . ○ . 
 5 ● . . . ○ . 
time: 37.953


# Self-play games

In [38]:
## play 1000 games: 10 record files x 100 self-play games each

day = 'day0'

for i in range(10):
    suffix = '_' + str(i)
    recorder = player_zero.GameRecorder(day+suffix+'.tfrecords', size=6)
    # try/finally closes the record file exactly once whether the batch
    # completes, fails or is interrupted, and lets the error surface.
    # The previous bare `except:` hid every failure (KeyboardInterrupt
    # included) and silently carried on with the next file.
    try:
        recorder.open()
        print(i, end=': ')
        for j in range(100):
            print(j, end=',')
            # New game and new players per game; both players are handed the
            # same recorder so the whole game lands in one file.
            game = gomoku4.Gomoku4(6)
            p1 = player_zero.ZeroPlayer('p1', +1, game, model, recorder)
            p2 = player_zero.ZeroPlayer('p2', -1, game, model, recorder)

            # pa moves, then the roles swap, until the game is over.
            pa, pb = p1, p2
            while not game.finished:
                game.play(*pa.play(game))
                pa, pb = pb, pa
        print()
    finally:
        recorder.close()

0: 0,


In [37]:
# Close the current recorder by hand (useful when the self-play cell was
# stopped before it reached its own close call).
recorder.close()

In [112]:
## merge into one dataset
# Step 1: open every per-batch record file and collect its dataset.
# NOTE(review): this reads 11 files (_0 .. _10) while the generation cell in
# this notebook writes 10 (_0 .. _9) -- confirm that '<day>_10.tfrecords'
# exists, or align the two ranges.
day = 'day0'
datasets = []

for i in range(11):
    path = day + '_' + str(i) + '.tfrecords'
    recorder = player_zero.GameRecorder(path, size=6)
    datasets.append(recorder.fetch())

In [113]:
# Step 2 of the merge: rewrite every record from the collected datasets into
# a single '<day>.tfrecords' file, replacing any existing file.
recorder = player_zero.GameRecorder(day+'.tfrecords', size=6)
recorder.open(overwrite=True)
# try/finally so the output file is closed even if reading a dataset or
# writing a record raises part-way through.
try:
    for ds in datasets:
        for x in ds:
            # Each element is (board, {'policy': ..., 'value': ...}).
            board = x[0].numpy()
            policy = x[1]['policy'].numpy()
            # NOTE(review): value is passed on as a tensor while board and
            # policy are converted to numpy -- confirm recorder.write accepts both.
            value = x[1]['value']
            recorder.write(board, policy, value)
finally:
    recorder.close()

In [41]:
# Sanity check on a single batch file: read 'day0_0.tfrecords' back as a dataset.
recorder = player_zero.GameRecorder('day0_0.tfrecords', size=6)
data = recorder.fetch()

In [42]:
# Count the records by materialising the whole dataset into a list.
len(list(data))

7

In [43]:
# Convert the dataset into three parallel arrays. A single pass keeps
# board / policy / value row-aligned even if iteration order is not stable
# between passes, and reads the record file once instead of three times.
boards, policies, values = [], [], []
for x in data:
    # Each element is (board, {'policy': ..., 'value': ...}).
    boards.append(x[0])
    policies.append(x[1]['policy'])
    values.append(x[1]['value'])

board = np.array(boards)
policy = np.array(policies)
value = np.array(values)

# Train NN

In [104]:
# Read the merged self-play records for training. size=6 is passed explicitly,
# as in every other GameRecorder call in this notebook, so the records are
# read with the same board size they were written with rather than relying on
# the constructor's default.
recorder = player_zero.GameRecorder('day0.tfrecords', size=6)
data = recorder.fetch()

In [105]:
# Shuffle with a 1000-element buffer, then group the records into batches of 10.
train = data.shuffle(1000).batch(10)

In [None]:
# Train for 30 epochs on the batched self-play data; the value returned by
# fit is kept in `history` for later inspection.
history = model.fit(train, epochs=30)

In [79]:
# Save the current weights as 'day1.h5', leaving 'day0.h5' untouched.
model.save_weights('day1.h5')