In [None]:
## Colab-only setup: mount Google Drive and move into the project folder.
## Outside Colab the import fails, the cell does nothing, and Run All keeps working.
import os

try:
    from google.colab import drive
except ImportError:
    # Not running on Colab: keep the current working directory untouched.
    drive = None

if drive is not None:
    drive.mount('/content/drive')
    os.chdir('/content/drive/MyDrive/UCLA/ECE239/Gomoku-project/')

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import gomoku, player_zero
from time import time

# for auto-reloading external modules
%load_ext autoreload
%autoreload 2

In [None]:
## check running GPU
# Displays the name of the GPU device TensorFlow can see; per the TF docs an
# empty string means no GPU is available.
tf.test.gpu_device_name()

In [68]:
# Build the network for the 9x9 board (the summary below shows a (9, 9, 1) input).
model = player_zero.net(9)

# One loss per output head; the dict keys given to compile() are the names of
# the model outputs they apply to.
# NOTE(review): BinaryCrossentropy scores each policy entry independently. If the
# policy head is a softmax over moves, CategoricalCrossentropy may be what is
# intended - confirm against player_zero.net.
policy_loss = keras.losses.BinaryCrossentropy(name='cross_entropy')
value_loss = keras.losses.MeanSquaredError(name='mse')
opt = keras.optimizers.Adam(learning_rate=1e-3)

head_losses = {'policy': policy_loss, 'value': value_loss}
model.compile(optimizer=opt, loss=head_losses)

In [69]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 9, 9, 1)]    0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 9, 9, 64)     640         input[0][0]                      
__________________________________________________________________________________________________
conv2 (Conv2D)                  (None, 9, 9, 64)     36928       conv1[0][0]                      
__________________________________________________________________________________________________
conv3 (Conv2D)                  (None, 9, 9, 64)     36928       conv2[0][0]                      
____________________________________________________________________________________________

In [70]:
# NOTE(review): left commented out, presumably so that re-running the notebook
# does not overwrite an existing day0.h5; uncomment to snapshot the current weights.
#model.save_weights('day0.h5')

In [4]:
# Restore the 'day0' checkpoint into `model`; day0.h5 is read from the working directory.
model.load_weights('day0.h5')

# Playtest

In [59]:
## playtest against human
# Fresh game on a size-9 board; the human moves are entered with game.play(...)
# in the following cells.
game = gomoku.Gomoku(9)
# Engine opponent backed by `model`. -1 is its side marker (p1 uses +1 in the
# self-play cell). NOTE(review): presumably -1 = white / second to move - confirm
# in player_zero.
p2 = player_zero.ZeroPlayer('p2', -1, game, model)

In [49]:
# Human move first, then let the engine answer if the game is still running.
game.play(4, 4)

started_at = time()
engine_to_move = not game.finished
if engine_to_move:
    reply = p2.play(game)
    game.play(*reply)
game.show()
# The reported time covers the engine's move and the board rendering.
elapsed = time() - started_at
print("time: {0:.3f}".format(elapsed))

black played (4, 4).
white played (3, 1).
black's turn.
   0 1 2 3 4
 0 . . . . . 
 1 . . . ○ . 
 2 . . . . . 
 3 . . . . . 
 4 . . . . ● 
time: 1.983


In [60]:
# Replay a fixed sequence of moves to set up a test position. After every move
# the engine's search tree is advanced by hand with the flattened index i*9 + j.
scripted_moves = [
    (4, 0), (4, 1),
    (1, 0), (1, 1),
    (2, 0), (2, 1),
    (3, 0),
    # (3, 1),  # disabled continuation
    # (8, 8),  # disabled continuation
]

for i, j in scripted_moves:
    game.play(i, j)
    p2.tree.updateHead(i * 9 + j)

game.show()

white played (2, 1).
black played (3, 0).
white's turn.
   0 1 2 3 4
 0 . ● ● ● ● 
 1 . ○ ○ . ○ 
 2 . . . . . 
 3 . . . . . 
 4 . . . . . 


In [71]:
## playtest against self
# One shared game and two engine players backed by the same network.
game = gomoku.Gomoku(9)
p1 = player_zero.ZeroPlayer('p1', +1, game, model)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)
# pa is the player to move next, pb the one waiting; the loop in the next cell
# swaps them after every move.
pa, pb = p1, p2

In [72]:
# Play the game to the end, alternating the two engine players, and time it.
started_at = time()
while not game.finished:
    move = pa.play(game)
    game.play(*move)
    pa, pb = pb, pa  # hand the turn to the other player
game.show()
print("time: {0:.3f}".format(time() - started_at))

white played (0, 8).
black played (6, 5).
game has ended. winner: black
   0 1 2 3 4 5 6 7 8
 0 . ● . . ● . . ● . 
 1 . ○ . . ● . . . . 
 2 . . . . ○ ● . . . 
 3 ○ . ○ . . ○ . . ○ 
 4 ● ● . . ○ . . ● . 
 5 . . . . ● ● ● ● ● 
 6 ○ . . . . ○ . . . 
 7 ● . ● . . ○ . . . 
 8 ○ . . ○ . ○ ○ . . 
time: 50.267


# Self-play games

In [None]:
## play 1000 games (10 record files x 100 games each)

day = 'day0'
N_FILES = 10           # one .tfrecords file per outer iteration
GAMES_PER_FILE = 100   # self-play games written into each file

for i in range(N_FILES):
    suffix = '_' + str(i)
    recorder = player_zero.GameRecorder(day+suffix+'.tfrecords')
    recorder.open()
    # try/finally: a crash or a manual interrupt during this long run must still
    # close the recorder so the file written so far is not left open.
    try:
        print(i, end=': ')
        for j in range(GAMES_PER_FILE):
            print(j, end=',')
            game = gomoku.Gomoku(9)
            # Both players share the network and the recorder.
            p1 = player_zero.ZeroPlayer('p1', +1, game, model, recorder)
            p2 = player_zero.ZeroPlayer('p2', -1, game, model, recorder)

            pa, pb = p1, p2
            while not game.finished:
                game.play(*pa.play(game))
                pa, pb = pb, pa
        print()
    finally:
        recorder.close()

In [38]:
## merge into one dataset
# Step 1: fetch each of the ten per-run record files as its own dataset.
day = 'day0'

shard_paths = [day + '_' + str(i) + '.tfrecords' for i in range(10)]
datasets = [player_zero.GameRecorder(path).fetch() for path in shard_paths]

In [39]:
# Step 2: copy every record of every shard into a single '<day>.tfrecords' file.
recorder = player_zero.GameRecorder(day+'.tfrecords')
recorder.open()
# try/finally so the output file is closed even if reading a shard fails midway.
try:
    for ds in datasets:
        for x in ds:
            # Each element is (board, {'policy': ..., 'value': ...}).
            board = x[0].numpy()
            policy = x[1]['policy'].numpy()
            # NOTE(review): value is passed on as fetched (no .numpy()), unlike the
            # other two fields - kept as in the original; confirm recorder.write accepts both.
            value = x[1]['value']
            recorder.write(board, policy, value)
finally:
    recorder.close()

In [40]:
# Re-open the merged day0 file and fetch it as `data`, the dataset the later
# cells count, inspect and train on.
recorder = player_zero.GameRecorder('day0.tfrecords')
data = recorder.fetch()

In [41]:
# Count the records by streaming through the dataset once.
sum(1 for _ in data)

33240

# Train NN

In [66]:
# Fetch the day1 self-play records.
# NOTE(review): data1 is not referenced by any other visible cell - the training
# cells below use `data` (day0). Confirm which dataset is meant to be trained on.
recorder = player_zero.GameRecorder('day1.tfrecords')
data1 = recorder.fetch()

In [16]:
# Pull the dataset into NumPy arrays for inspection.
# The dataset is read in a single pass and the three fields of each record are
# collected together, so board[k], policy[k] and value[k] always come from the
# same record (three separate passes only line up if iteration order is
# deterministic, and they read the file three times).
records = [(x[0], x[1]['policy'], x[1]['value']) for x in data]
board = np.array([rec[0] for rec in records])
policy = np.array([rec[1] for rec in records])
value = np.array([rec[2] for rec in records])
del records  # the arrays hold the data now; drop the intermediate list

In [17]:
# Index of the record with the 100th-largest value target (argsort is ascending,
# so [-100] counts from the top).
np.argsort(value)[-100]

825

In [18]:
# Look up the value target at index 825, the index reported by the argsort cell above.
value[825]

1.0

In [9]:
# Input pipeline for training.
# The records were written game by game, so neighbouring records are highly
# correlated. tf.data only shuffles within its buffer, and a 1000-element buffer
# mixes just a small window of the 33240 records counted above. A buffer at least
# as large as the dataset gives a uniform shuffle; the records are small boards,
# so keeping them all in memory is cheap.
SHUFFLE_BUFFER = 50000  # >= number of records in the dataset
BATCH_SIZE = 20
train = data.shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE)

In [10]:
# Train on the shuffled, batched self-play data; `history` keeps the object
# returned by fit().
# NOTE(review): the saved output shows "Epoch N/10" while this call asks for 50
# epochs - the output apparently comes from an earlier run; re-run to refresh it.
history = model.fit(train, epochs=50)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
# Save the trained weights as the 'day1' checkpoint in the working directory.
model.save_weights('day1.h5')