In [None]:
## Colab only: mount Google Drive and switch to the project directory.
## Outside Colab the `google.colab` package cannot be imported, so this cell
## does nothing there instead of stopping a "Restart & Run All".
import os

try:
    from google.colab import drive
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    drive.mount('/content/drive')
    os.chdir('/content/drive/MyDrive/UCLA/ECE239/Gomoku-project/')

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import gomoku, player_basic, player_zero4 as player_zero
from time import time

# for auto-reloading external modules
%load_ext autoreload
%autoreload 2

In [None]:
## check running GPU
## (the call returns an empty string when TensorFlow finds no GPU device)
tf.test.gpu_device_name()

In [2]:
## Global parameters ==========
SIZE = 6      # passed to gomoku.Gomoku, player_zero.net and player_zero.GameRecorder below
WIN = 4       # not referenced anywhere in this notebook; presumably stones in a row needed to win — TODO confirm
N_ITER = 500  # second argument of every ZeroPlayer.play() call; presumably search iterations per move — TODO confirm
## ===========================

In [3]:
# Build the network for board size SIZE, restore the weights stored in
# 'day0.h5', and print the layer summary.
model = player_zero.net(SIZE)
model.load_weights('day0.h5')
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 6, 6, 4)]    0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 6, 6, 32)     1184        input[0][0]                      
__________________________________________________________________________________________________
conv2 (Conv2D)                  (None, 6, 6, 64)     18496       conv1[0][0]                      
__________________________________________________________________________________________________
conv3 (Conv2D)                  (None, 6, 6, 128)    73856       conv2[0][0]                      
______________________________________________________________________________________________

In [76]:
# Uncomment to write the current weights to 'day0.h5' (the same file the cell above loads).
#model.save_weights('day0.h5')

# Playtest

**Human vs cp**

In [7]:
# Fresh board for a human-vs-network game. The network is 'p2' with side -1
# (it shows up as white in the output of the next cell); the human enters the
# other side's moves by hand.
game = gomoku.Gomoku(SIZE)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)

In [8]:
## play your move ==========
game.play(4, 4)
## =========================

# Let the network answer (unless the human move ended the game), then show
# the board and how long the reply took.
start = time()
if not game.finished:
    reply = p2.play(game, N_ITER)
    game.play(*reply)
game.show()
elapsed = time() - start
print("time: {0:.3f}".format(elapsed))

black played (4, 4).
white played (2, 2).
black's turn.
   0 1 2 3 4 5
 0 . . . . . . 
 1 . . . . . . 
 2 . . ○ . . . 
 3 . . . . . . 
 4 . . . . ● . 
 5 . . . . . . 
time: 1.732


In [None]:
# Display attribute `P` of the search tree's previous head node
# (presumably the prior move probabilities — TODO confirm in player_zero).
p2.tree.prev_head.P

**Create a fictional game**

In [6]:
# Hand-built position: the two sides alternate, each extending its own line.
game = gomoku.Gomoku(SIZE)

opening = [
    (1, 2), (1, 3),
    (2, 2), (2, 3),
    (3, 2), (3, 3),
    (4, 2),
]
for x, y in opening:
    game.play(x, y)

# The player is attached after the seventh move, with a PrintRecorder and a
# tree temperature of 0.5, and before the eighth move is played.
p2 = player_zero.ZeroPlayer('p2', +1, game, model, player_zero.PrintRecorder())
p2.tree.temp=.5

game.play(4, 3)

game.show()

black played (4, 2).
white played (4, 3).
black's turn.
   0 1 2 3 4 5
 0 . . . . . . 
 1 . . . . . . 
 2 . ● ● ● ● . 
 3 . ○ ○ ○ ○ . 
 4 . . . . . . 
 5 . . . . . . 


In [None]:
# Ask the player for its move in the hand-built position; the returned value
# is displayed as the cell output.
p2.play(game, N_ITER)

In [None]:
# Display attribute `N` of the search tree's previous head node
# (presumably per-move visit counts — TODO confirm in player_zero).
p2.tree.prev_head.N

**Cp vs cp**

In [9]:
## playtest against self
game = gomoku.Gomoku(SIZE)
p1 = player_zero.ZeroPlayer('p1', +1, game, model)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)
pa, pb = p1, p2

In [10]:
start = time()
# Replace `while` with `if` to advance one move per execution of this cell.
while not game.finished:
    move = pa.play(game, N_ITER)
    game.play(*move)
    pa, pb = pb, pa
game.show()
elapsed = time() - start
print("time: {0:.3f}".format(elapsed))

white played (1, 4).
black played (2, 5).
game has ended. winner: black
   0 1 2 3 4 5
 0 . . . ● ○ ○ 
 1 . . ● . . ○ 
 2 . ○ ● . . . 
 3 . . ● . ● . 
 4 . ○ ● ○ ● . 
 5 . . ● . ○ . 
time: 29.565


# Gather self-play games

In [60]:
## ==========
BATCH_SIZE = 100
START = 0
END = 10
PREFIX = 'day0'
## ==========
## Games are written in batches of BATCH_SIZE, one file per batch, so data is
## saved frequently. Files are named 'PREFIX_i.tfrecords' for
## i = START, START+1, ..., END-1.

for batch_idx in range(START, END):
    filename = PREFIX + '_' + str(batch_idx) + '.tfrecords'
    with player_zero.GameRecorder(filename, SIZE) as recorder:
        print("batch {0:2d}: ".format(batch_idx), end='')
        for game_idx in range(BATCH_SIZE):
            print(game_idx, end=',')
            game = gomoku.Gomoku(SIZE)
            # 'p1' (+1) moves first; both sides share the model and the recorder.
            players = [
                player_zero.ZeroPlayer('p1', +1, game, model, recorder),
                player_zero.ZeroPlayer('p2', -1, game, model, recorder),
            ]
            turn = 0
            while not game.finished:
                game.play(*players[turn].play(game, N_ITER))
                turn = 1 - turn
        print()

batch  0: 0,


Examine games collected

In [61]:
# Read back 'day0_0.tfrecords', the first batch file name produced by the
# self-play loop when PREFIX = 'day0' and START = 0.
recorder = player_zero.GameRecorder('day0_0.tfrecords', SIZE)
data = recorder.fetch()

In [62]:
# Number of samples in the file (this iterates over the whole dataset once).
len(list(data))

59

In [63]:
# Materialise the dataset once and slice the three arrays from that single
# pass. Iterating `data` separately per array reads the file three times and
# would misalign rows if the iteration order ever differed between passes.
samples = list(data)
board = np.array([sample[0] for sample in samples])
policy = np.array([sample[1]['policy'] for sample in samples])
value = np.array([sample[1]['value'] for sample in samples])

In [None]:
# Show sample `i`: each of the four board planes, then its policy and value targets.
i = 0
for plane in range(4):
    print(board[i][..., plane])
print(policy[i])
print(value[i])

# Merge data and augment

In [None]:
## merge into one dataset==============
innames = [
    'day0_0.tfrecords',
    'day0_1.tfrecords',
    'day0_2.tfrecords',
    'day0_3.tfrecords',
]
# Must be exactly the path the training section loads with
# GameRecorder('day0.tfrecords', SIZE); note the plural '.tfrecords' suffix.
outname = 'day0.tfrecords'
## ====================================

# Open every input first, then copy all samples into the single output file.
datasets = []

for inname in innames:
    recorder = player_zero.GameRecorder(inname, SIZE)
    datasets.append(recorder.fetch())

with player_zero.GameRecorder(outname, SIZE) as recorder:
    for ds in datasets:
        for x in ds:
            # x is indexed as (board, {'policy': ..., 'value': ...})
            board = x[0].numpy()
            policy = x[1]['policy'].numpy()
            value = x[1]['value']
            recorder.write(board, policy, value)

In [None]:
## augment dataset=====================
## Every input file is listed exactly once (same list as the merge cell).
innames = [
    'day0_0.tfrecords',
    'day0_1.tfrecords',
    'day0_2.tfrecords',
    'day0_3.tfrecords',
]
outname = 'day0aug.tfrecords'
## ====================================

datasets = []

for inname in innames:
    recorder = player_zero.GameRecorder(inname, SIZE)
    datasets.append(recorder.fetch())

with player_zero.GameRecorder(outname, SIZE) as recorder:
    for ds in datasets:
        for x in ds:
            board = x[0].numpy()
            policy = x[1]['policy'].numpy().reshape(SIZE, SIZE) # make into square
            value = x[1]['value']
            # Write the 8 symmetries of the square: the four rotations of the
            # sample, then the four rotations of its up-down mirror image.
            # Board and policy get the same transform; the value is unchanged.
            for mirrored in (False, True):
                if mirrored:
                    board = np.flipud(board)
                    policy = np.flipud(policy)
                for k in range(4):
                    recorder.write(np.rot90(board, k=k),
                                   np.rot90(policy, k=k).flatten(),
                                   value)

# Train NN

In [None]:
def cross_entropy(y_true, y_pred):
    """Policy loss: negated mean over the last axis of ``y_true * log(y_pred)``.

    ``1e-10`` is added to ``y_pred`` before the logarithm so that a predicted
    probability of exactly zero does not produce ``-inf``.

    NOTE(review): the reduction is a mean, not a sum, so the result equals the
    categorical cross-entropy divided by the size of the last axis. Confirm
    that this scaling relative to the value loss is intended.
    """
    log_pred = tf.math.log(y_pred + 1e-10)
    weighted = y_true * log_pred
    return -tf.reduce_mean(weighted, axis=-1)

# Adam optimizer plus one loss per entry of the loss dict: the custom
# cross-entropy for 'policy' and mean squared error for 'value'.
opt = keras.optimizers.Adam(learning_rate=1e-3)
losses = {
    'policy': cross_entropy,
    'value': keras.losses.MeanSquaredError(),
}
model.compile(optimizer=opt, loss=losses)

In [None]:
# Load the training samples from 'day0.tfrecords'.
recorder = player_zero.GameRecorder('day0.tfrecords', SIZE)
data = recorder.fetch()

In [None]:
# Shuffle with a buffer of 10000 and group into batches of 10
# (assumes fetch() returns a tf.data.Dataset — TODO confirm in player_zero).
train = data.shuffle(10000).batch(10)

In [None]:
# Train for 30 epochs; the object returned by fit() is kept in `history`.
history = model.fit(train, epochs=30)

In [None]:
# Save the trained weights under a new name, leaving 'day0.h5' untouched.
model.save_weights('day1.h5')