In [None]:
## only run if using google Colab
## (on any other runtime this cell is a no-op, so "Run All" keeps working)
import os

try:
    from google.colab import drive
except ImportError:
    # google.colab is only importable inside a Colab runtime; elsewhere keep
    # the current working directory and skip mounting.
    drive = None

if drive is not None:
    # Mount Google Drive and switch to the project folder so that the relative
    # paths used by later cells resolve against it.
    drive.mount('/content/drive')
    os.chdir('/content/drive/MyDrive/UCLA/ECE239/Gomoku-project/')

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import gomoku, player_basic, player_zero
from time import time

# for auto-reloading external modules
# (autoreload mode 2 reloads imported modules before each cell executes, so
#  edits to the project's .py files are picked up without restarting the kernel)
%load_ext autoreload
%autoreload 2

In [None]:
## check running GPU
## (the cell output is the device name TensorFlow reports; an empty string means no GPU was found)
tf.test.gpu_device_name()

In [2]:
## Global parameters ==========
SIZE = 9        # board side length: passed to gomoku.Gomoku / player_zero.net and used to reshape policies to SIZE x SIZE
WIN = 5         # presumably the number of stones in a row needed to win; not referenced in the cells shown here -- TODO confirm
N_ITER = 1000   # second argument of ZeroPlayer.play(); presumably search iterations per move -- TODO confirm
## ===========================

In [3]:
# Build the network for a SIZE x SIZE board via player_zero.net
# (l2=1e-5 is presumably the L2 regularisation strength -- confirm in player_zero).
model = player_zero.net(SIZE, l2=1e-5)
# Uncomment to start from weights previously saved in 'day0.h5'.
#model.load_weights('day0.h5')
# Print the layer-by-layer summary of the model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 9, 9, 1)]    0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 9, 9, 64)     640         input[0][0]                      
__________________________________________________________________________________________________
conv2 (Conv2D)                  (None, 9, 9, 64)     36928       conv1[0][0]                      
__________________________________________________________________________________________________
conv3 (Conv2D)                  (None, 9, 9, 64)     36928       conv2[0][0]                      
______________________________________________________________________________________________

# Playtest

**Human vs computer**

In [None]:
# Start a new game on a SIZE x SIZE board; the computer is created as 'p2' with side -1
# and answers each human move entered in the next cell.
game = gomoku.Gomoku(SIZE)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)

In [None]:
## play your move ==========
game.play(4, 4)
## =========================

# Let the computer answer (unless the move above already ended the game),
# then redraw the board and report the elapsed wall-clock time in seconds.
t = time()
game_over = game.finished
if not game_over:
    reply = p2.play(game, N_ITER)
    game.play(*reply)
game.show()
print(f"time: {time() - t:.3f}")

**Create a fictional game**

In [None]:
# Scripted position: the moves below are played in order. Assuming game.play
# switches the side to move after every call, the first side ends up with four
# stones on (0,0)..(3,0), the second side with three on (0,1)..(2,1), and the
# second side is next to move.
game = gomoku.Gomoku(SIZE)

opening = [
    (0, 0), (0, 1),
    (1, 0), (1, 1),
    (2, 0), (2, 1),
    (3, 0),
    # (3, 1),
]
for move in opening:
    game.play(*move)

game.show()

# Computer player for side -1; the PrintRecorder is passed as its recorder argument.
p2 = player_zero.ZeroPlayer('p2', -1, game, model, player_zero.PrintRecorder())

In [None]:
# Ask p2 for a move in the scripted position, passing 2000 instead of the global N_ITER.
# The returned move is only displayed as the cell output; it is not played on the board here.
p2.play(game, 2000)

In [None]:
# Display the `N` attribute of the search tree's previous head node
# (presumably the visit counts from the search just run -- TODO confirm in player_zero).
p2.tree.prev_head.N

**Computer vs computer (self-play)**

In [4]:
## playtest against self
## both players use the same network `model`; p1 takes side +1 and p2 side -1
game = gomoku.Gomoku(SIZE)
p1 = player_zero.ZeroPlayer('p1', +1, game, model)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)
## pa is the player about to move, pb the one waiting (the next cell swaps them after every move)
pa, pb = p1, p2

In [5]:
# Self-play until the game reports it is finished: the player held in `pa`
# moves, then the two roles are swapped.
# (Replace `while` with `if` to advance a single move per run of this cell.)
t = time()
while not game.finished:
    move = pa.play(game, N_ITER)
    game.play(*move)
    pb, pa = pa, pb
game.show()
elapsed = time() - t
print(f"time: {elapsed:.3f}")

white played (1, 5).
black played (7, 2).
game has ended. winner: black
   0 1 2 3 4 5 6 7 8
 0 ○ ○ . ● ○ . . . ● 
 1 . . ● . . . . ○ . 
 2 . ○ . . ● ● ● ● . 
 3 . ● . . ○ . ● ○ . 
 4 . . ○ ● ○ ● . . . 
 5 . ○ ○ . ● . ● . ○ 
 6 . . . ● ○ ○ . . ● 
 7 . ● . . ● ● . . . 
 8 ○ ○ . . . . . . ○ 
time: 124.183


# Gather self-play games

In [None]:
## ==========
BATCH_SIZE = 100
START = 0
END = 10
PREFIX = 'day0'
## ==========
## Games are flushed to disk in batches of BATCH_SIZE so that an interrupted
## run loses at most one batch.
## Output files are named 'PREFIX_i.tfrecords' for i = START, START+1, ..., END-1

for i in range(START, END):
    filename = f"{PREFIX}_{i}.tfrecords"
    with player_zero.GameRecorder(filename, SIZE) as recorder:
        print(f"batch {i:2d}: ", end='')
        for j in range(BATCH_SIZE):
            # progress marker: index of the game being played within this batch
            print(j, end=',')
            game = gomoku.Gomoku(SIZE)
            # both sides use the same model and share the batch's recorder
            p1 = player_zero.ZeroPlayer('p1', +1, game, model, recorder)
            p2 = player_zero.ZeroPlayer('p2', -1, game, model, recorder)

            # pa moves next, pb waits; swap after every move until the game ends
            pa, pb = p1, p2
            while not game.finished:
                move = pa.play(game, N_ITER)
                game.play(*move)
                pb, pa = pa, pb
        print()

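Examine games collected (this reads `day0.tfrecords`, which is written by the merge cell in the "Merge data and augment" section below, so run that cell first)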

In [None]:
# Open the record file 'day0.tfrecords' through a GameRecorder and fetch its contents
# (a tf.data-style dataset, judging by how `data` is iterated, shuffled and batched in later cells).
recorder = player_zero.GameRecorder('day0.tfrecords', SIZE)
data = recorder.fetch()

In [None]:
# Number of records: materialises the whole dataset into a list just to count it.
len(list(data))

In [None]:
# Materialise the dataset once, then slice the fields out of that single list.
# Building the three arrays from one pass guarantees that row k of
# board / policy / value belongs to the same record, and the record file is
# read once instead of three times.
examples = list(data)
board = np.array([x[0] for x in examples])
policy = np.array([x[1]['policy'] for x in examples])
value = np.array([x[1]['value'] for x in examples])

In [None]:
# Print record `i`: channel 0 of its board, followed by its policy and its value entries.
i = 0
print(board[i][..., 0])
print(policy[i])
print(value[i])

# Merge data and augment

In [None]:
## merge into one dataset
## Fetch the contents of 'day0_0.tfrecords' ... 'day0_9.tfrecords', one dataset per file.
## (These names mirror PREFIX = 'day0', START = 0, END = 10 of the gathering cell;
##  keep the two in sync when those values change.)
datasets = [
    player_zero.GameRecorder(f"day0_{i}.tfrecords", SIZE).fetch()
    for i in range(10)
]

In [None]:
# Copy every record into the merged file 'day0.tfrecords' together with its
# rotated and mirrored variants (8 orientations per record in total).
# Board and policy are transformed identically; the value is passed through unchanged.
with player_zero.GameRecorder('day0.tfrecords', SIZE) as recorder:
    for ds in datasets:
        for x in ds:
            board = x[0].numpy()
            policy = x[1]['policy'].numpy()
            value = x[1]['value']
            # original orientation first, exactly as stored
            recorder.write(board, policy, value)
            ## add rotated and flipped
            policy_square = policy.reshape(SIZE, SIZE)
            board_flipped = np.flipud(board)
            policy_flipped = np.flipud(policy_square)
            # the three remaining rotations of the original ...
            for k in (1, 2, 3):
                recorder.write(np.rot90(board, k=k), np.rot90(policy_square, k=k).flatten(), value)
            # ... then the vertically flipped position and its three rotations
            recorder.write(board_flipped, policy_flipped.flatten(), value)
            for k in (1, 2, 3):
                recorder.write(np.rot90(board_flipped, k=k), np.rot90(policy_flipped, k=k).flatten(), value)

# Train NN

In [None]:
def cross_entropy(y_true, y_pred):
    """Categorical cross-entropy between a target and a predicted distribution.

    Computes ``-sum_i y_true[i] * log(y_pred[i])`` over the last axis.

    Args:
        y_true: target probabilities; the last axis indexes the classes.
        y_pred: predicted probabilities, same shape as ``y_true``.

    Returns:
        The per-sample loss, i.e. the input shape with the last axis reduced away.
    """
    # Sum (not mean) over the class axis: averaging would divide the loss by
    # the number of classes and shrink this term relative to the value loss.
    # The 1e-10 offset keeps log() finite when a predicted probability is 0.
    return -tf.reduce_sum(y_true * tf.math.log(y_pred + 1e-10), axis=-1)

# Adam optimiser with learning rate 1e-3.
opt = keras.optimizers.Adam(learning_rate=1e-3)
# Alternative policy loss, kept for reference but disabled:
#policy_loss = keras.losses.BinaryCrossentropy()
policy_loss = cross_entropy
value_loss = keras.losses.MeanSquaredError()

# One loss per entry of the dict: cross_entropy under the key 'policy',
# mean squared error under the key 'value'.
model.compile(optimizer=opt, 
              loss={'policy': policy_loss,
                    'value': value_loss})

In [None]:
# Load the merged (augmented) records from 'day0.tfrecords' as the training data.
recorder = player_zero.GameRecorder('day0.tfrecords', SIZE)
data = recorder.fetch()

In [None]:
# Shuffle with a 10000-element buffer, then group into batches of 10.
# NOTE(review): if the file holds more than 10000 records the shuffle is only
# local, and the merge step writes the 8 variants of each position back to back
# -- consider a buffer at least as large as the dataset.
train = data.shuffle(10000).batch(10)

In [None]:
# Train for 30 epochs on the shuffled, batched dataset; the return value of fit() is kept in `history`.
history = model.fit(train, epochs=30)

In [None]:
# Save the model weights after training to 'day1.h5'.
model.save_weights('day1.h5')