In [None]:
## only run if using google Colab
from google.colab import drive
# Mount Google Drive at /content/drive so the project folder below is reachable.
drive.mount('/content/drive')

import os
# Switch the working directory to the project folder: every weight file and
# .tfrecords path used later in this notebook is relative.
os.chdir('/content/drive/MyDrive/UCLA/ECE239/Gomoku-project/')

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import gomoku, player_basic, player_zero, player_zero4
from time import time

# for auto-reloading external modules
%load_ext autoreload
%autoreload 2

In [None]:
## Global parameters ==========
SIZE = 9  # board side length; passed to gomoku.Gomoku, the network builder and GameRecorder
WIN = 5  # second argument of gomoku.Gomoku -- presumably stones in a row needed to win; confirm in gomoku
N_ITER = 500  # passed to every ZeroPlayer -- presumably search iterations per move; confirm in player_zero
## ===========================

# Gather self-play games

In [None]:
# Build the network for a SIZE x SIZE board and restore previously saved weights.
# This `model` is the one handed to both players in the self-play loop.
model = player_zero4.net(SIZE)
model.load_weights('4feat 9x9/day0.h5')

In [None]:
## ==========
BATCH_SIZE = 50
START = 0
END = 15
PREFIX = 'day0'
## ==========
## Games are written out one batch per file so that an interrupted run
## does not lose everything collected so far.
## Output files are named 'PREFIX_i.tfrecords' for i = START, START+1, ..., END-1

for i in range(START, END):
    filename = '{}_{}.tfrecords'.format(PREFIX, i)
    with player_zero.GameRecorder(filename, SIZE) as recorder:
        # progress line: "batch  i: 0,1,2,..." with one number per game started
        print("batch {0:2d}: ".format(i), end='')
        for j in range(BATCH_SIZE):
            print(j, end=',')
            game = gomoku.Gomoku(SIZE, WIN)
            # Self-play: both sides use the same network and log to the same recorder.
            players = [
                player_zero.ZeroPlayer('p1', +1, game, model, N_ITER, recorder),
                player_zero.ZeroPlayer('p2', -1, game, model, N_ITER, recorder),
            ]
            turn = 0  # index of the player to move; 'p1' opens every game
            while not game.finished:
                game.play(*players[turn].play(game))
                turn = 1 - turn
        print()

**Examine games collected**

In [None]:
# Open the first self-play batch for reading. `fetch()` yields records that the
# cells below index as x[0] (board) and x[1]['policy'] / x[1]['value'].
recorder = player_zero.GameRecorder('day0_0.tfrecords', SIZE)
data = recorder.fetch()

In [None]:
# number of records in the batch, counted by iterating once over `data`
sum(1 for _ in data)

In [None]:
# Read the records once and build all three arrays from that single pass.
# Iterating `data` separately for each array would re-read the file three times
# and would only keep board/policy/value index-aligned if every pass happened
# to yield the records in the same order.
records = list(data)
board = np.array([x[0] for x in records])
policy = np.array([x[1]['policy'] for x in records])
value = np.array([x[1]['value'] for x in records])

In [None]:
i = 0  # index of the record to inspect
# the four feature planes of record i, printed one 2-D slice at a time
for plane in range(4):
    print(board[i][..., plane])
# followed by its training targets
print(policy[i])
print(value[i])

# Merge data and augment

In [None]:
## merge into one dataset==============
# 'day0_0.tfrecords' ... 'day0_14.tfrecords'
innames = ['day0_{}.tfrecords'.format(k) for k in range(15)]
outname = 'day0.tfrecords'
## ====================================

# One reader per input file, in the order listed above.
datasets = [player_zero.GameRecorder(inname, SIZE).fetch() for inname in innames]

# Copy every record, unchanged, into the single output file.
with player_zero.GameRecorder(outname, SIZE) as recorder:
    for ds in datasets:
        for x in ds:
            features, targets = x[0], x[1]
            recorder.write(
                features.numpy(),
                targets['policy'].numpy(),
                targets['value'],
            )

In [None]:
## augment dataset=====================
innames = ['day0.tfrecords']
outname = 'X.tfrecords'
## ====================================

datasets = [player_zero.GameRecorder(inname, SIZE).fetch() for inname in innames]

# Each input record is written 8 times: the 4 quarter-turn rotations of the
# position as recorded, then the 4 rotations of its up-down mirror image.
# The policy vector is viewed as a SIZE x SIZE grid so it receives exactly the
# same transform as the board; the value target is unaffected.
with player_zero.GameRecorder(outname, SIZE) as recorder:
    for ds in datasets:
        for x in ds:
            board = x[0].numpy()
            policy = x[1]['policy'].numpy().reshape(SIZE, SIZE)
            value = x[1]['value']
            for mirrored in (False, True):
                if mirrored:
                    board = np.flipud(board)
                    policy = np.flipud(policy)
                for k in range(4):
                    recorder.write(np.rot90(board, k=k), np.rot90(policy, k=k).flatten(), value)

# Train NN

In [None]:
## check running GPU
# The cell output shows the GPU device name TensorFlow reports for this runtime.
tf.test.gpu_device_name()

In [None]:
# Rebuild the network for training, this time passing l2=1e-12 to the builder
# (presumably the L2 regularisation weight -- confirm in player_zero4.net).
# NOTE(review): weights are read from 'day0.h5' in the working directory, while the
# self-play cell loads '4feat 9x9/day0.h5' -- confirm both point at the same checkpoint.
model = player_zero4.net(SIZE, l2=1e-12)
model.load_weights('day0.h5')

In [None]:
def cross_entropy(y_true, y_pred):
    """Cross-entropy between a target distribution and a predicted one.

    Computes ``-sum_k y_true[k] * log(y_pred[k] + 1e-10)`` over the last axis.
    The terms are summed, not averaged: averaging over the class axis would
    divide the loss by the number of policy entries and shrink the policy term
    relative to the value loss it is combined with in ``model.compile``.

    Args:
        y_true: target probabilities; last axis indexes the classes.
        y_pred: predicted probabilities, same shape as ``y_true``.

    Returns:
        Tensor with the last axis reduced away (one loss value per sample).
    """
    # the 1e-10 offset keeps log() finite when a predicted probability is exactly 0
    return - tf.reduce_sum(y_true * tf.math.log(y_pred + 1e-10), axis=-1)

# Adam optimiser with a fixed learning rate of 1e-3.
opt = keras.optimizers.Adam(learning_rate=1e-3)
# Custom cross-entropy for the policy target, mean squared error for the value target.
policy_loss = cross_entropy
value_loss = keras.losses.MeanSquaredError()

# Losses are keyed by 'policy' and 'value', the same keys the dataset targets use;
# presumably these are also the names of the model's two outputs -- confirm in player_zero4.net.
model.compile(optimizer=opt, 
              loss={'policy': policy_loss,
                    'value': value_loss})

In [None]:
# Open the augmented dataset written by the augmentation cell as the training source.
recorder = player_zero.GameRecorder('X.tfrecords', SIZE)
data = recorder.fetch()

In [None]:
# number of training records, counted by iterating once over `data`
sum(1 for _ in data)

In [None]:
# Shuffle, then group into mini-batches of 20 samples.
# NOTE(review): assuming `data` is a tf.data.Dataset, 10000 is the shuffle buffer size;
# if X.tfrecords holds more records than that, the shuffle is only approximate.
train = data.shuffle(10000).batch(20)

In [None]:
# Train for 20 epochs on the shuffled, batched dataset.
model.fit(train, epochs=20)

In [None]:
# Save the trained weights under a new name so the 'day0.h5' checkpoint is left untouched.
model.save_weights('day1.h5')