In [None]:
## Colab-only setup: mount Google Drive and move into the project folder.
## Outside Colab the `google.colab` package cannot be imported, so the mount
## and chdir are skipped and the notebook can still be run top to bottom.
import os

try:
    from google.colab import drive
except ImportError:
    drive = None  # not running on Google Colab

if drive is not None:
    drive.mount('/content/drive')
    os.chdir('/content/drive/MyDrive/UCLA/ECE239/Gomoku-project/')

In [None]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import gomoku, player_basic, player_zero4 as player_zero
from time import time

# for auto-reloading external modules: with mode 2, IPython re-imports
# modules before running each cell, so edits to the imported project
# modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
## Global parameters ==========
## SIZE   -> passed to gomoku.Gomoku, player_zero.net and GameRecorder
##           (presumably the board side length; policies are reshaped to SIZE x SIZE)
## WIN    -> second argument of gomoku.Gomoku
##           (presumably the number of stones in a row needed to win - confirm in gomoku)
## N_ITER -> second argument of every ZeroPlayer.play call
##           (presumably the number of search iterations per move - confirm in player_zero)
SIZE = 9
WIN = 5
N_ITER = 500
## ===========================

In [None]:
## Build the network with player_zero.net(SIZE) and load the weights stored in 'day0.h5'.
model = player_zero.net(SIZE)
model.load_weights('day0.h5')

In [None]:
## Display the network's summary (assumes `model` is a Keras model - TODO confirm in player_zero.net).
model.summary()

In [None]:
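## Uncomment to save the current weights to 'day0.h5'.
## NOTE: this overwrites the same file that the weights were loaded from.
#model.save_weights('day0.h5')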

# Playtest

**Human vs computer (cp)**

In [None]:
## New game; the network-backed player 'p2' is created with side -1.
game = gomoku.Gomoku(SIZE, WIN)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)
## remove all exploration
## (epsilon presumably weights the exploration noise and temp is presumably the
##  move-sampling temperature, so 0 / 1e-3 should give near-greedy play - TODO confirm in player_zero)
p2.tree.epsilon=0
p2.tree.temp=1e-3

In [None]:
## play your move ==========
game.play(3, 4)
## =========================

## Let p2 answer (skipped when the game is already finished), draw the board,
## and report how long the reply plus the drawing took.
t = time()
if not game.finished:
    reply = p2.play(game, N_ITER)
    game.play(*reply)
game.show()
elapsed = time() - t
print("time: {:.3f}".format(elapsed))

In [None]:
## Indices that sort p2.tree.prev_head.N in ascending order
## (presumably N holds per-move visit counts, so the most-visited moves come last - TODO confirm).
np.argsort(p2.tree.prev_head.N)

**Create a fictional game**

In [None]:
## Set up a hand-crafted position by replaying a fixed move list.
game = gomoku.Gomoku(SIZE, WIN)

opening_moves = [
    (2, 4), (3, 4),
    (2, 3), (3, 3),
    (2, 2), (3, 2),
    (2, 1),
]
for move in opening_moves:
    game.play(*move)

## p2 (side +1, with a PrintRecorder) is created before the last move is played;
## that order is kept as is.
p2 = player_zero.ZeroPlayer('p2', +1, game, model, player_zero.PrintRecorder())

game.play(3, 1)

game.show()

In [None]:
## Ask p2 for its move in the constructed position; the result is only displayed, not played on `game`.
p2.play(game, N_ITER)

In [None]:
## Display the V attribute of the previous search root
## (presumably the value estimate(s) stored on that node - TODO confirm in player_zero).
p2.tree.prev_head.V

**Computer vs computer (cp vs cp)**

In [None]:
## playtest against self: both players share the same game and the same network
game = gomoku.Gomoku(SIZE, WIN)
p1, p2 = (
    player_zero.ZeroPlayer(name, side, game, model)
    for name, side in (('p1', +1), ('p2', -1))
)
## pa is the player to move next, pb the one waiting
pa, pb = p1, p2

In [None]:
## Play the game to the end, alternating the two players, then show the final
## board and the total time taken.
## (swap `while` for `if` to advance a single move per run of this cell)
t = time()
while not game.finished:
    move = pa.play(game, N_ITER)
    game.play(*move)
    pa, pb = pb, pa
game.show()
elapsed = time() - t
print("time: {:.3f}".format(elapsed))

# Gather self-play games

In [None]:
## ==========
BATCH_SIZE = 100
START = 0
END = 1
PREFIX = 'day0'
## ==========
## Games are written in batches so that an interruption loses at most one batch.
## Batch i goes to 'PREFIX_i.tfrecords' for i = START, START+1, ..., END-1.

for i in range(START, END):
    filename = '{0}_{1}.tfrecords'.format(PREFIX, i)
    with player_zero.GameRecorder(filename, SIZE) as recorder:
        print("batch %2d: " % i, end='')
        for j in range(BATCH_SIZE):
            print(j, end=',')
            ## fresh game; both sides use the same network and the same recorder
            game = gomoku.Gomoku(SIZE, WIN)
            p1 = player_zero.ZeroPlayer('p1', +1, game, model, recorder)
            p2 = player_zero.ZeroPlayer('p2', -1, game, model, recorder)

            ## alternate turns until the game reports it is finished
            pa, pb = p1, p2
            while not game.finished:
                move = pa.play(game, N_ITER)
                game.play(*move)
                pa, pb = pb, pa
        print()

**Examine the collected games**

In [None]:
## Fetch the records stored in 'day0.tfrecords'.
## NOTE(review): the recorder is used here without `with`, unlike the cells that write
## records - presumably fine for read-only access; confirm in player_zero.GameRecorder.
recorder = player_zero.GameRecorder('day0.tfrecords', SIZE)
data = recorder.fetch()

In [None]:
## Number of records in `data`, counted without keeping them all in memory.
sum(1 for _ in data)

In [None]:
## Materialise the dataset once, then split it into arrays.
## A single pass keeps board / policy / value row-aligned even if iterating
## `data` is not deterministic, and reads the records once instead of three times.
records = list(data)
board = np.array([x[0] for x in records])
policy = np.array([x[1]['policy'] for x in records])
value = np.array([x[1]['value'] for x in records])

In [None]:
## Print record i: the four slices along the board's last axis,
## followed by its policy target and its value target.
i = 0
for channel in range(4):
    print(board[i][..., channel])
print(policy[i])
print(value[i])

# Merge data and augment

In [None]:
## merge into one dataset==============
## inputs: 'day0_0.tfrecords' ... 'day0_14.tfrecords'
innames = ['day0_{0}.tfrecords'.format(k) for k in range(15)]
outname = 'day0.tfrecords'
## ====================================

## one fetched dataset per input file, in the order listed above
datasets = [player_zero.GameRecorder(name, SIZE).fetch() for name in innames]

## copy every record, file by file, into the merged output
with player_zero.GameRecorder(outname, SIZE) as recorder:
    for ds in datasets:
        for x in ds:
            targets = x[1]
            board = x[0].numpy()
            policy = targets['policy'].numpy()
            value = targets['value']  # passed on as fetched, without .numpy()
            recorder.write(board, policy, value)

In [None]:
## augment dataset=====================
innames = ['day0.tfrecords']
outname = 'X.tfrecords'
## ====================================

datasets = [player_zero.GameRecorder(name, SIZE).fetch() for name in innames]

## Every record is written 8 times: the 4 rotations (0, 90, 180, 270 degrees)
## of the original, then the 4 rotations of its up-down mirror image.
## Board and policy get the same transform; the value target is unchanged.
with player_zero.GameRecorder(outname, SIZE) as recorder:
    for ds in datasets:
        for x in ds:
            board = x[0].numpy()
            policy = x[1]['policy'].numpy().reshape(SIZE, SIZE) # make into square
            value = x[1]['value']
            for mirrored in (False, True):
                if mirrored:
                    ## flip
                    board = np.flipud(board)
                    policy = np.flipud(policy)
                ## rotate
                for k in range(4):
                    recorder.write(np.rot90(board, k=k),
                                   np.rot90(policy, k=k).flatten(),
                                   value)

# Train NN

In [None]:
## check running GPU: shows the name of the GPU device TensorFlow sees
## (per the TensorFlow docs, an empty string means no GPU is available)
tf.test.gpu_device_name()

In [None]:
## Rebuild the network and start training from the weights stored in 'day0.h5'.
model = player_zero.net(SIZE)
model.load_weights('day0.h5')

In [None]:
def cross_entropy(y_true, y_pred):
    """Categorical cross-entropy between target and predicted distributions.

    Computes ``-sum(y_true * log(y_pred))`` over the last axis. The earlier
    version took the *mean* over that axis, which divides the cross-entropy
    by the length of the last axis and so shrinks this loss relative to any
    other loss it is combined with.

    Args:
        y_true: target probabilities; the last axis indexes the classes
            (assumed to be a probability distribution - not checked here).
        y_pred: predicted probabilities, same shape as ``y_true``.

    Returns:
        Tensor with the last axis reduced away (one loss value per sample).
    """
    # The 1e-10 offset keeps log() finite when a predicted probability is 0.
    return - tf.reduce_sum(y_true * tf.math.log(y_pred + 1e-10), axis=-1)

## Optimiser and one loss per named model output:
## 'policy' uses the custom cross_entropy, 'value' uses mean squared error.
opt = keras.optimizers.Adam(learning_rate=1e-3)
policy_loss = cross_entropy
value_loss = keras.losses.MeanSquaredError()
head_losses = {'policy': policy_loss, 'value': value_loss}

model.compile(optimizer=opt, loss=head_losses)

In [None]:
## Fetch the training records from 'X.tfrecords'.
recorder = player_zero.GameRecorder('X.tfrecords', SIZE)
data = recorder.fetch()

In [None]:
## Number of training records, counted without keeping them all in memory.
sum(1 for _ in data)

In [None]:
## Shuffle with a 10000-element buffer, then group into batches of 20
## (assumes `data` is a tf.data.Dataset - TODO confirm against GameRecorder.fetch).
## NOTE(review): if the dataset holds more than 10000 records the shuffle is only
## approximate; compare against the record count and enlarge the buffer if needed.
train = data.shuffle(10000).batch(20)

In [None]:
## Train for 20 epochs on the shuffled, batched data; `history` keeps what fit() returns.
history = model.fit(train, epochs=20)

In [None]:
## Save the trained weights to 'day1.h5' (a different file from the 'day0.h5' weights loaded before training).
model.save_weights('day1.h5')

# Evaluate

In [None]:
## Load the two networks to compare: model1 <- 'day5.h5', model2 <- 'day4.h5'.
model1 = player_zero.net(SIZE)
model1.load_weights('day5.h5')
model2 = player_zero.net(SIZE)
model2.load_weights('day4.h5')

In [None]:
## Evaluation match between the two loaded networks.
##   p1 (side +1) uses model2 and makes the first move of every game,
##   p2 (side -1) uses model1.
## `wins` / `losses` are counted from p1's side: a win is a game whose
## winner is +1, a loss is a game whose winner is -1.
N_GAMES = 10
wins = 0
losses = 0
for i in range(N_GAMES):
    game = gomoku.Gomoku(SIZE, WIN)
    ## same search settings for both players
    ## (presumably no exploration noise and a reduced temperature - confirm in player_zero)
    p1 = player_zero.ZeroPlayer('p1', +1, game, model2)
    p1.tree.epsilon = 0
    p1.tree.temp = .5
    p2 = player_zero.ZeroPlayer('p2', -1, game, model1)
    p2.tree.epsilon = 0
    p2.tree.temp = .5
    pa, pb = p1, p2

    ## alternate turns until the game reports it is finished
    while not game.finished:
        game.play(*pa.play(game, N_ITER))
        pa, pb = pb, pa
    if game.winner == +1:
        wins += 1
    elif game.winner == -1:
        losses += 1

    game.show()

## Report the tally, which was previously computed but never displayed.
print("p1/model2 (+1) vs p2/model1 (-1): {0} wins, {1} losses, {2} without a winner, {3} games".format(
    wins, losses, N_GAMES - wins - losses, N_GAMES))