In [32]:
## only run if using google Colab
from google.colab import drive
drive.mount('/content/drive')

import os
os.chdir('/content/drive/MyDrive/gomoku/')

Mounted at /content/drive


In [69]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import gomoku4, player_basic, player_zero
from time import time

# for auto-reloading external modules
%load_ext autoreload
%autoreload 2

# Need this to update learning rate over time
from keras import backend as K


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
## check running GPU
tf.test.gpu_device_name()

''

# Self-play games

In [91]:
model = player_zero.net(6)
#model.summary()

opt = keras.optimizers.Adam(learning_rate=1e-3)
policy_loss = keras.losses.BinaryCrossentropy(name='cross_entropy')
value_loss = keras.losses.MeanSquaredError(name='mse')

model.compile(optimizer=opt, 
              loss={'policy': policy_loss,
                    'value': value_loss})

In [92]:
## Self play 1000 games, split into 5 rounds
## Each round is 200 games and then train NN using those data
## Self play game of round 3 is stored in 'round3.tfrecords'
## Weights trained using games of round 3 are stored in 'round4weights.h5'
## The final weights are stored in 'round5weights.h5'

for i in range (5):
    recorder = player_zero.GameRecorder('round'+str(i)+'.tfrecords', size=6)
    recorder.open()
    print ('Round '+str(i))
    print ('Self play games', end=': ')
    for j in range(200):
        print(j, end=',')
        game = gomoku4.Gomoku4(6)
        p1 = player_zero.ZeroPlayer('p1', +1, game, model, recorder)
        p2 = player_zero.ZeroPlayer('p2', -1, game, model, recorder)
        pa, pb = p1, p2
        while not game.finished:
            game.play(*pa.play(game))
            pa, pb = pb, pa
    print()
    print('This is one of the game.')
    game.show()
    recorder.close()

    print('Augmenting data')   
    ##### augment data
    dataset = recorder.fetch()
    recorder = player_zero.GameRecorder('round'+str(i)+'aug.tfrecords', size=6)
    recorder.open()
    for x in dataset:
        board = x[0].numpy()
        policy = x[1]['policy'].numpy()
        value = x[1]['value']
        policy_square = policy.reshape(6,6)
        recorder.write(np.rot90(board, k=1), np.rot90(policy_square, k=1).flatten(), value)
        recorder.write(np.rot90(board, k=2), np.rot90(policy_square, k=2).flatten(), value)
        recorder.write(np.rot90(board, k=3), np.rot90(policy_square, k=3).flatten(), value)
        recorder.write(np.flipud(board), np.flipud(policy_square).flatten(), value)
        recorder.write(np.rot90(np.flipud(board), k=1), np.rot90(np.flipud(policy_square), k=1).flatten(), value)
        recorder.write(np.rot90(np.flipud(board), k=2), np.rot90(np.flipud(policy_square), k=2).flatten(), value)
        recorder.write(np.rot90(np.flipud(board), k=3), np.rot90(np.flipud(policy_square), k=3).flatten(), value)
    recorder.close()

    print('training NN') 
    augdataset = recorder.fetch()   
    train = augdataset.shuffle(1000).batch(10)
    model.fit(train,epochs=15,verbose=2)
    ## Save the weight
    model.save_weights('round'+str(i+1)+'weights.h5')
    ## Decrease learning rate
    lr = K.eval(model.optimizer.lr)
    K.set_value(model.optimizer.learning_rate, lr*0.98)

Round 0
Self play games: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,
This is one of the game.
white played (4, 5).
black played (3, 0).
game has ended. winner: black
   0 1 2 3 4 5
 0 . . ● ● ● ● 
 1 ○ . ○ . . . 
 2 ● . ● . ● . 
 3 . ● ● ● ○ ○ 
 4 ○ . . . . . 
 5 ○ . ○ . ○ ○ 
Augmenting data
training NN
Epoch 1/15
1999/1999 - 20s - loss: 0.1994 - poli

# Play against it as Human

Can't beat it if AI goes first

In [112]:
game = gomoku4.Gomoku4(6)
p1 = player_zero.ZeroPlayer('p1', 1, game, model)
game.play(*p1.play(game))
game.show()

black played (2, 3).
white's turn.
   0 1 2 3 4 5
 0 . . . . . . 
 1 . . . . . . 
 2 . . . . . . 
 3 . . ● . . . 
 4 . . . . . . 
 5 . . . . . . 


In [117]:
## Now human play here
game.play(1,1)

if not game.finished:
    game.play(*p1.play(game))
game.show()

white played (1, 1).
black played (1, 5).
game has ended. winner: black
   0 1 2 3 4 5
 0 . . . . . . 
 1 . ○ . . . . 
 2 . ● . ○ . . 
 3 ○ ● ● ● ○ . 
 4 . ● . ○ . . 
 5 . ● . . . . 


In [120]:
##Here are the sequnce of moves
game.episode

[(2, 3),
 (3, 2),
 (1, 2),
 (3, 4),
 (3, 3),
 (4, 3),
 (1, 3),
 (0, 3),
 (1, 4),
 (1, 1),
 (1, 5)]

If Human goes first, human can always win. See if AI can catch a mistake

In [124]:
game = gomoku4.Gomoku4(6)
p2 = player_zero.ZeroPlayer('p2', -1, game, model)

In [136]:
game.play(4,2)
if not game.finished:
    game.play(*p2.play(game))
game.show()

white played (2, 5).
black played (4, 2).
game has ended. winner: black
   0 1 2 3 4 5
 0 ○ . . ● . . 
 1 ○ ● ○ ○ ○ ● 
 2 ● ○ ● ● ● ● 
 3 ○ ● ○ ○ ● . 
 4 . ● ○ ● . . 
 5 . . ○ . . . 


In [137]:
###  Here is the game history.
###  For the most part it played reasonable moves
###  But it missed some obvious moves that could win. Maybe it's not so strong as white. 
game.episode

[(3, 2),
 (3, 3),
 (1, 4),
 (2, 3),
 (4, 3),
 (2, 1),
 (2, 2),
 (0, 3),
 (1, 3),
 (1, 2),
 (3, 0),
 (0, 1),
 (3, 4),
 (3, 1),
 (1, 1),
 (4, 1),
 (5, 1),
 (0, 0),
 (0, 2),
 (2, 4),
 (5, 2),
 (2, 5),
 (4, 2)]

# Compare this AI against the AI that played 1000 games and then trained

In [142]:
model2=player_zero.net(6)
model2.load_weights('day1.h5')

Since the current AI always win if it goes first, let it go second

In [143]:
wincount = 0
for i in range(20):
    game = gomoku4.Gomoku4(6)
    p1 = player_zero.ZeroPlayer('p1', +1, game, model2)
    p2 = player_zero.ZeroPlayer('p2', -1, game, model)
    pa, pb = p1, p2
    while not game.finished:
        game.play(*pa.play(game))
        pa, pb = pb, pa
    game.show()
    if game.winner == -1:
        wincount += 1

black played (3, 4).
white played (5, 5).
game has ended. winner: white
   0 1 2 3 4 5
 0 . . . . . . 
 1 . . . . . . 
 2 . . ○ . . . 
 3 . ● ● ○ . . 
 4 . . . ● ○ . 
 5 . . ● . . ○ 
black played (0, 2).
white played (5, 0).
game has ended. winner: white
   0 1 2 3 4 5
 0 . . ● ● . ○ 
 1 . . . . ○ . 
 2 ● . . ○ . ● 
 3 . . ○ . . . 
 4 . . . . ○ . 
 5 . ● . . . . 
black played (0, 3).
white played (4, 0).
game has ended. winner: white
   0 1 2 3 4 5
 0 ○ . ● . ○ . 
 1 ● ● . ○ . . 
 2 . ● ○ ● . ● 
 3 ● ○ ○ ○ . . 
 4 . . ● ○ ○ ○ 
 5 ● ● ● ○ ○ ● 
black played (4, 4).
white played (2, 5).
game has ended. winner: white
   0 1 2 3 4 5
 0 . ● . . . . 
 1 . . . . ● . 
 2 ○ . ○ . ● . 
 3 . . ○ . . . 
 4 . . ○ ● ● . 
 5 . . ○ . . . 
black played (4, 2).
white played (1, 3).
game has ended. winner: white
   0 1 2 3 4 5
 0 . . ● . . . 
 1 . . . . . . 
 2 . . . . ● . 
 3 . ○ ○ ○ ○ . 
 4 . . ● . . . 
 5 . ● . . . . 
black played (1, 3).
white played (1, 2).
game has ended. winner: white
   0 1 2 3 4 

In [145]:
print('won '+str(wincount)+' out of 20 games')

won 19 out of 20 games
