In [1]:
import numpy as np
from keras.engine.topology import Input
from keras.engine.training import Model
from keras.layers.convolutional import Conv2D
from keras import optimizers
from keras.layers.core import Activation, Dense, Flatten
from keras.layers.merge import Add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [12]:
hex_data = np.load('hex_data.npz')

In [45]:
states = hex_data['states']
turns = hex_data['turns']
visits = hex_data['visits']
moves = hex_data['moves']
values = hex_data['values']


In [46]:
for i in range(states.shape[0]):
    if turns[i] == -1:
        states[i] = states[i].T
        moves[i] = np.array([[moves[i][1], moves[i][0]]])
        visits[i] = visits[i].T
        values[i] = 1 - values[i].T

In [47]:
def calculate_probabilities(visits):
    normalize_sums = visits.sum(axis=1).sum(axis=1)
    reshaped = visits.reshape((visits.shape[0], visits.shape[1]*visits.shape[2]))

    normalized = reshaped/normalize_sums[:,None]

    probabilities = normalized.reshape((visits.shape[0], visits.shape[1]*visits.shape[2]))

    return probabilities

def calculate_values(moves, values):
    y_values = np.array([value[move[0]][move[1]] for move, value in zip(moves, values)])
    return y_values


In [48]:
values

array([[[0.9381295 , 0.93598862, 0.93862816, ..., 1.        ,
         0.94473488, 0.92607527],
        [0.93972404, 0.93862816, 0.93972404, ..., 1.        ,
         1.        , 0.9308915 ],
        [0.91909385, 0.94362018, 0.92994505, ..., 1.        ,
         1.        , 0.93598862],
        ...,
        [0.92328042, 0.92702703, 0.91253102, ..., 1.        ,
         1.        , 0.9381295 ],
        [0.92374005, 0.9070186 , 0.91457286, ..., 1.        ,
         0.91666667, 0.92847411],
        [0.91498741, 0.91838046, 0.90552585, ..., 0.92847411,
         0.93291405, 0.92896175]],

       [[0.88974719, 0.8994004 , 0.89013296, ..., 0.90131579,
         0.8994004 , 0.88896747],
        [0.8914405 , 0.89966777, 0.89980093, ..., 0.9009901 ,
         0.89853138, 0.90255069],
        [0.91376147, 0.91245376, 0.92439863, ..., 0.        ,
         0.89437586, 0.86939418],
        ...,
        [0.        , 0.        , 0.        , ..., 0.88205128,
         0.87964339, 0.88738418],
        [0.9

In [49]:
states = states.reshape(states.shape[0], 1, 8, 8)

train_X = states[:4*states.shape[0] // 5]
test_X = states[4*states.shape[0] // 5:]

probabilities = calculate_probabilities(visits)
y_values = calculate_values(moves, values)

training_probs = probabilities[:4*probabilities.shape[0] // 5]
training_values = y_values[:4*y_values.shape[0] // 5]
testing_probs = probabilities[4*y_values.shape[0] // 5:]
testing_values = y_values[4*y_values.shape[0] // 5:]

In [50]:
print((hex_data['visits'][0]/hex_data['visits'][0].sum())

SyntaxError: unexpected EOF while parsing (<ipython-input-50-31edd51b8c4a>, line 1)

In [52]:
np.zeros(64).reshape(8,8)

array([[0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.]])

In [7]:
in_x = x = Input((1, 8, 8))
cnn_filter_num = 128
cnn_first_filter_size = 2
cnn_filter_size = 2
l2_reg = 0.0001
res_layer_num = 20
n_labels = 64
value_fc_size = 64

In [8]:
x = Conv2D(filters=cnn_filter_num,   kernel_size=cnn_first_filter_size, padding="same", data_format="channels_first", use_bias=False, kernel_regularizer=l2(l2_reg), name="input_conv-"+str(cnn_first_filter_size)+"-"+str(cnn_filter_num))(x)
x = BatchNormalization(axis=1, name="input_batchnorm")(x)
x = Activation("relu", name="input_relu")(x)

In [9]:
def _build_residual_block(x, index):
    in_x = x
    res_name = "res"+str(index)
    x = Conv2D(filters=cnn_filter_num, kernel_size=cnn_filter_size, padding="same", data_format="channels_first", use_bias=False, kernel_regularizer=l2(l2_reg), name=res_name+"_conv1-"+str(cnn_filter_size)+"-"+str(cnn_filter_num))(x)
    x = BatchNormalization(axis=1, name=res_name+"_batchnorm1")(x)
    x = Activation("relu",name=res_name+"_relu1")(x)
    x = Conv2D(filters=cnn_filter_num, kernel_size=cnn_filter_size, padding="same", data_format="channels_first", use_bias=False, kernel_regularizer=l2(l2_reg), name=res_name+"_conv2-"+str(cnn_filter_size)+"-"+str(cnn_filter_num))(x)
    x = BatchNormalization(axis=1, name="res"+str(index)+"_batchnorm2")(x)
    x = Add(name=res_name+"_add")([in_x, x])
    x = Activation("relu", name=res_name+"_relu2")(x)
    return x

In [10]:
for i in range(res_layer_num):
    x = _build_residual_block(x, i + 1)
        

In [11]:
res_out = x


In [12]:
# for policy output
x = Conv2D(filters=2, kernel_size=1, data_format="channels_first", use_bias=False, kernel_regularizer=l2(l2_reg), name="policy_conv-1-2")(res_out)

x = BatchNormalization(axis=1, name="policy_batchnorm")(x)
x = Activation("relu", name="policy_relu")(x)
x = Flatten(name="policy_flatten")(x)

# no output for 'pass'
policy_out = Dense(n_labels, kernel_regularizer=l2(l2_reg), activation="softmax", name="policy_out")(x)

# for value output
x = Conv2D(filters=4, kernel_size=1, data_format="channels_first", use_bias=False, kernel_regularizer=l2(l2_reg), name="value_conv-1-4")(res_out)

x = BatchNormalization(axis=1, name="value_batchnorm")(x)
x = Activation("relu",name="value_relu")(x)
x = Flatten(name="value_flatten")(x)
x = Dense(value_fc_size, kernel_regularizer=l2(l2_reg), activation="relu", name="value_dense")(x)

value_out = Dense(1, kernel_regularizer=l2(l2_reg), activation="tanh", name="value_out")(x)

model = Model(in_x, [policy_out, value_out], name="hex_model")

In [13]:
learning_rate = 0.1 # schedule dependent on thousands of steps, every 200 thousand steps, decrease by factor of 10

sgd = optimizers.SGD(lr=learning_rate, momentum=0.9)

losses = ['categorical_crossentropy', 'mean_squared_error']

model.compile(loss=losses, optimizer=sgd)

model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1, 8, 8)      0                                            
__________________________________________________________________________________________________
input_conv-2-128 (Conv2D)       (None, 128, 8, 8)    512         input_1[0][0]                    
__________________________________________________________________________________________________
input_batchnorm (BatchNormaliza (None, 128, 8, 8)    512         input_conv-2-128[0][0]           
__________________________________________________________________________________________________
input_relu (Activation)         (None, 128, 8, 8)    0           input_batchnorm[0][0]            
__________________________________________________________________________________________________
res1_conv1

In [14]:
states = states.reshape(states.shape[0], 1, 8, 8)

In [15]:
train_X = states[:4*states.shape[0] // 5]
test_X = states[4*states.shape[0] // 5:]

In [16]:
normalize_sums = visits.sum(axis=1).sum(axis=1)

In [17]:
reshaped = visits.reshape((visits.shape[0], visits.shape[1]*visits.shape[2]))

In [18]:
normalized = reshaped/normalize_sums[:,None]

In [19]:
probabilities = normalized.reshape((visits.shape[0], visits.shape[1]*visits.shape[2]))

In [20]:
y_values = np.array([value[move[0]][move[1]] for move, value in zip(moves, values)])

In [21]:
probabilities.shape

(47537, 64)

In [22]:
y_values.shape

(47537,)

In [23]:
train_X.shape

(38029, 1, 8, 8)

In [24]:
Y_data = [probabilities, y_values]

In [25]:
training_probs = probabilities[:4*probabilities.shape[0] // 5]
training_values = y_values[:4*y_values.shape[0] // 5]
testing_probs = probabilities[4*y_values.shape[0] // 5:]
testing_values = y_values[4*y_values.shape[0] // 5:]

In [26]:
train_Y = {'policy_out':training_probs, 'value_out':training_values}
test_Y = {'policy_out':testing_probs, 'value_out':testing_values}

In [None]:
history = model.fit(train_X, train_Y, verbose = 1, validation_data=(test_X,test_Y), epochs = 1)

Train on 38029 samples, validate on 9508 samples
Epoch 1/1
 3264/38029 [=>............................] - ETA: 30:18 - loss: 5.0368 - policy_out_loss: 4.1582 - value_out_loss: 0.2090