# s{t} = [X{t}, Y{t}, X{t−1}, Y{t−1},..., X{t−7}, Y{t−7}, C].
The input features s{t} are processed by a residual tower that consists of a single convolutional block 
followed by either 19 or 39 residual blocks.

The convolutional block applies the following modules:
(1) A convolution of 256 filters of kernel size 3 × 3 with stride 1
(2) Batch normalization
(3) A rectifier nonlinearity

Each residual block applies the following modules sequentially to its input: 
(1) A convolution of 256 filters of kernel size 3 × 3 with stride 1
(2) Batch normalization
(3) A rectifier nonlinearity
(4) A convolution of 256 filters of kernel size 3 × 3 with stride 1
(5) Batch normalization
(6) A skip connection that adds the input to the block
(7) A rectifier nonlinearity

The output of the residual tower is passed into two separate ‘heads’ for computing the policy and value. 

The policy head applies the following modules: 
(1) A convolution of 2 filters of kernel size 1 × 1 with stride 1
(2) Batch normalization
(3) A rectifier nonlinearity
(4) A fully connected linear layer that outputs a vector of size 19*19 + 1 = 362, corresponding to logit probabilities for all intersections and the pass move

The value head applies the following modules:
(1) A convolution of 1 filter of kernel size 1 × 1 with stride 1 
(2) Batch normalization
(3) A rectifier nonlinearity
(4) A fully connected linear layer to a hidden layer of size 256
(5) A rectifier nonlinearity
(6) A fully connected linear layer to a scalar
(7) A tanh nonlinearity outputting a scalar in the range [−1, 1]

The overall network depth, in the 20­ or 40­ block network, is 39 or 79 parameterized layers, respectively, 
for the residual tower, plus an additional 2 layers for the policy head and 3 layers for the value head.
"""


In [105]:
import keras
from keras.layers import Activation, BatchNormalization
from keras.layers import Conv2D, Dense, Flatten, Input
from keras.models import Model
import numpy as np



class trojanGoZero:
    def __init__(self, num_resnet_block=19):
        #self.board_input = Input(shape=TrojanGoPlane.shape(), name='board_input')
        self.board_input = Input(shape=(7, 5, 5), name='board_input')
        self.num_resnet_block = num_resnet_block
        self.num_filters = 256

    
    def resNetBlock(self, x,filters,pool=False):
        res = x
        
        if pool:
            x = MaxPooling2D(pool_size=(2, 2))(x)
            res = Conv2D(filters=filters,kernel_size=[1,1],strides=(2,2),padding="same", data_format='channels_first')(res)
        #out = BatchNormalization()(x)
        #out = Activation("relu")(out)
            
        out = Conv2D(filters=filters, kernel_size=[3, 3], strides=[1, 1], padding="same", data_format='channels_first')(x)
        out = BatchNormalization()(out)
        out = Activation("relu")(out)
        
        out = Conv2D(filters=filters, kernel_size=[3, 3], strides=[1, 1], padding="same", data_format='channels_first')(out)
        out = BatchNormalization()(out)
        
        out = keras.layers.add([res,out])
        out = Activation("relu")(out)

        return out


        
    def nn_model(self, input_shape):
        #Input feature of 17*19*19 or 7*5*5 as board_input or board_images
        
        board_images = Input(input_shape)
        #board_images = self.board_input

        #CNN-1 with Batch Normalization and rectifier nonlinearity.
        cnn1 = Conv2D(filters=256, kernel_size=[3, 3], strides=[1, 1], padding="same", data_format='channels_first')(board_images)
        cnn1_batch = BatchNormalization()(cnn1)
        cnn1_act = Activation("relu")(cnn1_batch)

        self_in = cnn1_act
        
        #Now build 19 or 39 ResNet block networks depends on "num_resnet_block" variable.
        for i in range(self.num_resnet_block):
            self_out = self.resNetBlock(self_in, self.num_filters)
            self_in = self_out

        out = self_out
        
        policy_conv = \
            Conv2D(2, (1, 1),                          # <2>
                data_format='channels_first',          # <2>
                activation='relu')(out)                # <2>
        policy_flat = Flatten()(policy_conv)           # <2>
        policy_output = \
            Dense(25,
                  activation='softmax')(policy_flat)   # <2>




        value_conv = \
            Conv2D(1, (1, 1),                                    # <3>
                data_format='channels_first',                    # <3>
                activation='relu')(out)                          # <3>
        value_flat = Flatten()(value_conv)                       # <3>
        value_hidden = Dense(256, activation='relu')(value_flat) # <3>
        value_output = Dense(1, activation='tanh')(value_hidden) # <3>

        model = Model(
            inputs=[board_images],
            outputs=[policy_output, value_output])

        return model


In [106]:
net = trojanGoZero()
input_shape = (7,5,5)
model = net.nn_model(input_shape)
print(model.summary())
    

Model: "model_12"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 7, 5, 5)      0                                            
__________________________________________________________________________________________________
conv2d_455 (Conv2D)             (None, 256, 5, 5)    16384       input_4[0][0]                    
__________________________________________________________________________________________________
batch_normalization_433 (BatchN (None, 256, 5, 5)    20          conv2d_455[0][0]                 
__________________________________________________________________________________________________
activation_432 (Activation)     (None, 256, 5, 5)    0           batch_normalization_433[0][0]    
___________________________________________________________________________________________

In [147]:

model_input = []

for _ in range(10000):
    #board_tensor = np.random((7, 5, 5))
    board_tensor = np.random.randint(0, 3, size=(7, 5, 5))
    model_input.append(board_tensor)


model_input = np.array(model_input) 
print(model_input)  

action_target = []
for _ in range (10000):
    search_prob = np.random.randn(5,5)
    search_prob_flat = search_prob.reshape(25,)
    action_target.append(search_prob_flat)
    
action_target = np.array(action_target)    
print(action_target)

#value_target = [0, 1, 0.5, 0.9, -1, -0.6, 1, 1, 1, -1]
value_target = np.random.rand(10000)
value_target = np.array(value_target) 
print(value_target)

"""
model_input = []

inpu = np.ones((7, 5, 5))
model_input.append(inpu)

inpu = np.ones((7, 5, 5))
model_input.append(inpu)

action_target = np.random.randn(5,5)
value_target = [0.5]
"""

[[[[2 2 2 1 0]
   [1 2 2 2 2]
   [2 0 2 2 2]
   [1 2 0 0 0]
   [2 2 2 0 1]]

  [[1 2 0 0 1]
   [1 0 2 0 2]
   [1 2 1 1 2]
   [0 1 0 2 1]
   [0 2 2 2 2]]

  [[1 2 1 0 1]
   [1 0 0 0 1]
   [2 1 2 2 2]
   [1 0 0 2 2]
   [1 0 1 0 1]]

  ...

  [[1 0 0 2 2]
   [0 1 2 1 0]
   [2 0 2 0 1]
   [2 2 2 0 1]
   [1 0 2 1 2]]

  [[1 0 1 2 0]
   [0 0 1 1 0]
   [2 1 2 0 0]
   [2 0 2 1 1]
   [2 0 2 0 2]]

  [[0 2 2 2 0]
   [1 0 0 0 0]
   [1 0 0 2 1]
   [0 2 2 2 0]
   [1 0 1 2 0]]]


 [[[2 2 1 2 1]
   [0 1 0 0 2]
   [2 1 0 2 2]
   [2 0 2 1 0]
   [0 2 0 2 0]]

  [[1 2 2 2 1]
   [2 2 2 1 0]
   [1 0 2 0 1]
   [1 0 0 2 2]
   [1 0 1 2 2]]

  [[0 1 0 0 1]
   [0 0 1 2 2]
   [1 2 0 0 1]
   [1 1 2 2 0]
   [1 0 2 1 2]]

  ...

  [[0 1 1 0 2]
   [1 1 2 1 1]
   [2 0 0 0 1]
   [2 2 2 1 0]
   [0 1 2 0 2]]

  [[0 2 0 0 1]
   [0 2 0 0 2]
   [1 0 2 0 2]
   [2 2 0 1 1]
   [1 0 2 0 2]]

  [[2 1 2 0 1]
   [1 1 2 2 1]
   [2 2 0 1 0]
   [2 0 0 0 2]
   [1 0 1 1 0]]]


 [[[0 2 1 2 0]
   [2 1 1 0 0]
   [2 1 2 0 1]
   [2 2 1 0 0

'\nmodel_input = []\n\ninpu = np.ones((7, 5, 5))\nmodel_input.append(inpu)\n\ninpu = np.ones((7, 5, 5))\nmodel_input.append(inpu)\n\naction_target = np.random.randn(5,5)\nvalue_target = [0.5]\n'

In [148]:
from keras.optimizers import SGD
model.compile(SGD(lr=0.01), loss=['categorical_crossentropy', 'mse'])

In [150]:
import time
start = time.time()
model.fit(model_input, [action_target, value_target], batch_size=64, epochs=1)
finish = time.time()
print("Time taken : ", finish - start)

Epoch 1/1
Time taken :  714.2429230213165


In [151]:
X = model_input[0]
X = np.expand_dims(X, axis=0)
print(X.shape)
prediction = model.predict(X)
print(prediction)


(1, 7, 5, 5)
[array([[0.03640846, 0.04634101, 0.04317485, 0.03242254, 0.04850648,
        0.03450151, 0.03997014, 0.03587916, 0.04387308, 0.04597052,
        0.03960366, 0.05057499, 0.03758756, 0.04722814, 0.03776512,
        0.03552547, 0.0439564 , 0.02352914, 0.03258144, 0.03444119,
        0.03427581, 0.0455748 , 0.04680213, 0.04054449, 0.04296193]],
      dtype=float32), array([[1.]], dtype=float32)]


In [156]:
index = np.argmax(prediction[0])
rows = int(index/5)
cols = index%5
print("Move : ", (rows, cols))
print("Win chance :", prediction[1])

Move :  (2, 1)
Win chance : [[1.]]
