In [None]:
# Mount Google Drive at /content/drive so files stored there can be accessed from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd /content/drive/MyDrive/cours/iasd-deep-golois/project/
!pip install pybind11
!chmod 777 ./compile.sh
!./compile.sh


In [None]:
# Copy the compiled extension to the plain name golois.so.
# NOTE(review): the source filename is tied to CPython 3.6 on x86_64 Linux; under another
# interpreter version the build presumably produces a differently named file — confirm.
!cp golois.cpython-36m-x86_64-linux-gnu.so golois.so

# Best solution: a MobileNetV2 based on the paper "Mobile Networks for Computer Go"




The batch size is fixed to 32. The annealing schedule is to
train with a learning rate of 0.005 for the first 100 epochs,
then with a learning rate of 0.0005 from epoch 100 to 150,
and then with a learning rate of 0.00005 from epoch 150 to 200.
This makes it possible to fine-tune the networks when
learning stalls. It is similar to the AlphaZero annealing
schedule, which also divides the learning rate by ten every
200 epochs in the beginning and every 100 epochs in the end.

The fifth network uses 33 MobileNet blocks with a trunk
of 64 and 200 filters inside the blocks. It uses the fully
convolutional policy head and the Global Average Pooling value head. 
It has 970 477 parameters. During training it
uses the Binary Crossentropy loss for the value and the
Categorical Crossentropy loss for the policy. The network is
called mobile.small.conv.avg.bin

Saving and loading Keras models: https://www.tensorflow.org/tutorials/keras/save_and_load

In [None]:
# Print the current working directory to check where relative paths resolve.
!pwd

In [None]:
import os
import math
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from tensorflow.keras import layers 
from tensorflow.keras import regularizers

import golois

# Problem dimensions and training hyper-parameters used by the cells below.
planes = 21   # feature planes per board position (last axis of input_data)
moves = 361   # number of policy classes (19 * 19)
N = 200000    # number of positions held in each buffer
epochs = 50
batch = 128
#256

filters = 64
trunk = filters

# Pre-allocate the float32 buffers that are handed to the golois extension.
# NOTE(review): golois.getValidation / golois.getBatch presumably overwrite these
# arrays in place, making the random contents mere placeholders — confirm against
# the C++ source.
input_data = np.random.randint(2, size=(N, 19, 19, planes))
input_data = input_data.astype ('float32')

policy = np.random.randint(moves, size=(N,))
# Pin the one-hot width to `moves`; without num_classes the width is inferred from
# the largest value that happened to be drawn.
policy = keras.utils.to_categorical (policy, num_classes=moves)

value = np.random.randint(2, size=(N,))
value = value.astype ('float32')

end = np.random.randint(2, size=(N, 19, 19, 2))
end = end.astype ('float32')

# Allocate directly as float32 instead of creating a float64 array and converting it.
groups = np.zeros((N, 19, 19, 1), dtype='float32')

print ("getValidation", flush = True)
golois.getValidation (input_data, policy, value, end)

def bottleneck_block(x, expand, squeeze=64):
    """Stack one bottleneck block on top of tensor `x` and return the result.

    The block chains a 1x1 convolution to `expand` channels, a 3x3 depthwise
    convolution and a 1x1 convolution to `squeeze` channels. Every convolution
    is bias-free, L2-regularized (0.0001) and followed by batch normalization;
    the first two are also followed by a ReLU. The block output is added to `x`.
    """
    def l2_penalty():
        return regularizers.l2(0.0001)

    # Layer factories, listed in the order in which they are created and applied.
    stages = (
        lambda: layers.Conv2D(expand, (1,1), kernel_regularizer=l2_penalty(), use_bias=False),
        lambda: layers.BatchNormalization(),
        lambda: layers.Activation('relu'),
        lambda: layers.DepthwiseConv2D((3,3), padding='same', kernel_regularizer=l2_penalty(), use_bias=False),
        lambda: layers.BatchNormalization(),
        lambda: layers.Activation('relu'),
        lambda: layers.Conv2D(squeeze, (1,1), kernel_regularizer=l2_penalty(), use_bias=False),
        lambda: layers.BatchNormalization(),
    )

    branch = x
    for make_layer in stages:
        branch = make_layer()(branch)

    # Residual connection between the block input and the transformed branch.
    return layers.Add()([branch, x])


# Network input: a 19x19 board described by `planes` feature planes.
input = keras.Input(shape=(19, 19, planes), name='board')

# Stem: a 1x1 convolution to `trunk` channels, then 33 bottleneck blocks
# (200 channels inside each block, `trunk` channels between blocks).
x = layers.Conv2D(trunk, 1, activation='relu', padding='same')(input)
for i in range (33):
    x = bottleneck_block(x, 200, trunk)

# fully convolutional, no dense layer
# Policy head: a single-channel 1x1 convolution, flattened and passed through a softmax.
policy_head = layers.Conv2D(1, 1, activation='relu', padding='same',use_bias = False,kernel_regularizer=regularizers.l2(0.0001))(x)
policy_head = layers.Flatten()(policy_head)
policy_head = layers.Activation('softmax', name='policy')(policy_head)

# Value head: global average pooling, a 50-unit dense layer, then one sigmoid unit.
value_head = layers.GlobalAveragePooling2D()(x)
value_head = layers.Dense(50, activation='relu',kernel_regularizer=regularizers.l2(0.0001))(value_head)
value_head = layers.Dense(1, activation='sigmoid', name='value',kernel_regularizer=regularizers.l2(0.0001))(value_head)

model = keras.Model(inputs=input, outputs=[policy_head, value_head])
model.summary ()

# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
# The loss/metric dictionaries are keyed by the output layer names 'policy' and 'value'.
model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={'policy': 'categorical_accuracy', 'value': 'mse'})

# try binary cross-entropy for the value loss, keep MSE for validation

# Continue from the weights saved by a previous run; ./test.h5 must already exist.
model.load_weights("./test.h5")

# learning rate schedule
def step_decay(epoch):
    """Return the step-decayed learning rate for the given epoch index.

    The rate starts at 0.000005 and is multiplied by 0.1 once for every
    2 epochs counted in (1 + epoch).
    """
    # The author's inline notes list 0.005 (initial rate) and 10.0 (step length) as alternatives.
    base_rate, decay_factor, step_length = 0.000005, 0.1, 2
    steps_done = math.floor((1 + epoch) / step_length)
    return base_rate * math.pow(decay_factor, steps_done)

def lr_scheduler(epoch):
  """Return the piecewise-constant learning rate for the given epoch index.

  Epochs below 10 use 0.00005, epochs 10 to 19 use 0.000025, and every
  later epoch uses 0.00001.
  """
  if epoch < 10:
    return 0.00005
  if epoch < 20:
    return 0.000025
  # All remaining epochs get the lowest rate. The former `elif epoch >= 30`
  # left epochs 20-29 without a return statement, so None was returned there.
  return 0.00001

checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights (weights only) to checkpoint_path
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

# adapt LR as epochs grows
#lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)

# 1st model obtained using : 
lr_callback = tf.keras.callbacks.LearningRateScheduler(step_decay)


for i in range (1, epochs + 1):
    print ('epoch ' + str (i))
    # Hand the buffers to the extension before fitting on them (presumably it
    # refills them with a fresh training batch — confirm against golois).
    golois.getBatch (input_data, policy, value, end, groups)
    # Each fit() call trains exactly one epoch. Passing initial_epoch/epochs makes
    # Keras number that epoch i - 1 instead of always 0, so the LearningRateScheduler
    # callback walks through its schedule rather than receiving epoch 0 every time.
    # NOTE(review): earlier runs were therefore trained at the constant step_decay(0)
    # rate; revisit the decay parameters in step_decay now that the schedule advances.
    history = model.fit(input_data,
                        {'policy': policy, 'value': value},
                        initial_epoch=i - 1,
                        epochs=i,
                        callbacks=[lr_callback, cp_callback],
                        batch_size=batch)
    # Every 10th iteration, evaluate on what getValidation puts into the same buffers.
    if (i % 10 == 0):
        golois.getValidation (input_data, policy, value, end)
        val = model.evaluate (input_data,
                              [policy, value], verbose = 0, batch_size=batch)
        print ("val =", val)

# Persist the full model once all iterations are done.
model.save ('test.h5')



# Random try

In [None]:
# Hyper-parameters for this experiment. These reassign the notebook-level names
# `filters`, `trunk` and `epochs` that other cells also set.
filters = 512
trunk = 128
epochs = 100

def bottleneck_block(x, expand=filters, squeeze=trunk):
  """Add one bottleneck block after tensor `x` and return the summed output.

  Sequence: 1x1 convolution to `expand` channels, 3x3 depthwise convolution,
  1x1 convolution to `squeeze` channels. Each convolution has no bias, uses an
  L2 kernel penalty of 0.0001 and is followed by batch normalization; a ReLU
  follows the first two. The defaults are the values `filters` and `trunk`
  had when this function was defined.
  """
  weight_decay = 0.0001

  # Expansion: pointwise convolution up to `expand` channels.
  widened = layers.Conv2D(expand, (1,1), kernel_regularizer=regularizers.l2(weight_decay), use_bias=False)(x)
  widened = layers.BatchNormalization()(widened)
  widened = layers.Activation('relu')(widened)

  # Spatial mixing: 3x3 depthwise convolution.
  mixed = layers.DepthwiseConv2D((3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), use_bias=False)(widened)
  mixed = layers.BatchNormalization()(mixed)
  mixed = layers.Activation('relu')(mixed)

  # Projection: pointwise convolution down to `squeeze` channels, no activation.
  narrowed = layers.Conv2D(squeeze, (1,1), kernel_regularizer=regularizers.l2(weight_decay), use_bias=False)(mixed)
  narrowed = layers.BatchNormalization()(narrowed)

  # Residual sum of the projected branch and the block input.
  return layers.Add()([narrowed, x])

# Network input: a 19x19 board with 21 feature planes.
input = keras.Input(shape=(19, 19, 21), name='board')

# Stem: regularized 1x1 convolution to `trunk` channels, batch normalization, ReLU.
x = layers.Conv2D(trunk, 1, padding='same',kernel_regularizer=regularizers.l2(0.0001))(input)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)

# Body: 6 bottleneck blocks with `filters` channels inside and `trunk` channels between blocks.
for i in range (6):
  x = bottleneck_block (x, filters, trunk)

# fully convolutional, no dense layer
# Policy head: a single-channel 1x1 convolution, flattened and passed through a softmax.
policy_head = layers.Conv2D(1, 1, activation='relu', padding='same',use_bias = False,kernel_regularizer=regularizers.l2(0.0001))(x)
policy_head = layers.Flatten()(policy_head)
policy_head = layers.Activation('softmax', name='policy')(policy_head)

# Value head: global average pooling, a 50-unit dense layer, then one sigmoid unit.
value_head = layers.GlobalAveragePooling2D()(x)
value_head = layers.Dense(50, activation='relu',kernel_regularizer=regularizers.l2(0.0001))(value_head)
value_head = layers.Dense(1, activation='sigmoid', name='value',kernel_regularizer=regularizers.l2(0.0001))(value_head)

model = keras.Model(inputs=input, outputs=[policy_head, value_head])
model.summary ()

# divide lr by 10 later ?
# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={'policy': 'categorical_accuracy', 'value': 'mse'})

# One fit() epoch per iteration, each preceded by a golois.getBatch call on the buffers.
for i in range (1, epochs + 1):
    print ('epoch ' + str (i))
    golois.getBatch (input_data, policy, value, end, groups)
    history = model.fit(input_data,
                        {'policy': policy, 'value': value},
                        epochs=1, batch_size=batch)
    # Every 10th iteration, evaluate on what getValidation puts into the same buffers.
    if (i % 10 == 0):
        golois.getValidation (input_data, policy, value, end)
        val = model.evaluate (input_data,
                              [policy, value], verbose = 0, batch_size=batch)
        print ("val =", val)

# Persist the full model once all iterations are done.
model.save ('test.h5')


In [None]:
# How to load and use weights from a checkpoint
# The Keras names come from tensorflow.keras, like everywhere else in this notebook,
# so this cell does not depend on a separately installed standalone `keras` package.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
import numpy
# create model
# NOTE: this rebinds the notebook-level name `model` to a small dense network.
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# load weights (the file must exist and match the architecture defined above)
model.load_weights("weights.best.hdf5")
# Compile model (required to make predictions)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print("Created model and loaded weights from file")
# load pima indians dataset
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# split into input (X) and output (Y) variables: columns 0-7 are inputs, column 8 is the target
X = dataset[:,0:8]
Y = dataset[:,8]
# estimate accuracy on whole dataset using loaded weights
scores = model.evaluate(X, Y, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

# Another net from Cazenave's paper : mobile.small.conv.avg.bin

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from tensorflow.keras import layers 
from tensorflow.keras import regularizers

import golois

# Problem dimensions and training hyper-parameters used by the cells below.
planes = 21   # feature planes per board position (last axis of input_data)
moves = 361   # number of policy classes (19 * 19)
N = 10000     # number of positions held in each buffer
epochs = 20
batch = 128
filters = 64

# Pre-allocate the float32 buffers that are handed to the golois extension.
# NOTE(review): golois.getValidation / golois.getBatch presumably overwrite these
# arrays in place, making the random contents mere placeholders — confirm against
# the C++ source.
input_data = np.random.randint(2, size=(N, 19, 19, planes))
input_data = input_data.astype ('float32')

policy = np.random.randint(moves, size=(N,))
# Pin the one-hot width to `moves`; without num_classes the width is inferred from
# the largest value that happened to be drawn.
policy = keras.utils.to_categorical (policy, num_classes=moves)

value = np.random.randint(2, size=(N,))
value = value.astype ('float32')

end = np.random.randint(2, size=(N, 19, 19, 2))
end = end.astype ('float32')

# Allocate directly as float32 instead of creating a float64 array and converting it.
groups = np.zeros((N, 19, 19, 1), dtype='float32')

print ("getValidation", flush = True)
golois.getValidation (input_data, policy, value, end)


In [None]:
# Re-assign the shared hyper-parameters without re-allocating any data buffers.
# NOTE(review): arrays created earlier keep the size they were allocated with,
# so changing N here alone does not resize them.
planes = 21
moves = 361
N = 10000
epochs = 20
batch = 128
filters = 64

## MobileNet

In [None]:
# Network input: a 19x19 board described by `planes` feature planes.
input = keras.Input(shape=(19, 19, planes), name='board')
# Stem: 1x1 convolution to `filters` channels.
x = layers.Conv2D(filters, 1, activation='relu', padding='same')(input)
# Body: 6 residual blocks of two 3x3 convolutions each, followed by ReLU and batch
# normalization. NOTE(review): despite the "MobileNet" section title, there is no
# depthwise convolution here; these are plain residual blocks.
for i in range (6):
    x1 = layers.Conv2D(filters, 3, activation='relu', padding='same')(x)
    x1 = layers.Conv2D(filters, 3, padding='same')(x1)
    x = layers.add([x1,x])
    x = layers.ReLU()(x)
    x = layers.BatchNormalization()(x)

# Policy head: a single-channel 1x1 convolution, flattened and passed through a softmax.
policy_head = layers.Conv2D(1, 1, activation='relu', padding='same', use_bias = False, kernel_regularizer=regularizers.l2(0.0001))(x)
policy_head = layers.Flatten()(policy_head)
policy_head = layers.Activation('softmax', name='policy')(policy_head)

# Value head: global average pooling, a 50-unit dense layer, then one sigmoid unit.
value_head = layers.GlobalAveragePooling2D()(x)
value_head = layers.Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.0001))(value_head)
value_head = layers.Dense(1, activation='sigmoid', name='value', kernel_regularizer=regularizers.l2(0.0001))(value_head)

model = keras.Model(inputs=input, outputs=[policy_head, value_head])
model.summary ()

# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={'policy': 'categorical_accuracy', 'value': 'mse'})

# One fit() epoch per iteration, each preceded by a golois.getBatch call on the buffers.
for i in range (1, epochs + 1):
    print ('epoch ' + str (i))
    golois.getBatch (input_data, policy, value, end, groups)
    history = model.fit(input_data,
                        {'policy': policy, 'value': value},
                        epochs=1, batch_size=batch)
    # Every 10th iteration, evaluate on what getValidation puts into the same buffers.
    if (i % 10 == 0):
        golois.getValidation (input_data, policy, value, end)
        val = model.evaluate (input_data,
                              [policy, value], verbose = 0, batch_size=batch)
        print ("val =", val)

# Persist the full model once all iterations are done.
model.save ('test.h5')


## Mobilenet V2

In [None]:
def bottleneck_block(x, expand=64, squeeze=21):
    """Append one bottleneck block to tensor `x` and return the summed output.

    A 1x1 convolution widens to `expand` channels, a 3x3 depthwise convolution
    follows, and a 1x1 convolution narrows to `squeeze` channels. All three are
    bias-free with an L2 kernel penalty of 0.0001 and are batch-normalized;
    only the first two are followed by a ReLU. The result is added to `x`.
    """
    def normalize(tensor, activate):
        # Batch normalization, optionally followed by a ReLU.
        tensor = layers.BatchNormalization()(tensor)
        if activate:
            tensor = layers.Activation('relu')(tensor)
        return tensor

    widened = layers.Conv2D(expand, (1,1), kernel_regularizer=regularizers.l2(0.0001), use_bias=False)(x)
    widened = normalize(widened, activate=True)

    filtered = layers.DepthwiseConv2D((3,3), padding='same', kernel_regularizer=regularizers.l2(0.0001), use_bias=False)(widened)
    filtered = normalize(filtered, activate=True)

    projected = layers.Conv2D(squeeze, (1,1), kernel_regularizer=regularizers.l2(0.0001), use_bias=False)(filtered)
    projected = normalize(projected, activate=False)

    # Residual sum of the projected branch and the block input.
    return layers.Add()([projected, x])


In [None]:
# Bind the input tensor to `input`: both the stem convolution and keras.Model below
# read `input`. Assigning it to `x` (as before) meant the new tensor was overwritten
# immediately and the model used whatever `input` an earlier cell had left behind.
input = keras.Input(shape=(19, 19, planes), name='board')

# Stem: 1x1 convolution to 32 channels.
x = layers.Conv2D(32, 1, activation='relu', padding='same')(input)

# Body: 6 bottleneck blocks with 196 channels inside and 32 channels between blocks.
for i in range (6):
    x = bottleneck_block(x, 196, 32)

# Policy head: a single-channel 1x1 convolution, flattened and passed through a softmax.
policy_head = layers.Conv2D(1, 1, activation='relu', padding='same', use_bias = False, kernel_regularizer=regularizers.l2(0.0001))(x)
policy_head = layers.Flatten()(policy_head)
policy_head = layers.Activation('softmax', name='policy')(policy_head)

# Value head: global average pooling, a 50-unit dense layer, then one sigmoid unit.
value_head = layers.GlobalAveragePooling2D()(x)
value_head = layers.Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.0001))(value_head)
value_head = layers.Dense(1, activation='sigmoid', name='value', kernel_regularizer=regularizers.l2(0.0001))(value_head)

model = keras.Model(inputs=input, outputs=[policy_head, value_head])
model.summary ()

# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={'policy': 'categorical_accuracy', 'value': 'mse'})

# One fit() epoch per iteration, each preceded by a golois.getBatch call on the buffers.
for i in range (1, epochs + 1):
    print ('epoch ' + str (i))
    golois.getBatch (input_data, policy, value, end, groups)
    history = model.fit(input_data,
                        {'policy': policy, 'value': value},
                        epochs=1, batch_size=batch)
    # Every 10th iteration, evaluate on what getValidation puts into the same buffers.
    if (i % 10 == 0):
        golois.getValidation (input_data, policy, value, end)
        val = model.evaluate (input_data,
                              [policy, value], verbose = 0, batch_size=batch)
        print ("val =", val)

# Persist the full model once all iterations are done.
model.save ('test.h5')


# Simple Net from kaggle

In [None]:
# Network input: a 19x19 board described by `planes` feature planes.
input = keras.Input(shape=(19, 19, planes), name='board')

# Body: one 7x7 convolution (48 channels) followed by three 5x5 convolutions
# (32 channels each), all with ELU activations and no residual connections.
x = layers.Conv2D(48, (7,7), activation='elu', padding='same')(input)
x = layers.Conv2D(32, (5,5), activation='elu', padding='same')(x)
x = layers.Conv2D(32, (5,5), activation='elu', padding='same')(x)
x = layers.Conv2D(32, (5,5), activation='elu', padding='same')(x)

# Policy head: a single-channel 1x1 convolution, flattened and passed through a softmax.
policy_head = layers.Conv2D(1, 1, activation='elu', padding='same')(x)
policy_head = layers.Flatten()(policy_head)
policy_head = layers.Activation('softmax', name='policy')(policy_head)

# Value head: global average pooling, a 50-unit dense layer, then one sigmoid unit.
value_head = layers.GlobalAveragePooling2D()(x)
value_head = layers.Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.0001))(value_head)
value_head = layers.Dense(1, activation='sigmoid', name='value', kernel_regularizer=regularizers.l2(0.0001))(value_head)

model = keras.Model(inputs=input, outputs=[policy_head, value_head])
model.summary ()

# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={'policy': 'categorical_accuracy', 'value': 'mse'})

# One fit() epoch per iteration, each preceded by a golois.getBatch call on the buffers.
for i in range (1, epochs + 1):
    print ('epoch ' + str (i))
    golois.getBatch (input_data, policy, value, end, groups)
    history = model.fit(input_data,
                        {'policy': policy, 'value': value},
                        epochs=1, batch_size=batch)
    # Every 10th iteration, evaluate on what getValidation puts into the same buffers.
    if (i % 10 == 0):
        golois.getValidation (input_data, policy, value, end)
        val = model.evaluate (input_data,
                              [policy, value], verbose = 0, batch_size=batch)
        print ("val =", val)

# Persist the full model once all iterations are done.
model.save ('test.h5')

# Inspired from Cross-width convolutions by Jeffrey Barratt

In [1]:
# This cell relies on names created by earlier cells: keras, layers, regularizers,
# golois and the input_data / policy / value / end / groups buffers. Running it in a
# fresh kernel without those cells fails with a NameError.
# N is reassigned here, but the buffers are not re-allocated in this cell.
planes = 21
moves = 361
N = 10000
epochs = 20
batch = 128
filters = 256

# Network input: a 19x19 board described by `planes` feature planes.
input = keras.Input(shape=(19, 19, planes), name='board')

# Stem: a wide 7x7 convolution (128 channels), then a 1x1 convolution to `filters` channels.
x = layers.Conv2D(128, (7,7), activation='relu', padding='same')(input)

x = layers.Conv2D(filters, 1, activation='relu', padding='same')(x)

# Body: 7 residual blocks of two 3x3 convolutions each, followed by ReLU and batch normalization.
for i in range (7):
    x1 = layers.Conv2D(filters, 3, activation='relu', padding='same')(x)
    x1 = layers.Conv2D(filters, 3, padding='same')(x1)
    x = layers.add([x1,x])
    x = layers.ReLU()(x)
    x = layers.BatchNormalization()(x)

# Policy head: a single-channel 1x1 convolution, flattened and passed through a softmax.
policy_head = layers.Conv2D(1, 1, activation='relu', padding='same', use_bias = False, kernel_regularizer=regularizers.l2(0.0001))(x)
policy_head = layers.Flatten()(policy_head)
policy_head = layers.Activation('softmax', name='policy')(policy_head)

# Value head: global average pooling, a 50-unit dense layer, then one sigmoid unit.
value_head = layers.GlobalAveragePooling2D()(x)
value_head = layers.Dense(50, activation='relu', kernel_regularizer=regularizers.l2(0.0001))(value_head)
value_head = layers.Dense(1, activation='sigmoid', name='value', kernel_regularizer=regularizers.l2(0.0001))(value_head)

model = keras.Model(inputs=input, outputs=[policy_head, value_head])
model.summary ()


# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
              loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
              loss_weights={'policy' : 1.0, 'value' : 1.0},
              metrics={'policy': 'categorical_accuracy', 'value': 'mse'})

# One fit() epoch per iteration, each preceded by a golois.getBatch call on the buffers.
for i in range (1, epochs + 1):
    print ('epoch ' + str (i))
    golois.getBatch (input_data, policy, value, end, groups)
    history = model.fit(input_data,
                        {'policy': policy, 'value': value},
                        epochs=1, batch_size=batch)
    # Every 10th iteration, evaluate on what getValidation puts into the same buffers.
    if (i % 10 == 0):
        golois.getValidation (input_data, policy, value, end)
        val = model.evaluate (input_data,
                              [policy, value], verbose = 0, batch_size=batch)
        print ("val =", val)

# Persist the full model once all iterations are done.
model.save ('test.h5')


NameError: ignored

# Sandbox

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

In [None]:
// Keep-alive helper written in JavaScript (presumably meant to be pasted into the
// browser console of the Colab tab rather than run as a Python cell).
// Logs a heartbeat message and clicks the toolbar's connect button.
function ClickConnect() {
  console.log("Working");
  const connectButton = document.querySelector("colab-toolbar-button#connect");
  connectButton.click();
}
// Call ClickConnect every 60000 ms.
setInterval(ClickConnect, 60000)