In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the project files are read from
# below this directory by the following cells.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Move into the project folder on the mounted Drive, then build the native
# extension by running compile.sh.
%cd /content/drive/MyDrive/cours/iasd-deep-golois/project/
# pybind11 is installed before the build; presumably compile.sh needs it — TODO confirm.
!pip install pybind11
# NOTE(review): mode 777 also grants write access to every user; execute
# permission alone would be enough to run the script on the next line.
!chmod 777 ./compile.sh
!./compile.sh


In [None]:
# Copy the compiled extension to the short name golois.so, presumably so that
# `import golois` resolves to it — TODO confirm.
# NOTE(review): the source file name is tied to the CPython 3.6 ABI tag
# (cpython-36m); a build under another Python version would presumably
# produce a different name and make this copy fail — verify.
!cp golois.cpython-36m-x86_64-linux-gnu.so golois.so

In [None]:
# Hyper-parameter search dependencies; hyperopt is imported by the search cell below.
# NOTE(review): hyperas is not imported anywhere in the visible notebook —
# confirm it is still needed.
!pip install hyperas
!pip install hyperopt

In [None]:
# mlflow provides the tracking run wrapped around the hyperopt search below;
# --quiet reduces pip's console output.
!pip install mlflow --quiet

In [None]:
import os
import math
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from tensorflow.keras import layers 
from tensorflow.keras import regularizers

import golois

# HyperOpt exploration

In [None]:
def bottleneck_block(x, hpo, expand, squeeze=64):
    """Apply one residual bottleneck block to ``x`` and return the sum.

    The block chains three bias-free, L2-regularised convolutions, each
    followed by batch normalisation: a 1x1 convolution to ``expand`` filters,
    a 3x3 depthwise convolution (``padding='same'``) and a 1x1 convolution to
    ``squeeze`` filters. A LeakyReLU follows the first two normalisations.
    The block input ``x`` is then added to the result.

    Args:
        x: Input tensor. It is added to the block output, so it presumably
            must already have ``squeeze`` channels — verify against callers.
        hpo: Mapping of hyper-parameters; only ``hpo["leakly_alpha"]`` is
            read, as the LeakyReLU ``alpha``.
        expand: Number of filters of the first 1x1 convolution.
        squeeze: Number of filters of the last 1x1 convolution.

    Returns:
        The output of ``layers.Add()`` over the block output and ``x``.
    """
    def pointwise_conv(filters):
        # 1x1 convolution without bias, with the block's L2 kernel penalty.
        return layers.Conv2D(
            filters,
            (1, 1),
            kernel_regularizer=regularizers.l2(0.000328),
            use_bias=False,
        )

    def leaky_relu():
        # The slope is read from the hyper-parameters at each use.
        return layers.LeakyReLU(alpha=float(hpo["leakly_alpha"]))

    # Expansion stage.
    branch = pointwise_conv(expand)(x)
    branch = layers.BatchNormalization()(branch)
    branch = leaky_relu()(branch)

    # Depthwise 3x3 stage.
    depthwise = layers.DepthwiseConv2D(
        (3, 3),
        padding='same',
        kernel_regularizer=regularizers.l2(0.000328),
        use_bias=False,
    )
    branch = depthwise(branch)
    branch = layers.BatchNormalization()(branch)
    branch = leaky_relu()(branch)

    # Projection stage (no activation before the residual addition).
    branch = pointwise_conv(squeeze)(branch)
    branch = layers.BatchNormalization()(branch)

    return layers.Add()([branch, x])

# hpo provides the parameter values
def create_model(hpo):
  """Build the uncompiled two-headed Keras model for one hyper-parameter sample.

  Architecture: a 1x1 ReLU convolution lifts the (19, 19, 21) board input to
  64 filters, followed by 34 residual blocks from ``bottleneck_block`` (each
  expanding to 201 filters and projecting back to 64). Two heads share that
  trunk:

  * ``policy``: a 1x1 convolution to a single plane, flattened (19 * 19 = 361
    values) and passed through a softmax;
  * ``value``: global average pooling, a 50-unit ReLU dense layer and a
    single sigmoid unit.

  This function only builds the graph. Earlier versions also allocated
  100000 random training samples here and passed them to
  ``golois.getValidation``; none of that data was used to build the model
  (``runNN`` allocates its own arrays), so it was removed to avoid several
  gigabytes of throw-away allocations per hyperopt trial.

  Args:
    hpo: Mapping of hyper-parameters, forwarded to ``bottleneck_block``
      (which reads ``hpo["leakly_alpha"]``).

  Returns:
    A ``keras.Model`` with input ``board`` and outputs ``[policy, value]``.
  """
  planes = 21       # channels of the 19x19 board input
  trunk = 64        # filters carried through the residual trunk
  expand = 201      # filters inside each bottleneck block
  blocks = 33 + 1   # number of bottleneck blocks

  board = keras.Input(shape=(19, 19, planes), name='board')

  # TODO: try leaky relu here as well
  x = layers.Conv2D(trunk, 1, activation='relu', padding='same')(board)

  for _ in range(blocks):
    x = bottleneck_block(x, hpo, expand, trunk)

  # fully convolutional, no dense layer
  policy_head = layers.Conv2D(1, 1, activation='relu', padding='same',
                              use_bias=False,
                              kernel_regularizer=regularizers.l2(0.00048))(x)
  policy_head = layers.Flatten()(policy_head)
  policy_head = layers.Activation('softmax', name='policy')(policy_head)

  value_head = layers.GlobalAveragePooling2D()(x)
  value_head = layers.Dense(50, activation='relu',
                            kernel_regularizer=regularizers.l2(0.00048))(value_head)
  value_head = layers.Dense(1, activation='sigmoid', name='value',
                            kernel_regularizer=regularizers.l2(0.00048))(value_head)

  return keras.Model(inputs=board, outputs=[policy_head, value_head])

In [None]:
def runNN(hpo):
  """Hyperopt objective: train one model for the sampled hyper-parameters.

  Builds the network with ``create_model(hpo)``, compiles it with SGD
  (learning rate 0.0005, momentum 0.9) and trains it for ``epochs`` rounds.
  Each round lets ``golois.getBatch`` fill the pre-allocated arrays and runs
  a single-epoch ``fit`` on them. On every 10th round the arrays are refilled
  by ``golois.getValidation`` and the model is evaluated on them.

  Args:
    hpo: Hyper-parameter sample supplied by hyperopt; forwarded unchanged to
      ``create_model``.

  Returns:
    A dict ``{"loss": ..., "status": STATUS_OK}`` where ``loss`` is
    ``3.0 * value_loss - policy_loss`` taken from the last training round.
  """
  # Need to include the TF import due to serialization issues
  import tensorflow as tf
  # STATUS_OK is returned below but is otherwise only imported by the cell
  # that launches the search; import it here so the function is
  # self-contained, like the TF import above.
  from hyperopt import STATUS_OK

  planes = 21
  moves = 361
  N = 100000
  epochs = 10
  batch = 128

  # Pre-allocate the arrays handed to golois.getBatch / golois.getValidation.
  input_data = np.random.randint(2, size=(N, 19, 19, planes))
  input_data = input_data.astype('float32')

  policy = np.random.randint(moves, size=(N,))
  policy = keras.utils.to_categorical(policy)

  value = np.random.randint(2, size=(N,))
  value = value.astype('float32')

  end = np.random.randint(2, size=(N, 19, 19, 2))
  end = end.astype('float32')

  groups = np.zeros((N, 19, 19, 1))
  groups = groups.astype('float32')

  # hyperopt calls this function once per trial; drop the Keras global state
  # left behind by the previous trial's model before building a new one.
  tf.keras.backend.clear_session()

  # create the model given the hpo parameters
  model = create_model(hpo)

  model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.0005, momentum=0.9),
                loss={'policy': 'categorical_crossentropy', 'value': 'binary_crossentropy'},
                loss_weights={'policy': 1.0, 'value': 1.0},
                metrics={'policy': 'categorical_accuracy', 'value': 'mse'})

  # NOTE(review): fit() below receives no validation data, so 'val_loss' is
  # presumably never available to this callback; and because fit() is
  # re-invoked for every round, the callback likely restarts its patience
  # count each time. As written it probably never lowers the learning rate —
  # confirm and either monitor 'loss' with persistent state or remove it.
  reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                                   patience=5, min_lr=0.000005)

  for i in range(1, epochs + 1):
    # Refill the arrays with a fresh training batch, then train one epoch.
    golois.getBatch(input_data, policy, value, end, groups)
    history = model.fit(input_data,
                        {'policy': policy, 'value': value},
                        epochs=1,
                        callbacks=[reduce_lr],
                        batch_size=batch)
    if i % 10 == 0:
      # The same arrays are reused for validation data; the next round's
      # getBatch call fills them again before training continues.
      golois.getValidation(input_data, policy, value, end)
      val = model.evaluate(input_data,
                           [policy, value], verbose=0, batch_size=batch)
      print("val =", val)

  # objective metric
  # NOTE(review): hyperopt minimises "loss"; subtracting the policy loss
  # means a HIGHER policy loss gives a better score. If a weighted sum of the
  # two losses was intended, the sign should be '+' — confirm before relying
  # on the search results.
  obj_metric = 3.0 * (history.history["value_loss"][-1]) - history.history["policy_loss"][-1]
  print("-->" + str(obj_metric))
  return {"loss": obj_metric, "status": STATUS_OK}


In [None]:
%%capture

import os
import math
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from tensorflow.keras import layers 
from tensorflow.keras import regularizers

import golois
import mlflow
import mlflow.keras
from hyperopt import fmin, hp, tpe, STATUS_OK, Trials

# Seed TensorFlow's global random generator.
tf.random.set_seed(42)

# Search space: a single hyper-parameter, the LeakyReLU alpha, sampled
# uniformly from [0.0, 0.2]. (An L2 factor and an activation alpha were
# considered earlier and left out.)
space = {"leakly_alpha": hp.uniform("leakly_alpha", 0.0, 0.2)}

# Collects the result of every trial; pickled by a later cell.
colab_trials = Trials()

# see mlflow auto logging
np_rstate = np.random.RandomState(42)

# Arguments of the search: 5 evaluations of runNN driven by TPE.
search_kwargs = dict(
  fn=runNN,
  space=space,
  algo=tpe.suggest,
  max_evals=5,
  trials=colab_trials,
  rstate=np_rstate,
)

with mlflow.start_run():
  best_hyperparam = fmin(**search_kwargs)

best_hyperparam

In [None]:
import pickle

# Persist the hyperopt Trials object so the search history can be reloaded later.
trials_path = "test6.hyperopt"
with open(trials_path, "wb") as trials_file:
    pickle.dump(colab_trials, trials_file)

In [None]:
# Display the best hyper-parameters returned by fmin. The search cell runs
# under %%capture, so its own trailing display is presumably suppressed.
best_hyperparam

# Notes & tools

In [None]:
# JS code to paste into the Chrome console to keep the Colab session alive
function ClickConnect() {
  // Log a heartbeat, then click the element with id "help-menu-button".
  console.log("Working");
  const helpMenuButton = document.querySelector("#help-menu-button");
  helpMenuButton.click();
}

// Run ClickConnect every 60000 ms (once per minute).
setInterval(ClickConnect, 60000);