## Set-Up

In [17]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [18]:
def mod_5(in_list):
  """Return the number encoded by a bit sequence, reduced modulo 5.

  Args:
    in_list: Iterable of binary digits ``[a, b, c, d, e]``, most significant
      bit first, encoding the number ``e + 2*d + 4*c + ...``.

  Returns:
    The encoded number modulo 5, as a Python ``int``.
  """
  # Concatenate the digits into a base-2 literal and let int() parse it.
  bit_string = "".join(map(str, in_list))
  encoded_value = int(bit_string, 2)
  return encoded_value % 5

def div_5(in_list):
  """Binary label for a bit sequence based on its value modulo 5.

  Args:
    in_list: Iterable of binary digits, interpreted exactly as in ``mod_5``.

  Returns:
    ``0`` when the encoded number is a multiple of 5 and ``1`` otherwise
    (i.e. the label is 1 for numbers that are NOT divisible by 5).
  """
  remainder = mod_5(in_list)
  return 1 if remainder else 0


In [19]:
# Function the experiment is named after: the encoded number modulo 5.
# NOTE(review): nothing else in the visible notebook reads target_func;
# get_batch labels its samples with div_5 directly. Confirm it is still needed.
target_func = mod_5
# Number of bits per input vector; used for the width of X in get_batch and
# for the input shape of the models.
input_size = 20

In [20]:
def get_batch(batch_size, rng=None):
  """Draw a random batch of bit vectors together with their labels.

  Args:
    batch_size: Number of samples (rows) to generate.
    rng: Optional ``numpy.random.Generator``. When given, the bits are drawn
      from it, so a seeded generator yields a reproducible batch. When
      omitted, NumPy's global random state is used, as before.

  Returns:
    A tuple ``(X, y)``. ``X`` is a ``uint8`` array of shape
    ``(batch_size, input_size)`` containing 0/1 bits; ``y`` holds the result
    of ``div_5`` applied to every row of ``X``.
  """
  shape = (batch_size, input_size)
  # The upper bound is exclusive, so both samplers already return only 0 or 1;
  # no additional "% 2" is needed.
  if rng is None:
    X = np.random.randint(0, 2, shape, dtype = np.uint8)
  else:
    X = rng.integers(0, 2, size = shape, dtype = np.uint8)
  # Label each row independently with the shared div_5 helper.
  y = np.apply_along_axis(div_5, 1, X)
  return (X, y)



## Construct modular model

In [65]:
# Baseline fully connected classifier: two ReLU hidden layers (400 and 60
# units) followed by a single sigmoid output unit.
model = keras.models.Sequential([
  keras.layers.Dense(400, activation="relu", input_shape = (input_size,)),
  keras.layers.Dense(60, activation="relu"),
  keras.layers.Dense(1, activation="sigmoid"),
])

def build_module(num_layers, layer_size, first_layer_mul = 1):
  """Build one dense sub-network ("module") as a Sequential model.

  The module is a stack of ``num_layers`` Dense layers. The first is a Dense
  layer without activation; every following Dense layer is preceded by
  BatchNormalization and uses ELU with ``he_normal`` initialisation.
  ``build_module(4, layer_size)`` produces the four-layer stack that used to
  be hard-coded here.

  Args:
    num_layers: Total number of Dense layers in the module (at least 1).
    layer_size: Number of units in every Dense layer after the first.
    first_layer_mul: The first Dense layer has ``first_layer_mul * layer_size``
      units.

  Returns:
    A ``keras.models.Sequential`` holding the layers; it has no input shape
    yet and is meant to be called on a tensor.

  Raises:
    ValueError: If ``num_layers`` is smaller than 1.
  """
  if num_layers < 1:
    raise ValueError("num_layers must be at least 1, got {}".format(num_layers))

  module = keras.models.Sequential()
  # NOTE(review): the first layer is linear (no activation, default
  # initialiser), unlike the ELU layers below. Kept as in the original;
  # confirm this is intentional.
  module.add(keras.layers.Dense(first_layer_mul * layer_size))
  for _ in range(num_layers - 1):
    module.add(keras.layers.BatchNormalization())
    module.add(keras.layers.Dense(layer_size, activation = "elu", kernel_initializer="he_normal"))

  return module

In [68]:
def build_modular_model(module_size = 4, module_width = 16, activation = "elu", final_layer_size = 32):
  """Build the modular classifier: four parallel modules plus a merge head.

  Four modules created by ``build_module`` all read the same input. Their
  outputs are concatenated and passed through four BatchNormalization + Dense
  blocks, followed by a single sigmoid output unit. With the default
  arguments the architecture is the one that used to be hard-coded here.

  Args:
    module_size: Forwarded to ``build_module`` as its ``num_layers`` argument.
    module_width: Forwarded to ``build_module`` as its ``layer_size`` argument.
    activation: Activation of the Dense layers in the merge head.
    final_layer_size: Units of the first Dense layer of the merge head; the
      three Dense layers after it have 16 units each.
      NOTE(review): this mapping is inferred from the default value (32
      matched the first merge layer) - confirm the intended meaning.

  Returns:
    An uncompiled ``keras.Model`` mapping an ``input_size``-wide input to one
    sigmoid output.
  """
  input_ = keras.layers.Input(shape=[input_size])

  # Four independent modules applied to the same input tensor.
  module_outputs = [build_module(module_size, module_width)(input_) for _ in range(4)]

  # Merge head: normalise, then project, four times in a row.
  features = keras.layers.Concatenate()(module_outputs)
  for units in (final_layer_size, 16, 16, 16):
    features = keras.layers.BatchNormalization()(features)
    features = keras.layers.Dense(units, activation = activation, kernel_initializer="he_normal")(features)

  # Uniform variance-scaling initialiser (scale 2, fan_avg) for the output unit.
  he_avg_init = keras.initializers.VarianceScaling(scale=2.,
      mode='fan_avg',
      distribution='uniform')
  output = keras.layers.Dense(1, activation = "sigmoid", kernel_initializer = he_avg_init)(features)

  return keras.Model(inputs=[input_], outputs=[output])


## Test Again

In [23]:
# Compile the baseline model with plain SGD (no momentum) for binary classification.
optimizer = keras.optimizers.SGD(learning_rate=0.01)
model.compile(
  loss="binary_crossentropy",
  optimizer=optimizer,
  metrics=["accuracy"],
)

In [24]:
# Training and validation sets are two independent random draws.
# NOTE(review): both are sampled from the same 2**input_size possible inputs,
# so some validation rows may also occur in the training set.
X_train, y_train = get_batch(100000)
X_valid, y_valid = get_batch(10000)



In [25]:
#Hyperparameter_search

# Candidate values per hyperparameter; the keys match keyword arguments of
# construct_model.
param_distribs = dict(
  first_layer_mul=[1, 4],
  layer_num=[1, 2, 4, 8, 12],
  layer_size=[8, 16, 32],
  learning_rate=[0.003, 0.01, 0.03, 0.1, 0.3, 1],
)


# TODO: run the search twice - once with momentum and once without?

def construct_model(first_layer_mul = 1, layer_num = 1, layer_size = 8, learning_rate = 0.1, momentum = None, bn = False):
  """Build and compile a fully connected binary classifier.

  Args:
    first_layer_mul: The first hidden layer has ``first_layer_mul * layer_size``
      units.
    layer_num: Number of hidden Dense layers. One hidden layer is always
      created, so values below 1 behave like 1.
    layer_size: Units of every hidden layer after the first.
    learning_rate: Learning rate of the SGD optimizer.
    momentum: If truthy, SGD uses this momentum with Nesterov updates;
      otherwise plain SGD is used.
    bn: If true, a BatchNormalization layer is placed before every hidden
      Dense layer.

  Returns:
    A compiled ``keras.models.Sequential`` with ELU hidden layers, one sigmoid
    output unit, binary cross-entropy loss and an accuracy metric.
  """
  model = keras.models.Sequential()
  model.add(keras.layers.Input(shape = [input_size]))

  if bn:
    # The Input layer above already fixes the input shape, so the
    # normalisation layer does not need its own input_shape argument.
    model.add(keras.layers.BatchNormalization())
  model.add(keras.layers.Dense(first_layer_mul * layer_size, activation="elu"))

  for _ in range(layer_num - 1):
    if bn:
      model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dense(layer_size, activation="elu"))

  model.add(keras.layers.Dense(1, activation="sigmoid"))

  if momentum:
    optimizer = keras.optimizers.SGD(learning_rate = learning_rate, momentum = momentum, nesterov = True)
  else:
    optimizer = keras.optimizers.SGD(learning_rate = learning_rate)

  model.compile(loss="binary_crossentropy",
                optimizer = optimizer,
                metrics=["accuracy"])

  return model

# Wrap construct_model so it can be used as a scikit-learn estimator.
# NOTE(review): construct_model builds a binary classifier, yet the regressor
# wrapper is used here - confirm which scoring behaviour is wanted.
# NOTE(review): keras.wrappers.scikit_learn is, as far as I know, deprecated in
# favour of SciKeras and missing from recent TensorFlow releases - verify
# against the installed version.
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(construct_model)



In [26]:
# from google.colab import drive
# drive.mount('/drive')
# filename='/drive/My Drive/Colab Notebooks/colab/log.csv'
# history_logger=tf.keras.callbacks.CSVLogger(filename, separator=",", append=True)


In [27]:
from sklearn.model_selection import RandomizedSearchCV

# Rebinds `model` to a 32-layer batch-normalised network trained with Nesterov
# momentum; the previously built baseline model is no longer reachable by name.
model = construct_model(
  first_layer_mul=4,
  layer_num=32,
  layer_size=32,
  learning_rate=0.01,
  momentum=0.9,
  bn=True,
)


In [28]:
#history = model.fit(X_train, y_train, epochs = 5, validation_data = (X_valid, y_valid))

Best values: 

In [29]:
# NOTE(review): random_search is not assigned anywhere in the visible notebook,
# and the output recorded for this cell is a NameError. The cell that built and
# fitted the search (presumably RandomizedSearchCV over keras_reg and
# param_distribs) appears to be missing - restore it before this line.
random_search.cv_results_

NameError: ignored

In [None]:
# Persist the results of the random search and read them back.
# NOTE(review): random_search is not assigned anywhere in the visible notebook,
# and the path below requires Google Drive to be mounted at /drive.
best_params_path = '/drive/My Drive/Colab Notebooks/colab/best_params.npy'

# np.save stores a non-array object by pickling it inside a 0-d object array.
np.save(best_params_path, random_search.cv_results_)

# allow_pickle must be the boolean True: any non-empty string, even 'FALSE',
# would be truthy and enable unpickling. Only load files you wrote yourself,
# because unpickling can execute arbitrary code. .item() unwraps the 0-d array.
history1 = np.load(best_params_path, allow_pickle=True).item()

## Train modular model

In [30]:

# Draw fresh random training / validation data for the modular model; this
# rebinds the arrays of the same names created earlier in the notebook.
X_train, y_train = get_batch(100000)
X_valid, y_valid = get_batch(10000)



In [69]:
# First training stage of the modular model: SGD with Nesterov momentum 0.9.
learning_rate = 0.01
optimizer = keras.optimizers.SGD(
  learning_rate=learning_rate,
  momentum=0.9,
  nesterov=True,
)

model_modular = build_modular_model()
model_modular.compile(
  loss="binary_crossentropy",
  optimizer=optimizer,
  metrics=["accuracy"],
)

hist_modular = model_modular.fit(
  X_train,
  y_train,
  epochs=10,
  validation_data=(X_valid, y_valid),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [75]:

# Second training stage: re-compile the already trained model with a new SGD
# optimizer (momentum 0.99) and continue fitting with class weights.
optimizer = keras.optimizers.SGD(
  learning_rate=.01,
  momentum=0.99,
  nesterov=True,
)

model_modular.compile(
  loss="binary_crossentropy",
  optimizer=optimizer,
  metrics=["accuracy"],
)

# Label 0 is weighted 0.8 and label 1 is weighted 0.2 in the loss.
# This rebinds hist_modular, so the history of the previous fit is dropped.
hist_modular = model_modular.fit(
  X_train,
  y_train,
  epochs=50,
  class_weight={0 : 0.8, 1 : 0.2},
  validation_data=(X_valid, y_valid),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Run the model directly on rows 1..99 of X_train (the slice skips row 0 and
# yields 99 samples) and display the resulting predictions.
# NOTE(review): if the first 100 samples were intended, the slice should be
# X_train[:100] - confirm.
model_modular(X_train[1:100])