In [60]:
import numpy as np

def to_binary(n, digits):
  """For an integer returns its truncated binary representation with n digits as an array """
  n= int(n)
  result = np.zeros(digits)
  for i in range(digits):
    result[i] = n % 2
    n = n//2
  return result

def mod_5(X): 
  """ Computes the last decimal digit of a number represented in binary""" 
  counter = 0
  for i in range(X.shape[-1]):
    counter = (2*counter + int(X[-1-i]))%5
  return np.array([counter], dtype= np.uint8)

def div_5(X): 
  """ Computes whether a number represented in binary is divisible by 5""" 
  counter = 0
  for i in range(X.shape[-1]):
    counter = (2*counter + int(X[-1-i]))%5
  return np.array([0 if counter == 0 else 1], dtype= np.uint8)


In [61]:
div_5(np.array([1,1,1,1]))

array([0], dtype=uint8)

## Initialize Tensor Flow

In [62]:
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras.layers import Dense

In [63]:
input_length = 20

Generate Single batch

In [64]:
batch_size = 20000

def get_batch(batch_size):
  x_list = np.random.randint(0, 2, (batch_size, input_length), dtype = np.uint8) % 2
  y_list = np.apply_along_axis(div_5, 1, x_list) 
  return (x_list, y_list)


In [65]:
model_shape = [50] * 5

model = tf.keras.models.Sequential()

model.add(keras.layers.Flatten(input_shape = (input_length,)))

for l in range(len(model_shape)):
  model.add(keras.layers.BatchNormalization()),
  model.add(Dense(model_shape[l], activation = "elu", kernel_initializer = "he_normal"))
  if l+3 >= len(model_shape):
      model.add(tf.keras.layers.Dropout(0.1)),

model.add(Dense(2, activation="softmax"))

#model.summary()

## Initialize the model

In [66]:
lr = 0.01 #Should determine this systematically

def set_up_model (num_layers =  1, layer_size = 10, learning_rate = lr, momentum = 0.9, 
                  nesterov = True, dropout = 0, first_layer = None, first_layer_mul = None):

  model_shape = [layer_size] * num_layers
  if first_layer:
    model_shape[0] = first_layer
  elif first_layer_mul:
    model_shape[0] = model_shape[0] * first_layer_mul
  model = tf.keras.models.Sequential()
  model.add(keras.layers.Flatten(input_shape = (input_length,)))

  for l in range(len(model_shape)):
    model.add(keras.layers.BatchNormalization()),
    model.add(Dense(model_shape[l], activation = "elu", 
                    kernel_initializer = "he_normal"))
    if l+3 >= len(model_shape):
        model.add(tf.keras.layers.Dropout(dropout)),

  model.add(Dense(2, activation="softmax"))

  optimizer = keras.optimizers.SGD(learning_rate = lr, 
                                      momentum = momentum, nesterov = nesterov)
    
  model.compile(loss="sparse_categorical_crossentropy",
                optimizer=optimizer,
                metrics=["accuracy"])
  return model


In [67]:
keras_reg = keras.wrappers.scikit_learn.KerasRegressor(set_up_model)

  """Entry point for launching an IPython kernel.


## Train the Model (test!) 

In [68]:
(X_train,y_train) = get_batch(6 * 10 **4)
X_valid, y_valid = get_batch(10**4)
X_test, y_test = get_batch(10**4)
X_new = get_batch(1)

In [69]:

keras_reg.fit(X_train, y_train, epochs=100,
validation_data=(X_valid, y_valid),
callbacks=[keras.callbacks.EarlyStopping(patience=10)])

mse_test = keras_reg.score(X_test, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100


## Grid search for parameters

In [70]:
from sklearn.model_selection import GridSearchCV
param_grid = [
{ 

  },
]

from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV
param_distribs = {
  'num_layers': [5, 10, 30, 50, 100],
  'layer_size': [16, 32, 64, 128],
  'learning_rate': [0.001, 0.003, 0.01, 0.03, 0.1],
  'first_layer_mul' : [1, 4],
  'momentum': [0.9, 0.99],
}
rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, n_iter=10,
    cv=3)
rnd_search_cv.fit(X_train, y_train, epochs=100,
            validation_data=(X_valid, y_valid),
            callbacks=[keras.callbacks.EarlyStopping(patience=10)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/

 -1.04841445 -0.50214096 -0.50149002 -0.5016371 ]


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


RandomizedSearchCV(cv=3,
                   estimator=<keras.wrappers.scikit_learn.KerasRegressor object at 0x7f5643aefa90>,
                   param_distributions={'first_layer_mul': [1, 4],
                                        'layer_size': [16, 32, 64, 128],
                                        'learning_rate': [0.001, 0.003, 0.01,
                                                          0.03, 0.1],
                                        'momentum': [0.9, 0.99],
                                        'num_layers': [5, 10, 30, 50, 100]})

In [71]:
help(rnd_search_cv)

Help on RandomizedSearchCV in module sklearn.model_selection._search object:

class RandomizedSearchCV(BaseSearchCV)
 |  RandomizedSearchCV(estimator, param_distributions, *, n_iter=10, scoring=None, n_jobs=None, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score=nan, return_train_score=False)
 |  
 |  Randomized search on hyper parameters.
 |  
 |  RandomizedSearchCV implements a "fit" and a "score" method.
 |  It also implements "score_samples", "predict", "predict_proba",
 |  "decision_function", "transform" and "inverse_transform" if they are
 |  implemented in the estimator used.
 |  
 |  The parameters of the estimator used to apply these methods are optimized
 |  by cross-validated search over parameter settings.
 |  
 |  In contrast to GridSearchCV, not all parameter values are tried out, but
 |  rather a fixed number of parameter settings is sampled from the specified
 |  distributions. The number of parameter settings that are tried is
 | 

In [72]:
rnd_search_cv.cv_results_

{'mean_fit_time': array([344.00823951,  62.49617267, 382.42115966, 498.16346224,
        121.61577161, 108.65540163, 359.72630215,  67.90005565,
        210.04180423,  74.77782933]),
 'mean_score_time': array([ 7.66833504, 23.61474999, 24.30897546, 24.70311435, 20.78104202,
         7.22242085, 25.57233882,  9.99627646, 18.68534168,  4.65462637]),
 'mean_test_score': array([        nan, -0.50160547,         nan, -0.50121963, -0.50373128,
        -0.51240051, -1.04841445, -0.50214096, -0.50149002, -0.5016371 ]),
 'param_first_layer_mul': masked_array(data=[1, 1, 1, 1, 1, 4, 1, 4, 4, 4],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'param_layer_size': masked_array(data=[32, 16, 64, 64, 64, 128, 32, 32, 128, 16],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object)