From d199ed5651d52698567b705dc119e11f1a9ae949 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 16 Dec 2016 10:06:41 +1100 Subject: [PATCH 001/119] Fix 1D neural net bug Turns out np.float64(np.array([0])) returns 0, rather than np.array([0]). --- mloop/learners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/learners.py b/mloop/learners.py index ad6e5b1..ce0bb2e 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1711,7 +1711,7 @@ def predict_cost_gradient(self,params): float : Predicted gradient at paramters ''' # scipy.optimize.minimize doesn't seem to like a 32-bit Jacobian, so we convert to 64 - return np.float64(self.neural_net_impl.predict_cost_gradient(params)) + return self.neural_net_impl.predict_cost_gradient(params).astype(np.float64) def predict_costs_from_param_array(self,params): From e91ae242ca2fc756c381adc1d94269607b17f3ed Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 16 Dec 2016 10:44:38 +1100 Subject: [PATCH 002/119] Log NN fit cost --- mloop/nnlearner.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 4674f85..ee3c826 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -67,11 +67,11 @@ def _create_neural_net(self): self.output_var = tf.matmul(prev_h, self.weights[-1]) + self.biases[-1] # Loss function and training - loss_func = ( + self.loss_func = ( tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - self.output_placeholder), reduction_indices=[1])) + self.regularisation_coefficient * sum([tf.nn.l2_loss(W) for W in self.weights])) - self.train_step = tf.train.AdamOptimizer(1.0).minimize(loss_func) + self.train_step = tf.train.AdamOptimizer(1.0).minimize(self.loss_func) # Gradient self.output_var_gradient = tf.gradients(self.output_var, self.input_placeholder) @@ -96,6 +96,8 @@ def fit_neural_net(self, all_params, all_costs): self.log.error("Params and costs must have the same length") raise ValueError + 
reg_co = 0.01 + # TODO: Fit hyperparameters. for i in range(self.train_epochs): @@ -108,8 +110,15 @@ def fit_neural_net(self, all_params, all_costs): self.tf_session.run(self.train_step, feed_dict={self.input_placeholder: batch_input, self.output_placeholder: batch_output, - self.regularisation_coefficient: 0.01, + self.regularisation_coefficient: reg_co, }) + self.log.debug('Fit neural network with total training cost ' + + str(self.tf_session.run( + self.loss_func, + feed_dict={self.input_placeholder: all_params, + self.output_placeholder: [[c] for c in all_costs], + self.regularisation_coefficient: reg_co, + }))) def predict_cost(self,params): ''' From a4dd3081011107ac1864872757ea1031657d40fa Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 16 Dec 2016 17:29:57 +1100 Subject: [PATCH 003/119] Add some fuzziness to the NN parameter prediction It has no incentive to explore at all, so can easily get stuck close to a local minimum if it hasn't quite modelled the position correctly. By adding some very basic fuzziness (just randomly tweaking the predicted best parameters) we can at least make it more likely to find true local minima. --- mloop/learners.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mloop/learners.py b/mloop/learners.py index ce0bb2e..430c3b2 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1875,7 +1875,6 @@ def find_next_parameters(self): Return: next_params (array): Returns next parameters from cost search. ''' - # TODO: We could implement some other type of biasing. self.params_count += 1 self.update_search_params() next_params = None @@ -1889,6 +1888,16 @@ def find_next_parameters(self): if result.fun < next_cost: next_params = result.x next_cost = result.fun + # Now tweak the selected parameters to make sure we don't just keep on looking in the same + # place (the actual minimum might be a short distance away). 
+ # TODO: Rather than using [-0.1, 0.1] we should pick the fuzziness based on what we know + # about length scales. + # TODO: It would be nice to deal with uncertainty more cleverly. Even though the current + # method will help find the true local minimum, it doesn't help if we get stuck in a local + # minimum and there's another one a long way away that appears slightly higher. To do this + # cleverly would probably correspond to introducing some kind of uncertainty-based biasing + # (like the GP). + next_params = next_params + nr.uniform(-0.1, 0.1, size=next_params.shape) return next_params def run(self): From d42f0c7c4c1e4ea754f7e95fd39ff88aed0bf8d0 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 16 Dec 2016 17:37:06 +1100 Subject: [PATCH 004/119] Refactor of NNI to support hyperparameter fitting Pull the params associated with a single neural network (of particular topology and hyperparameters) out into a separate class. This way NNI can cleanly construct new networks if it decides that topology or hyperparameters need changing. --- mloop/nnlearner.py | 177 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 127 insertions(+), 50 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index ee3c826..5c1d26b 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -3,49 +3,48 @@ import tensorflow as tf import numpy as np -class NeuralNetImpl(): +class SingleNeuralNet(): ''' - Neural network implementation. + A single neural network with fixed hyperparameters/topology. This must run in the same process in which it's created. Args: - num_params (int): The number of params. + num_params: The number of params. + num_layers: The number of layers. + layer_dim: The number of nodes in each layer. + train_epochs: Epochs per train. + batch_size: The training batch size. + keep_prob: The dropoout keep probability. + regularisation_coefficient: The regularisation coefficient. 
''' def __init__(self, - num_params = None): - + num_params, + num_layers, + layer_dim, + train_epochs, + batch_size, + keep_prob, + regularisation_coefficient): self.log = logging.getLogger(__name__) - self.log.debug('Initialising neural network impl') - if num_params is None: - self.log.error("num_params must be provided") - raise ValueError - self.num_params = num_params - self.tf_session = tf.InteractiveSession() - # Initial hyperparameters - self.num_layers = 1 - self.layer_dim = 128 - self.train_epochs = 300 - self.batch_size = 64 + self.num_params = num_params + self.num_layers = num_layers + self.layer_dim = layer_dim + self.train_epochs = train_epochs + self.batch_size = batch_size + self.keep_prob = keep_prob + self.regularisation_coefficient = regularisation_coefficient # Inputs self.input_placeholder = tf.placeholder(tf.float32, shape=[None, self.num_params]) self.output_placeholder = tf.placeholder(tf.float32, shape=[None, 1]) - self.keep_prob = tf.placeholder_with_default(1., shape=[]) - self.regularisation_coefficient = tf.placeholder_with_default(0., shape=[]) - - self._create_neural_net() + self.keep_prob_placeholder = tf.placeholder_with_default(1., shape=[]) + self.regularisation_coefficient_placeholder = tf.placeholder_with_default(0., shape=[]) - def _create_neural_net(self): - ''' - Creates the neural net with topology specified by the current hyperparameters. 
- - ''' - self.log.debug('Creating neural network') - # Forget about any old weights/biases + # Parameters self.weights = [] self.biases = [] @@ -59,7 +58,7 @@ def _create_neural_net(self): prev_layer_dim = dim prev_h = tf.nn.dropout( tf.nn.sigmoid(tf.matmul(prev_h, self.weights[-1]) + self.biases[-1]), - keep_prob=self.keep_prob) + keep_prob=self.keep_prob_placeholder) # Output node self.weights.append(tf.Variable(tf.random_normal([prev_layer_dim, 1]))) @@ -70,7 +69,8 @@ def _create_neural_net(self): self.loss_func = ( tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - self.output_placeholder), reduction_indices=[1])) - + self.regularisation_coefficient * sum([tf.nn.l2_loss(W) for W in self.weights])) + + self.regularisation_coefficient_placeholder + * sum([tf.nn.l2_loss(W) for W in self.weights])) self.train_step = tf.train.AdamOptimizer(1.0).minimize(self.loss_func) # Gradient @@ -78,46 +78,42 @@ def _create_neural_net(self): self.tf_session.run(tf.initialize_all_variables()) - def fit_neural_net(self, all_params, all_costs): + def fit(self, params, costs): ''' - Determine the appropriate number of layers for the NN given the data. - - Fit the Neural Net with the appropriate topology to the data + Fit the neural net to the provided data Args: - all_params (array): array of all parameter arrays - all_costs (array): array of costs (associated with the corresponding parameters) + params (array): array of parameter arrays + costs (array): array of costs (associated with the corresponding parameters) ''' self.log.debug('Fitting neural network') - if len(all_params) == 0: + if len(params) == 0: self.log.error('No data provided.') raise ValueError - if not len(all_params) == len(all_costs): + if not len(params) == len(costs): self.log.error("Params and costs must have the same length") raise ValueError - reg_co = 0.01 - - # TODO: Fit hyperparameters. 
- for i in range(self.train_epochs): # Split the data into random batches, and train on each batch - all_indices = np.random.permutation(len(all_params)) - for j in range(math.ceil(len(all_params) / self.batch_size)): - batch_indices = all_indices[j * self.batch_size : (j + 1) * self.batch_size] - batch_input = [all_params[index] for index in batch_indices] - batch_output = [[all_costs[index]] for index in batch_indices] + indices = np.random.permutation(len(params)) + for j in range(math.ceil(len(params) / self.batch_size)): + batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] + batch_input = [params[index] for index in batch_indices] + batch_output = [[costs[index]] for index in batch_indices] self.tf_session.run(self.train_step, feed_dict={self.input_placeholder: batch_input, self.output_placeholder: batch_output, - self.regularisation_coefficient: reg_co, + self.regularisation_coefficient_placeholder: self.regularisation_coefficient, + self.keep_prob_placeholder: self.keep_prob, }) + self.log.debug('Fit neural network with total training cost ' + str(self.tf_session.run( self.loss_func, - feed_dict={self.input_placeholder: all_params, - self.output_placeholder: [[c] for c in all_costs], - self.regularisation_coefficient: reg_co, + feed_dict={self.input_placeholder: params, + self.output_placeholder: [[c] for c in costs], + self.regularisation_coefficient_placeholder: self.regularisation_coefficient, }))) def predict_cost(self,params): @@ -137,3 +133,84 @@ def predict_cost_gradient(self,params): float : Predicted gradient at parameters ''' return self.tf_session.run(self.output_var_gradient, feed_dict={self.input_placeholder: [params]})[0][0] + + +class NeuralNetImpl(): + ''' + Neural network implementation. This may actually create multiple neural networks with different + topologies or hyperparameters, and switch between them based on the data. + + This must run in the same process in which it's created. 
+ + Args: + num_params (int): The number of params. + fit_hyperparameters (bool): Whether to try to fit the hyperparameters to the data. + ''' + + def __init__(self, + num_params = None, + fit_hyperparameters = False): + + self.log = logging.getLogger(__name__) + self.log.debug('Initialising neural network impl') + if num_params is None: + self.log.error("num_params must be provided") + raise ValueError + + self.num_params = num_params + self.fit_hyperparameters = fit_hyperparameters + + self.net = self._make_net(0.01) + + def _make_net(self, reg): + ''' + Helper method to create a new net with a specified regularisation coefficient. + + Args: + reg (float): Regularisation coefficient. + ''' + return SingleNeuralNet( + self.num_params, + 1, # num_layers + 128, # layer_dim + 1000, # train_epochs + 64, # batch_size + 1., # keep_prob + reg) + + + def fit_neural_net(self, all_params, all_costs): + ''' + Fits the neural net with the appropriate topology to the data + + Args: + all_params (array): array of all parameter arrays + all_costs (array): array of costs (associated with the corresponding parameters) + ''' + self.log.debug('Fitting neural network') + if len(all_params) == 0: + self.log.error('No data provided.') + raise ValueError + if not len(all_params) == len(all_costs): + self.log.error("Params and costs must have the same length") + raise ValueError + + self.net.fit(all_params, all_costs) + + def predict_cost(self,params): + ''' + Produces a prediction of cost from the neural net at params. + + Returns: + float : Predicted cost at parameters + ''' + return self.net.predict_cost(params) + + def predict_cost_gradient(self,params): + ''' + Produces a prediction of the gradient of the cost function at params. 
+ + Returns: + float : Predicted gradient at parameters + ''' + return self.net.predict_cost_gradient(params) From ac49a63a7ba02c49b03eca32b32abdd774a12944 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 16 Dec 2016 17:38:32 +1100 Subject: [PATCH 005/119] Add support for fitting the regularisation coeff Every 20 fits, it generates a bunch of new networks with various regularisation coefficients. It trains each on (most of) the current data, and then tests on a cross validation set. If the best of these performs significantly better than the current network on the cv set then it replaces the current net with the new one. Disable this for now because it seems like it still needs a bit of tweaking. Also add a comment about possibly artificially capping costs. --- mloop/nnlearner.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 5c1d26b..a30669b 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -116,6 +116,20 @@ def fit(self, params, costs): self.regularisation_coefficient_placeholder: self.regularisation_coefficient, }))) + + def cross_validation_loss(self, params, costs): + ''' + Returns the loss of the network on a cross validation set. + + Args: + params (array): array of parameter arrays + costs (array): array of costs (associated with the corresponding parameters) + ''' + return self.tf_session.run(self.loss_func, + feed_dict={self.input_placeholder: params, + self.output_placeholder: [[c] for c in costs], + }) + def predict_cost(self,params): ''' Produces a prediction of cost from the neural net at params. 
@@ -159,6 +173,7 @@ def __init__(self, self.num_params = num_params self.fit_hyperparameters = fit_hyperparameters + self.last_hyperfit = 0 self.net = self._make_net(0.01) @@ -195,6 +210,43 @@ def fit_neural_net(self, all_params, all_costs): self.log.error("Params and costs must have the same length") raise ValueError + # TODO: Consider adding some kind of "cost capping". Our NNs will never predict costs going + # off to infinity, so we could be "wasting" training cost due to totally irrelevant points. + # If we capped the costs to some value then this might help. Note that this is really just + # another form of cost scaling. + + if self.fit_hyperparameters: + # Every 20 fits (starting at 5, just because), re-fit the hyperparameters + if False and int(len(all_params + 5) / 20) > self.last_hyperfit: + self.last_hyperfit = int(len(all_params + 5) / 20) + + # Fit regularisation + + # Split the data into training and cross validation + cv_size = int(len(all_params) / 10) + train_params = all_params[:-cv_size] + train_costs = all_costs[:-cv_size] + cv_params = all_params[cv_size:] + cv_costs = all_costs[cv_size:] + + orig_cv_loss = self.net.cross_validation_loss(cv_params, cv_costs) + best_cv_loss = orig_cv_loss + + self.log.debug("Fitting regularisation, current cv loss=" + str(orig_cv_loss)) + + # Try a bunch of different regularisation parameters, switching to a new one if it + # does significantly better on the cross validation set than the old one. 
+ for r in [0.001, 0.01, 0.1, 1, 10]: + net = self._make_net(r) + net.fit(train_params, train_costs) + this_cv_loss = net.cross_validation_loss(cv_params, cv_costs) + if this_cv_loss < best_cv_loss and this_cv_loss < 0.1 * orig_cv_loss: + best_cv_loss = this_cv_loss + self.log.debug("Switching to reg=" + str(r) + ", cv loss=" + str(best_cv_loss)) + self.net = net + + # TODO: Fit depth + self.net.fit(all_params, all_costs) def predict_cost(self,params): From 707bada55ccc9edbcdba135ccf64bb908fb0e85d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Sun, 18 Dec 2016 22:41:20 +1100 Subject: [PATCH 006/119] Fix always-off bug in regularisation-fitting --- mloop/nnlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index a30669b..9801d97 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -217,7 +217,7 @@ def fit_neural_net(self, all_params, all_costs): if self.fit_hyperparameters: # Every 20 fits (starting at 5, just because), re-fit the hyperparameters - if False and int(len(all_params + 5) / 20) > self.last_hyperfit: + if int(len(all_params + 5) / 20) > self.last_hyperfit: self.last_hyperfit = int(len(all_params + 5) / 20) # Fit regularisation From 0e0b5185b17a6c703e03bd60bc13d48626be3785 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 1 May 2017 14:10:10 +1000 Subject: [PATCH 007/119] Use mean instead of sum for regularization If we use sum then we need to change the coefficient pretty drastically when the number of layers changes. Mean should mean that a particular coefficient has the same meaning for all topologies. 
--- mloop/nnlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 9801d97..c5128d4 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -70,7 +70,7 @@ def __init__(self, tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - self.output_placeholder), reduction_indices=[1])) + self.regularisation_coefficient_placeholder - * sum([tf.nn.l2_loss(W) for W in self.weights])) + * tf.reduce_mean([tf.nn.l2_loss(W) for W in self.weights])) self.train_step = tf.train.AdamOptimizer(1.0).minimize(self.loss_func) # Gradient From 27210d330414adb69cdd2d4416de918eb27a2681 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 1 May 2017 14:44:47 +1000 Subject: [PATCH 008/119] Don't fuzz parameters for now --- mloop/learners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/learners.py b/mloop/learners.py index 430c3b2..41666f6 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1897,7 +1897,7 @@ def find_next_parameters(self): # minimum and there's another one a long way away that appears slightly higher. To do this # cleverly would probably correspond to introducing some kind of uncertainty-based biasing # (like the GP). 
- next_params = next_params + nr.uniform(-0.1, 0.1, size=next_params.shape) + #next_params = next_params + nr.uniform(-0.1, 0.1, size=next_params.shape) return next_params def run(self): From 37621a2ab588527f2d1d9523860ae5e43e4c3508 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 1 May 2017 14:45:16 +1000 Subject: [PATCH 009/119] Print unregularized training loss --- mloop/nnlearner.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index c5128d4..f4a23e0 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -114,9 +114,15 @@ def fit(self, params, costs): feed_dict={self.input_placeholder: params, self.output_placeholder: [[c] for c in costs], self.regularisation_coefficient_placeholder: self.regularisation_coefficient, + })) + + ', with unregularized cost ' + + str(self.tf_session.run( + self.loss_func, + feed_dict={self.input_placeholder: params, + self.output_placeholder: [[c] for c in costs], + self.regularisation_coefficient_placeholder: 0, }))) - def cross_validation_loss(self, params, costs): ''' Returns the loss of the network on a cross validation set. 
From b6e06cd886edd19207eeb9f296aae1d73e2c5d13 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 1 May 2017 14:45:53 +1000 Subject: [PATCH 010/119] Fix typo in visualizations --- mloop/visualizations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 8219fbc..c4dd600 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -38,7 +38,7 @@ def show_all_default_visualizations(controller, show_plots=True): log = logging.getLogger(__name__) configure_plots() log.debug('Creating controller visualizations.') - create_contoller_visualizations(controller.total_archive_filename, + create_controller_visualizations(controller.total_archive_filename, file_type=controller.controller_archive_file_type) if isinstance(controller, mlc.DifferentialEvolutionController): @@ -91,7 +91,7 @@ def configure_plots(): mpl.rcParams['legend.scatterpoints'] = 1 mpl.rcParams['legend.fontsize']= 'medium' -def create_contoller_visualizations(filename, +def create_controller_visualizations(filename, file_type='pkl', plot_cost_vs_run=True, plot_parameters_vs_run=True, From 9dac2d7413c633a02a70f62f49422b7842a4fda3 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 1 May 2017 14:49:07 +1000 Subject: [PATCH 011/119] Tweak params --- mloop/nnlearner.py | 4 ++-- mloop/visualizations.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index f4a23e0..55d4d15 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -192,8 +192,8 @@ def _make_net(self, reg): ''' return SingleNeuralNet( self.num_params, - 1, # num_layers - 128, # layer_dim + 2, # num_layers + 32, # layer_dim 1000, # train_epochs 64, # batch_size 1., # keep_prob diff --git a/mloop/visualizations.py b/mloop/visualizations.py index c4dd600..cd12272 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -181,6 +181,7 @@ def plot_cost_vs_run(self): 
plt.scatter(self.in_numbers,self.in_costs,marker='o',c=self.cost_colors,s=5*mpl.rcParams['lines.markersize']) plt.xlabel(run_label) plt.ylabel(cost_label) + plt.ylim((0,100)) plt.title('Controller: Cost vs run number.') artists = [] for ut in self.unique_types: From 347202cead811e6bcce164bcac3acefe146ea1a5 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 1 May 2017 17:00:23 +1000 Subject: [PATCH 012/119] Switch to abs activation function, and 1 layer This is the best match for the landscape I've seen yet. Next we should try extra sigmoid/relu layers on top of the abs. Or maybe multiple abs layers, but I doubt that's necessary. --- mloop/nnlearner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 55d4d15..a4e846a 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -57,7 +57,7 @@ def __init__(self, self.biases.append(tf.Variable(tf.random_normal([dim]))) prev_layer_dim = dim prev_h = tf.nn.dropout( - tf.nn.sigmoid(tf.matmul(prev_h, self.weights[-1]) + self.biases[-1]), + tf.abs(tf.matmul(prev_h, self.weights[-1]) + self.biases[-1]), keep_prob=self.keep_prob_placeholder) # Output node @@ -192,7 +192,7 @@ def _make_net(self, reg): ''' return SingleNeuralNet( self.num_params, - 2, # num_layers + 1, # num_layers 32, # layer_dim 1000, # train_epochs 64, # batch_size From 51e601223789f4578469eed7cf16ea0ecdbd6044 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 2 May 2017 09:34:34 +1000 Subject: [PATCH 013/119] Support different activation functions --- mloop/nnlearner.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index a4e846a..a0c215c 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -11,8 +11,8 @@ class SingleNeuralNet(): Args: num_params: The number of params. - num_layers: The number of layers. - layer_dim: The number of nodes in each layer. 
+ layer_dims: The number of nodes in each layer. + layer_activations: The activation function for each layer. train_epochs: Epochs per train. batch_size: The training batch size. keep_prob: The dropoout keep probability. @@ -21,8 +21,8 @@ class SingleNeuralNet(): def __init__(self, num_params, - num_layers, - layer_dim, + layer_dims, + layer_activations, train_epochs, batch_size, keep_prob, @@ -30,9 +30,11 @@ def __init__(self, self.log = logging.getLogger(__name__) self.tf_session = tf.InteractiveSession() + if not len(layer_dims) == len(layer_activations): + self.log.error('len(layer_dims) != len(layer_activations)') + raise ValueError + self.num_params = num_params - self.num_layers = num_layers - self.layer_dim = layer_dim self.train_epochs = train_epochs self.batch_size = batch_size self.keep_prob = keep_prob @@ -52,12 +54,12 @@ def __init__(self, # TODO: Use length scale for setting initial weights? prev_layer_dim = self.num_params prev_h = self.input_placeholder - for dim in [self.layer_dim] * self.num_layers: + for (dim, act) in zip(layer_dims, layer_activations): self.weights.append(tf.Variable(tf.random_normal([prev_layer_dim, dim], stddev=0.1))) self.biases.append(tf.Variable(tf.random_normal([dim]))) prev_layer_dim = dim prev_h = tf.nn.dropout( - tf.abs(tf.matmul(prev_h, self.weights[-1]) + self.biases[-1]), + act(tf.matmul(prev_h, self.weights[-1]) + self.biases[-1]), keep_prob=self.keep_prob_placeholder) # Output node @@ -190,10 +192,12 @@ def _make_net(self, reg): Args: reg (float): Regularisation coefficient. 
''' + def gelu_fast(_x): + return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3)))) return SingleNeuralNet( self.num_params, - 1, # num_layers - 32, # layer_dim + [32],#, 32], # layer_dims + [gelu_fast],#tf.abs, tf.abs], # layer_activations 1000, # train_epochs 64, # batch_size 1., # keep_prob From 393d32e94c340a6e5b846625f267d70d80aae646 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 2 May 2017 14:41:49 +1000 Subject: [PATCH 014/119] Tweaks --- mloop/nnlearner.py | 6 ++++-- mloop/visualizations.py | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index a0c215c..e9a6d51 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -194,10 +194,12 @@ def _make_net(self, reg): ''' def gelu_fast(_x): return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3)))) + def amazing_abs(_x): + return tf.maximum(1 - tf.abs(_x), 0) return SingleNeuralNet( self.num_params, - [32],#, 32], # layer_dims - [gelu_fast],#tf.abs, tf.abs], # layer_activations + [32, 32, 32, 32],#, 32], # layer_dims + [tf.abs, tf.nn.relu, tf.abs, tf.nn.relu], # layer_activations 1000, # train_epochs 64, # batch_size 1., # keep_prob diff --git a/mloop/visualizations.py b/mloop/visualizations.py index cd12272..497385d 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -673,7 +673,7 @@ def plot_cross_sections(self): plt.figure(figure_counter) points = 100 (_,cost_arrays) = self.return_cross_sections(points=points) - rel_params = np.linspace(0,1,points) + rel_params = np.linspace(-5,5,points) for ind in range(self.num_params): plt.plot(rel_params,cost_arrays[ind,:],'-',color=self.param_colors[ind]) if self.has_trust_region: @@ -683,7 +683,7 @@ def plot_cross_sections(self): for ind in range(self.num_params): plt.plot([self.scaled_trust_min[ind],self.scaled_trust_max[ind]],[ytrust,ytrust],'s', color=self.param_colors[ind]) plt.xlabel(scale_param_label) - plt.xlim((0,1)) 
+ plt.xlim((-5,5)) plt.ylabel(cost_label) plt.title('NN Learner: Predicted landscape' + ('with trust regions.' if self.has_trust_region else '.')) artists = [] @@ -707,7 +707,7 @@ def plot_surface(self): params = [(x,y) for x in param_set[0] for y in param_set[1]] costs = self.predict_costs_from_param_array(params) ax.scatter([param[0] for param in params], [param[1] for param in params], costs) - ax.set_zlim(top=100) + ax.set_zlim(top=500,bottom=0) ax.set_xlabel('x') ax.set_ylabel('y') ax.set_zlabel('cost') From 6cbfca11cda923c46cabd9e8a04e07b1d207e48f Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 2 May 2017 17:21:01 +1000 Subject: [PATCH 015/119] Support for saving/loading NN training state --- mloop/learners.py | 14 ++++++++++++-- mloop/nnlearner.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ mloop/visualizations.py | 2 +- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 41666f6..f0d8008 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1661,7 +1661,7 @@ def __init__(self, # re-train). self.cost_scaler = skp.StandardScaler(with_mean=False, with_std=False) - self.archive_dict.update({'archive_type':'nerual_net_learner', + self.archive_dict.update({'archive_type':'neural_net_learner', 'bad_run_indexs':self.bad_run_indexs, 'generation_num':self.generation_num, 'search_precision':self.search_precision, @@ -1683,6 +1683,14 @@ def create_neural_net(self): import mloop.nnlearner as mlnn self.neural_net_impl = mlnn.NeuralNetImpl(self.num_params) + def import_neural_net(self): + ''' + Imports neural net parameters from the training dictionary provided at construction. + ''' + if not self.training_dict: + raise ValueError + self.neural_net_impl.load(self.training_dict['net']) + def fit_neural_net(self): ''' Determine the appropriate number of layers for the NN given the data. 
@@ -1866,7 +1874,9 @@ def update_archive(self): 'params_count':self.params_count, 'update_hyperparameters':self.update_hyperparameters, 'length_scale':self.length_scale, - 'noise_level':self.noise_level}) + 'noise_level':self.noise_level}) + if self.neural_net_impl: + self.archive_dict.update({'net':self.neural_net_impl.save()}) def find_next_parameters(self): ''' diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index e9a6d51..cbabf3f 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -34,6 +34,9 @@ def __init__(self, self.log.error('len(layer_dims) != len(layer_activations)') raise ValueError + # All member variables of this class are constants. The only things that change are the TF + # variables. + self.num_params = num_params self.train_epochs = train_epochs self.batch_size = batch_size @@ -78,8 +81,24 @@ def __init__(self, # Gradient self.output_var_gradient = tf.gradients(self.output_var, self.input_placeholder) + # Saver for saving and restoring params + self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) + self.tf_session.run(tf.initialize_all_variables()) + def load(self, archive): + ''' + Imports the net from an archive dictionary. + ''' + self.saver.restore(self.tf_session, str(archive['saver_path'])) + + def save(self): + ''' + Exports the net to an archive dictionary. + ''' + # TODO: Use a proper timestamped filename. + return {'saver_path': self.saver.save(self.tf_session, 'net.ckpt')} + def fit(self, params, costs): ''' Fit the neural net to the provided data @@ -179,9 +198,13 @@ def __init__(self, self.log.error("num_params must be provided") raise ValueError + # Constants. self.num_params = num_params self.fit_hyperparameters = fit_hyperparameters + + # Tracking variables. These need to be set when importing and saved when exporting. 
self.last_hyperfit = 0 + self.last_net_reg = 0.01 self.net = self._make_net(0.01) @@ -205,6 +228,26 @@ def amazing_abs(_x): 1., # keep_prob reg) + def load(self, archive): + ''' + Imports the net from an archive dictionary. + ''' + self.log.debug('Importing neural network') + self.last_hyperfit = int(archive['last_hyperfit']) + self.last_net_reg = float(archive['last_net_reg']) + + self.net = self._make_net(self.last_net_reg) + self.net.load(dict(archive['net'])) + + def save(self): + ''' + Exports the net to an archive dictionary. + ''' + self.log.debug('Exporting neural network') + return {'last_hyperfit': self.last_hyperfit, + 'last_net_reg': self.last_net_reg, + 'net': self.net.save(), + } def fit_neural_net(self, all_params, all_costs): ''' @@ -255,6 +298,7 @@ def fit_neural_net(self, all_params, all_costs): if this_cv_loss < best_cv_loss and this_cv_loss < 0.1 * orig_cv_loss: best_cv_loss = this_cv_loss self.log.debug("Switching to reg=" + str(r) + ", cv loss=" + str(best_cv_loss)) + self.last_net_reg = r self.net = net # TODO: Fit depth diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 497385d..a339f63 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -602,7 +602,7 @@ def __init__(self, filename, file_type = 'pkl', **kwargs): self.trust_region = np.squeeze(np.array(self.training_dict['trust_region'], dtype=float)) self.create_neural_net() - self.fit_neural_net() + self.import_neural_net() if np.all(np.isfinite(self.min_boundary)) and np.all(np.isfinite(self.min_boundary)): self.finite_flag = True From a0f546b2b0e4c66f85fac534de5faf1d5e5cef88 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 3 May 2017 12:15:48 +1000 Subject: [PATCH 016/119] Fix network saving/loading Problem before was that I was using the same TF graph for everything, so when making extra network it was adding the nodes to the graph rather than using a new graph, and this confused the restoration (since the checkpoint files only expected one net 
in the graph, but we had two). Fixed this by associating a new tf.Graph with each net. Also closed the TF session when we stop using a net. This is probably good for efficiency. --- mloop/learners.py | 11 +++-- mloop/nnlearner.py | 127 +++++++++++++++++++++++++++++------------------- mloop/visualizations.py | 2 +- 3 files changed, 85 insertions(+), 55 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index f0d8008..09764b4 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -19,6 +19,7 @@ import sklearn.gaussian_process.kernels as skk import sklearn.preprocessing as skp import multiprocessing as mp +import mloop.nnlearner as mlnn learner_thread_count = 0 default_learner_archive_filename = 'learner_archive' @@ -1676,19 +1677,23 @@ def __init__(self, #Remove logger so gaussian process can be safely picked for multiprocessing on Windows self.log = None + def _construct_net(self): + self.neural_net_impl = mlnn.NeuralNetImpl(self.num_params) + def create_neural_net(self): ''' Creates the neural net. Must be called from the same process as fit_neural_net, predict_cost and predict_costs_from_param_array. ''' - import mloop.nnlearner as mlnn - self.neural_net_impl = mlnn.NeuralNetImpl(self.num_params) + self._construct_net() + self.neural_net_impl.init() def import_neural_net(self): ''' - Imports neural net parameters from the training dictionary provided at construction. + Imports neural net parameters from the training dictionary provided at construction. Must be called from the same process as fit_neural_net, predict_cost and predict_costs_from_param_array. You must call exactly one of this and create_neural_net before calling other methods. 
''' if not self.training_dict: raise ValueError + self._construct_net() self.neural_net_impl.load(self.training_dict['net']) def fit_neural_net(self): diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index cbabf3f..e904a0a 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -28,7 +28,8 @@ def __init__(self, keep_prob, regularisation_coefficient): self.log = logging.getLogger(__name__) - self.tf_session = tf.InteractiveSession() + self.graph = tf.Graph() + self.tf_session = tf.Session(graph=self.graph) if not len(layer_dims) == len(layer_activations): self.log.error('len(layer_dims) != len(layer_activations)') @@ -43,61 +44,75 @@ def __init__(self, self.keep_prob = keep_prob self.regularisation_coefficient = regularisation_coefficient - # Inputs - self.input_placeholder = tf.placeholder(tf.float32, shape=[None, self.num_params]) - self.output_placeholder = tf.placeholder(tf.float32, shape=[None, 1]) - self.keep_prob_placeholder = tf.placeholder_with_default(1., shape=[]) - self.regularisation_coefficient_placeholder = tf.placeholder_with_default(0., shape=[]) - - # Parameters - self.weights = [] - self.biases = [] - - # Input + internal nodes - # TODO: Use length scale for setting initial weights? 
- prev_layer_dim = self.num_params - prev_h = self.input_placeholder - for (dim, act) in zip(layer_dims, layer_activations): - self.weights.append(tf.Variable(tf.random_normal([prev_layer_dim, dim], stddev=0.1))) - self.biases.append(tf.Variable(tf.random_normal([dim]))) - prev_layer_dim = dim - prev_h = tf.nn.dropout( - act(tf.matmul(prev_h, self.weights[-1]) + self.biases[-1]), - keep_prob=self.keep_prob_placeholder) - - # Output node - self.weights.append(tf.Variable(tf.random_normal([prev_layer_dim, 1]))) - self.biases.append(tf.Variable(tf.random_normal([1]))) - self.output_var = tf.matmul(prev_h, self.weights[-1]) + self.biases[-1] - - # Loss function and training - self.loss_func = ( - tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - self.output_placeholder), - reduction_indices=[1])) - + self.regularisation_coefficient_placeholder - * tf.reduce_mean([tf.nn.l2_loss(W) for W in self.weights])) - self.train_step = tf.train.AdamOptimizer(1.0).minimize(self.loss_func) - - # Gradient - self.output_var_gradient = tf.gradients(self.output_var, self.input_placeholder) - - # Saver for saving and restoring params - self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) - - self.tf_session.run(tf.initialize_all_variables()) + with self.graph.as_default(): + # Inputs + self.input_placeholder = tf.placeholder(tf.float32, shape=[None, self.num_params]) + self.output_placeholder = tf.placeholder(tf.float32, shape=[None, 1]) + self.keep_prob_placeholder = tf.placeholder_with_default(1., shape=[]) + self.regularisation_coefficient_placeholder = tf.placeholder_with_default(0., shape=[]) + + # Parameters + self.weights = [] + self.biases = [] + + # Input + internal nodes + # TODO: Use length scale for setting initial weights? 
+ prev_layer_dim = self.num_params + prev_h = self.input_placeholder + for (i, (dim, act)) in enumerate(zip(layer_dims, layer_activations)): + self.weights.append(tf.Variable(tf.random_normal([prev_layer_dim, dim], stddev=0.1), name="weight_"+str(i))) + self.biases.append(tf.Variable(tf.random_normal([dim]), name="bias_"+str(i))) + prev_layer_dim = dim + prev_h = tf.nn.dropout( + act(tf.matmul(prev_h, self.weights[-1]) + self.biases[-1]), + keep_prob=self.keep_prob_placeholder) + + # Output node + self.weights.append(tf.Variable(tf.random_normal([prev_layer_dim, 1]), name="weight_out")) + self.biases.append(tf.Variable(tf.random_normal([1]), name="bias_out")) + self.output_var = tf.matmul(prev_h, self.weights[-1]) + self.biases[-1] + + # Loss function and training + self.loss_func = ( + tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - self.output_placeholder), + reduction_indices=[1])) + + self.regularisation_coefficient_placeholder + * tf.reduce_mean([tf.nn.l2_loss(W) for W in self.weights])) + self.train_step = tf.train.AdamOptimizer(1.0).minimize(self.loss_func) + + # Gradient + self.output_var_gradient = tf.gradients(self.output_var, self.input_placeholder) + + # Initialiser for ... initialising + self.initialiser = tf.initialize_all_variables() + + # Saver for saving and restoring params + self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) + + def destroy(self): + self.tf_session.close() + + def init(self): + ''' + Initializes the net. + ''' + self.tf_session.run(self.initialiser) def load(self, archive): ''' - Imports the net from an archive dictionary. + Imports the net from an archive dictionary. You must call exactly one of this and init() before calling any other methods. ''' - self.saver.restore(self.tf_session, str(archive['saver_path'])) + self.log.debug("Loading neural network") + self.saver.restore(self.tf_session, "./" + str(archive['saver_path'])) def save(self): ''' Exports the net to an archive dictionary. 
''' - # TODO: Use a proper timestamped filename. - return {'saver_path': self.saver.save(self.tf_session, 'net.ckpt')} + # TODO: Use a proper timestamped filename, maybe? + path = self.saver.save(self.tf_session, "net.ckpt") + self.log.debug("Saving neural network to: " + path) + return {'saver_path': path} def fit(self, params, costs): ''' @@ -210,7 +225,7 @@ def __init__(self, def _make_net(self, reg): ''' - Helper method to create a new net with a specified regularisation coefficient. + Helper method to create a new net with a specified regularisation coefficient. The net is not initialised, so you must call init() or load() on it before any other method. Args: reg (float): Regularisation coefficient. @@ -228,14 +243,21 @@ def amazing_abs(_x): 1., # keep_prob reg) + def init(self): + ''' + Initializes the net. + ''' + self.net.init() + def load(self, archive): ''' - Imports the net from an archive dictionary. + Imports the net from an archive dictionary. You must call exactly one of this and init() before calling any other methods. ''' - self.log.debug('Importing neural network') self.last_hyperfit = int(archive['last_hyperfit']) self.last_net_reg = float(archive['last_net_reg']) + # Destroy the old net, and replace it with the new loaded one. + self.net.destroy() self.net = self._make_net(self.last_net_reg) self.net.load(dict(archive['net'])) @@ -243,7 +265,6 @@ def save(self): ''' Exports the net to an archive dictionary. ''' - self.log.debug('Exporting neural network') return {'last_hyperfit': self.last_hyperfit, 'last_net_reg': self.last_net_reg, 'net': self.net.save(), @@ -293,13 +314,17 @@ def fit_neural_net(self, all_params, all_costs): # does significantly better on the cross validation set than the old one. 
for r in [0.001, 0.01, 0.1, 1, 10]: net = self._make_net(r) + net.init() net.fit(train_params, train_costs) this_cv_loss = net.cross_validation_loss(cv_params, cv_costs) if this_cv_loss < best_cv_loss and this_cv_loss < 0.1 * orig_cv_loss: best_cv_loss = this_cv_loss self.log.debug("Switching to reg=" + str(r) + ", cv loss=" + str(best_cv_loss)) self.last_net_reg = r + self.net.destroy() self.net = net + else: + net.destroy() # TODO: Fit depth diff --git a/mloop/visualizations.py b/mloop/visualizations.py index a339f63..ac6b36c 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -601,8 +601,8 @@ def __init__(self, filename, file_type = 'pkl', **kwargs): self.has_trust_region = bool(np.array(self.training_dict['has_trust_region'])) self.trust_region = np.squeeze(np.array(self.training_dict['trust_region'], dtype=float)) - self.create_neural_net() self.import_neural_net() + self.fit_neural_net() if np.all(np.isfinite(self.min_boundary)) and np.all(np.isfinite(self.min_boundary)): self.finite_flag = True From 55ce02126a891c97fabb7ca73b12a8667cd79522 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 3 May 2017 12:18:16 +1000 Subject: [PATCH 017/119] Add comment about privateness to SingleNeuralNet --- mloop/nnlearner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index e904a0a..e47378a 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -9,6 +9,8 @@ class SingleNeuralNet(): This must run in the same process in which it's created. + This class should be considered private to this module. + Args: num_params: The number of params. layer_dims: The number of nodes in each layer. From fabc00ebdf70c62018eba05de808c8727b5bc6b3 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 3 May 2017 12:18:37 +1000 Subject: [PATCH 018/119] Use default Adam learning rate This seems to improve performance with deep nets a LOT (I think before we were overshooting the minima). 
--- mloop/nnlearner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index e47378a..2a83af7 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -80,7 +80,8 @@ def __init__(self, reduction_indices=[1])) + self.regularisation_coefficient_placeholder * tf.reduce_mean([tf.nn.l2_loss(W) for W in self.weights])) - self.train_step = tf.train.AdamOptimizer(1.0).minimize(self.loss_func) + # TODO: Set learning rate based on length scale? + self.train_step = tf.train.AdamOptimizer().minimize(self.loss_func) # Gradient self.output_var_gradient = tf.gradients(self.output_var, self.input_placeholder) From 6e5cf676b113af8d70e34224f23bc8eace618856 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 3 May 2017 12:19:17 +1000 Subject: [PATCH 019/119] New training procedure To make sure we're not underfitting, we keep training until the improvement is small. --- mloop/nnlearner.py | 71 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 2a83af7..ef5293d 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -117,6 +117,23 @@ def save(self): self.log.debug("Saving neural network to: " + path) return {'saver_path': path} + def _loss(self, params, costs): + ''' + Returns the loss and unregularised loss for the given params and costs. 
+ ''' + return (self.tf_session.run( + self.loss_func, + feed_dict={self.input_placeholder: params, + self.output_placeholder: [[c] for c in costs], + self.regularisation_coefficient_placeholder: self.regularisation_coefficient, + }), + self.tf_session.run( + self.loss_func, + feed_dict={self.input_placeholder: params, + self.output_placeholder: [[c] for c in costs], + self.regularisation_coefficient_placeholder: 0, + })) + def fit(self, params, costs): ''' Fit the neural net to the provided data @@ -133,34 +150,32 @@ def fit(self, params, costs): self.log.error("Params and costs must have the same length") raise ValueError - for i in range(self.train_epochs): - # Split the data into random batches, and train on each batch - indices = np.random.permutation(len(params)) - for j in range(math.ceil(len(params) / self.batch_size)): - batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] - batch_input = [params[index] for index in batch_indices] - batch_output = [[costs[index]] for index in batch_indices] - self.tf_session.run(self.train_step, - feed_dict={self.input_placeholder: batch_input, - self.output_placeholder: batch_output, - self.regularisation_coefficient_placeholder: self.regularisation_coefficient, - self.keep_prob_placeholder: self.keep_prob, - }) - - self.log.debug('Fit neural network with total training cost ' - + str(self.tf_session.run( - self.loss_func, - feed_dict={self.input_placeholder: params, - self.output_placeholder: [[c] for c in costs], - self.regularisation_coefficient_placeholder: self.regularisation_coefficient, - })) - + ', with unregularized cost ' - + str(self.tf_session.run( - self.loss_func, - feed_dict={self.input_placeholder: params, - self.output_placeholder: [[c] for c in costs], - self.regularisation_coefficient_placeholder: 0, - }))) + # The general training procedure is as follows: + # - set a threshold of 80% of the current loss + # - train for train_epochs epochs + # - if the new loss is greater than the 
threshold then we haven't improved much, so stop + # - else start from the top + while True: + threshold = 0.8 * self._loss(params, costs)[0] + for i in range(self.train_epochs): + # Split the data into random batches, and train on each batch + indices = np.random.permutation(len(params)) + for j in range(math.ceil(len(params) / self.batch_size)): + batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] + batch_input = [params[index] for index in batch_indices] + batch_output = [[costs[index]] for index in batch_indices] + self.tf_session.run(self.train_step, + feed_dict={self.input_placeholder: batch_input, + self.output_placeholder: batch_output, + self.regularisation_coefficient_placeholder: self.regularisation_coefficient, + self.keep_prob_placeholder: self.keep_prob, + }) + (l, ul) = self._loss(params, costs) + self.log.debug('Fit neural network with total training cost ' + str(l) + + ', with unregularized cost ' + str(ul)) + if l > threshold: + break + self.log.debug('Cost decreased by a lot, train again') def cross_validation_loss(self, params, costs): ''' From ec20782665c68e52c865a1e1f99a82534000fbbe Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 3 May 2017 14:40:24 +1000 Subject: [PATCH 020/119] Don't try to train past 0 loss --- mloop/nnlearner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index ef5293d..5b17793 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -157,6 +157,8 @@ def fit(self, params, costs): # - else start from the top while True: threshold = 0.8 * self._loss(params, costs)[0] + if threshold == 0: + break for i in range(self.train_epochs): # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) From 21ae741cec4361e6020d416e3d3f19a2cebee60e Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 3 May 2017 17:02:50 +1000 Subject: [PATCH 021/119] Remove artificial ylim in cost vs run plot --- 
mloop/visualizations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index ac6b36c..6f8658e 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -181,7 +181,6 @@ def plot_cost_vs_run(self): plt.scatter(self.in_numbers,self.in_costs,marker='o',c=self.cost_colors,s=5*mpl.rcParams['lines.markersize']) plt.xlabel(run_label) plt.ylabel(cost_label) - plt.ylim((0,100)) plt.title('Controller: Cost vs run number.') artists = [] for ut in self.unique_types: From ca5aa517732564a6b6db48fd2e0da1280d744267 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 3 May 2017 17:03:14 +1000 Subject: [PATCH 022/119] Move training threshold down to 10% --- mloop/nnlearner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 5b17793..69be86c 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -151,12 +151,12 @@ def fit(self, params, costs): raise ValueError # The general training procedure is as follows: - # - set a threshold of 80% of the current loss + # - set a threshold of 10% of the current loss # - train for train_epochs epochs # - if the new loss is greater than the threshold then we haven't improved much, so stop # - else start from the top while True: - threshold = 0.8 * self._loss(params, costs)[0] + threshold = 0.1 * self._loss(params, costs)[0] if threshold == 0: break for i in range(self.train_epochs): From 5c927eca15904e13814f19d31dee3a9ce9e7fcab Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 4 May 2017 17:44:26 +1000 Subject: [PATCH 023/119] Try some uncertainty Rather than returning the actual expected value we return an LCB-style value. We get this from feeding through the network with dropout a bunch of times, and taking the 20th percentile. I think if we're going to do this then it makes sense to train with dropout. 
--- mloop/nnlearner.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 69be86c..19c4d55 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -156,9 +156,10 @@ def fit(self, params, costs): # - if the new loss is greater than the threshold then we haven't improved much, so stop # - else start from the top while True: - threshold = 0.1 * self._loss(params, costs)[0] + threshold = 0.9 * self._loss(params, costs)[0] if threshold == 0: break + tot = 0 for i in range(self.train_epochs): # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) @@ -172,10 +173,17 @@ def fit(self, params, costs): self.regularisation_coefficient_placeholder: self.regularisation_coefficient, self.keep_prob_placeholder: self.keep_prob, }) + tot += self._loss(params, costs)[0] + if i % 10 == 0: + (l, ul) = self._loss(params, costs) + self.log.debug('Fit neural network with total training cost ' + str(l) + + ', with unregularized cost ' + str(ul)) + (l, ul) = self._loss(params, costs) + al = tot / float(self.train_epochs) self.log.debug('Fit neural network with total training cost ' + str(l) - + ', with unregularized cost ' + str(ul)) - if l > threshold: + + ', with unregularized cost ' + str(ul) + "avg: " + str(al)) + if al > threshold: break self.log.debug('Cost decreased by a lot, train again') @@ -199,7 +207,13 @@ def predict_cost(self,params): Returns: float : Predicted cost at parameters ''' - return self.tf_session.run(self.output_var, feed_dict={self.input_placeholder: [params]})[0][0] + runs = 100 + # Do some runs with dropout, and return the smallest. This is kind of LCB. 
+ results = [y[0] for y in self.tf_session.run(self.output_var, feed_dict={ + self.input_placeholder: [params] * runs, + self.keep_prob_placeholder: 0.99})] + results.sort() + return results[int(runs * 0.2)] def predict_cost_gradient(self,params): ''' @@ -256,11 +270,10 @@ def amazing_abs(_x): return tf.maximum(1 - tf.abs(_x), 0) return SingleNeuralNet( self.num_params, - [32, 32, 32, 32],#, 32], # layer_dims - [tf.abs, tf.nn.relu, tf.abs, tf.nn.relu], # layer_activations - 1000, # train_epochs + [64]*5, [tf.nn.relu]*5, + 100, # train_epochs 64, # batch_size - 1., # keep_prob + 0.99, # keep_prob reg) def init(self): From f05863707e82f81b61712c0c38decd9b07636322 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 4 May 2017 17:46:25 +1000 Subject: [PATCH 024/119] Save cost vs run to a file --- mloop/visualizations.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 6f8658e..aec84f3 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -186,6 +186,11 @@ def plot_cost_vs_run(self): for ut in self.unique_types: artists.append(plt.Line2D((0,1),(0,0), color=_color_from_controller_name(ut), marker='o', linestyle='')) plt.legend(artists,self.unique_types,loc=legend_loc) + f = open("output.txt", "w") + for (x, y) in zip(self.in_numbers, self.in_costs): + f.write(str(x) + " " + str(y) + "\n") + f.close() + def plot_parameters_vs_run(self): ''' From 7bc3245ed6020fea1f5267ebd7c376a8307db6fd Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 9 May 2017 15:53:25 +1000 Subject: [PATCH 025/119] Code cleanup in nnlearner --- mloop/nnlearner.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 19c4d55..e315d0d 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -61,17 +61,26 @@ def __init__(self, # TODO: Use length scale for setting initial weights? 
prev_layer_dim = self.num_params prev_h = self.input_placeholder + stddev=0.1 for (i, (dim, act)) in enumerate(zip(layer_dims, layer_activations)): - self.weights.append(tf.Variable(tf.random_normal([prev_layer_dim, dim], stddev=0.1), name="weight_"+str(i))) - self.biases.append(tf.Variable(tf.random_normal([dim]), name="bias_"+str(i))) + self.weights.append(tf.Variable( + tf.random_normal([prev_layer_dim, dim], stddev=stddev), + name="weight_"+str(i))) + self.biases.append(tf.Variable( + tf.random_normal([dim], stddev=stddev), + name="bias_"+str(i))) prev_layer_dim = dim prev_h = tf.nn.dropout( act(tf.matmul(prev_h, self.weights[-1]) + self.biases[-1]), keep_prob=self.keep_prob_placeholder) # Output node - self.weights.append(tf.Variable(tf.random_normal([prev_layer_dim, 1]), name="weight_out")) - self.biases.append(tf.Variable(tf.random_normal([1]), name="bias_out")) + self.weights.append(tf.Variable( + tf.random_normal([prev_layer_dim, 1], stddev=stddev), + name="weight_out")) + self.biases.append(tf.Variable( + tf.random_normal([1], stddev=stddev), + name="bias_out")) self.output_var = tf.matmul(prev_h, self.weights[-1]) + self.biases[-1] # Loss function and training @@ -151,12 +160,13 @@ def fit(self, params, costs): raise ValueError # The general training procedure is as follows: - # - set a threshold of 10% of the current loss + # - set a threshold based on the current loss # - train for train_epochs epochs # - if the new loss is greater than the threshold then we haven't improved much, so stop # - else start from the top while True: threshold = 0.9 * self._loss(params, costs)[0] + self.log.debug("Training with threshold " + str(threshold)) if threshold == 0: break tot = 0 @@ -173,19 +183,18 @@ def fit(self, params, costs): self.regularisation_coefficient_placeholder: self.regularisation_coefficient, self.keep_prob_placeholder: self.keep_prob, }) - tot += self._loss(params, costs)[0] + (l, ul) = self._loss(params, costs) + self.losses_list.append(l) + tot += 
l if i % 10 == 0: - (l, ul) = self._loss(params, costs) self.log.debug('Fit neural network with total training cost ' + str(l) + ', with unregularized cost ' + str(ul)) (l, ul) = self._loss(params, costs) al = tot / float(self.train_epochs) - self.log.debug('Fit neural network with total training cost ' + str(l) - + ', with unregularized cost ' + str(ul) + "avg: " + str(al)) - if al > threshold: + self.log.debug('Loss ' + str(l) + ', average loss ' + str(al)) + if l > threshold: break - self.log.debug('Cost decreased by a lot, train again') def cross_validation_loss(self, params, costs): ''' From 34606698cc8e07f315c892f48e22ed1797335792 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 9 May 2017 15:54:29 +1000 Subject: [PATCH 026/119] Disable the LCB stuff for now --- mloop/nnlearner.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index e315d0d..c343a39 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -216,13 +216,14 @@ def predict_cost(self,params): Returns: float : Predicted cost at parameters ''' - runs = 100 - # Do some runs with dropout, and return the smallest. This is kind of LCB. - results = [y[0] for y in self.tf_session.run(self.output_var, feed_dict={ - self.input_placeholder: [params] * runs, - self.keep_prob_placeholder: 0.99})] - results.sort() - return results[int(runs * 0.2)] + return self.tf_session.run(self.output_var, feed_dict={self.input_placeholder: [params]})[0][0] + #runs = 100 + ## Do some runs with dropout, and return the smallest. This is kind of LCB. 
+ #results = [y[0] for y in self.tf_session.run(self.output_var, feed_dict={ + # self.input_placeholder: [params] * runs, + # self.keep_prob_placeholder: 0.99})] + #results.sort() + #return results[int(runs * 0.2)] def predict_cost_gradient(self,params): ''' From 32caaedd72c27b8a6369677dd58eff167d3b2a35 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 9 May 2017 15:56:56 +1000 Subject: [PATCH 027/119] Keep track of training losses --- mloop/learners.py | 8 +++----- mloop/nnlearner.py | 26 +++++++++++++++++++++----- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 09764b4..d4b4539 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -2038,9 +2038,7 @@ def find_local_minima(self): self.has_local_minima = True self.log.info('Search completed') + # Methods for debugging/analysis. - - - - - + def get_losses(self): + return self.neural_net_impl.get_losses() diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index c343a39..afd124f 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -19,6 +19,7 @@ class SingleNeuralNet(): batch_size: The training batch size. keep_prob: The dropoout keep probability. regularisation_coefficient: The regularisation coefficient. + losses_list: A list to which this object will append training losses. ''' def __init__(self, @@ -28,7 +29,8 @@ def __init__(self, train_epochs, batch_size, keep_prob, - regularisation_coefficient): + regularisation_coefficient, + losses_list): self.log = logging.getLogger(__name__) self.graph = tf.Graph() self.tf_session = tf.Session(graph=self.graph) @@ -37,15 +39,15 @@ def __init__(self, self.log.error('len(layer_dims) != len(layer_activations)') raise ValueError - # All member variables of this class are constants. The only things that change are the TF - # variables. - + # Hyperparameters for the net. These are all constant. 
self.num_params = num_params self.train_epochs = train_epochs self.batch_size = batch_size self.keep_prob = keep_prob self.regularisation_coefficient = regularisation_coefficient + self.losses_list = losses_list + with self.graph.as_default(): # Inputs self.input_placeholder = tf.placeholder(tf.float32, shape=[None, self.num_params]) @@ -265,6 +267,8 @@ def __init__(self, self.last_hyperfit = 0 self.last_net_reg = 0.01 + self.losses_list = [] + self.net = self._make_net(0.01) def _make_net(self, reg): @@ -284,7 +288,8 @@ def amazing_abs(_x): 100, # train_epochs 64, # batch_size 0.99, # keep_prob - reg) + reg, + self.losses_list) def init(self): ''' @@ -299,6 +304,8 @@ def load(self, archive): self.last_hyperfit = int(archive['last_hyperfit']) self.last_net_reg = float(archive['last_net_reg']) + self.losses_list = list(archive['losses_list']) + # Destroy the old net, and replace it with the new loaded one. self.net.destroy() self.net = self._make_net(self.last_net_reg) @@ -310,6 +317,7 @@ def save(self): ''' return {'last_hyperfit': self.last_hyperfit, 'last_net_reg': self.last_net_reg, + 'losses_list': self.losses_list, 'net': self.net.save(), } @@ -390,3 +398,11 @@ def predict_cost_gradient(self,params): float : Predicted gradient at parameters ''' return self.net.predict_cost_gradient(params) + + # Methods for debugging/analysis. + + def get_losses(self): + ''' + Returns a list of training losses experienced by the network. + ''' + return self.losses_list From 2893679beac5debfad4cd9945055641fd4a70843 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 9 May 2017 15:57:11 +1000 Subject: [PATCH 028/119] Tweak some NN params --- mloop/nnlearner.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index afd124f..104715e 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -265,11 +265,11 @@ def __init__(self, # Tracking variables. These need to be set when importing and saved when exporting. 
self.last_hyperfit = 0 - self.last_net_reg = 0.01 + self.last_net_reg = 0.001 self.losses_list = [] - self.net = self._make_net(0.01) + self.net = self._make_net(0.001) def _make_net(self, reg): ''' @@ -284,10 +284,10 @@ def amazing_abs(_x): return tf.maximum(1 - tf.abs(_x), 0) return SingleNeuralNet( self.num_params, - [64]*5, [tf.nn.relu]*5, + [64]*5, [gelu_fast]*5, 100, # train_epochs 64, # batch_size - 0.99, # keep_prob + 0.8, # keep_prob reg, self.losses_list) From a452cf1188de9cbac0de3981bf86e8a025fe716d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 9 May 2017 15:57:36 +1000 Subject: [PATCH 029/119] Scale costs in the NN We fit the scaler to the costs after the initial training runs, and never change it. --- mloop/learners.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index d4b4539..2409519 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1556,7 +1556,11 @@ def __init__(self, #Configuration of the fake neural net learner self.length_scale = mlu.safe_squeeze(self.training_dict['length_scale']) self.noise_level = float(self.training_dict['noise_level']) - + + self.cost_scaler = skp.StandardScaler() + self.cost_scaler_init_index = self.training_dict['cost_scaler_init_index'] + if not self.cost_scaler_init_index is None: + self.cost_scaler.fit(self.all_costs[:self.cost_scaler_init_index,np.newaxis]) try: self.predicted_best_parameters = mlu.safe_squeeze(self.training_dict['predicted_best_parameters']) @@ -1609,6 +1613,9 @@ def __init__(self, self.has_local_minima = False self.has_global_minima = False + + self.cost_scaler = skp.StandardScaler() + self.cost_scaler_init_index = None #Multiprocessor controls self.new_params_event = mp.Event() @@ -1656,12 +1663,6 @@ def __init__(self, self.cost_has_noise = True self.noise_level = 1 - # Set up the scaler to do nothing. 
- # TODO: Figure out how to use scaling for the NN (it's a bit difficult because we don't - # completely re-train each time, and don't want the scaling changing without doing a complete - # re-train). - self.cost_scaler = skp.StandardScaler(with_mean=False, with_std=False) - self.archive_dict.update({'archive_type':'neural_net_learner', 'bad_run_indexs':self.bad_run_indexs, 'generation_num':self.generation_num, @@ -1698,12 +1699,11 @@ def import_neural_net(self): def fit_neural_net(self): ''' - Determine the appropriate number of layers for the NN given the data. - - Fit the Neural Net with the appropriate topology to the data + Fits a neural net to the data. + cost_scaler must have been fitted before calling this method. ''' - self.scaled_costs = self.cost_scaler.fit_transform(self.all_costs[:,np.newaxis])[:,0] + self.scaled_costs = self.cost_scaler.transform(self.all_costs[:,np.newaxis])[:,0] self.neural_net_impl.fit_neural_net(self.all_params, self.scaled_costs) @@ -1879,7 +1879,8 @@ def update_archive(self): 'params_count':self.params_count, 'update_hyperparameters':self.update_hyperparameters, 'length_scale':self.length_scale, - 'noise_level':self.noise_level}) + 'noise_level':self.noise_level, + 'cost_scaler_init_index':self.cost_scaler_init_index}) if self.neural_net_impl: self.archive_dict.update({'net':self.neural_net_impl.save()}) @@ -1933,6 +1934,9 @@ def run(self): self.wait_for_new_params_event() self.log.debug('NN learner reading costs') self.get_params_and_costs() + if self.cost_scaler_init_index is None: + self.cost_scaler_init_index = len(self.all_costs) + self.cost_scaler.fit(self.all_costs[:,np.newaxis]) self.fit_neural_net() for _ in range(self.generation_num): self.log.debug('Neural network learner generating parameter:'+ str(self.params_count+1)) @@ -1991,7 +1995,7 @@ def find_global_minima(self): self.predicted_best_parameters = curr_best_params self.predicted_best_scaled_cost = curr_best_cost - self.predicted_best_cost = 
float(self.cost_scaler.inverse_transform(self.predicted_best_scaled_cost)) + self.predicted_best_cost = float(self.cost_scaler.inverse_transform([self.predicted_best_scaled_cost])) self.archive_dict.update({'predicted_best_parameters':self.predicted_best_parameters, 'predicted_best_scaled_cost':self.predicted_best_scaled_cost, 'predicted_best_cost':self.predicted_best_cost}) From fcbaba425084a41081ab398baa064e6824b3784d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 9 May 2017 15:58:18 +1000 Subject: [PATCH 030/119] Remove unnecessary NN fit in visualizations Now that we're doing cost scaling properly this is no longer necessary. --- mloop/visualizations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index aec84f3..6bab3a9 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -606,7 +606,6 @@ def __init__(self, filename, file_type = 'pkl', **kwargs): self.trust_region = np.squeeze(np.array(self.training_dict['trust_region'], dtype=float)) self.import_neural_net() - self.fit_neural_net() if np.all(np.isfinite(self.min_boundary)) and np.all(np.isfinite(self.min_boundary)): self.finite_flag = True From ce15c4ae7215d1f79382f7a747f6f7c484abe6e4 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 9 May 2017 16:00:55 +1000 Subject: [PATCH 031/119] Plot training losses --- mloop/visualizations.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 6bab3a9..32abbbf 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -578,6 +578,7 @@ def create_neural_net_learner_visualizations(filename, if plot_cross_sections: visualization.plot_cross_sections() visualization.plot_surface() + visualization.plot_losses() class NeuralNetVisualizer(mll.NeuralNetLearner): @@ -716,3 +717,17 @@ def plot_surface(self): ax.set_zlabel('cost') ax.scatter(self.all_params[:,0], self.all_params[:,1], self.all_costs, c='r') + + def 
plot_losses(self): + ''' + Produce a figure of the loss as a function of training run. + ''' + global figure_counter + figure_counter += 1 + fig = plt.figure(figure_counter) + + losses = self.get_losses() + plt.scatter(range(len(losses)), losses) + plt.xlabel("Run") + plt.ylabel("Training cost") + plt.title('Loss vs training run.') From 67bb797bab37f730fcf0e3154ffe23ffc589fd59 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 10 May 2017 09:14:38 +1000 Subject: [PATCH 032/119] Add test to repo --- local_tests/.gitignore | 6 +++ local_tests/MLOOPQuickTest.py | 123 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 local_tests/.gitignore create mode 100644 local_tests/MLOOPQuickTest.py diff --git a/local_tests/.gitignore b/local_tests/.gitignore new file mode 100644 index 0000000..76f97fa --- /dev/null +++ b/local_tests/.gitignore @@ -0,0 +1,6 @@ +M-LOOP_archives/ +M-LOOP_logs/ +output/ +results/ +checkpoint +net* diff --git a/local_tests/MLOOPQuickTest.py b/local_tests/MLOOPQuickTest.py new file mode 100644 index 0000000..660f654 --- /dev/null +++ b/local_tests/MLOOPQuickTest.py @@ -0,0 +1,123 @@ +''' +Created on 15Jun.,2016 + +@author: michaelhush +''' +from __future__ import absolute_import, division, print_function +__metaclass__ = type + +import mloop.controllers as mlc +import mloop.interfaces as mli +import mloop.testing as mlt +import mloop.visualizations as mlv +import mloop.utilities as mlu +import numpy as np +import logging +import matplotlib.pyplot as plt + +def main(): + + mlu.config_logger(file_log_level=logging.DEBUG, + console_log_level=logging.DEBUG) + + log = logging.getLogger('mloop.main') + + log.info("Making queues") + + tnum_params = 10 + + tmin_boundary=[-10.] * tnum_params + tmax_boundary=[ 10.] * tnum_params + + tmax_num_runs = 40 + tcost = -1. 
+ + log.info("Making landscape") + + tlandscape = mlt.TestLandscape(num_params = tnum_params) + + tlandscape.set_random_quadratic_landscape(np.array(tmin_boundary),np.array(tmax_boundary),random_scale=False) + + which = 4 + if which == 0: + log.info("Running random controller") + interface = mli.TestInterface(test_landscape=tlandscape) + random_controller = mlc.create_controller(interface, + controller_type='random', + max_num_runs= tmax_num_runs, + num_params=tnum_params, + min_boundary=tmin_boundary, + max_boundary=tmax_boundary, + trust_region=0.1) + random_controller.optimize() + mlv.show_all_default_visualizations(random_controller, show_plots=False) + log.info("Random controller ended") + elif which == 1: + log.info("Running Nelder-Mead controller") + interface = mli.TestInterface(test_landscape=tlandscape) + nelder_mead_controller = mlc.create_controller(interface, + controller_type='nelder_mead', + max_num_runs= tmax_num_runs, + num_params=tnum_params, + min_boundary=tmin_boundary, + max_boundary=tmax_boundary) + nelder_mead_controller.optimize() + mlv.show_all_default_visualizations(nelder_mead_controller, show_plots=False) + log.info("Running Nelder-Mead controller") + elif which == 2: + log.info("Running differential evolution controller") + interface = mli.TestInterface(test_landscape=tlandscape) + diff_evo_controller = mlc.create_controller(interface, + controller_type='differential_evolution', + evolution_strategy='rand2', + max_num_runs= tmax_num_runs, + num_params=tnum_params, + min_boundary=tmin_boundary, + max_boundary=tmax_boundary) + diff_evo_controller.optimize() + mlv.show_all_default_visualizations(diff_evo_controller, show_plots=False) + log.info("Running differential evolution controller") + elif which == 3: + log.info("Running Gaussian process controller") + interface = mli.TestInterface(test_landscape=tlandscape) + gp_controller = mlc.create_controller(interface, + controller_type='gaussian_process', + no_delay=False, + max_num_runs= 
tmax_num_runs, + target_cost = tcost, + num_params=tnum_params, + min_boundary=tmin_boundary, + max_boundary=tmax_boundary) + #length_scale = 1.) + gp_controller.optimize() + mlv.show_all_default_visualizations(gp_controller, show_plots=False) + log.info("Gaussian process controller ended") + elif which == 4: + log.info("Running Neural net controller") + interface = mli.TestInterface(test_landscape=tlandscape) + nn_controller = mlc.create_controller(interface, + controller_type='neural_net', + no_delay=False, + max_num_runs= tmax_num_runs, + target_cost = tcost, + num_params=tnum_params, + min_boundary=tmin_boundary, + max_boundary=tmax_boundary) + nn_controller.optimize() + mlv.show_all_default_visualizations(nn_controller, show_plots=False) + log.info("Neural net process controller ended") + else: + raise ValueError + + log.info("True minimum:" + str(tlandscape.expected_minima)) + log.info("True minimum value:" + str(tlandscape.cost_function(p=tlandscape.expected_minima))) + + log.info("Visualizations started.") + + plt.show() + + log.info("MLOOP Quick Test ended") + + +if __name__ == '__main__': + main() From 4d94db783a869174a99cd21a2d2961867025983a Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 10 May 2017 12:43:43 +1000 Subject: [PATCH 033/119] Tidy up NNL cost scaler initialisation --- mloop/learners.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 2409519..0256406 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1557,10 +1557,9 @@ def __init__(self, self.length_scale = mlu.safe_squeeze(self.training_dict['length_scale']) self.noise_level = float(self.training_dict['noise_level']) - self.cost_scaler = skp.StandardScaler() self.cost_scaler_init_index = self.training_dict['cost_scaler_init_index'] if not self.cost_scaler_init_index is None: - self.cost_scaler.fit(self.all_costs[:self.cost_scaler_init_index,np.newaxis]) + self._init_cost_scaler() try: 
self.predicted_best_parameters = mlu.safe_squeeze(self.training_dict['predicted_best_parameters']) @@ -1614,7 +1613,8 @@ def __init__(self, self.has_local_minima = False self.has_global_minima = False - self.cost_scaler = skp.StandardScaler() + # The scaler will be initialised when we're ready to fit it + self.cost_scaler = None self.cost_scaler_init_index = None #Multiprocessor controls @@ -1681,6 +1681,13 @@ def __init__(self, def _construct_net(self): self.neural_net_impl = mlnn.NeuralNetImpl(self.num_params) + def _init_cost_scaler(self): + ''' + Initialises the cost scaler. cost_scaler_init_index must be set. + ''' + self.cost_scaler = skp.StandardScaler(with_mean=False, with_std=False) + self.cost_scaler.fit(self.all_costs[:self.cost_scaler_init_index,np.newaxis]) + def create_neural_net(self): ''' Creates the neural net. Must be called from the same process as fit_neural_net, predict_cost and predict_costs_from_param_array. @@ -1936,7 +1943,7 @@ def run(self): self.get_params_and_costs() if self.cost_scaler_init_index is None: self.cost_scaler_init_index = len(self.all_costs) - self.cost_scaler.fit(self.all_costs[:,np.newaxis]) + self._init_cost_scaler() self.fit_neural_net() for _ in range(self.generation_num): self.log.debug('Neural network learner generating parameter:'+ str(self.params_count+1)) From fddf85f3cd3062e88e7f59c41ffdcf3c06d50689 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 10 May 2017 12:49:33 +1000 Subject: [PATCH 034/119] Tidy up NeuralNetImpl --- mloop/nnlearner.py | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 104715e..86b64c0 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -1,7 +1,8 @@ import logging import math -import tensorflow as tf + import numpy as np +import tensorflow as tf class SingleNeuralNet(): ''' @@ -244,6 +245,9 @@ class NeuralNetImpl(): This must run in the same process in which it's 
created. + All parameters should be considered private to this class. That is, you should only interact with + this class via the methods documented to be public. + Args: num_params (int): The number of params. fit_hyperparameters (bool): Whether to try to fit the hyperparameters to the data. @@ -263,17 +267,22 @@ def __init__(self, self.num_params = num_params self.fit_hyperparameters = fit_hyperparameters - # Tracking variables. These need to be set when importing and saved when exporting. + # Variables for tracking the current state of hyperparameter fitting. self.last_hyperfit = 0 self.last_net_reg = 0.001 + # The training losses incurred by the network. This is a concatenation of the losses + # associated with each instance of SingleNeuralNet. self.losses_list = [] - self.net = self._make_net(0.001) + self.net = None + + # Private helper methods. def _make_net(self, reg): ''' - Helper method to create a new net with a specified regularisation coefficient. The net is not initialised, so you must call init() or load() on it before any other method. + Helper method to create a new net with a specified regularisation coefficient. The net is not + initialised, so you must call init() or load() on it before any other method. Args: reg (float): Regularisation coefficient. @@ -291,23 +300,37 @@ def amazing_abs(_x): reg, self.losses_list) + # Public methods. + def init(self): ''' - Initializes the net. + Initializes the net. You must call exactly one of this and load() before calling any other + methods. ''' + if not self.net is None: + self.log.error("Called init() when already initialised/loaded") + raise ValueError + + self.net = self._make_net(self.last_net_reg) self.net.init() def load(self, archive): ''' - Imports the net from an archive dictionary. You must call exactly one of this and init() before calling any other methods. + Imports the net from an archive dictionary. You must call exactly one of this and init() + before calling any other methods. 
+ + You must only load a net from an archive if that archive corresponds to a net with the same + constructor parameters. ''' + if not self.net is None: + self.log.error("Called load() when net already initialised/loaded") + raise ValueError + self.last_hyperfit = int(archive['last_hyperfit']) self.last_net_reg = float(archive['last_net_reg']) self.losses_list = list(archive['losses_list']) - # Destroy the old net, and replace it with the new loaded one. - self.net.destroy() self.net = self._make_net(self.last_net_reg) self.net.load(dict(archive['net'])) @@ -323,7 +346,7 @@ def save(self): def fit_neural_net(self, all_params, all_costs): ''' - Fits the neural net with the appropriate topology to the data + Fits the neural net to the data. Args: all_params (array): array of all parameter arrays @@ -385,6 +408,8 @@ def predict_cost(self,params): ''' Produces a prediction of cost from the neural net at params. + Must not be called before fit_neural_net(). + Returns: float : Predicted cost at parameters ''' @@ -394,12 +419,14 @@ def predict_cost_gradient(self,params): ''' Produces a prediction of the gradient of the cost function at params. + Must not be called before fit_neural_net(). + Returns: float : Predicted gradient at parameters ''' return self.net.predict_cost_gradient(params) - # Methods for debugging/analysis. + # Public mmethods to be used only for debugging/analysis. def get_losses(self): ''' From 1bb04db95d1fe8f760f520a3de70ab01d28ef971 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 10 May 2017 12:50:36 +1000 Subject: [PATCH 035/119] Add support for scaling costs and params to NeuralNetImpl We can already scale costs in the learner, but moving it to NNI makes it more clear that it's an implementation detail (which it is). 
--- mloop/nnlearner.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 86b64c0..8225a26 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -2,6 +2,7 @@ import math import numpy as np +import sklearn.preprocessing as skp import tensorflow as tf class SingleNeuralNet(): @@ -245,6 +246,9 @@ class NeuralNetImpl(): This must run in the same process in which it's created. + This handles scaling of parameters and costs internally, so there is no need to ensure that these + values are scaled or normalised in any way. + All parameters should be considered private to this class. That is, you should only interact with this class via the methods documented to be public. @@ -271,6 +275,12 @@ def __init__(self, self.last_hyperfit = 0 self.last_net_reg = 0.001 + self.cost_scaler = skp.StandardScaler(with_mean=True, with_std=True) + self.param_scaler = skp.StandardScaler(with_mean=True, with_std=True) + # The samples used to fit param_scaler and cost_scaler. When set, this will be a tuple of + # (params samples, cost samples). + self.scaler_samples = None + # The training losses incurred by the network. This is a concatenation of the losses # associated with each instance of SingleNeuralNet. self.losses_list = [] @@ -300,6 +310,46 @@ def amazing_abs(_x): reg, self.losses_list) + def _fit_scaler(self): + if self.scaler_samples is None: + self.log.error("_fit_scaler() called before samples set") + raise ValueError + self.param_scaler.fit(self.scaler_samples[0]) + # Cost is scalar but numpy doesn't like scalars, so reshape to be a 0D vector instead. + self.cost_scaler.fit(np.array(self.scaler_samples[1]).reshape(-1,1)) + + # Now that the scaler is fitted, calculate the parameters we'll need to unscale gradients. 
+ # We need to know which unscaled gradient would correspond to a scaled gradient of [1,...1], + # which we can calculate as the unscaled gradient associated with a scaled rise of 1 and a + # scaled run of [1,...1]: + rise_unscaled = ( + self._unscale_cost(np.float64(1)) + - self._unscale_cost(np.float64(0))) + run_unscaled = ( + self._unscale_params([np.float64(1)]*self.num_params) + - self._unscale_params([np.float64(0)]*self.num_params)) + self._gradient_unscale = rise_unscaled / run_unscaled + + def _scale_params_and_cost_list(self, params_list_unscaled, cost_list_unscaled): + params_list_scaled = self.param_scaler.transform(params_list_unscaled) + # As above, numpy doesn't like scalars, so we need to do some reshaping. + cost_vector_list_unscaled = np.array(cost_list_unscaled).reshape(-1,1) + cost_vector_list_scaled = self.cost_scaler.transform(cost_vector_list_unscaled) + cost_list_scaled = cost_vector_list_scaled[:,0] + return params_list_scaled, cost_list_scaled + + def _scale_params(self, params_unscaled): + return self.param_scaler.transform([params_unscaled])[0] + + def _unscale_params(self, params_scaled): + return self.param_scaler.inverse_transform([params_scaled])[0] + + def _unscale_cost(self, cost_scaled): + return self.cost_scaler.inverse_transform([[cost_scaled]])[0][0] + + def _unscale_gradient(self, gradient_scaled): + return np.multiply(gradient_scaled, self._gradient_unscale) + # Public methods. 
def init(self): @@ -331,6 +381,10 @@ def load(self, archive): self.losses_list = list(archive['losses_list']) + self.scaler_samples = archive['scaler_samples'] + if not self.scaler_samples is None: + self._fit_scaler() + self.net = self._make_net(self.last_net_reg) self.net.load(dict(archive['net'])) @@ -341,6 +395,7 @@ def save(self): return {'last_hyperfit': self.last_hyperfit, 'last_net_reg': self.last_net_reg, 'losses_list': self.losses_list, + 'scaler_samples': self.scaler_samples, 'net': self.net.save(), } @@ -360,6 +415,13 @@ def fit_neural_net(self, all_params, all_costs): self.log.error("Params and costs must have the same length") raise ValueError + # If we haven't initialised the scaler yet, do it now. + if self.scaler_samples is None: + self.scaler_samples = (all_params.copy(), all_costs.copy()) + self._fit_scaler() + + all_params, all_costs = self._scale_params_and_cost_list(all_params, all_costs) + # TODO: Consider adding some kind of "cost capping". Our NNs will never predict costs going # off to infinity, so we could be "wasting" training cost due to totally irrelevant points. # If we capped the costs to some value then this might help. Note that this is really just @@ -413,7 +475,7 @@ def predict_cost(self,params): Returns: float : Predicted cost at parameters ''' - return self.net.predict_cost(params) + return self._unscale_cost(self.net.predict_cost(self._scale_params(params))) def predict_cost_gradient(self,params): ''' @@ -424,7 +486,7 @@ def predict_cost_gradient(self,params): Returns: float : Predicted gradient at parameters ''' - return self.net.predict_cost_gradient(params) + return self._unscale_gradient(self.net.predict_cost_gradient(self._scale_params(params))) # Public mmethods to be used only for debugging/analysis. 
From e801dcec881680666e4b5925a961270565b3c1c0 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 10 May 2017 15:50:07 +1000 Subject: [PATCH 036/119] Make training threshold configurable --- mloop/nnlearner.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 8225a26..9d87fee 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -17,7 +17,11 @@ class SingleNeuralNet(): num_params: The number of params. layer_dims: The number of nodes in each layer. layer_activations: The activation function for each layer. - train_epochs: Epochs per train. + train_epochs: Base epochs per train. + train_threshold_ratio: (Relative) loss improvement per train under which training should + terminate. E.g. 0.1 means we will train (train_epochs at a time) until the improvement in + loss is less than 0.1 of the loss when that train started (so lower values mean we will + train for longer). batch_size: The training batch size. keep_prob: The dropoout keep probability. regularisation_coefficient: The regularisation coefficient. @@ -29,6 +33,7 @@ def __init__(self, layer_dims, layer_activations, train_epochs, + train_threshold_ratio, batch_size, keep_prob, regularisation_coefficient, @@ -44,6 +49,7 @@ def __init__(self, # Hyperparameters for the net. These are all constant. 
self.num_params = num_params self.train_epochs = train_epochs + self.train_threshold_ratio = train_threshold_ratio self.batch_size = batch_size self.keep_prob = keep_prob self.regularisation_coefficient = regularisation_coefficient @@ -169,7 +175,7 @@ def fit(self, params, costs): # - if the new loss is greater than the threshold then we haven't improved much, so stop # - else start from the top while True: - threshold = 0.9 * self._loss(params, costs)[0] + threshold = (1 - self.train_threshold_ratio) * self._loss(params, costs)[0] self.log.debug("Training with threshold " + str(threshold)) if threshold == 0: break @@ -305,6 +311,7 @@ def amazing_abs(_x): self.num_params, [64]*5, [gelu_fast]*5, 100, # train_epochs + 0.5, # train_threshold_ratio 64, # batch_size 0.8, # keep_prob reg, From 83e8e66151784f7a99a12d94bf26ef06f4c4ca1d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 10 May 2017 15:50:49 +1000 Subject: [PATCH 037/119] Add support for surface density plots --- mloop/visualizations.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 32abbbf..ae256c1 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -578,6 +578,7 @@ def create_neural_net_learner_visualizations(filename, if plot_cross_sections: visualization.plot_cross_sections() visualization.plot_surface() + visualization.plot_density_surface() visualization.plot_losses() @@ -718,6 +719,28 @@ def plot_surface(self): ax.scatter(self.all_params[:,0], self.all_params[:,1], self.all_costs, c='r') + def plot_density_surface(self): + ''' + Produce a density plot of the cost surface (only works when there are 2 parameters) + ''' + if self.num_params != 2: + return + global figure_counter + figure_counter += 1 + fig = plt.figure(figure_counter) + + points = 50 + xs, ys = np.meshgrid( + np.linspace(self.min_boundary[0], self.max_boundary[0], points), + np.linspace(self.min_boundary[1], self.max_boundary[1], 
points)) + zs_list = self.predict_costs_from_param_array(list(zip(xs.flatten(),ys.flatten()))) + zs = np.array(zs_list).reshape(points,points) + plt.pcolormesh(xs,ys,zs) + plt.scatter(self.all_params[:,0], self.all_params[:,1], c=self.all_costs, vmin=np.min(zs), vmax=np.max(zs), s=100) + plt.colorbar() + plt.xlabel("Param 0") + plt.ylabel("Param 1") + def plot_losses(self): ''' Produce a figure of the loss as a function of training run. From ea1e6eec5d023cb166fb7bd988f857b006915643 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 10 May 2017 15:51:12 +1000 Subject: [PATCH 038/119] Tweak params --- mloop/nnlearner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 9d87fee..77bd07a 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -279,7 +279,7 @@ def __init__(self, # Variables for tracking the current state of hyperparameter fitting. self.last_hyperfit = 0 - self.last_net_reg = 0.001 + self.last_net_reg = 1e-6 self.cost_scaler = skp.StandardScaler(with_mean=True, with_std=True) self.param_scaler = skp.StandardScaler(with_mean=True, with_std=True) @@ -313,7 +313,7 @@ def amazing_abs(_x): 100, # train_epochs 0.5, # train_threshold_ratio 64, # batch_size - 0.8, # keep_prob + 1., # keep_prob reg, self.losses_list) From 8225a129bedc8a3253a466c1be25e389246df74d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 10 May 2017 15:51:49 +1000 Subject: [PATCH 039/119] Hide some less useful plots --- mloop/visualizations.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index ae256c1..5d2d2e5 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -111,10 +111,10 @@ def create_controller_visualizations(filename, visualization = ControllerVisualizer(filename,file_type=file_type) if plot_cost_vs_run: visualization.plot_cost_vs_run() - if plot_parameters_vs_run: - 
visualization.plot_parameters_vs_run() - if plot_parameters_vs_cost: - visualization.plot_parameters_vs_cost() + #if plot_parameters_vs_run: + # visualization.plot_parameters_vs_run() + #if plot_parameters_vs_cost: + # visualization.plot_parameters_vs_cost() class ControllerVisualizer(): ''' @@ -577,7 +577,7 @@ def create_neural_net_learner_visualizations(filename, visualization = NeuralNetVisualizer(filename, file_type=file_type) if plot_cross_sections: visualization.plot_cross_sections() - visualization.plot_surface() + #visualization.plot_surface() visualization.plot_density_surface() visualization.plot_losses() From 421652011f1cf08f9909ba6284a69b0c7be30f4d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 11 May 2017 22:13:53 +1000 Subject: [PATCH 040/119] Tweak params and update some docs --- mloop/nnlearner.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 77bd07a..21580ad 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -19,9 +19,10 @@ class SingleNeuralNet(): layer_activations: The activation function for each layer. train_epochs: Base epochs per train. train_threshold_ratio: (Relative) loss improvement per train under which training should - terminate. E.g. 0.1 means we will train (train_epochs at a time) until the improvement in - loss is less than 0.1 of the loss when that train started (so lower values mean we will - train for longer). + terminate. E.g. 0.1 means we will train (train_epochs at a time) until the improvement + in loss is less than 0.1 of the loss when that train started (so lower values mean we + will train for longer). Alternatively, you can think of this as the smallest gradient + we'll allow before deciding that the loss isn't improving any more. batch_size: The training batch size. keep_prob: The dropoout keep probability. regularisation_coefficient: The regularisation coefficient. 
@@ -311,8 +312,8 @@ def amazing_abs(_x): self.num_params, [64]*5, [gelu_fast]*5, 100, # train_epochs - 0.5, # train_threshold_ratio - 64, # batch_size + 0.1, # train_threshold_ratio + 8, # batch_size 1., # keep_prob reg, self.losses_list) From 884b76b089249fadc11ce4163418c1dd05c854a9 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 11 May 2017 23:24:02 +1000 Subject: [PATCH 041/119] Initialise scalers in a more sensible place --- mloop/nnlearner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 21580ad..e2a0efd 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -282,8 +282,6 @@ def __init__(self, self.last_hyperfit = 0 self.last_net_reg = 1e-6 - self.cost_scaler = skp.StandardScaler(with_mean=True, with_std=True) - self.param_scaler = skp.StandardScaler(with_mean=True, with_std=True) # The samples used to fit param_scaler and cost_scaler. When set, this will be a tuple of # (params samples, cost samples). self.scaler_samples = None @@ -322,6 +320,9 @@ def _fit_scaler(self): if self.scaler_samples is None: self.log.error("_fit_scaler() called before samples set") raise ValueError + self.cost_scaler = skp.StandardScaler(with_mean=True, with_std=True) + self.param_scaler = skp.StandardScaler(with_mean=True, with_std=True) + self.param_scaler.fit(self.scaler_samples[0]) # Cost is scalar but numpy doesn't like scalars, so reshape to be a 0D vector instead. self.cost_scaler.fit(np.array(self.scaler_samples[1]).reshape(-1,1)) From c4975fd5c85da539b1a66b693d8dd368b0281a03 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 11 May 2017 23:24:24 +1000 Subject: [PATCH 042/119] Experiment with adding 10 to cost after scaling Seems to work quite well, actually. Need to understand why. 
--- mloop/nnlearner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index e2a0efd..08370e7 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -343,7 +343,7 @@ def _scale_params_and_cost_list(self, params_list_unscaled, cost_list_unscaled): params_list_scaled = self.param_scaler.transform(params_list_unscaled) # As above, numpy doesn't like scalars, so we need to do some reshaping. cost_vector_list_unscaled = np.array(cost_list_unscaled).reshape(-1,1) - cost_vector_list_scaled = self.cost_scaler.transform(cost_vector_list_unscaled) + cost_vector_list_scaled = self.cost_scaler.transform(cost_vector_list_unscaled) + 10 cost_list_scaled = cost_vector_list_scaled[:,0] return params_list_scaled, cost_list_scaled @@ -354,7 +354,7 @@ def _unscale_params(self, params_scaled): return self.param_scaler.inverse_transform([params_scaled])[0] def _unscale_cost(self, cost_scaled): - return self.cost_scaler.inverse_transform([[cost_scaled]])[0][0] + return self.cost_scaler.inverse_transform([[cost_scaled - 10]])[0][0] def _unscale_gradient(self, gradient_scaled): return np.multiply(gradient_scaled, self._gradient_unscale) From c397b6ea5b88507283eb050fbaa0e28b99160339 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 15 May 2017 16:12:50 +1000 Subject: [PATCH 043/119] Rename scalers to make them look more private --- mloop/nnlearner.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 08370e7..5746160 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -282,7 +282,7 @@ def __init__(self, self.last_hyperfit = 0 self.last_net_reg = 1e-6 - # The samples used to fit param_scaler and cost_scaler. When set, this will be a tuple of + # The samples used to fit the scalers. When set, this will be a tuple of # (params samples, cost samples). 
self.scaler_samples = None @@ -320,12 +320,12 @@ def _fit_scaler(self): if self.scaler_samples is None: self.log.error("_fit_scaler() called before samples set") raise ValueError - self.cost_scaler = skp.StandardScaler(with_mean=True, with_std=True) - self.param_scaler = skp.StandardScaler(with_mean=True, with_std=True) + self._cost_scaler = skp.StandardScaler(with_mean=True, with_std=True) + self._param_scaler = skp.StandardScaler(with_mean=True, with_std=True) - self.param_scaler.fit(self.scaler_samples[0]) + self._param_scaler.fit(self.scaler_samples[0]) # Cost is scalar but numpy doesn't like scalars, so reshape to be a 0D vector instead. - self.cost_scaler.fit(np.array(self.scaler_samples[1]).reshape(-1,1)) + self._cost_scaler.fit(np.array(self.scaler_samples[1]).reshape(-1,1)) # Now that the scaler is fitted, calculate the parameters we'll need to unscale gradients. # We need to know which unscaled gradient would correspond to a scaled gradient of [1,...1], @@ -340,21 +340,21 @@ def _fit_scaler(self): self._gradient_unscale = rise_unscaled / run_unscaled def _scale_params_and_cost_list(self, params_list_unscaled, cost_list_unscaled): - params_list_scaled = self.param_scaler.transform(params_list_unscaled) + params_list_scaled = self._param_scaler.transform(params_list_unscaled) # As above, numpy doesn't like scalars, so we need to do some reshaping. 
cost_vector_list_unscaled = np.array(cost_list_unscaled).reshape(-1,1) - cost_vector_list_scaled = self.cost_scaler.transform(cost_vector_list_unscaled) + 10 + cost_vector_list_scaled = self._cost_scaler.transform(cost_vector_list_unscaled) + 10 cost_list_scaled = cost_vector_list_scaled[:,0] return params_list_scaled, cost_list_scaled def _scale_params(self, params_unscaled): - return self.param_scaler.transform([params_unscaled])[0] + return self._param_scaler.transform([params_unscaled])[0] def _unscale_params(self, params_scaled): - return self.param_scaler.inverse_transform([params_scaled])[0] + return self._param_scaler.inverse_transform([params_scaled])[0] def _unscale_cost(self, cost_scaled): - return self.cost_scaler.inverse_transform([[cost_scaled - 10]])[0][0] + return self._cost_scaler.inverse_transform([[cost_scaled - 10]])[0][0] def _unscale_gradient(self, gradient_scaled): return np.multiply(gradient_scaled, self._gradient_unscale) From 666cc73aed68df3214ba5cabd251883dcf15fc2e Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 15 May 2017 16:13:15 +1000 Subject: [PATCH 044/119] Pull the artificial mean offset into a variable --- mloop/nnlearner.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 5746160..6f0c0d4 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -327,6 +327,8 @@ def _fit_scaler(self): # Cost is scalar but numpy doesn't like scalars, so reshape to be a 0D vector instead. self._cost_scaler.fit(np.array(self.scaler_samples[1]).reshape(-1,1)) + self._mean_offset = 10 + # Now that the scaler is fitted, calculate the parameters we'll need to unscale gradients. 
# We need to know which unscaled gradient would correspond to a scaled gradient of [1,...1], # which we can calculate as the unscaled gradient associated with a scaled rise of 1 and a @@ -343,7 +345,8 @@ def _scale_params_and_cost_list(self, params_list_unscaled, cost_list_unscaled): params_list_scaled = self._param_scaler.transform(params_list_unscaled) # As above, numpy doesn't like scalars, so we need to do some reshaping. cost_vector_list_unscaled = np.array(cost_list_unscaled).reshape(-1,1) - cost_vector_list_scaled = self._cost_scaler.transform(cost_vector_list_unscaled) + 10 + cost_vector_list_scaled = (self._cost_scaler.transform(cost_vector_list_unscaled) + + self._mean_offset) cost_list_scaled = cost_vector_list_scaled[:,0] return params_list_scaled, cost_list_scaled @@ -354,7 +357,7 @@ def _unscale_params(self, params_scaled): return self._param_scaler.inverse_transform([params_scaled])[0] def _unscale_cost(self, cost_scaled): - return self._cost_scaler.inverse_transform([[cost_scaled - 10]])[0][0] + return self._cost_scaler.inverse_transform([[cost_scaled - self._mean_offset]])[0][0] def _unscale_gradient(self, gradient_scaled): return np.multiply(gradient_scaled, self._gradient_unscale) From 3e67b2b57047a46fccb3497d05b46cc6838928ba Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 15 May 2017 16:15:17 +1000 Subject: [PATCH 045/119] Set mean offset to 0 We shouldn't need this. The net should easily be able to fit that. --- mloop/nnlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 6f0c0d4..cbf4b1f 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -327,7 +327,7 @@ def _fit_scaler(self): # Cost is scalar but numpy doesn't like scalars, so reshape to be a 0D vector instead. 
self._cost_scaler.fit(np.array(self.scaler_samples[1]).reshape(-1,1)) - self._mean_offset = 10 + self._mean_offset = 0 # Now that the scaler is fitted, calculate the parameters we'll need to unscale gradients. # We need to know which unscaled gradient would correspond to a scaled gradient of [1,...1], From b3b2ab9c64eb282ffb42220202615b427a57edc6 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 16 May 2017 16:55:00 +1000 Subject: [PATCH 046/119] Allow epochs to be specified per-train --- mloop/nnlearner.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index cbf4b1f..ae66cd3 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -17,7 +17,6 @@ class SingleNeuralNet(): num_params: The number of params. layer_dims: The number of nodes in each layer. layer_activations: The activation function for each layer. - train_epochs: Base epochs per train. train_threshold_ratio: (Relative) loss improvement per train under which training should terminate. E.g. 0.1 means we will train (train_epochs at a time) until the improvement in loss is less than 0.1 of the loss when that train started (so lower values mean we @@ -33,7 +32,6 @@ def __init__(self, num_params, layer_dims, layer_activations, - train_epochs, train_threshold_ratio, batch_size, keep_prob, @@ -49,7 +47,6 @@ def __init__(self, # Hyperparameters for the net. These are all constant. 
self.num_params = num_params - self.train_epochs = train_epochs self.train_threshold_ratio = train_threshold_ratio self.batch_size = batch_size self.keep_prob = keep_prob @@ -154,7 +151,7 @@ def _loss(self, params, costs): self.regularisation_coefficient_placeholder: 0, })) - def fit(self, params, costs): + def fit(self, params, costs, epochs): ''' Fit the neural net to the provided data @@ -181,7 +178,7 @@ def fit(self, params, costs): if threshold == 0: break tot = 0 - for i in range(self.train_epochs): + for i in range(epochs): # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) for j in range(math.ceil(len(params) / self.batch_size)): @@ -202,7 +199,7 @@ def fit(self, params, costs): + ', with unregularized cost ' + str(ul)) (l, ul) = self._loss(params, costs) - al = tot / float(self.train_epochs) + al = tot / float(epochs) self.log.debug('Loss ' + str(l) + ', average loss ' + str(al)) if l > threshold: break @@ -278,6 +275,8 @@ def __init__(self, self.num_params = num_params self.fit_hyperparameters = fit_hyperparameters + self.epochs = 100 + # Variables for tracking the current state of hyperparameter fitting. 
self.last_hyperfit = 0 self.last_net_reg = 1e-6 @@ -309,7 +308,6 @@ def amazing_abs(_x): return SingleNeuralNet( self.num_params, [64]*5, [gelu_fast]*5, - 100, # train_epochs 0.1, # train_threshold_ratio 8, # batch_size 1., # keep_prob @@ -463,7 +461,7 @@ def fit_neural_net(self, all_params, all_costs): for r in [0.001, 0.01, 0.1, 1, 10]: net = self._make_net(r) net.init() - net.fit(train_params, train_costs) + net.fit(train_params, train_costs, self.epochs) this_cv_loss = net.cross_validation_loss(cv_params, cv_costs) if this_cv_loss < best_cv_loss and this_cv_loss < 0.1 * orig_cv_loss: best_cv_loss = this_cv_loss @@ -476,7 +474,7 @@ def fit_neural_net(self, all_params, all_costs): # TODO: Fit depth - self.net.fit(all_params, all_costs) + self.net.fit(all_params, all_costs, self.epochs) def predict_cost(self,params): ''' From 1f0fcb7568c06e7a7e85d73116979f6e2c2007ea Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 16 May 2017 16:55:48 +1000 Subject: [PATCH 047/119] Support training multiple networks simultaneously Sampling a random one when getting values. 
--- mloop/nnlearner.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index ae66cd3..74025f4 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -242,6 +242,63 @@ def predict_cost_gradient(self,params): ''' return self.tf_session.run(self.output_var_gradient, feed_dict={self.input_placeholder: [params]})[0][0] +class SampledNeuralNet(): + def __init__(self, + net_creator, + count): + self.log = logging.getLogger(__name__) + self.net_creator = net_creator + self.nets = [self.net_creator() for _ in range(count)] + self.fit_count = 0 + + def _random_net(self): + return self.nets[np.random.randint(0, len(self.nets))] + + def destroy(self): + for n in self.nets: + n.destroy() + + def init(self): + for n in self.nets: + n.init() + + def load(self, archive): + for i, n in enumerate(self.nets): + #n.load(archive[str(i)]) + n.load(archive) + + def save(self): + return self.nets[0].save() + #ret = {} + #for i, n in enumerate(self.nets): + # ret[str(i)] = n.save() + #return ret + + def fit(self, params, costs, epochs): + self.fit_count += 1 + # Every per'th fit we clear out a net and re-train it. 
+ #per = 2 + #if self.fit_count % per == 0: + # index = int(self.fit_count / per) % len(self.nets) + # self.log.debug("Re-creating net " + str(index)) + # self.nets[index].destroy() + # self.nets[index] = self.net_creator() + # self.nets[index].init() + + for n in self.nets: + n.fit(params, costs, epochs) + + def cross_validation_loss(self, params, costs): + return np.mean([n.cross_validation_loss(params, costs) for n in self.nets]) + + def predict_cost(self,params): + return self._random_net().predict_cost(params) + #return np.mean([n.predict_cost(params) for n in self.nets]) + + def predict_cost_gradient(self,params): + return self._random_net().predict_cost_gradient(params) + #return np.mean([n.predict_cost_gradient(params) for n in self.nets]) + class NeuralNetImpl(): ''' @@ -305,14 +362,15 @@ def gelu_fast(_x): return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3)))) def amazing_abs(_x): return tf.maximum(1 - tf.abs(_x), 0) - return SingleNeuralNet( - self.num_params, - [64]*5, [gelu_fast]*5, - 0.1, # train_threshold_ratio - 8, # batch_size - 1., # keep_prob - reg, - self.losses_list) + creator = lambda: SingleNeuralNet( + self.num_params, + [64]*5, [tf.nn.relu]*5, + 0.5, # train_threshold_ratio + 16, # batch_size + 1., # keep_prob + reg, + self.losses_list) + return SampledNeuralNet(creator, 3) def _fit_scaler(self): if self.scaler_samples is None: From a493033b956ca3565be741a50e8869c93128298f Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 16 May 2017 16:57:05 +1000 Subject: [PATCH 048/119] Add basic training timing --- mloop/nnlearner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 74025f4..5cb9a5d 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -1,5 +1,6 @@ import logging import math +import time import numpy as np import sklearn.preprocessing as skp @@ -172,12 +173,14 @@ def fit(self, params, costs, epochs): # - train for train_epochs epochs # - if the 
new loss is greater than the threshold then we haven't improved much, so stop # - else start from the top + start = time.time() while True: threshold = (1 - self.train_threshold_ratio) * self._loss(params, costs)[0] self.log.debug("Training with threshold " + str(threshold)) if threshold == 0: break tot = 0 + run_start = time.time() for i in range(epochs): # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) @@ -197,12 +200,14 @@ def fit(self, params, costs, epochs): if i % 10 == 0: self.log.debug('Fit neural network with total training cost ' + str(l) + ', with unregularized cost ' + str(ul)) + self.log.debug("Run trained for: " + str(time.time() - run_start)) (l, ul) = self._loss(params, costs) al = tot / float(epochs) self.log.debug('Loss ' + str(l) + ', average loss ' + str(al)) if l > threshold: break + self.log.debug("Total trained for: " + str(time.time() - start)) def cross_validation_loss(self, params, costs): ''' From 3bfe89b88dc0665762c8bcdce141b4ec759f41ad Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 16 May 2017 16:57:42 +1000 Subject: [PATCH 049/119] Train less for incremental points --- mloop/nnlearner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 5cb9a5d..e3a378b 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -490,8 +490,11 @@ def fit_neural_net(self, all_params, all_costs): # If we haven't initialised the scaler yet, do it now. 
if self.scaler_samples is None: + first_fit = True self.scaler_samples = (all_params.copy(), all_costs.copy()) self._fit_scaler() + else: + first_fit = False all_params, all_costs = self._scale_params_and_cost_list(all_params, all_costs) @@ -537,7 +540,7 @@ def fit_neural_net(self, all_params, all_costs): # TODO: Fit depth - self.net.fit(all_params, all_costs, self.epochs) + self.net.fit(all_params, all_costs, self.epochs if first_fit else int(self.epochs / 10)) def predict_cost(self,params): ''' From 5b2331036cb69bab4948a53bb440e09d7b93dfac Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 16 May 2017 17:02:01 +1000 Subject: [PATCH 050/119] Initial attempt at shifting batching into TF graph Doesn't work yet. --- mloop/nnlearner.py | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index e3a378b..d2138c5 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -66,10 +66,19 @@ def __init__(self, self.weights = [] self.biases = [] + # Loss function and training + inp, outp = tf.train.shuffle_batch( + tensors=[self.input_placeholder, self.output_placeholder], + batch_size=self.batch_size, + capacity=5000, + min_after_dequeue=0, + enqueue_many=True, + allow_smaller_final_batch=True) + # Input + internal nodes # TODO: Use length scale for setting initial weights? 
prev_layer_dim = self.num_params - prev_h = self.input_placeholder + prev_h = inp stddev=0.1 for (i, (dim, act)) in enumerate(zip(layer_dims, layer_activations)): self.weights.append(tf.Variable( @@ -92,9 +101,8 @@ def __init__(self, name="bias_out")) self.output_var = tf.matmul(prev_h, self.weights[-1]) + self.biases[-1] - # Loss function and training self.loss_func = ( - tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - self.output_placeholder), + tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - outp), reduction_indices=[1])) + self.regularisation_coefficient_placeholder * tf.reduce_mean([tf.nn.l2_loss(W) for W in self.weights])) @@ -182,18 +190,24 @@ def fit(self, params, costs, epochs): tot = 0 run_start = time.time() for i in range(epochs): - # Split the data into random batches, and train on each batch - indices = np.random.permutation(len(params)) - for j in range(math.ceil(len(params) / self.batch_size)): - batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] - batch_input = [params[index] for index in batch_indices] - batch_output = [[costs[index]] for index in batch_indices] - self.tf_session.run(self.train_step, - feed_dict={self.input_placeholder: batch_input, - self.output_placeholder: batch_output, - self.regularisation_coefficient_placeholder: self.regularisation_coefficient, - self.keep_prob_placeholder: self.keep_prob, - }) + self.tf_session.run(self.train_step, + feed_dict={self.input_placeholder: params, + self.output_placeholder: [[c] for c in costs], + self.regularisation_coefficient_placeholder: self.regularisation_coefficient, + self.keep_prob_placeholder: self.keep_prob, + }) + ## Split the data into random batches, and train on each batch + #indices = np.random.permutation(len(params)) + #for j in range(math.ceil(len(params) / self.batch_size)): + # batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] + # batch_input = [params[index] for index in batch_indices] + # batch_output = 
[[costs[index]] for index in batch_indices] + # self.tf_session.run(self.train_step, + # feed_dict={self.input_placeholder: batch_input, + # self.output_placeholder: batch_output, + # self.regularisation_coefficient_placeholder: self.regularisation_coefficient, + # self.keep_prob_placeholder: self.keep_prob, + # }) (l, ul) = self._loss(params, costs) self.losses_list.append(l) tot += l From d171c57e089cfa9a70533e81a12ffc838755c794 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 16 May 2017 18:48:44 +1000 Subject: [PATCH 051/119] Speed up loss evaluation Each call to Session.run() has some overhead, and when we do it every single epoch those overheads add up. One of those calls-per-epoch was unnecessary, so remove that. This seems to result in a speed up of about 10%. --- mloop/nnlearner.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index e3a378b..13be6ed 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -93,13 +93,15 @@ def __init__(self, self.output_var = tf.matmul(prev_h, self.weights[-1]) + self.biases[-1] # Loss function and training - self.loss_func = ( - tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - self.output_placeholder), - reduction_indices=[1])) - + self.regularisation_coefficient_placeholder - * tf.reduce_mean([tf.nn.l2_loss(W) for W in self.weights])) + self.loss_unreg = tf.reduce_mean(tf.reduce_sum( + tf.square(self.output_var - self.output_placeholder), + reduction_indices=[1])) + self.loss_reg = (self.regularisation_coefficient_placeholder + * tf.reduce_mean([tf.nn.l2_loss(W) for W in self.weights])) + self.loss_total = self.loss_unreg + self.loss_reg + # TODO: Set learning rate based on length scale? 
- self.train_step = tf.train.AdamOptimizer().minimize(self.loss_func) + self.train_step = tf.train.AdamOptimizer().minimize(self.loss_total) # Gradient self.output_var_gradient = tf.gradients(self.output_var, self.input_placeholder) @@ -139,18 +141,12 @@ def _loss(self, params, costs): ''' Returns the loss and unregularised loss for the given params and costs. ''' - return (self.tf_session.run( - self.loss_func, + return self.tf_session.run( + [self.loss_total, self.loss_unreg], feed_dict={self.input_placeholder: params, self.output_placeholder: [[c] for c in costs], self.regularisation_coefficient_placeholder: self.regularisation_coefficient, - }), - self.tf_session.run( - self.loss_func, - feed_dict={self.input_placeholder: params, - self.output_placeholder: [[c] for c in costs], - self.regularisation_coefficient_placeholder: 0, - })) + }) def fit(self, params, costs, epochs): ''' @@ -217,7 +213,7 @@ def cross_validation_loss(self, params, costs): params (array): array of parameter arrays costs (array): array of costs (associated with the corresponding parameters) ''' - return self.tf_session.run(self.loss_func, + return self.tf_session.run(self.loss_total, feed_dict={self.input_placeholder: params, self.output_placeholder: [[c] for c in costs], }) From 38926a95456e73b420373de2f96571c068159653 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 16 May 2017 18:56:05 +1000 Subject: [PATCH 052/119] Add more timing --- mloop/nnlearner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 13be6ed..75af59a 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -178,9 +178,11 @@ def fit(self, params, costs, epochs): tot = 0 run_start = time.time() for i in range(epochs): + epoch_start = time.time() # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) for j in range(math.ceil(len(params) / self.batch_size)): + batch_start = time.time() batch_indices = indices[j 
* self.batch_size : (j + 1) * self.batch_size] batch_input = [params[index] for index in batch_indices] batch_output = [[costs[index]] for index in batch_indices] @@ -190,12 +192,14 @@ def fit(self, params, costs, epochs): self.regularisation_coefficient_placeholder: self.regularisation_coefficient, self.keep_prob_placeholder: self.keep_prob, }) + self.log.debug("Batch time: " + str(time.time() - batch_start)) (l, ul) = self._loss(params, costs) self.losses_list.append(l) tot += l if i % 10 == 0: self.log.debug('Fit neural network with total training cost ' + str(l) + ', with unregularized cost ' + str(ul)) + self.log.debug("Epoch trained for: " + str(time.time() - epoch_start)) self.log.debug("Run trained for: " + str(time.time() - run_start)) (l, ul) = self._loss(params, costs) From 40feacdb08dac51f783784eed9c68dc567ffc91b Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 13:49:23 +1000 Subject: [PATCH 053/119] Switch to global_variables_initializer --- mloop/nnlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 75af59a..5544619 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -107,7 +107,7 @@ def __init__(self, self.output_var_gradient = tf.gradients(self.output_var, self.input_placeholder) # Initialiser for ... 
initialising - self.initialiser = tf.initialize_all_variables() + self.initialiser = tf.global_variables_initializer() # Saver for saving and restoring params self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) From e7d78f014b86a3ddfafe7963b7d757ca1587cabd Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 13:51:30 +1000 Subject: [PATCH 054/119] Tidy up net initialisation --- mloop/nnlearner.py | 62 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 5544619..38bea56 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -56,56 +56,70 @@ def __init__(self, self.losses_list = losses_list with self.graph.as_default(): - # Inputs + ## Inputs self.input_placeholder = tf.placeholder(tf.float32, shape=[None, self.num_params]) self.output_placeholder = tf.placeholder(tf.float32, shape=[None, 1]) self.keep_prob_placeholder = tf.placeholder_with_default(1., shape=[]) self.regularisation_coefficient_placeholder = tf.placeholder_with_default(0., shape=[]) - # Parameters - self.weights = [] - self.biases = [] + ## Initialise the network + + weights = [] + biases = [] # Input + internal nodes # TODO: Use length scale for setting initial weights? 
prev_layer_dim = self.num_params - prev_h = self.input_placeholder stddev=0.1 for (i, (dim, act)) in enumerate(zip(layer_dims, layer_activations)): - self.weights.append(tf.Variable( + weights.append(tf.Variable( tf.random_normal([prev_layer_dim, dim], stddev=stddev), name="weight_"+str(i))) - self.biases.append(tf.Variable( + biases.append(tf.Variable( tf.random_normal([dim], stddev=stddev), name="bias_"+str(i))) prev_layer_dim = dim - prev_h = tf.nn.dropout( - act(tf.matmul(prev_h, self.weights[-1]) + self.biases[-1]), - keep_prob=self.keep_prob_placeholder) # Output node - self.weights.append(tf.Variable( + weights.append(tf.Variable( tf.random_normal([prev_layer_dim, 1], stddev=stddev), name="weight_out")) - self.biases.append(tf.Variable( + biases.append(tf.Variable( tf.random_normal([1], stddev=stddev), name="bias_out")) - self.output_var = tf.matmul(prev_h, self.weights[-1]) + self.biases[-1] - # Loss function and training - self.loss_unreg = tf.reduce_mean(tf.reduce_sum( - tf.square(self.output_var - self.output_placeholder), - reduction_indices=[1])) - self.loss_reg = (self.regularisation_coefficient_placeholder - * tf.reduce_mean([tf.nn.l2_loss(W) for W in self.weights])) - self.loss_total = self.loss_unreg + self.loss_reg + # Get the output var given an input var + def get_output_var(input_var): + prev_h = input_var + for w, b in zip(weights[:-1], biases[:-1]): + prev_h = tf.nn.dropout( + act(tf.matmul(prev_h, w) + b), + keep_prob=self.keep_prob_placeholder) + return tf.matmul(prev_h, weights[-1]) + biases[-1] + + ## Define tensors for evaluating the output var and gradient on the full input + self.output_var = get_output_var(self.input_placeholder) + self.output_var_gradient = tf.gradients(self.output_var, self.input_placeholder) + + ## Declare common loss functions + + # Get the raw loss given the expected and actual output vars + def get_loss_raw(expected, actual): + return tf.reduce_mean(tf.reduce_sum( + tf.square(expected - actual), + 
reduction_indices=[1])) + + # Regularisation component of the loss. + loss_reg = (self.regularisation_coefficient_placeholder + * tf.reduce_mean([tf.nn.l2_loss(W) for W in weights])) + + ## Define tensors for evaluating the loss on the full input + self.loss_raw = get_loss_raw(self.output_placeholder, self.output_var) + self.loss_total = self.loss_raw + loss_reg # TODO: Set learning rate based on length scale? self.train_step = tf.train.AdamOptimizer().minimize(self.loss_total) - # Gradient - self.output_var_gradient = tf.gradients(self.output_var, self.input_placeholder) - # Initialiser for ... initialising self.initialiser = tf.global_variables_initializer() @@ -142,7 +156,7 @@ def _loss(self, params, costs): Returns the loss and unregularised loss for the given params and costs. ''' return self.tf_session.run( - [self.loss_total, self.loss_unreg], + [self.loss_total, self.loss_raw], feed_dict={self.input_placeholder: params, self.output_placeholder: [[c] for c in costs], self.regularisation_coefficient_placeholder: self.regularisation_coefficient, From 86ca5531e08bcbdd858d62ed55fae6f917d798e3 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 14:01:55 +1000 Subject: [PATCH 055/119] Alternative training methods One shuffles the actual arrays rather than shuffling indices and pulling out elements. One moves the whole epoch into the TF graph. Neither of these seem to give a significant speed improvement. 
--- mloop/nnlearner.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 38bea56..03f8810 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -117,6 +117,26 @@ def get_loss_raw(expected, actual): self.loss_raw = get_loss_raw(self.output_placeholder, self.output_var) self.loss_total = self.loss_raw + loss_reg + ## Training + def process_batch(i): + length = tf.minimum(batch_size, tf.shape(self.input_placeholder)[0] - i * batch_size) + input_batch = tf.slice(self.input_placeholder, [i * batch_size, 0], [length, -1]) + output_batch = tf.slice(self.output_placeholder, [i * batch_size, 0], [length, -1]) + return tf.train.AdamOptimizer().minimize( + get_loss_raw( + output_batch, get_output_var(input_batch))) + + i = tf.constant(0) + nbatches = tf.to_int32(tf.ceil( + tf.to_float(tf.shape(self.input_placeholder)[0]) / tf.to_float(batch_size))) + self.train_epoch = tf.while_loop( + lambda i: tf.less(i, nbatches), + lambda i: tf.tuple([tf.add(i,1)], control_inputs=[process_batch(i)])[0], + [i], + back_prop=False, + parallel_iterations=1) + + # TODO: Set learning rate based on length scale? 
self.train_step = tf.train.AdamOptimizer().minimize(self.loss_total) @@ -178,6 +198,9 @@ def fit(self, params, costs, epochs): self.log.error("Params and costs must have the same length") raise ValueError + lparams = np.array(params) + lcosts = np.expand_dims(np.array(costs), axis=1) + # The general training procedure is as follows: # - set a threshold based on the current loss # - train for train_epochs epochs @@ -192,7 +215,30 @@ def fit(self, params, costs, epochs): tot = 0 run_start = time.time() for i in range(epochs): + # Split the data into random batches, and train on each batch epoch_start = time.time() + + # Shuffle the params + #state = np.random.get_state() + #np.random.shuffle(lparams) + #np.random.set_state(state) + #np.random.shuffle(lcosts) + #self.tf_session.run(self.train_epoch, + # feed_dict={self.input_placeholder: lparams, + # self.output_placeholder: lcosts, + # self.regularisation_coefficient_placeholder: self.regularisation_coefficient, + # self.keep_prob_placeholder: self.keep_prob, + # }) + #for j in range(math.ceil(len(params) / self.batch_size)): + # batch_input = lparams[j * self.batch_size : (j + 1) * self.batch_size] + # batch_output = lcosts[j * self.batch_size : (j + 1) * self.batch_size] + + # self.tf_session.run(self.train_step, + # feed_dict={self.input_placeholder: batch_input, + # self.output_placeholder: batch_output, + # self.regularisation_coefficient_placeholder: self.regularisation_coefficient, + # self.keep_prob_placeholder: self.keep_prob, + # }) # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) for j in range(math.ceil(len(params) / self.batch_size)): From aff482eebc31c7569a2cbf346dc339c1d35420e7 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 14:03:56 +1000 Subject: [PATCH 056/119] Revert "Alternative training methods" This reverts commit 86ca5531e08bcbdd858d62ed55fae6f917d798e3. 
Reverting because they're slower, but I still want them in history. --- mloop/nnlearner.py | 46 ---------------------------------------------- 1 file changed, 46 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 03f8810..38bea56 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -117,26 +117,6 @@ def get_loss_raw(expected, actual): self.loss_raw = get_loss_raw(self.output_placeholder, self.output_var) self.loss_total = self.loss_raw + loss_reg - ## Training - def process_batch(i): - length = tf.minimum(batch_size, tf.shape(self.input_placeholder)[0] - i * batch_size) - input_batch = tf.slice(self.input_placeholder, [i * batch_size, 0], [length, -1]) - output_batch = tf.slice(self.output_placeholder, [i * batch_size, 0], [length, -1]) - return tf.train.AdamOptimizer().minimize( - get_loss_raw( - output_batch, get_output_var(input_batch))) - - i = tf.constant(0) - nbatches = tf.to_int32(tf.ceil( - tf.to_float(tf.shape(self.input_placeholder)[0]) / tf.to_float(batch_size))) - self.train_epoch = tf.while_loop( - lambda i: tf.less(i, nbatches), - lambda i: tf.tuple([tf.add(i,1)], control_inputs=[process_batch(i)])[0], - [i], - back_prop=False, - parallel_iterations=1) - - # TODO: Set learning rate based on length scale? 
self.train_step = tf.train.AdamOptimizer().minimize(self.loss_total) @@ -198,9 +178,6 @@ def fit(self, params, costs, epochs): self.log.error("Params and costs must have the same length") raise ValueError - lparams = np.array(params) - lcosts = np.expand_dims(np.array(costs), axis=1) - # The general training procedure is as follows: # - set a threshold based on the current loss # - train for train_epochs epochs @@ -215,30 +192,7 @@ def fit(self, params, costs, epochs): tot = 0 run_start = time.time() for i in range(epochs): - # Split the data into random batches, and train on each batch epoch_start = time.time() - - # Shuffle the params - #state = np.random.get_state() - #np.random.shuffle(lparams) - #np.random.set_state(state) - #np.random.shuffle(lcosts) - #self.tf_session.run(self.train_epoch, - # feed_dict={self.input_placeholder: lparams, - # self.output_placeholder: lcosts, - # self.regularisation_coefficient_placeholder: self.regularisation_coefficient, - # self.keep_prob_placeholder: self.keep_prob, - # }) - #for j in range(math.ceil(len(params) / self.batch_size)): - # batch_input = lparams[j * self.batch_size : (j + 1) * self.batch_size] - # batch_output = lcosts[j * self.batch_size : (j + 1) * self.batch_size] - - # self.tf_session.run(self.train_step, - # feed_dict={self.input_placeholder: batch_input, - # self.output_placeholder: batch_output, - # self.regularisation_coefficient_placeholder: self.regularisation_coefficient, - # self.keep_prob_placeholder: self.keep_prob, - # }) # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) for j in range(math.ceil(len(params) / self.batch_size)): From b1dffc93486a16babe02d4ab631613b71ee7d243 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 14:06:14 +1000 Subject: [PATCH 057/119] Add comment in net init --- mloop/nnlearner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 38bea56..6a632dc 
100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -117,6 +117,7 @@ def get_loss_raw(expected, actual): self.loss_raw = get_loss_raw(self.output_placeholder, self.output_var) self.loss_total = self.loss_raw + loss_reg + ## Training # TODO: Set learning rate based on length scale? self.train_step = tf.train.AdamOptimizer().minimize(self.loss_total) From 1f09c65f14d1a2845bc7d250d8603df2e985bd82 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 14:08:30 +1000 Subject: [PATCH 058/119] Make batch creation slightly more efficient --- mloop/nnlearner.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 6a632dc..da24032 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -179,6 +179,9 @@ def fit(self, params, costs, epochs): self.log.error("Params and costs must have the same length") raise ValueError + lparams = np.array(params) + lcosts = np.expand_dims(np.array(costs), axis=1) + # The general training procedure is as follows: # - set a threshold based on the current loss # - train for train_epochs epochs @@ -199,8 +202,8 @@ def fit(self, params, costs, epochs): for j in range(math.ceil(len(params) / self.batch_size)): batch_start = time.time() batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] - batch_input = [params[index] for index in batch_indices] - batch_output = [[costs[index]] for index in batch_indices] + batch_input = lparams[batch_indices] + batch_output = lcosts[batch_indices] self.tf_session.run(self.train_step, feed_dict={self.input_placeholder: batch_input, self.output_placeholder: batch_output, From da80dfd6955e1f8e14ebbfd15424aeeaa9c55fc3 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 14:09:19 +1000 Subject: [PATCH 059/119] Remove batch timing --- mloop/nnlearner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index da24032..5081850 100644 --- a/mloop/nnlearner.py 
+++ b/mloop/nnlearner.py @@ -200,7 +200,6 @@ def fit(self, params, costs, epochs): # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) for j in range(math.ceil(len(params) / self.batch_size)): - batch_start = time.time() batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] batch_input = lparams[batch_indices] batch_output = lcosts[batch_indices] @@ -210,7 +209,6 @@ def fit(self, params, costs, epochs): self.regularisation_coefficient_placeholder: self.regularisation_coefficient, self.keep_prob_placeholder: self.keep_prob, }) - self.log.debug("Batch time: " + str(time.time() - batch_start)) (l, ul) = self._loss(params, costs) self.losses_list.append(l) tot += l From 3250c25513bb034765183f275e68ff2811635421 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 14:11:16 +1000 Subject: [PATCH 060/119] Only log every 10th epoch's loss Getting the loss is actually pretty slow. --- mloop/nnlearner.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 5081850..fdc7604 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -209,10 +209,9 @@ def fit(self, params, costs, epochs): self.regularisation_coefficient_placeholder: self.regularisation_coefficient, self.keep_prob_placeholder: self.keep_prob, }) - (l, ul) = self._loss(params, costs) - self.losses_list.append(l) - tot += l if i % 10 == 0: + (l, ul) = self._loss(params, costs) + self.losses_list.append(l) self.log.debug('Fit neural network with total training cost ' + str(l) + ', with unregularized cost ' + str(ul)) self.log.debug("Epoch trained for: " + str(time.time() - epoch_start)) From 9b8699f4dd7a028d5fdf40390c2f6393a4db406d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 14:23:04 +1000 Subject: [PATCH 061/119] Add net construction timing --- mloop/nnlearner.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mloop/nnlearner.py 
b/mloop/nnlearner.py index fdc7604..310737f 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -39,6 +39,8 @@ def __init__(self, regularisation_coefficient, losses_list): self.log = logging.getLogger(__name__) + start = time.time() + self.log.debug("Constructing net") self.graph = tf.Graph() self.tf_session = tf.Session(graph=self.graph) @@ -126,6 +128,7 @@ def get_loss_raw(expected, actual): # Saver for saving and restoring params self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) + self.log.debug("Finished constructing net in: " + str(time.time() - start)) def destroy(self): self.tf_session.close() From 6c622ce76ead678a47fdac5f29bc233c453d2c3d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 14:23:35 +1000 Subject: [PATCH 062/119] Revert "Initial attempt at shifting batching into TF graph" This reverts commit 5b2331036cb69bab4948a53bb440e09d7b93dfac. --- mloop/nnlearner.py | 44 +++++++++++++++----------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index d2138c5..e3a378b 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -66,19 +66,10 @@ def __init__(self, self.weights = [] self.biases = [] - # Loss function and training - inp, outp = tf.train.shuffle_batch( - tensors=[self.input_placeholder, self.output_placeholder], - batch_size=self.batch_size, - capacity=5000, - min_after_dequeue=0, - enqueue_many=True, - allow_smaller_final_batch=True) - # Input + internal nodes # TODO: Use length scale for setting initial weights? 
prev_layer_dim = self.num_params - prev_h = inp + prev_h = self.input_placeholder stddev=0.1 for (i, (dim, act)) in enumerate(zip(layer_dims, layer_activations)): self.weights.append(tf.Variable( @@ -101,8 +92,9 @@ def __init__(self, name="bias_out")) self.output_var = tf.matmul(prev_h, self.weights[-1]) + self.biases[-1] + # Loss function and training self.loss_func = ( - tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - outp), + tf.reduce_mean(tf.reduce_sum(tf.square(self.output_var - self.output_placeholder), reduction_indices=[1])) + self.regularisation_coefficient_placeholder * tf.reduce_mean([tf.nn.l2_loss(W) for W in self.weights])) @@ -190,24 +182,18 @@ def fit(self, params, costs, epochs): tot = 0 run_start = time.time() for i in range(epochs): - self.tf_session.run(self.train_step, - feed_dict={self.input_placeholder: params, - self.output_placeholder: [[c] for c in costs], - self.regularisation_coefficient_placeholder: self.regularisation_coefficient, - self.keep_prob_placeholder: self.keep_prob, - }) - ## Split the data into random batches, and train on each batch - #indices = np.random.permutation(len(params)) - #for j in range(math.ceil(len(params) / self.batch_size)): - # batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] - # batch_input = [params[index] for index in batch_indices] - # batch_output = [[costs[index]] for index in batch_indices] - # self.tf_session.run(self.train_step, - # feed_dict={self.input_placeholder: batch_input, - # self.output_placeholder: batch_output, - # self.regularisation_coefficient_placeholder: self.regularisation_coefficient, - # self.keep_prob_placeholder: self.keep_prob, - # }) + # Split the data into random batches, and train on each batch + indices = np.random.permutation(len(params)) + for j in range(math.ceil(len(params) / self.batch_size)): + batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] + batch_input = [params[index] for index in batch_indices] + 
batch_output = [[costs[index]] for index in batch_indices] + self.tf_session.run(self.train_step, + feed_dict={self.input_placeholder: batch_input, + self.output_placeholder: batch_output, + self.regularisation_coefficient_placeholder: self.regularisation_coefficient, + self.keep_prob_placeholder: self.keep_prob, + }) (l, ul) = self._loss(params, costs) self.losses_list.append(l) tot += l From 07579494f6ac79ac768ddedf8503d18f5cea38ba Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 14:26:52 +1000 Subject: [PATCH 063/119] Remove epoch timing (too spammy) --- mloop/nnlearner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 310737f..409ea1a 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -199,7 +199,6 @@ def fit(self, params, costs, epochs): tot = 0 run_start = time.time() for i in range(epochs): - epoch_start = time.time() # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) for j in range(math.ceil(len(params) / self.batch_size)): @@ -217,7 +216,6 @@ def fit(self, params, costs, epochs): self.losses_list.append(l) self.log.debug('Fit neural network with total training cost ' + str(l) + ', with unregularized cost ' + str(ul)) - self.log.debug("Epoch trained for: " + str(time.time() - epoch_start)) self.log.debug("Run trained for: " + str(time.time() - run_start)) (l, ul) = self._loss(params, costs) From dcb7f0f0d70eaa59bc4a1546b83f767cadb3a233 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 16:29:25 +1000 Subject: [PATCH 064/119] Don't save the learner archive every step --- mloop/learners.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mloop/learners.py b/mloop/learners.py index 0256406..33e2517 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1937,7 +1937,8 @@ def run(self): try: while not self.end_event.is_set(): self.log.debug('Learner waiting for new params event') - 
self.save_archive() + # TODO: Not doing this because it's slow. Is it necessary? + #self.save_archive() self.wait_for_new_params_event() self.log.debug('NN learner reading costs') self.get_params_and_costs() From c9670d5cfd942b4538ffecec6f160737563cdff9 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 16:30:14 +1000 Subject: [PATCH 065/119] Tidy up some NN logging --- mloop/nnlearner.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 409ea1a..db581e8 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -40,7 +40,7 @@ def __init__(self, losses_list): self.log = logging.getLogger(__name__) start = time.time() - self.log.debug("Constructing net") + self.log.info("Constructing net") self.graph = tf.Graph() self.tf_session = tf.Session(graph=self.graph) @@ -143,7 +143,7 @@ def load(self, archive): ''' Imports the net from an archive dictionary. You must call exactly one of this and init() before calling any other methods. ''' - self.log.debug("Loading neural network") + self.log.info("Loading neural network") self.saver.restore(self.tf_session, "./" + str(archive['saver_path'])) def save(self): @@ -152,7 +152,7 @@ def save(self): ''' # TODO: Use a proper timestamped filename, maybe? 
path = self.saver.save(self.tf_session, "net.ckpt") - self.log.debug("Saving neural network to: " + path) + self.log.info("Saving neural network to: " + path) return {'saver_path': path} def _loss(self, params, costs): @@ -174,7 +174,7 @@ def fit(self, params, costs, epochs): params (array): array of parameter arrays costs (array): array of costs (associated with the corresponding parameters) ''' - self.log.debug('Fitting neural network') + self.log.info('Fitting neural network') if len(params) == 0: self.log.error('No data provided.') raise ValueError @@ -214,7 +214,7 @@ def fit(self, params, costs, epochs): if i % 10 == 0: (l, ul) = self._loss(params, costs) self.losses_list.append(l) - self.log.debug('Fit neural network with total training cost ' + str(l) + self.log.info('Fit neural network with total training cost ' + str(l) + ', with unregularized cost ' + str(ul)) self.log.debug("Run trained for: " + str(time.time() - run_start)) @@ -344,7 +344,7 @@ def __init__(self, fit_hyperparameters = False): self.log = logging.getLogger(__name__) - self.log.debug('Initialising neural network impl') + self.log.info('Initialising neural network impl') if num_params is None: self.log.error("num_params must be provided") raise ValueError @@ -496,7 +496,6 @@ def fit_neural_net(self, all_params, all_costs): all_params (array): array of all parameter arrays all_costs (array): array of costs (associated with the corresponding parameters) ''' - self.log.debug('Fitting neural network') if len(all_params) == 0: self.log.error('No data provided.') raise ValueError From 81089ba32c0ff160a634ee46ffb200beddcae2b4 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 16:31:00 +1000 Subject: [PATCH 066/119] Attempt to fix race condition in NN learner The problem is that the "new params" event can be seen before the params are actually available on the queue, and the learner can't handle this. 
Work around it by making the learner better at handling it -- now it'll block waiting for those params for 1s. --- mloop/controllers.py | 6 ++++-- mloop/learners.py | 23 +++++++++++++++++------ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/mloop/controllers.py b/mloop/controllers.py index ea53553..fab50d4 100644 --- a/mloop/controllers.py +++ b/mloop/controllers.py @@ -695,9 +695,11 @@ def _optimization_routine(self): self._put_params_and_out_dict(next_params) self.log.debug('Starting ML optimization.') - self.new_params_event.set() - self.save_archive() + # TODO: This is a race. There's no guarantee that this will be available by the time the + # event is set. self._get_cost_and_in_dict() + self.save_archive() + self.new_params_event.set() self.log.debug('End training runs.') ml_consec = 0 diff --git a/mloop/learners.py b/mloop/learners.py index 33e2517..330b128 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -14,6 +14,7 @@ import logging import datetime import os +import queue import mloop.utilities as mlu import sklearn.gaussian_process as skg import sklearn.gaussian_process.kernels as skk @@ -1764,18 +1765,28 @@ def get_params_and_costs(self): ''' Get the parameters and costs from the queue and place in their appropriate all_[type] arrays. Also updates bad costs, best parameters, and search boundaries given trust region. ''' - if self.costs_in_queue.empty(): - self.log.error('Neural network asked for new parameters but no new costs were provided.') - raise ValueError - new_params = [] new_costs = [] new_uncers = [] new_bads = [] update_bads_flag = False - while not self.costs_in_queue.empty(): - (param, cost, uncer, bad) = self.costs_in_queue.get_nowait() + first_dequeue = True + while True: + if first_dequeue: + try: + # Block for 1s, because there might be a race with the event being set. 
+ (param, cost, uncer, bad) = self.costs_in_queue.get(block=True, timeout=1) + first_dequeue = False + except queue.Empty: + self.log.error('Neural network asked for new parameters but no new costs were provided after 1s.') + raise ValueError + else: + try: + (param, cost, uncer, bad) = self.costs_in_queue.get_nowait() + except queue.Empty: + break + self.costs_count +=1 if bad: From 37f2d2a0eb40c9459b88ecfc4208a6b506875a5c Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 16:35:31 +1000 Subject: [PATCH 067/119] Add support for doing an optimisation run over the net Problem is that with stochastic nets, we can get really weird values with consecutive calls to predict_cost(_gradient). Now you can put the net in a state where you should get more consistent results. --- mloop/learners.py | 2 ++ mloop/nnlearner.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 330b128..1a3ea60 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1913,6 +1913,7 @@ def find_next_parameters(self): self.update_search_params() next_params = None next_cost = float('inf') + self.neural_net_impl.start_opt() for start_params in self.search_params: result = so.minimize(fun = self.predict_cost, x0 = start_params, @@ -1922,6 +1923,7 @@ def find_next_parameters(self): if result.fun < next_cost: next_params = result.x next_cost = result.fun + self.neural_net_impl.stop_opt() # Now tweak the selected parameters to make sure we don't just keep on looking in the same # place (the actual minimum might be a short distance away). 
# TODO: Rather than using [-0.1, 0.1] we should pick the fuzziness based on what we know diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index db581e8..a097d1e 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -271,6 +271,7 @@ def __init__(self, self.net_creator = net_creator self.nets = [self.net_creator() for _ in range(count)] self.fit_count = 0 + self.opt_net = None def _random_net(self): return self.nets[np.random.randint(0, len(self.nets))] @@ -313,13 +314,24 @@ def cross_validation_loss(self, params, costs): return np.mean([n.cross_validation_loss(params, costs) for n in self.nets]) def predict_cost(self,params): - return self._random_net().predict_cost(params) + if self.opt_net: + return self.opt_net.predict_cost(params) + else: + return self._random_net().predict_cost(params) #return np.mean([n.predict_cost(params) for n in self.nets]) def predict_cost_gradient(self,params): - return self._random_net().predict_cost_gradient(params) + if self.opt_net: + return self.opt_net.predict_cost_gradient(params) + else: + return self._random_net().predict_cost_gradient(params) #return np.mean([n.predict_cost_gradient(params) for n in self.nets]) + def start_opt(self): + self.opt_net = self._random_net() + + def stop_opt(self): + self.opt_net = None class NeuralNetImpl(): ''' @@ -586,3 +598,16 @@ def get_losses(self): Returns a list of training losses experienced by the network. ''' return self.losses_list + + def start_opt(self): + ''' + Starts an optimisation run. Until stop_opt() is called, predict_cost() and + predict_cost_gradient() will return consistent values. + ''' + self.net.start_opt() + + def stop_opt(self): + ''' + Stops an optimisation run. 
+ ''' + self.net.stop_opt() From 81e697eed627d854b6b88482d7070ad6bed76f0e Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Fri, 19 May 2017 16:37:36 +1000 Subject: [PATCH 068/119] Tweak params to do better with many params --- mloop/nnlearner.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index a097d1e..b83da05 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -369,7 +369,7 @@ def __init__(self, # Variables for tracking the current state of hyperparameter fitting. self.last_hyperfit = 0 - self.last_net_reg = 1e-6 + self.last_net_reg = 1e-8 # The samples used to fit the scalers. When set, this will be a tuple of # (params samples, cost samples). @@ -397,13 +397,13 @@ def amazing_abs(_x): return tf.maximum(1 - tf.abs(_x), 0) creator = lambda: SingleNeuralNet( self.num_params, - [64]*5, [tf.nn.relu]*5, - 0.5, # train_threshold_ratio + [64]*5, [gelu_fast]*5, + 0.2, # train_threshold_ratio 16, # batch_size 1., # keep_prob reg, self.losses_list) - return SampledNeuralNet(creator, 3) + return SampledNeuralNet(creator, 1) def _fit_scaler(self): if self.scaler_samples is None: @@ -567,7 +567,7 @@ def fit_neural_net(self, all_params, all_costs): # TODO: Fit depth - self.net.fit(all_params, all_costs, self.epochs if first_fit else int(self.epochs / 10)) + self.net.fit(all_params, all_costs, self.epochs if first_fit else int(self.epochs / 5)) def predict_cost(self,params): ''' From cefbe78373e7eb3de68806129752e3ad9e90eaa5 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Sun, 21 May 2017 21:11:32 +1000 Subject: [PATCH 069/119] Use different vars for initial and subsequent epoch counts --- mloop/nnlearner.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index b83da05..07838f6 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -365,7 +365,8 @@ def __init__(self, self.num_params = num_params self.fit_hyperparameters 
= fit_hyperparameters - self.epochs = 100 + self.initial_epochs = 100 + self.subsequent_epochs = 20 # Variables for tracking the current state of hyperparameter fitting. self.last_hyperfit = 0 @@ -554,7 +555,7 @@ def fit_neural_net(self, all_params, all_costs): for r in [0.001, 0.01, 0.1, 1, 10]: net = self._make_net(r) net.init() - net.fit(train_params, train_costs, self.epochs) + net.fit(train_params, train_costs, self.initial_epochs) this_cv_loss = net.cross_validation_loss(cv_params, cv_costs) if this_cv_loss < best_cv_loss and this_cv_loss < 0.1 * orig_cv_loss: best_cv_loss = this_cv_loss @@ -567,7 +568,10 @@ def fit_neural_net(self, all_params, all_costs): # TODO: Fit depth - self.net.fit(all_params, all_costs, self.epochs if first_fit else int(self.epochs / 5)) + self.net.fit( + all_params, + all_costs, + self.initial_epochs if first_fit else self.subsequent_epochs) def predict_cost(self,params): ''' From 981b82403e2834ccc2e6cf31a7f662b627953484 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 22 May 2017 17:34:00 +1000 Subject: [PATCH 070/119] Fix x-axis on NN cross sections plot --- mloop/visualizations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 5d2d2e5..ee5baea 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -678,7 +678,7 @@ def plot_cross_sections(self): plt.figure(figure_counter) points = 100 (_,cost_arrays) = self.return_cross_sections(points=points) - rel_params = np.linspace(-5,5,points) + rel_params = np.linspace(0,1,points) for ind in range(self.num_params): plt.plot(rel_params,cost_arrays[ind,:],'-',color=self.param_colors[ind]) if self.has_trust_region: @@ -688,7 +688,7 @@ def plot_cross_sections(self): for ind in range(self.num_params): plt.plot([self.scaled_trust_min[ind],self.scaled_trust_max[ind]],[ytrust,ytrust],'s', color=self.param_colors[ind]) plt.xlabel(scale_param_label) - plt.xlim((-5,5)) + plt.xlim((0,1)) 
plt.ylabel(cost_label) plt.title('NN Learner: Predicted landscape' + ('with trust regions.' if self.has_trust_region else '.')) artists = [] From 42e65587dedaf96c34d4a42810e3360aac5bb6aa Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 22 May 2017 17:34:59 +1000 Subject: [PATCH 071/119] Set initial weights smartly We use the strategy in https://arxiv.org/pdf/1502.01852.pdf. --- mloop/nnlearner.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 07838f6..b4db130 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -70,12 +70,11 @@ def __init__(self, biases = [] # Input + internal nodes - # TODO: Use length scale for setting initial weights? prev_layer_dim = self.num_params stddev=0.1 for (i, (dim, act)) in enumerate(zip(layer_dims, layer_activations)): weights.append(tf.Variable( - tf.random_normal([prev_layer_dim, dim], stddev=stddev), + tf.random_normal([prev_layer_dim, dim], stddev=1.4/np.sqrt(prev_layer_dim)), name="weight_"+str(i))) biases.append(tf.Variable( tf.random_normal([dim], stddev=stddev), @@ -84,7 +83,7 @@ def __init__(self, # Output node weights.append(tf.Variable( - tf.random_normal([prev_layer_dim, 1], stddev=stddev), + tf.random_normal([prev_layer_dim, 1], stddev=1.4/np.sqrt(prev_layer_dim)), name="weight_out")) biases.append(tf.Variable( tf.random_normal([1], stddev=stddev), @@ -120,7 +119,6 @@ def get_loss_raw(expected, actual): self.loss_total = self.loss_raw + loss_reg ## Training - # TODO: Set learning rate based on length scale? self.train_step = tf.train.AdamOptimizer().minimize(self.loss_total) # Initialiser for ... 
initialising From 0364fdca7fc14d028e34d478cc427619622fd6a9 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 22 May 2017 17:37:15 +1000 Subject: [PATCH 072/119] Rename stddev to bias_stddev --- mloop/nnlearner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index b4db130..d3d3dca 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -71,13 +71,13 @@ def __init__(self, # Input + internal nodes prev_layer_dim = self.num_params - stddev=0.1 + bias_stddev=0.1 for (i, (dim, act)) in enumerate(zip(layer_dims, layer_activations)): weights.append(tf.Variable( tf.random_normal([prev_layer_dim, dim], stddev=1.4/np.sqrt(prev_layer_dim)), name="weight_"+str(i))) biases.append(tf.Variable( - tf.random_normal([dim], stddev=stddev), + tf.random_normal([dim], stddev=bias_stddev), name="bias_"+str(i))) prev_layer_dim = dim @@ -86,7 +86,7 @@ def __init__(self, tf.random_normal([prev_layer_dim, 1], stddev=1.4/np.sqrt(prev_layer_dim)), name="weight_out")) biases.append(tf.Variable( - tf.random_normal([1], stddev=stddev), + tf.random_normal([1], stddev=bias_stddev), name="bias_out")) # Get the output var given an input var From 96c17c875baabd355ec3cdcef432860925abf491 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 22 May 2017 17:37:38 +1000 Subject: [PATCH 073/119] Use correct per-layer activation functions This was a typo when doing the refactor before (to split creating variables from wiring them up). It would cause us to use the function specified for the last layer in every layer. 
--- mloop/nnlearner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index d3d3dca..68475ba 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -72,7 +72,7 @@ def __init__(self, # Input + internal nodes prev_layer_dim = self.num_params bias_stddev=0.1 - for (i, (dim, act)) in enumerate(zip(layer_dims, layer_activations)): + for (i, dim) in enumerate(layer_dims): weights.append(tf.Variable( tf.random_normal([prev_layer_dim, dim], stddev=1.4/np.sqrt(prev_layer_dim)), name="weight_"+str(i))) @@ -92,7 +92,7 @@ def __init__(self, # Get the output var given an input var def get_output_var(input_var): prev_h = input_var - for w, b in zip(weights[:-1], biases[:-1]): + for w, b, act in zip(weights[:-1], biases[:-1], layer_activations): prev_h = tf.nn.dropout( act(tf.matmul(prev_h, w) + b), keep_prob=self.keep_prob_placeholder) From 7788e8b4da02a24436bc2bd4bca3a8dd11c6ebee Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 23 May 2017 15:33:58 +1000 Subject: [PATCH 074/119] Use normal logging for NN learner This will need to be reverted before merging. 
--- mloop/learners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 1a3ea60..439c08d 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1677,7 +1677,7 @@ def __init__(self, 'predict_local_minima_at_end':self.predict_local_minima_at_end}) #Remove logger so gaussian process can be safely picked for multiprocessing on Windows - self.log = None + #self.log = None def _construct_net(self): self.neural_net_impl = mlnn.NeuralNetImpl(self.num_params) @@ -1942,7 +1942,7 @@ def run(self): ''' #logging to the main log file from a process (as apposed to a thread) in cpython is currently buggy on windows and/or python 2.7 #current solution is to only log to the console for warning and above from a process - self.log = mp.log_to_stderr(logging.WARNING) + #self.log = mp.log_to_stderr(logging.WARNING) # The network needs to be created in the same process in which it runs self.create_neural_net() From 2b2bc8a36f86cd1b67477719f13f9efd5a45a717 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 23 May 2017 15:34:25 +1000 Subject: [PATCH 075/119] Add logging when searching for params --- mloop/learners.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mloop/learners.py b/mloop/learners.py index 439c08d..43dc0e4 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1934,6 +1934,8 @@ def find_next_parameters(self): # cleverly would probably correspond to introducing some kind of uncertainty-based biasing # (like the GP). 
#next_params = next_params + nr.uniform(-0.1, 0.1, size=next_params.shape) + self.log.debug("Suggesting params " + str(next_params) + " with predicted cost: " + + str(next_cost)) return next_params def run(self): From a11ccee417f08a6b6947874fdfddcdd18ba35cfc Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 23 May 2017 15:34:50 +1000 Subject: [PATCH 076/119] Updates to NN visualizations Specifically, start showing param vs run again, and also plot the predicted cross sections about the predicted best params (rather than the best params found). --- mloop/visualizations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index ee5baea..7349672 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -111,8 +111,8 @@ def create_controller_visualizations(filename, visualization = ControllerVisualizer(filename,file_type=file_type) if plot_cost_vs_run: visualization.plot_cost_vs_run() - #if plot_parameters_vs_run: - # visualization.plot_parameters_vs_run() + if plot_parameters_vs_run: + visualization.plot_parameters_vs_run() #if plot_parameters_vs_cost: # visualization.plot_parameters_vs_cost() @@ -677,7 +677,7 @@ def plot_cross_sections(self): figure_counter += 1 plt.figure(figure_counter) points = 100 - (_,cost_arrays) = self.return_cross_sections(points=points) + (_,cost_arrays) = self.return_cross_sections(points=points, cross_section_center=self.find_next_parameters()) rel_params = np.linspace(0,1,points) for ind in range(self.num_params): plt.plot(rel_params,cost_arrays[ind,:],'-',color=self.param_colors[ind]) From 0410e8ceacf44f442ea5ff0171fa11e875f24b0b Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 13:40:12 +1000 Subject: [PATCH 077/119] Bump up bias initialisation standard deviation This will cause more randomness in the initial and trained nets (I think). 
--- mloop/nnlearner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 68475ba..765bae7 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -71,7 +71,7 @@ def __init__(self, # Input + internal nodes prev_layer_dim = self.num_params - bias_stddev=0.1 + bias_stddev=0.5 for (i, dim) in enumerate(layer_dims): weights.append(tf.Variable( tf.random_normal([prev_layer_dim, dim], stddev=1.4/np.sqrt(prev_layer_dim)), From 5f4b23a23d367e51565e083ee46e174abd995557 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 13:41:59 +1000 Subject: [PATCH 078/119] Remove some old TODOs --- mloop/nnlearner.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 765bae7..31b8127 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -524,11 +524,6 @@ def fit_neural_net(self, all_params, all_costs): all_params, all_costs = self._scale_params_and_cost_list(all_params, all_costs) - # TODO: Consider adding some kind of "cost capping". Our NNs will never predict costs going - # off to infinity, so we could be "wasting" training cost due to totally irrelevant points. - # If we capped the costs to some value then this might help. Note that this is really just - # another form of cost scaling. 
- if self.fit_hyperparameters: # Every 20 fits (starting at 5, just because), re-fit the hyperparameters if int(len(all_params + 5) / 20) > self.last_hyperfit: @@ -564,8 +559,6 @@ def fit_neural_net(self, all_params, all_costs): else: net.destroy() - # TODO: Fit depth - self.net.fit( all_params, all_costs, From 6630983d862e481f486ba5996803edbe32c86379 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 13:55:52 +1000 Subject: [PATCH 079/119] Use timestamped file for net archive --- mloop/nnlearner.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 31b8127..2053d72 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -1,7 +1,9 @@ +import datetime import logging import math import time +import mloop.utilities as mlu import numpy as np import sklearn.preprocessing as skp import tensorflow as tf @@ -40,6 +42,14 @@ def __init__(self, losses_list): self.log = logging.getLogger(__name__) start = time.time() + + # TODO: Use a filename specific to this object? + self.save_archive_filename = ( + mlu.archive_foldername + + "neural_net_archive_" + + mlu.datetime_to_string(datetime.datetime.now()) + + ".ckpt") + self.log.info("Constructing net") self.graph = tf.Graph() self.tf_session = tf.Session(graph=self.graph) @@ -148,8 +158,7 @@ def save(self): ''' Exports the net to an archive dictionary. ''' - # TODO: Use a proper timestamped filename, maybe? 
- path = self.saver.save(self.tf_session, "net.ckpt") + path = self.saver.save(self.tf_session, self.save_archive_filename) self.log.info("Saving neural network to: " + path) return {'saver_path': path} From b84a74e3bb0ca7eef8504b20adb82473f8fb10bc Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 14:07:38 +1000 Subject: [PATCH 080/119] Tidying/renaming in neural net --- mloop/learners.py | 22 +++++++++++----------- mloop/nnlearner.py | 21 ++++++++++++++++++--- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 43dc0e4..dbe0c3a 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1680,7 +1680,7 @@ def __init__(self, #self.log = None def _construct_net(self): - self.neural_net_impl = mlnn.NeuralNetImpl(self.num_params) + self.neural_net = mlnn.NeuralNet(self.num_params) def _init_cost_scaler(self): ''' @@ -1694,7 +1694,7 @@ def create_neural_net(self): Creates the neural net. Must be called from the same process as fit_neural_net, predict_cost and predict_costs_from_param_array. 
''' self._construct_net() - self.neural_net_impl.init() + self.neural_net.init() def import_neural_net(self): ''' @@ -1703,7 +1703,7 @@ def import_neural_net(self): if not self.training_dict: raise ValueError self._construct_net() - self.neural_net_impl.load(self.training_dict['net']) + self.neural_net.load(self.training_dict['net']) def fit_neural_net(self): ''' @@ -1713,7 +1713,7 @@ def fit_neural_net(self): ''' self.scaled_costs = self.cost_scaler.transform(self.all_costs[:,np.newaxis])[:,0] - self.neural_net_impl.fit_neural_net(self.all_params, self.scaled_costs) + self.neural_net.fit_neural_net(self.all_params, self.scaled_costs) def predict_cost(self,params): ''' @@ -1722,7 +1722,7 @@ def predict_cost(self,params): Returns: float : Predicted cost at paramters ''' - return self.neural_net_impl.predict_cost(params) + return self.neural_net.predict_cost(params) def predict_cost_gradient(self,params): ''' @@ -1732,7 +1732,7 @@ def predict_cost_gradient(self,params): float : Predicted gradient at paramters ''' # scipy.optimize.minimize doesn't seem to like a 32-bit Jacobian, so we convert to 64 - return self.neural_net_impl.predict_cost_gradient(params).astype(np.float64) + return self.neural_net.predict_cost_gradient(params).astype(np.float64) def predict_costs_from_param_array(self,params): @@ -1899,8 +1899,8 @@ def update_archive(self): 'length_scale':self.length_scale, 'noise_level':self.noise_level, 'cost_scaler_init_index':self.cost_scaler_init_index}) - if self.neural_net_impl: - self.archive_dict.update({'net':self.neural_net_impl.save()}) + if self.neural_net: + self.archive_dict.update({'net':self.neural_net.save()}) def find_next_parameters(self): ''' @@ -1913,7 +1913,7 @@ def find_next_parameters(self): self.update_search_params() next_params = None next_cost = float('inf') - self.neural_net_impl.start_opt() + self.neural_net.start_opt() for start_params in self.search_params: result = so.minimize(fun = self.predict_cost, x0 = start_params, @@ -1923,7 
+1923,7 @@ def find_next_parameters(self): if result.fun < next_cost: next_params = result.x next_cost = result.fun - self.neural_net_impl.stop_opt() + self.neural_net.stop_opt() # Now tweak the selected parameters to make sure we don't just keep on looking in the same # place (the actual minimum might be a short distance away). # TODO: Rather than using [-0.1, 0.1] we should pick the fuzziness based on what we know @@ -2068,4 +2068,4 @@ def find_local_minima(self): # Methods for debugging/analysis. def get_losses(self): - return self.neural_net_impl.get_losses() + return self.neural_net.get_losses() diff --git a/mloop/nnlearner.py b/mloop/nnlearner.py index 2053d72..5cec4d2 100644 --- a/mloop/nnlearner.py +++ b/mloop/nnlearner.py @@ -270,7 +270,21 @@ def predict_cost_gradient(self,params): ''' return self.tf_session.run(self.output_var_gradient, feed_dict={self.input_placeholder: [params]})[0][0] + class SampledNeuralNet(): + ''' + A "neural network" that tracks a collection of SingleNeuralNet objects, and predicts the landscape + by sampling from that collection. + + This must run in the same process in which it's created. + + This class should be considered private to this module. + + Args: + net_creator: Callable that creates and returns a new SingleNeuralNet. + count: The number of individual networks to track. + ''' + def __init__(self, net_creator, count): @@ -340,7 +354,7 @@ def start_opt(self): def stop_opt(self): self.opt_net = None -class NeuralNetImpl(): +class NeuralNet(): ''' Neural network implementation. This may actually create multiple neural networks with different topologies or hyperparameters, and switch between them based on the data. 
@@ -401,8 +415,6 @@ def _make_net(self, reg): ''' def gelu_fast(_x): return 0.5 * _x * (1 + tf.tanh(tf.sqrt(2 / np.pi) * (_x + 0.044715 * tf.pow(_x, 3)))) - def amazing_abs(_x): - return tf.maximum(1 - tf.abs(_x), 0) creator = lambda: SingleNeuralNet( self.num_params, [64]*5, [gelu_fast]*5, @@ -414,6 +426,9 @@ def amazing_abs(_x): return SampledNeuralNet(creator, 1) def _fit_scaler(self): + ''' + Fits the cost and param scalers based on the scaler_samples member variable. + ''' if self.scaler_samples is None: self.log.error("_fit_scaler() called before samples set") raise ValueError From 3b06e69797eee942b8efc1e746af62d750b2343a Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 14:10:33 +1000 Subject: [PATCH 081/119] Rename nnlearner to neuralnet NeuralNet really is just supposed to be a neural network / function approximator. --- mloop/learners.py | 2 +- mloop/{nnlearner.py => neuralnet.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename mloop/{nnlearner.py => neuralnet.py} (100%) diff --git a/mloop/learners.py b/mloop/learners.py index dbe0c3a..09bb2ee 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -20,7 +20,7 @@ import sklearn.gaussian_process.kernels as skk import sklearn.preprocessing as skp import multiprocessing as mp -import mloop.nnlearner as mlnn +import mloop.neuralnet as mlnn learner_thread_count = 0 default_learner_archive_filename = 'learner_archive' diff --git a/mloop/nnlearner.py b/mloop/neuralnet.py similarity index 100% rename from mloop/nnlearner.py rename to mloop/neuralnet.py From 70e91f99b2fed1947db3225f1453a17598f7a45b Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 14:40:13 +1000 Subject: [PATCH 082/119] Tidy up NeuralNetLearner Remove unused attributes, fix some documentation. 
--- mloop/learners.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 09bb2ee..d64e898 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1477,7 +1477,6 @@ class NeuralNetLearner(Learner, mp.Process): end_event (event): Event to trigger end of learner. Keyword Args: - length_scale (Optional [array]): The initial guess for length scale(s) of the gaussian process. The array can either of size one or the number of parameters or None. If it is size one, it is assumed all the correlation lengths are the same. If it is the number of the parameters then all the parameters have their own independent length scale. If it is None, it is assumed all the length scales should be independent and they are all given an initial value of 1. Default None. trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction. default_bad_cost (Optional [float]): If a run is reported as bad and default_bad_cost is provided, the cost for the bad run is set to this default value. If default_bad_cost is None, then the worst cost received is set to all the bad runs. Default None. default_bad_uncertainty (Optional [float]): If a run is reported as bad and default_bad_uncertainty is provided, the uncertainty for the bad run is set to this default value. If default_bad_uncertainty is None, then the uncertainty is set to a tenth of the best to worst cost range. Default None. 
@@ -1486,11 +1485,10 @@ class NeuralNetLearner(Learner, mp.Process): predict_local_minima_at_end (Optional [bool]): If True finds all minima when the learner is ended. Does not if False. Default False. Attributes: - TODO: Update these. all_params (array): Array containing all parameters sent to learner. all_costs (array): Array containing all costs sent to learner. all_uncers (array): Array containing all uncertainties sent to learner. - scaled_costs (array): Array contaning all the costs scaled to have zero mean and a standard deviation of 1. Needed for training the gaussian process. + scaled_costs (array): Array contaning all the costs scaled to have zero mean and a standard deviation of 1. bad_run_indexs (list): list of indexes to all runs that were marked as bad. best_cost (float): Minimum received cost, updated during execution. best_params (array): Parameters of best run. (reference to element in params array). @@ -1499,18 +1497,16 @@ class NeuralNetLearner(Learner, mp.Process): worst_index (int): index to run with worst cost. cost_range (float): Difference between worst_cost and best_cost generation_num (int): Number of sets of parameters to generate each generation. Set to 5. - length_scale_history (list): List of length scales found after each fit. noise_level_history (list): List of noise levels found after each fit. - fit_count (int): Counter for the number of times the gaussian process has been fit. cost_count (int): Counter for the number of costs, parameters and uncertainties added to learner. params_count (int): Counter for the number of parameters asked to be evaluated by the learner. - gaussian_process (GaussianProcessRegressor): Gaussian process that is fitted to data and used to make predictions + neural_net (NeuralNet): Neural net that is fitted to data and used to make predictions. cost_scaler (StandardScaler): Scaler used to normalize the provided costs. + cost_scaler_init_index (int): The number of params to use to initialise cost_scaler. 
has_trust_region (bool): Whether the learner has a trust region. ''' def __init__(self, - update_hyperparameters = True, trust_region=None, default_bad_cost = None, default_bad_uncertainty = None, @@ -1536,7 +1532,6 @@ def __init__(self, #Counters self.costs_count = int(self.training_dict['costs_count']) - self.fit_count = int(self.training_dict['fit_count']) self.params_count = int(self.training_dict['params_count']) #Data from previous experiment @@ -1604,11 +1599,9 @@ def __init__(self, self.worst_cost = float('-inf') self.worst_index = 0 self.cost_range = float('inf') - self.length_scale_history = [] self.noise_level_history = [] self.costs_count = 0 - self.fit_count = 0 self.params_count = 0 self.has_local_minima = False @@ -1633,7 +1626,6 @@ def __init__(self, self.bad_uncer_frac = 0.1 #Fraction of cost range to set a bad run uncertainty #Optional user set variables - self.update_hyperparameters = bool(update_hyperparameters) self.predict_global_minima_at_end = bool(predict_global_minima_at_end) self.predict_local_minima_at_end = bool(predict_local_minima_at_end) if default_bad_cost is not None: @@ -1892,10 +1884,8 @@ def update_archive(self): 'worst_cost':self.worst_cost, 'worst_index':self.worst_index, 'cost_range':self.cost_range, - 'fit_count':self.fit_count, 'costs_count':self.costs_count, 'params_count':self.params_count, - 'update_hyperparameters':self.update_hyperparameters, 'length_scale':self.length_scale, 'noise_level':self.noise_level, 'cost_scaler_init_index':self.cost_scaler_init_index}) From 9f80041bb3c34e0e2c39afd2afdc7eb246cdbe7d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 14:41:08 +1000 Subject: [PATCH 083/119] Update description of neural net learner --- mloop/learners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/learners.py b/mloop/learners.py index d64e898..902f1f4 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1469,7 +1469,7 @@ def find_local_minima(self): class 
NeuralNetLearner(Learner, mp.Process): ''' - Shell of Neural Net Learner. + Learner that uses a neural network for function approximation. Args: params_out_queue (queue): Queue for parameters sent to controller. From bf83469b3c28868c0251847b9a8bf3d95d46f18d Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 14:41:20 +1000 Subject: [PATCH 084/119] Set minimum_uncertainty in nnl --- mloop/learners.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mloop/learners.py b/mloop/learners.py index 902f1f4..e8ef430 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1512,6 +1512,7 @@ def __init__(self, default_bad_uncertainty = None, nn_training_filename =None, nn_training_file_type ='txt', + minimum_uncertainty = 1e-8, predict_global_minima_at_end = True, predict_local_minima_at_end = False, **kwargs): @@ -1628,6 +1629,7 @@ def __init__(self, #Optional user set variables self.predict_global_minima_at_end = bool(predict_global_minima_at_end) self.predict_local_minima_at_end = bool(predict_local_minima_at_end) + self.minimum_uncertainty = float(minimum_uncertainty) if default_bad_cost is not None: self.default_bad_cost = float(default_bad_cost) else: @@ -1643,6 +1645,9 @@ def __init__(self, else: self.log.error('Both the default cost and uncertainty must be set for a bad run or they must both be set to None.') raise ValueError + if self.minimum_uncertainty <= 0: + self.log.error('Minimum uncertainty must be larger than zero for the learner.') + raise ValueError self._set_trust_region(trust_region) From 728c2047c22cd4eeb307e16de50a9a7c5f549dd2 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 14:57:01 +1000 Subject: [PATCH 085/119] Untrack local_tests --- local_tests/.gitignore | 6 --- local_tests/MLOOPQuickTest.py | 123 ------------------------------------------ 2 files changed, 129 deletions(-) delete mode 100644 local_tests/.gitignore delete mode 100644 local_tests/MLOOPQuickTest.py diff --git a/local_tests/.gitignore 
b/local_tests/.gitignore deleted file mode 100644 index 76f97fa..0000000 --- a/local_tests/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -M-LOOP_archives/ -M-LOOP_logs/ -output/ -results/ -checkpoint -net* diff --git a/local_tests/MLOOPQuickTest.py b/local_tests/MLOOPQuickTest.py deleted file mode 100644 index 660f654..0000000 --- a/local_tests/MLOOPQuickTest.py +++ /dev/null @@ -1,123 +0,0 @@ -''' -Created on 15Jun.,2016 - -@author: michaelhush -''' -from __future__ import absolute_import, division, print_function -__metaclass__ = type - -import mloop.controllers as mlc -import mloop.interfaces as mli -import mloop.testing as mlt -import mloop.visualizations as mlv -import mloop.utilities as mlu -import numpy as np -import logging -import matplotlib.pyplot as plt - -def main(): - - mlu.config_logger(file_log_level=logging.DEBUG, - console_log_level=logging.DEBUG) - - log = logging.getLogger('mloop.main') - - log.info("Making queues") - - tnum_params = 10 - - tmin_boundary=[-10.] * tnum_params - tmax_boundary=[ 10.] * tnum_params - - tmax_num_runs = 40 - tcost = -1. 
- - log.info("Making landscape") - - tlandscape = mlt.TestLandscape(num_params = tnum_params) - - tlandscape.set_random_quadratic_landscape(np.array(tmin_boundary),np.array(tmax_boundary),random_scale=False) - - which = 4 - if which == 0: - log.info("Running random controller") - interface = mli.TestInterface(test_landscape=tlandscape) - random_controller = mlc.create_controller(interface, - controller_type='random', - max_num_runs= tmax_num_runs, - num_params=tnum_params, - min_boundary=tmin_boundary, - max_boundary=tmax_boundary, - trust_region=0.1) - random_controller.optimize() - mlv.show_all_default_visualizations(random_controller, show_plots=False) - log.info("Random controller ended") - elif which == 1: - log.info("Running Nelder-Mead controller") - interface = mli.TestInterface(test_landscape=tlandscape) - nelder_mead_controller = mlc.create_controller(interface, - controller_type='nelder_mead', - max_num_runs= tmax_num_runs, - num_params=tnum_params, - min_boundary=tmin_boundary, - max_boundary=tmax_boundary) - nelder_mead_controller.optimize() - mlv.show_all_default_visualizations(nelder_mead_controller, show_plots=False) - log.info("Running Nelder-Mead controller") - elif which == 2: - log.info("Running differential evolution controller") - interface = mli.TestInterface(test_landscape=tlandscape) - diff_evo_controller = mlc.create_controller(interface, - controller_type='differential_evolution', - evolution_strategy='rand2', - max_num_runs= tmax_num_runs, - num_params=tnum_params, - min_boundary=tmin_boundary, - max_boundary=tmax_boundary) - diff_evo_controller.optimize() - mlv.show_all_default_visualizations(diff_evo_controller, show_plots=False) - log.info("Running differential evolution controller") - elif which == 3: - log.info("Running Gaussian process controller") - interface = mli.TestInterface(test_landscape=tlandscape) - gp_controller = mlc.create_controller(interface, - controller_type='gaussian_process', - no_delay=False, - max_num_runs= 
tmax_num_runs, - target_cost = tcost, - num_params=tnum_params, - min_boundary=tmin_boundary, - max_boundary=tmax_boundary) - #length_scale = 1.) - gp_controller.optimize() - mlv.show_all_default_visualizations(gp_controller, show_plots=False) - log.info("Gaussian process controller ended") - elif which == 4: - log.info("Running Neural net controller") - interface = mli.TestInterface(test_landscape=tlandscape) - nn_controller = mlc.create_controller(interface, - controller_type='neural_net', - no_delay=False, - max_num_runs= tmax_num_runs, - target_cost = tcost, - num_params=tnum_params, - min_boundary=tmin_boundary, - max_boundary=tmax_boundary) - nn_controller.optimize() - mlv.show_all_default_visualizations(nn_controller, show_plots=False) - log.info("Neural net process controller ended") - else: - raise ValueError - - log.info("True minimum:" + str(tlandscape.expected_minima)) - log.info("True minimum value:" + str(tlandscape.cost_function(p=tlandscape.expected_minima))) - - log.info("Visualizations started.") - - plt.show() - - log.info("MLOOP Quick Test ended") - - -if __name__ == '__main__': - main() From 22e981f8c9f6d7307d343b6f9a115a649d10dcdf Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 14:59:00 +1000 Subject: [PATCH 086/119] Tidy visualisations --- mloop/visualizations.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 7349672..681a438 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -186,11 +186,6 @@ def plot_cost_vs_run(self): for ut in self.unique_types: artists.append(plt.Line2D((0,1),(0,0), color=_color_from_controller_name(ut), marker='o', linestyle='')) plt.legend(artists,self.unique_types,loc=legend_loc) - f = open("output.txt", "w") - for (x, y) in zip(self.in_numbers, self.in_costs): - f.write(str(x) + " " + str(y) + "\n") - f.close() - def plot_parameters_vs_run(self): ''' @@ -577,8 +572,8 @@ def 
create_neural_net_learner_visualizations(filename, visualization = NeuralNetVisualizer(filename, file_type=file_type) if plot_cross_sections: visualization.plot_cross_sections() - #visualization.plot_surface() - visualization.plot_density_surface() + visualization.plot_surface() + visualization.plot_density_surface() visualization.plot_losses() From 5dc3ed7015fdcb92c2a1e622d214b009e70fbef6 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 15:00:46 +1000 Subject: [PATCH 087/119] Undo logging hackery --- mloop/learners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index e8ef430..21d3889 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1674,7 +1674,7 @@ def __init__(self, 'predict_local_minima_at_end':self.predict_local_minima_at_end}) #Remove logger so gaussian process can be safely picked for multiprocessing on Windows - #self.log = None + self.log = None def _construct_net(self): self.neural_net = mlnn.NeuralNet(self.num_params) @@ -1939,7 +1939,7 @@ def run(self): ''' #logging to the main log file from a process (as apposed to a thread) in cpython is currently buggy on windows and/or python 2.7 #current solution is to only log to the console for warning and above from a process - #self.log = mp.log_to_stderr(logging.WARNING) + self.log = mp.log_to_stderr(logging.WARNING) # The network needs to be created in the same process in which it runs self.create_neural_net() From af4a2f9829e60ec12acf2e4b3fe64d8aff3b46f5 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 16:41:10 +1000 Subject: [PATCH 088/119] Destroy the net when we're done --- mloop/learners.py | 1 + mloop/neuralnet.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/mloop/learners.py b/mloop/learners.py index 21d3889..b7c8aa9 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1980,6 +1980,7 @@ def run(self): 'local_minima_costs':self.local_minima_costs}) 
self.params_out_queue.put(end_dict) self._shut_down() + self.neural_net.destroy() self.log.debug('Ended neural network learner') def find_global_minima(self): diff --git a/mloop/neuralnet.py b/mloop/neuralnet.py index 5cec4d2..6599fd2 100644 --- a/mloop/neuralnet.py +++ b/mloop/neuralnet.py @@ -523,6 +523,13 @@ def save(self): 'net': self.net.save(), } + def destroy(self): + ''' + Destroys the net. + ''' + if not self.net is None: + self.net.destroy() + def fit_neural_net(self, all_params, all_costs): ''' Fits the neural net to the data. From 9763289f7f97f39663f45aefad3aa37f8669bc62 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 20:56:40 +1000 Subject: [PATCH 089/119] Add tensorflow to requirements and setup.py --- requirements.txt | 3 ++- setup.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5012a5c..23ef29b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ scipy>=0.17 matplotlib>=1.5 pytest>=2.9 scikit-learn>=0.18 -setuptools>=26 \ No newline at end of file +setuptools>=26 +tensorflow>=1.1.0 diff --git a/setup.py b/setup.py index c6b6017..07b67e3 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,8 @@ def main(): 'scipy>=0.17', 'matplotlib>=1.5', 'pytest>=2.9', - 'scikit-learn>=0.18'], + 'scikit-learn>=0.18', + 'tensorflow>=1.1.0'], tests_require=['pytest','setuptools>=26'], package_data = { @@ -60,4 +61,4 @@ def main(): if __name__=='__main__': mp.freeze_support() - main() \ No newline at end of file + main() From d5425fd1bb017c98427e8a516abba1ec3b592fc0 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 21:07:48 +1000 Subject: [PATCH 090/119] Use mlu.empty_exception instead of queue.Empty --- mloop/learners.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 21d3889..3c8a165 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -14,7 +14,6 @@ import logging import 
datetime import os -import queue import mloop.utilities as mlu import sklearn.gaussian_process as skg import sklearn.gaussian_process.kernels as skk @@ -1775,13 +1774,13 @@ def get_params_and_costs(self): # Block for 1s, because there might be a race with the event being set. (param, cost, uncer, bad) = self.costs_in_queue.get(block=True, timeout=1) first_dequeue = False - except queue.Empty: + except mlu.empty_exception: self.log.error('Neural network asked for new parameters but no new costs were provided after 1s.') raise ValueError else: try: (param, cost, uncer, bad) = self.costs_in_queue.get_nowait() - except queue.Empty: + except mlu.empty_exception: break self.costs_count +=1 From 42b07c65170fc3c362bf3d1eac82ed3ca3a9964a Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 22:15:36 +1000 Subject: [PATCH 091/119] Run Travis CI on trusty --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index e99f535..1b18501 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,5 +11,5 @@ install: script: python setup.py test os: - linux - - \ No newline at end of file +# Need trusty+ for tensorflow +dist: trusty From 5a87daafd7da3d67865b5c453c93d6408f11b37c Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 22:15:36 +1000 Subject: [PATCH 092/119] Run Travis CI on trusty --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index e99f535..1b18501 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,5 +11,5 @@ install: script: python setup.py test os: - linux - - \ No newline at end of file +# Need trusty+ for tensorflow +dist: trusty From 56506312f71e90f5792d7cded2b3aa4d60d6c04a Mon Sep 17 00:00:00 2001 From: GEM openSUSE Date: Mon, 29 May 2017 14:16:07 +1000 Subject: [PATCH 093/119] Fix NN exception on python 2 On python 2 math.ceil returns a float, and this was making range unhappy. 
--- mloop/neuralnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/neuralnet.py b/mloop/neuralnet.py index 5cec4d2..b50e16f 100644 --- a/mloop/neuralnet.py +++ b/mloop/neuralnet.py @@ -208,7 +208,7 @@ def fit(self, params, costs, epochs): for i in range(epochs): # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) - for j in range(math.ceil(len(params) / self.batch_size)): + for j in range(int(math.ceil(len(params) / self.batch_size))): batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] batch_input = lparams[batch_indices] batch_output = lcosts[batch_indices] From 9e32eb62d809d747277e1d175974c6728abd131e Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 29 May 2017 15:54:56 +1000 Subject: [PATCH 094/119] Log learner type with run number --- mloop/controllers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mloop/controllers.py b/mloop/controllers.py index fab50d4..baa4bf3 100644 --- a/mloop/controllers.py +++ b/mloop/controllers.py @@ -690,7 +690,7 @@ def _optimization_routine(self): if self.check_end_conditions(): #Start last training run - self.log.info('Run:' + str(self.num_in_costs +1)) + self.log.info('Run:' + str(self.num_in_costs +1) + ' (training)') next_params = self._next_params() self._put_params_and_out_dict(next_params) @@ -706,12 +706,14 @@ def _optimization_routine(self): ml_count = 0 while self.check_end_conditions(): - self.log.info('Run:' + str(self.num_in_costs +1)) + run_num = self.num_in_costs + 1 if ml_consec==self.generation_num or (self.no_delay and self.ml_learner_params_queue.empty()): + self.log.info('Run:' + str(run_num) + ' (trainer)') next_params = self._next_params() self._put_params_and_out_dict(next_params) ml_consec = 0 else: + self.log.info('Run:' + str(run_num) + ' (machine learner)') next_params = self.ml_learner_params_queue.get() 
super(MachineLearnerController,self)._put_params_and_out_dict(next_params, param_type=self.machine_learner_type) ml_consec += 1 From 6c9e868e0448501118bf9922b7348e0671ffc7ee Mon Sep 17 00:00:00 2001 From: GEM openSUSE Date: Mon, 29 May 2017 16:21:09 +1000 Subject: [PATCH 095/119] Add "training" logging to all training runs --- mloop/controllers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/controllers.py b/mloop/controllers.py index baa4bf3..a1c1e04 100644 --- a/mloop/controllers.py +++ b/mloop/controllers.py @@ -675,14 +675,14 @@ def _optimization_routine(self): ''' #Run the training runs using the standard optimization routine. self.log.debug('Starting training optimization.') - self.log.info('Run:' + str(self.num_in_costs +1)) + self.log.info('Run:' + str(self.num_in_costs +1) + ' (training)') next_params = self._first_params() self._put_params_and_out_dict(next_params) self.save_archive() self._get_cost_and_in_dict() while (self.num_in_costs < self.num_training_runs) and self.check_end_conditions(): - self.log.info('Run:' + str(self.num_in_costs +1)) + self.log.info('Run:' + str(self.num_in_costs +1) + ' (training)') next_params = self._next_params() self._put_params_and_out_dict(next_params) self.save_archive() From e53d3fcb3fabe81d80e5fcaa0ff91f15e275ece6 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 20:56:40 +1000 Subject: [PATCH 096/119] Add tensorflow to requirements and setup.py --- requirements.txt | 3 ++- setup.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5012a5c..23ef29b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ scipy>=0.17 matplotlib>=1.5 pytest>=2.9 scikit-learn>=0.18 -setuptools>=26 \ No newline at end of file +setuptools>=26 +tensorflow>=1.1.0 diff --git a/setup.py b/setup.py index c6b6017..07b67e3 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,8 @@ def main(): 'scipy>=0.17', 'matplotlib>=1.5', 
'pytest>=2.9', - 'scikit-learn>=0.18'], + 'scikit-learn>=0.18', + 'tensorflow>=1.1.0'], tests_require=['pytest','setuptools>=26'], package_data = { @@ -60,4 +61,4 @@ def main(): if __name__=='__main__': mp.freeze_support() - main() \ No newline at end of file + main() From 15538b636120ab2ed6f80853c1b8c2ef13833f51 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 21:07:48 +1000 Subject: [PATCH 097/119] Use mlu.empty_exception instead of queue.Empty --- mloop/learners.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index b7c8aa9..11aeb1f 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -14,7 +14,6 @@ import logging import datetime import os -import queue import mloop.utilities as mlu import sklearn.gaussian_process as skg import sklearn.gaussian_process.kernels as skk @@ -1775,13 +1774,13 @@ def get_params_and_costs(self): # Block for 1s, because there might be a race with the event being set. (param, cost, uncer, bad) = self.costs_in_queue.get(block=True, timeout=1) first_dequeue = False - except queue.Empty: + except mlu.empty_exception: self.log.error('Neural network asked for new parameters but no new costs were provided after 1s.') raise ValueError else: try: (param, cost, uncer, bad) = self.costs_in_queue.get_nowait() - except queue.Empty: + except mlu.empty_exception: break self.costs_count +=1 From 8cb1e450235e62dea9729df174e13f3109e47773 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 24 May 2017 22:15:36 +1000 Subject: [PATCH 098/119] Run Travis CI on trusty --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index e99f535..1b18501 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,5 +11,5 @@ install: script: python setup.py test os: - linux - - \ No newline at end of file +# Need trusty+ for tensorflow +dist: trusty From a12f3e59e8742afb168de45d0512f313aec2d20a Mon Sep 17 00:00:00 2001 From: GEM 
openSUSE Date: Mon, 29 May 2017 14:16:07 +1000 Subject: [PATCH 099/119] Fix NN exception on python 2 On python 2 math.ceil returns a float, and this was making range unhappy. --- mloop/neuralnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/neuralnet.py b/mloop/neuralnet.py index 6599fd2..e50d7c9 100644 --- a/mloop/neuralnet.py +++ b/mloop/neuralnet.py @@ -208,7 +208,7 @@ def fit(self, params, costs, epochs): for i in range(epochs): # Split the data into random batches, and train on each batch indices = np.random.permutation(len(params)) - for j in range(math.ceil(len(params) / self.batch_size)): + for j in range(int(math.ceil(len(params) / self.batch_size))): batch_indices = indices[j * self.batch_size : (j + 1) * self.batch_size] batch_input = lparams[batch_indices] batch_output = lcosts[batch_indices] From 94d1868df34aa54bc54c64c1f3c54523e5524082 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Mon, 29 May 2017 15:54:56 +1000 Subject: [PATCH 100/119] Log learner type with run number --- mloop/controllers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mloop/controllers.py b/mloop/controllers.py index fab50d4..baa4bf3 100644 --- a/mloop/controllers.py +++ b/mloop/controllers.py @@ -690,7 +690,7 @@ def _optimization_routine(self): if self.check_end_conditions(): #Start last training run - self.log.info('Run:' + str(self.num_in_costs +1)) + self.log.info('Run:' + str(self.num_in_costs +1) + ' (training)') next_params = self._next_params() self._put_params_and_out_dict(next_params) @@ -706,12 +706,14 @@ def _optimization_routine(self): ml_count = 0 while self.check_end_conditions(): - self.log.info('Run:' + str(self.num_in_costs +1)) + run_num = self.num_in_costs + 1 if ml_consec==self.generation_num or (self.no_delay and self.ml_learner_params_queue.empty()): + self.log.info('Run:' + str(run_num) + ' (trainer)') next_params = self._next_params() self._put_params_and_out_dict(next_params) ml_consec = 0 else: 
+ self.log.info('Run:' + str(run_num) + ' (machine learner)') next_params = self.ml_learner_params_queue.get() super(MachineLearnerController,self)._put_params_and_out_dict(next_params, param_type=self.machine_learner_type) ml_consec += 1 From 7f8e9f90af5b15a0a76b2d161d00a03c6b139ef5 Mon Sep 17 00:00:00 2001 From: GEM openSUSE Date: Mon, 29 May 2017 16:21:09 +1000 Subject: [PATCH 101/119] Add "training" logging to all training runs --- mloop/controllers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/controllers.py b/mloop/controllers.py index baa4bf3..a1c1e04 100644 --- a/mloop/controllers.py +++ b/mloop/controllers.py @@ -675,14 +675,14 @@ def _optimization_routine(self): ''' #Run the training runs using the standard optimization routine. self.log.debug('Starting training optimization.') - self.log.info('Run:' + str(self.num_in_costs +1)) + self.log.info('Run:' + str(self.num_in_costs +1) + ' (training)') next_params = self._first_params() self._put_params_and_out_dict(next_params) self.save_archive() self._get_cost_and_in_dict() while (self.num_in_costs < self.num_training_runs) and self.check_end_conditions(): - self.log.info('Run:' + str(self.num_in_costs +1)) + self.log.info('Run:' + str(self.num_in_costs +1) + ' (training)') next_params = self._next_params() self._put_params_and_out_dict(next_params) self.save_archive() From b5cafaf977289891ce19eb7df67bff6560ba85f3 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 30 May 2017 14:47:03 +1000 Subject: [PATCH 102/119] Make NNLearner.fit_neural_net more private --- mloop/learners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 11aeb1f..bee13b5 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1701,7 +1701,7 @@ def import_neural_net(self): self._construct_net() self.neural_net.load(self.training_dict['net']) - def fit_neural_net(self): + def _fit_neural_net(self): ''' Fits a neural net to the data. 
@@ -1954,7 +1954,7 @@ def run(self): if self.cost_scaler_init_index is None: self.cost_scaler_init_index = len(self.all_costs) self._init_cost_scaler() - self.fit_neural_net() + self._fit_neural_net() for _ in range(self.generation_num): self.log.debug('Neural network learner generating parameter:'+ str(self.params_count+1)) next_params = self.find_next_parameters() From 4103cfdcd576d01a8277fa6c6c6bc18e88cfeddc Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 30 May 2017 14:49:09 +1000 Subject: [PATCH 103/119] Support multiple nets at the learner level This means we can do things like cycle through nets per generation, interleave param generation and training of those nets, and so on. We can probably remove the sampling from the net itself now. --- mloop/learners.py | 71 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index bee13b5..e49b47f 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1620,6 +1620,7 @@ def __init__(self, #Constants, limits and tolerances self.generation_num = 1 + self.num_nets = 1 self.search_precision = 1.0e-6 self.parameter_searches = max(10,self.num_params) self.hyperparameter_searches = max(10,self.num_params) @@ -1676,7 +1677,7 @@ def __init__(self, self.log = None def _construct_net(self): - self.neural_net = mlnn.NeuralNet(self.num_params) + self.neural_net = [mlnn.NeuralNet(self.num_params) for _ in range(self.num_nets)] def _init_cost_scaler(self): ''' @@ -1690,7 +1691,8 @@ def create_neural_net(self): Creates the neural net. Must be called from the same process as fit_neural_net, predict_cost and predict_costs_from_param_array. 
''' self._construct_net() - self.neural_net.init() + for n in self.neural_net: + n.init() def import_neural_net(self): ''' @@ -1699,9 +1701,10 @@ def import_neural_net(self): if not self.training_dict: raise ValueError self._construct_net() - self.neural_net.load(self.training_dict['net']) + for i, n in enumerate(self.neural_net): + n.load(self.training_dict['net_' + str(i)]) - def _fit_neural_net(self): + def _fit_neural_net(self,index): ''' Fits a neural net to the data. @@ -1709,29 +1712,33 @@ def _fit_neural_net(self): ''' self.scaled_costs = self.cost_scaler.transform(self.all_costs[:,np.newaxis])[:,0] - self.neural_net.fit_neural_net(self.all_params, self.scaled_costs) + self.neural_net[index].fit_neural_net(self.all_params, self.scaled_costs) - def predict_cost(self,params): + def predict_cost(self,params,net_index=None): ''' - Produces a prediction of cost from the gaussian process at params. + Produces a prediction of cost from the neural net at params. Returns: float : Predicted cost at paramters ''' - return self.neural_net.predict_cost(params) + if net_index is None: + net_index = nr.randint(self.num_nets) + return self.neural_net[net_index].predict_cost(params) - def predict_cost_gradient(self,params): + def predict_cost_gradient(self,params,net_index=None): ''' Produces a prediction of the gradient of the cost function at params. Returns: float : Predicted gradient at paramters ''' + if net_index is None: + net_index = nr.randint(self.num_nets) # scipy.optimize.minimize doesn't seem to like a 32-bit Jacobian, so we convert to 64 - return self.neural_net.predict_cost_gradient(params).astype(np.float64) + return self.neural_net[net_index].predict_cost_gradient(params).astype(np.float64) - def predict_costs_from_param_array(self,params): + def predict_costs_from_param_array(self,params,net_index=None): ''' Produces a prediction of costs from an array of params. 
@@ -1739,7 +1746,7 @@ def predict_costs_from_param_array(self,params): float : Predicted cost at paramters ''' # TODO: Can do this more efficiently. - return [self.predict_cost(param) for param in params] + return [self.predict_cost(param,net_index) for param in params] def wait_for_new_params_event(self): @@ -1894,30 +1901,34 @@ def update_archive(self): 'noise_level':self.noise_level, 'cost_scaler_init_index':self.cost_scaler_init_index}) if self.neural_net: - self.archive_dict.update({'net':self.neural_net.save()}) + for i,n in enumerate(self.neural_net): + self.archive_dict.update({'net_'+str(i):n.save()}) - def find_next_parameters(self): + def find_next_parameters(self, net_index=None): ''' Returns next parameters to find. Increments counters appropriately. Return: next_params (array): Returns next parameters from cost search. ''' + if net_index is None: + net_index = nr.randint(self.num_nets) + self.params_count += 1 self.update_search_params() next_params = None next_cost = float('inf') - self.neural_net.start_opt() + self.neural_net[net_index].start_opt() for start_params in self.search_params: - result = so.minimize(fun = self.predict_cost, + result = so.minimize(fun = lambda x: self.predict_cost(x, net_index), x0 = start_params, - jac = self.predict_cost_gradient, + jac = lambda x: self.predict_cost_gradient(x, net_index), bounds = self.search_region, tol = self.search_precision) if result.fun < next_cost: next_params = result.x next_cost = result.fun - self.neural_net.stop_opt() + self.neural_net[net_index].stop_opt() # Now tweak the selected parameters to make sure we don't just keep on looking in the same # place (the actual minimum might be a short distance away). 
# TODO: Rather than using [-0.1, 0.1] we should pick the fuzziness based on what we know @@ -1943,6 +1954,10 @@ def run(self): # The network needs to be created in the same process in which it runs self.create_neural_net() + # We cycle through our different nets to generate each new set of params. This keeps track + # of the current net. + net_index = 0 + try: while not self.end_event.is_set(): self.log.debug('Learner waiting for new params event') @@ -1954,10 +1969,20 @@ def run(self): if self.cost_scaler_init_index is None: self.cost_scaler_init_index = len(self.all_costs) self._init_cost_scaler() - self._fit_neural_net() + # Now we need to generate generation_num new param sets, by iterating over our + # nets. We want to fire off new params as quickly as possible, so we don't train a + # net until we actually need to use it. But we need to make sure that each net gets + # trained exactly once, regardless of how many times it's used to generate new + # params. + num_nets_trained = 0 for _ in range(self.generation_num): + if num_nets_trained < self.num_nets: + self._fit_neural_net(net_index) + num_nets_trained += 1 + self.log.debug('Neural network learner generating parameter:'+ str(self.params_count+1)) - next_params = self.find_next_parameters() + next_params = self.find_next_parameters(net_index) + net_index = (net_index + 1) % self.num_nets self.params_out_queue.put(next_params) if self.end_event.is_set(): raise LearnerInterrupt() @@ -1979,7 +2004,8 @@ def run(self): 'local_minima_costs':self.local_minima_costs}) self.params_out_queue.put(end_dict) self._shut_down() - self.neural_net.destroy() + for n in self.neural_net: + n.destroy() self.log.debug('Ended neural network learner') def find_global_minima(self): @@ -2063,4 +2089,5 @@ def find_local_minima(self): # Methods for debugging/analysis. 
def get_losses(self): - return self.neural_net.get_losses() + # TODO + return self.neural_net[0].get_losses() From cc45898c42fae81b5236dba9b12eed48eaea5aa3 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 30 May 2017 14:50:09 +1000 Subject: [PATCH 104/119] Bump generation num and nets count up to 3 --- mloop/learners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index e49b47f..039e5b8 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1619,8 +1619,8 @@ def __init__(self, self.scaled_costs = None #Constants, limits and tolerances - self.generation_num = 1 - self.num_nets = 1 + self.num_nets = 3 + self.generation_num = 3 self.search_precision = 1.0e-6 self.parameter_searches = max(10,self.num_params) self.hyperparameter_searches = max(10,self.num_params) From 4b41fd38fd115f8ffff02ad08c6a523803b9c4b8 Mon Sep 17 00:00:00 2001 From: GEM openSUSE Date: Tue, 30 May 2017 16:07:05 +1000 Subject: [PATCH 105/119] Implement get_losses for multiple nets --- mloop/learners.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index 039e5b8..0cfae8c 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -2089,5 +2089,7 @@ def find_local_minima(self): # Methods for debugging/analysis. 
def get_losses(self): - # TODO - return self.neural_net[0].get_losses() + all_losses = [] + for n in self.neural_nets: + all_losses += n.get_losses() + return all_losses From ad090893a9949fc48fb4ed4102c03e47e8cbaad4 Mon Sep 17 00:00:00 2001 From: GEM openSUSE Date: Tue, 30 May 2017 16:09:00 +1000 Subject: [PATCH 106/119] Move start/stop opt out of debugging methods --- mloop/neuralnet.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mloop/neuralnet.py b/mloop/neuralnet.py index e50d7c9..06b5340 100644 --- a/mloop/neuralnet.py +++ b/mloop/neuralnet.py @@ -617,14 +617,6 @@ def predict_cost_gradient(self,params): ''' return self._unscale_gradient(self.net.predict_cost_gradient(self._scale_params(params))) - # Public mmethods to be used only for debugging/analysis. - - def get_losses(self): - ''' - Returns a list of training losses experienced by the network. - ''' - return self.losses_list - def start_opt(self): ''' Starts an optimisation run. Until stop_opt() is called, predict_cost() and @@ -637,3 +629,11 @@ def stop_opt(self): Stops an optimisation run. ''' self.net.stop_opt() + + # Public mmethods to be used only for debugging/analysis. + + def get_losses(self): + ''' + Returns a list of training losses experienced by the network. 
+ ''' + return self.losses_list From 8f4e9f1cab5e4d70bde627eaec97627704042718 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Tue, 30 May 2017 22:45:05 +1000 Subject: [PATCH 107/119] Fix get_losses --- mloop/learners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/learners.py b/mloop/learners.py index 0cfae8c..c9925b4 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -2090,6 +2090,6 @@ def find_local_minima(self): def get_losses(self): all_losses = [] - for n in self.neural_nets: + for n in self.neural_net: all_losses += n.get_losses() return all_losses From 8b4d67ac5acf63a356a6e099fc5414406ec8d268 Mon Sep 17 00:00:00 2001 From: GEM openSUSE Date: Wed, 31 May 2017 16:53:15 +1000 Subject: [PATCH 108/119] Support loading visualisations from archive --- mloop/visualizations.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 681a438..966a698 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -64,6 +64,27 @@ def show_all_default_visualizations(controller, show_plots=True): if show_plots: plt.show() +def show_all_default_visualizations_from_archive(controller_filename, learner_filename, controller_type, show_plots=True): + log = logging.getLogger(__name__) + configure_plots() + log.debug('Creating controller visualizations.') + controller_file_type = controller_filename.split(".")[-1] + learner_file_type = learner_filename.split(".")[-1] + create_controller_visualizations(controller_filename, file_type=controller_file_type) + + if controller_type == 'neural_net': + log.debug('Creating neural net visualizations.') + create_neural_net_learner_visualizations( + learner_filename, + file_type=learner_file_type) + else: + log.error('show_all_default_visualizations not implemented for type: ' + controller_type) + raise ValueError + + log.info('Showing visualizations, close all to end MLOOP.') + if show_plots: + plt.show() + def 
_color_from_controller_name(controller_name): ''' Gives a color (as a number betweeen zero an one) corresponding to each controller name string. From 88f56133b8891641b3baa6fa1099eaab09fff8f0 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 31 May 2017 17:16:32 +1000 Subject: [PATCH 109/119] Allow multiple nets to be saved Previously this didn't really work, because if you constructed multiple nets in quick succession they'd save over each other. Now the filenames have a few random bytes in them, so this shouldn't be a problem any more. --- mloop/neuralnet.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mloop/neuralnet.py b/mloop/neuralnet.py index 06b5340..0e17925 100644 --- a/mloop/neuralnet.py +++ b/mloop/neuralnet.py @@ -2,9 +2,11 @@ import logging import math import time +import base64 import mloop.utilities as mlu import numpy as np +import numpy.random as nr import sklearn.preprocessing as skp import tensorflow as tf @@ -43,11 +45,14 @@ def __init__(self, self.log = logging.getLogger(__name__) start = time.time() - # TODO: Use a filename specific to this object? self.save_archive_filename = ( mlu.archive_foldername + "neural_net_archive_" + mlu.datetime_to_string(datetime.datetime.now()) + + "_" + # We include 6 random bytes for deduplication in case multiple nets + # are created at the same time. 
+ + base64.urlsafe_b64encode(nr.bytes(6)).decode() + ".ckpt") self.log.info("Constructing net") From 7d2627b367a1022313e6f05edaa0ef736056cfbc Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Wed, 31 May 2017 17:25:50 +1000 Subject: [PATCH 110/119] Support multiple nets in visualisations --- mloop/visualizations.py | 65 ++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 966a698..8937913 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -671,46 +671,49 @@ def return_cross_sections(self, points=100, cross_section_center=None): self.log.error('cross_section_center not in boundaries:' + repr(cross_section_center)) raise ValueError - cross_parameter_arrays = [ np.linspace(min_p, max_p, points) for (min_p,max_p) in zip(self.min_boundary,self.max_boundary)] - cost_arrays = [] - for ind in range(self.num_params): - sample_parameters = np.array([cross_section_center for _ in range(points)]) - sample_parameters[:, ind] = cross_parameter_arrays[ind] - costs = self.predict_costs_from_param_array(sample_parameters) - cost_arrays.append(costs) - if self.cost_scaler.scale_: - cross_parameter_arrays = np.array(cross_parameter_arrays)/self.cost_scaler.scale_ - else: - cross_parameter_arrays = np.array(cross_parameter_arrays) - cost_arrays = self.cost_scaler.inverse_transform(np.array(cost_arrays)) - return (cross_parameter_arrays,cost_arrays) + res = [] + for net_index in range(self.num_nets): + cross_parameter_arrays = [ np.linspace(min_p, max_p, points) for (min_p,max_p) in zip(self.min_boundary,self.max_boundary)] + cost_arrays = [] + for ind in range(self.num_params): + sample_parameters = np.array([cross_section_center for _ in range(points)]) + sample_parameters[:, ind] = cross_parameter_arrays[ind] + costs = self.predict_costs_from_param_array(sample_parameters, net_index) + cost_arrays.append(costs) + if self.cost_scaler.scale_: + 
cross_parameter_arrays = np.array(cross_parameter_arrays)/self.cost_scaler.scale_ + else: + cross_parameter_arrays = np.array(cross_parameter_arrays) + cost_arrays = self.cost_scaler.inverse_transform(np.array(cost_arrays)) + res.append((cross_parameter_arrays, cost_arrays)) + return res def plot_cross_sections(self): ''' Produce a figure of the cross section about best cost and parameters ''' global figure_counter, legend_loc - figure_counter += 1 - plt.figure(figure_counter) points = 100 - (_,cost_arrays) = self.return_cross_sections(points=points, cross_section_center=self.find_next_parameters()) rel_params = np.linspace(0,1,points) - for ind in range(self.num_params): - plt.plot(rel_params,cost_arrays[ind,:],'-',color=self.param_colors[ind]) - if self.has_trust_region: - axes = plt.gca() - ymin, ymax = axes.get_ylim() - ytrust = ymin + 0.1*(ymax - ymin) + for (_,cost_arrays) in self.return_cross_sections(points=points, cross_section_center=self.find_next_parameters()): + figure_counter += 1 + plt.figure(figure_counter) for ind in range(self.num_params): - plt.plot([self.scaled_trust_min[ind],self.scaled_trust_max[ind]],[ytrust,ytrust],'s', color=self.param_colors[ind]) - plt.xlabel(scale_param_label) - plt.xlim((0,1)) - plt.ylabel(cost_label) - plt.title('NN Learner: Predicted landscape' + ('with trust regions.' 
if self.has_trust_region else '.')) - artists = [] - for ind in range(self.num_params): - artists.append(plt.Line2D((0,1),(0,0), color=self.param_colors[ind], linestyle='-')) - plt.legend(artists,[str(x) for x in range(1,self.num_params+1)],loc=legend_loc) + plt.plot(rel_params,cost_arrays[ind,:],'-',color=self.param_colors[ind]) + if self.has_trust_region: + axes = plt.gca() + ymin, ymax = axes.get_ylim() + ytrust = ymin + 0.1*(ymax - ymin) + for ind in range(self.num_params): + plt.plot([self.scaled_trust_min[ind],self.scaled_trust_max[ind]],[ytrust,ytrust],'s', color=self.param_colors[ind]) + plt.xlabel(scale_param_label) + plt.xlim((0,1)) + plt.ylabel(cost_label) + plt.title('NN Learner: Predicted landscape' + ('with trust regions.' if self.has_trust_region else '.')) + artists = [] + for ind in range(self.num_params): + artists.append(plt.Line2D((0,1),(0,0), color=self.param_colors[ind], linestyle='-')) + plt.legend(artists,[str(x) for x in range(1,self.num_params+1)],loc=legend_loc) def plot_surface(self): ''' From b7cd840e316f7990de0a023e51e25c0349042e5f Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 1 Jun 2017 11:16:49 +1000 Subject: [PATCH 111/119] Support uploading NN cross sections to plotly --- mloop/visualizations.py | 55 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 8937913..e2d2ba5 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -11,6 +11,9 @@ import logging import matplotlib.pyplot as plt import matplotlib as mpl +import plotly.plotly as py +import plotly.tools as tls +import plotly.exceptions as pye from mpl_toolkits.mplot3d import Axes3D @@ -64,7 +67,7 @@ def show_all_default_visualizations(controller, show_plots=True): if show_plots: plt.show() -def show_all_default_visualizations_from_archive(controller_filename, learner_filename, controller_type, show_plots=True): +def 
show_all_default_visualizations_from_archive(controller_filename, learner_filename, controller_type, show_plots=True, upload_cross_sections=False): log = logging.getLogger(__name__) configure_plots() log.debug('Creating controller visualizations.') @@ -76,7 +79,8 @@ def show_all_default_visualizations_from_archive(controller_filename, learner_fi log.debug('Creating neural net visualizations.') create_neural_net_learner_visualizations( learner_filename, - file_type=learner_file_type) + file_type=learner_file_type, + upload_cross_sections=upload_cross_sections) else: log.error('show_all_default_visualizations not implemented for type: ' + controller_type) raise ValueError @@ -578,7 +582,8 @@ def plot_hyperparameters_vs_run(self): def create_neural_net_learner_visualizations(filename, file_type='pkl', - plot_cross_sections=True): + plot_cross_sections=True, + upload_cross_sections=False): ''' Creates plots from a neural nets learner file. @@ -592,7 +597,7 @@ def create_neural_net_learner_visualizations(filename, ''' visualization = NeuralNetVisualizer(filename, file_type=file_type) if plot_cross_sections: - visualization.plot_cross_sections() + visualization.do_cross_sections(upload=upload_cross_sections) visualization.plot_surface() visualization.plot_density_surface() visualization.plot_losses() @@ -688,33 +693,45 @@ def return_cross_sections(self, points=100, cross_section_center=None): res.append((cross_parameter_arrays, cost_arrays)) return res - def plot_cross_sections(self): + def do_cross_sections(self, upload): ''' Produce a figure of the cross section about best cost and parameters ''' - global figure_counter, legend_loc points = 100 rel_params = np.linspace(0,1,points) - for (_,cost_arrays) in self.return_cross_sections(points=points, cross_section_center=self.find_next_parameters()): - figure_counter += 1 - plt.figure(figure_counter) - for ind in range(self.num_params): - plt.plot(rel_params,cost_arrays[ind,:],'-',color=self.param_colors[ind]) - if 
self.has_trust_region: + for net_index, (_,cost_arrays) in enumerate(self.return_cross_sections(points=points, cross_section_center=self.find_next_parameters())): + def prepare_plot(): + global figure_counter + figure_counter += 1 + fig = plt.figure(figure_counter) axes = plt.gca() - ymin, ymax = axes.get_ylim() - ytrust = ymin + 0.1*(ymax - ymin) for ind in range(self.num_params): - plt.plot([self.scaled_trust_min[ind],self.scaled_trust_max[ind]],[ytrust,ytrust],'s', color=self.param_colors[ind]) - plt.xlabel(scale_param_label) - plt.xlim((0,1)) - plt.ylabel(cost_label) - plt.title('NN Learner: Predicted landscape' + ('with trust regions.' if self.has_trust_region else '.')) + axes.plot(rel_params,cost_arrays[ind,:],'-',color=self.param_colors[ind],label=str(ind)) + if self.has_trust_region: + ymin, ymax = axes.get_ylim() + ytrust = ymin + 0.1*(ymax - ymin) + for ind in range(self.num_params): + axes.plot([self.scaled_trust_min[ind],self.scaled_trust_max[ind]],[ytrust,ytrust],'s', color=self.param_colors[ind]) + axes.set_xlabel(scale_param_label) + axes.set_xlim((0,1)) + axes.set_ylabel(cost_label) + axes.set_title('NN Learner: Predicted landscape' + ('with trust regions.' 
if self.has_trust_region else '.') + ' (' + str(net_index) + ')') + return fig + if upload: + plf = tls.mpl_to_plotly(prepare_plot()) + plf['layout']['showlegend'] = True + try: + url = py.plot(plf,auto_open=False) + print(url) + except pye.PlotlyRequestError: + print("Failed to upload due to quota restrictions") + prepare_plot() artists = [] for ind in range(self.num_params): artists.append(plt.Line2D((0,1),(0,0), color=self.param_colors[ind], linestyle='-')) plt.legend(artists,[str(x) for x in range(1,self.num_params+1)],loc=legend_loc) + def plot_surface(self): ''' Produce a figure of the cost surface (only works when there are 2 parameters) From ff3d45d0abd032d979e219292c807f244fc78ff5 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 1 Jun 2017 11:19:46 +1000 Subject: [PATCH 112/119] Add plotly to requirements --- requirements.txt | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 23ef29b..010acc0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ docutils>=0.3 numpy>=1.11 scipy>=0.17 matplotlib>=1.5 +plotly>=2.0.8 pytest>=2.9 scikit-learn>=0.18 setuptools>=26 diff --git a/setup.py b/setup.py index 07b67e3..3174f97 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ def main(): 'numpy>=1.11', 'scipy>=0.17', 'matplotlib>=1.5', + 'plotly>=2.0.8', 'pytest>=2.9', 'scikit-learn>=0.18', 'tensorflow>=1.1.0'], From 49123b9ffd5aa47ef4a8dfe99a46e7bc16b339c8 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 1 Jun 2017 11:59:27 +1000 Subject: [PATCH 113/119] Add script to show visualisations from an archive --- tools/landscape_vis.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tools/landscape_vis.py diff --git a/tools/landscape_vis.py b/tools/landscape_vis.py new file mode 100644 index 0000000..a63a21b --- /dev/null +++ b/tools/landscape_vis.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +import argparse + +parser = 
argparse.ArgumentParser(description='Plot cross sections of the predicted landscape, and optionally upload them via plotly. Must be run from the same directory as M-LOOP was run.') +parser.add_argument("controller_filename") +parser.add_argument("learner_filename") +parser.add_argument("learner_type") +parser.add_argument("-u","--upload",action="store_true",help="upload plots to the interwebs") +args = parser.parse_args() + +import mloop.visualizations as mlv +import mloop.utilities as mlu +import logging + +mlu.config_logger(log_filename=None, console_log_level=logging.DEBUG) + +mlv.show_all_default_visualizations_from_archive(args.controller_filename, args.learner_filename, args.learner_type, upload_cross_sections=args.upload) From 055fde5111dd407963915247c68777f3501a5cab Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 1 Jun 2017 14:21:45 +1000 Subject: [PATCH 114/119] Comment tidying --- mloop/learners.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/mloop/learners.py b/mloop/learners.py index c9925b4..3e475a7 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1472,7 +1472,7 @@ class NeuralNetLearner(Learner, mp.Process): Args: params_out_queue (queue): Queue for parameters sent to controller. - costs_in_queue (queue): Queue for costs for gaussian process. This must be tuple + costs_in_queue (queue): Queue for costs. end_event (event): Event to trigger end of learner. 
Keyword Args: @@ -1673,7 +1673,7 @@ def __init__(self, 'predict_global_minima_at_end':self.predict_global_minima_at_end, 'predict_local_minima_at_end':self.predict_local_minima_at_end}) - #Remove logger so gaussian process can be safely picked for multiprocessing on Windows + #Remove logger so neural net can be safely pickled for multiprocessing on Windows self.log = None def _construct_net(self): @@ -1929,16 +1929,6 @@ def find_next_parameters(self, net_index=None): next_params = result.x next_cost = result.fun self.neural_net[net_index].stop_opt() - # Now tweak the selected parameters to make sure we don't just keep on looking in the same - # place (the actual minimum might be a short distance away). - # TODO: Rather than using [-0.1, 0.1] we should pick the fuzziness based on what we know - # about length scales. - # TODO: It would be nice to deal with uncertainty more cleverly. Even though the current - # method will help find the true local minimum, it doesn't help if we get stuck in a local - # minimum and there's another one a long way away that appears slightly higher. To do this - # cleverly would probably correspond to introducing some kind of uncertainty-based biasing - # (like the GP).
- #next_params = next_params + nr.uniform(-0.1, 0.1, size=next_params.shape) self.log.debug("Suggesting params " + str(next_params) + " with predicted cost: " + str(next_cost)) return next_params From f27d2b7ed8481b45565ad35e6b134be7f4fddb29 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 1 Jun 2017 15:13:40 +1000 Subject: [PATCH 115/119] Logging tweaks --- mloop/learners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mloop/learners.py b/mloop/learners.py index 3e475a7..dd050a6 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1521,7 +1521,7 @@ def __init__(self, nn_training_filename = str(nn_training_filename) nn_training_file_type = str(nn_training_file_type) if not mlu.check_file_type_supported(nn_training_file_type): - self.log.error('GP training file type not supported' + repr(nn_training_file_type)) + self.log.error('NN training file type not supported' + repr(nn_training_file_type)) self.training_dict = mlu.get_dict_from_file(nn_training_filename, nn_training_file_type) From c53cded6ab4c3ca81e5efa00d7b3da2f1af2f5d2 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 1 Jun 2017 15:13:49 +1000 Subject: [PATCH 116/119] Make sure every net gets trained with each new parameter set Previously if the number of nets was greater than the generation num then we wouldn't train the extra nets. --- mloop/learners.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mloop/learners.py b/mloop/learners.py index dd050a6..1455c21 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -1976,6 +1976,10 @@ def run(self): self.params_out_queue.put(next_params) if self.end_event.is_set(): raise LearnerInterrupt() + # Train any nets that haven't been trained yet. + for i in range(self.num_nets - num_nets_trained): + self._fit_neural_net((net_index + i) % self.num_nets) + except LearnerInterrupt: pass # TODO: Fix this. 
We can't just do what's here because the costs queue might be empty, but From 3e7914cf97da964b2d7dae6da5fc1aee3f1d9a6b Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 1 Jun 2017 17:21:42 +1000 Subject: [PATCH 117/119] Switch cross sections to be about best found point (rather than best predicted point) --- mloop/visualizations.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index e2d2ba5..b4f463d 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -699,7 +699,8 @@ def do_cross_sections(self, upload): ''' points = 100 rel_params = np.linspace(0,1,points) - for net_index, (_,cost_arrays) in enumerate(self.return_cross_sections(points=points, cross_section_center=self.find_next_parameters())): + all_cost_arrays = [a for _,a in self.return_cross_sections(points=points)] + for net_index, cost_arrays in enumerate(all_cost_arrays): def prepare_plot(): global figure_counter figure_counter += 1 From 9356e31725bdba9f31dd24f5ad5678f5d92058da Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 1 Jun 2017 17:22:15 +1000 Subject: [PATCH 118/119] Add plot of max/min/mean cross sections --- mloop/visualizations.py | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index b4f463d..17c25f2 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -731,7 +731,44 @@ def prepare_plot(): for ind in range(self.num_params): artists.append(plt.Line2D((0,1),(0,0), color=self.param_colors[ind], linestyle='-')) plt.legend(artists,[str(x) for x in range(1,self.num_params+1)],loc=legend_loc) - + if self.num_nets > 1: + # And now create a plot showing the average, max and min for each cross section. 
+ def prepare_plot(): + global figure_counter + figure_counter += 1 + fig = plt.figure(figure_counter) + axes = plt.gca() + for ind in range(self.num_params): + this_param_cost_array = np.array(all_cost_arrays)[:,ind,:] + mn = np.mean(this_param_cost_array, axis=0) + m = np.min(this_param_cost_array, axis=0) + M = np.max(this_param_cost_array, axis=0) + axes.plot(rel_params,mn,'-',color=self.param_colors[ind],label=str(ind)) + axes.plot(rel_params,m,'--',color=self.param_colors[ind]) + axes.plot(rel_params,M,'--',color=self.param_colors[ind]) + axes.set_xlabel(scale_param_label) + axes.set_xlim((0,1)) + axes.set_ylabel(cost_label) + axes.set_title('NN Learner: Average predicted landscape') + return fig + if upload: + plf = tls.mpl_to_plotly(prepare_plot()) + plf['layout']['showlegend'] = True + for i,d in enumerate(plf['data']): + d['legendgroup'] = str(int(i/3)) + if not i % 3 == 0: + d['showlegend'] = False + # Pretty sure this shouldn't be necessary, but it seems to be anyway. + d['line']['dash'] = 'dash' + try: + url = py.plot(plf,auto_open=False) + print(url) + except pye.PlotlyRequestError: + print("Failed to upload due to quota restrictions") + prepare_plot() + for ind in range(self.num_params): + artists.append(plt.Line2D((0,1),(0,0), color=self.param_colors[ind], linestyle='-')) + plt.legend(artists,[str(x) for x in range(1,self.num_params+1)],loc=legend_loc) def plot_surface(self): ''' From 4667761864b98f3943552908022b2ea87ac4fff1 Mon Sep 17 00:00:00 2001 From: Harry Slatyer Date: Thu, 1 Jun 2017 17:39:21 +1000 Subject: [PATCH 119/119] Show labels on min/max plots too --- mloop/visualizations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 17c25f2..f746e76 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -744,8 +744,8 @@ def prepare_plot(): m = np.min(this_param_cost_array, axis=0) M = np.max(this_param_cost_array, axis=0) 
axes.plot(rel_params,mn,'-',color=self.param_colors[ind],label=str(ind)) - axes.plot(rel_params,m,'--',color=self.param_colors[ind]) - axes.plot(rel_params,M,'--',color=self.param_colors[ind]) + axes.plot(rel_params,m,'--',color=self.param_colors[ind],label=str(ind)) + axes.plot(rel_params,M,'--',color=self.param_colors[ind],label=str(ind)) axes.set_xlabel(scale_param_label) axes.set_xlim((0,1)) axes.set_ylabel(cost_label)