diff --git a/doc/source/api/controllers.rst b/doc/source/api/controllers.rst index 9b85904..9af1001 100644 --- a/doc/source/api/controllers.rst +++ b/doc/source/api/controllers.rst @@ -1,3 +1,5 @@ +.. _api-controllers: + controllers ----------- diff --git a/doc/source/api/index.rst b/doc/source/api/index.rst index 70c36a0..3d2ff16 100644 --- a/doc/source/api/index.rst +++ b/doc/source/api/index.rst @@ -1,3 +1,5 @@ +.. _sec-api: + M-LOOP API ========== diff --git a/doc/source/api/learners.rst b/doc/source/api/learners.rst index 3e76c52..642105a 100644 --- a/doc/source/api/learners.rst +++ b/doc/source/api/learners.rst @@ -1,3 +1,5 @@ +.. _api-learners: + learners --------- diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst new file mode 100644 index 0000000..eeb6fac --- /dev/null +++ b/doc/source/contributing.rst @@ -0,0 +1,8 @@ +.. _sec-contributing: + +Contributing +============ + + + + diff --git a/doc/source/data.rst b/doc/source/data.rst new file mode 100644 index 0000000..4c328a8 --- /dev/null +++ b/doc/source/data.rst @@ -0,0 +1,36 @@ +.. _sec-data: + +Data +==== + +M-LOOP saves all data produced by the experiment in archives which are saved to disk during and after the optimization run. The archives also contain information derived from the data, including the machine learning model for how the experiment works. Here we explain how to interpret the file archives. + +File Formats +------------ + +M-LOOP currently supports three file formats for all file input and output. + +- 'txt' text files: Human readable text files. This is the default file format for all outputs. The advantage of text files is they are easy to read, and there will be no format compatibility issues in the future. However, there will be some loss of precision in your data. To ensure you keep all significant figure you may want to use 'pkl' or 'mat'. +- 'mat' MATLAB files: Matlab files that can be opened and written with MATLAB or `numpy `_. 
+- 'pkl' pickle files: a serialization of a python dictionary made with `pickle `. Your data can be retrieved from this dictionary using the appropriate keywords. + +File Keywords +------------- + +The archives contain a set of keywords/variable names with associated data. The quickest way to understand what the values mean for a particular keyword is to :ref:`search` the documentation for a description. + +For a comprehensive list of all the keywords looks at the attributes described in the API. + +For the controller archive see :ref:`api-controllers`. + +For the learner archive see :ref:`api-learners`. The generic keywords are described in the class Learner, with learner specific options described in the derived classes, for example GaussianProcessLearner. + +Converting files +---------------- + +If for whatever reason you want to convert files between the formats you can do so using the utilities module of M-LOOP. For example the following python code will convert the file controller_archive_2016-08-18_12-18.pkl from a 'pkl' file to a 'mat' file:: + + import mloop.utilities as mlu + + saved_dict = mlu.get_dict_from_file('./M-LOOP_archives/controller_archive_2016-08-18_12-18.pkl','pkl') + mlu.save_dict_to_file(saved_dict,'./M-LOOP_archives/controller_archive_2016-08-18_12-18.mat','mat') diff --git a/doc/source/examples.rst b/doc/source/examples.rst index 3337169..9aa7edb 100644 --- a/doc/source/examples.rst +++ b/doc/source/examples.rst @@ -1,3 +1,6 @@ +.. 
_sec-examples: + Examples --------- -I like turtles side 8 \ No newline at end of file +======== + +Blah \ No newline at end of file diff --git a/doc/source/images/M-LOOP_diagram.png b/doc/source/images/M-LOOP_diagram.png new file mode 100644 index 0000000..192c8bf Binary files /dev/null and b/doc/source/images/M-LOOP_diagram.png differ diff --git a/doc/source/index.rst b/doc/source/index.rst index 8229e9c..84d87c0 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -32,8 +32,12 @@ Contents install tutorial + interfaces + data + visualizations options examples + contributing api/index Indices diff --git a/doc/source/install.rst b/doc/source/install.rst index a9cc22d..e3f4497 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -11,7 +11,6 @@ The installation process involves four steps. 3. Install the development version of :ref:`sec-M-LOOP`. 4. :ref:`Test` your M-LOOP install. - .. _sec-anaconda: Anaconda @@ -56,6 +55,12 @@ M-LOOP can be installed from the source code with three commands:: The first command downloads the latest source code for M-LOOP from github into the current directory, the second moves into the M-LOOP source directory, and the third link builds the package and creates a link from you python package to the source. You may need admin privileges to run the setup script. +At any time you can update M-LOOP to the latest version from github by running the command:: + + git pull origin master + +in the M-LOOP directory. + .. _sec-Testing: Test Installation @@ -63,9 +68,9 @@ Test Installation To test your M-LOOP installation use the command:: - python setup.py pytest + python setup.py test -In the M-LOOP source code directory. The tests should take around three minutes to complete. If you find a error please consider contributing to the project by reporting a bug on the github. +In the M-LOOP source code directory. The tests should take around five minutes to complete. 
If you find an error please consider :ref:`sec-contributing` to the project and report a bug on the `github `_. Documentation ------------- diff --git a/doc/source/interfaces.rst b/doc/source/interfaces.rst new file mode 100644 index 0000000..cd31aeb --- /dev/null +++ b/doc/source/interfaces.rst @@ -0,0 +1,43 @@ +.. _sec-interfaces: + +Interfaces +========== + +Currently M-LOOP only supports the File interface, which is also described in :ref:`sec-tutorial`. There will be more added very soon. If you have any suggestions for interfaces please consider :ref:`sec-contributing` to the project. + +File Interface +-------------- + +The simplest method to connect your experiment to M-LOOP is with the file interface where data is exchanged by writing files to disk. To use this interface you can include the option:: + + interface='file' + +in your configuration file. The file interface happens to be the default, so this is not necessary. + +The file interface works under the assumption that your experiment follows the following algorithm. + +1. Wait for the file *exp_input.txt* to be made on the disk in the same folder M-LOOP is run. +2. Read the parameters for the next experiment from the file (named params). +3. Delete the file *exp_input.txt*. +4. Run the experiment with the parameters provided and calculate a cost, and optionally the uncertainty. +5. Write the cost to the file *exp_output.txt*. Go back to step 1. + +It is important you delete the file *exp_input.txt* after reading it, since it is used as an indicator for the next experiment to run. + +When writing the file *exp_output.txt* there are three keywords and values you can include in your file, for example after the first run your experiment may produce the following:: + + cost = 0.5 + uncer = 0.01 + bad = false + +cost refers to the cost calculated from the experimental data. uncer is optional, and refers to the uncertainty in the cost measurement made. 
Note, M-LOOP by default assumes there is some noise corrupting costs, which is fitted and compensated for. Hence, if there is some noise in your costs which you are unable to predict from a single measurement, do not worry, you do not have to estimate uncer, you can just leave it out. Lastly bad can be used to indicate an experiment failed and was not able to produce a cost. If the experiment worked set bad = false and if it failed set bad = true. + +Note you do not have to include all of the keywords, you must provide at least a cost or the bad keyword set to true. For example a successful run can simply be:: + + cost = 0.3 + +and a failed experiment can be as simple as:: + + bad = True + +Once the *exp_output.txt* has been written to disk, M-LOOP will read it and delete it. diff --git a/doc/source/options.rst b/doc/source/options.rst index f1bc186..f572fd4 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -1,3 +1,5 @@ +.. _sec-options: + Options ------- I like turtles 3 \ No newline at end of file diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst index 70ec4a2..3664a00 100644 --- a/doc/source/tutorial.rst +++ b/doc/source/tutorial.rst @@ -3,4 +3,203 @@ Tutorial ======== -I like turtles 3. \ No newline at end of file +Here we provide a tutorial on how to use M-LOOP to optimize a generic experiment. M-LOOP is flexible and can be customized with a variety of :ref:`sec-options` and :ref:`sec-interfaces`, it can be run from the command line or used as a :ref:`python API `. Here we introduce the basic settings to get M-LOOP up and running as quickly as possible. + +Overview +-------- + +The basic operation of M-LOOP is sketched below. + +.. _fig-mloop-diag: + +.. figure:: images/M-LOOP_diagram.png + :alt: M-LOOP in a loop with an experiment sending parameters and receiving costs. + +There are three stages: + +1. 
M-LOOP is started with the command:: + + M-LOOP + + M-LOOP first looks for the configuration file *exp_input.txt*, which contains options like the number of parameters and their limits, in the folder it is executed, then starts the optimization process. + +2. M-LOOP controls and optimizes the experiment by exchanging files written to disk. M-LOOP produces a file called *exp_input.txt* which contains a variable params with the next parameters to be run by the experiment. The experiment is expected to run an experiment with these parameters and measure the resultant cost. The experiment should then write the file *exp_output.txt* which contains at least the variable cost which quantifies the performance of that experimental run, and optionally, the variables uncer (for uncertainty) and bad (if the run failed). This process is repeated many times until the halting condition is met. + +3. Once the optimization process is complete, M-LOOP prints to the console the parameters and cost of the best run performed during the experiment, and a prediction of what the optimal parameters (with the corresponding predicted cost and uncertainty). M-LOOP also produces a set of plots that allow the user to visualize the optimization process and cost landscape. During operation and at the end M-LOOP write three files to disk: + + - *M-LOOP_[datetime].log* a log of the console output and other debugging information during the run. + - *controller_archive_[datetime].txt* an archive of all the experimental data recorded and the results. + - *learner_archive_[datetime].txt* an archive of the model created by the machine learner of the experiment. + +In what follows we will unpack this process and give details on how to configure and run M-LOOP. + +Launching M-LOOP +---------------- + +Launching M-LOOP is performed by executing the command M-LOOP on the console. 
You can also provide the name of your configuration file if you do not want to use the default with the command:: + + M-LOOP -c [config_filename] + +Configuration File +------------------ + +The configuration file contains a list of options and settings for the optimization run. Each option must be started on a new line and formatted as:: + + [keyword] = [value] + +You can add comments to your file using #, everything past # will be ignored. Examples of relevant keywords and syntax for the values is provided in :ref:`sec-examples` and a comprehensive list of options is described in :ref:`sec-options`. The values should be formatted with python syntax, strings should be surrounded with single or double quotes and arrays of values can be surrounded with square brackets/parentheses with numbers separated with commas. In this tutorial we will examine the example file *tutoral_config.txt*:: + + #Tutorial Config + #--------------- + + #Parameter settings + num_params = 2 #number of parameters + min_boundary = [-1,-1] #minimum boundary + max_boundary = [1,1] #maximum boundary + + #Halting conditions + max_num_runs = 1000 #maximum number of runs + max_num_runs_without_better_params = 50 #maximum number of runs without finding better parameters + target_cost = 0.01 #optimization halts when a cost below this target is found + + #Learner specific options + first_params = [0.5,0.5] #first parameters to try + trust_region = 0.4 #maximum % move distance from best params + + #File format options + interface_file_type = 'txt' #file types of *exp_input.mat* and *exp_output.mat* + controller_archive_file_type = 'mat' #file type of the controller archive + learner_archive_file_type = 'pkl' #file type of the learner archive + + #Visualizations + visualizations = True + +We will now explain the options in each of their groups. 
In almost all cases you will only need to set the parameter settings and halting conditions, but we have also described a few of the most commonly used extra options. + +Parameter settings +~~~~~~~~~~~~~~~~~~ + +The number of parameters and their limits is defined with three keywords:: + + num_params = 2 + min_boundary = [-1,-1] + max_boundary = [1,1] + +num_params defines the number of parameters, min_boundary defines the minimum value each of the parameters can take and max_boundary defines the maximum value each parameter can take. Here there are two values which each must be between -1 and 1. + +Halting conditions +~~~~~~~~~~~~~~~~~~ + +The halting conditions define when the simulation will stop. We present three options here:: + + max_num_runs = 100 + max_num_runs_without_better_params = 10 + target_cost = 0.1 + +max_num_runs is the maximum number of runs that the optimization algorithm is allowed to run. max_num_runs_without_better_params is the maximum number of runs allowed before a lower cost and better parameters are found. Finally, when target_cost is set, if a run produces a cost that is less than this value the optimization process will stop. + +When multiple halting conditions are set, the optimization process will halt when any one of them is met. + +If you do not have any prior knowledge of the problem use only the keyword max_num_runs and set it to the highest value you can wait for. If you have some knowledge about what the minimum attainable cost is or there is some cost threshold you need to achieve, you might want to set the target_cost. max_num_runs_without_better_params is useful if you want to let the optimization algorithm run as long as it needs until there is a good chance the global optimum has been found. + +If you do not want one of the halting conditions, simply delete it from your file. 
For example if you just wanted the algorithm to search as long as it can until it found a global minimum you could set:: + + max_num_runs_without_better_params = 10 + + +Learner specific options +~~~~~~~~~~~~~~~~~~~~~~~~ + +There are many learner specific options (and different learner algorithms) described in :ref:`sec-options`. Here we consider just a couple of the most commonly used ones. M-LOOP has been designed to find an optimum quickly with no custom configuration as long as the experiment is able to provide a cost for every parameter it provides. + +However if your experiment will fail to work if there are sudden and significant changes to your parameters you may need to set the following options:: + + first_parameters = [0.5,0.5] + trust_region = 0.4 + +first_parameters defines the first parameters the learner will try. trust_region defines the maximum change allowed in the parameters from the best parameters found so far. In the current example the region size is 2 by 2, with a trust region of 40% thus the maximum allowed change for the second run will be [0 +/- 0.8, 0 +/- 0.8]. + +If you experiment reliably produces costs for any parameter set you will not need these settings and you can just delete them. + +File format options +~~~~~~~~~~~~~~~~~~~ + +You can set the file formats for the archives produced at the end and the files exchanged with the experiment with the options:: + + interface_file_type = 'txt' + controller_archive_file_type = 'mat' + learner_archive_file_type = 'pkl' + +interface_file_type controls the file format for the files exchanged with the experiment. controller_archive_file_type and learner_archive_file_type control the format of the respective archives. + +There are three file formats currently available: 'mat' is for MATLAB readable files, 'pkl' if for python binary archives created using the `pickle package `_, and 'txt' human readable text files. For more details on these formats see :ref:`sec-data`. 
+ +Visualization +~~~~~~~~~~~~~ + +By default M-LOOP will display a set of plots that allow the user to visualize the optimization process and the cost landscape. To change this behavior use the option:: + + visualizations = True + +Set it to false to turn the visualizations off. For more details see :ref:`sec-visualizations`. + +File Interface +-------------- + +There are many options of how to connect M-LOOP to your experiment. We consider the most generic method, writing and reading files to disk. For other options see :ref:`sec-interfaces`. If you design a bespoke interface for your experiment please consider :ref:`sec-contributing` to the project by sharing your method with other users. + +The file interface works under the assumption that you experiment follows the following algorithm. + +1. Wait for the file *exp_input.txt* to be made on the disk in the same folder M-LOOP is run. +2. Read the parameters for the next experiment from the file (named params). +3. Delete the file *exp_input.txt*. +4. Run the experiment with the parameters provided and calculate a cost, and optionally the uncertainty. +5. Write the cost to the file *exp_output.txt*. Go back to step 1. + +It is important you delete the file *exp_input.txt* after reading it, since it is used to as an indicator for the next experiment to run. + +When writing the file *exp_output.txt* there are three keywords and values you can include in your file, for example after the first run your experiment may produce the following:: + + cost = 0.5 + uncer = 0.01 + bad = false + +cost refers to the cost calculated from the experimental data. uncer, is optional, and refers to the uncertainty in the cost measurement made. Note, M-LOOP by default assumes there is some noise corrupting costs, which is fitted and compensated for. Hence, if there is some noise in your costs which you are unable to predict from a single measurement, do not worry, you do not have to estimate uncer, you can just leave it out. 
Lastly bad can be used to indicate an experiment failed and was not able to produce a cost. If the experiment worked set bad = false and if it failed set bad = true. + +Note you do not have to include all of the keywords, you must provide at least a cost or the bad keyword set to false. For example a successful run can simply be:: + + cost = 0.3 + +and failed experiment can be as simple as:: + + bad = True + +Once the *exp_output.txt* has been written to disk, M-LOOP will read it and delete it. + +Parameters and cost function +---------------------------- + +Choosing the right parameterization of your experiment and cost function will be an important part of getting great results. + +If you have time dependent functions in your experiment you will need to choose a parametrization of these function before interfacing them with M-LOOP. M-LOOP will take more time and experiments to find an optimum, given more parameters. But if you provide too few parameters, you may not be able to achieve your cost target. + +Fortunately, the visualizations provided after the optimization will help you determine which parameters contributed the most to the optimization process. Try with whatever parameterization is convenient to start and use the data produced afterwards to guide you on how to better improve the parametrization of your experiment. + +Picking the right cost function from experimental observables will also be important. M-LOOP will always find a global optimal as quick as it can, but if you have a poorly chosen cost function, the global optimal may not what you really wanted to optimize. Make sure you pick a cost function that will uniquely produce the result you want. Again, do not be afraid to experiment and use the data produced by the optimization runs to improve the cost function you are using. + +Have a look at our `paper `_ on using M-LOOP to create a Bose-Einstein Condensate for an example of choosing a parametrization and cost function for an experiment. 
+ +Results +------- + +Once M-LOOP has completed the optimization, it will output results in several ways. + +M-LOOP will print results to the console. It will give the parameters of the experimental run that produced the lowest cost. It will also provide a set of parameters which are predicted to be produce the lowest average cost. If there is no noise in the costs your experiment produced, then the best parameters and predicted best parameters will be the same. If there was some noise your costs then it is possible that there will be a difference between the two. This is because the noise might have resulted with a set of experimental parameters that produced a lower cost due to a random fluke. The real optimal parameters that correspond to the minimum average cost are the predicted best parameters. In general, use the predicted best parameters (when provided) as the final result of the experiment. + +M-LOOP will produce an archive for the controller and machine learner. The controller archive contains all the data gathered during the experimental run and also other configuration details set by the user. By default it will be a 'txt' file which is human readable. If the meaning of a keyword and its associated data in the file is unclear, just :ref:`search` the documentation with the keyword to find a description. The learner archive contains a model of the experiment produced by the machine learner algorithm, which is currently a gaussian process. By default it will also be a 'txt' file. For more detail on these files see :ref:`sec-data`. + +M-LOOP, by default, will produce a set of visualizations. These plots show the optimizations process over time and also predictions made by the learner of the cost landscape. For more details on these visualizations and their interpretation see :ref:`sec-visualizations`. 
+ + + + diff --git a/doc/source/visualizations.rst b/doc/source/visualizations.rst new file mode 100644 index 0000000..3b2f290 --- /dev/null +++ b/doc/source/visualizations.rst @@ -0,0 +1,6 @@ +.. _sec-visualizations: + +Visualizations +============== + +Blah \ No newline at end of file diff --git a/examples/complete_controller_config.txt b/examples/complete_controller_config.txt index effa6e0..4e167a1 100644 --- a/examples/complete_controller_config.txt +++ b/examples/complete_controller_config.txt @@ -4,7 +4,7 @@ #General options max_num_runs = 1000 #number of planned runs target_cost = 0.1 #cost to beat -max_repeats_without_better_params = 10 #max allowed number of runs between finding better parameters +max_num_runs_without_better_params = 10 #max allowed number of runs between finding better parameters controller_archive_filename = 'test' #filename prefix for controller archive controller_archive_file_type = 'mat' #file_type for controller archive archive_extra_dict = {'test':'this_is'} #dictionary of any data to be put in archive diff --git a/examples/complete_gaussian_process_config.txt b/examples/complete_gaussian_process_config.txt index 598a4a1..aa5b9ef 100644 --- a/examples/complete_gaussian_process_config.txt +++ b/examples/complete_gaussian_process_config.txt @@ -17,15 +17,15 @@ update_hyperparameters = True #whether noise level and lengths scales a trust_region = [5,5] #maximum move distance from best params default_bad_cost = 10 #default cost for bad run default_bad_uncertainty = 1 #default uncertainty for bad run -gp_archive_filename = 'cpg_run' #filename for archive -gp_archive_file_type = 'txt' #file type of archive +learner_archive_filename = 'a_word' #filename of gp archive +learner_archive_file_type = 'mat' #file type of archive predict_global_minima_at_end = True #find predicted global minima at end predict_local_minima_at_end = True #find all local minima of landscape at end #Training source options training_type = 'random' #training type can 
be random or nelder_mead first_params = [1.9,-1.0] #first parameters to try in initial training -gp_training_filename = None #training data filename +gp_training_filename = None #filename for training from previous experiment gp_training_file_type = 'pkl' #training data file type #if you use nelder_mead for the initial training source see the CompleteNelderMeadConfig.txt for options. diff --git a/examples/tutorial_config.txt b/examples/tutorial_config.txt new file mode 100644 index 0000000..79e5da8 --- /dev/null +++ b/examples/tutorial_config.txt @@ -0,0 +1,24 @@ +#Tutorial Config +#--------------- + +#Parameter settings +num_params = 2 #number of parameters +min_boundary = [-1,-1] #minimum boundary +max_boundary = [1,1] #maximum boundary + +#Halting conditions +max_num_runs = 1000 #maximum number of runs +max_num_runs_without_better_params = 50 #maximum number of runs without finding better parameters +target_cost = 0.01 #optimization halts when a cost below this target is found + +#Learner specific options +first_params = [0.5,0.5] #first parameters to try +trust_region = 0.4 #maximum % move distance from best params + +#File format options +interface_file_type = 'txt' #file types of *exp_input.mat* and *exp_output.mat* +controller_archive_file_type = 'mat' #file type of the controller archive +learner_archive_file_type = 'pkl' #file type of the learner archive + +#Visualizations +visualizations = True \ No newline at end of file diff --git a/mloop/__init__.py b/mloop/__init__.py index 11fc455..1f34358 100644 --- a/mloop/__init__.py +++ b/mloop/__init__.py @@ -10,7 +10,7 @@ import os -__version__= "2.0.0" +__version__= "2.0.1" __all__ = ['controllers','interfaces','launchers','learners','testing','utilities','visualizations'] #Add a null handler in case the user does not run config_logger() before running the optimization diff --git a/mloop/controllers.py b/mloop/controllers.py index 7070dc9..0e74707 100644 --- a/mloop/controllers.py +++ b/mloop/controllers.py @@ 
-14,6 +14,7 @@ controller_dict = {'random':1,'nelder_mead':2,'gaussian_process':3} number_of_controllers = 3 default_controller_archive_filename = 'controller_archive' +default_controller_archive_file_type = 'txt' class ControllerInterrupt(Exception): ''' @@ -74,7 +75,7 @@ class Controller(): Keyword Args: max_num_runs (Optional [float]): The number of runs before the controller stops. If set to float('+inf') the controller will run forever. Default float('inf'), meaning the controller will run until another condition is met. target_cost (Optional [float]): The target cost for the run. If a run achieves a cost lower than the target, the controller is stopped. Default float('-inf'), meaning the controller will run until another condition is met. - max_repeats_without_better_params (Otional [float]): Puts a limit on the number of runs are allowed before a new better set of parameters is found. Default float('inf'), meaning the controller will run until another condition is met. + max_num_runs_without_better_params (Otional [float]): Puts a limit on the number of runs are allowed before a new better set of parameters is found. Default float('inf'), meaning the controller will run until another condition is met. controller_archive_filename (Optional [string]): Filename for archive. Contains costs, parameter history and other details depending on the controller type. Default 'ControllerArchive.mat' controller_archive_file_type (Optional [string]): File type for archive. Can be either 'txt' a human readable text file, 'pkl' a python dill file, 'mat' a matlab file or None if there is no archive. Default 'mat'. archive_extra_dict (Optional [dict]): A dictionary with any extra variables that are to be saved to the archive. If None, nothing is added. Default None. 
@@ -105,9 +106,9 @@ class Controller(): def __init__(self, interface, max_num_runs = float('+inf'), target_cost = float('-inf'), - max_repeats_without_better_params = float('+inf'), + max_num_runs_without_better_params = float('+inf'), controller_archive_filename=default_controller_archive_filename, - controller_archive_file_type='pkl', + controller_archive_file_type=default_controller_archive_file_type, archive_extra_dict = None, start_datetime = None, **kwargs): @@ -176,9 +177,9 @@ def __init__(self, interface, self.log.error('Number of runs must be greater than zero. max_num_runs:'+repr(self.max_num_run)) raise ValueError self.target_cost = float(target_cost) - self.max_repeats_without_better_params = float(max_repeats_without_better_params) - if self.max_repeats_without_better_params<=0: - self.log.error('Max number of repeats must be greater than zero. max_num_runs:'+repr(max_repeats_without_better_params)) + self.max_num_runs_without_better_params = float(max_num_runs_without_better_params) + if self.max_num_runs_without_better_params<=0: + self.log.error('Max number of repeats must be greater than zero. max_num_runs:'+repr(max_num_runs_without_better_params)) raise ValueError if mlu.check_file_type_supported(controller_archive_file_type): @@ -215,12 +216,12 @@ def __init__(self, interface, def check_end_conditions(self): ''' - Check whether either of the three end contions have been met: number_of_runs, target_cost or max_repeats_without_better_params. + Check whether either of the three end contions have been met: number_of_runs, target_cost or max_num_runs_without_better_params. Returns: bool : True, if the controlled should continue, False if the controller should end. 
''' - return (self.num_in_costs < self.max_num_runs) and (self.best_cost > self.target_cost) and (self.num_last_best_cost < self.max_repeats_without_better_params) + return (self.num_in_costs < self.max_num_runs) and (self.best_cost > self.target_cost) and (self.num_last_best_cost < self.max_num_runs_without_better_params) def _update_controller_with_learner_attributes(self): ''' @@ -365,11 +366,11 @@ def _shut_down(self): self.end_learner.set() self.log.debug('Interface end event set.') self.end_interface.set() - self.learner.join() - self.log.debug('Learner joined.') #After 3 or 4 executions of mloop in same python environment, sometimes excution can be trapped here #Likely to be a bug with multiprocessing in python, but difficult to isolate. #current solution is to join with a timeout and kill if that fails + self.learner.join() + self.log.debug('Learner joined.') self.interface.join(self.interface.interface_wait*3) if self.interface.is_alive(): self.log.debug('Interface did not join in time had to terminate.') @@ -383,6 +384,13 @@ def print_results(self): ''' Print results from optimization run to the logs ''' + self.log.debug('Optimization ended because:') + if self.num_in_costs >= self.max_num_runs: + self.log.debug('Maximum number of runs reached.') + if self.best_cost <= self.target_cost: + self.log.debug('Target cost reached.') + if self.num_last_best_cost >= self.max_num_runs_without_better_params: + self.log.debug('Maximum number of runs without better params reached.') self.log.info('Results:-') self.log.info('Best parameters found:' + str(self.best_params)) self.log.info('Best cost returned:' + str(self.best_cost) + ' +/- ' + str(self.best_uncer)) @@ -443,6 +451,7 @@ def __init__(self, interface,**kwargs): super().__init__(interface, **kwargs) self.learner = mll.RandomLearner(start_datetime = self.start_datetime, + learner_archive_filename=None, **self.remaining_kwargs) self._update_controller_with_learner_attributes() @@ -515,6 +524,7 @@ def 
__init__(self, interface, min_boundary=None, max_boundary=None, trust_region=None, + learner_archive_filename = 'learner_archive', **kwargs): super().__init__(interface, **kwargs) @@ -541,6 +551,7 @@ def __init__(self, interface, min_boundary=min_boundary, max_boundary=max_boundary, trust_region=trust_region, + learner_archive_filename=None, **self.remaining_kwargs) elif self.training_type == 'nelder_mead': @@ -548,6 +559,7 @@ def __init__(self, interface, num_params=num_params, min_boundary=min_boundary, max_boundary=max_boundary, + learner_archive_filename='training_learner_archive', **self.remaining_kwargs) else: self.log.error('Unknown training type provided to Gaussian process controller:' + repr(training_type)) @@ -560,6 +572,7 @@ def __init__(self, interface, min_boundary=min_boundary, max_boundary=max_boundary, trust_region=trust_region, + learner_archive_filename=learner_archive_filename, **self.remaining_kwargs) self.gp_learner_params_queue = self.gp_learner.params_out_queue @@ -662,7 +675,10 @@ def _shut_down(self): ''' self.log.debug('GP learner end set.') self.end_gp_learner.set() - self.gp_learner.join() + self.gp_learner.join(self.gp_learner.learner_wait*3) + if self.gp_learner.is_alive(): + self.log.debug('GP Learner did not join in time had to terminate.') + self.gp_learner.terminate() self.log.debug('GP learner joined') last_dict = None while not self.gp_learner_params_queue.empty(): diff --git a/mloop/interfaces.py b/mloop/interfaces.py index bc94ad4..2a049c5 100644 --- a/mloop/interfaces.py +++ b/mloop/interfaces.py @@ -128,17 +128,15 @@ class FileInterface(Interface): costs_in_queue (queue): Queue for costs (and other details) that have been returned by experiment. Keyword Args: - out_filename (Optional [string]): filename for file written with parameters. 
- out_file_type (Optional [string]): currently supports: 'txt' where the output is a text file with the parameters as a list of numbers, and 'mat' a matlab file with variable parameters with the next_parameters. Default is 'mat'. - in_filename (Optional [string]): filename for file written with parameters. - in_file_type (Optional [string]): file type to be written either 'mat' for matlab or 'txt' for readible text file. Defaults to 'mat'. + interface_out_filename (Optional [string]): filename for file written with parameters. + interface_in_filename (Optional [string]): filename for file containing the costs returned by the experiment. + interface_file_type (Optional [string]): file type to be written either 'mat' for matlab or 'txt' for readable text file. Defaults to 'txt'. ''' def __init__(self, - out_filename=mlu.default_out_filename, - out_file_type=mlu.default_out_file_type, - in_filename=mlu.default_in_filename, - in_file_type=mlu.default_in_file_type, + interface_out_filename=mlu.default_interface_out_filename, + interface_in_filename=mlu.default_interface_in_filename, + interface_file_type=mlu.default_interface_file_type, **kwargs): super().__init__(**kwargs) @@ -146,18 +144,14 @@ def __init__(self, self.out_file_count = 0 self.in_file_count = 0 - if mlu.check_file_type_supported(out_file_type): - self.out_file_type = str(out_file_type) + if mlu.check_file_type_supported(interface_file_type): + self.out_file_type = str(interface_file_type) + self.in_file_type = str(interface_file_type) else: - self.log.error('File out type is not supported:' + out_file_type) - self.out_filename = str(out_filename) + self.log.error('File out type is not supported:' + interface_file_type) + self.out_filename = str(interface_out_filename) self.total_out_filename = self.out_filename + '.' 
+ self.out_file_type - if mlu.check_file_type_supported(in_file_type): - self.in_file_type = str(in_file_type) - else: - self.log.error('File in type is not supported:' + in_file_type) - raise ValueError - self.in_filename = str(in_filename) + self.in_filename = str(interface_in_filename) self.total_in_filename = self.in_filename + '.' + self.in_file_type def _get_next_cost_dict(self,params_dict): @@ -170,6 +164,7 @@ def _get_next_cost_dict(self,params_dict): mlu.save_dict_to_file(self.last_params_dict,self.total_out_filename,self.out_file_type) while not self.end_event.is_set(): if os.path.isfile(self.total_in_filename): + time.sleep(mlu.filewrite_wait) #wait for file to be written to disk try: in_dict = mlu.get_dict_from_file(self.total_in_filename, self.in_file_type) except IOError: diff --git a/mloop/learners.py b/mloop/learners.py index 42c5485..da3262c 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -18,8 +18,8 @@ import sklearn.preprocessing as skp learner_thread_count = 0 -default_nelder_mead_archive_filename = 'nelder_mead_archive' -default_gp_archive_filename = 'gaussian_process_archive' +default_learner_filename = 'learner_archive' +default_learner_archive_file_type = 'txt' class LearnerInterrupt(Exception): ''' @@ -43,6 +43,7 @@ class Learner(): min_boundary (Optional [array]): Array with minimimum values allowed for each parameter. Note if certain values have no minimum value you can set them to -inf for example [-1, 2, float('-inf')] is a valid min_boundary. If None sets all the boundaries to '-1'. Default None. max_boundary (Optional [array]): Array with maximum values allowed for each parameter. Note if certain values have no maximum value you can set them to +inf for example [0, float('inf'),3,-12] is a valid max_boundary. If None sets all the boundaries to '1'. Default None. learner_archive_filename (Optional [string]): Name for python archive of the learners current state. If None, no archive is saved. Default None. 
But this is typically overloaded by the child class. + learner_archive_file_type (Optional [string]): File type for archive. Can be either 'txt' a human readable text file, 'pkl' a python dill file, 'mat' a matlab file or None if there is no archive. Default 'mat'. log_queue (Optional [queue]): Queue for sending log messages to main logger. If None, default behavoir is to send warnings and above to console level. Default None. log_level (Optional [int]): Level for the learners logger. If None, set to warning. Default None. start_datetime (Optional [datetime]): Start date time, if None, is automatically generated. @@ -57,8 +58,8 @@ def __init__(self, num_params=None, min_boundary=None, max_boundary=None, - learner_archive_filename=None, - learner_archive_file_type='pkl', + learner_archive_filename=default_learner_filename, + learner_archive_file_type=default_learner_archive_file_type, start_datetime=None, **kwargs): @@ -269,20 +270,15 @@ class RandomLearner(Learner, threading.Thread): Keyword Args: min_boundary (Optional [array]): If set to None, overrides default learner values and sets it to a set of value 0. Default None. max_boundary (Optional [array]): If set to None overides default learner values and sets it to an array of value 1. Default None. - trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction. - random_archive_filename: Name for python archive of the learners current state. If None, no archive is saved. Default None. - random_archive_file_type: Type of archive. 
'pkl' for pickle, 'mat' for matlab and 'txt' for text. + trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction. ''' def __init__(self, trust_region=None, first_params=None, - random_archive_filename=None, - random_archive_file_type='pkl', **kwargs): - super().__init__(learner_archive_filename=random_archive_filename, - **kwargs) + super().__init__(**kwargs) if not np.all(self.diff_boundary>0.0): self.log.error('All elements of max_boundary are not larger than min_boundary') @@ -359,12 +355,9 @@ def __init__(self, initial_simplex_corner=None, initial_simplex_displacements=None, initial_simplex_scale=None, - nelder_mead_archive_filename=default_nelder_mead_archive_filename, - nelder_mead_archive_file_type='pkl', **kwargs): - super().__init__(learner_archive_filename=nelder_mead_archive_filename, - **kwargs) + super().__init__(**kwargs) self.num_boundary_hits = 0 self.rho = 1 @@ -592,11 +585,7 @@ class GaussianProcessLearner(Learner, mp.Process): trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction. 
default_bad_cost (Optional [float]): If a run is reported as bad and default_bad_cost is provided, the cost for the bad run is set to this default value. If default_bad_cost is None, then the worst cost received is set to all the bad runs. Default None. default_bad_uncertainty (Optional [float]): If a run is reported as bad and default_bad_uncertainty is provided, the uncertainty for the bad run is set to this default value. If default_bad_uncertainty is None, then the uncertainty is set to a tenth of the best to worst cost range. Default None. - gp_archive_filename (Optional [string]): Name for the python pickle archive of the learner. Default GaussianProcessLearnerArchive. - gp_archive_file_type (Optional [string]): File type of the training file_type archive. Can be 'mat' for matlabe file, 'pkl' for python pickle file, 'txt' for text file. minimum_uncertainty (Optional [float]): The minimum uncertainty associated with provided costs. Must be above zero to avoid fitting errors. Default 1e-8. - gp_training_filename (Optional [string]): Filename of a previously archive that will be used to train the gaussian process. Note if this is provided, only the data from the previous experiment, properties of the GP, boundary values, and number of parameters are copied into the new learner. Be sure to also provide the same other configuration details if you want the experiment to continue identically, for example the trust region of the previous experiment is not included. - gp_training_file_type (Optional [string]): File type of the training file_type archive. Can be 'mat' for matlabe file, 'pkl' for python pickle file, 'txt' for text file. predict_global_minima_at_end (Optional [bool]): If True finds the global minima when the learner is ended. Does not if False. Default True. predict_local_minima_at_end (Optional [bool]): If True finds the all minima when the learner is ended. Does not if False. Default False. 
@@ -631,11 +620,9 @@ def __init__(self, trust_region=None, default_bad_cost = None, default_bad_uncertainty = None, - gp_archive_filename=default_gp_archive_filename, - gp_archive_file_type='pkl', minimum_uncertainty = 1e-8, gp_training_filename =None, - gp_training_file_type ='pkl', + gp_training_file_type ='txt', predict_global_minima_at_end = True, predict_local_minima_at_end = False, **kwargs): @@ -695,18 +682,14 @@ def __init__(self, self.has_local_minima = False - super().__init__(learner_archive_filename=gp_archive_filename, - learner_archive_file_type=gp_archive_file_type, - num_params=num_params, + super().__init__(num_params=num_params, min_boundary=min_boundary, max_boundary=max_boundary, **kwargs) else: - super().__init__(learner_archive_filename=gp_archive_filename, - learner_archive_file_type=gp_archive_file_type, - **kwargs) + super().__init__(**kwargs) #Storage variables, archived self.all_params = np.array([], dtype=float) diff --git a/mloop/testing.py b/mloop/testing.py index c6bcb48..3d8bf2e 100644 --- a/mloop/testing.py +++ b/mloop/testing.py @@ -163,17 +163,15 @@ class FakeExperiment(threading.Thread): Keyword Args: test_landscape (Optional TestLandscape): landscape to generate costs from. - out_file_type (Optional [string]): currently supports: 'txt' where the output is a text file with the parameters as a list of numbers, and 'mat' a matlab file with variable parameters with the next_parameters. Default is 'mat'. - in_file_type (Optional [string]): file type to be written either 'mat' for matlab or 'txt' for readible text file. Defaults to 'mat'. - + experiment_file_type (Optional [string]): currently supports: 'txt' where the output is a text file with the parameters as a list of numbers, and 'mat' a matlab file with variable parameters with the next_parameters. Default is 'txt'. + Attributes self.end_event (Event): Used to trigger end of experiment. 
''' def __init__(self, test_landscape = None, - out_file_type=mlu.default_in_file_type, - in_file_type=mlu.default_out_file_type, + experiment_file_type=mlu.default_interface_file_type, exp_wait = 0, poll_wait = 1, **kwargs): @@ -188,11 +186,11 @@ def __init__(self, self.log = logging.getLogger(__name__) self.exp_wait = float(exp_wait) self.poll_wait = float(poll_wait) - self.out_file_type = str(out_file_type) - self.in_file_type = str(in_file_type) + self.out_file_type = str(experiment_file_type) + self.in_file_type = str(experiment_file_type) - self.total_out_filename = mlu.default_in_filename + '.' + self.out_file_type - self.total_in_filename = mlu.default_out_filename + '.' + self.in_file_type + self.total_out_filename = mlu.default_interface_in_filename + '.' + self.out_file_type + self.total_in_filename = mlu.default_interface_out_filename + '.' + self.in_file_type self.end_event = threading.Event() self.test_count =0 @@ -213,6 +211,7 @@ def run(self): self.log.debug('Entering FakeExperiment loop') while not self.end_event.is_set(): if os.path.isfile(self.total_in_filename): + time.sleep(mlu.filewrite_wait) #wait for file to be written try: in_dict = mlu.get_dict_from_file(self.total_in_filename, self.in_file_type) except IOError: diff --git a/mloop/utilities.py b/mloop/utilities.py index 3cd4c20..01d888f 100644 --- a/mloop/utilities.py +++ b/mloop/utilities.py @@ -11,15 +11,16 @@ import numpy as np import mloop -default_in_filename = 'exp_output' -default_in_file_type = 'mat' -default_out_filename = 'exp_input' -default_out_file_type = 'mat' +default_interface_in_filename = 'exp_output' +default_interface_out_filename = 'exp_input' +default_interface_file_type = 'txt' archive_foldername = './M-LOOP_archives/' log_foldername = './M-LOOP_logs/' default_log_filename = 'M-LOOP_' +filewrite_wait = 0.1 + mloop_path = os.path.dirname(mloop.__file__) def config_logger(**kwargs): @@ -97,9 +98,10 @@ def txt_file_to_dict(filename): temp = 
(line.partition('#')[0]).strip('\n').strip() if temp != '': tdict_string += temp+',' - #Setting up words for parsing a dict + #Setting up words for parsing a dict, ignore eclipse warnings array = np.array inf = float('inf') + nan = float('nan') tdict = eval('dict('+tdict_string+')') return tdict diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 931d43b..a71231f 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -129,7 +129,7 @@ def __init__(self, filename, self.num_in_costs = int(controller_dict['num_in_costs']) self.num_out_params = int(controller_dict['num_out_params']) self.out_params = np.array(controller_dict['out_params']) - self.out_type = list(controller_dict['out_type']) + self.out_type = [x.strip() for x in list(controller_dict['out_type'])] self.in_costs = np.squeeze(np.array(controller_dict['in_costs'])) self.in_uncers = np.squeeze(np.array(controller_dict['in_uncers'])) self.in_bads = np.squeeze(list(controller_dict['in_bads'])) diff --git a/setup.cfg b/setup.cfg index 23411c4..1dd2a0f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,10 +1,5 @@ [metadata] description-file = README.rst -[build_sphinx] -source-dir = doc/source -build-dir = doc/build -all_files = 1 - -[upload_sphinx] -upload-dir = doc/build/html \ No newline at end of file +[aliases] +test=pytest \ No newline at end of file diff --git a/tests/test_examples.py b/tests/test_examples.py index 4695e2b..a3f50f0 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -9,7 +9,6 @@ import mloop.utilities as mlu import logging import numpy as np -import shutil class TestExamples(unittest.TestCase): @@ -24,8 +23,6 @@ def setUpClass(cls): def tearDownClass(cls): cls.fake_experiment.end_event.set() cls.fake_experiment.join() - shutil.rmtree(mlu.archive_foldername) - shutil.rmtree(mlu.log_foldername) def test_complete_controller_config(self): controller = mll.launch_from_file(mlu.mloop_path+'/../examples/complete_controller_config.txt', @@ -75,9 +72,14 @@ 
def test_complete_gaussian_process_config(self): **self.override_dict) self.asserts_for_cost_and_params(controller) + def test_tutorial_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/tutorial_config.txt', + **self.override_dict) + self.asserts_for_cost_and_params(controller) + def asserts_for_cost_and_params(self,controller): self.assertTrue(controller.best_cost<=controller.target_cost) self.assertTrue(np.sum(np.square(controller.best_params))<=controller.target_cost) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() \ No newline at end of file