diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..e99f535 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,15 @@ +language: python +python: + - "2.7.12" + - "3.5" +install: + - pip install --upgrade pip + - python --version + - pip --version + - pip install -r requirements.txt +# command to run tests +script: python setup.py test +os: + - linux + + \ No newline at end of file diff --git a/README.rst b/README.rst index 5f1264d..0df4823 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ In the M-LOOP source folder. For more details on how to use the package see the documentation. You can see it online at -? +http://m-loop.readthedocs.io/ Or you can build it by entering the docs folder and running: diff --git a/doc/source/examples.rst b/doc/source/examples.rst deleted file mode 100644 index 3337169..0000000 --- a/doc/source/examples.rst +++ /dev/null @@ -1,3 +0,0 @@ -Examples --------- -I like turtles side 8 \ No newline at end of file diff --git a/doc/source/images/M-LOOP_logo.pdf b/doc/source/images/M-LOOP_logo.pdf deleted file mode 100644 index bbb7a85..0000000 Binary files a/doc/source/images/M-LOOP_logo.pdf and /dev/null differ diff --git a/doc/source/images/M-LOOP_logo.png b/doc/source/images/M-LOOP_logo.png deleted file mode 100644 index bc88a1e..0000000 Binary files a/doc/source/images/M-LOOP_logo.png and /dev/null differ diff --git a/doc/source/install.rst b/doc/source/install.rst deleted file mode 100644 index a9cc22d..0000000 --- a/doc/source/install.rst +++ /dev/null @@ -1,78 +0,0 @@ -.. _sec-installation: - -Installation -============ -M-LOOP is available on PyPI and can be installed with your favorite package manager. However, we currently recommend you install from the source code to ensure you have the latest improvements and bug fixes. - -The installation process involves four steps. - -1. Get a Python 3 distribution with the standard scientific packages. We recommend installing :ref:`sec-anaconda`. -2. 
Install the development version of :ref:`sec-scikit-learn`. -3. Install the development version of :ref:`sec-M-LOOP`. -4. :ref:`Test` your M-LOOP install. - - -.. _sec-anaconda: - -Anaconda --------- -We recommend installing Anaconda to get a python 3 environment with all the required scientific packages. The Anaconda distribution is available here: - -https://www.continuum.io/downloads - -Follow the installation instructions they provide. - -M-LOOP requires a python 3.\* environment. If you want to retain compatibility with python 2.\* Anaconda supports installing multiple python environments on the same machine, see: - -http://conda.pydata.org/docs/py2or3.html#create-python-2-or-3-environments. - -.. _sec-scikit-learn: - -scikit-learn ------------- -M-LOOP currently uses some of the machine learning algorithms from the development version of scikit-learn. This means you must also install scikit-learn from source in order to use M-LOOP. - -To install the development version of scikit-learn you can follow the instructions here: - -http://scikit-learn.org/stable/developers/contributing.html#git-repo - -Or simply run these three commands:: - - git clone git://github.com/scikit-learn/scikit-learn.git - cd ./scikit-learn - python setup.py develop - -The first command downloads the latest source code for scikit-learn from github into the current directory, the second moves into the scikit-learn source directory, and the third link builds the package and creates a link from you python package to the source. You may need admin privileges to run the setup script. - -.. 
_sec-M-LOOP: - -M-LOOP ------- -M-LOOP can be installed from the source code with three commands:: - - git clone git://github.com/michaelhush/M-LOOP.git - cd ./M-LOOP - python setup.py develop - -The first command downloads the latest source code for M-LOOP from github into the current directory, the second moves into the M-LOOP source directory, and the third link builds the package and creates a link from you python package to the source. You may need admin privileges to run the setup script. - -.. _sec-Testing: - -Test Installation ------------------ - -To test your M-LOOP installation use the command:: - - python setup.py pytest - -In the M-LOOP source code directory. The tests should take around three minutes to complete. If you find a error please consider contributing to the project by reporting a bug on the github. - -Documentation -------------- - -If you would also like a local copy of the documentation enter the doc folder and use the command:: - - make html - -Which will generate the documentation in doc/build/html. - diff --git a/doc/source/options.rst b/doc/source/options.rst deleted file mode 100644 index f1bc186..0000000 --- a/doc/source/options.rst +++ /dev/null @@ -1,3 +0,0 @@ -Options -------- -I like turtles 3 \ No newline at end of file diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst deleted file mode 100644 index 70ec4a2..0000000 --- a/doc/source/tutorial.rst +++ /dev/null @@ -1,6 +0,0 @@ -.. _sec-tutorial: - -Tutorial -======== - -I like turtles 3. \ No newline at end of file diff --git a/doc/Makefile b/docs/Makefile similarity index 98% rename from doc/Makefile rename to docs/Makefile index 5213de8..e53d317 100644 --- a/doc/Makefile +++ b/docs/Makefile @@ -5,7 +5,7 @@ SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = -BUILDDIR = build +BUILDDIR = _build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) @@ -15,9 +15,9 @@ endif # Internal variables. 
PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help help: diff --git a/docs/_static/M-LOOP_diagram.png b/docs/_static/M-LOOP_diagram.png new file mode 100644 index 0000000..192c8bf Binary files /dev/null and b/docs/_static/M-LOOP_diagram.png differ diff --git a/doc/source/images/M-LOOP_logo.ico b/docs/_static/M-LOOP_logo.ico similarity index 100% rename from doc/source/images/M-LOOP_logo.ico rename to docs/_static/M-LOOP_logo.ico diff --git a/docs/_static/M-LOOP_logo.png b/docs/_static/M-LOOP_logo.png new file mode 100644 index 0000000..172aece Binary files /dev/null and b/docs/_static/M-LOOP_logo.png differ diff --git a/docs/_static/M-LOOP_visualizations.png b/docs/_static/M-LOOP_visualizations.png new file mode 100644 index 0000000..6e4c45e Binary files /dev/null and b/docs/_static/M-LOOP_visualizations.png differ diff --git a/doc/source/images/M-LOOPandBEC.png b/docs/_static/M-LOOPandBEC.png similarity index 100% rename from doc/source/images/M-LOOPandBEC.png rename to docs/_static/M-LOOPandBEC.png diff --git a/doc/source/api/controllers.rst b/docs/api/controllers.rst similarity index 75% rename from doc/source/api/controllers.rst rename to docs/api/controllers.rst index 9b85904..4679486 100644 --- a/doc/source/api/controllers.rst +++ b/docs/api/controllers.rst @@ -1,5 +1,7 @@ +.. _api-controllers: + controllers ------------ +=========== .. 
automodule:: mloop.controllers :members: diff --git a/doc/source/api/index.rst b/docs/api/index.rst similarity index 92% rename from doc/source/api/index.rst rename to docs/api/index.rst index 70c36a0..b8d6915 100644 --- a/doc/source/api/index.rst +++ b/docs/api/index.rst @@ -1,3 +1,6 @@ +.. _sec-api: + +========== M-LOOP API ========== diff --git a/doc/source/api/interfaces.rst b/docs/api/interfaces.rst similarity index 90% rename from doc/source/api/interfaces.rst rename to docs/api/interfaces.rst index 80eb1e9..9d443c8 100644 --- a/doc/source/api/interfaces.rst +++ b/docs/api/interfaces.rst @@ -1,5 +1,5 @@ interfaces ----------- +========== .. automodule:: mloop.interfaces :members: diff --git a/doc/source/api/launchers.rst b/docs/api/launchers.rst similarity index 70% rename from doc/source/api/launchers.rst rename to docs/api/launchers.rst index 7d3c105..3e9454c 100644 --- a/doc/source/api/launchers.rst +++ b/docs/api/launchers.rst @@ -1,5 +1,5 @@ launchers ---------- +========= .. automodule:: mloop.launchers :members: diff --git a/doc/source/api/learners.rst b/docs/api/learners.rst similarity index 60% rename from doc/source/api/learners.rst rename to docs/api/learners.rst index 3e76c52..7385be9 100644 --- a/doc/source/api/learners.rst +++ b/docs/api/learners.rst @@ -1,5 +1,7 @@ +.. _api-learners: + learners ---------- +======== .. automodule:: mloop.learners :members: diff --git a/doc/source/api/mloop.rst b/docs/api/mloop.rst similarity index 82% rename from doc/source/api/mloop.rst rename to docs/api/mloop.rst index a0127dd..affcb8f 100644 --- a/doc/source/api/mloop.rst +++ b/docs/api/mloop.rst @@ -1,4 +1,4 @@ mloop ------ +===== .. automodule:: mloop diff --git a/doc/source/api/t_esting.rst b/docs/api/t_esting.rst similarity index 92% rename from doc/source/api/t_esting.rst rename to docs/api/t_esting.rst index 9bb25ae..1209b5a 100644 --- a/doc/source/api/t_esting.rst +++ b/docs/api/t_esting.rst @@ -1,5 +1,5 @@ testing -------- +======= .. 
automodule:: mloop.testing :members: diff --git a/doc/source/api/utilities.rst b/docs/api/utilities.rst similarity index 90% rename from doc/source/api/utilities.rst rename to docs/api/utilities.rst index 1f22fb5..8e63990 100644 --- a/doc/source/api/utilities.rst +++ b/docs/api/utilities.rst @@ -1,5 +1,5 @@ utilities ---------- +========= .. automodule:: mloop.utilities :members: diff --git a/doc/source/api/visualizations.rst b/docs/api/visualizations.rst similarity index 88% rename from doc/source/api/visualizations.rst rename to docs/api/visualizations.rst index f602372..91d7209 100644 --- a/doc/source/api/visualizations.rst +++ b/docs/api/visualizations.rst @@ -1,5 +1,5 @@ visualizations --------------- +============== .. automodule:: mloop.visualizations :members: diff --git a/doc/source/conf.py b/docs/conf.py similarity index 93% rename from doc/source/conf.py rename to docs/conf.py index 821c252..389407f 100644 --- a/doc/source/conf.py +++ b/docs/conf.py @@ -32,8 +32,7 @@ extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', - 'sphinx.ext.mathjax', - 'sphinx.ext.githubpages', + 'sphinx.ext.mathjax' ] # Napoleon settings @@ -71,9 +70,9 @@ # built documents. # # The short X.Y version. -version = '2.0' +version = '2.1' # The full version, including alpha/beta/rc tags. -release = '2.0.1' +release = '2.1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -128,10 +127,25 @@ # a list of builtin themes. html_theme = 'alabaster' +# Custom sidebar templates, maps document names to template names. +html_sidebars = { '**': ['about.html','navigation.html','relations.html', 'searchbox.html'], } + +#'globaltoc.html', + # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. 
-#html_theme_options = {} +html_theme_options = {'logo':'M-LOOP_logo.png', +'logo_name':True, +'description':'Machine-Learning Online Optimization Package', +'github_user':'michaelhush', +'github_repo':'M-LOOP', +'github_banner':True, +'font_family':"Arial, Helvetica, sans-serif", +'head_font_family':"Arial, Helvetica, sans-serif", +'analytics_id':'UA-83520804-1'} + +#'github_button':True, # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] @@ -145,12 +159,12 @@ # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = 'images/M-LOOP_logo.png' +#html_logo = '_static/M-LOOP_logo.png' # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = 'images/M-LOOP_logo.ico' +html_favicon = '_static/M-LOOP_logo.ico' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -171,9 +185,6 @@ # typographically correct entities. #html_use_smartypants = True -# Custom sidebar templates, maps document names to template names. -html_sidebars = { '**': ['globaltoc.html', 'relations.html', 'searchbox.html'], } - # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} @@ -248,7 +259,7 @@ # The name of an image file (relative to this directory) to place at the top of # the title page. -latex_logo = 'images/M-LOOP_logo.pdf' +#latex_logo = 'M-LOOP_logo.pdf' # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 0000000..48709b8 --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1,38 @@ +.. 
_sec-contributing: + +============ +Contributing +============ + +If you use M-LOOP please consider contributing to the project. There are many quick and easy ways to help out. + +- If you use M-LOOP be sure to cite the paper where it first used: `'Fast machine-learning online optimization of ultra-cold-atom experiments', Sci Rep 6, 25890 (2016) `_. +- Star and watch the `M-LOOP GitHub `_. +- Make a suggestion on what features you would like added, or report an issue, on the `GitHub `_ or by `email `_. +- Contribute your own code to the `M-LOOP GitHub `_, this could be the interface you designed, more options or a completely new solver. + +Finally spread the word! Let others know the success you have had with M-LOOP and recommend they try it too. + +Contributors +============ + +M-LOOP is written and maintained by `Michael R Hush `_ + +Other contributors, listed alphabetically, are: + +* John W. Bastian - design, first demonstration +* Patrick J. Everitt - testing, design, first demonstration +* Kyle S. Hardman - design, first demonstration +* Anton van den Hengel - design, first demonstration +* Joe J. Hope - design, first demonstration +* Carlos C. N. Kuhn - first demonstration +* Andre N. Luiten - first demonstration +* Gordon D. McDonald - first demonstration +* Manju Perumbil - first demonstration +* Ian R. Petersen - first demonstration +* Ciaron D. Quinlivan - first demonstration +* Alex Ratcliff - testing +* Nick P. Robins - first demonstration +* Mahasen A. Sooriyabandara - first demonstration +* Richard Taylor - testing +* Paul B. Wigley - testing, design, first demonstration diff --git a/docs/data.rst b/docs/data.rst new file mode 100644 index 0000000..4f78e13 --- /dev/null +++ b/docs/data.rst @@ -0,0 +1,37 @@ +.. _sec-data: + +==== +Data +==== + +M-LOOP saves all data produced by the experiment in archives which are saved to disk during and after the optimization run. 
The archives also contain information derived from the data, including the machine learning model for how the experiment works. Here we explain how to interpret the file archives. + +File Formats +============ + +M-LOOP currently supports three file formats for all file input and output. + +- 'txt' text files: Human readable text files. This is the default file format for all outputs. The advantage of text files is they are easy to read, and there will be no format compatibility issues in the future. However, there will be some loss of precision in your data. To ensure you keep all significant figure you may want to use 'pkl' or 'mat'. +- 'mat' MATLAB files: Matlab files that can be opened and written with MATLAB or `numpy `_. +- 'pkl' pickle files: a serialization of a python dictionary made with `pickle `. Your data can be retrieved from this dictionary using the appropriate keywords. + +File Keywords +============= + +The archives contain a set of keywords/variable names with associated data. The quickest way to understand what the values mean for a particular keyword is to :ref:`search` the documentation for a description. + +For a comprehensive list of all the keywords looks at the attributes described in the API. + +For the controller archive see :ref:`api-controllers`. + +For the learner archive see :ref:`api-learners`. The generic keywords are described in the class Learner, with learner specific options described in the derived classes, for example GaussianProcessLearner. + +Converting files +================ + +If for whatever reason you want to convert files between the formats you can do so using the utilities module of M-LOOP. 
For example the following python code will convert the file controller_archive_2016-08-18_12-18.pkl from a 'pkl' file to a 'mat' file:: + + import mloop.utilities as mlu + + saved_dict = mlu.get_dict_from_file('./M-LOOP_archives/controller_archive_2016-08-18_12-18.pkl','pkl') + mlu.save_dict_to_file(saved_dict,'./M-LOOP_archives/controller_archive_2016-08-18_12-18.mat','mat') diff --git a/docs/examples.rst b/docs/examples.rst new file mode 100644 index 0000000..5349d30 --- /dev/null +++ b/docs/examples.rst @@ -0,0 +1,128 @@ +.. _sec-examples: + +======== +Examples +======== + +M-LOOP includes a series of example configuration files for each of the controllers and interfaces. The examples can be found in examples folder. For some controllers there are two files, ones ending with *_basic_config* which includes the standard configuration options and *_complete_config* which include a comprehensive list of all the configuration options available. + +The options available are also comprehensively documented in the :ref:`sec-api` as keywords for each of the classes. However, the quickest and easiest way to learn what options are available, if you are not familiar with python, is to just look at the provided examples. + +Each of the example files is used when running tests of M-LOOP. So please copy and modify them elsewhere if you use them as a starting point for your configuration file. + +Interfaces +========== + +There are currently two interfaces supported: 'file' and 'shell'. You can specify which interface you want with the option:: + + interface_type = [name] + +The default will be 'file'. The specific options for each of the interfaces are described below. + +File Interface +-------------- + +The file interface exchanges information with the experiment by writing files to disk. You can change the names of the files used for the file interface and their type. The file interface options are described in *file_interface_config.txt*. + +.. 
include:: ../examples/file_interface_config.txt + :literal: + +Shell Interface +--------------- + +The shell interface is for experiments that can be run through a command executed in a shell. Information is then piped between M-LOOP and the experiment through the shell. You can change the command to run the experiment and the way the parameters are formatted. The shell interface options are described in *shell_interface_config.txt* + +.. include:: ../examples/shell_interface_config.txt + :literal: + + +Controllers +=========== + +There are currently three controller types supported: 'gaussian_process', 'random' and 'nelder_mead'. The default is 'gaussian_process'. You can set which interface you want to use with the option:: + + controller_type = [name] + +Each of the controllers and their specific options are described below. There is also a set of common options shared by all controllers which is described in *controller_options.txt*. The common options include the parameter settings and the halting conditions. + +.. include:: ../examples/controller_config.txt + :literal: + +Gaussian process +---------------- + +The Gaussian-process controller is the default controller and is the currently the most sophisticated machine learner algorithm. It uses a `Link Gaussian process `_ to develop a model for how the parameters relate to the measured cost, effectively creating a model for how the experiment operates. This model is then used when picking new points to test. + +There are two example files for the Gaussian-process controller: *gaussian_process_simple_config.txt* which contains the basic options. + +.. include:: ../examples/gaussian_process_simple_config.txt + :literal: + +*gaussian_process_complete_config.txt* which contains a comprehensive list of options. + +.. 
include:: ../examples/gaussian_process_complete_config.txt + :literal: + +Differential evolution +---------------------- + +The differential evolution (DE) controller uses a `Link DE algorithm `_ for optimization. DE is a type of evolutionary algorithm, and is historically the most commonly used in automated optimization. DE will eventually find a global solution, however it can take many experiments before it does so. + +There are two example files for the differential evolution controller: *differential_evolution_simple_config.txt* which contains the basic options. + +.. include:: ../examples/differential_evolution_simple_config.txt + :literal: + +*differential_evolution_complete_config.txt* which contains a comprehensive list of options. + +.. include:: ../examples/differential_evolution_complete_config.txt + :literal: + + +Nelder Mead +----------- + +The Nelder Mead controller implements the `Link Nelder-Mead method `_ for optimization. You can control the starting point and size of the initial simplex of the method with the configuration file. + +There are two example files for the Nelder-Mead controller: *nelder_mead_simple_config.txt* which contains the basic options. + +.. include:: ../examples/nelder_mead_simple_config.txt + :literal: + +*nelder_mead_complete_config.txt* which contains a comprehensive list of options. + +.. include:: ../examples/nelder_mead_complete_config.txt + :literal: + +Random +------ + +The random optimization algorithm picks parameters randomly from a uniform distribution from within the parameter bounds or trust region. + +There are two example files for the random controller: *random_simple_config.txt* which contains the basic options. + +.. include:: ../examples/random_simple_config.txt + :literal: + +*random_complete_config.txt* which contains a comprehensive list of options. + +.. 
include:: ../examples/random_complete_config.txt + :literal: + +Logging +======= + +You can control the filename of the logs and also the level which is reported to the file and the console. For more information see `Link logging levels `_. The logging options are described in *logging_config.txt*. + +.. include:: ../examples/logging_config.txt + :literal: + +Extras +====== + +Extras refers to options related to post processing your data once the optimization is complete. Currently the only extra option is for visualizations. The extra options are described in *extras_config.txt*. + +.. include:: ../examples/extras_config.txt + :literal: + + diff --git a/doc/source/index.rst b/docs/index.rst similarity index 71% rename from doc/source/index.rst rename to docs/index.rst index 8229e9c..d5b173f 100644 --- a/doc/source/index.rst +++ b/docs/index.rst @@ -1,43 +1,45 @@ -###### +====== M-LOOP -###### +====== The Machine-Learner Online Optimization Package is designed to automatically and rapidly optimize the parameters of a scientific experiment or computer controller system. -.. figure:: images/M-LOOPandBEC.png +.. figure:: _static/M-LOOPandBEC.png :alt: M-LOOP optimizing a BEC. - M-LOOP in control of a ultra-cold atom experiment. M-LOOP was able to find an optimal set of ramps to evaporatively cool a thermal gas and form a Bose-Einstein Condensate. + M-LOOP in control of an ultra-cold atom experiment. M-LOOP was able to find an optimal set of ramps to evaporatively cool a thermal gas and form a Bose-Einstein Condensate. -Using M-LOOP is simple, once the parameters of your experiment is computer controller, all you need to do is determine a cost function that quantifies the performance of an experiment after a single run. You can then hand over control of the experiment to M-LOOP which will find a global optimal set of parameters that minimize the cost function, by performing a few experiments and testing different parameters. 
M-LOOP uses machine-learning to predict how the parameters of the experiment relate to the cost, it uses this model to pick the next best parameters to test to find an optimum as quickly as possible. +Using M-LOOP is simple, once the parameters of your experiment is computer controlled, all you need to do is determine a cost function that quantifies the performance of an experiment after a single run. You can then hand over control of the experiment to M-LOOP which will find a global optimal set of parameters that minimize the cost function, by performing a few experiments and testing different parameters. M-LOOP uses machine-learning to predict how the parameters of the experiment relate to the cost, it uses this model to pick the next best parameters to test to find an optimum as quickly as possible. M-LOOP not only finds an optimal set of parameters for the experiment it also provides a model of how the parameters are related to the costs which can be used to improve the experiment. -If you use M-LOOP please cite our publication where we first used the package to optimise the production of a Bose-Einstein Condensate: +If you use M-LOOP please cite our publication where we first used the package to optimize the production of a Bose-Einstein Condensate: Fast Machine-Learning Online Optimization of Ultra-Cold-Atom Experiments. *Scientific Reports* **6**, 25890 (2016). DOI: `Link 10.1038/srep25890 `_ http://www.nature.com/articles/srep25890 Quick Start ------------ +=========== -To get the M-LOOP running as soon as possible follow the :ref:'sec-installation' instructions and :ref:'_sec-tutorial'. +To get M-LOOP running follow the :ref:`sec-installation` instructions and :ref:`sec-tutorial`. Contents --------- +======== .. 
toctree:: - :maxdepth: 2 install - tutorial - options + tutorials + interfaces + data + visualizations examples + contributing api/index Indices -------- +======= * :ref:`genindex` * :ref:`modindex` diff --git a/docs/install.rst b/docs/install.rst new file mode 100644 index 0000000..de51e7f --- /dev/null +++ b/docs/install.rst @@ -0,0 +1,131 @@ +.. _sec-installation: + +============ +Installation +============ + +M-LOOP is available on PyPI and can be installed with your favorite package manager; simply search for 'M-LOOP' and install. However, if you want the latest features and a local copy of the examples you should install M-LOOP using the source code from the `Link GitHub `_. Detailed installation instructions are provided below. + +The installation process involves three steps. + +1. Get a Python distribution with the standard scientific packages. We recommend installing :ref:`sec-anaconda`. +2. Install the latest release of :ref:`sec-M-LOOP`. +3. (Optional) :ref:`Test` your M-LOOP install. + +If you are having any trouble with the installation you may need to check that the :ref:`package dependencies` have been correctly installed. If you are still having trouble, you can `Link submit an issue `_ to the GitHub. + +.. _sec-anaconda: + +Anaconda +======== + +We recommend installing Anaconda to get a python environment with all the required scientific packages. The Anaconda distribution is available here: + +https://www.continuum.io/downloads + +Follow the installation instructions they provide. + +M-LOOP is targeted at python 3 but also supports 2. Please use python 3 if you do not have a reason to use 2, see :ref:`sec-py3vpy2` for details. + +.. _sec-m-loop: + +M-LOOP +====== + +You have two options when installing M-LOOP, you can perform a basic installation of the last release with pip or you can install from source to get the latest features. 
We recommend installing from source so you can test your installation, see all the examples and get the most recent bug fixes. + +Installing from source +---------------------- + +M-LOOP can be installed from the latest source code with three commands:: + + git clone git://github.com/michaelhush/M-LOOP.git + cd ./M-LOOP + python setup.py develop + +The first command downloads the latest source code for M-LOOP from GitHub into the current directory, the second moves into the M-LOOP source directory, and the third link builds the package and creates a link from your python package to the source. If you are using linux or MacOS you may need admin privileges to run the setup script. + +At any time you can update M-LOOP to the latest version from GitHub by running the command:: + + git pull origin master + +in the M-LOOP directory. + +Installing with pip +------------------- + +M-LOOP can be installed with pip with a single command:: + + pip install M-LOOP + +If you are using linux or MacOS you may need admin privileges to run the command. To update M-LOOP to the latest version use:: + + pip install M-LOOP --upgrade + + +.. _sec-Testing: + +Testing +======= + +If you have installed from source, to test your installation use the command:: + + python setup.py test + +In the M-LOOP source code directory. The tests should take around five minutes to complete. If you find an error please consider :ref:`sec-contributing` to the project and report a bug on the `GitHub `_. + +If you installed M-LOOP using pip, you will not need to test your installation. + +.. _sec-dependencies: + +Dependencies +============ + +M-LOOP requires the following packages to run correctly. + +============ ======= +Package Version +============ ======= +docutils >=0.3 +matplotlib >=1.5 +numpy >=1.11 +pip >=7.0 +pytest >=2.9 +setuptools >=26 +scikit-learn >=0.18 +scipy >=0.17 +============ ======= + +These packages should be automatically installed by pip or the script setup.py when you install M-LOOP. 
+ +However, if you are using Anaconda some packages that are managed by the conda command may not be correctly updated, even if your installation passes all the tests. In this case, you will have to update these packages manually. You can check what packages you have installed and their version with the command:: + + conda list + +To install a package that is missing, say for example pytest, use the command:: + + conda install pytest + +To update a package to the latest version, say for example scikit-learn, use the command:: + + conda update scikit-learn + +Once you install and update all the required packages with conda M-LOOP should run correctly. + +Documentation +============= + +The latest documentation will always be available here online. If you would also like a local copy of the documentation, and you have downloaded the source code, enter the docs folder and use the command:: + + make html + +Which will generate the documentation in docs/_build/html. + +.. _sec-py3vpy2: + +Python 3 vs 2 +============= + +M-LOOP is developed in python 3 and it gets the best performance in this environment. This is primarily because other packages that M-LOOP uses, like numpy, run fastest in python 3. The tests typically take about 20% longer to complete in python 2 than 3. + +If you have a specific reason to stay in a python 2 environment (you may use other packages which are not python 3 compatible) then you can still use M-LOOP without upgrading to 3. However, if you do not have a specific reason to stay with python 2, it is highly recommended you use the latest python 3 package. diff --git a/docs/interfaces.rst b/docs/interfaces.rst new file mode 100644 index 0000000..926fb9d --- /dev/null +++ b/docs/interfaces.rst @@ -0,0 +1,95 @@ +.. _sec-interfaces: + +========== +Interfaces +========== + +Currently M-LOOP supports three ways to interface your experiment + +1. 
File interface where parameters and costs are exchanged between the experiment and M-LOOP through files written to disk. This approach is described in a :ref:`tutorial `. +2. Shell interface where parameters and costs are exchanged between the experiment and M-LOOP through information piped through a shell (or command line). This option should be considered if you can execute your experiment using a command from a shell. +3. Implementing your own interface through the M-LOOP python API. + +Each of these options is described below. If you have any suggestions for interfaces please consider :ref:`sec-contributing` to the project. + +File interface +============== + +The simplest method to connect your experiment to M-LOOP is with the file interface where data is exchanged by writing files to disk. To use this interface you can include the option:: + + interface='file' + +in your configuration file. The file interface happens to be the default, so this is not necessary. + +The file interface works under the assumption that your experiment follows the following algorithm. + +1. Wait for the file *exp_input.txt* to be made on the disk in the same folder M-LOOP is run. +2. Read the parameters for the next experiment from the file (named params). +3. Delete the file *exp_input.txt*. +4. Run the experiment with the parameters provided and calculate a cost, and optionally the uncertainty. +5. Write the cost to the file *exp_output.txt*. Go back to step 1. + +It is important you delete the file *exp_input.txt* after reading it, since it is used as an indicator for the next experiment to run. + +When writing the file *exp_output.txt* there are three keywords and values you can include in your file, for example after the first run your experiment may produce the following:: + + cost = 0.5 + uncer = 0.01 + bad = false + +cost refers to the cost calculated from the experimental data. uncer, is optional, and refers to the uncertainty in the cost measurement made. 
Note, M-LOOP by default assumes there is some noise corrupting costs, which is fitted and compensated for. Hence, if there is some noise in your costs which you are unable to predict from a single measurement, do not worry, you do not have to estimate uncer, you can just leave it out. Lastly bad can be used to indicate an experiment failed and was not able to produce a cost. If the experiment worked set bad = false and if it failed set bad = true. + +Note you do not have to include all of the keywords, you must provide at least a cost or the bad keyword set to true. For example a successful run can simply be:: + + cost = 0.3 + +and a failed experiment can be as simple as:: + + bad = True + +Once the *exp_output.txt* has been written to disk, M-LOOP will read it and delete it. + +Shell interface +=============== + +The shell interface is used when experiments can be run from a command in a shell. M-LOOP will still need to be configured and executed in the same manner described for a file interface as described in :ref:`tutorial `. The only difference is how M-LOOP starts the experiment and reads data. To use this interface you must include the following options:: + + interface_type='shell' + command='./run_exp' + params_args_type='direct' + +in the configuration file. The interface keyword simply indicates that you want M-LOOP to operate the experiment through the shell. The other two keywords need to be customized to your needs. + +The command keyword should be provided with the command on the shell that runs the experiment. In the example above the executable would be *run_exp*. Note M-LOOP will try and execute the command in the folder that you run M-LOOP from, if this causes trouble you should just use the absolute address of your executable. Your command can be more complicated than a single word, for example if you wanted to include some options like './run_exp --verbose -U' this would also be acceptable. 
+ +The params_args_type keyword controls how M-LOOP delivers the parameters to the executable. If you use the 'direct' option the parameters will just be fed directly to the experiment as arguments. For example if the command was ./run_exp and the parameters to test next were 1.3, -23 and 12, M-LOOP would execute the following command:: + + ./run_exp 1.3 -23 12 + +the other params_args_type option is 'named' in this case each parameter is fed to the experiment as a named option. Given the same parameters as before, M-LOOP would execute the command:: + + ./run_exp --param1 1.3 --param2 -23 --param3 12 + +After the experiment has run and a cost (and uncertainty or bad value) has been found they must be provided back to M-LOOP through the shell. For example if you experiment completed with a cost 1.3, uncertainty 0.1 you need to program your executable to print the following to the shell:: + + M-LOOP_start + cost = 1.3 + uncer = 0.1 + M-LOOP_end + +You can also output other information to the shell and split up the information you provide to M-LOOP if you wish. The following output would also valid. + + Running experiment... Experiment complete. + Checking it was valid... It worked. + M-LOOP_start + bad = False + M-LOOP_end + Calculating cost... Was 3.2. + M-LOOP_start + cost = 3.2 + M-LOOP_end + +Python interfaces +================= + +If your experiment is controlled in python you can use M-LOOP as an API in your own custom python script. In this case you must create your own implementation of the abstract interface class to control the experiment. This is explained in detail in the :ref:`tutorial for python controlled experiments `. 
diff --git a/doc/make.bat b/docs/make.bat similarity index 94% rename from doc/make.bat rename to docs/make.bat index e67a860..a2d3c82 100644 --- a/doc/make.bat +++ b/docs/make.bat @@ -5,9 +5,9 @@ REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) -set BUILDDIR=build -set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source -set I18NSPHINXOPTS=%SPHINXOPTS% source +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% diff --git a/docs/tutorials.rst b/docs/tutorials.rst new file mode 100644 index 0000000..a7d0dd9 --- /dev/null +++ b/docs/tutorials.rst @@ -0,0 +1,478 @@ +.. _sec-tutorial: + +========= +Tutorials +========= + +Here we provide some tutorials on how to use M-LOOP. M-LOOP is flexible and can be customized with a variety of :ref:`options ` and :ref:`interfaces `. Here we provide some basic tutorials to get you up and started as quick as possible. + +There are two different approaches to using M-LOOP: + +1. You can execute M-LOOP from a command line (or shell) and configure it using a text file. +2. You can use M-LOOP as a :ref:`python API `. + +If you have a standard experiment, that is operated by LabVIEW, Simulink or some other method, then you should use option 1 and follow the :ref:`first tutorial `. If your experiment is operated using python, you should consider using option 2 as it will give you more flexibility and control, in which case, look at the :ref:`second tutorial `. + +.. _sec-standard-experiment: + +Standard experiment +=================== + +The basic operation of M-LOOP is sketched below. + +.. _fig-mloop-diag: + +.. figure:: _static/M-LOOP_diagram.png + :alt: M-LOOP in a loop with an experiment sending parameters and receiving costs. + +There are three stages: + +1. 
M-LOOP is started with the command:: + + M-LOOP + + M-LOOP first looks for the configuration file *exp_config.txt*, which contains options like the number of parameters and their limits, in the folder it is executed, then starts the optimization process. + +2. M-LOOP controls and optimizes the experiment by exchanging files written to disk. M-LOOP produces a file called *exp_input.txt* which contains a variable params with the next parameters to be run by the experiment. The experiment is expected to run an experiment with these parameters and measure the resultant cost. The experiment should then write the file *exp_output.txt* which contains at least the variable cost which quantifies the performance of that experimental run, and optionally, the variables uncer (for uncertainty) and bad (if the run failed). This process is repeated many times until the halting condition is met. + +3. Once the optimization process is complete, M-LOOP prints to the console the parameters and cost of the best run performed during the experiment, and a prediction of what the optimal parameters (with the corresponding predicted cost and uncertainty). M-LOOP also produces a set of plots that allow the user to visualize the optimization process and cost landscape. During operation and at the end M-LOOP write three files to disk: + + - *M-LOOP_[datetime].log* a log of the console output and other debugging information during the run. + - *controller_archive_[datetime].txt* an archive of all the experimental data recorded and the results. + - *learner_archive_[datetime].txt* an archive of the model created by the machine learner of the experiment. + +In what follows we will unpack this process and give details on how to configure and run M-LOOP. + +Launching M-LOOP +---------------- + +Launching M-LOOP is performed by executing the command M-LOOP on the console. 
You can also provide the name of your configuration file if you do not want to use the default with the command:: + + M-LOOP -c [config_filename] + +.. _sec-configuration-file: + +Configuration File +------------------ + +The configuration file contains a list of options and settings for the optimization run. Each option must be started on a new line and formatted as:: + + [keyword] = [value] + +You can add comments to your file using #, everything past # will be ignored. Examples of relevant keywords and syntax for the values is provided in :ref:`sec-examples` and a comprehensive list of options is described in :ref:`sec-examples`. The values should be formatted with python syntax, strings should be surrounded with single or double quotes and arrays of values can be surrounded with square brackets/parentheses with numbers separated with commas. In this tutorial we will examine the example file *tutoral_config.txt*:: + + #Tutorial Config + #--------------- + + #Parameter settings + num_params = 2 #number of parameters + min_boundary = [-1,-1] #minimum boundary + max_boundary = [1,1] #maximum boundary + first_params = [0.5,0.5] #first parameters to try + trust_region = 0.4 #maximum % move distance from best params + + #Halting conditions + max_num_runs = 1000 #maximum number of runs + max_num_runs_without_better_params = 50 #maximum number of runs without finding better parameters + target_cost = 0.01 #optimization halts when a cost below this target is found + + #Learner options + cost_has_noise = True #whether the cost are corrupted by noise or not + + #Timing options + no_delay = True #wait for learner to make generate new parameters or use training algorithms + + #File format options + interface_file_type = 'txt' #file types of *exp_input.mat* and *exp_output.mat* + controller_archive_file_type = 'mat' #file type of the controller archive + learner_archive_file_type = 'pkl' #file type of the learner archive + + #Visualizations + visualizations = True + +We will 
now explain the options in each of their groups. In almost all cases you will only need to set the parameter settings and halting conditions, but we have also described a few of the most commonly used extra options. + +Parameter settings +~~~~~~~~~~~~~~~~~~ + +The number of parameters and their limits is defined with three keywords:: + + num_params = 2 + min_boundary = [-1,-1] + max_boundary = [1,1] + +num_params defines the number of parameters, min_boundary defines the minimum value each of the parameters can take and max_boundary defines the maximum value each parameter can take. Here there are two values which each must be between -1 and 1. + +first_params defines the first parameters the learner will try. You only need to set this if you have a safe set of parameters you want the experiment to start with. Just delete this keyword if any set of parameters in the boundaries will work. + +trust_region defines the maximum change allowed in the parameters from the best parameters found so far. In the current example the region size is 2 by 2, with a trust region of 40% thus the maximum allowed change for the second run will be [0.5 +/- 0.8, 0.5 +/- 0.8]. This is only needed if your experiment produces bad results when the parameters are changed significantly between runs. Simply delete this keyword if your experiment works with any set of parameters within the boundaries. + +Halting conditions +~~~~~~~~~~~~~~~~~~ + +The halting conditions define when the simulation will stop. We present three options here:: + + max_num_runs = 100 + max_num_runs_without_better_params = 10 + target_cost = 0.1 + +max_num_runs is the maximum number of runs that the optimization algorithm is allowed to run. max_num_runs_without_better_params is the maximum number of runs allowed before a lower cost and better parameters are found. 
Finally, when target_cost is set, if a run produces a cost that is less than this value the optimization process will stop. + +When multiple halting conditions are set, the optimization process will halt when any one of them is met. + +If you do not have any prior knowledge of the problem use only the keyword max_num_runs and set it to the highest value you can wait for. If you have some knowledge about what the minimum attainable cost is or there is some cost threshold you need to achieve, you might want to set the target_cost. max_num_runs_without_better_params is useful if you want to let the optimization algorithm run as long as it needs until there is a good chance the global optimum has been found. + +If you do not want one of the halting conditions, simply delete it from your file. For example if you just wanted the algorithm to search as long as it can until it found a global minimum you could set:: + + max_num_runs_without_better_params = 10 + + +Learner Options +~~~~~~~~~~~~~~~ + +There are many learner specific options (and different learner algorithms) described in :ref:`sec-examples`. Here we just present a common one:: + + cost_has_noise = True + +If the cost you provide has noise in it, meaning your the cost you calculate would fluctuate if you did multiple experiments with the same parameters, then set this flag to True. If the costs your provide have no noise then set this flag to False. M-LOOP will automatically determine if the costs have noise in them or not, so if you are unsure, just delete this keyword and it will use the default value of True. + +Timing options +~~~~~~~~~~~~~~ + +M-LOOP learns how the experiment works by fitting the parameters and costs using a gaussian process. This learning process can take some time. If M-LOOP is asked for new parameters before it has time to generate a new prediction, it will use the training algorithm to provide a new set of parameters to test. 
This allows for an experiment to be run while the learner is still thinking. The training algorithm by default is differential evolution; this algorithm is also used to do the first initial set of experiments which are then used to train M-LOOP. If you would prefer M-LOOP waits for the learner to come up with its best prediction before running another experiment you can change this behavior with the option:: + + no_delay = True + +Set no_delay to true to ensure there are no pauses between experiments and set it to false if you want to give M-LOOP the time to come up with its most informed choice. Sometimes doing fewer more intelligent experiments will lead to an optimum quicker than many quick unintelligent experiments. You can delete the keyword if you are unsure and it will default to True. + +File format options +~~~~~~~~~~~~~~~~~~~ + +You can set the file formats for the archives produced at the end and the files exchanged with the experiment with the options:: + + interface_file_type = 'txt' + controller_archive_file_type = 'mat' + learner_archive_file_type = 'pkl' + +interface_file_type controls the file format for the files exchanged with the experiment. controller_archive_file_type and learner_archive_file_type control the format of the respective archives. + +There are three file formats currently available: 'mat' is for MATLAB readable files, 'pkl' is for python binary archives created using the `pickle package `_, and 'txt' is for human readable text files. For more details on these formats see :ref:`sec-data`. + +Visualization +~~~~~~~~~~~~~ + +By default M-LOOP will display a set of plots that allow the user to visualize the optimization process and the cost landscape. To change this behavior use the option:: + + visualizations = True + +Set it to false to turn the visualizations off. For more details see :ref:`sec-visualizations`. + +Interface +--------- + +There are many options of how to connect M-LOOP to your experiment. 
We consider the most generic method, writing and reading files to disk. For other options see :ref:`sec-interfaces`. If you design a bespoke interface for your experiment please consider :ref:`sec-contributing` to the project by sharing your method with other users. + +The file interface works under the assumption that you experiment follows the following algorithm. + +1. Wait for the file *exp_input.txt* to be made on the disk in the same folder M-LOOP is run. +2. Read the parameters for the next experiment from the file (named params). +3. Delete the file *exp_input.txt*. +4. Run the experiment with the parameters provided and calculate a cost, and optionally the uncertainty. +5. Write the cost to the file *exp_output.txt*. Go back to step 1. + +It is important you delete the file *exp_input.txt* after reading it, since it is used to as an indicator for the next experiment to run. + +When writing the file *exp_output.txt* there are three keywords and values you can include in your file, for example after the first run your experiment may produce the following:: + + cost = 0.5 + uncer = 0.01 + bad = false + +cost refers to the cost calculated from the experimental data. uncer, is optional, and refers to the uncertainty in the cost measurement made. Note, M-LOOP by default assumes there is some noise corrupting costs, which is fitted and compensated for. Hence, if there is some noise in your costs which you are unable to predict from a single measurement, do not worry, you do not have to estimate uncer, you can just leave it out. Lastly bad can be used to indicate an experiment failed and was not able to produce a cost. If the experiment worked set bad = false and if it failed set bad = true. + +Note you do not have to include all of the keywords, you must provide at least a cost or the bad keyword set to true. 
For example a successful run can simply be:: + + cost = 0.3 + +and failed experiment can be as simple as:: + + bad = True + +Once the *exp_output.txt* has been written to disk, M-LOOP will read it and delete it. + +Parameters and cost function +---------------------------- + +Choosing the right parameterization of your experiment and cost function will be an important part of getting great results. + +If you have time dependent functions in your experiment you will need to choose a parametrization of these function before interfacing them with M-LOOP. M-LOOP will take more time and experiments to find an optimum, given more parameters. But if you provide too few parameters, you may not be able to achieve your cost target. + +Fortunately, the visualizations provided after the optimization will help you determine which parameters contributed the most to the optimization process. Try with whatever parameterization is convenient to start and use the data produced afterwards to guide you on how to better improve the parametrization of your experiment. + +Picking the right cost function from experimental observables will also be important. M-LOOP will always find a global optimal as quick as it can, but if you have a poorly chosen cost function, the global optimal may not what you really wanted to optimize. Make sure you pick a cost function that will uniquely produce the result you want. Again, do not be afraid to experiment and use the data produced by the optimization runs to improve the cost function you are using. + +Have a look at our `paper `_ on using M-LOOP to create a Bose-Einstein Condensate for an example of choosing a parametrization and cost function for an experiment. + +.. _sec-results: + +Results +------- + +Once M-LOOP has completed the optimization, it will output results in several ways. + +M-LOOP will print results to the console. It will give the parameters of the experimental run that produced the lowest cost. 
It will also provide a set of parameters which are predicted to be produce the lowest average cost. If there is no noise in the costs your experiment produced, then the best parameters and predicted best parameters will be the same. If there was some noise your costs then it is possible that there will be a difference between the two. This is because the noise might have resulted with a set of experimental parameters that produced a lower cost due to a random fluke. The real optimal parameters that correspond to the minimum average cost are the predicted best parameters. In general, use the predicted best parameters (when provided) as the final result of the experiment. + +M-LOOP will produce an archive for the controller and machine learner. The controller archive contains all the data gathered during the experimental run and also other configuration details set by the user. By default it will be a 'txt' file which is human readable. If the meaning of a keyword and its associated data in the file is unclear, just :ref:`search` the documentation with the keyword to find a description. The learner archive contains a model of the experiment produced by the machine learner algorithm, which is currently a gaussian process. By default it will also be a 'txt' file. For more detail on these files see :ref:`sec-data`. + +M-LOOP, by default, will produce a set of visualizations. These plots show the optimizations process over time and also predictions made by the learner of the cost landscape. For more details on these visualizations and their interpretation see :ref:`sec-visualizations`. + +.. _sec-python-experiment: + +Python controlled experiment +============================ + +If you have an experiment that is already under python control you can use M-LOOP as an API. Below we go over the example python script *python_controlled_experiment.py* you should also read over the :ref:`first tutorial ` to get a general idea of how M-LOOP works. 
+ +When integrating M-LOOP into your laboratory remember that it will be controlling you experiment, not vice versa. Hence, at the top level of your python script you will execute M-LOOP which will then call on your experiment when needed. Your experiment will not be making calls of M-LOOP. + +An example script for a python controlled experiment is given in the examples folder called *python_controlled_experiment.py*, which is copied below:: + + #Imports for python 2 compatibility + from __future__ import absolute_import, division, print_function + __metaclass__ = type + + #Imports for M-LOOP + import mloop.interfaces as mli + import mloop.controllers as mlc + import mloop.visualizations as mlv + + #Other imports + import numpy as np + import time + + #Declare your custom class that inherits from the Interface class + class CustomInterface(mli.Interface): + + #Initialization of the interface, including this method is optional + def __init__(self): + #You must include the super command to call the parent class, Interface, constructor + super(CustomInterface,self).__init__() + + #Attributes of the interface can be added here + #If you want to pre-calculate any variables etc. 
this is the place to do it + #In this example we will just define the location of the minimum + self.minimum_params = np.array([0,0.1,-0.1]) + + #You must include the get_next_cost_dict method in your class + #this method is called whenever M-LOOP wants to run an experiment + def get_next_cost_dict(self,params_dict): + + #Get parameters from the provided dictionary + params = params_dict['params'] + + #Here you can include the code to run your experiment given a particular set of parameters + #In this example we will just evaluate a sum of sinc functions + cost = -np.sum(np.sinc(params - self.minimum_params)) + #There is no uncertainty in our result + uncer = 0 + #The evaluation will always be a success + bad = False + #Add a small time delay to mimic a real experiment + time.sleep(1) + + #The cost, uncertainty and bad boolean must all be returned as a dictionary + #You can include other variables you want to record as well if you want + cost_dict = {'cost':cost, 'uncer':uncer, 'bad':bad} + return cost_dict + + def main(): + #M-LOOP can be run with three commands + + #First create your interface + interface = CustomInterface() + #Next create the controller, provide it with your controller and any options you want to set + controller = mlc.create_controller(interface, max_num_runs = 1000, target_cost = -2.99, num_params = 3, min_boundary = [-2,-2,-2], max_boundary = [2,2,2]) + #To run M-LOOP and find the optimal parameters just use the controller method optimize + controller.optimize() + + #The results of the optimization will be saved to files and can also be accessed as attributes of the controller. 
+ print('Best parameters found:') + print(controller.best_params) + + #You can also run the default sets of visualizations for the controller with one command + mlv.show_all_default_visualizations(controller) + + + #Ensures main is run when this code is run as a script + if __name__ == '__main__': + main() + +Each part of the code is explained in the following sections. + +Imports +------- + +The start of the script imports the libraries that are necessary for M-LOOP to work:: + + #Imports for python 2 compatibility + from __future__ import absolute_import, division, print_function + __metaclass__ = type + + #Imports for M-LOOP + import mloop.interfaces as mli + import mloop.controllers as mlc + import mloop.visualizations as mlv + + #Other imports + import numpy as np + import time + +The first group of imports are just for python 2 compatibility. M-LOOP is targeted at python3, but has been designed to be bilingual. These imports ensure backward compatibility. + +The second group of imports are the most important modules M-LOOP needs to run. The interfaces and controllers modules are essential, while the visualizations module is only needed if you want to view your data afterwards. + +Lastly, you can add any other imports you may need. + +Custom Interface +---------------- + +M-LOOP takes an object oriented approach to controlling the experiment. This is different than the functional approach taken by other optimization packages, like scipy. When using M-LOOP you must make your own class that inherits from the Interface class in M-LOOP. This class must implement a method called *get_next_cost_dict* that takes a set of parameters, runs your experiment and then returns the appropriate cost and uncertainty. 
An example of the simplest implementation of a custom interface is provided below:: + + #Declare your custom class that inherits from the Interface class + class SimpleInterface(mli.Interface): + + #the method that runs the experiment given a set of parameters and returns a cost + def get_next_cost_dict(self,params_dict): + + #The parameters come in a dictionary and are provided in a numpy array + params = params_dict['params'] + + #Here you can include the code to run your experiment given a particular set of parameters + #For this example we just evaluate a simple function + cost = np.sum(params**2) + uncer = 0 + bad = False + + #The cost, uncertainty and bad boolean must all be returned as a dictionary + cost_dict = {'cost':cost, 'uncer':uncer, 'bad':bad} + return cost_dict + +The code above defines a new class that inherits from the Interface class in M-LOOP. Note this code is different from the example above, we will consider this later. It is slightly more complicated than just defining a method, however there is a lot more flexibility when taking this approach. You should put the code you use to run your experiment in the *get_next_cost_dict* method. This method is executed by the interface whenever M-LOOP wants a cost corresponding to a set of parameters. + +When you actually run M-LOOP you will need to make an instance of your interface. To make an instance of the class above you would use:: + + interface = SimpleInterface() + +This interface is then provided to the controller, which is discussed in the next section. + +Dictionaries are used for both input and output of the method, to give the user flexibility. For example, if you had a bad run, you do not have to return a cost and uncertainty, you can just return a dictionary with bad set to True:: + + cost_dict = {'bad':True} + return cost_dict + +By taking an object oriented approach, M-LOOP can provide a lot more flexibility when controlling your experiment. 
For example if you wish to start up your experiment or perform some initial numerical analysis you can add a customized constructor or __init__ method for the class. We consider this in the main example:: + + class CustomInterface(mli.Interface): + + #Initialization of the interface, including this method is optional + def __init__(self): + #You must include the super command to call the parent class, Interface, constructor + super(CustomInterface,self).__init__() + + #Attributes of the interface can be added here + #If you want to pre-calculate any variables etc. this is the place to do it + #In this example we will just define the location of the minimum + self.minimum_params = np.array([0,0.1,-0.1]) + + #You must include the get_next_cost_dict method in your class + #this method is called whenever M-LOOP wants to run an experiment + def get_next_cost_dict(self,params_dict): + + #Get parameters from the provided dictionary + params = params_dict['params'] + + #Here you can include the code to run your experiment given a particular set of parameters + #In this example we will just evaluate a sum of sinc functions + cost = -np.sum(np.sinc(params - self.minimum_params)) + #There is no uncertainty in our result + uncer = 0 + #The evaluation will always be a success + bad = False + #Add a small time delay to mimic a real experiment + time.sleep(1) + + #The cost, uncertainty and bad boolean must all be returned as a dictionary + #You can include other variables you want to record as well if you want + cost_dict = {'cost':cost, 'uncer':uncer, 'bad':bad} + return cost_dict + +In this code snippet we also implement a constructor. Here we just define a numpy array which defines the minimum_parameter values. We can call this variable whenever we need in the *get_next_cost_dict method*. You can also define your own custom methods in your interface or even inherit from other classes. + +Once you have implemented your own Interface running M-LOOP can be done in three lines. 
+ +Running M-LOOP +-------------- + +Once you have made your interface class running M-LOOP can be as simple as three lines. In the example script M-LOOP is run in the main method:: + + def main(): + #M-LOOP can be run with three commands + + #First create your interface + interface = CustomInterface() + #Next create the controller, provide it with your controller and any options you want to set + controller = mlc.create_controller(interface, max_num_runs = 1000, target_cost = -2.99, num_params = 3, min_boundary = [-2,-2,-2], max_boundary = [2,2,2]) + #To run M-LOOP and find the optimal parameters just use the controller method optimize + controller.optimize() + +In the code snippet we first make an instance of our custom interface class called interface. We then create an instance of a controller. The controller will run the experiment and perform the optimization. You must provide the controller with the interface and any of the M-LOOP options you would normally provide in the configuration file. In this case we give five options, which do the following: + +1. *max_num_runs = 1000* sets the maximum number of runs to be 1000. +2. *target_cost = -2.99* sets a cost that M-LOOP will halt at once it has been reached. +3. *num_params = 3* sets the number of parameters to be 3. +4. *min_boundary = [-2,-2,-2]* defines the minimum values of each of the parameters. +5. *max_boundary = [2,2,2]* defines the maximum values of each of the parameters. + +There are many other options you can use. Have a look at :ref:`sec-configuration-file` for a detailed introduction into all the important configuration options. Remember you can include any option you would include in a configuration file as keywords for the controller. For more options you should look at all the config files in :ref:`sec-examples`, or for a comprehensive list look at the :ref:`sec-api`. + +Once you have created your interface and controller you can run M-LOOP by calling the optimize method of the controller. 
So in summary M-LOOP is executed in three lines:: + + interface = CustomInterface() + controller = mlc.create_controller(interface, [options]) + controller.optimize() + +Results +------- + +The results will be displayed on the console and also saved in a set of files. Have a read over :ref:`sec-results` for more details on the results displayed and saved. Also read :ref:`sec-data` for more details on data formats and how it is stored. + +Within the python environment you can also access the results as attributes of the controller after it has finished optimization. The example includes a simple demonstration of this:: + + #The results of the optimization will be saved to files and can also be accessed as attributes of the controller. + print('Best parameters found:') + print(controller.best_params) + +All of the results saved in the controller archive can be directly accessed as attributes of the controller object. For a comprehensive list of the attributes of the controller generated after an optimization run see the :ref:`sec-api`. + +Visualizations +-------------- + +For each controller there is normally a default set of visualizations available. The visualizations for the Gaussian Process, the default optimization algorithm, is described in :ref:`sec-visualizations`. Visualizations can be called through the visualization module. The example includes a simple demonstration of this:: + + #You can also run the default sets of visualizations for the controller with one command + mlv.show_all_default_visualizations(controller) + +This code snippet will display all the visualizations available for that controller. There are many other visualization methods and options available that let you control which plots are displayed and when, see the :ref:`sec-api` for details. + + + + + + + + + diff --git a/docs/visualizations.rst b/docs/visualizations.rst new file mode 100644 index 0000000..6355123 --- /dev/null +++ b/docs/visualizations.rst @@ -0,0 +1,48 @@ +.. 
_sec-visualizations: + +============== +Visualizations +============== + +At the end of an optimization run a set of visualizations will be produce by default. + +.. figure:: _static/M-LOOP_visualizations.png + :alt: Six visualizations of data produced by M-LOOP. + + An example of the six visualizations automatically produced when M-LOOP is run with the default controller, the Gaussian process machine learner. + +The number of visualizations will depend on what controller you use. By default there should be six which are described below: + +- **Controller: Cost vs run number.** Here the returned by the experiment versus run number is plotted. The legend shows what algorithm was used to generate the parameters tested by the experiment. If you use the Gaussian process, there will also be another algorithm used throughout the optimization algorithm in order to (a) ensure parameters are generated fast enough and (b) add new prior free data to ensure the Gaussian process converges to the correct model. + +- **Controller: Parameters vs run number.** The parameters values are all plotted against the run number. Note the parameters will all be scaled between their minimum and maximum value. the legend indicates what color corresponds to what parameter. + +- **Controller: Cost vs parameters.** The cost versus the parameters. Here each of the parameters tested are plotted against the cost they returned as a set. Again the parameter values are all scaled between their minimum and maximum values. + +- **GP Learner: Predicted landscape.** 1D cross sections of the landscape about the best recorded cost are plotted against each parameter. The color of the cross section corresponds to the parameter that is varied in the cross section. This predicted landscape is generated by the model fit to the experiment by the Gaussian process. Be sure to check after an optimization run that all parameters contributed. 
If one parameter produces a flat cross section, it is most likely it did not have any influence on the final cost. You may want to remove it on the next optimization run. + +- **GP Learner: Log of length scales vs fit number.** The Gaussian process fits a correlation length to each of the parameters in the experiment. Here we see a plot of the correlation lengths versus fit number. The last correlation lengths (highest fit number) is the most reliable values. Correlation lengths indicate how sensitive the cost is to changes in these parameters. If the correlation length is large, the parameter has a very little influence on the cost, if the correlation length is small, the parameter will have a very large influence on the cost. The correlation lengths are not precisely estimate. They should only be trusted accurate to +/- an order of magnitude. If a parameter has an extremely large value at the end of the optimization, say 5 or more, it is unlikely to have much affect on the cost and should be removed on the next optimization run. + +- **GP Learner: Noise level vs fit number.** This is the estimated noise in the costs as a function of fit number. The most reliable estimate of the noise level will be the last value (highest fit number). The noise level is useful for quantifying the intrinsic noise and uncertainty in your cost value. Most other optimization algorithms will not provide this estimate. The noise level estimate may be helpful when isolating what part of your system can be optimized and what part is due to random fluctuations. + +The plots which start with *Controller:* are generated from the controller archive, while plots that start with *Learner:* are generated from the learner archive. + +Reproducing visualizations +========================== + +If you have a controller and learner archive and would like to examine the visualizations again, it is best to do so using the :ref:`sec-api`. 
For example the following code will plot the visualizations again from the files *controller_archive_2016-08-23_13-59.mat* and *learner_archive_2016-08-18_12-18.pkl*:: + + import mloop.visualizations as mlv + import matplotlib.pyplot as plt + + mlv.configure_plots() + mlv.create_contoller_visualizations('controller_archive_2016-08-23_13-59.mat',file_type='mat') + mlv.create_gaussian_process_learner_visualizations('learner_archive_2016-08-18_12-18.pkl',file_type='pkl') + + plt.show() + + + + + + diff --git a/examples/complete_controller_config.txt b/examples/complete_controller_config.txt deleted file mode 100644 index effa6e0..0000000 --- a/examples/complete_controller_config.txt +++ /dev/null @@ -1,10 +0,0 @@ -#Controller Options -#----------------- - -#General options -max_num_runs = 1000 #number of planned runs -target_cost = 0.1 #cost to beat -max_repeats_without_better_params = 10 #max allowed number of runs between finding better parameters -controller_archive_filename = 'test' #filename prefix for controller archive -controller_archive_file_type = 'mat' #file_type for controller archive -archive_extra_dict = {'test':'this_is'} #dictionary of any data to be put in archive diff --git a/examples/controller_config.txt b/examples/controller_config.txt new file mode 100644 index 0000000..2c226dc --- /dev/null +++ b/examples/controller_config.txt @@ -0,0 +1,19 @@ +#General Controller Options +#-------------------------- + +#Halting conditions +max_num_runs = 1000 #number of planned runs +target_cost = 0.1 #cost to beat +max_num_runs_without_better_params = 100 #max allowed number of runs between finding better parameters + +#Parameter controls +num_params = 2 #Number of parameters +min_boundary = [0,0] #Minimum value for each parameter +max_boundary = [2,2] #Maximum value for each parameter + +#Filename related +controller_archive_filename = 'agogo' #filename prefix for controller archive +controller_archive_file_type = 'mat' #file_type for controller archive 
+learner_archive_filename = 'ogoga' #filename prefix for learner archive +learner_archive_file_type = 'pkl' #file_type for learner archive +archive_extra_dict = {'test':'this_is'} #dictionary of any extra data to be put in archive \ No newline at end of file diff --git a/examples/differential_evolution_complete_config.txt b/examples/differential_evolution_complete_config.txt new file mode 100644 index 0000000..88a8547 --- /dev/null +++ b/examples/differential_evolution_complete_config.txt @@ -0,0 +1,19 @@ +#Differential Evolution Complete Options +#--------------------------------------- + +#General options +max_num_runs = 500 #number of planned runs +target_cost = 0.1 #cost to beat + +#Differential evolution controller options +controller_type = 'differential_evolution' +num_params = 2 #number of parameters +min_boundary = [-1.2,-2] #minimum boundary +max_boundary = [10.0,4] #maximum boundary +trust_region = [3.2,3.1] #maximum move distance from best params +first_params = None #first parameters to try if None a random set of parameters is chosen +evolution_strategy='best2' #evolution strategy can be 'best1', 'best2', 'rand1' and 'rand2'. Best uses the best point, rand uses a random one, the number indicates the number of directions added. +population_size=10 #a multiplier for the population size of a generation +mutation_scale=(0.4, 1.1) #the minimum and maximum value for the mutation scale factor. Each generation is randomly selected from this. Each value must be between 0 and 2. +cross_over_probability=0.8 #the probability a parameter will be resampled during a mutation in a new generation +restart_tolerance=0.02 #the fraction the standard deviation in the costs of the population must reduce from the initial sample, before the search is restarted. 
\ No newline at end of file diff --git a/examples/differential_evolution_simple_config.txt b/examples/differential_evolution_simple_config.txt new file mode 100644 index 0000000..d4615a0 --- /dev/null +++ b/examples/differential_evolution_simple_config.txt @@ -0,0 +1,15 @@ +#Differential Evolution Basic Options +#------------------------------------ + +#General options +max_num_runs = 500 #number of planned runs +target_cost = 0.1 #cost to beat + +#Differential evolution controller options +controller_type = 'differential_evolution' +num_params = 1 #number of parameters +min_boundary = [-4.8] #minimum boundary +max_boundary = [10.0] #maximum boundary +trust_region = 0.6 #maximum % move distance from best params +first_params = [5.3] #first parameters to try + diff --git a/examples/complete_extras_config.txt b/examples/extras_config.txt similarity index 54% rename from examples/complete_extras_config.txt rename to examples/extras_config.txt index f85f8b7..7a3ff48 100644 --- a/examples/complete_extras_config.txt +++ b/examples/extras_config.txt @@ -1,3 +1,4 @@ -#Extras -#----- +#Extra Options +#------------- + visualizations=False #whether plots should be presented after run \ No newline at end of file diff --git a/examples/file_interface_config.txt b/examples/file_interface_config.txt new file mode 100644 index 0000000..9412123 --- /dev/null +++ b/examples/file_interface_config.txt @@ -0,0 +1,7 @@ +#File Interface Options +#---------------------- + +interface_type = 'file' #The type of interface +interface_out_filename = 'exp_input' #The filename of the file output by the interface and input into the experiment +interface_in_filename = 'exp_output' #The filename o the file input into the interface and output by the experiment +interface_file_type = 'txt' #The file_type of both the input and output files, can be 'txt', 'pkl' or 'mat'. 
diff --git a/examples/complete_gaussian_process_config.txt b/examples/gaussian_process_complete_config.txt similarity index 74% rename from examples/complete_gaussian_process_config.txt rename to examples/gaussian_process_complete_config.txt index 598a4a1..2e50dbe 100644 --- a/examples/complete_gaussian_process_config.txt +++ b/examples/gaussian_process_complete_config.txt @@ -1,5 +1,5 @@ -#Controller Options -#------------------ +#Gaussian Process Complete Options +#--------------------------------- #General options max_num_runs = 100 #number of planned runs @@ -8,7 +8,7 @@ target_cost = 0.1 #cost to beat #Gaussian process options controller_type = 'gaussian_process' num_params = 2 #number of parameters -min_boundary = [-10.,-10.] #minimum boundary +min_boundary = [-10.,-10.] #minimum boundary max_boundary = [10.,10.] #maximum boundary length_scale = [1.0] #initial lengths scales for GP cost_has_noise = True #whether cost function has noise @@ -17,15 +17,16 @@ update_hyperparameters = True #whether noise level and lengths scales a trust_region = [5,5] #maximum move distance from best params default_bad_cost = 10 #default cost for bad run default_bad_uncertainty = 1 #default uncertainty for bad run -gp_archive_filename = 'cpg_run' #filename for archive -gp_archive_file_type = 'txt' #file type of archive +learner_archive_filename = 'a_word' #filename of gp archive +learner_archive_file_type = 'mat' #file type of archive predict_global_minima_at_end = True #find predicted global minima at end predict_local_minima_at_end = True #find all local minima of landscape at end +no_delay = True #whether to wait for the GP to make predictions or not. 
Default True (do not wait) #Training source options training_type = 'random' #training type can be random or nelder_mead first_params = [1.9,-1.0] #first parameters to try in initial training -gp_training_filename = None #training data filename +gp_training_filename = None #filename for training from previous experiment gp_training_file_type = 'pkl' #training data file type #if you use nelder_mead for the initial training source see the CompleteNelderMeadConfig.txt for options. diff --git a/examples/simple_gaussian_process_config.txt b/examples/gaussian_process_simple_config.txt similarity index 89% rename from examples/simple_gaussian_process_config.txt rename to examples/gaussian_process_simple_config.txt index 0fbb7de..a5f0c62 100644 --- a/examples/simple_gaussian_process_config.txt +++ b/examples/gaussian_process_simple_config.txt @@ -1,5 +1,5 @@ -#Controller Options -#----------------- +#Gaussian Process Basic Options +#------------------------------ #General options max_num_runs = 100 #number of planned runs diff --git a/examples/complete_logging_config.txt b/examples/logging_config.txt similarity index 100% rename from examples/complete_logging_config.txt rename to examples/logging_config.txt diff --git a/examples/complete_nelder_mead_config.txt b/examples/nelder_mead_complete_config.txt similarity index 91% rename from examples/complete_nelder_mead_config.txt rename to examples/nelder_mead_complete_config.txt index a8269b5..26243a3 100644 --- a/examples/complete_nelder_mead_config.txt +++ b/examples/nelder_mead_complete_config.txt @@ -1,5 +1,5 @@ -#Controller Options -#----------------- +#Nelder-Mead Complete Options +#---------------------------- #General options max_num_runs = 100 #number of planned runs diff --git a/examples/simple_nelder_mead_config.txt b/examples/nelder_mead_simple_config.txt similarity index 88% rename from examples/simple_nelder_mead_config.txt rename to examples/nelder_mead_simple_config.txt index 24b3738..b038981 100644 --- 
a/examples/simple_nelder_mead_config.txt +++ b/examples/nelder_mead_simple_config.txt @@ -1,5 +1,5 @@ -#Controller Options -#----------------- +#Nelder-Mead Basic Options +#------------------------- #General options max_num_runs = 100 #number of planned runs diff --git a/examples/python__controlled_experiment.py b/examples/python__controlled_experiment.py new file mode 100644 index 0000000..96572ec --- /dev/null +++ b/examples/python__controlled_experiment.py @@ -0,0 +1,74 @@ +#Imports for python 2 compatibility +from __future__ import absolute_import, division, print_function +__metaclass__ = type + +#Imports for M-LOOP +import mloop.interfaces as mli +import mloop.controllers as mlc +import mloop.visualizations as mlv + +#Other imports +import numpy as np +import time + +#Declare your custom class that inherets from the Interface class +class CustomInterface(mli.Interface): + + #Initialization of the interface, including this method is optional + def __init__(self): + #You must include the super command to call the parent class, Interface, constructor + super(CustomInterface,self).__init__() + + #Attributes of the interface can be added here + #If you want to precalculate any variables etc. 
this is the place to do it + #In this example we will just define the location of the minimum + self.minimum_params = np.array([0,0.1,-0.1]) + + #You must include the get_next_cost_dict method in your class + #this method is called whenever M-LOOP wants to run an experiment + def get_next_cost_dict(self,params_dict): + + #Get parameters from the provided dictionary + params = params_dict['params'] + + #Here you can include the code to run your experiment given a particular set of parameters + #In this example we will just evaluate a sum of sinc functions + cost = -np.sum(np.sinc(params - self.minimum_params)) + #There is no uncertainty in our result + uncer = 0 + #The evaluation will always be a success + bad = False + #Add a small time delay to mimic a real experiment + time.sleep(1) + + #The cost, uncertainty and bad boolean must all be returned as a dictionary + #You can include other variables you want to record as well if you want + cost_dict = {'cost':cost, 'uncer':uncer, 'bad':bad} + return cost_dict + +def main(): + #M-LOOP can be run with three commands + + #First create your interface + interface = CustomInterface() + #Next create the controller, provide it with your controller and any options you want to set + controller = mlc.create_controller(interface, + max_num_runs = 1000, + target_cost = -2.99, + num_params = 3, + min_boundary = [-2,-2,-2], + max_boundary = [2,2,2]) + #To run M-LOOP and find the optimal parameters just use the controller method optimize + controller.optimize() + + #The results of the optimization will be saved to files and can also be accessed as attributes of the controller. 
+ print('Best parameters found:') + print(controller.best_params) + + #You can also run the default sets of visualizations for the controller with one command + mlv.show_all_default_visualizations(controller) + + +#Ensures main is run when this code is run as a script +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/examples/complete_random_config.txt b/examples/random_complete_config.txt similarity index 89% rename from examples/complete_random_config.txt rename to examples/random_complete_config.txt index 64b80a8..f6f5889 100644 --- a/examples/complete_random_config.txt +++ b/examples/random_complete_config.txt @@ -1,5 +1,5 @@ -#Controller Options -#------------------ +#Random Complete Options +#----------------------- #General options max_num_runs = 20 #number of planned runs diff --git a/examples/simple_random_config.txt b/examples/random_simple_config.txt similarity index 90% rename from examples/simple_random_config.txt rename to examples/random_simple_config.txt index df94ba5..9e9d85a 100644 --- a/examples/simple_random_config.txt +++ b/examples/random_simple_config.txt @@ -1,5 +1,5 @@ -#Controller Options -#----------------- +#Random Basic Options +#-------------------- #General options max_num_runs = 10 #number of planned runs diff --git a/examples/shell_interface_config.txt b/examples/shell_interface_config.txt new file mode 100644 index 0000000..7fa786e --- /dev/null +++ b/examples/shell_interface_config.txt @@ -0,0 +1,6 @@ +#Command Line Interface Options +#------------------------------ + +interface_type = 'shell' #The type of interface +command = 'python shell_script.py' #The command for the command line to run the experiment to get a cost from the parameters +params_args_type = 'direct' #The format of the parameters when providing them on the command line. 'direct' simply appends them, e.g. python shell_script.py 7 2 1, 'named' names each parameter, e.g. 
python shell_script.py --param1 7 --param2 2 --param3 1 \ No newline at end of file diff --git a/examples/tutorial_config.txt b/examples/tutorial_config.txt new file mode 100644 index 0000000..cd07d29 --- /dev/null +++ b/examples/tutorial_config.txt @@ -0,0 +1,31 @@ +#Tutorial Config +#--------------- + +#Interface settings +interface_type = 'file' + +#Parameter settings +num_params = 2 #number of parameters +min_boundary = [-1,-1] #minimum boundary +max_boundary = [1,1] #maximum boundary +first_params = [0.5,0.5] #first parameters to try +trust_region = 0.4 #maximum % move distance from best params + +#Halting conditions +max_num_runs = 1000 #maximum number of runs +max_num_runs_without_better_params = 50 #maximum number of runs without finding better parameters +target_cost = 0.01 #optimization halts when a cost below this target is found + +#Learner options +cost_has_noise = True #whether the cost are corrupted by noise or not + +#Timing options +no_delay = True #wait for learner to make generate new parameters or use training algorithms + +#File format options +interface_file_type = 'txt' #file types of *exp_input.mat* and *exp_output.mat* +controller_archive_file_type = 'mat' #file type of the controller archive +learner_archive_file_type = 'pkl' #file type of the learner archive + +#Visualizations +visualizations = True \ No newline at end of file diff --git a/mloop/__init__.py b/mloop/__init__.py index 11fc455..9e53155 100644 --- a/mloop/__init__.py +++ b/mloop/__init__.py @@ -7,12 +7,10 @@ To contribute to the project or report a bug visit the project's github https://github.com/michaelhush/M-LOOP. 
''' +from __future__ import absolute_import, division, print_function +__metaclass__ = type import os -__version__= "2.0.0" -__all__ = ['controllers','interfaces','launchers','learners','testing','utilities','visualizations'] - -#Add a null handler in case the user does not run config_logger() before running the optimization -import logging -logging.getLogger(__name__).addHandler(logging.NullHandler()) \ No newline at end of file +__version__= "2.1.1" +__all__ = ['controllers','interfaces','launchers','learners','testing','utilities','visualizations','cmd'] \ No newline at end of file diff --git a/bin/M-LOOP b/mloop/cmd.py similarity index 57% rename from bin/M-LOOP rename to mloop/cmd.py index 38e3048..22ae662 100644 --- a/bin/M-LOOP +++ b/mloop/cmd.py @@ -1,25 +1,29 @@ -#! /usr/bin/env python - ''' -M-LOOP Launcher - -Starts an instance of M-LOOP configured using a configuration file. - -Takes the following command line options - --c filename for configuration file - --h display help - -the default name for the configuration is "ExpConfig.txt" +Module of command line tools that can be used to execute mloop. ''' +from __future__ import absolute_import, division, print_function +__metaclass__ = type import sys import argparse import mloop as ml import mloop.launchers as mll +import multiprocessing as mp -def main(argv): +def run_mloop(): + ''' + M-LOOP Launcher + + Starts an instance of M-LOOP configured using a configuration file. 
+ + Takes the following command line options + + -c filename for configuration file + + -h display help + + the default name for the configuration is "ExpConfig.txt" + ''' parser = argparse.ArgumentParser(description='M-LOOP Launcher \n Version:' + ml.__version__+'\n \n Starts a new instance of M-LOOP based a on configuration file.', formatter_class=argparse.RawDescriptionHelpFormatter) @@ -32,8 +36,4 @@ def main(argv): _ = mll.launch_from_file(config_filename) -if __name__=="__main__": - main(sys.argv[1:]) - - - + \ No newline at end of file diff --git a/mloop/controllers.py b/mloop/controllers.py index 7070dc9..d018367 100644 --- a/mloop/controllers.py +++ b/mloop/controllers.py @@ -1,26 +1,27 @@ ''' Module of all the controllers used in M-LOOP. The controllers, as the name suggests, control the interface to the experiment and all the learners employed to find optimal parameters. ''' +from __future__ import absolute_import, division, print_function +__metaclass__ = type -import queue import datetime import mloop.utilities as mlu import mloop.learners as mll import mloop.interfaces as mli -import multiprocessing as mp -import logging.handlers +import logging import os -controller_dict = {'random':1,'nelder_mead':2,'gaussian_process':3} -number_of_controllers = 3 +controller_dict = {'random':1,'nelder_mead':2,'gaussian_process':3,'differential_evolution':4} +number_of_controllers = 4 default_controller_archive_filename = 'controller_archive' +default_controller_archive_file_type = 'txt' class ControllerInterrupt(Exception): ''' Exception that is raised when the controlled is ended with the end flag or event. 
''' def __init__(self): - super().__init__() + super(ControllerInterrupt,self).__init__() def create_controller(interface, controller_type='gaussian_process', @@ -46,6 +47,8 @@ def create_controller(interface, controller_type = str(controller_type) if controller_type=='gaussian_process': controller = GaussianProcessController(interface, **controller_config_dict) + elif controller_type=='differential_evolution': + controller = DifferentialEvolutionController(interface, **controller_config_dict) elif controller_type=='nelder_mead': controller = NelderMeadController(interface, **controller_config_dict) elif controller_type=='random': @@ -74,11 +77,11 @@ class Controller(): Keyword Args: max_num_runs (Optional [float]): The number of runs before the controller stops. If set to float('+inf') the controller will run forever. Default float('inf'), meaning the controller will run until another condition is met. target_cost (Optional [float]): The target cost for the run. If a run achieves a cost lower than the target, the controller is stopped. Default float('-inf'), meaning the controller will run until another condition is met. - max_repeats_without_better_params (Otional [float]): Puts a limit on the number of runs are allowed before a new better set of parameters is found. Default float('inf'), meaning the controller will run until another condition is met. + max_num_runs_without_better_params (Otional [float]): Puts a limit on the number of runs are allowed before a new better set of parameters is found. Default float('inf'), meaning the controller will run until another condition is met. controller_archive_filename (Optional [string]): Filename for archive. Contains costs, parameter history and other details depending on the controller type. Default 'ControllerArchive.mat' controller_archive_file_type (Optional [string]): File type for archive. 
Can be either 'txt' a human readable text file, 'pkl' a python dill file, 'mat' a matlab file or None if there is no archive. Default 'mat'. archive_extra_dict (Optional [dict]): A dictionary with any extra variables that are to be saved to the archive. If None, nothing is added. Default None. - start_datetime (datetime): Datetime for when controller was started. + start_datetime (Optional datetime): Datetime for when controller was started. Attributes: params_out_queue (queue): Queue for parameters to next be run by experiment. @@ -88,7 +91,6 @@ class Controller(): learner_params_queue (queue): The parameters queue for the learner learner_costs_queue (queue): The costs queue for the learner end_learner (event): Event used to trigger the end of the learner - log_queue (queue): Queue used to safely pipe log data from the learner num_in_costs (int): Counter for the number of costs received. num_out_params (int): Counter for the number of parameters received. out_params (list): List of all parameters sent out by controller. @@ -105,13 +107,17 @@ class Controller(): def __init__(self, interface, max_num_runs = float('+inf'), target_cost = float('-inf'), - max_repeats_without_better_params = float('+inf'), + max_num_runs_without_better_params = float('+inf'), controller_archive_filename=default_controller_archive_filename, - controller_archive_file_type='pkl', + controller_archive_file_type=default_controller_archive_file_type, archive_extra_dict = None, start_datetime = None, **kwargs): + #Make logger + self.remaining_kwargs = mlu._config_logger(**kwargs) + self.log = logging.getLogger(__name__) + #Variable that are included in archive self.num_in_costs = 0 self.num_out_params = 0 @@ -145,13 +151,6 @@ def __init__(self, interface, self.end_learner = None self.learner = None - #Create a logger that is multiprocessing safe. 
- self.log = logging.getLogger(__name__) - self.log_queue = mp.Queue() - self.log_queue_listener = logging.handlers.QueueListener(self.log_queue, - *logging.getLogger('mloop').handlers, - respect_handler_level=True) - #Variables set by user #save interface and extract important variables @@ -164,7 +163,6 @@ def __init__(self, interface, self.params_out_queue = interface.params_out_queue self.costs_in_queue = interface.costs_in_queue self.end_interface = interface.end_event - self.interface.add_mp_safe_log(self.log_queue) #Other options if start_datetime is None: @@ -176,9 +174,9 @@ def __init__(self, interface, self.log.error('Number of runs must be greater than zero. max_num_runs:'+repr(self.max_num_run)) raise ValueError self.target_cost = float(target_cost) - self.max_repeats_without_better_params = float(max_repeats_without_better_params) - if self.max_repeats_without_better_params<=0: - self.log.error('Max number of repeats must be greater than zero. max_num_runs:'+repr(max_repeats_without_better_params)) + self.max_num_runs_without_better_params = float(max_num_runs_without_better_params) + if self.max_num_runs_without_better_params<=0: + self.log.error('Max number of repeats must be greater than zero. max_num_runs:'+repr(max_num_runs_without_better_params)) raise ValueError if mlu.check_file_type_supported(controller_archive_file_type): @@ -209,18 +207,16 @@ def __init__(self, interface, if archive_extra_dict is not None: self.archive_dict.update(archive_extra_dict) - self.remaining_kwargs = kwargs - self.log.debug('Controller init completed.') def check_end_conditions(self): ''' - Check whether either of the three end contions have been met: number_of_runs, target_cost or max_repeats_without_better_params. + Check whether either of the three end contions have been met: number_of_runs, target_cost or max_num_runs_without_better_params. Returns: bool : True, if the controlled should continue, False if the controller should end. 
''' - return (self.num_in_costs < self.max_num_runs) and (self.best_cost > self.target_cost) and (self.num_last_best_cost < self.max_repeats_without_better_params) + return (self.num_in_costs < self.max_num_runs) and (self.best_cost > self.target_cost) and (self.num_last_best_cost < self.max_num_runs_without_better_params) def _update_controller_with_learner_attributes(self): ''' @@ -229,7 +225,6 @@ def _update_controller_with_learner_attributes(self): self.learner_params_queue = self.learner.params_out_queue self.learner_costs_queue = self.learner.costs_in_queue self.end_learner = self.learner.end_event - self.learner.add_mp_safe_log(self.log_queue) self.remaining_kwargs = self.learner.remaining_kwargs self.archive_dict.update({'num_params':self.learner.num_params, @@ -256,8 +251,8 @@ def _put_params_and_out_dict(self, params, param_type=None, **kwargs): self.out_extras.append(kwargs) if param_type is not None: self.out_type.append(param_type) - self.log.debug('Controller params=' + repr(params)) - self.log.debug('Put params num:' + repr(self.num_out_params )) + self.log.info('params ' + str(params)) + #self.log.debug('Put params num:' + repr(self.num_out_params )) def _get_cost_and_in_dict(self): ''' @@ -268,7 +263,7 @@ def _get_cost_and_in_dict(self): while True: try: in_dict = self.costs_in_queue.get(True, self.controller_wait) - except queue.Empty: + except mlu.empty_exception: continue else: break @@ -287,7 +282,7 @@ def _get_cost_and_in_dict(self): except ValueError: self.log.error('One of the values you provided in the cost dict could not be converted into the right type.') raise - if self.curr_bad and 'cost' in dict: + if self.curr_bad and ('cost' in in_dict): self.log.warning('The cost provided with the bad run will be saved, but not used by the learners.') self.in_costs.append(self.curr_cost) @@ -301,8 +296,11 @@ def _get_cost_and_in_dict(self): self.best_index = self.num_in_costs self.best_params = self.curr_params self.num_last_best_cost = 0 - 
self.log.debug('Controller cost=' + repr(self.curr_cost)) - self.log.debug('Got cost num:' + repr(self.num_in_costs)) + if self.curr_bad: + self.log.info('bad run') + else: + self.log.info('cost ' + str(self.curr_cost) + ' +/- ' + str(self.curr_uncer)) + #self.log.debug('Got cost num:' + repr(self.num_in_costs)) def save_archive(self): ''' @@ -336,6 +334,8 @@ def optimize(self): self._start_up() self._optimization_routine() log.info('Controller finished. Closing down M-LOOP. Please wait a moment...') + except ControllerInterrupt: + self.log.warning('Controller ended by interruption.') except (KeyboardInterrupt,SystemExit): log.warning('!!! Do not give the interrupt signal again !!! \n M-LOOP stopped with keyboard interupt or system exit. Please wait at least 1 minute for the threads to safely shut down. \n ') log.warning('Closing down controller.') @@ -353,7 +353,6 @@ def _start_up(self): ''' Start the learner and interface threads/processes. ''' - self.log_queue_listener.start() self.learner.start() self.interface.start() @@ -365,24 +364,26 @@ def _shut_down(self): self.end_learner.set() self.log.debug('Interface end event set.') self.end_interface.set() - self.learner.join() - self.log.debug('Learner joined.') #After 3 or 4 executions of mloop in same python environment, sometimes excution can be trapped here #Likely to be a bug with multiprocessing in python, but difficult to isolate. 
#current solution is to join with a timeout and kill if that fails - self.interface.join(self.interface.interface_wait*3) - if self.interface.is_alive(): - self.log.debug('Interface did not join in time had to terminate.') - self.interface.terminate() + self.learner.join() + self.log.debug('Learner joined.') + self.interface.join() self.log.debug('Interface joined.') - self.save_archive() - self.log_queue_listener.stop() - self.log.debug('Log listener stopped') + self.save_archive() def print_results(self): ''' Print results from optimization run to the logs ''' + self.log.info('Optimization ended because:-') + if self.num_in_costs >= self.max_num_runs: + self.log.info('Maximum number of runs reached.') + if self.best_cost <= self.target_cost: + self.log.info('Target cost reached.') + if self.num_last_best_cost >= self.max_num_runs_without_better_params: + self.log.info('Maximum number of runs without better params reached.') self.log.info('Results:-') self.log.info('Best parameters found:' + str(self.best_params)) self.log.info('Best cost returned:' + str(self.best_cost) + ' +/- ' + str(self.best_uncer)) @@ -393,22 +394,19 @@ def _optimization_routine(self): Runs controller main loop. Gives parameters to experiment and saves costs returned. 
''' self.log.debug('Start controller loop.') - try: - next_params = self._first_params() - self._put_params_and_out_dict(next_params) + self.log.info('Run:' + str(self.num_in_costs +1)) + next_params = self._first_params() + self._put_params_and_out_dict(next_params) + self.save_archive() + self._get_cost_and_in_dict() + while self.check_end_conditions(): self.log.info('Run:' + str(self.num_in_costs +1)) + next_params = self._next_params() + self._put_params_and_out_dict(next_params) self.save_archive() self._get_cost_and_in_dict() - while self.check_end_conditions(): - next_params = self._next_params() - self._put_params_and_out_dict(next_params) - self.log.info('Run:' + str(self.num_in_costs +1)) - self.save_archive() - self._get_cost_and_in_dict() - self.log.debug('End controller loop.') - except ControllerInterrupt: - self.log.warning('Controller ended by interruption.') - + self.log.debug('End controller loop.') + def _first_params(self): ''' Checks queue to get first parameters. @@ -441,8 +439,9 @@ class RandomController(Controller): ''' def __init__(self, interface,**kwargs): - super().__init__(interface, **kwargs) + super(RandomController,self).__init__(interface, **kwargs) self.learner = mll.RandomLearner(start_datetime = self.start_datetime, + learner_archive_filename=None, **self.remaining_kwargs) self._update_controller_with_learner_attributes() @@ -472,7 +471,7 @@ class NelderMeadController(Controller): ''' def __init__(self, interface, **kwargs): - super().__init__(interface, **kwargs) + super(NelderMeadController,self).__init__(interface, **kwargs) self.learner = mll.NelderMeadLearner(start_datetime = self.start_datetime, **self.remaining_kwargs) @@ -491,6 +490,37 @@ def _next_params(self): self.learner_costs_queue.put(cost) return self.learner_params_queue.get() +class DifferentialEvolutionController(Controller): + ''' + Controller for the differential evolution learner. 
+ + Args: + params_out_queue (queue): Queue for parameters to next be run by experiment. + costs_in_queue (queue): Queue for costs (and other details) that have been returned by experiment. + **kwargs (Optional [dict]): Dictionary of options to be passed to Controller parent class and differential evolution learner. + ''' + def __init__(self, interface, + **kwargs): + super(DifferentialEvolutionController,self).__init__(interface, **kwargs) + + self.learner = mll.DifferentialEvolutionLearner(start_datetime = self.start_datetime, + **self.remaining_kwargs) + + self._update_controller_with_learner_attributes() + self.out_type.append('differential_evolution') + + def _next_params(self): + ''' + Gets next parameters from differential evolution learner. + ''' + if self.curr_bad: + cost = float('inf') + else: + cost = self.curr_cost + self.learner_costs_queue.put(cost) + return self.learner_params_queue.get() + + class GaussianProcessController(Controller): @@ -508,15 +538,17 @@ class GaussianProcessController(Controller): ''' def __init__(self, interface, - training_type='random', + training_type='differential_evolution', num_training_runs=None, no_delay=True, num_params=None, min_boundary=None, max_boundary=None, trust_region=None, + learner_archive_filename = mll.default_learner_archive_filename, + learner_archive_file_type = mll.default_learner_archive_file_type, **kwargs): - super().__init__(interface, **kwargs) + super(GaussianProcessController,self).__init__(interface, **kwargs) self.last_training_cost = None self.last_training_bad = None @@ -541,6 +573,8 @@ def __init__(self, interface, min_boundary=min_boundary, max_boundary=max_boundary, trust_region=trust_region, + learner_archive_filename=None, + learner_archive_file_type=learner_archive_file_type, **self.remaining_kwargs) elif self.training_type == 'nelder_mead': @@ -548,7 +582,21 @@ def __init__(self, interface, num_params=num_params, min_boundary=min_boundary, max_boundary=max_boundary, + 
learner_archive_filename=None, + learner_archive_file_type=learner_archive_file_type, **self.remaining_kwargs) + + elif self.training_type == 'differential_evolution': + self.learner = mll.DifferentialEvolutionLearner(start_datetime=self.start_datetime, + num_params=num_params, + min_boundary=min_boundary, + max_boundary=max_boundary, + trust_region=trust_region, + evolution_strategy='rand2', + learner_archive_filename=None, + learner_archive_file_type=learner_archive_file_type, + **self.remaining_kwargs) + else: self.log.error('Unknown training type provided to Gaussian process controller:' + repr(training_type)) @@ -560,21 +608,22 @@ def __init__(self, interface, min_boundary=min_boundary, max_boundary=max_boundary, trust_region=trust_region, + learner_archive_filename=learner_archive_filename, + learner_archive_file_type=learner_archive_file_type, **self.remaining_kwargs) self.gp_learner_params_queue = self.gp_learner.params_out_queue self.gp_learner_costs_queue = self.gp_learner.costs_in_queue self.end_gp_learner = self.gp_learner.end_event self.new_params_event = self.gp_learner.new_params_event - self.gp_learner.add_mp_safe_log(self.log_queue) self.remaining_kwargs = self.gp_learner.remaining_kwargs self.generation_num = self.gp_learner.generation_num - + def _put_params_and_out_dict(self, params): ''' Override _put_params_and_out_dict function, used when the training learner creates parameters. Makes the defualt param_type the training type and sets last_training_run_flag. ''' - super()._put_params_and_out_dict(params, param_type=self.training_type) + super(GaussianProcessController,self)._put_params_and_out_dict(params, param_type=self.training_type) self.last_training_run_flag = True def _get_cost_and_in_dict(self): @@ -582,7 +631,7 @@ def _get_cost_and_in_dict(self): Call _get_cost_and_in_dict() of parent Controller class. But also sends cost to Gaussian process learner and saves the cost if the parameters came from a trainer. 
''' - super()._get_cost_and_in_dict() + super(GaussianProcessController,self)._get_cost_and_in_dict() if self.last_training_run_flag: self.last_training_cost = self.curr_cost self.last_training_bad = self.curr_bad @@ -596,10 +645,20 @@ def _next_params(self): ''' Gets next parameters from training learner. ''' - if self.training_type == 'nelder_mead': - temp = NelderMeadController._next_params(self) + if self.training_type == 'differential_evolution' or self.training_type == 'nelder_mead': + #Copied from NelderMeadController + if self.last_training_bad: + cost = float('inf') + else: + cost = self.last_training_cost + self.learner_costs_queue.put(cost) + temp = self.learner_params_queue.get() + elif self.training_type == 'random': - temp = RandomController._next_params(self) + #Copied from RandomController + self.learner_costs_queue.put(self.best_params) + temp = self.learner_params_queue.get() + else: self.log.error('Unknown training type called. THIS SHOULD NOT HAPPEN') return temp @@ -608,7 +667,7 @@ def _start_up(self): ''' Runs pararent method and also starts training_learner. ''' - super()._start_up() + super(GaussianProcessController,self)._start_up() self.log.debug('GP learner started.') self.gp_learner.start() @@ -616,42 +675,50 @@ def _optimization_routine(self): ''' Overrides _optimization_routine. Uses the parent routine for the training runs. Implements a customized _optimization_rountine when running the Gaussian Process learner. ''' - #Run the training runs using the standard optimization routine. Adjust the number of max_runs - save_max_num_runs = self.max_num_runs - self.max_num_runs = self.num_training_runs - 1 + #Run the training runs using the standard optimization routine. 
self.log.debug('Starting training optimization.') - super()._optimization_routine() - - #Start last training run - next_params = self._next_params() - self._put_params_and_out_dict(next_params) - - #Begin GP optimization routine - self.max_num_runs = save_max_num_runs - - self.log.debug('Starting GP optimization.') - self.new_params_event.set() self.log.info('Run:' + str(self.num_in_costs +1)) + next_params = self._first_params() + self._put_params_and_out_dict(next_params) self.save_archive() self._get_cost_and_in_dict() + while (self.num_in_costs < self.num_training_runs) and self.check_end_conditions(): + self.log.info('Run:' + str(self.num_in_costs +1)) + next_params = self._next_params() + self._put_params_and_out_dict(next_params) + self.save_archive() + self._get_cost_and_in_dict() + + if self.check_end_conditions(): + #Start last training run + self.log.info('Run:' + str(self.num_in_costs +1)) + next_params = self._next_params() + self._put_params_and_out_dict(next_params) + + self.log.debug('Starting GP optimization.') + self.new_params_event.set() + self.save_archive() + self._get_cost_and_in_dict() + self.log.debug('End training runs.') + + gp_consec = 0 + gp_count = 0 - gp_consec = 0 - gp_count = 0 while self.check_end_conditions(): + self.log.info('Run:' + str(self.num_in_costs +1)) if gp_consec==self.generation_num or (self.no_delay and self.gp_learner_params_queue.empty()): next_params = self._next_params() self._put_params_and_out_dict(next_params) gp_consec = 0 else: next_params = self.gp_learner_params_queue.get() - super()._put_params_and_out_dict(next_params, param_type='gaussian_process') + super(GaussianProcessController,self)._put_params_and_out_dict(next_params, param_type='gaussian_process') gp_consec += 1 gp_count += 1 if gp_count%self.generation_num == 2: self.new_params_event.set() - self.log.info('Run:' + str(self.num_in_costs +1)) self.save_archive() self._get_cost_and_in_dict() @@ -663,6 +730,7 @@ def _shut_down(self): 
self.log.debug('GP learner end set.') self.end_gp_learner.set() self.gp_learner.join() + self.log.debug('GP learner joined') last_dict = None while not self.gp_learner_params_queue.empty(): @@ -684,14 +752,14 @@ def _shut_down(self): self.archive_dict.update(last_dict) else: if self.gp_learner.predict_global_minima_at_end or self.gp_learner.predict_local_minima_at_end: - self.log.warning('GP Learner may not have closed properly unable to get best and/or all minima.') - super()._shut_down() + self.log.info('GP Learner did not provide best and/or all minima.') + super(GaussianProcessController,self)._shut_down() def print_results(self): ''' Adds some additional output to the results specific to controller. ''' - super().print_results() + super(GaussianProcessController,self).print_results() try: self.log.info('Predicted best parameters:' + str(self.predicted_best_parameters)) self.log.info('Predicted best cost:' + str(self.predicted_best_cost) + ' +/- ' + str(self.predicted_best_uncertainty)) diff --git a/mloop/interfaces.py b/mloop/interfaces.py index bc94ad4..589fea7 100644 --- a/mloop/interfaces.py +++ b/mloop/interfaces.py @@ -1,10 +1,15 @@ ''' Module of the interfaces used to connect the controller to the experiment. ''' +from __future__ import absolute_import, division, print_function +__metaclass__ = type import time +import subprocess as sp +import numpy as np import os -import queue +import sys +import threading import multiprocessing as mp import mloop.utilities as mlu import mloop.testing as mlt @@ -16,7 +21,7 @@ def create_interface(interface_type='file', Start a new interface with the options provided. Args: - interface_type (Optional [str]): Defines the type of interface, currently the only option is 'file'. Default 'file'. + interface_type (Optional [str]): Defines the type of interface, can be 'file', 'shell' or 'test'. Default 'file'. **interface_config_dict : Options to be passed to interface. 
Returns: @@ -25,23 +30,31 @@ def create_interface(interface_type='file', log = logging.getLogger(__name__) if interface_type=='file': - file_interface = FileInterface(**interface_config_dict) + interface = FileInterface(**interface_config_dict) log.info('Using the file interface with the experiment.') + elif interface_type == 'shell': + interface = ShellInterface(**interface_config_dict) + log.info('Using the command line interface with the experiment.') + elif interface_type == 'test': + interface = TestInterface(**interface_config_dict) + log.info('Using the test interface with the experiment.') else: log.error('Unknown interface type:' + repr(interface_type)) raise ValueError - return file_interface + + + return interface class InterfaceInterrupt(Exception): ''' Exception that is raised when the interface is ended with the end event, or some other interruption. ''' def __init__(self): - super().__init__() + super(InterfaceInterrupt,self).__init__() -class Interface(mp.Process): +class Interface(threading.Thread): ''' A abstract class for interfaces which populate the costs_in_queue and read from the params_out_queue. Inherits from Thread @@ -55,15 +68,16 @@ class Interface(mp.Process): params_out_queue (queue): Queue for parameters to next be run by experiment. costs_in_queue (queue): Queue for costs (and other details) that have been returned by experiment. end_event (event): Event which triggers the end of the interface. 
- - + ''' - + def __init__(self, interface_wait = 1, **kwargs): - super().__init__() + super(Interface,self).__init__() + + self.remaining_kwargs = mlu._config_logger(**kwargs) self.log = logging.getLogger(__name__) self.log.debug('Creating interface.') @@ -75,17 +89,6 @@ def __init__(self, if self.interface_wait<=0: self.log.error('Interface wait time must be a positive number.') raise ValueError - - self.remaining_kwargs = kwargs - - def add_mp_safe_log(self,log_queue): - ''' - Add a multiprocess safe log based using a queue (which is presumed to be listened to by a QueueListener). - ''' - self.log = logging.getLogger(__name__) - que_handler = logging.handlers.QueueHandler(log_queue) - self.log.addHandler(que_handler) - self.log.propagate = False def run(self): ''' @@ -97,17 +100,17 @@ def run(self): while not self.end_event.is_set(): try: params_dict = self.params_out_queue.get(True, self.interface_wait) - except queue.Empty: + except mlu.empty_exception: continue else: - cost_dict = self._get_next_cost_dict(params_dict) + cost_dict = self.get_next_cost_dict(params_dict) self.costs_in_queue.put(cost_dict) except InterfaceInterrupt: pass self.log.debug('Interface ended') #self.log = None - def _get_next_cost_dict(self,params_dict): + def get_next_cost_dict(self,params_dict): ''' Abstract method. This is the only method that needs to be implemented to make a working interface. Given the parameters the interface must then produce a new cost. This may occur by running an experiment or program. If you wish to abruptly end this interface for whatever rease please raise the exception InterfaceInterrupt, which will then be safely caught. @@ -128,39 +131,33 @@ class FileInterface(Interface): costs_in_queue (queue): Queue for costs (and other details) that have been returned by experiment. Keyword Args: - out_filename (Optional [string]): filename for file written with parameters. 
- out_file_type (Optional [string]): currently supports: 'txt' where the output is a text file with the parameters as a list of numbers, and 'mat' a matlab file with variable parameters with the next_parameters. Default is 'mat'. - in_filename (Optional [string]): filename for file written with parameters. - in_file_type (Optional [string]): file type to be written either 'mat' for matlab or 'txt' for readible text file. Defaults to 'mat'. + interface_out_filename (Optional [string]): filename for file written with parameters. + interface_in_filename (Optional [string]): filename for file written with parameters. + interface_file_type (Optional [string]): file type to be written either 'mat' for matlab or 'txt' for readible text file. Defaults to 'txt'. ''' def __init__(self, - out_filename=mlu.default_out_filename, - out_file_type=mlu.default_out_file_type, - in_filename=mlu.default_in_filename, - in_file_type=mlu.default_in_file_type, + interface_out_filename=mlu.default_interface_out_filename, + interface_in_filename=mlu.default_interface_in_filename, + interface_file_type=mlu.default_interface_file_type, **kwargs): - super().__init__(**kwargs) + super(FileInterface,self).__init__(**kwargs) self.out_file_count = 0 self.in_file_count = 0 - if mlu.check_file_type_supported(out_file_type): - self.out_file_type = str(out_file_type) + if mlu.check_file_type_supported(interface_file_type): + self.out_file_type = str(interface_file_type) + self.in_file_type = str(interface_file_type) else: - self.log.error('File out type is not supported:' + out_file_type) - self.out_filename = str(out_filename) + self.log.error('File out type is not supported:' + interface_file_type) + self.out_filename = str(interface_out_filename) self.total_out_filename = self.out_filename + '.' 
+ self.out_file_type - if mlu.check_file_type_supported(in_file_type): - self.in_file_type = str(in_file_type) - else: - self.log.error('File in type is not supported:' + in_file_type) - raise ValueError - self.in_filename = str(in_filename) + self.in_filename = str(interface_in_filename) self.total_in_filename = self.in_filename + '.' + self.in_file_type - def _get_next_cost_dict(self,params_dict): + def get_next_cost_dict(self,params_dict): ''' Implementation of file read in and out. Put parameters into a file and wait for a cost file to be returned. ''' @@ -170,6 +167,7 @@ def _get_next_cost_dict(self,params_dict): mlu.save_dict_to_file(self.last_params_dict,self.total_out_filename,self.out_file_type) while not self.end_event.is_set(): if os.path.isfile(self.total_in_filename): + time.sleep(mlu.filewrite_wait) #wait for file to be written to disk try: in_dict = mlu.get_dict_from_file(self.total_in_filename, self.in_file_type) except IOError: @@ -206,18 +204,14 @@ def __init__(self, test_landscape=None, **kwargs): - super().__init__(**kwargs) + super(TestInterface,self).__init__(**kwargs) if test_landscape is None: self.test_landscape = mlt.TestLandscape() else: self.test_landscape = test_landscape self.test_count = 0 - def add_mp_safe_log(self,log_queue): - super().add_mp_safe_log(log_queue) - self.test_landscape.add_mp_safe_log(log_queue) - - def _get_next_cost_dict(self, params_dict): + def get_next_cost_dict(self, params_dict): ''' Test implementation. Gets the next cost from the test_landscape. ''' @@ -231,8 +225,96 @@ def _get_next_cost_dict(self, params_dict): raise cost_dict = self.test_landscape.get_cost_dict(params) return cost_dict + + +class ShellInterface(Interface): + ''' + Interface for running programs from the shell. + + Args: + params_out_queue (queue): Queue for parameters to next be run by experiment. + costs_in_queue (queue): Queue for costs (and other details) that have been returned by experiment. 
+ + Keyword Args: + command (Optional [string]): The command used to run the experiment. Default './run_exp' + params_args_type (Optional [string]): The style used to pass parameters. Can be 'direct' or 'named'. If 'direct' it is assumed the parameters are fed directly to the program. For example if I wanted to run the parameters [7,5,9] with the command './run_exp' I would use the syntax:: + + ./run_exp 7 5 9 + + 'named' on the other hand requires an option for each parameter. The options should be name --param1, --param2 etc. The same example as before would be :: + + ./run_exp --param1 7 --param2 5 --param3 9 + + Default 'direct'. + ''' + + def __init__(self, + command = './run_exp', + params_args_type = 'direct', + **kwargs): + + super(ShellInterface,self).__init__(**kwargs) + + #User defined variables + self.command = str(command) + if params_args_type == 'direct' or params_args_type == 'named': + self.params_args_type = str(params_args_type) + else: + self.log.error('params_args_type not recognized: ' + repr(params_args_type)) + + #Counters + self.command_count = 0 + + def get_next_cost_dict(self,params_dict): + ''' + Implementation of running a command with parameters on the command line and reading the result. + ''' + self.command_count += 1 + self.log.debug('Running command count' + repr(self.command_count)) + self.last_params_dict = params_dict + params = params_dict['params'] + curr_command = self.command + + if self.params_args_type == 'direct': + for p in params: + curr_command += ' ' + str(p) + elif self.params_args_type == 'named': + for ind,p in enumerate(params): + curr_command += ' ' + '--param' + str(ind +1) + ' ' + str(p) + else: + self.log.error('THIS SHOULD NOT HAPPEN. 
params_args_type not recognized') + + #execute command and look at output + cli_return = sp.check_output(curr_command.split()).decode(sys.stdout.encoding) + print(cli_return) + + tdict_string = '' + take_flag = False + for line in cli_return.splitlines(): + temp = (line.partition('#')[0]).strip('\n').strip() + if temp == 'M-LOOP_start' or temp == 'MLOOP_start': + take_flag = True + elif temp == 'M-LOOP_end' or temp == 'MLOOP_end': + take_flag = False + elif take_flag: + tdict_string += temp + ',' + + print(tdict_string) + + #Setting up words for parsing a dict, ignore eclipse warnings + array = np.array #@UnusedVariable + inf = float('inf') #@UnusedVariable + nan = float('nan') #@UnusedVariable + tdict = eval('dict('+tdict_string+')') + + return tdict + + + + + \ No newline at end of file diff --git a/mloop/launchers.py b/mloop/launchers.py index bedb3f7..a41e378 100644 --- a/mloop/launchers.py +++ b/mloop/launchers.py @@ -1,6 +1,9 @@ ''' Modules of launchers used to start M-LOOP. ''' +from __future__ import absolute_import, division, print_function +__metaclass__ = type + import logging import mloop.utilities as mlu import mloop.controllers as mlc @@ -24,11 +27,8 @@ def launch_from_file(config_filename, except (IOError, OSError): print('Unable to open M-LOOP configuration file:' + repr(config_filename)) raise - file_kwargs.update(kwargs) #Main run sequence - #Create controller and extract unused keywords - file_kwargs = mlu._config_logger(**file_kwargs) #Create interface and extract unused keywords interface = mli.create_interface(**file_kwargs) file_kwargs = interface.remaining_kwargs diff --git a/mloop/learners.py b/mloop/learners.py index 42c5485..b4e8b76 100644 --- a/mloop/learners.py +++ b/mloop/learners.py @@ -3,23 +3,26 @@ Each learner is created and controlled by a controller. 
''' -import queue -import multiprocessing as mp +from __future__ import absolute_import, division, print_function +__metaclass__ = type + import threading import numpy as np +import random import numpy.random as nr import scipy.optimize as so -import logging.handlers +import logging import datetime import os import mloop.utilities as mlu import sklearn.gaussian_process as skg import sklearn.gaussian_process.kernels as skk import sklearn.preprocessing as skp +import multiprocessing as mp learner_thread_count = 0 -default_nelder_mead_archive_filename = 'nelder_mead_archive' -default_gp_archive_filename = 'gaussian_process_archive' +default_learner_archive_filename = 'learner_archive' +default_learner_archive_file_type = 'txt' class LearnerInterrupt(Exception): ''' @@ -29,7 +32,7 @@ def __init__(self): ''' Create LearnerInterrupt. ''' - super().__init__() + super(LearnerInterrupt,self).__init__() class Learner(): @@ -43,10 +46,10 @@ class Learner(): min_boundary (Optional [array]): Array with minimimum values allowed for each parameter. Note if certain values have no minimum value you can set them to -inf for example [-1, 2, float('-inf')] is a valid min_boundary. If None sets all the boundaries to '-1'. Default None. max_boundary (Optional [array]): Array with maximum values allowed for each parameter. Note if certain values have no maximum value you can set them to +inf for example [0, float('inf'),3,-12] is a valid max_boundary. If None sets all the boundaries to '1'. Default None. learner_archive_filename (Optional [string]): Name for python archive of the learners current state. If None, no archive is saved. Default None. But this is typically overloaded by the child class. - log_queue (Optional [queue]): Queue for sending log messages to main logger. If None, default behavoir is to send warnings and above to console level. Default None. + learner_archive_file_type (Optional [string]): File type for archive. 
Can be either 'txt' a human readable text file, 'pkl' a python dill file, 'mat' a matlab file or None if there is no archive. Default 'mat'. log_level (Optional [int]): Level for the learners logger. If None, set to warning. Default None. start_datetime (Optional [datetime]): Start date time, if None, is automatically generated. - + Attributes: params_out_queue (queue): Queue for parameters created by learner. costs_in_queue (queue): Queue for costs to be used by learner. @@ -57,17 +60,16 @@ def __init__(self, num_params=None, min_boundary=None, max_boundary=None, - learner_archive_filename=None, - learner_archive_file_type='pkl', + learner_archive_filename=default_learner_archive_filename, + learner_archive_file_type=default_learner_archive_file_type, start_datetime=None, **kwargs): - super().__init__() + super(Learner,self).__init__() global learner_thread_count - learner_thread_count += 1 - - self.log = logging.getLogger(__name__) + learner_thread_count += 1 + self.log = logging.getLogger(__name__ + '.' + str(learner_thread_count)) self.learner_wait=float(1) @@ -127,16 +129,7 @@ def __init__(self, 'start_datetime':mlu.datetime_to_string(self.start_datetime)} self.log.debug('Learner init completed.') - - def add_mp_safe_log(self,log_queue): - ''' - Add a multiprocess safe log based using a queue (which is presumed to be listened to by a QueueListener). - ''' - self.log = logging.getLogger(__name__ + '.' + str(learner_thread_count)) - que_handler = logging.handlers.QueueHandler(log_queue) - self.log.addHandler(que_handler) - self.log.propagate = False - + def check_num_params(self,param): ''' Check the number of parameters is right. 
@@ -181,27 +174,27 @@ def put_params_and_get_cost(self, params, **kwargs): Returns: cost from the cost queue ''' - self.log.debug('Learner params='+repr(params)) + #self.log.debug('Learner params='+repr(params)) if not self.check_num_params(params): self.log.error('Incorrect number of parameters sent to queue.Params' + repr(params)) raise ValueError if not self.check_in_boundary(params): self.log.warning('Parameters sent to queue are not within boundaries. Params:' + repr(params)) - self.log.debug('Learner puts params.') + #self.log.debug('Learner puts params.') self.params_out_queue.put(params) - self.log.debug('Learner waiting for costs.') + #self.log.debug('Learner waiting for costs.') self.save_archive() while not self.end_event.is_set(): try: cost = self.costs_in_queue.get(True, self.learner_wait) - except queue.Empty: + except mlu.empty_exception: continue else: break else: self.log.debug('Learner end signal received. Ending') raise LearnerInterrupt - self.log.debug('Learner cost='+repr(cost)) + #self.log.debug('Learner cost='+repr(cost)) return cost def save_archive(self): @@ -211,8 +204,6 @@ def save_archive(self): self.update_archive() if self.learner_archive_filename is not None: mlu.save_dict_to_file(self.archive_dict, self.total_archive_filename, self.learner_archive_file_type) - else: - self.log.debug('Did not save archive file.') def update_archive(self): ''' @@ -269,20 +260,16 @@ class RandomLearner(Learner, threading.Thread): Keyword Args: min_boundary (Optional [array]): If set to None, overrides default learner values and sets it to a set of value 0. Default None. max_boundary (Optional [array]): If set to None overides default learner values and sets it to an array of value 1. Default None. - trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. 
If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction. - random_archive_filename: Name for python archive of the learners current state. If None, no archive is saved. Default None. - random_archive_file_type: Type of archive. 'pkl' for pickle, 'mat' for matlab and 'txt' for text. + first_params (Optional [array]): The first parameters to test. If None will just randomly sample the initial condition. + trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction. ''' def __init__(self, trust_region=None, first_params=None, - random_archive_filename=None, - random_archive_file_type='pkl', **kwargs): - super().__init__(learner_archive_filename=random_archive_filename, - **kwargs) + super(RandomLearner,self).__init__(**kwargs) if not np.all(self.diff_boundary>0.0): self.log.error('All elements of max_boundary are not larger than min_boundary') @@ -332,7 +319,6 @@ def run(self): self._shut_down() self.log.debug('Ended Random Learner') - class NelderMeadLearner(Learner, threading.Thread): ''' Nelder-Mead learner. Executes the Nelder-Mead learner algorithm and stores the needed simplex to estimate the next points. 
@@ -359,12 +345,9 @@ def __init__(self, initial_simplex_corner=None, initial_simplex_displacements=None, initial_simplex_scale=None, - nelder_mead_archive_filename=default_nelder_mead_archive_filename, - nelder_mead_archive_file_type='pkl', **kwargs): - super().__init__(learner_archive_filename=nelder_mead_archive_filename, - **kwargs) + super(NelderMeadLearner,self).__init__(**kwargs) self.num_boundary_hits = 0 self.rho = 1 @@ -566,14 +549,305 @@ def run(self): self._shut_down() self.log.info('Ended Nelder-Mead') -def update_archive(self): + def update_archive(self): ''' Update the archive. ''' - self.archive_dict.update({'archive_type':'nelder_mead_learner', - 'simplex_parameters':self.simplex_params, + self.archive_dict.update({'simplex_parameters':self.simplex_params, 'simplex_costs':self.simplex_costs}) +class DifferentialEvolutionLearner(Learner, threading.Thread): + ''' + Adaption of the differential evolution algorithm in scipy. + + Args: + params_out_queue (queue): Queue for parameters sent to controller. + costs_in_queue (queue): Queue for costs for gaussian process. This must be tuple + end_event (event): Event to trigger end of learner. + + Keyword Args: + first_params (Optional [array]): The first parameters to test. If None will just randomly sample the initial condition. Default None. + trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction. + evolution_strategy (Optional [string]): the differential evolution strategy to use, options are 'best1', 'best1', 'rand1' and 'rand2'. The default is 'best2'. 
+ population_size (Optional [int]): multiplier proportional to the number of parameters in a generation. The generation population is set to population_size * parameter_num. Default 15. + mutation_scale (Optional [tuple]): The mutation scale when picking new points. Otherwise known as differential weight. When provided as a tuple (min,max) a mutation constant is picked randomly in the interval. Default (0.5,1.0). + cross_over_probability (Optional [float]): The recombination constant or crossover probability, the probability a new point will be added to the population. + restart_tolerance (Optional [float]): when the current population has a spread less than the initial tolerance, namely stdev(curr_pop) < restart_tolerance stdev(init_pop), it is likely the population is now in a minima, and so the search is started again. + + Attributes: + has_trust_region (bool): Whether the learner has a trust region. + num_population_members (int): The number of parameters in a generation. + params_generations (list): History of the parameters generations. A list of all the parameters in the population, for each generation created. + costs_generations (list): History of the costs generations. A list of all the costs in the population, for each generation created. + init_std (float): The initial standard deviation in costs of the population. Calculated after sampling (or resampling) the initial population. + curr_std (float): The current standard deviation in costs of the population. Calculated after sampling each generation. 
+ ''' + + def __init__(self, + first_params = None, + trust_region = None, + evolution_strategy='best1', + population_size=15, + mutation_scale=(0.5, 1), + cross_over_probability=0.7, + restart_tolerance=0.01, + **kwargs): + + super(DifferentialEvolutionLearner,self).__init__(**kwargs) + + if first_params is None: + self.first_params = float('nan') + else: + self.first_params = np.array(first_params, dtype=float) + if not self.check_num_params(self.first_params): + self.log.error('first_params has the wrong number of parameters:' + repr(self.first_params)) + raise ValueError + if not self.check_in_boundary(self.first_params): + self.log.error('first_params is not in the boundary:' + repr(self.first_params)) + raise ValueError + + self._set_trust_region(trust_region) + + if evolution_strategy == 'best1': + self.mutation_func = self._best1 + elif evolution_strategy == 'best2': + self.mutation_func = self._best2 + elif evolution_strategy == 'rand1': + self.mutation_func = self._rand1 + elif evolution_strategy == 'rand2': + self.mutation_func = self._rand2 + else: + self.log.error('Please select a valid mutation strategy') + raise ValueError + + self.evolution_strategy = evolution_strategy + self.restart_tolerance = restart_tolerance + + if len(mutation_scale) == 2 and (np.any(np.array(mutation_scale) <= 2) or np.any(np.array(mutation_scale) > 0)): + self.mutation_scale = mutation_scale + else: + self.log.error('Mutation scale must be a tuple with (min,max) between 0 and 2. mutation_scale:' + repr(mutation_scale)) + raise ValueError + + if cross_over_probability <= 1 and cross_over_probability >= 0: + self.cross_over_probability = cross_over_probability + else: + self.log.error('Cross over probability must be between 0 and 1. 
cross_over_probability:' + repr(cross_over_probability)) + + if population_size >= 5: + self.population_size = population_size + else: + self.log.error('Population size must be greater or equal to 5:' + repr(population_size)) + + self.num_population_members = self.population_size * self.num_params + + self.first_sample = True + + self.params_generations = [] + self.costs_generations = [] + self.generation_count = 0 + + self.min_index = 0 + self.init_std = 0 + self.curr_std = 0 + + self.archive_dict.update({'archive_type':'differential_evolution', + 'evolution_strategy':self.evolution_strategy, + 'mutation_scale':self.mutation_scale, + 'cross_over_probability':self.cross_over_probability, + 'population_size':self.population_size, + 'num_population_members':self.num_population_members, + 'restart_tolerance':self.restart_tolerance, + 'first_params':self.first_params, + 'has_trust_region':self.has_trust_region, + 'trust_region':self.trust_region}) + + + def run(self): + ''' + Runs the Differential Evolution Learner. + ''' + try: + + self.generate_population() + + while not self.end_event.is_set(): + + self.next_generation() + + if self.curr_std < self.restart_tolerance * self.init_std: + self.generate_population() + + except LearnerInterrupt: + return + + def save_generation(self): + ''' + Save history of generations. 
+ ''' + self.params_generations.append(np.copy(self.population)) + self.costs_generations.append(np.copy(self.population_costs)) + self.generation_count += 1 + + def generate_population(self): + ''' + Sample a new random set of variables + ''' + + self.population = [] + self.population_costs = [] + self.min_index = 0 + + if np.all(np.isfinite(self.first_params)) and self.first_sample: + curr_params = self.first_params + self.first_sample = False + else: + curr_params = self.min_boundary + nr.rand(self.num_params) * self.diff_boundary + + curr_cost = self.put_params_and_get_cost(curr_params) + + self.population.append(curr_params) + self.population_costs.append(curr_cost) + + for index in range(1, self.num_population_members): + + if self.has_trust_region: + temp_min = np.maximum(self.min_boundary,self.population[self.min_index] - self.trust_region) + temp_max = np.minimum(self.max_boundary,self.population[self.min_index] + self.trust_region) + curr_params = temp_min + nr.rand(self.num_params) * (temp_max - temp_min) + else: + curr_params = self.min_boundary + nr.rand(self.num_params) * self.diff_boundary + + curr_cost = self.put_params_and_get_cost(curr_params) + + self.population.append(curr_params) + self.population_costs.append(curr_cost) + + if curr_cost < self.population_costs[self.min_index]: + self.min_index = index + + self.population = np.array(self.population) + self.population_costs = np.array(self.population_costs) + + self.init_std = np.std(self.population_costs) + self.curr_std = self.init_std + + self.save_generation() + + def next_generation(self): + ''' + Evolve the population by a single generation + ''' + + self.curr_scale = nr.uniform(self.mutation_scale[0], self.mutation_scale[1]) + + for index in range(self.num_population_members): + + curr_params = self.mutate(index) + + curr_cost = self.put_params_and_get_cost(curr_params) + + if curr_cost < self.population_costs[index]: + self.population[index] = curr_params + self.population_costs[index] = 
curr_cost + + if curr_cost < self.population_costs[self.min_index]: + self.min_index = index + + self.curr_std = np.std(self.population_costs) + + self.save_generation() + + def mutate(self, index): + ''' + Mutate the parameters at index. + + Args: + index (int): Index of the point to be mutated. + ''' + + fill_point = nr.randint(0, self.num_params) + candidate_params = self.mutation_func(index) + crossovers = nr.rand(self.num_params) < self.cross_over_probability + crossovers[fill_point] = True + mutated_params = np.where(crossovers, candidate_params, self.population[index]) + + if self.has_trust_region: + temp_min = np.maximum(self.min_boundary,self.population[self.min_index] - self.trust_region) + temp_max = np.minimum(self.max_boundary,self.population[self.min_index] + self.trust_region) + rand_params = temp_min + nr.rand(self.num_params) * (temp_max - temp_min) + else: + rand_params = self.min_boundary + nr.rand(self.num_params) * self.diff_boundary + + projected_params = np.where(np.logical_or(mutated_params < self.min_boundary, mutated_params > self.max_boundary), rand_params, mutated_params) + + return projected_params + + def _best1(self, index): + ''' + Use best parameters and two others to generate mutation. + + Args: + index (int): Index of member to mutate. + ''' + r0, r1 = self.random_index_sample(index, 2) + return (self.population[self.min_index] + self.curr_scale *(self.population[r0] - self.population[r1])) + + def _rand1(self, index): + ''' + Use three random parameters to generate mutation. + + Args: + index (int): Index of member to mutate. + ''' + r0, r1, r2 = self.random_index_sample(index, 3) + return (self.population[r0] + self.curr_scale * (self.population[r1] - self.population[r2])) + + def _best2(self, index): + ''' + Use best parameters and four others to generate mutation. + + Args: + index (int): Index of member to mutate. 
+ ''' + r0, r1, r2, r3 = self.random_index_sample(index, 4) + return self.population[self.min_index] + self.curr_scale * (self.population[r0] + self.population[r1] - self.population[r2] - self.population[r3]) + + def _rand2(self, index): + ''' + Use five random parameters to generate mutation. + + Args: + index (int): Index of member to mutate. + ''' + r0, r1, r2, r3, r4 = self.random_index_sample(index, 5) + return self.population[r0] + self.curr_scale * (self.population[r1] + self.population[r2] - self.population[r3] - self.population[r4]) + + def random_index_sample(self, index, num_picks): + ''' + Randomly select a num_picks of indexes, without index. + + Args: + index(int): The index that is not included + num_picks(int): The number of picks. + ''' + rand_indexes = list(range(self.num_population_members)) + rand_indexes.remove(index) + return random.sample(rand_indexes, num_picks) + + def update_archive(self): + ''' + Update the archive. + ''' + self.archive_dict.update({'params_generations':self.params_generations, + 'costs_generations':self.costs_generations, + 'population':self.population, + 'population_costs':self.population_costs, + 'init_std':self.init_std, + 'curr_std':self.curr_std, + 'generation_count':self.generation_count}) + + class GaussianProcessLearner(Learner, mp.Process): ''' @@ -592,11 +866,7 @@ class GaussianProcessLearner(Learner, mp.Process): trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction. 
default_bad_cost (Optional [float]): If a run is reported as bad and default_bad_cost is provided, the cost for the bad run is set to this default value. If default_bad_cost is None, then the worst cost received is set to all the bad runs. Default None. default_bad_uncertainty (Optional [float]): If a run is reported as bad and default_bad_uncertainty is provided, the uncertainty for the bad run is set to this default value. If default_bad_uncertainty is None, then the uncertainty is set to a tenth of the best to worst cost range. Default None. - gp_archive_filename (Optional [string]): Name for the python pickle archive of the learner. Default GaussianProcessLearnerArchive. - gp_archive_file_type (Optional [string]): File type of the training file_type archive. Can be 'mat' for matlabe file, 'pkl' for python pickle file, 'txt' for text file. minimum_uncertainty (Optional [float]): The minimum uncertainty associated with provided costs. Must be above zero to avoid fitting errors. Default 1e-8. - gp_training_filename (Optional [string]): Filename of a previously archive that will be used to train the gaussian process. Note if this is provided, only the data from the previous experiment, properties of the GP, boundary values, and number of parameters are copied into the new learner. Be sure to also provide the same other configuration details if you want the experiment to continue identically, for example the trust region of the previous experiment is not included. - gp_training_file_type (Optional [string]): File type of the training file_type archive. Can be 'mat' for matlabe file, 'pkl' for python pickle file, 'txt' for text file. predict_global_minima_at_end (Optional [bool]): If True finds the global minima when the learner is ended. Does not if False. Default True. predict_local_minima_at_end (Optional [bool]): If True finds the all minima when the learner is ended. Does not if False. Default False. 
@@ -631,11 +901,9 @@ def __init__(self, trust_region=None, default_bad_cost = None, default_bad_uncertainty = None, - gp_archive_filename=default_gp_archive_filename, - gp_archive_file_type='pkl', minimum_uncertainty = 1e-8, gp_training_filename =None, - gp_training_file_type ='pkl', + gp_training_file_type ='txt', predict_global_minima_at_end = True, predict_local_minima_at_end = False, **kwargs): @@ -651,15 +919,15 @@ def __init__(self, #Basic optimization settings num_params = int(self.training_dict['num_params']) - min_boundary = np.array(self.training_dict['min_boundary'], dtype=float) - max_boundary = np.array(self.training_dict['max_boundary'], dtype=float) + min_boundary = mlu.safe_cast_to_array(self.training_dict['min_boundary']) + max_boundary = mlu.safe_cast_to_array(self.training_dict['max_boundary']) #Configuration of the learner self.cost_has_noise = bool(self.training_dict['cost_has_noise']) - self.length_scale = np.array(self.training_dict['length_scale']) + self.length_scale = mlu.safe_cast_to_array(self.training_dict['length_scale']) self.length_scale_history = list(self.training_dict['length_scale_history']) self.noise_level = float(self.training_dict['noise_level']) - self.noise_level_history = list(self.training_dict['noise_level_history']) + self.noise_level_history = mlu.safe_cast_to_list(self.training_dict['noise_level_history']) #Counters self.costs_count = int(self.training_dict['costs_count']) @@ -667,46 +935,46 @@ def __init__(self, self.params_count = int(self.training_dict['params_count']) #Data from previous experiment - self.all_params = np.array(self.training_dict['all_params'], dtype=float) - self.all_costs = np.array(self.training_dict['all_costs'], dtype=float) - self.all_uncers = np.array(self.training_dict['all_uncers'], dtype=float) - self.bad_run_indexs = list(self.training_dict['bad_run_indexs']) + self.all_params = np.array(self.training_dict['all_params']) + self.all_costs = 
mlu.safe_cast_to_array(self.training_dict['all_costs']) + self.all_uncers = mlu.safe_cast_to_array(self.training_dict['all_uncers']) + + self.bad_run_indexs = mlu.safe_cast_to_list(self.training_dict['bad_run_indexs']) #Derived properties self.best_cost = float(self.training_dict['best_cost']) - self.best_params = np.array(self.training_dict['best_params'], dtype=float) + self.best_params = mlu.safe_cast_to_array(self.training_dict['best_params']) self.best_index = int(self.training_dict['best_index']) self.worst_cost = float(self.training_dict['worst_cost']) self.worst_index = int(self.training_dict['worst_index']) self.cost_range = float(self.training_dict['cost_range']) try: - self.predicted_best_parameters = np.array(self.training_dict['predicted_best_parameters']) + self.predicted_best_parameters = mlu.safe_cast_to_array(self.training_dict['predicted_best_parameters']) self.predicted_best_cost = float(self.training_dict['predicted_best_cost']) self.predicted_best_uncertainty = float(self.training_dict['predicted_best_uncertainty']) self.has_global_minima = True except KeyError: self.has_global_minima = False try: - self.local_minima_parameters = list(self.training_dict['local_minima_parameters']) - self.local_minima_costs = list(self.training_dict['local_minima_costs']) - self.local_minima_uncers = list(self.training_dict['local_minima_uncers']) + self.local_minima_parameters = mlu.safe_cast_to_list(self.training_dict['local_minima_parameters']) + self.local_minima_costs = mlu.safe_cast_to_list(self.training_dict['local_minima_costs']) + self.local_minima_uncers = mlu.safe_cast_to_list(self.training_dict['local_minima_uncers']) + self.has_local_minima = True except KeyError: self.has_local_minima = False - - super().__init__(learner_archive_filename=gp_archive_filename, - learner_archive_file_type=gp_archive_file_type, - num_params=num_params, + if 'num_params' in kwargs: + super(GaussianProcessLearner,self).__init__(**kwargs) + else: + 
super(GaussianProcessLearner,self).__init__(num_params=num_params, min_boundary=min_boundary, max_boundary=max_boundary, **kwargs) else: - super().__init__(learner_archive_filename=gp_archive_filename, - learner_archive_file_type=gp_archive_file_type, - **kwargs) + super(GaussianProcessLearner,self).__init__(**kwargs) #Storage variables, archived self.all_params = np.array([], dtype=float) @@ -734,7 +1002,6 @@ def __init__(self, self.length_scale = np.ones((self.num_params,)) else: self.length_scale = np.array(length_scale, dtype=float) - self.update_hyperparameters = bool(update_hyperparameters) self.noise_level = float(noise_level) self.cost_has_noise = bool(cost_has_noise) @@ -794,9 +1061,9 @@ def __init__(self, if self.default_bad_uncertainty < 0: self.log.error('Default bad uncertainty must be positive.') raise ValueError - if (self.default_bad_cost is None) and (self.default_bad_cost is None): + if (self.default_bad_cost is None) and (self.default_bad_uncertainty is None): self.bad_defaults_set = False - elif (self.default_bad_cost is not None) and (self.default_bad_cost is not None): + elif (self.default_bad_cost is not None) and (self.default_bad_uncertainty is not None): self.bad_defaults_set = True else: self.log.error('Both the default cost and uncertainty must be set for a bad run or they must both be set to None.') @@ -832,7 +1099,11 @@ def __init__(self, 'has_trust_region':self.has_trust_region, 'predict_global_minima_at_end':self.predict_global_minima_at_end, 'predict_local_minima_at_end':self.predict_local_minima_at_end}) - + + #Remove logger so gaussian process can be safely picked for multiprocessing on Windows + self.log = None + + def create_gaussian_process(self): ''' Create the initial Gaussian process. 
@@ -873,13 +1144,14 @@ def get_params_and_costs(self): new_costs = [] new_uncers = [] new_bads = [] - new_costs_count = 0 update_bads_flag = False while not self.costs_in_queue.empty(): (param, cost, uncer, bad) = self.costs_in_queue.get_nowait() + self.costs_count +=1 + if bad: - new_bads.append(self.data_count) + new_bads.append(self.costs_count-1) if self.bad_defaults_set: cost = self.default_bad_cost uncer = self.default_bad_uncertainty @@ -898,18 +1170,15 @@ def get_params_and_costs(self): self.log.error('Provided uncertainty must be larger or equal to zero:' + repr(uncer)) uncer = max(float(uncer), self.minimum_uncertainty) - new_costs_count += 1 - self.costs_count +=1 - cost_change_flag = False if cost > self.worst_cost: self.worst_cost = cost - self.worst_index = self.costs_count + self.worst_index = self.costs_count-1 cost_change_flag = True if cost < self.best_cost: self.best_cost = cost self.best_params = param - self.best_index = self.costs_count + self.best_index = self.costs_count-1 cost_change_flag = True if cost_change_flag: self.cost_range = self.worst_cost - self.best_cost @@ -919,7 +1188,8 @@ def get_params_and_costs(self): new_params.append(param) new_costs.append(cost) new_uncers.append(uncer) - + + if self.all_params.size==0: self.all_params = np.array(new_params, dtype=float) self.all_costs = np.array(new_costs, dtype=float) @@ -929,13 +1199,15 @@ def get_params_and_costs(self): self.all_costs = np.concatenate((self.all_costs, np.array(new_costs, dtype=float))) self.all_uncers = np.concatenate((self.all_uncers, np.array(new_uncers, dtype=float))) + self.bad_run_indexs.append(new_bads) + if self.all_params.shape != (self.costs_count,self.num_params): self.log('Saved GP params are the wrong size. THIS SHOULD NOT HAPPEN:' + repr(self.all_params)) if self.all_costs.shape != (self.costs_count,): self.log('Saved GP costs are the wrong size. 
THIS SHOULD NOT HAPPEN:' + repr(self.all_costs)) if self.all_uncers.shape != (self.costs_count,): self.log('Saved GP uncertainties are the wrong size. THIS SHOULD NOT HAPPEN:' + repr(self.all_uncers)) - + if update_bads_flag: self.update_bads() @@ -1002,20 +1274,26 @@ def fit_gaussian_process(self): self.scaled_uncers = self.all_uncers * self.cost_scaler.scale_ self.gaussian_process.alpha_ = self.scaled_uncers self.gaussian_process.fit(self.all_params,self.scaled_costs) - self.fit_count += 1 - self.gaussian_process.kernel = self.gaussian_process.kernel_ - - last_hyperparameters = self.gaussian_process.kernel.get_params() - if self.cost_has_noise: - self.length_scale = last_hyperparameters['k1__length_scale'] - self.length_scale_history.append(self.length_scale) - self.noise_level = last_hyperparameters['k2__noise_level'] - self.noise_level_history.append(self.noise_level) - else: - self.length_scale = last_hyperparameters['length_scale'] - self.length_scale_history.append(self.length_scale) + if self.update_hyperparameters: + + self.fit_count += 1 + self.gaussian_process.kernel = self.gaussian_process.kernel_ + last_hyperparameters = self.gaussian_process.kernel.get_params() + + if self.cost_has_noise: + self.length_scale = last_hyperparameters['k1__length_scale'] + if isinstance(self.length_scale, float): + self.length_scale = np.array([self.length_scale]) + self.length_scale_history.append(self.length_scale) + self.noise_level = last_hyperparameters['k2__noise_level'] + self.noise_level_history.append(self.noise_level) + else: + self.length_scale = last_hyperparameters['length_scale'] + self.length_scale_history.append(self.length_scale) + + def update_bias_function(self): ''' Set the constants for the cost bias function. @@ -1057,12 +1335,16 @@ def run(self): ''' Starts running the Gaussian process learner. When the new parameters event is triggered, reads the cost information provided and updates the Gaussian process with the information. 
Then searches the Gaussian process for new optimal parameters to test based on the biased cost. Parameters to test next are put on the output parameters queue. ''' + #logging to the main log file from a process (as apposed to a thread) in cpython is currently buggy on windows and/or python 2.7 + #current solution is to only log to the console for warning and above from a process + self.log = mp.log_to_stderr(logging.WARNING) + try: while not self.end_event.is_set(): - self.log.debug('Learner waiting for new params event') + #self.log.debug('Learner waiting for new params event') self.save_archive() self.wait_for_new_params_event() - self.log.debug('Gaussian process learner reading costs') + #self.log.debug('Gaussian process learner reading costs') self.get_params_and_costs() self.fit_gaussian_process() for _ in range(self.generation_num): @@ -1073,6 +1355,9 @@ def run(self): raise LearnerInterrupt() except LearnerInterrupt: pass + if self.predict_global_minima_at_end or self.predict_local_minima_at_end: + self.get_params_and_costs() + self.fit_gaussian_process() end_dict = {} if self.predict_global_minima_at_end: self.find_global_minima() @@ -1177,7 +1462,11 @@ def find_local_minima(self): self.has_local_minima = True self.log.info('Search completed') - - + + + + + + diff --git a/mloop/testing.py b/mloop/testing.py index c6bcb48..f2f2087 100644 --- a/mloop/testing.py +++ b/mloop/testing.py @@ -1,6 +1,8 @@ ''' Module of classes used to test M-LOOP. ''' +from __future__ import absolute_import, division, print_function +__metaclass__ = type import numpy as np import threading @@ -24,16 +26,6 @@ def __init__(self, num_params = 1): self.num_params = num_params self.set_default_landscape() - def add_mp_safe_log(self,log_queue): - ''' - Add a multiprocess safe log based using a queue (which is presumed to be listened to by a QueueListener). 
- ''' - self.log = logging.getLogger(__name__) - que_handler = logging.handlers.QueueHandler(log_queue) - self.log.addHandler(que_handler) - self.log.propagate = False - - def set_default_landscape(self): ''' Set landscape functions to their defaults @@ -163,22 +155,20 @@ class FakeExperiment(threading.Thread): Keyword Args: test_landscape (Optional TestLandscape): landscape to generate costs from. - out_file_type (Optional [string]): currently supports: 'txt' where the output is a text file with the parameters as a list of numbers, and 'mat' a matlab file with variable parameters with the next_parameters. Default is 'mat'. - in_file_type (Optional [string]): file type to be written either 'mat' for matlab or 'txt' for readible text file. Defaults to 'mat'. - + experiment_file_type (Optional [string]): currently supports: 'txt' where the output is a text file with the parameters as a list of numbers, and 'mat' a matlab file with variable parameters with the next_parameters. Default is 'txt'. + Attributes self.end_event (Event): Used to trigger end of experiment. ''' def __init__(self, test_landscape = None, - out_file_type=mlu.default_in_file_type, - in_file_type=mlu.default_out_file_type, + experiment_file_type=mlu.default_interface_file_type, exp_wait = 0, poll_wait = 1, **kwargs): - super().__init__() + super(FakeExperiment,self).__init__() if test_landscape is None: self.test_landscape = TestLandscape() @@ -188,11 +178,11 @@ def __init__(self, self.log = logging.getLogger(__name__) self.exp_wait = float(exp_wait) self.poll_wait = float(poll_wait) - self.out_file_type = str(out_file_type) - self.in_file_type = str(in_file_type) + self.out_file_type = str(experiment_file_type) + self.in_file_type = str(experiment_file_type) - self.total_out_filename = mlu.default_in_filename + '.' + self.out_file_type - self.total_in_filename = mlu.default_out_filename + '.' + self.in_file_type + self.total_out_filename = mlu.default_interface_in_filename + '.' 
+ self.out_file_type + self.total_in_filename = mlu.default_interface_out_filename + '.' + self.in_file_type self.end_event = threading.Event() self.test_count =0 @@ -213,6 +203,7 @@ def run(self): self.log.debug('Entering FakeExperiment loop') while not self.end_event.is_set(): if os.path.isfile(self.total_in_filename): + time.sleep(mlu.filewrite_wait) #wait for file to be written try: in_dict = mlu.get_dict_from_file(self.total_in_filename, self.in_file_type) except IOError: diff --git a/mloop/utilities.py b/mloop/utilities.py index 3cd4c20..2ec4b26 100644 --- a/mloop/utilities.py +++ b/mloop/utilities.py @@ -1,6 +1,8 @@ ''' Module of common utility methods and attributes used by all the modules. ''' +from __future__ import absolute_import, division, print_function +__metaclass__ = type import scipy.io as si import pickle @@ -11,17 +13,32 @@ import numpy as np import mloop -default_in_filename = 'exp_output' -default_in_file_type = 'mat' -default_out_filename = 'exp_input' -default_out_file_type = 'mat' +python_version = sys.version_info[0] + +#For libraries with different names in pythons 2 and 3 +if python_version < 3: + import Queue #@UnresolvedImport @UnusedImport + empty_exception = Queue.Empty +else: + import queue + empty_exception = queue.Empty + + +default_interface_in_filename = 'exp_output' +default_interface_out_filename = 'exp_input' +default_interface_file_type = 'txt' archive_foldername = './M-LOOP_archives/' log_foldername = './M-LOOP_logs/' default_log_filename = 'M-LOOP_' +filewrite_wait = 0.1 + mloop_path = os.path.dirname(mloop.__file__) +#Set numpy to have no limit on printing to ensure all values are saved +np.set_printoptions(threshold=np.inf) + def config_logger(**kwargs): ''' Wrapper for _config_logger. 
@@ -40,22 +57,26 @@ def _config_logger(log_filename = default_log_filename, file_log_level (Optional[int]) : Level of log output for file, default is logging.DEBUG = 10 console_log_level (Optional[int]) :Level of log output for console, defalut is logging.INFO = 20 + Returns: + dictionary: Dict with extra keywords not used by the logging configuration. ''' if not os.path.exists(log_foldername): os.makedirs(log_foldername) log = logging.getLogger('mloop') - log.setLevel(min(file_log_level,console_log_level)) - if log_filename is not None: - fh = logging.FileHandler(log_foldername + log_filename + datetime_to_string(datetime.datetime.now()) + '.log') - fh.setLevel(file_log_level) - fh.setFormatter(logging.Formatter('%(asctime)s %(name)-20s %(levelname)-8s %(message)s')) - log.addHandler(fh) - ch = logging.StreamHandler(stream = sys.stdout) - ch.setLevel(console_log_level) - ch.setFormatter(logging.Formatter('%(levelname)-8s %(message)s')) - log.addHandler(ch) - log.debug('MLOOP Logger configured.') + + if len(log.handlers) == 0: + log.setLevel(min(file_log_level,console_log_level)) + if log_filename is not None: + fh = logging.FileHandler(log_foldername + log_filename + datetime_to_string(datetime.datetime.now()) + '.log') + fh.setLevel(file_log_level) + fh.setFormatter(logging.Formatter('%(asctime)s %(name)-20s %(levelname)-8s %(message)s')) + log.addHandler(fh) + ch = logging.StreamHandler(stream = sys.stdout) + ch.setLevel(console_log_level) + ch.setFormatter(logging.Formatter('%(levelname)-8s %(message)s')) + log.addHandler(ch) + log.debug('MLOOP Logger configured.') return kwargs @@ -97,9 +118,10 @@ def txt_file_to_dict(filename): temp = (line.partition('#')[0]).strip('\n').strip() if temp != '': tdict_string += temp+',' - #Setting up words for parsing a dict - array = np.array - inf = float('inf') + #Setting up words for parsing a dict, ignore eclipse warnings + array = np.array #@UnusedVariable + inf = float('inf') #@UnusedVariable + nan = float('nan') 
#@UnusedVariable tdict = eval('dict('+tdict_string+')') return tdict @@ -152,4 +174,66 @@ def check_file_type_supported(file_type): bool : True if file_type is supported, False otherwise. ''' return file_type == 'mat' or 'txt' or 'pkl' + +def safe_cast_to_array(in_array): + ''' + Attempts to safely cast the input to an array. Takes care of border cases + + Args: + in_array (array or equivalent): The array (or otherwise) to be converted to a list. + + Returns: + array : array that has been squeezed and 0-D cases change to 1-D cases + + ''' + + out_array = np.squeeze(np.array(in_array)) + + if out_array.shape == (): + out_array = np.array([out_array[()]]) + + return out_array + +def safe_cast_to_list(in_array): + ''' + Attempts to safely cast a numpy array to a list, if not a numpy array just casts to list on the object. + + Args: + in_array (array or equivalent): The array (or otherwise) to be converted to a list. + + Returns: + list : List of elements from in_array + + ''' + + if isinstance(in_array, np.ndarray): + t_array = np.squeeze(in_array) + if t_array.shape == (): + out_list = [t_array[()]] + else: + out_list = list(t_array) + else: + out_list = list(in_array) + + return out_list + + +class NullQueueListener(): + ''' + Shell class with start and stop functions that do nothing. Queue listener is not implemented in python 2. Current fix is to simply use the multiprocessing class to pipe straight to the cmd line if running on python 2. This is class is just a placeholder. + ''' + def start(self): + ''' + Does nothing + ''' + pass + + def stop(self): + ''' + Does nothing + ''' + pass + + + \ No newline at end of file diff --git a/mloop/visualizations.py b/mloop/visualizations.py index 931d43b..763b649 100644 --- a/mloop/visualizations.py +++ b/mloop/visualizations.py @@ -1,6 +1,8 @@ ''' Module of classes used to create visualizations of data produced by the experiment and learners. 
''' +from __future__ import absolute_import, division, print_function +__metaclass__ = type import mloop.utilities as mlu import mloop.learners as mll @@ -9,12 +11,12 @@ import logging import matplotlib.pyplot as plt import matplotlib as mpl -from mloop.controllers import GaussianProcessController figure_counter = 0 cmap = plt.get_cmap('hsv') run_label = 'Run number' cost_label = 'Cost' +generation_label = 'Generation number' scale_param_label = 'Min (0) to max (1) parameters' param_label = 'Parameter' log_length_scale_label = 'Log of length scale' @@ -36,12 +38,19 @@ def show_all_default_visualizations(controller, show_plots=True): log.debug('Creating controller visualizations.') create_contoller_visualizations(controller.total_archive_filename, file_type=controller.controller_archive_file_type) - if isinstance(controller, GaussianProcessController): + + if isinstance(controller, mlc.DifferentialEvolutionController): + log.debug('Creating differential evolution visualizations.') + create_differential_evolution_learner_visualizations(controller.learner.total_archive_filename, + file_type=controller.learner.learner_archive_file_type) + + if isinstance(controller, mlc.GaussianProcessController): log.debug('Creating gaussian process visualizations.') plot_all_minima_vs_cost_flag = bool(controller.gp_learner.has_local_minima) create_gaussian_process_learner_visualizations(controller.gp_learner.total_archive_filename, file_type=controller.gp_learner.learner_archive_file_type, plot_all_minima_vs_cost=plot_all_minima_vs_cost_flag) + log.info('Showing visualizations, close all to end MLOOP.') if show_plots: plt.show() @@ -129,7 +138,7 @@ def __init__(self, filename, self.num_in_costs = int(controller_dict['num_in_costs']) self.num_out_params = int(controller_dict['num_out_params']) self.out_params = np.array(controller_dict['out_params']) - self.out_type = list(controller_dict['out_type']) + self.out_type = [x.strip() for x in list(controller_dict['out_type'])] 
self.in_costs = np.squeeze(np.array(controller_dict['in_costs'])) self.in_uncers = np.squeeze(np.array(controller_dict['in_uncers'])) self.in_bads = np.squeeze(list(controller_dict['in_bads'])) @@ -163,7 +172,7 @@ def plot_cost_vs_run(self): plt.scatter(self.in_numbers,self.in_costs,marker='o',c=self.cost_colors,s=5*mpl.rcParams['lines.markersize']) plt.xlabel(run_label) plt.ylabel(cost_label) - plt.title('Controller: Cost against number.') + plt.title('Controller: Cost vs run number.') artists = [] for ut in self.unique_types: artists.append(plt.Line2D((0,1),(0,0), color=_color_from_controller_name(ut), marker='o', linestyle='')) @@ -187,7 +196,7 @@ def plot_parameters_vs_run(self): plt.ylabel(run_label) plt.xlabel(run_label) - plt.title('Controller: Parameters against run number.') + plt.title('Controller: Parameters vs run number.') artists=[] for ind in range(self.num_params): artists.append(plt.Line2D((0,1),(0,0), color=self.param_colors[ind],marker='o',linestyle='')) @@ -217,27 +226,132 @@ def plot_parameters_vs_cost(self): plt.plot(self.out_params[:,ind],self.in_costs,'o',color=self.param_colors[ind]) plt.xlabel(run_label) plt.ylabel(cost_label) - plt.title('Controller: Cost against parameters.') + plt.title('Controller: Cost vs parameters.') artists=[] for ind in range(self.num_params): artists.append(plt.Line2D((0,1),(0,0), color=self.param_colors[ind],marker='o',linestyle='')) plt.legend(artists,[str(x) for x in range(1,self.num_params+1)], loc=legend_loc) +def create_differential_evolution_learner_visualizations(filename, + file_type='pkl', + plot_params_vs_generations=True, + plot_costs_vs_generations=True): + ''' + Runs the plots from a differential evolution learner file. + + Args: + filename (Optional [string]): Filename for the differential evolution archive. Must provide datetime or filename. Default None. + + Keyword Args: + file_type (Optional [string]): File type 'pkl' pickle, 'mat' matlab or 'txt' text. 
+ plot_params_generations (Optional [bool]): If True plot parameters vs generations, else do not. Default True. + plot_costs_generations (Optional [bool]): If True plot costs vs generations, else do not. Default True. + ''' + visualization = DifferentialEvolutionVisualizer(filename, file_type=file_type) + if plot_params_vs_generations: + visualization.plot_params_vs_generations() + if plot_costs_vs_generations: + visualization.plot_costs_vs_generations() + +class DifferentialEvolutionVisualizer(): + ''' + DifferentialEvolutionVisualizer creates figures from a differential evolution archive. + + Args: + filename (String): Filename of the DifferentialEvolutionVisualizer archive. + + Keyword Args: + file_type (String): Can be 'mat' for matlab, 'pkl' for pickle or 'txt' for text. Default 'pkl'. + + ''' + def __init__(self, filename, + file_type ='pkl', + **kwargs): + + self.log = logging.getLogger(__name__) + + self.filename = str(filename) + self.file_type = str(file_type) + if not mlu.check_file_type_supported(self.file_type): + self.log.error('GP training file type not supported' + repr(self.file_type)) + learner_dict = mlu.get_dict_from_file(self.filename, self.file_type) + + if 'archive_type' in learner_dict and not (learner_dict['archive_type'] == 'differential_evolution'): + self.log.error('The archive appears to be the wrong type.' 
+ repr(learner_dict['archive_type'])) + raise ValueError + self.archive_type = learner_dict['archive_type'] + + self.num_generations = int(learner_dict['generation_count']) + self.num_population_members = int(learner_dict['num_population_members']) + self.num_params = int(learner_dict['num_params']) + self.min_boundary = np.squeeze(np.array(learner_dict['min_boundary'])) + self.max_boundary = np.squeeze(np.array(learner_dict['max_boundary'])) + self.params_generations = np.array(learner_dict['params_generations']) + self.costs_generations = np.array(learner_dict['costs_generations']) + + self.finite_flag = True + self.param_scaler = lambda p: (p-self.min_boundary)/(self.max_boundary - self.min_boundary) + self.scaled_params_generations = np.array([[self.param_scaler(self.params_generations[inda,indb,:]) for indb in range(self.num_population_members)] for inda in range(self.num_generations)]) + + self.gen_numbers = np.arange(1,self.num_generations+1) + self.param_colors = _color_list_from_num_of_params(self.num_params) + self.gen_plot = np.array([np.full(self.num_population_members, ind, dtype=int) for ind in self.gen_numbers]).flatten() + + def plot_costs_vs_generations(self): + ''' + Create a plot of the costs versus run number. + ''' + if self.costs_generations.size == 0: + self.log.warning('Unable to plot DE: costs vs generations as the initial generation did not complete.') + return + + global figure_counter, cost_label, generation_label + figure_counter += 1 + plt.figure(figure_counter) + plt.plot(self.gen_plot,self.costs_generations.flatten(),marker='o',linestyle='',color='k') + plt.xlabel(generation_label) + plt.ylabel(cost_label) + plt.title('Differential evolution: Cost vs generation number.') + + def plot_params_vs_generations(self): + ''' + Create a plot of the parameters versus run number. 
+ ''' + if self.params_generations.size == 0: + self.log.warning('Unable to plot DE: params vs generations as the initial generation did not complete.') + return + + global figure_counter, generation_label, scale_param_label, legend_loc + figure_counter += 1 + plt.figure(figure_counter) + + for ind in range(self.num_params): + plt.plot(self.gen_plot,self.params_generations[:,:,ind].flatten(),marker='o',linestyle='',color=self.param_colors[ind]) + plt.ylim((0,1)) + plt.xlabel(generation_label) + plt.ylabel(scale_param_label) + + plt.title('Differential evolution: Params vs generation number.') + artists=[] + for ind in range(self.num_params): + artists.append(plt.Line2D((0,1),(0,0), color=self.param_colors[ind],marker='o',linestyle='')) + plt.legend(artists,[str(x) for x in range(1,self.num_params+1)],loc=legend_loc) + def create_gaussian_process_learner_visualizations(filename, file_type='pkl', plot_cross_sections=True, - plot_all_minima_vs_cost=True, + plot_all_minima_vs_cost=False, plot_hyperparameters_vs_run=True): ''' Runs the plots from a gaussian process learner file. Args: - filename (Optional [string]): Filename for the controller archive. Must provide datetime or filename. Default None. + filename (Optional [string]): Filename for the gaussian process archive. Must provide datetime or filename. Default None. Keyword Args: file_type (Optional [string]): File type 'pkl' pickle, 'mat' matlab or 'txt' text. plot_cross_sections (Optional [bool]): If True plot predict landscape cross sections, else do not. Default True. - plot_all_minima_vs_cost (Optional [bool]): If True plot all minima parameters versus cost number, False does not. If None it will only make the plots if all minima were previously calculated. Default None. + plot_all_minima_vs_cost (Optional [bool]): If True plot all minima parameters versus cost number, False does not. If None it will only make the plots if all minima were previously calculated. Default False. 
''' visualization = GaussianProcessVisualizer(filename, file_type=file_type) if plot_cross_sections: @@ -261,21 +375,21 @@ class GaussianProcessVisualizer(mll.GaussianProcessLearner): def __init__(self, filename, file_type = 'pkl', **kwargs): - super().__init__(gp_training_filename = filename, - gp_training_file_type = file_type, - update_hyperparameters = False, - **kwargs) + super(GaussianProcessVisualizer, self).__init__(gp_training_filename = filename, + gp_training_file_type = file_type, + update_hyperparameters = False, + **kwargs) self.log = logging.getLogger(__name__) #Trust region self.has_trust_region = bool(np.array(self.training_dict['has_trust_region'])) - self.trust_region = np.array(self.training_dict['trust_region'], dtype=float) + self.trust_region = np.squeeze(np.array(self.training_dict['trust_region'], dtype=float)) self.create_gaussian_process() self.fit_gaussian_process() - self.log_length_scale_history = np.log10(np.array(self.length_scale_history)) + self.log_length_scale_history = np.log10(np.array(self.length_scale_history, dtype=float)) self.noise_level_history = np.array(self.noise_level_history) self.fit_numbers = np.arange(1,self.fit_count+1) @@ -372,8 +486,7 @@ def plot_all_minima_vs_cost(self): ''' Produce figure of the all the local minima versus cost. 
''' - if not self.has_all_minima: - self.find_all_minima() + self.find_all_minima() global figure_counter, legend_loc figure_counter += 1 plt.figure(figure_counter) @@ -395,7 +508,7 @@ def plot_all_minima_vs_cost(self): plt.xlabel(scale_param_label) plt.xlim((0,1)) plt.ylabel(cost_label) - plt.title('GP Learner: Cost against parameters.') + plt.title('GP Learner: Cost vs parameters.') artists = [] for ind in range(self.num_params): artists.append(plt.Line2D((0,1),(0,0), color=self.param_colors[ind],marker='o',linestyle='')) @@ -416,7 +529,7 @@ def plot_hyperparameters_vs_run(self): plt.plot(self.fit_numbers,self.log_length_scale_history[:,ind],'o',color=self.param_colors[ind]) plt.xlabel(run_label) plt.ylabel(log_length_scale_label) - plt.title('GP Learner: Log_10 of lengths scales vs run number.') + plt.title('GP Learner: Log of lengths scales vs fit number.') if scale_num!=1: artists=[] for ind in range(self.num_params): @@ -430,5 +543,5 @@ def plot_hyperparameters_vs_run(self): plt.plot(self.fit_numbers,self.noise_level_history,'o',color='k') plt.xlabel(run_label) plt.ylabel(noise_label) - plt.title('GP Learner: Noise level vs run number.') + plt.title('GP Learner: Noise level vs fit number.') \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5012a5c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +pip>=7.0 +docutils>=0.3 +numpy>=1.11 +scipy>=0.17 +matplotlib>=1.5 +pytest>=2.9 +scikit-learn>=0.18 +setuptools>=26 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 5aef279..7f9083a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,5 @@ [metadata] description-file = README.rst + +[aliases] +test=pytest diff --git a/setup.py b/setup.py index 7b16cbc..c6b6017 100644 --- a/setup.py +++ b/setup.py @@ -1,42 +1,63 @@ ''' Setup script for M-LOOP using setuptools. See the documentation of setuptools for further details. 
''' +from __future__ import absolute_import, division, print_function +import multiprocessing as mp import mloop as ml + from setuptools import setup, find_packages -setup( - name = 'M-LOOP', - version = ml.__version__, - packages = find_packages(), - scripts = ['./bin/M-LOOP'], - - setup_requires=['pytest-runner'], - install_requires = ['docutils>=0.3'], - tests_require=['pytest'], - package_data = { - # If any package contains *.txt or *.rst files, include them: - '': ['*.txt','*.md'], - }, - author = 'Michael R Hush', - author_email = 'MichaelRHush@gmail.com', - description = 'M-LOOP: Machine-learning online optimization package. A python package of automated optimization tools - enhanced with machine-learning - for quantum scientific experiments, computer controlled systems or other optimization tasks.', - license = 'MIT', - keywords = 'automated machine learning optimization optimisation science experiment quantum', - url = 'https://github.com/michaelhush/M-LOOP/', - download_url = 'https://github.com/michaelhush/M-LOOP/tarball/v2.0.0', +def main(): + setup( + name = 'M-LOOP', + version = ml.__version__, + packages = find_packages(), + entry_points={ + 'console_scripts': [ + 'M-LOOP = mloop.cmd:run_mloop' + ], + }, + + setup_requires=['pytest-runner'], + install_requires = ['pip>=7.0', + 'docutils>=0.3', + 'numpy>=1.11', + 'scipy>=0.17', + 'matplotlib>=1.5', + 'pytest>=2.9', + 'scikit-learn>=0.18'], + tests_require=['pytest','setuptools>=26'], + + package_data = { + # If any package contains *.txt or *.rst files, include them: + '': ['*.txt','*.md'], + }, + author = 'Michael R Hush', + author_email = 'MichaelRHush@gmail.com', + description = 'M-LOOP: Machine-learning online optimization package. 
A python package of automated optimization tools - enhanced with machine-learning - for quantum scientific experiments, computer controlled systems or other optimization tasks.', + license = 'MIT', + keywords = 'automated machine learning optimization optimisation science experiment quantum', + url = 'https://github.com/michaelhush/M-LOOP/', + download_url = 'https://github.com/michaelhush/M-LOOP/tarball/v2.1.1', + + classifiers = ['Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Science/Research', + 'Intended Audience :: Manufacturing', + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: POSIX :: Linux', + 'Operating System :: Microsoft :: Windows', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: Implementation :: CPython', + 'Topic :: Scientific/Engineering', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Topic :: Scientific/Engineering :: Physics'] + ) - classifiers = ['Development Status :: 2 - Pre-Alpha', - 'Intended Audience :: Science/Research', - 'Intended Audience :: Manufacturing', - 'License :: OSI Approved :: MIT License', - 'Natural Language :: English', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: POSIX :: Linux', - 'Operating System :: Microsoft :: Windows', - 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: Implementation :: CPython', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Scientific/Engineering :: Physics'] -) \ No newline at end of file +if __name__=='__main__': + mp.freeze_support() + main() \ No newline at end of file diff --git a/tests/shell_script.py b/tests/shell_script.py new file mode 100644 index 0000000..e18e7c0 --- /dev/null +++ b/tests/shell_script.py @@ -0,0 +1,21 @@ +#! 
/usr/bin/env python + +from __future__ import absolute_import, division, print_function +__metaclass__ = type + +import sys +import mloop.testing as mlt +import numpy as np + +def main(argv): + + params = np.array([float(v) for v in argv]) + tester = mlt.TestLandscape() + cost_dict = tester.get_cost_dict(params) + + print('M-LOOP_start') + print('cost = '+str(cost_dict['cost'])) + print('M-LOOP_end') + +if __name__ == '__main__': + main(sys.argv[1:]) \ No newline at end of file diff --git a/tests/test_examples.py b/tests/test_examples.py index 4695e2b..959ea78 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,6 +1,7 @@ ''' Unit test for all of the example scripts provided in the examples folder. ''' +from __future__ import absolute_import, division, print_function import os import unittest @@ -9,75 +10,129 @@ import mloop.utilities as mlu import logging import numpy as np -import shutil +import multiprocessing as mp class TestExamples(unittest.TestCase): @classmethod def setUpClass(cls): os.chdir(mlu.mloop_path + '/../tests') - cls.override_dict = {'file_log_level':logging.DEBUG,'console_log_level':logging.WARNING,'visualizations':False} - cls.fake_experiment = mlt.FakeExperiment() - cls.fake_experiment.start() - + cls.override_dict = {'file_log_level':logging.WARNING,'console_log_level':logging.DEBUG,'visualizations':False} + @classmethod def tearDownClass(cls): - cls.fake_experiment.end_event.set() - cls.fake_experiment.join() - shutil.rmtree(mlu.archive_foldername) - shutil.rmtree(mlu.log_foldername) + pass - def test_complete_controller_config(self): - controller = mll.launch_from_file(mlu.mloop_path+'/../examples/complete_controller_config.txt', - num_params=1, + def test_controller_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/controller_config.txt', + interface_type = 'test', + no_delay = False, **self.override_dict) self.asserts_for_cost_and_params(controller) - def test_complete_extras_config(self): - 
controller = mll.launch_from_file(mlu.mloop_path+'/../examples/complete_extras_config.txt', + def test_extras_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/extras_config.txt', num_params=1, + min_boundary = [-1.0], + max_boundary = [1.0], target_cost = 0.1, + interface_type = 'test', + no_delay = False, **self.override_dict) self.asserts_for_cost_and_params(controller) - def test_complete_logging_config(self): - controller = mll.launch_from_file(mlu.mloop_path+'/../examples/complete_logging_config.txt', + def test_logging_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/logging_config.txt', num_params=1, + min_boundary = [-1.0], + max_boundary = [1.0], target_cost = 0.1, + interface_type = 'test', + no_delay = False, **self.override_dict) self.asserts_for_cost_and_params(controller) - def test_simple_random_config(self): - _ = mll.launch_from_file(mlu.mloop_path+'/../examples/simple_random_config.txt', + def test_random_simple_config(self): + _ = mll.launch_from_file(mlu.mloop_path+'/../examples/random_simple_config.txt', + interface_type = 'test', **self.override_dict) - def test_complete_random_config(self): - _ = mll.launch_from_file(mlu.mloop_path+'/../examples/complete_random_config.txt', + def test_random_complete_config(self): + _ = mll.launch_from_file(mlu.mloop_path+'/../examples/random_complete_config.txt', + interface_type = 'test', **self.override_dict) - def test_simple_nelder_mead_config(self): - controller = mll.launch_from_file(mlu.mloop_path+'/../examples/simple_nelder_mead_config.txt', + def test_nelder_mead_simple_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/nelder_mead_simple_config.txt', + interface_type = 'test', **self.override_dict) self.asserts_for_cost_and_params(controller) - - def test_complete_nelder_mead_config(self): - controller = mll.launch_from_file(mlu.mloop_path+'/../examples/complete_nelder_mead_config.txt', + + def 
test_nelder_mead_complete_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/nelder_mead_complete_config.txt', + interface_type = 'test', + **self.override_dict) + self.asserts_for_cost_and_params(controller) + + def test_differential_evolution_simple_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/differential_evolution_simple_config.txt', + interface_type = 'test', + **self.override_dict) + self.asserts_for_cost_and_params(controller) + + def test_differential_evolution_complete_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/differential_evolution_complete_config.txt', + interface_type = 'test', + **self.override_dict) + self.asserts_for_cost_and_params(controller) + + def test_gaussian_process_simple_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/gaussian_process_simple_config.txt', + interface_type = 'test', + no_delay = False, + **self.override_dict) + self.asserts_for_cost_and_params(controller) + + def test_gaussian_process_complete_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/gaussian_process_complete_config.txt', + interface_type = 'test', + no_delay = False, **self.override_dict) self.asserts_for_cost_and_params(controller) - def test_simple_gaussian_process_config(self): - controller = mll.launch_from_file(mlu.mloop_path+'/../examples/simple_gaussian_process_config.txt', + def test_tutorial_config(self): + fake_experiment = mlt.FakeExperiment() + fake_experiment.start() + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/tutorial_config.txt', **self.override_dict) self.asserts_for_cost_and_params(controller) + fake_experiment.end_event.set() + fake_experiment.join() - def test_complete_gaussian_process_config(self): - controller = mll.launch_from_file(mlu.mloop_path+'/../examples/complete_gaussian_process_config.txt', + def test_file_interface_config(self): + fake_experiment = 
mlt.FakeExperiment() + fake_experiment.start() + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/file_interface_config.txt', + num_params=1, + target_cost = 0.1, + **self.override_dict) + self.asserts_for_cost_and_params(controller) + fake_experiment.end_event.set() + fake_experiment.join() + + def test_shell_interface_config(self): + controller = mll.launch_from_file(mlu.mloop_path+'/../examples/shell_interface_config.txt', + num_params=1, + target_cost = 0.1, + no_delay = False, **self.override_dict) self.asserts_for_cost_and_params(controller) def asserts_for_cost_and_params(self,controller): self.assertTrue(controller.best_cost<=controller.target_cost) self.assertTrue(np.sum(np.square(controller.best_params))<=controller.target_cost) - + + if __name__ == "__main__": - unittest.main() \ No newline at end of file + mp.freeze_support() + unittest.main() \ No newline at end of file diff --git a/tests/test_units.py b/tests/test_units.py new file mode 100644 index 0000000..905143a --- /dev/null +++ b/tests/test_units.py @@ -0,0 +1,80 @@ +''' +Unit test for all of the example scripts provided in the examples folder. +''' +from __future__ import absolute_import, division, print_function + +import os +import unittest +import math +import mloop.interfaces as mli +import mloop.controllers as mlc +import numpy as np +import multiprocessing as mp + +class CostListInterface(mli.Interface): + def __init__(self, cost_list): + super(CostListInterface,self).__init__() + self.call_count = 0 + self.cost_list = cost_list + def get_next_cost_dict(self,params_dict): + if np.isfinite(self.cost_list[self.call_count]): + cost_dict = {'cost': self.cost_list[self.call_count]} + else: + cost_dict = {'bad': True} + self.call_count += 1 + return cost_dict + +class TestUnits(unittest.TestCase): + + def test_max_num_runs(self): + cost_list = [5.,4.,3.,2.,1.] 
+ interface = CostListInterface(cost_list) + controller = mlc.create_controller(interface, + max_num_runs = 5, + target_cost = -1, + max_num_runs_without_better_params = 10) + controller.optimize() + self.assertTrue(controller.best_cost == 1.) + self.assertTrue(np.array_equiv(np.array(controller.in_costs), + np.array(cost_list))) + + + def test_max_num_runs_without_better_params(self): + cost_list = [1.,2.,3.,4.,5.] + interface = CostListInterface(cost_list) + controller = mlc.create_controller(interface, + max_num_runs = 10, + target_cost = -1, + max_num_runs_without_better_params = 4) + controller.optimize() + self.assertTrue(controller.best_cost == 1.) + self.assertTrue(np.array_equiv(np.array(controller.in_costs), + np.array(cost_list))) + + def test_target_cost(self): + cost_list = [1.,2.,-1.] + interface = CostListInterface(cost_list) + controller = mlc.create_controller(interface, + max_num_runs = 10, + target_cost = -1, + max_num_runs_without_better_params = 4) + controller.optimize() + self.assertTrue(controller.best_cost == -1.) + self.assertTrue(np.array_equiv(np.array(controller.in_costs), + np.array(cost_list))) + + def test_bad(self): + cost_list = [1., float('nan'),2.,float('nan'),-1.] + interface = CostListInterface(cost_list) + controller = mlc.create_controller(interface, + max_num_runs = 10, + target_cost = -1, + max_num_runs_without_better_params = 4) + controller.optimize() + self.assertTrue(controller.best_cost == -1.) + for x,y in zip(controller.in_costs,cost_list): + self.assertTrue(x==y or (math.isnan(x) and math.isnan(y))) + +if __name__ == "__main__": + mp.freeze_support() + unittest.main() \ No newline at end of file