diff --git a/.travis.yml b/.travis.yml
index cfb85ec..e99f535 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,14 +1,15 @@
language: python
python:
- - "2.7"
- - "3.4"
+ - "2.7.12"
- "3.5"
-install:
- - pip install .
+install:
+ - pip install --upgrade pip
+ - python --version
+ - pip --version
+ - pip install -r requirements.txt
# command to run tests
script: python setup.py test
os:
- linux
- - osx
\ No newline at end of file
diff --git a/docs/api/controllers.rst b/docs/api/controllers.rst
index 9af1001..4679486 100644
--- a/docs/api/controllers.rst
+++ b/docs/api/controllers.rst
@@ -1,7 +1,7 @@
.. _api-controllers:
controllers
------------
+===========
.. automodule:: mloop.controllers
:members:
diff --git a/docs/api/index.rst b/docs/api/index.rst
index 3d2ff16..b8d6915 100644
--- a/docs/api/index.rst
+++ b/docs/api/index.rst
@@ -1,5 +1,6 @@
.. _sec-api:
+==========
M-LOOP API
==========
diff --git a/docs/api/interfaces.rst b/docs/api/interfaces.rst
index 80eb1e9..9d443c8 100644
--- a/docs/api/interfaces.rst
+++ b/docs/api/interfaces.rst
@@ -1,5 +1,5 @@
interfaces
-----------
+==========
.. automodule:: mloop.interfaces
:members:
diff --git a/docs/api/launchers.rst b/docs/api/launchers.rst
index 7d3c105..3e9454c 100644
--- a/docs/api/launchers.rst
+++ b/docs/api/launchers.rst
@@ -1,5 +1,5 @@
launchers
----------
+=========
.. automodule:: mloop.launchers
:members:
diff --git a/docs/api/learners.rst b/docs/api/learners.rst
index 642105a..7385be9 100644
--- a/docs/api/learners.rst
+++ b/docs/api/learners.rst
@@ -1,7 +1,7 @@
.. _api-learners:
learners
----------
+========
.. automodule:: mloop.learners
:members:
diff --git a/docs/api/mloop.rst b/docs/api/mloop.rst
index a0127dd..affcb8f 100644
--- a/docs/api/mloop.rst
+++ b/docs/api/mloop.rst
@@ -1,4 +1,4 @@
mloop
------
+=====
.. automodule:: mloop
diff --git a/docs/api/t_esting.rst b/docs/api/t_esting.rst
index 9bb25ae..1209b5a 100644
--- a/docs/api/t_esting.rst
+++ b/docs/api/t_esting.rst
@@ -1,5 +1,5 @@
testing
--------
+=======
.. automodule:: mloop.testing
:members:
diff --git a/docs/api/utilities.rst b/docs/api/utilities.rst
index 1f22fb5..8e63990 100644
--- a/docs/api/utilities.rst
+++ b/docs/api/utilities.rst
@@ -1,5 +1,5 @@
utilities
----------
+=========
.. automodule:: mloop.utilities
:members:
diff --git a/docs/api/visualizations.rst b/docs/api/visualizations.rst
index f602372..91d7209 100644
--- a/docs/api/visualizations.rst
+++ b/docs/api/visualizations.rst
@@ -1,5 +1,5 @@
visualizations
---------------
+==============
.. automodule:: mloop.visualizations
:members:
diff --git a/docs/conf.py b/docs/conf.py
index fa265ce..389407f 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -70,9 +70,9 @@
# built documents.
#
# The short X.Y version.
-version = '2.0'
+version = '2.1'
# The full version, including alpha/beta/rc tags.
-release = '2.0.2'
+release = '2.1.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/docs/contributing.rst b/docs/contributing.rst
index e7f98a2..48709b8 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -1,19 +1,20 @@
.. _sec-contributing:
+============
Contributing
============
If you use M-LOOP please consider contributing to the project. There are many quick and easy ways to help out.
-- If you use M-LOOP be sure to cite paper where it first used: `'Fast machine-learning online optimization of ultra-cold-atom experiments', Sci Rep 6, 25890 (2016) `_.
-- Star and watch the `M-LOOP github `_.
-- Make a suggestion on what features you would like added, or report an issue, on the `github `_ or by `email `_.
-- Contribute your own code to the `M-LOOP github `_, this could be the interface you designed, more options or a completely new solver.
+- If you use M-LOOP be sure to cite the paper where it was first used: `'Fast machine-learning online optimization of ultra-cold-atom experiments', Sci Rep 6, 25890 (2016) `_.
+- Star and watch the `M-LOOP GitHub `_.
+- Make a suggestion on what features you would like added, or report an issue, on the `GitHub `_ or by `email `_.
+- Contribute your own code to the `M-LOOP GitHub `_, this could be the interface you designed, more options or a completely new solver.
Finally spread the word! Let others know the success you have had with M-LOOP and recommend they try it too.
Contributors
-------------
+============
M-LOOP is written and maintained by `Michael R Hush `_
diff --git a/docs/data.rst b/docs/data.rst
index 4c328a8..4f78e13 100644
--- a/docs/data.rst
+++ b/docs/data.rst
@@ -1,12 +1,13 @@
.. _sec-data:
+====
Data
====
M-LOOP saves all data produced by the experiment in archives which are saved to disk during and after the optimization run. The archives also contain information derived from the data, including the machine learning model for how the experiment works. Here we explain how to interpret the file archives.
File Formats
-------------
+============
M-LOOP currently supports three file formats for all file input and output.
@@ -15,7 +16,7 @@ M-LOOP currently supports three file formats for all file input and output.
- 'pkl' pickle files: a serialization of a python dictionary made with `pickle `. Your data can be retrieved from this dictionary using the appropriate keywords.
File Keywords
--------------
+=============
The archives contain a set of keywords/variable names with associated data. The quickest way to understand what the values mean for a particular keyword is to :ref:`search` the documentation for a description.
@@ -26,7 +27,7 @@ For the controller archive see :ref:`api-controllers`.
For the learner archive see :ref:`api-learners`. The generic keywords are described in the class Learner, with learner specific options described in the derived classes, for example GaussianProcessLearner.
Converting files
-----------------
+================
If for whatever reason you want to convert files between the formats you can do so using the utilities module of M-LOOP. For example the following python code will convert the file controller_archive_2016-08-18_12-18.pkl from a 'pkl' file to a 'mat' file::
diff --git a/docs/examples.rst b/docs/examples.rst
index 8ec9258..5349d30 100644
--- a/docs/examples.rst
+++ b/docs/examples.rst
@@ -1,5 +1,6 @@
.. _sec-examples:
+========
Examples
========
@@ -10,24 +11,33 @@ The options available are also comprehensively documented in the :ref:`sec-api`
Each of the example files is used when running tests of M-LOOP. So please copy and modify them elsewhere if you use them as a starting point for your configuration file.
Interfaces
-----------
+==========
-There is currently one interface supported: 'file'. You can specify which interface you want with the option::
+There are currently two interfaces supported: 'file' and 'shell'. You can specify which interface you want with the option::
interface_type = [name]
The default will be 'file'. The specific options for each of the interfaces are described below.
File Interface
-~~~~~~~~~~~~~~
+--------------
-You can change the names of the files used for the file interface and their type. The file interface options are described in *file_interface_config.txt*.
+The file interface exchanges information with the experiment by writing files to disk. You can change the names of the files used for the file interface and their type. The file interface options are described in *file_interface_config.txt*.
.. include:: ../examples/file_interface_config.txt
:literal:
+Shell Interface
+---------------
+
+The shell interface is for experiments that can be run through a command executed in a shell. Information is then piped between M-LOOP and the experiment through the shell. You can change the command to run the experiment and the way the parameters are formatted. The shell interface options are described in *shell_interface_config.txt*.
+
+.. include:: ../examples/shell_interface_config.txt
+ :literal:
+
+
Controllers
------------
+===========
There are currently three controller types supported: 'gaussian_process', 'random' and 'nelder_mead'. The default is 'gaussian_process'. You can set which interface you want to use with the option::
@@ -38,8 +48,8 @@ Each of the controllers and their specific options are described below. There is
.. include:: ../examples/controller_config.txt
:literal:
-Gaussian Process
-~~~~~~~~~~~~~~~~
+Gaussian process
+----------------
The Gaussian-process controller is the default controller and is the currently the most sophisticated machine learner algorithm. It uses a `Link Gaussian process `_ to develop a model for how the parameters relate to the measured cost, effectively creating a model for how the experiment operates. This model is then used when picking new points to test.
@@ -52,9 +62,25 @@ There are two example files for the Gaussian-process controller: *gaussian_proce
.. include:: ../examples/gaussian_process_complete_config.txt
:literal:
+
+Differential evolution
+----------------------
+
+The differential evolution (DE) controller uses a `Link DE algorithm `_ for optimization. DE is a type of evolutionary algorithm, and is historically the most commonly used in automated optimization. DE will eventually find a global solution, however it can take many experiments before it does so.
+
+There are two example files for the differential evolution controller: *differential_evolution_simple_config.txt* which contains the basic options.
+
+.. include:: ../examples/differential_evolution_simple_config.txt
+ :literal:
+
+*differential_evolution_complete_config.txt* which contains a comprehensive list of options.
+
+.. include:: ../examples/differential_evolution_complete_config.txt
+ :literal:
+
Nelder Mead
-~~~~~~~~~~~
+-----------
The Nelder Mead controller implements the `Link Nelder-Mead method `_ for optimization. You can control the starting point and size of the initial simplex of the method with the configuration file.
@@ -69,7 +95,7 @@ There are two example files for the Nelder-Mead controller: *nelder_mead_simple_
:literal:
Random
-~~~~~~
+------
The random optimization algorithm picks parameters randomly from a uniform distribution from within the parameter bounds or trust region.
@@ -84,7 +110,7 @@ There are two example files for the random controller: *random_simple_config.txt
:literal:
Logging
--------
+=======
You can control the filename of the logs and also the level which is reported to the file and the console. For more information see `Link logging levels `_. The logging options are described in *logging_config.txt*.
@@ -92,7 +118,7 @@ You can control the filename of the logs and also the level which is reported to
:literal:
Extras
-------
+======
Extras refers to options related to post processing your data once the optimization is complete. Currently the only extra option is for visualizations. The extra options are described in *extras_config.txt*.
diff --git a/docs/index.rst b/docs/index.rst
index 02f1b15..d5b173f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,6 +1,6 @@
-######
+======
M-LOOP
-######
+======
The Machine-Learner Online Optimization Package is designed to automatically and rapidly optimize the parameters of a scientific experiment or computer controller system.
@@ -13,25 +13,24 @@ Using M-LOOP is simple, once the parameters of your experiment is computer contr
M-LOOP not only finds an optimal set of parameters for the experiment it also provides a model of how the parameters are related to the costs which can be used to improve the experiment.
-If you use M-LOOP please cite our publication where we first used the package to optimise the production of a Bose-Einstein Condensate:
+If you use M-LOOP please cite our publication where we first used the package to optimize the production of a Bose-Einstein Condensate:
Fast Machine-Learning Online Optimization of Ultra-Cold-Atom Experiments. *Scientific Reports* **6**, 25890 (2016). DOI: `Link 10.1038/srep25890 `_
http://www.nature.com/articles/srep25890
Quick Start
------------
+===========
-To get the M-LOOP running as soon as possible follow the :ref:`sec-installation` instructions and :ref:`sec-tutorial`.
+To get M-LOOP running follow the :ref:`sec-installation` instructions and :ref:`sec-tutorial`.
Contents
---------
+========
.. toctree::
- :maxdepth: 2
install
- tutorial
+ tutorials
interfaces
data
visualizations
@@ -40,7 +39,7 @@ Contents
api/index
Indices
--------
+=======
* :ref:`genindex`
* :ref:`modindex`
diff --git a/docs/install.rst b/docs/install.rst
index 142a56e..de51e7f 100644
--- a/docs/install.rst
+++ b/docs/install.rst
@@ -1,60 +1,121 @@
.. _sec-installation:
+============
Installation
============
-M-LOOP is available on PyPI and can be installed with your favorite package manager. However, we currently recommend you install from the source code to ensure you have the latest improvements and bug fixes.
+
+M-LOOP is available on PyPI and can be installed with your favorite package manager; simply search for 'M-LOOP' and install. However, if you want the latest features and a local copy of the examples you should install M-LOOP using the source code from the `Link GitHub `_. Detailed installation instructions are provided below.
The installation process involves three steps.
1. Get a Python distribution with the standard scientific packages. We recommend installing :ref:`sec-anaconda`.
-2. Install the development version of :ref:`sec-M-LOOP`.
-3. :ref:`Test` your M-LOOP install.
+2. Install the latest release of :ref:`sec-M-LOOP`.
+3. (Optional) :ref:`Test` your M-LOOP install.
+
+If you are having any trouble with the installation you may need to check that the :ref:`package dependencies` have been correctly installed. If you are still having trouble, you can `Link submit an issue `_ to the GitHub.
.. _sec-anaconda:
Anaconda
---------
+========
+
We recommend installing Anaconda to get a python environment with all the required scientific packages. The Anaconda distribution is available here:
https://www.continuum.io/downloads
Follow the installation instructions they provide.
-M-LOOP is targeted at python 3.\* but also supports 2.7. Please use python 3.\* if you do not have a reason to use 2.7, see :ref:`sec-py3vpy2` for details.
+M-LOOP is targeted at python 3 but also supports 2. Please use python 3 if you do not have a reason to use 2, see :ref:`sec-py3vpy2` for details.
.. _sec-m-loop:
M-LOOP
-------
-M-LOOP can be installed from the source code with three commands::
+======
+
+You have two options when installing M-LOOP, you can perform a basic installation of the last release with pip or you can install from source to get the latest features. We recommend installing from source so you can test your installation, see all the examples and get the most recent bug fixes.
+
+Installing from source
+----------------------
+
+M-LOOP can be installed from the latest source code with three commands::
git clone git://github.com/michaelhush/M-LOOP.git
cd ./M-LOOP
python setup.py develop
-The first command downloads the latest source code for M-LOOP from github into the current directory, the second moves into the M-LOOP source directory, and the third link builds the package and creates a link from you python package to the source. You may need admin privileges to run the setup script.
+The first command downloads the latest source code for M-LOOP from GitHub into the current directory, the second moves into the M-LOOP source directory, and the third builds the package and creates a link from your python package to the source. If you are using linux or MacOS you may need admin privileges to run the setup script.
-At any time you can update M-LOOP to the latest version from github by running the command::
+At any time you can update M-LOOP to the latest version from GitHub by running the command::
git pull origin master
in the M-LOOP directory.
+Installing with pip
+-------------------
+
+M-LOOP can be installed with pip with a single command::
+
+ pip install M-LOOP
+
+If you are using linux or MacOS you may need admin privileges to run the command. To update M-LOOP to the latest version use::
+
+ pip install M-LOOP --upgrade
+
+
.. _sec-Testing:
-Test Installation
------------------
+Testing
+=======
-To test your M-LOOP installation use the command::
+If you have installed from source, to test your installation use the command::
python setup.py test
-In the M-LOOP source code directory. The tests should take around five minutes to complete. If you find a error please consider :ref:`sec-contributing` to the project and report a bug on the `github `_.
+In the M-LOOP source code directory. The tests should take around five minutes to complete. If you find an error please consider :ref:`sec-contributing` to the project and report a bug on the `GitHub `_.
+
+If you installed M-LOOP using pip, you will not need to test your installation.
+
+.. _sec-dependencies:
+
+Dependencies
+============
+
+M-LOOP requires the following packages to run correctly.
+
+============ =======
+Package Version
+============ =======
+docutils >=0.3
+matplotlib >=1.5
+numpy >=1.11
+pip >=7.0
+pytest >=2.9
+setuptools >=26
+scikit-learn >=0.18
+scipy >=0.17
+============ =======
+
+These packages should be automatically installed by pip or the script setup.py when you install M-LOOP.
+
+However, if you are using Anaconda some packages that are managed by the conda command may not be correctly updated, even if your installation passes all the tests. In this case, you will have to update these packages manually. You can check what packages you have installed and their version with the command::
+
+ conda list
+
+To install a package that is missing, say for example pytest, use the command::
+
+ conda install pytest
+
+To update a package to the latest version, say for example scikit-learn, use the command::
+
+ conda update scikit-learn
+
+Once you install and update all the required packages with conda M-LOOP should run correctly.
Documentation
--------------
+=============
-If you would also like a local copy of the documentation enter the docs folder and use the command::
+The latest documentation will always be available here online. If you would also like a local copy of the documentation, and you have downloaded the source code, enter the docs folder and use the command::
make html
@@ -63,8 +124,8 @@ Which will generate the documentation in docs/_build/html.
.. _sec-py3vpy2:
Python 3 vs 2
--------------
+=============
-M-LOOP is developed in python 3.\* and it gets the best performance in this environment. This is primarily because other packages that M-LOOP uses, like numpy, run fastest in python 3. The tests typically take about 20% longer to complete in python 2 than 3.
+M-LOOP is developed in python 3 and it gets the best performance in this environment. This is primarily because other packages that M-LOOP uses, like numpy, run fastest in python 3. The tests typically take about 20% longer to complete in python 2 than 3.
-If you have a specific reason to stay in a python 2.7 environment, you may use other packages which are not python 3 compatible, then you can still use M-LOOP without upgrading to 3.\*. However, if you do not have a specific reason to stay with python 2, it is highly recommended you use the latest python 3.\* package.
+If you have a specific reason to stay in a python 2 environment (you may use other packages which are not python 3 compatible) then you can still use M-LOOP without upgrading to 3. However, if you do not have a specific reason to stay with python 2, it is highly recommended you use the latest python 3 package.
diff --git a/docs/interfaces.rst b/docs/interfaces.rst
index cd31aeb..926fb9d 100644
--- a/docs/interfaces.rst
+++ b/docs/interfaces.rst
@@ -1,18 +1,25 @@
.. _sec-interfaces:
+==========
Interfaces
==========
-Currently M-LOOP only support the File interface, which is also described in :ref:`sec-tutorial`. There will be more added very soon. If you have any suggestions for interfaces please consider :ref:`sec-contributing` to the project.
+Currently M-LOOP supports three ways to interface your experiment:
+
+1. File interface where parameters and costs are exchanged between the experiment and M-LOOP through files written to disk. This approach is described in a :ref:`tutorial `.
+2. Shell interface where parameters and costs are exchanged between the experiment and M-LOOP through information piped through a shell (or command line). This option should be considered if you can execute your experiment using a command from a shell.
+3. Implementing your own interface through the M-LOOP python API.
-File Interface
---------------
+Each of these options is described below. If you have any suggestions for interfaces please consider :ref:`sec-contributing` to the project.
+
+File interface
+==============
The simplest method to connect your experiment to M-LOOP is with the file interface where data is exchanged by writing files to disk. To use this interface you can include the option::
interface='file'
-in you configuration file. The file interface happens to be the default, so this is not necessary.
+in your configuration file. The file interface happens to be the default, so this is not necessary.
The file interface works under the assumption that you experiment follows the following algorithm.
@@ -32,7 +39,7 @@ When writing the file *exp_output.txt* there are three keywords and values you c
cost refers to the cost calculated from the experimental data. uncer, is optional, and refers to the uncertainty in the cost measurement made. Note, M-LOOP by default assumes there is some noise corrupting costs, which is fitted and compensated for. Hence, if there is some noise in your costs which you are unable to predict from a single measurement, do not worry, you do not have to estimate uncer, you can just leave it out. Lastly bad can be used to indicate an experiment failed and was not able to produce a cost. If the experiment worked set bad = false and if it failed set bad = true.
-Note you do not have to include all of the keywords, you must provide at least a cost or the bad keyword set to false. For example a succesful run can simply be::
+Note you do not have to include all of the keywords, you must provide at least a cost or the bad keyword set to false. For example a successful run can simply be::
cost = 0.3
@@ -41,3 +48,48 @@ and failed experiment can be as simple as::
bad = True
Once the *exp_output.txt* has been written to disk, M-LOOP will read it and delete it.
+
+Shell interface
+===============
+
+The shell interface is used when experiments can be run from a command in a shell. M-LOOP will still need to be configured and executed in the same manner described for a file interface as described in :ref:`tutorial `. The only difference is how M-LOOP starts the experiment and reads data. To use this interface you must include the following options::
+
+ interface_type='shell'
+ command='./run_exp'
+ params_args_type='direct'
+
+in the configuration file. The interface_type keyword simply indicates that you want M-LOOP to operate the experiment through the shell. The other two keywords need to be customized to your needs.
+
+The command keyword should be provided with the command on the shell that runs the experiment. In the example above the executable would be *run_exp*. Note M-LOOP will try and execute the command in the folder that you run M-LOOP from, if this causes trouble you should just use the absolute address of your executable. Your command can be more complicated than a single word, for example if you wanted to include some options like './run_exp --verbose -U' this would also be acceptable.
+
+The params_args_type keyword controls how M-LOOP delivers the parameters to the executable. If you use the 'direct' option the parameters will just be fed directly to the experiment as arguments. For example if the command was ./run_exp and the parameters to test next were 1.3, -23 and 12, M-LOOP would execute the following command::
+
+ ./run_exp 1.3 -23 12
+
+the other params_args_type option is 'named' in this case each parameter is fed to the experiment as a named option. Given the same parameters as before, M-LOOP would execute the command::
+
+ ./run_exp --param1 1.3 --param2 -23 --param3 12
+
+After the experiment has run and a cost (and uncertainty or bad value) has been found they must be provided back to M-LOOP through the shell. For example if your experiment completed with a cost 1.3, uncertainty 0.1 you need to program your executable to print the following to the shell::
+
+ M-LOOP_start
+ cost = 1.3
+ uncer = 0.1
+ M-LOOP_end
+
+You can also output other information to the shell and split up the information you provide to M-LOOP if you wish. The following output would also be valid::
+
+ Running experiment... Experiment complete.
+ Checking it was valid... It worked.
+ M-LOOP_start
+ bad = False
+ M-LOOP_end
+ Calculating cost... Was 3.2.
+ M-LOOP_start
+ cost = 3.2
+ M-LOOP_end
+
+Python interfaces
+=================
+
+If your experiment is controlled in python you can use M-LOOP as an API in your own custom python script. In this case you must create your own implementation of the abstract interface class to control the experiment. This is explained in detail in the :ref:`tutorial for python controlled experiments `.
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
deleted file mode 100644
index 8df958c..0000000
--- a/docs/tutorial.rst
+++ /dev/null
@@ -1,205 +0,0 @@
-.. _sec-tutorial:
-
-Tutorial
-========
-
-Here we provide a tutorial on how to use M-LOOP to optimize a generic experiment. M-LOOP is flexible and can be customized with a variety of :ref:`options ` and :ref:`sec-interfaces`, it can be run from the command line or used as a :ref:`python API `. Here we introduce the basic settings to get M-LOOP up and running as quick as possible.
-
-Overview
---------
-
-The basic operation of M-LOOP is sketched below.
-
-.. _fig-mloop-diag:
-
-.. figure:: _static/M-LOOP_diagram.png
- :alt: M-LOOP in a loop with an experiment sending parameters and receiving costs.
-
-There are three stages:
-
-1. M-LOOP is started with the command::
-
- M-LOOP
-
- M-LOOP first looks for the configuration file *exp_input.txt*, which contains options like the number of parameters and their limits, in the folder it is executed, then starts the optimization process.
-
-2. M-LOOP controls and optimizes the experiment by exchanging files written to disk. M-LOOP produces a file called *exp_input.txt* which contains a variable params with the next parameters to be run by the experiment. The experiment is expected to run an experiment with these parameters and measure the resultant cost. The experiment should then write the file *exp_output.txt* which contains at least the variable cost which quantifies the performance of that experimental run, and optionally, the variables uncer (for uncertainty) and bad (if the run failed). This process is repeated many times until the halting condition is met.
-
-3. Once the optimization process is complete, M-LOOP prints to the console the parameters and cost of the best run performed during the experiment, and a prediction of what the optimal parameters (with the corresponding predicted cost and uncertainty). M-LOOP also produces a set of plots that allow the user to visualize the optimization process and cost landscape. During operation and at the end M-LOOP write three files to disk:
-
- - *M-LOOP_[datetime].log* a log of the console output and other debugging information during the run.
- - *controller_archive_[datetime].txt* an archive of all the experimental data recorded and the results.
- - *learner_archive_[datetime].txt* an archive of the model created by the machine learner of the experiment.
-
-In what follows we will unpack this process and give details on how to configure and run M-LOOP.
-
-Launching M-LOOP
-----------------
-
-Launching M-LOOP is performed by executing the command M-LOOP on the console. You can also provide the name of your configuration file if you do not want to use the default with the command::
-
- M-LOOP -c [config_filename]
-
-Configuration File
-------------------
-
-The configuration file contains a list of options and settings for the optimization run. Each option must be started on a new line and formatted as::
-
- [keyword] = [value]
-
-You can add comments to your file using #, everything past # will be ignored. Examples of relevant keywords and syntax for the values is provided in :ref:`sec-examples` and a comprehensive list of options is described in :ref:`sec-examples`. The values should be formatted with python syntax, strings should be surrounded with single or double quotes and arrays of values can be surrounded with square brackets/parentheses with numbers separated with commas. In this tutorial we will examine the example file *tutoral_config.txt*::
-
- #Tutorial Config
- #---------------
-
- #Parameter settings
- num_params = 2 #number of parameters
- min_boundary = [-1,-1] #minimum boundary
- max_boundary = [1,1] #maximum boundary
-
- #Halting conditions
- max_num_runs = 1000 #maximum number of runs
- max_num_runs_without_better_params = 50 #maximum number of runs without finding better parameters
- target_cost = 0.01 #optimization halts when a cost below this target is found
-
- #Learner specific options
- first_params = [0.5,0.5] #first parameters to try
- trust_region = 0.4 #maximum % move distance from best params
-
- #File format options
- interface_file_type = 'txt' #file types of *exp_input.mat* and *exp_output.mat*
- controller_archive_file_type = 'mat' #file type of the controller archive
- learner_archive_file_type = 'pkl' #file type of the learner archive
-
- #Visualizations
- visualizations = True
-
-We will now explain the options in each of their groups. In almost all cases you will only need to the parameters settings and halting conditions, but we have also describe a few of the most commonly used extra options.
-
-Parameter settings
-~~~~~~~~~~~~~~~~~~
-
-The number of parameters and their limits is defined with three keywords::
-
- num_params = 2
- min_boundary = [-1,-1]
- max_boundary = [1,1]
-
-num_params defines the number of parameters, min_boundary defines the minimum value each of the parameters can take and max_boundary defines the maximum value each parameter can take. Here there are two value which each must be between -1 and 1.
-
-Halting conditions
-~~~~~~~~~~~~~~~~~~
-
-The halting conditions define when the simulation will stop. We present three options here::
-
- max_num_runs = 100
- max_num_runs_without_better_params = 10
- target_cost = 0.1
-
-max_num_runs is the maximum number of runs that the optimization algorithm is allowed to run. max_num_runs_without_better_params is the maximum number of runs allowed before a lower cost and better parameters is found. Finally, when target_cost is set, if a run produces a cost that is less than this value the optimization process will stop.
-
-When multiple halting conditions are set, the optimization process will halt when any one of them is met.
-
-If you do not have any prior knowledge of the problem use only the keyword max_num_runs and set it to the highest value you can wait for. If you have some knowledge about what the minimum attainable cost is or there is some cost threshold you need to achieve, you might want to set the target_cost. max_num_runs_without_better_params is useful if you want to let the optimization algorithm run as long as it needs until there is a good chance the global optimum has been found.
-
-If you do not want one of the halting conditions, simply delete it from your file. For example if you just wanted the algorithm to search as long as it can until it found a global minimum you could set::
-
- max_num_runs_without_better_params = 10
-
-
-Learner specific options
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-There are many learner specific options (and different learner algorithms) described in :ref:`sec-examples`. Here we consider just a couple of the most commonly used ones. M-LOOP has been designed to find an optimum quickly with no custom configuration as long as the experiment is able to provide a cost for every parameter it provides.
-
-However if your experiment will fail to work if there are sudden and significant changes to your parameters you may need to set the following options::
-
- first_parameters = [0.5,0.5]
- trust_region = 0.4
-
-first_parameters defines the first parameters the learner will try. trust_region defines the maximum change allowed in the parameters from the best parameters found so far. In the current example the region size is 2 by 2, with a trust region of 40% thus the maximum allowed change for the second run will be [0 +/- 0.8, 0 +/- 0.8].
-
-If you experiment reliably produces costs for any parameter set you will not need these settings and you can just delete them.
-
-File format options
-~~~~~~~~~~~~~~~~~~~
-
-You can set the file formats for the archives produced at the end and the files exchanged with the experiment with the options::
-
- interface_file_type = 'txt'
- controller_archive_file_type = 'mat'
- learner_archive_file_type = 'pkl'
-
-interface_file_type controls the file format for the files exchanged with the experiment. controller_archive_file_type and learner_archive_file_type control the format of the respective archives.
-
-There are three file formats currently available: 'mat' is for MATLAB readable files, 'pkl' if for python binary archives created using the `pickle package `_, and 'txt' human readable text files. For more details on these formats see :ref:`sec-data`.
-
-Visualization
-~~~~~~~~~~~~~
-
-By default M-LOOP will display a set of plots that allow the user to visualize the optimization process and the cost landscape. To change this behavior use the option::
-
- visualizations = True
-
-Set it to false to turn the visualizations off. For more details see :ref:`sec-visualizations`.
-
-File Interface
---------------
-
-There are many options of how to connect M-LOOP to your experiment. We consider the most generic method, writing and reading files to disk. For other options see :ref:`sec-interfaces`. If you design a bespoke interface for your experiment please consider :ref:`sec-contributing` to the project by sharing your method with other users.
-
-The file interface works under the assumption that you experiment follows the following algorithm.
-
-1. Wait for the file *exp_input.txt* to be made on the disk in the same folder M-LOOP is run.
-2. Read the parameters for the next experiment from the file (named params).
-3. Delete the file *exp_input.txt*.
-4. Run the experiment with the parameters provided and calculate a cost, and optionally the uncertainty.
-5. Write the cost to the file *exp_output.txt*. Go back to step 1.
-
-It is important you delete the file *exp_input.txt* after reading it, since it is used to as an indicator for the next experiment to run.
-
-When writing the file *exp_output.txt* there are three keywords and values you can include in your file, for example after the first run your experiment may produce the following::
-
- cost = 0.5
- uncer = 0.01
- bad = false
-
-cost refers to the cost calculated from the experimental data. uncer, is optional, and refers to the uncertainty in the cost measurement made. Note, M-LOOP by default assumes there is some noise corrupting costs, which is fitted and compensated for. Hence, if there is some noise in your costs which you are unable to predict from a single measurement, do not worry, you do not have to estimate uncer, you can just leave it out. Lastly bad can be used to indicate an experiment failed and was not able to produce a cost. If the experiment worked set bad = false and if it failed set bad = true.
-
-Note you do not have to include all of the keywords, you must provide at least a cost or the bad keyword set to false. For example a successful run can simply be::
-
- cost = 0.3
-
-and failed experiment can be as simple as::
-
- bad = True
-
-Once the *exp_output.txt* has been written to disk, M-LOOP will read it and delete it.
-
-Parameters and cost function
-----------------------------
-
-Choosing the right parameterization of your experiment and cost function will be an important part of getting great results.
-
-If you have time dependent functions in your experiment you will need to choose a parametrization of these function before interfacing them with M-LOOP. M-LOOP will take more time and experiments to find an optimum, given more parameters. But if you provide too few parameters, you may not be able to achieve your cost target.
-
-Fortunately, the visualizations provided after the optimization will help you determine which parameters contributed the most to the optimization process. Try with whatever parameterization is convenient to start and use the data produced afterwards to guide you on how to better improve the parametrization of your experiment.
-
-Picking the right cost function from experimental observables will also be important. M-LOOP will always find a global optimal as quick as it can, but if you have a poorly chosen cost function, the global optimal may not what you really wanted to optimize. Make sure you pick a cost function that will uniquely produce the result you want. Again, do not be afraid to experiment and use the data produced by the optimization runs to improve the cost function you are using.
-
-Have a look at our `paper `_ on using M-LOOP to create a Bose-Einstein Condensate for an example of choosing a parametrization and cost function for an experiment.
-
-Results
--------
-
-Once M-LOOP has completed the optimization, it will output results in several ways.
-
-M-LOOP will print results to the console. It will give the parameters of the experimental run that produced the lowest cost. It will also provide a set of parameters which are predicted to be produce the lowest average cost. If there is no noise in the costs your experiment produced, then the best parameters and predicted best parameters will be the same. If there was some noise your costs then it is possible that there will be a difference between the two. This is because the noise might have resulted with a set of experimental parameters that produced a lower cost due to a random fluke. The real optimal parameters that correspond to the minimum average cost are the predicted best parameters. In general, use the predicted best parameters (when provided) as the final result of the experiment.
-
-M-LOOP will produce an archive for the controller and machine learner. The controller archive contains all the data gathered during the experimental run and also other configuration details set by the user. By default it will be a 'txt' file which is human readable. If the meaning of a keyword and its associated data in the file is unclear, just :ref:`search` the documentation with the keyword to find a description. The learner archive contains a model of the experiment produced by the machine learner algorithm, which is currently a gaussian process. By default it will also be a 'txt' file. For more detail on these files see :ref:`sec-data`.
-
-M-LOOP, by default, will produce a set of visualizations. These plots show the optimizations process over time and also predictions made by the learner of the cost landscape. For more details on these visualizations and their interpretation see :ref:`sec-visualizations`.
-
-
-
-
diff --git a/docs/tutorials.rst b/docs/tutorials.rst
new file mode 100644
index 0000000..a7d0dd9
--- /dev/null
+++ b/docs/tutorials.rst
@@ -0,0 +1,478 @@
+.. _sec-tutorial:
+
+=========
+Tutorials
+=========
+
+Here we provide some tutorials on how to use M-LOOP. M-LOOP is flexible and can be customized with a variety of :ref:`options <sec-configuration-file>` and :ref:`interfaces <sec-interfaces>`. Here we provide some basic tutorials to get you up and started as quickly as possible.
+
+There are two different approaches to using M-LOOP:
+
+1. You can execute M-LOOP from a command line (or shell) and configure it using a text file.
+2. You can use M-LOOP as a :ref:`python API <sec-api>`.
+
+If you have a standard experiment, that is operated by LabVIEW, Simulink or some other method, then you should use option 1 and follow the :ref:`first tutorial <sec-standard-experiment>`. If your experiment is operated using python, you should consider using option 2 as it will give you more flexibility and control, in which case, look at the :ref:`second tutorial <sec-python-experiment>`.
+
+.. _sec-standard-experiment:
+
+Standard experiment
+===================
+
+The basic operation of M-LOOP is sketched below.
+
+.. _fig-mloop-diag:
+
+.. figure:: _static/M-LOOP_diagram.png
+ :alt: M-LOOP in a loop with an experiment sending parameters and receiving costs.
+
+There are three stages:
+
+1. M-LOOP is started with the command::
+
+ M-LOOP
+
+ M-LOOP first looks for the configuration file *exp_config.txt*, which contains options like the number of parameters and their limits, in the folder it is executed, then starts the optimization process.
+
+2. M-LOOP controls and optimizes the experiment by exchanging files written to disk. M-LOOP produces a file called *exp_input.txt* which contains a variable params with the next parameters to be run by the experiment. The experiment is expected to run an experiment with these parameters and measure the resultant cost. The experiment should then write the file *exp_output.txt* which contains at least the variable cost which quantifies the performance of that experimental run, and optionally, the variables uncer (for uncertainty) and bad (if the run failed). This process is repeated many times until the halting condition is met.
+
+3. Once the optimization process is complete, M-LOOP prints to the console the parameters and cost of the best run performed during the experiment, and a prediction of the optimal parameters (with the corresponding predicted cost and uncertainty). M-LOOP also produces a set of plots that allow the user to visualize the optimization process and cost landscape. During operation and at the end M-LOOP writes three files to disk:
+
+ - *M-LOOP_[datetime].log* a log of the console output and other debugging information during the run.
+ - *controller_archive_[datetime].txt* an archive of all the experimental data recorded and the results.
+ - *learner_archive_[datetime].txt* an archive of the model created by the machine learner of the experiment.
+
+In what follows we will unpack this process and give details on how to configure and run M-LOOP.
+
+Launching M-LOOP
+----------------
+
+Launching M-LOOP is performed by executing the command M-LOOP on the console. You can also provide the name of your configuration file if you do not want to use the default with the command::
+
+ M-LOOP -c [config_filename]
+
+.. _sec-configuration-file:
+
+Configuration File
+------------------
+
+The configuration file contains a list of options and settings for the optimization run. Each option must be started on a new line and formatted as::
+
+ [keyword] = [value]
+
+You can add comments to your file using #, everything past # will be ignored. Examples of relevant keywords and syntax for the values are provided in :ref:`sec-examples` and a comprehensive list of options is described in :ref:`sec-api`. The values should be formatted with python syntax, strings should be surrounded with single or double quotes and arrays of values can be surrounded with square brackets/parentheses with numbers separated with commas. In this tutorial we will examine the example file *tutorial_config.txt*::
+
+ #Tutorial Config
+ #---------------
+
+ #Parameter settings
+ num_params = 2 #number of parameters
+ min_boundary = [-1,-1] #minimum boundary
+ max_boundary = [1,1] #maximum boundary
+ first_params = [0.5,0.5] #first parameters to try
+ trust_region = 0.4 #maximum % move distance from best params
+
+ #Halting conditions
+ max_num_runs = 1000 #maximum number of runs
+ max_num_runs_without_better_params = 50 #maximum number of runs without finding better parameters
+ target_cost = 0.01 #optimization halts when a cost below this target is found
+
+ #Learner options
+ cost_has_noise = True #whether the costs are corrupted by noise or not
+
+ #Timing options
+ no_delay = True #wait for learner to generate new parameters or use training algorithms
+
+ #File format options
+ interface_file_type = 'txt' #file types of *exp_input.mat* and *exp_output.mat*
+ controller_archive_file_type = 'mat' #file type of the controller archive
+ learner_archive_file_type = 'pkl' #file type of the learner archive
+
+ #Visualizations
+ visualizations = True
+
+We will now explain the options in each of their groups. In almost all cases you will only need to set the parameter settings and halting conditions, but we have also described a few of the most commonly used extra options.
+
+Parameter settings
+~~~~~~~~~~~~~~~~~~
+
+The number of parameters and their limits is defined with three keywords::
+
+ num_params = 2
+ min_boundary = [-1,-1]
+ max_boundary = [1,1]
+
+num_params defines the number of parameters, min_boundary defines the minimum value each of the parameters can take and max_boundary defines the maximum value each parameter can take. Here there are two value which each must be between -1 and 1.
+
+first_params defines the first parameters the learner will try. You only need to set this if you have a safe set of parameters you want the experiment to start with. Just delete this keyword if any set of parameters in the boundaries will work.
+
+trust_region defines the maximum change allowed in the parameters from the best parameters found so far. In the current example the region size is 2 by 2, with a trust region of 40% thus the maximum allowed change for the second run will be [0 +/- 0.8, 0 +/- 0.8]. This is only needed if your experiment produces bad results when the parameters are changed significantly between runs. Simply delete this keyword if your experiment works with any set of parameters within the boundaries.
+
+Halting conditions
+~~~~~~~~~~~~~~~~~~
+
+The halting conditions define when the simulation will stop. We present three options here::
+
+ max_num_runs = 100
+ max_num_runs_without_better_params = 10
+ target_cost = 0.1
+
+max_num_runs is the maximum number of runs that the optimization algorithm is allowed to run. max_num_runs_without_better_params is the maximum number of runs allowed before a lower cost and better parameters are found. Finally, when target_cost is set, if a run produces a cost that is less than this value the optimization process will stop.
+
+When multiple halting conditions are set, the optimization process will halt when any one of them is met.
+
+If you do not have any prior knowledge of the problem use only the keyword max_num_runs and set it to the highest value you can wait for. If you have some knowledge about what the minimum attainable cost is or there is some cost threshold you need to achieve, you might want to set the target_cost. max_num_runs_without_better_params is useful if you want to let the optimization algorithm run as long as it needs until there is a good chance the global optimum has been found.
+
+If you do not want one of the halting conditions, simply delete it from your file. For example if you just wanted the algorithm to search as long as it can until it found a global minimum you could set::
+
+ max_num_runs_without_better_params = 10
+
+
+Learner Options
+~~~~~~~~~~~~~~~
+
+There are many learner specific options (and different learner algorithms) described in :ref:`sec-examples`. Here we just present a common one::
+
+ cost_has_noise = True
+
+If the cost you provide has noise in it, meaning the cost you calculate would fluctuate if you did multiple experiments with the same parameters, then set this flag to True. If the costs you provide have no noise then set this flag to False. M-LOOP will automatically determine if the costs have noise in them or not, so if you are unsure, just delete this keyword and it will use the default value of True.
+
+Timing options
+~~~~~~~~~~~~~~
+
+M-LOOP learns how the experiment works by fitting the parameters and costs using a gaussian process. This learning process can take some time. If M-LOOP is asked for new parameters before it has time to generate a new prediction, it will use the training algorithm to provide a new set of parameters to test. This allows for an experiment to be run while the learner is still thinking. The training algorithm by default is differential evolution, this algorithm is also used to do the first initial set of experiments which are then used to train M-LOOP. If you would prefer M-LOOP waits for the learner to come up with its best prediction before running another experiment you can change this behavior with the option::
+
+ no_delay = True
+
+Set no_delay to true to ensure there are no pauses between experiments, and set it to false if you want to give M-LOOP the time to come up with its most informed choice. Sometimes doing fewer more intelligent experiments will lead to an optimal quicker than many quick unintelligent experiments. You can delete the keyword if you are unsure and it will default to True.
+
+File format options
+~~~~~~~~~~~~~~~~~~~
+
+You can set the file formats for the archives produced at the end and the files exchanged with the experiment with the options::
+
+ interface_file_type = 'txt'
+ controller_archive_file_type = 'mat'
+ learner_archive_file_type = 'pkl'
+
+interface_file_type controls the file format for the files exchanged with the experiment. controller_archive_file_type and learner_archive_file_type control the format of the respective archives.
+
+There are three file formats currently available: 'mat' is for MATLAB readable files, 'pkl' is for python binary archives created using the `pickle package <https://docs.python.org/3/library/pickle.html>`_, and 'txt' human readable text files. For more details on these formats see :ref:`sec-data`.
+
+Visualization
+~~~~~~~~~~~~~
+
+By default M-LOOP will display a set of plots that allow the user to visualize the optimization process and the cost landscape. To change this behavior use the option::
+
+ visualizations = True
+
+Set it to false to turn the visualizations off. For more details see :ref:`sec-visualizations`.
+
+Interface
+---------
+
+There are many options of how to connect M-LOOP to your experiment. We consider the most generic method, writing and reading files to disk. For other options see :ref:`sec-interfaces`. If you design a bespoke interface for your experiment please consider :ref:`sec-contributing` to the project by sharing your method with other users.
+
+The file interface works under the assumption that your experiment follows the following algorithm.
+
+1. Wait for the file *exp_input.txt* to be made on the disk in the same folder M-LOOP is run.
+2. Read the parameters for the next experiment from the file (named params).
+3. Delete the file *exp_input.txt*.
+4. Run the experiment with the parameters provided and calculate a cost, and optionally the uncertainty.
+5. Write the cost to the file *exp_output.txt*. Go back to step 1.
+
+It is important you delete the file *exp_input.txt* after reading it, since it is used as an indicator for the next experiment to run.
+
+When writing the file *exp_output.txt* there are three keywords and values you can include in your file, for example after the first run your experiment may produce the following::
+
+ cost = 0.5
+ uncer = 0.01
+ bad = false
+
+cost refers to the cost calculated from the experimental data. uncer, is optional, and refers to the uncertainty in the cost measurement made. Note, M-LOOP by default assumes there is some noise corrupting costs, which is fitted and compensated for. Hence, if there is some noise in your costs which you are unable to predict from a single measurement, do not worry, you do not have to estimate uncer, you can just leave it out. Lastly bad can be used to indicate an experiment failed and was not able to produce a cost. If the experiment worked set bad = false and if it failed set bad = true.
+
+Note you do not have to include all of the keywords, you must provide at least a cost or the bad keyword set to true. For example a successful run can simply be::
+
+ cost = 0.3
+
+and failed experiment can be as simple as::
+
+ bad = True
+
+Once the *exp_output.txt* has been written to disk, M-LOOP will read it and delete it.
+
+Parameters and cost function
+----------------------------
+
+Choosing the right parameterization of your experiment and cost function will be an important part of getting great results.
+
+If you have time dependent functions in your experiment you will need to choose a parametrization of these function before interfacing them with M-LOOP. M-LOOP will take more time and experiments to find an optimum, given more parameters. But if you provide too few parameters, you may not be able to achieve your cost target.
+
+Fortunately, the visualizations provided after the optimization will help you determine which parameters contributed the most to the optimization process. Try with whatever parameterization is convenient to start and use the data produced afterwards to guide you on how to better improve the parametrization of your experiment.
+
+Picking the right cost function from experimental observables will also be important. M-LOOP will always find a global optimal as quick as it can, but if you have a poorly chosen cost function, the global optimal may not what you really wanted to optimize. Make sure you pick a cost function that will uniquely produce the result you want. Again, do not be afraid to experiment and use the data produced by the optimization runs to improve the cost function you are using.
+
+Have a look at our `paper <http://www.nature.com/articles/srep25890>`_ on using M-LOOP to create a Bose-Einstein Condensate for an example of choosing a parametrization and cost function for an experiment.
+
+.. _sec-results:
+
+Results
+-------
+
+Once M-LOOP has completed the optimization, it will output results in several ways.
+
+M-LOOP will print results to the console. It will give the parameters of the experimental run that produced the lowest cost. It will also provide a set of parameters which are predicted to produce the lowest average cost. If there is no noise in the costs your experiment produced, then the best parameters and predicted best parameters will be the same. If there was some noise in your costs then it is possible that there will be a difference between the two. This is because the noise might have resulted with a set of experimental parameters that produced a lower cost due to a random fluke. The real optimal parameters that correspond to the minimum average cost are the predicted best parameters. In general, use the predicted best parameters (when provided) as the final result of the experiment.
+
+M-LOOP will produce an archive for the controller and machine learner. The controller archive contains all the data gathered during the experimental run and also other configuration details set by the user. By default it will be a 'txt' file which is human readable. If the meaning of a keyword and its associated data in the file is unclear, just :ref:`search` the documentation with the keyword to find a description. The learner archive contains a model of the experiment produced by the machine learner algorithm, which is currently a gaussian process. By default it will also be a 'txt' file. For more detail on these files see :ref:`sec-data`.
+
+M-LOOP, by default, will produce a set of visualizations. These plots show the optimization process over time and also predictions made by the learner of the cost landscape. For more details on these visualizations and their interpretation see :ref:`sec-visualizations`.
+
+.. _sec-python-experiment:
+
+Python controlled experiment
+============================
+
+If you have an experiment that is already under python control you can use M-LOOP as an API. Below we go over the example python script *python_controlled_experiment.py*; you should also read over the :ref:`first tutorial <sec-standard-experiment>` to get a general idea of how M-LOOP works.
+
+When integrating M-LOOP into your laboratory remember that it will be controlling your experiment, not vice versa. Hence, at the top level of your python script you will execute M-LOOP which will then call on your experiment when needed. Your experiment will not be making calls to M-LOOP.
+
+An example script for a python controlled experiment is given in the examples folder called *python_controlled_experiment.py*, which is copied below::
+
+ #Imports for python 2 compatibility
+ from __future__ import absolute_import, division, print_function
+ __metaclass__ = type
+
+ #Imports for M-LOOP
+ import mloop.interfaces as mli
+ import mloop.controllers as mlc
+ import mloop.visualizations as mlv
+
+ #Other imports
+ import numpy as np
+ import time
+
+ #Declare your custom class that inherits from the Interface class
+ class CustomInterface(mli.Interface):
+
+ #Initialization of the interface, including this method is optional
+ def __init__(self):
+ #You must include the super command to call the parent class, Interface, constructor
+ super(CustomInterface,self).__init__()
+
+ #Attributes of the interface can be added here
+ #If you want to pre-calculate any variables etc. this is the place to do it
+ #In this example we will just define the location of the minimum
+ self.minimum_params = np.array([0,0.1,-0.1])
+
+ #You must include the get_next_cost_dict method in your class
+ #this method is called whenever M-LOOP wants to run an experiment
+ def get_next_cost_dict(self,params_dict):
+
+ #Get parameters from the provided dictionary
+ params = params_dict['params']
+
+ #Here you can include the code to run your experiment given a particular set of parameters
+ #In this example we will just evaluate a sum of sinc functions
+ cost = -np.sum(np.sinc(params - self.minimum_params))
+ #There is no uncertainty in our result
+ uncer = 0
+ #The evaluation will always be a success
+ bad = False
+ #Add a small time delay to mimic a real experiment
+ time.sleep(1)
+
+ #The cost, uncertainty and bad boolean must all be returned as a dictionary
+ #You can include other variables you want to record as well if you want
+ cost_dict = {'cost':cost, 'uncer':uncer, 'bad':bad}
+ return cost_dict
+
+ def main():
+ #M-LOOP can be run with three commands
+
+ #First create your interface
+ interface = CustomInterface()
+ #Next create the controller, provide it with your interface and any options you want to set
+ controller = mlc.create_controller(interface, max_num_runs = 1000, target_cost = -2.99, num_params = 3, min_boundary = [-2,-2,-2], max_boundary = [2,2,2])
+ #To run M-LOOP and find the optimal parameters just use the controller method optimize
+ controller.optimize()
+
+ #The results of the optimization will be saved to files and can also be accessed as attributes of the controller.
+ print('Best parameters found:')
+ print(controller.best_params)
+
+ #You can also run the default sets of visualizations for the controller with one command
+ mlv.show_all_default_visualizations(controller)
+
+
+ #Ensures main is run when this code is run as a script
+ if __name__ == '__main__':
+ main()
+
+Each part of the code is explained in the following sections.
+
+Imports
+-------
+
+The start of the script imports the libraries that are necessary for M-LOOP to work::
+
+ #Imports for python 2 compatibility
+ from __future__ import absolute_import, division, print_function
+ __metaclass__ = type
+
+ #Imports for M-LOOP
+ import mloop.interfaces as mli
+ import mloop.controllers as mlc
+ import mloop.visualizations as mlv
+
+ #Other imports
+ import numpy as np
+ import time
+
+The first group of imports are just for python 2 compatibility. M-LOOP is targeted at python3, but has been designed to be bilingual. These imports ensure backward compatibility.
+
+The second group of imports are the most important modules M-LOOP needs to run. The interfaces and controllers modules are essential, while the visualizations module is only needed if you want to view your data afterwards.
+
+Lastly, you can add any other imports you may need.
+
+Custom Interface
+----------------
+
+M-LOOP takes an object oriented approach to controlling the experiment. This is different than the functional approach taken by other optimization packages, like scipy. When using M-LOOP you must make your own class that inherits from the Interface class in M-LOOP. This class must implement a method called *get_next_cost_dict* that takes a set of parameters, runs your experiment and then returns the appropriate cost and uncertainty.
+
+An example of the simplest implementation of a custom interface is provided below ::
+
+ #Declare your custom class that inherits from the Interface class
+ class SimpleInterface(mli.Interface):
+
+ #the method that runs the experiment given a set of parameters and returns a cost
+ def get_next_cost_dict(self,params_dict):
+
+ #The parameters come in a dictionary and are provided in a numpy array
+ params = params_dict['params']
+
+ #Here you can include the code to run your experiment given a particular set of parameters
+ #For this example we just evaluate a simple function
+ cost = np.sum(params**2)
+ uncer = 0
+ bad = False
+
+ #The cost, uncertainty and bad boolean must all be returned as a dictionary
+ cost_dict = {'cost':cost, 'uncer':uncer, 'bad':bad}
+ return cost_dict
+
+The code above defines a new class that inherits from the Interface class in M-LOOP. Note this code is different to the example above, we will consider this later. It is slightly more complicated than just defining a method, however there is a lot more flexibility when taking this approach. You should put the code you use to run your experiment in the *get_next_cost_dict* method. This method is executed by the interface whenever M-LOOP wants a cost corresponding to a set of parameters.
+
+When you actually run M-LOOP you will need to make an instance of your interface. To make an instance of the class above you would use::
+
+ interface = SimpleInterface()
+
+This interface is then provided to the controller, which is discussed in the next section.
+
+Dictionaries are used for both input and output of the method, to give the user flexibility. For example, if you had a bad run, you do not have to return a cost and uncertainty, you can just return a dictionary with bad set to True::
+
+ cost_dict = {'bad':True}
+ return cost_dict
+
+By taking an object oriented approach, M-LOOP can provide a lot more flexibility when controlling your experiment. For example if you wish to start up your experiment or perform some initial numerical analysis you can add a customized constructor or __init__ method for the class. We consider this in the main example::
+
+ class CustomInterface(mli.Interface):
+
+ #Initialization of the interface, including this method is optional
+ def __init__(self):
+ #You must include the super command to call the parent class, Interface, constructor
+ super(CustomInterface,self).__init__()
+
+ #Attributes of the interface can be added here
+ #If you want to pre-calculate any variables etc. this is the place to do it
+ #In this example we will just define the location of the minimum
+ self.minimum_params = np.array([0,0.1,-0.1])
+
+ #You must include the get_next_cost_dict method in your class
+ #this method is called whenever M-LOOP wants to run an experiment
+ def get_next_cost_dict(self,params_dict):
+
+ #Get parameters from the provided dictionary
+ params = params_dict['params']
+
+ #Here you can include the code to run your experiment given a particular set of parameters
+ #In this example we will just evaluate a sum of sinc functions
+ cost = -np.sum(np.sinc(params - self.minimum_params))
+ #There is no uncertainty in our result
+ uncer = 0
+ #The evaluation will always be a success
+ bad = False
+ #Add a small time delay to mimic a real experiment
+ time.sleep(1)
+
+ #The cost, uncertainty and bad boolean must all be returned as a dictionary
+ #You can include other variables you want to record as well if you want
+ cost_dict = {'cost':cost, 'uncer':uncer, 'bad':bad}
+ return cost_dict
+
+In this code snippet we also implement a constructor. Here we just define a numpy array which defines the minimum parameter values. We can access this variable whenever we need in the *get_next_cost_dict* method. You can also define your own custom methods in your interface or even inherit from other classes.
+
+Once you have implemented your own Interface, running M-LOOP can be done in three lines.
+
+Running M-LOOP
+--------------
+
+Once you have made your interface class, running M-LOOP can be as simple as three lines. In the example script M-LOOP is run in the main method::
+
+ def main():
+ #M-LOOP can be run with three commands
+
+ #First create your interface
+ interface = CustomInterface()
+ #Next create the controller, provide it with your interface and any options you want to set
+ controller = mlc.create_controller(interface, max_num_runs = 1000, target_cost = -2.99, num_params = 3, min_boundary = [-2,-2,-2], max_boundary = [2,2,2])
+ #To run M-LOOP and find the optimal parameters just use the controller method optimize
+ controller.optimize()
+
+In the code snippet we first make an instance of our custom interface class called interface. We then create an instance of a controller. The controller will run the experiment and perform the optimization. You must provide the controller with the interface and any of the M-LOOP options you would normally provide in the configuration file. In this case we give five options, which do the following:
+
+1. *max_num_runs = 1000* sets the maximum number of runs to be 1000.
+2. *target_cost = -2.99* sets a cost that M-LOOP will halt at once it has been reached.
+3. *num_params = 3* sets the number of parameters to be 3.
+4. *min_boundary = [-2,-2,-2]* defines the minimum values of each of the parameters.
+5. *max_boundary = [2,2,2]* defines the maximum values of each of the parameters.
+
+There are many other options you can use. Have a look at :ref:`sec-configuration-file` for a detailed introduction into all the important configuration options. Remember you can include any option you would include in a configuration file as keywords for the controller. For more options you should look at all the config files in :ref:`sec-examples`, or for a comprehensive list look at the :ref:`sec-api`.
+
+Once you have created your interface and controller you can run M-LOOP by calling the optimize method of the controller. So in summary M-LOOP is executed in three lines::
+
+ interface = CustomInterface()
+ controller = mlc.create_controller(interface, [options])
+ controller.optimize()
+
+Results
+-------
+
+The results will be displayed on the console and also saved in a set of files. Have a read over :ref:`sec-results` for more details on the results displayed and saved. Also read :ref:`sec-data` for more details on data formats and how it is stored.
+
+Within the python environment you can also access the results as attributes of the controller after it has finished optimization. The example includes a simple demonstration of this::
+
+ #The results of the optimization will be saved to files and can also be accessed as attributes of the controller.
+ print('Best parameters found:')
+ print(controller.best_params)
+
+All of the results saved in the controller archive can be directly accessed as attributes of the controller object. For a comprehensive list of the attributes of the controller generated after an optimization run see the :ref:`sec-api`.
+
+Visualizations
+--------------
+
+For each controller there is normally a default set of visualizations available. The visualizations for the Gaussian Process, the default optimization algorithm, are described in :ref:`sec-visualizations`. Visualizations can be called through the visualization module. The example includes a simple demonstration of this::
+
+ #You can also run the default sets of visualizations for the controller with one command
+ mlv.show_all_default_visualizations(controller)
+
+This code snippet will display all the visualizations available for that controller. There are many other visualization methods and options available that let you control which plots are displayed and when, see the :ref:`sec-api` for details.
+
+
+
+
+
+
+
+
+
diff --git a/docs/visualizations.rst b/docs/visualizations.rst
index fd2c1ce..6355123 100644
--- a/docs/visualizations.rst
+++ b/docs/visualizations.rst
@@ -1,5 +1,6 @@
.. _sec-visualizations:
+==============
Visualizations
==============
@@ -27,7 +28,7 @@ The number of visualizations will depend on what controller you use. By default
The plots which start with *Controller:* are generated from the controller archive, while plots that start with *Learner:* are generated from the learner archive.
Reproducing visualizations
---------------------------
+==========================
If you have a controller and learner archive and would like to examine the visualizations again, it is best to do so using the :ref:`sec-api`. For example the following code will plot the visualizations again from the files *controller_archive_2016-08-23_13-59.mat* and *learner_archive_2016-08-18_12-18.pkl*::
diff --git a/examples/differential_evolution_complete_config.txt b/examples/differential_evolution_complete_config.txt
new file mode 100644
index 0000000..88a8547
--- /dev/null
+++ b/examples/differential_evolution_complete_config.txt
@@ -0,0 +1,19 @@
+#Differential Evolution Complete Options
+#---------------------------------------
+
+#General options
+max_num_runs = 500 #number of planned runs
+target_cost = 0.1 #cost to beat
+
+#Differential evolution controller options
+controller_type = 'differential_evolution'
+num_params = 2 #number of parameters
+min_boundary = [-1.2,-2] #minimum boundary
+max_boundary = [10.0,4] #maximum boundary
+trust_region = [3.2,3.1] #maximum move distance from best params
+first_params = None #first parameters to try if None a random set of parameters is chosen
+evolution_strategy='best2' #evolution strategy can be 'best1', 'best2', 'rand1' and 'rand2'. Best uses the best point, rand uses a random one, the number indicates the number of directions added.
+population_size=10 #a multiplier for the population size of a generation
+mutation_scale=(0.4, 1.1) #the minimum and maximum value for the mutation scale factor. Each generation is randomly selected from this. Each value must be between 0 and 2.
+cross_over_probability=0.8 #the probability a parameter will be resampled during a mutation in a new generation
+restart_tolerance=0.02 #the fraction the standard deviation in the costs of the population must reduce from the initial sample, before the search is restarted.
\ No newline at end of file
diff --git a/examples/differential_evolution_simple_config.txt b/examples/differential_evolution_simple_config.txt
new file mode 100644
index 0000000..d4615a0
--- /dev/null
+++ b/examples/differential_evolution_simple_config.txt
@@ -0,0 +1,15 @@
+#Differential Evolution Basic Options
+#------------------------------------
+
+#General options
+max_num_runs = 500 #number of planned runs
+target_cost = 0.1 #cost to beat
+
+#Differential evolution controller options
+controller_type = 'differential_evolution'
+num_params = 1 #number of parameters
+min_boundary = [-4.8] #minimum boundary
+max_boundary = [10.0] #maximum boundary
+trust_region = 0.6 #maximum % move distance from best params
+first_params = [5.3] #first parameters to try
+
diff --git a/examples/python__controlled_experiment.py b/examples/python__controlled_experiment.py
new file mode 100644
index 0000000..96572ec
--- /dev/null
+++ b/examples/python__controlled_experiment.py
@@ -0,0 +1,74 @@
+#Imports for python 2 compatibility
+from __future__ import absolute_import, division, print_function
+__metaclass__ = type
+
+#Imports for M-LOOP
+import mloop.interfaces as mli
+import mloop.controllers as mlc
+import mloop.visualizations as mlv
+
+#Other imports
+import numpy as np
+import time
+
+#Declare your custom class that inherits from the Interface class
+class CustomInterface(mli.Interface):
+
+ #Initialization of the interface, including this method is optional
+ def __init__(self):
+ #You must include the super command to call the parent class, Interface, constructor
+ super(CustomInterface,self).__init__()
+
+ #Attributes of the interface can be added here
+ #If you want to pre-calculate any variables etc. this is the place to do it
+ #In this example we will just define the location of the minimum
+ self.minimum_params = np.array([0,0.1,-0.1])
+
+ #You must include the get_next_cost_dict method in your class
+ #this method is called whenever M-LOOP wants to run an experiment
+ def get_next_cost_dict(self,params_dict):
+
+ #Get parameters from the provided dictionary
+ params = params_dict['params']
+
+ #Here you can include the code to run your experiment given a particular set of parameters
+ #In this example we will just evaluate a sum of sinc functions
+ cost = -np.sum(np.sinc(params - self.minimum_params))
+ #There is no uncertainty in our result
+ uncer = 0
+ #The evaluation will always be a success
+ bad = False
+ #Add a small time delay to mimic a real experiment
+ time.sleep(1)
+
+ #The cost, uncertainty and bad boolean must all be returned as a dictionary
+ #You can include other variables you want to record as well if you want
+ cost_dict = {'cost':cost, 'uncer':uncer, 'bad':bad}
+ return cost_dict
+
+def main():
+ #M-LOOP can be run with three commands
+
+ #First create your interface
+ interface = CustomInterface()
+ #Next create the controller, provide it with your interface and any options you want to set
+ controller = mlc.create_controller(interface,
+ max_num_runs = 1000,
+ target_cost = -2.99,
+ num_params = 3,
+ min_boundary = [-2,-2,-2],
+ max_boundary = [2,2,2])
+ #To run M-LOOP and find the optimal parameters just use the controller method optimize
+ controller.optimize()
+
+ #The results of the optimization will be saved to files and can also be accessed as attributes of the controller.
+ print('Best parameters found:')
+ print(controller.best_params)
+
+ #You can also run the default sets of visualizations for the controller with one command
+ mlv.show_all_default_visualizations(controller)
+
+
+#Ensures main is run when this code is run as a script
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/examples/shell_interface_config.txt b/examples/shell_interface_config.txt
new file mode 100644
index 0000000..7fa786e
--- /dev/null
+++ b/examples/shell_interface_config.txt
@@ -0,0 +1,6 @@
+#Command Line Interface Options
+#------------------------------
+
+interface_type = 'shell' #The type of interface
+command = 'python shell_script.py' #The command for the command line to run the experiment to get a cost from the parameters
+params_args_type = 'direct' #The format of the parameters when providing them on the command line. 'direct' simply appends them, e.g. python shell_script.py 7 2 1, 'named' names each parameter, e.g. python shell_script.py --param1 7 --param2 2 --param3 1
\ No newline at end of file
diff --git a/examples/tutorial_config.txt b/examples/tutorial_config.txt
index 112b504..cd07d29 100644
--- a/examples/tutorial_config.txt
+++ b/examples/tutorial_config.txt
@@ -1,19 +1,26 @@
#Tutorial Config
#---------------
+#Interface settings
+interface_type = 'file'
+
#Parameter settings
num_params = 2 #number of parameters
min_boundary = [-1,-1] #minimum boundary
max_boundary = [1,1] #maximum boundary
+first_params = [0.5,0.5] #first parameters to try
+trust_region = 0.4 #maximum % move distance from best params
#Halting conditions
max_num_runs = 1000 #maximum number of runs
max_num_runs_without_better_params = 50 #maximum number of runs without finding better parameters
target_cost = 0.01 #optimization halts when a cost below this target is found
-#Learner specific options
-first_params = [0.5,0.5] #first parameters to try
-trust_region = 0.4 #maximum % move distance from best params
+#Learner options
+cost_has_noise = True #whether the costs are corrupted by noise or not
+
+#Timing options
+no_delay = True #do not wait for the learner to generate new parameters; use the training algorithm instead
#File format options
interface_file_type = 'txt' #file types of *exp_input.mat* and *exp_output.mat*
diff --git a/mloop/__init__.py b/mloop/__init__.py
index c1ec084..9e53155 100644
--- a/mloop/__init__.py
+++ b/mloop/__init__.py
@@ -12,5 +12,5 @@
import os
-__version__= "2.0.3"
-__all__ = ['controllers','interfaces','launchers','learners','testing','utilities','visualizations']
\ No newline at end of file
+__version__= "2.1.1"
+__all__ = ['controllers','interfaces','launchers','learners','testing','utilities','visualizations','cmd']
\ No newline at end of file
diff --git a/bin/M-LOOP b/mloop/cmd.py
similarity index 59%
rename from bin/M-LOOP
rename to mloop/cmd.py
index 3957d38..22ae662 100644
--- a/bin/M-LOOP
+++ b/mloop/cmd.py
@@ -1,18 +1,8 @@
-#! /usr/bin/env python
-
'''
-M-LOOP Launcher
-
-Starts an instance of M-LOOP configured using a configuration file.
-
-Takes the following command line options
-
--c filename for configuration file
-
--h display help
-
-the default name for the configuration is "ExpConfig.txt"
+Module of command line tools that can be used to execute mloop.
'''
+from __future__ import absolute_import, division, print_function
+__metaclass__ = type
import sys
import argparse
@@ -20,7 +10,20 @@
import mloop.launchers as mll
import multiprocessing as mp
-def main(argv):
+def run_mloop():
+ '''
+ M-LOOP Launcher
+
+ Starts an instance of M-LOOP configured using a configuration file.
+
+ Takes the following command line options
+
+ -c filename for configuration file
+
+ -h display help
+
+ the default name for the configuration is "ExpConfig.txt"
+ '''
parser = argparse.ArgumentParser(description='M-LOOP Launcher \n Version:' + ml.__version__+'\n \n Starts a new instance of M-LOOP based a on configuration file.',
formatter_class=argparse.RawDescriptionHelpFormatter)
@@ -33,9 +36,4 @@ def main(argv):
_ = mll.launch_from_file(config_filename)
-if __name__=="__main__":
- mp.freeze_support()
- main(sys.argv[1:])
-
-
-
+
\ No newline at end of file
diff --git a/mloop/controllers.py b/mloop/controllers.py
index 36410c8..d018367 100644
--- a/mloop/controllers.py
+++ b/mloop/controllers.py
@@ -8,12 +8,11 @@
import mloop.utilities as mlu
import mloop.learners as mll
import mloop.interfaces as mli
-import multiprocessing as mp
-import logging.handlers
+import logging
import os
-controller_dict = {'random':1,'nelder_mead':2,'gaussian_process':3}
-number_of_controllers = 3
+controller_dict = {'random':1,'nelder_mead':2,'gaussian_process':3,'differential_evolution':4}
+number_of_controllers = 4
default_controller_archive_filename = 'controller_archive'
default_controller_archive_file_type = 'txt'
@@ -48,6 +47,8 @@ def create_controller(interface,
controller_type = str(controller_type)
if controller_type=='gaussian_process':
controller = GaussianProcessController(interface, **controller_config_dict)
+ elif controller_type=='differential_evolution':
+ controller = DifferentialEvolutionController(interface, **controller_config_dict)
elif controller_type=='nelder_mead':
controller = NelderMeadController(interface, **controller_config_dict)
elif controller_type=='random':
@@ -281,7 +282,7 @@ def _get_cost_and_in_dict(self):
except ValueError:
self.log.error('One of the values you provided in the cost dict could not be converted into the right type.')
raise
- if self.curr_bad and 'cost' in dict:
+ if self.curr_bad and ('cost' in in_dict):
self.log.warning('The cost provided with the bad run will be saved, but not used by the learners.')
self.in_costs.append(self.curr_cost)
@@ -333,6 +334,8 @@ def optimize(self):
self._start_up()
self._optimization_routine()
log.info('Controller finished. Closing down M-LOOP. Please wait a moment...')
+ except ControllerInterrupt:
+ self.log.warning('Controller ended by interruption.')
except (KeyboardInterrupt,SystemExit):
log.warning('!!! Do not give the interrupt signal again !!! \n M-LOOP stopped with keyboard interupt or system exit. Please wait at least 1 minute for the threads to safely shut down. \n ')
log.warning('Closing down controller.')
@@ -391,22 +394,19 @@ def _optimization_routine(self):
Runs controller main loop. Gives parameters to experiment and saves costs returned.
'''
self.log.debug('Start controller loop.')
- try:
+ self.log.info('Run:' + str(self.num_in_costs +1))
+ next_params = self._first_params()
+ self._put_params_and_out_dict(next_params)
+ self.save_archive()
+ self._get_cost_and_in_dict()
+ while self.check_end_conditions():
self.log.info('Run:' + str(self.num_in_costs +1))
- next_params = self._first_params()
+ next_params = self._next_params()
self._put_params_and_out_dict(next_params)
self.save_archive()
self._get_cost_and_in_dict()
- while self.check_end_conditions():
- self.log.info('Run:' + str(self.num_in_costs +1))
- next_params = self._next_params()
- self._put_params_and_out_dict(next_params)
- self.save_archive()
- self._get_cost_and_in_dict()
- self.log.debug('End controller loop.')
- except ControllerInterrupt:
- self.log.warning('Controller ended by interruption.')
-
+ self.log.debug('End controller loop.')
+
def _first_params(self):
'''
Checks queue to get first parameters.
@@ -490,6 +490,37 @@ def _next_params(self):
self.learner_costs_queue.put(cost)
return self.learner_params_queue.get()
+class DifferentialEvolutionController(Controller):
+ '''
+ Controller for the differential evolution learner.
+
+ Args:
+ params_out_queue (queue): Queue for parameters to next be run by experiment.
+ costs_in_queue (queue): Queue for costs (and other details) that have been returned by experiment.
+ **kwargs (Optional [dict]): Dictionary of options to be passed to Controller parent class and differential evolution learner.
+ '''
+ def __init__(self, interface,
+ **kwargs):
+ super(DifferentialEvolutionController,self).__init__(interface, **kwargs)
+
+ self.learner = mll.DifferentialEvolutionLearner(start_datetime = self.start_datetime,
+ **self.remaining_kwargs)
+
+ self._update_controller_with_learner_attributes()
+ self.out_type.append('differential_evolution')
+
+ def _next_params(self):
+ '''
+ Gets next parameters from differential evolution learner.
+ '''
+ if self.curr_bad:
+ cost = float('inf')
+ else:
+ cost = self.curr_cost
+ self.learner_costs_queue.put(cost)
+ return self.learner_params_queue.get()
+
+
class GaussianProcessController(Controller):
@@ -507,7 +538,7 @@ class GaussianProcessController(Controller):
'''
def __init__(self, interface,
- training_type='random',
+ training_type='differential_evolution',
num_training_runs=None,
no_delay=True,
num_params=None,
@@ -551,9 +582,21 @@ def __init__(self, interface,
num_params=num_params,
min_boundary=min_boundary,
max_boundary=max_boundary,
- learner_archive_filename='training_learner_archive',
+ learner_archive_filename=None,
learner_archive_file_type=learner_archive_file_type,
**self.remaining_kwargs)
+
+ elif self.training_type == 'differential_evolution':
+ self.learner = mll.DifferentialEvolutionLearner(start_datetime=self.start_datetime,
+ num_params=num_params,
+ min_boundary=min_boundary,
+ max_boundary=max_boundary,
+ trust_region=trust_region,
+ evolution_strategy='rand2',
+ learner_archive_filename=None,
+ learner_archive_file_type=learner_archive_file_type,
+ **self.remaining_kwargs)
+
else:
self.log.error('Unknown training type provided to Gaussian process controller:' + repr(training_type))
@@ -575,7 +618,7 @@ def __init__(self, interface,
self.new_params_event = self.gp_learner.new_params_event
self.remaining_kwargs = self.gp_learner.remaining_kwargs
self.generation_num = self.gp_learner.generation_num
-
+
def _put_params_and_out_dict(self, params):
'''
Override _put_params_and_out_dict function, used when the training learner creates parameters. Makes the defualt param_type the training type and sets last_training_run_flag.
@@ -602,12 +645,12 @@ def _next_params(self):
'''
Gets next parameters from training learner.
'''
- if self.training_type == 'nelder_mead':
+ if self.training_type == 'differential_evolution' or self.training_type == 'nelder_mead':
#Copied from NelderMeadController
- if self.curr_bad:
+ if self.last_training_bad:
cost = float('inf')
else:
- cost = self.curr_cost
+ cost = self.last_training_cost
self.learner_costs_queue.put(cost)
temp = self.learner_params_queue.get()
@@ -632,27 +675,35 @@ def _optimization_routine(self):
'''
Overrides _optimization_routine. Uses the parent routine for the training runs. Implements a customized _optimization_rountine when running the Gaussian Process learner.
'''
- #Run the training runs using the standard optimization routine. Adjust the number of max_runs
- save_max_num_runs = self.max_num_runs
- self.max_num_runs = self.num_training_runs - 1
+ #Run the training runs using the standard optimization routine.
self.log.debug('Starting training optimization.')
- super(GaussianProcessController,self)._optimization_routine()
-
- #Start last training run
self.log.info('Run:' + str(self.num_in_costs +1))
- next_params = self._next_params()
+ next_params = self._first_params()
self._put_params_and_out_dict(next_params)
-
- #Begin GP optimization routine
- self.max_num_runs = save_max_num_runs
-
- self.log.debug('Starting GP optimization.')
- self.new_params_event.set()
self.save_archive()
self._get_cost_and_in_dict()
+ while (self.num_in_costs < self.num_training_runs) and self.check_end_conditions():
+ self.log.info('Run:' + str(self.num_in_costs +1))
+ next_params = self._next_params()
+ self._put_params_and_out_dict(next_params)
+ self.save_archive()
+ self._get_cost_and_in_dict()
+
+ if self.check_end_conditions():
+ #Start last training run
+ self.log.info('Run:' + str(self.num_in_costs +1))
+ next_params = self._next_params()
+ self._put_params_and_out_dict(next_params)
+
+ self.log.debug('Starting GP optimization.')
+ self.new_params_event.set()
+ self.save_archive()
+ self._get_cost_and_in_dict()
+ self.log.debug('End training runs.')
+
+ gp_consec = 0
+ gp_count = 0
- gp_consec = 0
- gp_count = 0
while self.check_end_conditions():
self.log.info('Run:' + str(self.num_in_costs +1))
if gp_consec==self.generation_num or (self.no_delay and self.gp_learner_params_queue.empty()):
@@ -679,12 +730,7 @@ def _shut_down(self):
self.log.debug('GP learner end set.')
self.end_gp_learner.set()
self.gp_learner.join()
- #self.gp_learner.join(self.gp_learner.learner_wait*3)
- '''
- if self.gp_learner.is_alive():
- self.log.warning('GP Learner did not join in time had to terminate.')
- self.gp_learner.terminate()
- '''
+
self.log.debug('GP learner joined')
last_dict = None
while not self.gp_learner_params_queue.empty():
@@ -706,7 +752,7 @@ def _shut_down(self):
self.archive_dict.update(last_dict)
else:
if self.gp_learner.predict_global_minima_at_end or self.gp_learner.predict_local_minima_at_end:
- self.log.warning('GP Learner may not have closed properly unable to get best and/or all minima.')
+ self.log.info('GP Learner did not provide best and/or all minima.')
super(GaussianProcessController,self)._shut_down()
def print_results(self):
diff --git a/mloop/interfaces.py b/mloop/interfaces.py
index 655a9e9..589fea7 100644
--- a/mloop/interfaces.py
+++ b/mloop/interfaces.py
@@ -5,7 +5,10 @@
__metaclass__ = type
import time
+import subprocess as sp
+import numpy as np
import os
+import sys
import threading
import multiprocessing as mp
import mloop.utilities as mlu
@@ -18,7 +21,7 @@ def create_interface(interface_type='file',
Start a new interface with the options provided.
Args:
- interface_type (Optional [str]): Defines the type of interface, currently the only option is 'file'. Default 'file'.
+ interface_type (Optional [str]): Defines the type of interface, can be 'file', 'shell' or 'test'. Default 'file'.
**interface_config_dict : Options to be passed to interface.
Returns:
@@ -27,13 +30,21 @@ def create_interface(interface_type='file',
log = logging.getLogger(__name__)
if interface_type=='file':
- file_interface = FileInterface(**interface_config_dict)
+ interface = FileInterface(**interface_config_dict)
log.info('Using the file interface with the experiment.')
+ elif interface_type == 'shell':
+ interface = ShellInterface(**interface_config_dict)
+ log.info('Using the command line interface with the experiment.')
+ elif interface_type == 'test':
+ interface = TestInterface(**interface_config_dict)
+ log.info('Using the test interface with the experiment.')
else:
log.error('Unknown interface type:' + repr(interface_type))
raise ValueError
- return file_interface
+
+
+ return interface
class InterfaceInterrupt(Exception):
'''
@@ -57,10 +68,9 @@ class Interface(threading.Thread):
params_out_queue (queue): Queue for parameters to next be run by experiment.
costs_in_queue (queue): Queue for costs (and other details) that have been returned by experiment.
end_event (event): Event which triggers the end of the interface.
-
-
+
'''
-
+
def __init__(self,
interface_wait = 1,
**kwargs):
@@ -93,14 +103,14 @@ def run(self):
except mlu.empty_exception:
continue
else:
- cost_dict = self._get_next_cost_dict(params_dict)
+ cost_dict = self.get_next_cost_dict(params_dict)
self.costs_in_queue.put(cost_dict)
except InterfaceInterrupt:
pass
self.log.debug('Interface ended')
#self.log = None
- def _get_next_cost_dict(self,params_dict):
+ def get_next_cost_dict(self,params_dict):
'''
Abstract method. This is the only method that needs to be implemented to make a working interface. Given the parameters the interface must then produce a new cost. This may occur by running an experiment or program. If you wish to abruptly end this interface for whatever rease please raise the exception InterfaceInterrupt, which will then be safely caught.
@@ -147,7 +157,7 @@ def __init__(self,
self.in_filename = str(interface_in_filename)
self.total_in_filename = self.in_filename + '.' + self.in_file_type
- def _get_next_cost_dict(self,params_dict):
+ def get_next_cost_dict(self,params_dict):
'''
Implementation of file read in and out. Put parameters into a file and wait for a cost file to be returned.
'''
@@ -201,7 +211,7 @@ def __init__(self,
self.test_landscape = test_landscape
self.test_count = 0
- def _get_next_cost_dict(self, params_dict):
+ def get_next_cost_dict(self, params_dict):
'''
Test implementation. Gets the next cost from the test_landscape.
'''
@@ -215,8 +225,96 @@ def _get_next_cost_dict(self, params_dict):
raise
cost_dict = self.test_landscape.get_cost_dict(params)
return cost_dict
+
+
+class ShellInterface(Interface):
+ '''
+ Interface for running programs from the shell.
+
+ Args:
+ params_out_queue (queue): Queue for parameters to next be run by experiment.
+ costs_in_queue (queue): Queue for costs (and other details) that have been returned by experiment.
+
+ Keyword Args:
+ command (Optional [string]): The command used to run the experiment. Default './run_exp'
+ params_args_type (Optional [string]): The style used to pass parameters. Can be 'direct' or 'named'. If 'direct' it is assumed the parameters are fed directly to the program. For example if I wanted to run the parameters [7,5,9] with the command './run_exp' I would use the syntax::
+
+ ./run_exp 7 5 9
+
+ 'named' on the other hand requires an option for each parameter. The options should be name --param1, --param2 etc. The same example as before would be ::
+
+ ./run_exp --param1 7 --param2 5 --param3 9
+
+ Default 'direct'.
+ '''
+
+ def __init__(self,
+ command = './run_exp',
+ params_args_type = 'direct',
+ **kwargs):
+ super(ShellInterface,self).__init__(**kwargs)
+ #User defined variables
+ self.command = str(command)
+ if params_args_type == 'direct' or params_args_type == 'named':
+ self.params_args_type = str(params_args_type)
+ else:
+ self.log.error('params_args_type not recognized: ' + repr(params_args_type))
+
+ #Counters
+ self.command_count = 0
+
+ def get_next_cost_dict(self,params_dict):
+ '''
+ Implementation of running a command with parameters on the command line and reading the result.
+ '''
+ self.command_count += 1
+ self.log.debug('Running command count' + repr(self.command_count))
+ self.last_params_dict = params_dict
+
+ params = params_dict['params']
+
+ curr_command = self.command
+
+ if self.params_args_type == 'direct':
+ for p in params:
+ curr_command += ' ' + str(p)
+ elif self.params_args_type == 'named':
+ for ind,p in enumerate(params):
+ curr_command += ' ' + '--param' + str(ind +1) + ' ' + str(p)
+ else:
+ self.log.error('THIS SHOULD NOT HAPPEN. params_args_type not recognized')
+
+ #execute command and look at output
+ cli_return = sp.check_output(curr_command.split()).decode(sys.stdout.encoding)
+ print(cli_return)
+
+ tdict_string = ''
+ take_flag = False
+ for line in cli_return.splitlines():
+ temp = (line.partition('#')[0]).strip('\n').strip()
+ if temp == 'M-LOOP_start' or temp == 'MLOOP_start':
+ take_flag = True
+ elif temp == 'M-LOOP_end' or temp == 'MLOOP_end':
+ take_flag = False
+ elif take_flag:
+ tdict_string += temp + ','
+
+ print(tdict_string)
+
+ #Setting up words for parsing a dict, ignore eclipse warnings
+ array = np.array #@UnusedVariable
+ inf = float('inf') #@UnusedVariable
+ nan = float('nan') #@UnusedVariable
+ tdict = eval('dict('+tdict_string+')')
+
+ return tdict
+
+
+
+
+
\ No newline at end of file
diff --git a/mloop/launchers.py b/mloop/launchers.py
index 475725c..a41e378 100644
--- a/mloop/launchers.py
+++ b/mloop/launchers.py
@@ -27,11 +27,8 @@ def launch_from_file(config_filename,
except (IOError, OSError):
print('Unable to open M-LOOP configuration file:' + repr(config_filename))
raise
-
file_kwargs.update(kwargs)
#Main run sequence
- #Create controller and extract unused keywords
- file_kwargs = mlu._config_logger(**file_kwargs)
#Create interface and extract unused keywords
interface = mli.create_interface(**file_kwargs)
file_kwargs = interface.remaining_kwargs
diff --git a/mloop/learners.py b/mloop/learners.py
index 08673bb..b4e8b76 100644
--- a/mloop/learners.py
+++ b/mloop/learners.py
@@ -8,15 +8,16 @@
import threading
import numpy as np
+import random
import numpy.random as nr
import scipy.optimize as so
-import logging.handlers
+import logging
import datetime
import os
import mloop.utilities as mlu
-import mloop.localsklearn.gaussian_process as skg
-import mloop.localsklearn.gaussian_process.kernels as skk
-import mloop.localsklearn.preprocessing as skp
+import sklearn.gaussian_process as skg
+import sklearn.gaussian_process.kernels as skk
+import sklearn.preprocessing as skp
import multiprocessing as mp
learner_thread_count = 0
@@ -259,6 +260,7 @@ class RandomLearner(Learner, threading.Thread):
Keyword Args:
min_boundary (Optional [array]): If set to None, overrides default learner values and sets it to a set of value 0. Default None.
max_boundary (Optional [array]): If set to None overides default learner values and sets it to an array of value 1. Default None.
+ first_params (Optional [array]): The first parameters to test. If None will just randomly sample the initial condition.
trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction.
'''
@@ -317,7 +319,6 @@ def run(self):
self._shut_down()
self.log.debug('Ended Random Learner')
-
class NelderMeadLearner(Learner, threading.Thread):
'''
Nelder-Mead learner. Executes the Nelder-Mead learner algorithm and stores the needed simplex to estimate the next points.
@@ -548,14 +549,305 @@ def run(self):
self._shut_down()
self.log.info('Ended Nelder-Mead')
-def update_archive(self):
+ def update_archive(self):
'''
Update the archive.
'''
- self.archive_dict.update({'archive_type':'nelder_mead_learner',
- 'simplex_parameters':self.simplex_params,
+ self.archive_dict.update({'simplex_parameters':self.simplex_params,
'simplex_costs':self.simplex_costs})
+class DifferentialEvolutionLearner(Learner, threading.Thread):
+ '''
+ Adaption of the differential evolution algorithm in scipy.
+
+ Args:
+ params_out_queue (queue): Queue for parameters sent to controller.
+ costs_in_queue (queue): Queue for costs for gaussian process. This must be tuple
+ end_event (event): Event to trigger end of learner.
+
+ Keyword Args:
+ first_params (Optional [array]): The first parameters to test. If None will just randomly sample the initial condition. Default None.
+ trust_region (Optional [float or array]): The trust region defines the maximum distance the learner will travel from the current best set of parameters. If None, the learner will search everywhere. If a float, this number must be between 0 and 1 and defines maximum distance the learner will venture as a percentage of the boundaries. If it is an array, it must have the same size as the number of parameters and the numbers define the maximum absolute distance that can be moved along each direction.
+ evolution_strategy (Optional [string]): the differential evolution strategy to use, options are 'best1', 'best1', 'rand1' and 'rand2'. The default is 'best2'.
+ population_size (Optional [int]): multiplier proportional to the number of parameters in a generation. The generation population is set to population_size * parameter_num. Default 15.
+ mutation_scale (Optional [tuple]): The mutation scale when picking new points. Otherwise known as differential weight. When provided as a tuple (min,max) a mutation constant is picked randomly in the interval. Default (0.5,1.0).
+ cross_over_probability (Optional [float]): The recombination constand or crossover probability, the probability a new points will be added to the population.
+ restart_tolerance (Optional [float]): when the current population have a spread less than the initial tolerance, namely stdev(curr_pop) < restart_tolerance stdev(init_pop), it is likely the population is now in a minima, and so the search is started again.
+
+ Attributes:
+ has_trust_region (bool): Whether the learner has a trust region.
+ num_population_members (int): The number of parameters in a generation.
+ params_generations (list): History of the parameters generations. A list of all the parameters in the population, for each generation created.
+ costs_generations (list): History of the costs generations. A list of all the costs in the population, for each generation created.
+ init_std (float): The initial standard deviation in costs of the population. Calucalted after sampling (or resampling) the initial population.
+ curr_std (float): The current standard devation in costs of the population. Calculated after sampling each generation.
+ '''
+
+ def __init__(self,
+ first_params = None,
+ trust_region = None,
+ evolution_strategy='best1',
+ population_size=15,
+ mutation_scale=(0.5, 1),
+ cross_over_probability=0.7,
+ restart_tolerance=0.01,
+ **kwargs):
+
+ super(DifferentialEvolutionLearner,self).__init__(**kwargs)
+
+ if first_params is None:
+ self.first_params = float('nan')
+ else:
+ self.first_params = np.array(first_params, dtype=float)
+ if not self.check_num_params(self.first_params):
+ self.log.error('first_params has the wrong number of parameters:' + repr(self.first_params))
+ raise ValueError
+ if not self.check_in_boundary(self.first_params):
+ self.log.error('first_params is not in the boundary:' + repr(self.first_params))
+ raise ValueError
+
+ self._set_trust_region(trust_region)
+
+ if evolution_strategy == 'best1':
+ self.mutation_func = self._best1
+ elif evolution_strategy == 'best2':
+ self.mutation_func = self._best2
+ elif evolution_strategy == 'rand1':
+ self.mutation_func = self._rand1
+ elif evolution_strategy == 'rand2':
+ self.mutation_func = self._rand2
+ else:
+ self.log.error('Please select a valid mutation strategy')
+ raise ValueError
+
+ self.evolution_strategy = evolution_strategy
+ self.restart_tolerance = restart_tolerance
+
+ if len(mutation_scale) == 2 and (np.any(np.array(mutation_scale) <= 2) or np.any(np.array(mutation_scale) > 0)):
+ self.mutation_scale = mutation_scale
+ else:
+ self.log.error('Mutation scale must be a tuple with (min,max) between 0 and 2. mutation_scale:' + repr(mutation_scale))
+ raise ValueError
+
+ if cross_over_probability <= 1 and cross_over_probability >= 0:
+ self.cross_over_probability = cross_over_probability
+ else:
+ self.log.error('Cross over probability must be between 0 and 1. cross_over_probability:' + repr(cross_over_probability))
+
+ if population_size >= 5:
+ self.population_size = population_size
+ else:
+ self.log.error('Population size must be greater or equal to 5:' + repr(population_size))
+
+ self.num_population_members = self.population_size * self.num_params
+
+ self.first_sample = True
+
+ self.params_generations = []
+ self.costs_generations = []
+ self.generation_count = 0
+
+ self.min_index = 0
+ self.init_std = 0
+ self.curr_std = 0
+
+ self.archive_dict.update({'archive_type':'differential_evolution',
+ 'evolution_strategy':self.evolution_strategy,
+ 'mutation_scale':self.mutation_scale,
+ 'cross_over_probability':self.cross_over_probability,
+ 'population_size':self.population_size,
+ 'num_population_members':self.num_population_members,
+ 'restart_tolerance':self.restart_tolerance,
+ 'first_params':self.first_params,
+ 'has_trust_region':self.has_trust_region,
+ 'trust_region':self.trust_region})
+
+
+ def run(self):
+ '''
+ Runs the Differential Evolution Learner.
+ '''
+ try:
+
+ self.generate_population()
+
+ while not self.end_event.is_set():
+
+ self.next_generation()
+
+ if self.curr_std < self.restart_tolerance * self.init_std:
+ self.generate_population()
+
+ except LearnerInterrupt:
+ return
+
+ def save_generation(self):
+ '''
+ Save history of generations.
+ '''
+ self.params_generations.append(np.copy(self.population))
+ self.costs_generations.append(np.copy(self.population_costs))
+ self.generation_count += 1
+
+ def generate_population(self):
+ '''
+ Sample a new random set of variables
+ '''
+
+ self.population = []
+ self.population_costs = []
+ self.min_index = 0
+
+ if np.all(np.isfinite(self.first_params)) and self.first_sample:
+ curr_params = self.first_params
+ self.first_sample = False
+ else:
+ curr_params = self.min_boundary + nr.rand(self.num_params) * self.diff_boundary
+
+ curr_cost = self.put_params_and_get_cost(curr_params)
+
+ self.population.append(curr_params)
+ self.population_costs.append(curr_cost)
+
+ for index in range(1, self.num_population_members):
+
+ if self.has_trust_region:
+ temp_min = np.maximum(self.min_boundary,self.population[self.min_index] - self.trust_region)
+ temp_max = np.minimum(self.max_boundary,self.population[self.min_index] + self.trust_region)
+ curr_params = temp_min + nr.rand(self.num_params) * (temp_max - temp_min)
+ else:
+ curr_params = self.min_boundary + nr.rand(self.num_params) * self.diff_boundary
+
+ curr_cost = self.put_params_and_get_cost(curr_params)
+
+ self.population.append(curr_params)
+ self.population_costs.append(curr_cost)
+
+ if curr_cost < self.population_costs[self.min_index]:
+ self.min_index = index
+
+ self.population = np.array(self.population)
+ self.population_costs = np.array(self.population_costs)
+
+ self.init_std = np.std(self.population_costs)
+ self.curr_std = self.init_std
+
+ self.save_generation()
+
+ def next_generation(self):
+ '''
+ Evolve the population by a single generation
+ '''
+
+ self.curr_scale = nr.uniform(self.mutation_scale[0], self.mutation_scale[1])
+
+ for index in range(self.num_population_members):
+
+ curr_params = self.mutate(index)
+
+ curr_cost = self.put_params_and_get_cost(curr_params)
+
+ if curr_cost < self.population_costs[index]:
+ self.population[index] = curr_params
+ self.population_costs[index] = curr_cost
+
+ if curr_cost < self.population_costs[self.min_index]:
+ self.min_index = index
+
+ self.curr_std = np.std(self.population_costs)
+
+ self.save_generation()
+
+ def mutate(self, index):
+ '''
+ Mutate the parameters at index.
+
+ Args:
+ index (int): Index of the point to be mutated.
+ '''
+
+ fill_point = nr.randint(0, self.num_params)
+ candidate_params = self.mutation_func(index)
+ crossovers = nr.rand(self.num_params) < self.cross_over_probability
+ crossovers[fill_point] = True
+ mutated_params = np.where(crossovers, candidate_params, self.population[index])
+
+ if self.has_trust_region:
+ temp_min = np.maximum(self.min_boundary,self.population[self.min_index] - self.trust_region)
+ temp_max = np.minimum(self.max_boundary,self.population[self.min_index] + self.trust_region)
+ rand_params = temp_min + nr.rand(self.num_params) * (temp_max - temp_min)
+ else:
+ rand_params = self.min_boundary + nr.rand(self.num_params) * self.diff_boundary
+
+ projected_params = np.where(np.logical_or(mutated_params < self.min_boundary, mutated_params > self.max_boundary), rand_params, mutated_params)
+
+ return projected_params
+
+ def _best1(self, index):
+ '''
+ Use best parameters and two others to generate mutation.
+
+ Args:
+ index (int): Index of member to mutate.
+ '''
+ r0, r1 = self.random_index_sample(index, 2)
+ return (self.population[self.min_index] + self.curr_scale *(self.population[r0] - self.population[r1]))
+
+ def _rand1(self, index):
+ '''
+ Use three random parameters to generate mutation.
+
+ Args:
+ index (int): Index of member to mutate.
+ '''
+ r0, r1, r2 = self.random_index_sample(index, 3)
+ return (self.population[r0] + self.curr_scale * (self.population[r1] - self.population[r2]))
+
+ def _best2(self, index):
+ '''
+ Use best parameters and four others to generate mutation.
+
+ Args:
+ index (int): Index of member to mutate.
+ '''
+ r0, r1, r2, r3 = self.random_index_sample(index, 4)
+ return self.population[self.min_index] + self.curr_scale * (self.population[r0] + self.population[r1] - self.population[r2] - self.population[r3])
+
+ def _rand2(self, index):
+ '''
+ Use five random parameters to generate mutation.
+
+ Args:
+ index (int): Index of member to mutate.
+ '''
+ r0, r1, r2, r3, r4 = self.random_index_sample(index, 5)
+ return self.population[r0] + self.curr_scale * (self.population[r1] + self.population[r2] - self.population[r3] - self.population[r4])
+
+ def random_index_sample(self, index, num_picks):
+ '''
+ Randomly select a num_picks of indexes, without index.
+
+ Args:
+ index(int): The index that is not included
+ num_picks(int): The number of picks.
+ '''
+ rand_indexes = list(range(self.num_population_members))
+ rand_indexes.remove(index)
+ return random.sample(rand_indexes, num_picks)
+
+ def update_archive(self):
+ '''
+ Update the archive.
+ '''
+ self.archive_dict.update({'params_generations':self.params_generations,
+ 'costs_generations':self.costs_generations,
+ 'population':self.population,
+ 'population_costs':self.population_costs,
+ 'init_std':self.init_std,
+ 'curr_std':self.curr_std,
+ 'generation_count':self.generation_count})
+
+
class GaussianProcessLearner(Learner, mp.Process):
'''
@@ -627,18 +919,15 @@ def __init__(self,
#Basic optimization settings
num_params = int(self.training_dict['num_params'])
- min_boundary = np.squeeze(np.array(self.training_dict['min_boundary'], dtype=float))
- max_boundary = np.squeeze(np.array(self.training_dict['max_boundary'], dtype=float))
+ min_boundary = mlu.safe_cast_to_array(self.training_dict['min_boundary'])
+ max_boundary = mlu.safe_cast_to_array(self.training_dict['max_boundary'])
#Configuration of the learner
self.cost_has_noise = bool(self.training_dict['cost_has_noise'])
- self.length_scale = np.squeeze(np.array(self.training_dict['length_scale']))
+ self.length_scale = mlu.safe_cast_to_array(self.training_dict['length_scale'])
self.length_scale_history = list(self.training_dict['length_scale_history'])
self.noise_level = float(self.training_dict['noise_level'])
- if isinstance(self.training_dict['noise_level_history'], np.ndarray):
- self.noise_level_history = list(np.squeeze(self.training_dict['noise_level_history']))
- else:
- self.noise_level_history = list( self.training_dict['noise_level_history'])
+ self.noise_level_history = mlu.safe_cast_to_list(self.training_dict['noise_level_history'])
#Counters
self.costs_count = int(self.training_dict['costs_count'])
@@ -646,48 +935,39 @@ def __init__(self,
self.params_count = int(self.training_dict['params_count'])
#Data from previous experiment
- self.all_params = np.array(self.training_dict['all_params'], dtype=float)
- self.all_costs = np.squeeze(np.array(self.training_dict['all_costs'], dtype=float))
- self.all_uncers = np.squeeze(np.array(self.training_dict['all_uncers'], dtype=float))
-
- if isinstance(self.training_dict['bad_run_indexs'], np.ndarray):
- self.bad_run_indexs = list(np.squeeze(self.training_dict['bad_run_indexs']))
- else:
- self.bad_run_indexs = list(self.training_dict['bad_run_indexs'])
+ self.all_params = np.array(self.training_dict['all_params'])
+ self.all_costs = mlu.safe_cast_to_array(self.training_dict['all_costs'])
+ self.all_uncers = mlu.safe_cast_to_array(self.training_dict['all_uncers'])
+ self.bad_run_indexs = mlu.safe_cast_to_list(self.training_dict['bad_run_indexs'])
#Derived properties
self.best_cost = float(self.training_dict['best_cost'])
- self.best_params = np.squeeze(np.array(self.training_dict['best_params'], dtype=float))
+ self.best_params = mlu.safe_cast_to_array(self.training_dict['best_params'])
self.best_index = int(self.training_dict['best_index'])
self.worst_cost = float(self.training_dict['worst_cost'])
self.worst_index = int(self.training_dict['worst_index'])
self.cost_range = float(self.training_dict['cost_range'])
try:
- self.predicted_best_parameters = np.squeeze(np.array(self.training_dict['predicted_best_parameters']))
+ self.predicted_best_parameters = mlu.safe_cast_to_array(self.training_dict['predicted_best_parameters'])
self.predicted_best_cost = float(self.training_dict['predicted_best_cost'])
self.predicted_best_uncertainty = float(self.training_dict['predicted_best_uncertainty'])
self.has_global_minima = True
except KeyError:
self.has_global_minima = False
try:
- self.local_minima_parameters = list(self.training_dict['local_minima_parameters'])
-
- if isinstance(self.training_dict['local_minima_costs'], np.ndarray):
- self.local_minima_costs = list(np.squeeze(self.training_dict['local_minima_costs']))
- else:
- self.local_minima_costs = list(self.training_dict['local_minima_costs'])
- if isinstance(self.training_dict['local_minima_uncers'], np.ndarray):
- self.local_minima_uncers = list(np.squeeze(self.training_dict['local_minima_uncers']))
- else:
- self.local_minima_uncers = list(self.training_dict['local_minima_uncers'])
+ self.local_minima_parameters = mlu.safe_cast_to_list(self.training_dict['local_minima_parameters'])
+ self.local_minima_costs = mlu.safe_cast_to_list(self.training_dict['local_minima_costs'])
+ self.local_minima_uncers = mlu.safe_cast_to_list(self.training_dict['local_minima_uncers'])
self.has_local_minima = True
except KeyError:
self.has_local_minima = False
-
- super(GaussianProcessLearner,self).__init__(num_params=num_params,
+ if 'num_params' in kwargs:
+ super(GaussianProcessLearner,self).__init__(**kwargs)
+ else:
+ super(GaussianProcessLearner,self).__init__(num_params=num_params,
min_boundary=min_boundary,
max_boundary=max_boundary,
**kwargs)
@@ -781,9 +1061,9 @@ def __init__(self,
if self.default_bad_uncertainty < 0:
self.log.error('Default bad uncertainty must be positive.')
raise ValueError
- if (self.default_bad_cost is None) and (self.default_bad_cost is None):
+ if (self.default_bad_cost is None) and (self.default_bad_uncertainty is None):
self.bad_defaults_set = False
- elif (self.default_bad_cost is not None) and (self.default_bad_cost is not None):
+ elif (self.default_bad_cost is not None) and (self.default_bad_uncertainty is not None):
self.bad_defaults_set = True
else:
self.log.error('Both the default cost and uncertainty must be set for a bad run or they must both be set to None.')
@@ -864,13 +1144,14 @@ def get_params_and_costs(self):
new_costs = []
new_uncers = []
new_bads = []
- new_costs_count = 0
update_bads_flag = False
while not self.costs_in_queue.empty():
(param, cost, uncer, bad) = self.costs_in_queue.get_nowait()
+ self.costs_count +=1
+
if bad:
- new_bads.append(self.data_count)
+ new_bads.append(self.costs_count-1)
if self.bad_defaults_set:
cost = self.default_bad_cost
uncer = self.default_bad_uncertainty
@@ -889,18 +1170,15 @@ def get_params_and_costs(self):
self.log.error('Provided uncertainty must be larger or equal to zero:' + repr(uncer))
uncer = max(float(uncer), self.minimum_uncertainty)
- new_costs_count += 1
- self.costs_count +=1
-
cost_change_flag = False
if cost > self.worst_cost:
self.worst_cost = cost
- self.worst_index = self.costs_count
+ self.worst_index = self.costs_count-1
cost_change_flag = True
if cost < self.best_cost:
self.best_cost = cost
self.best_params = param
- self.best_index = self.costs_count
+ self.best_index = self.costs_count-1
cost_change_flag = True
if cost_change_flag:
self.cost_range = self.worst_cost - self.best_cost
@@ -910,7 +1188,8 @@ def get_params_and_costs(self):
new_params.append(param)
new_costs.append(cost)
new_uncers.append(uncer)
-
+
+
if self.all_params.size==0:
self.all_params = np.array(new_params, dtype=float)
self.all_costs = np.array(new_costs, dtype=float)
@@ -920,13 +1199,15 @@ def get_params_and_costs(self):
self.all_costs = np.concatenate((self.all_costs, np.array(new_costs, dtype=float)))
self.all_uncers = np.concatenate((self.all_uncers, np.array(new_uncers, dtype=float)))
+ self.bad_run_indexs.append(new_bads)
+
if self.all_params.shape != (self.costs_count,self.num_params):
self.log('Saved GP params are the wrong size. THIS SHOULD NOT HAPPEN:' + repr(self.all_params))
if self.all_costs.shape != (self.costs_count,):
self.log('Saved GP costs are the wrong size. THIS SHOULD NOT HAPPEN:' + repr(self.all_costs))
if self.all_uncers.shape != (self.costs_count,):
self.log('Saved GP uncertainties are the wrong size. THIS SHOULD NOT HAPPEN:' + repr(self.all_uncers))
-
+
if update_bads_flag:
self.update_bads()
@@ -1004,7 +1285,7 @@ def fit_gaussian_process(self):
if self.cost_has_noise:
self.length_scale = last_hyperparameters['k1__length_scale']
if isinstance(self.length_scale, float):
- self.length_scale = np.array([self.length_scale])
+ self.length_scale = np.array([self.length_scale])
self.length_scale_history.append(self.length_scale)
self.noise_level = last_hyperparameters['k2__noise_level']
self.noise_level_history.append(self.noise_level)
@@ -1074,6 +1355,9 @@ def run(self):
raise LearnerInterrupt()
except LearnerInterrupt:
pass
+ if self.predict_global_minima_at_end or self.predict_local_minima_at_end:
+ self.get_params_and_costs()
+ self.fit_gaussian_process()
end_dict = {}
if self.predict_global_minima_at_end:
self.find_global_minima()
@@ -1178,7 +1462,11 @@ def find_local_minima(self):
self.has_local_minima = True
self.log.info('Search completed')
-
-
+
+
+
+
+
+
diff --git a/mloop/localsklearn/__init__.py b/mloop/localsklearn/__init__.py
deleted file mode 100644
index 9b721d4..0000000
--- a/mloop/localsklearn/__init__.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-Machine learning module for Python
-==================================
-
-sklearn is a Python module integrating classical machine
-learning algorithms in the tightly-knit world of scientific Python
-packages (numpy, scipy, matplotlib).
-
-It aims to provide simple and efficient solutions to learning problems
-that are accessible to everybody and reusable in various contexts:
-machine-learning as a versatile tool for science and engineering.
-
-See http://scikit-learn.org for complete documentation.
-"""
-import sys
-import re
-import warnings
-
-
-# Make sure that DeprecationWarning within this package always gets printed
-warnings.filterwarnings('always', category=DeprecationWarning,
- module='^{0}\.'.format(re.escape(__name__)))
-
-# PEP0440 compatible formatted version, see:
-# https://www.python.org/dev/peps/pep-0440/
-#
-# Generic release markers:
-# X.Y
-# X.Y.Z # For bugfix releases
-#
-# Admissible pre-release markers:
-# X.YaN # Alpha release
-# X.YbN # Beta release
-# X.YrcN # Release Candidate
-# X.Y # Final release
-#
-# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
-# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
-#
-__version__ = '0.18.dev0'
-
-
-try:
- # This variable is injected in the __builtins__ by the build
- # process. It used to enable importing subpackages of sklearn when
- # the binaries are not built
- __SKLEARN_SETUP__
-except NameError:
- __SKLEARN_SETUP__ = False
-
-if __SKLEARN_SETUP__:
- sys.stderr.write('Partial import of sklearn during the build process.\n')
- # We are not importing the rest of the scikit during the build
- # process, as it may not be compiled yet
-else:
- __all__ = ['gaussian_process', 'metrics', 'preprocessing']
-
-def setup_module(module):
- """Fixture for the tests to assure globally controllable seeding of RNGs"""
- import os
- import numpy as np
- import random
-
- # It could have been provided in the environment
- _random_seed = os.environ.get('SKLEARN_SEED', None)
- if _random_seed is None:
- _random_seed = np.random.uniform() * (2 ** 31 - 1)
- _random_seed = int(_random_seed)
- print("I: Seeding RNGs with %r" % _random_seed)
- np.random.seed(_random_seed)
- random.seed(_random_seed)
diff --git a/mloop/localsklearn/base.py b/mloop/localsklearn/base.py
deleted file mode 100644
index 30be667..0000000
--- a/mloop/localsklearn/base.py
+++ /dev/null
@@ -1,510 +0,0 @@
-"""Base classes for all estimators."""
-
-# Author: Gael Varoquaux
-# License: BSD 3 clause
-
-import copy
-import warnings
-
-import numpy as np
-from scipy import sparse
-from .externals import six
-from .utils.fixes import signature
-from .utils.deprecation import deprecated
-from .exceptions import ChangedBehaviorWarning as _ChangedBehaviorWarning
-
-
-@deprecated("ChangedBehaviorWarning has been moved into the sklearn.exceptions"
- " module. It will not be available here from version 0.19")
-class ChangedBehaviorWarning(_ChangedBehaviorWarning):
- pass
-
-
-##############################################################################
-def _first_and_last_element(arr):
- """Returns first and last element of numpy array or sparse matrix."""
- if isinstance(arr, np.ndarray) or hasattr(arr, 'data'):
- # numpy array or sparse matrix with .data attribute
- data = arr.data if sparse.issparse(arr) else arr
- return data.flat[0], data.flat[-1]
- else:
- # Sparse matrices without .data attribute. Only dok_matrix at
- # the time of writing, in this case indexing is fast
- return arr[0, 0], arr[-1, -1]
-
-
-def clone(estimator, safe=True):
- """Constructs a new estimator with the same parameters.
-
- Clone does a deep copy of the model in an estimator
- without actually copying attached data. It yields a new estimator
- with the same parameters that has not been fit on any data.
-
- Parameters
- ----------
- estimator: estimator object, or list, tuple or set of objects
- The estimator or group of estimators to be cloned
-
- safe: boolean, optional
- If safe is false, clone will fall back to a deepcopy on objects
- that are not estimators.
-
- """
- estimator_type = type(estimator)
- # XXX: not handling dictionaries
- if estimator_type in (list, tuple, set, frozenset):
- return estimator_type([clone(e, safe=safe) for e in estimator])
- elif not hasattr(estimator, 'get_params'):
- if not safe:
- return copy.deepcopy(estimator)
- else:
- raise TypeError("Cannot clone object '%s' (type %s): "
- "it does not seem to be a scikit-learn estimator "
- "as it does not implement a 'get_params' methods."
- % (repr(estimator), type(estimator)))
- klass = estimator.__class__
- new_object_params = estimator.get_params(deep=False)
- for name, param in six.iteritems(new_object_params):
- new_object_params[name] = clone(param, safe=False)
- new_object = klass(**new_object_params)
- params_set = new_object.get_params(deep=False)
-
- # quick sanity check of the parameters of the clone
- for name in new_object_params:
- param1 = new_object_params[name]
- param2 = params_set[name]
- if isinstance(param1, np.ndarray):
- # For most ndarrays, we do not test for complete equality
- if not isinstance(param2, type(param1)):
- equality_test = False
- elif (param1.ndim > 0
- and param1.shape[0] > 0
- and isinstance(param2, np.ndarray)
- and param2.ndim > 0
- and param2.shape[0] > 0):
- equality_test = (
- param1.shape == param2.shape
- and param1.dtype == param2.dtype
- and (_first_and_last_element(param1) ==
- _first_and_last_element(param2))
- )
- else:
- equality_test = np.all(param1 == param2)
- elif sparse.issparse(param1):
- # For sparse matrices equality doesn't work
- if not sparse.issparse(param2):
- equality_test = False
- elif param1.size == 0 or param2.size == 0:
- equality_test = (
- param1.__class__ == param2.__class__
- and param1.size == 0
- and param2.size == 0
- )
- else:
- equality_test = (
- param1.__class__ == param2.__class__
- and (_first_and_last_element(param1) ==
- _first_and_last_element(param2))
- and param1.nnz == param2.nnz
- and param1.shape == param2.shape
- )
- else:
- new_obj_val = new_object_params[name]
- params_set_val = params_set[name]
- # The following construct is required to check equality on special
- # singletons such as np.nan that are not equal to them-selves:
- equality_test = (new_obj_val == params_set_val or
- new_obj_val is params_set_val)
- if not equality_test:
- raise RuntimeError('Cannot clone object %s, as the constructor '
- 'does not seem to set parameter %s' %
- (estimator, name))
-
- return new_object
-
-
-###############################################################################
-def _pprint(params, offset=0, printer=repr):
- """Pretty print the dictionary 'params'
-
- Parameters
- ----------
- params: dict
- The dictionary to pretty print
-
- offset: int
- The offset in characters to add at the begin of each line.
-
- printer:
- The function to convert entries to strings, typically
- the builtin str or repr
-
- """
- # Do a multi-line justified repr:
- options = np.get_printoptions()
- np.set_printoptions(precision=5, threshold=64, edgeitems=2)
- params_list = list()
- this_line_length = offset
- line_sep = ',\n' + (1 + offset // 2) * ' '
- for i, (k, v) in enumerate(sorted(six.iteritems(params))):
- if type(v) is float:
- # use str for representing floating point numbers
- # this way we get consistent representation across
- # architectures and versions.
- this_repr = '%s=%s' % (k, str(v))
- else:
- # use repr of the rest
- this_repr = '%s=%s' % (k, printer(v))
- if len(this_repr) > 500:
- this_repr = this_repr[:300] + '...' + this_repr[-100:]
- if i > 0:
- if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr):
- params_list.append(line_sep)
- this_line_length = len(line_sep)
- else:
- params_list.append(', ')
- this_line_length += 2
- params_list.append(this_repr)
- this_line_length += len(this_repr)
-
- np.set_printoptions(**options)
- lines = ''.join(params_list)
- # Strip trailing space to avoid nightmare in doctests
- lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n'))
- return lines
-
-
-###############################################################################
-class BaseEstimator(object):
- """Base class for all estimators in scikit-learn
-
- Notes
- -----
- All estimators should specify all the parameters that can be set
- at the class level in their ``__init__`` as explicit keyword
- arguments (no ``*args`` or ``**kwargs``).
- """
-
- @classmethod
- def _get_param_names(cls):
- """Get parameter names for the estimator"""
- # fetch the constructor or the original constructor before
- # deprecation wrapping if any
- init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
- if init is object.__init__:
- # No explicit constructor to introspect
- return []
-
- # introspect the constructor arguments to find the model parameters
- # to represent
- init_signature = signature(init)
- # Consider the constructor parameters excluding 'self'
- parameters = [p for p in init_signature.parameters.values()
- if p.name != 'self' and p.kind != p.VAR_KEYWORD]
- for p in parameters:
- if p.kind == p.VAR_POSITIONAL:
- raise RuntimeError("scikit-learn estimators should always "
- "specify their parameters in the signature"
- " of their __init__ (no varargs)."
- " %s with constructor %s doesn't "
- " follow this convention."
- % (cls, init_signature))
- # Extract and sort argument names excluding 'self'
- return sorted([p.name for p in parameters])
-
- def get_params(self, deep=True):
- """Get parameters for this estimator.
-
- Parameters
- ----------
- deep: boolean, optional
- If True, will return the parameters for this estimator and
- contained subobjects that are estimators.
-
- Returns
- -------
- params : mapping of string to any
- Parameter names mapped to their values.
- """
- out = dict()
- for key in self._get_param_names():
- # We need deprecation warnings to always be on in order to
- # catch deprecated param values.
- # This is set in utils/__init__.py but it gets overwritten
- # when running under python3 somehow.
- warnings.simplefilter("always", DeprecationWarning)
- try:
- with warnings.catch_warnings(record=True) as w:
- value = getattr(self, key, None)
- if len(w) and w[0].category == DeprecationWarning:
- # if the parameter is deprecated, don't show it
- continue
- finally:
- warnings.filters.pop(0)
-
- # XXX: should we rather test if instance of estimator?
- if deep and hasattr(value, 'get_params'):
- deep_items = value.get_params().items()
- out.update((key + '__' + k, val) for k, val in deep_items)
- out[key] = value
- return out
-
- def set_params(self, **params):
- """Set the parameters of this estimator.
-
- The method works on simple estimators as well as on nested objects
- (such as pipelines). The latter have parameters of the form
- ``__`` so that it's possible to update each
- component of a nested object.
-
- Returns
- -------
- self
- """
- if not params:
- # Simple optimisation to gain speed (inspect is slow)
- return self
- valid_params = self.get_params(deep=True)
- for key, value in six.iteritems(params):
- split = key.split('__', 1)
- if len(split) > 1:
- # nested objects case
- name, sub_name = split
- if name not in valid_params:
- raise ValueError('Invalid parameter %s for estimator %s. '
- 'Check the list of available parameters '
- 'with `estimator.get_params().keys()`.' %
- (name, self))
- sub_object = valid_params[name]
- sub_object.set_params(**{sub_name: value})
- else:
- # simple objects case
- if key not in valid_params:
- raise ValueError('Invalid parameter %s for estimator %s. '
- 'Check the list of available parameters '
- 'with `estimator.get_params().keys()`.' %
- (key, self.__class__.__name__))
- setattr(self, key, value)
- return self
-
- def __repr__(self):
- class_name = self.__class__.__name__
- return '%s(%s)' % (class_name, _pprint(self.get_params(deep=False),
- offset=len(class_name),),)
-
-
-###############################################################################
-class ClassifierMixin(object):
- """Mixin class for all classifiers in scikit-learn."""
- _estimator_type = "classifier"
-
- def score(self, X, y, sample_weight=None):
- """Returns the mean accuracy on the given test data and labels.
-
- In multi-label classification, this is the subset accuracy
- which is a harsh metric since you require for each sample that
- each label set be correctly predicted.
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
- Test samples.
-
- y : array-like, shape = (n_samples) or (n_samples, n_outputs)
- True labels for X.
-
- sample_weight : array-like, shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- score : float
- Mean accuracy of self.predict(X) wrt. y.
-
- """
- from .metrics import accuracy_score
- return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
-
-
-###############################################################################
-class RegressorMixin(object):
- """Mixin class for all regression estimators in scikit-learn."""
- _estimator_type = "regressor"
-
- def score(self, X, y, sample_weight=None):
- """Returns the coefficient of determination R^2 of the prediction.
-
- The coefficient R^2 is defined as (1 - u/v), where u is the regression
- sum of squares ((y_true - y_pred) ** 2).sum() and v is the residual
- sum of squares ((y_true - y_true.mean()) ** 2).sum().
- Best possible score is 1.0 and it can be negative (because the
- model can be arbitrarily worse). A constant model that always
- predicts the expected value of y, disregarding the input features,
- would get a R^2 score of 0.0.
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
- Test samples.
-
- y : array-like, shape = (n_samples) or (n_samples, n_outputs)
- True values for X.
-
- sample_weight : array-like, shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- score : float
- R^2 of self.predict(X) wrt. y.
- """
-
- from .metrics import r2_score
- return r2_score(y, self.predict(X), sample_weight=sample_weight,
- multioutput='variance_weighted')
-
-
-###############################################################################
-class ClusterMixin(object):
- """Mixin class for all cluster estimators in scikit-learn."""
- _estimator_type = "clusterer"
-
- def fit_predict(self, X, y=None):
- """Performs clustering on X and returns cluster labels.
-
- Parameters
- ----------
- X : ndarray, shape (n_samples, n_features)
- Input data.
-
- Returns
- -------
- y : ndarray, shape (n_samples,)
- cluster labels
- """
- # non-optimized default implementation; override when a better
- # method is possible for a given clustering algorithm
- self.fit(X)
- return self.labels_
-
-
-class BiclusterMixin(object):
- """Mixin class for all bicluster estimators in scikit-learn"""
-
- @property
- def biclusters_(self):
- """Convenient way to get row and column indicators together.
-
- Returns the ``rows_`` and ``columns_`` members.
- """
- return self.rows_, self.columns_
-
- def get_indices(self, i):
- """Row and column indices of the i'th bicluster.
-
- Only works if ``rows_`` and ``columns_`` attributes exist.
-
- Returns
- -------
- row_ind : np.array, dtype=np.intp
- Indices of rows in the dataset that belong to the bicluster.
- col_ind : np.array, dtype=np.intp
- Indices of columns in the dataset that belong to the bicluster.
-
- """
- rows = self.rows_[i]
- columns = self.columns_[i]
- return np.nonzero(rows)[0], np.nonzero(columns)[0]
-
- def get_shape(self, i):
- """Shape of the i'th bicluster.
-
- Returns
- -------
- shape : (int, int)
- Number of rows and columns (resp.) in the bicluster.
- """
- indices = self.get_indices(i)
- return tuple(len(i) for i in indices)
-
- def get_submatrix(self, i, data):
- """Returns the submatrix corresponding to bicluster `i`.
-
- Works with sparse matrices. Only works if ``rows_`` and
- ``columns_`` attributes exist.
-
- """
- from .utils.validation import check_array
- data = check_array(data, accept_sparse='csr')
- row_ind, col_ind = self.get_indices(i)
- return data[row_ind[:, np.newaxis], col_ind]
-
-
-###############################################################################
-class TransformerMixin(object):
- """Mixin class for all transformers in scikit-learn."""
-
- def fit_transform(self, X, y=None, **fit_params):
- """Fit to data, then transform it.
-
- Fits transformer to X and y with optional parameters fit_params
- and returns a transformed version of X.
-
- Parameters
- ----------
- X : numpy array of shape [n_samples, n_features]
- Training set.
-
- y : numpy array of shape [n_samples]
- Target values.
-
- Returns
- -------
- X_new : numpy array of shape [n_samples, n_features_new]
- Transformed array.
-
- """
- # non-optimized default implementation; override when a better
- # method is possible for a given clustering algorithm
- if y is None:
- # fit method of arity 1 (unsupervised transformation)
- return self.fit(X, **fit_params).transform(X)
- else:
- # fit method of arity 2 (supervised transformation)
- return self.fit(X, y, **fit_params).transform(X)
-
-
-class DensityMixin(object):
- """Mixin class for all density estimators in scikit-learn."""
- _estimator_type = "DensityEstimator"
-
- def score(self, X, y=None):
- """Returns the score of the model on the data X
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
-
- Returns
- -------
- score: float
- """
- pass
-
-
-###############################################################################
-class MetaEstimatorMixin(object):
- """Mixin class for all meta estimators in scikit-learn."""
- # this is just a tag for the moment
-
-
-###############################################################################
-
-def is_classifier(estimator):
- """Returns True if the given estimator is (probably) a classifier."""
- return getattr(estimator, "_estimator_type", None) == "classifier"
-
-
-def is_regressor(estimator):
- """Returns True if the given estimator is (probably) a regressor."""
- return getattr(estimator, "_estimator_type", None) == "regressor"
diff --git a/mloop/localsklearn/exceptions.py b/mloop/localsklearn/exceptions.py
deleted file mode 100644
index c830ef8..0000000
--- a/mloop/localsklearn/exceptions.py
+++ /dev/null
@@ -1,117 +0,0 @@
-"""
-The :mod:`sklearn.exceptions` module includes all custom warnings and error
-classes used across scikit-learn.
-"""
-
-__all__ = ['NotFittedError',
- 'ChangedBehaviorWarning',
- 'ConvergenceWarning',
- 'DataConversionWarning',
- 'DataDimensionalityWarning',
- 'EfficiencyWarning',
- 'FitFailedWarning',
- 'NonBLASDotWarning',
- 'UndefinedMetricWarning']
-
-
-class NotFittedError(ValueError, AttributeError):
- """Exception class to raise if estimator is used before fitting.
-
- This class inherits from both ValueError and AttributeError to help with
- exception handling and backward compatibility.
-
- Examples
- --------
- >>> from sklearn.svm import LinearSVC
- >>> from sklearn.exceptions import NotFittedError
- >>> try:
- ... LinearSVC().predict([[1, 2], [2, 3], [3, 4]])
- ... except NotFittedError as e:
- ... print(repr(e))
- ... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
- NotFittedError('This LinearSVC instance is not fitted yet',)
- """
-
-
-class ChangedBehaviorWarning(UserWarning):
- """Warning class used to notify the user of any change in the behavior."""
-
-
-class ConvergenceWarning(UserWarning):
- """Custom warning to capture convergence problems"""
-
-
-class DataConversionWarning(UserWarning):
- """Warning used to notify implicit data conversions happening in the code.
-
- This warning occurs when some input data needs to be converted or
- interpreted in a way that may not match the user's expectations.
-
- For example, this warning may occur when the user
- - passes an integer array to a function which expects float input and
- will convert the input
- - requests a non-copying operation, but a copy is required to meet the
- implementation's data-type expectations;
- - passes an input whose shape can be interpreted ambiguously.
- """
-
-
-class DataDimensionalityWarning(UserWarning):
- """Custom warning to notify potential issues with data dimensionality.
-
- For example, in random projection, this warning is raised when the
- number of components, which quantifies the dimensionality of the target
- projection space, is higher than the number of features, which quantifies
- the dimensionality of the original source space, to imply that the
- dimensionality of the problem will not be reduced.
- """
-
-
-class EfficiencyWarning(UserWarning):
- """Warning used to notify the user of inefficient computation.
-
- This warning notifies the user that the efficiency may not be optimal due
- to some reason which may be included as a part of the warning message.
- This may be subclassed into a more specific Warning class.
- """
-
-
-class FitFailedWarning(RuntimeWarning):
- """Warning class used if there is an error while fitting the estimator.
-
- This Warning is used in meta estimators GridSearchCV and RandomizedSearchCV
- and the cross-validation helper function cross_val_score to warn when there
- is an error while fitting the estimator.
-
- Examples
- --------
- >>> from sklearn.model_selection import GridSearchCV
- >>> from sklearn.svm import LinearSVC
- >>> from sklearn.exceptions import FitFailedWarning
- >>> import warnings
- >>> warnings.simplefilter('always', FitFailedWarning)
- >>> gs = GridSearchCV(LinearSVC(), {'C': [-1, -2]}, error_score=0)
- >>> X, y = [[1, 2], [3, 4], [5, 6], [7, 8], [8, 9]], [0, 0, 0, 1, 1]
- >>> with warnings.catch_warnings(record=True) as w:
- ... try:
- ... gs.fit(X, y) # This will raise a ValueError since C is < 0
- ... except ValueError:
- ... pass
- ... print(repr(w[-1].message))
- ... # doctest: +NORMALIZE_WHITESPACE
- FitFailedWarning("Classifier fit failed. The score on this train-test
- partition for these parameters will be set to 0.000000. Details:
- \\nValueError('Penalty term must be positive; got (C=-2)',)",)
- """
-
-
-class NonBLASDotWarning(EfficiencyWarning):
- """Warning used when the dot operation does not use BLAS.
-
- This warning is used to notify the user that BLAS was not used for dot
- operation and hence the efficiency may be affected.
- """
-
-
-class UndefinedMetricWarning(UserWarning):
- """Warning used when the metric is invalid"""
diff --git a/mloop/localsklearn/externals/README b/mloop/localsklearn/externals/README
deleted file mode 100644
index eef7ba7..0000000
--- a/mloop/localsklearn/externals/README
+++ /dev/null
@@ -1,7 +0,0 @@
-This directory contains bundled external dependencies that are updated
-every once in a while.
-
-Note for distribution packagers: if you want to remove the duplicated
-code and depend on a packaged version, we suggest that you simply do a
-symbolic link in this directory.
-
diff --git a/mloop/localsklearn/externals/__init__.py b/mloop/localsklearn/externals/__init__.py
deleted file mode 100644
index 97cda18..0000000
--- a/mloop/localsklearn/externals/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-
-"""
-External, bundled dependencies.
-
-"""
diff --git a/mloop/localsklearn/externals/copy_joblib.sh b/mloop/localsklearn/externals/copy_joblib.sh
deleted file mode 100755
index 6e5d2a5..0000000
--- a/mloop/localsklearn/externals/copy_joblib.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/sh
-# Script to do a local install of joblib
-export LC_ALL=C
-rm -rf tmp joblib
-PYTHON_VERSION=$(python -c 'import sys; print("{0[0]}.{0[1]}".format(sys.version_info))')
-SITE_PACKAGES="$PWD/tmp/lib/python$PYTHON_VERSION/site-packages"
-
-mkdir -p $SITE_PACKAGES
-mkdir -p tmp/bin
-export PYTHONPATH="$SITE_PACKAGES"
-easy_install -Zeab tmp joblib
-
-cd tmp/joblib/
-python setup.py install --prefix $OLDPWD/tmp
-cd $OLDPWD
-cp -r $SITE_PACKAGES/joblib-*.egg/joblib .
-rm -rf tmp
-# Needed to rewrite the doctests
-# Note: BSD sed -i needs an argument unders OSX
-# so first renaming to .bak and then deleting backup files
-find joblib -name "*.py" | xargs sed -i.bak "s/from joblib/from sklearn.externals.joblib/"
-find joblib -name "*.bak" | xargs rm
-
-# Remove the tests folders to speed-up test time for scikit-learn.
-# joblib is already tested on its own CI infrastructure upstream.
-rm -r joblib/test
-
-chmod -x joblib/*.py
diff --git a/mloop/localsklearn/externals/funcsigs.py b/mloop/localsklearn/externals/funcsigs.py
deleted file mode 100644
index 413e310..0000000
--- a/mloop/localsklearn/externals/funcsigs.py
+++ /dev/null
@@ -1,818 +0,0 @@
-# Copyright 2001-2013 Python Software Foundation; All Rights Reserved
-"""Function signature objects for callables
-
-Back port of Python 3.3's function signature tools from the inspect module,
-modified to be compatible with Python 2.6, 2.7 and 3.2+.
-"""
-from __future__ import absolute_import, division, print_function
-import itertools
-import functools
-import re
-import types
-
-try:
- from collections import OrderedDict
-except ImportError:
- from .odict import OrderedDict
-
-__version__ = "0.4"
-
-__all__ = ['BoundArguments', 'Parameter', 'Signature', 'signature']
-
-
-_WrapperDescriptor = type(type.__call__)
-_MethodWrapper = type(all.__call__)
-
-_NonUserDefinedCallables = (_WrapperDescriptor,
- _MethodWrapper,
- types.BuiltinFunctionType)
-
-
-def formatannotation(annotation, base_module=None):
- if isinstance(annotation, type):
- if annotation.__module__ in ('builtins', '__builtin__', base_module):
- return annotation.__name__
- return annotation.__module__+'.'+annotation.__name__
- return repr(annotation)
-
-
-def _get_user_defined_method(cls, method_name, *nested):
- try:
- if cls is type:
- return
- meth = getattr(cls, method_name)
- for name in nested:
- meth = getattr(meth, name, meth)
- except AttributeError:
- return
- else:
- if not isinstance(meth, _NonUserDefinedCallables):
- # Once '__signature__' will be added to 'C'-level
- # callables, this check won't be necessary
- return meth
-
-
-def signature(obj):
- '''Get a signature object for the passed callable.'''
-
- if not callable(obj):
- raise TypeError('{0!r} is not a callable object'.format(obj))
-
- if isinstance(obj, types.MethodType):
- sig = signature(obj.__func__)
- if obj.__self__ is None:
- # Unbound method: the first parameter becomes positional-only
- if sig.parameters:
- first = sig.parameters.values()[0].replace(
- kind=_POSITIONAL_ONLY)
- return sig.replace(
- parameters=(first,) + tuple(sig.parameters.values())[1:])
- else:
- return sig
- else:
- # In this case we skip the first parameter of the underlying
- # function (usually `self` or `cls`).
- return sig.replace(parameters=tuple(sig.parameters.values())[1:])
-
- try:
- sig = obj.__signature__
- except AttributeError:
- pass
- else:
- if sig is not None:
- return sig
-
- try:
- # Was this function wrapped by a decorator?
- wrapped = obj.__wrapped__
- except AttributeError:
- pass
- else:
- return signature(wrapped)
-
- if isinstance(obj, types.FunctionType):
- return Signature.from_function(obj)
-
- if isinstance(obj, functools.partial):
- sig = signature(obj.func)
-
- new_params = OrderedDict(sig.parameters.items())
-
- partial_args = obj.args or ()
- partial_keywords = obj.keywords or {}
- try:
- ba = sig.bind_partial(*partial_args, **partial_keywords)
- except TypeError as ex:
- msg = 'partial object {0!r} has incorrect arguments'.format(obj)
- raise ValueError(msg)
-
- for arg_name, arg_value in ba.arguments.items():
- param = new_params[arg_name]
- if arg_name in partial_keywords:
- # We set a new default value, because the following code
- # is correct:
- #
- # >>> def foo(a): print(a)
- # >>> print(partial(partial(foo, a=10), a=20)())
- # 20
- # >>> print(partial(partial(foo, a=10), a=20)(a=30))
- # 30
- #
- # So, with 'partial' objects, passing a keyword argument is
- # like setting a new default value for the corresponding
- # parameter
- #
- # We also mark this parameter with '_partial_kwarg'
- # flag. Later, in '_bind', the 'default' value of this
- # parameter will be added to 'kwargs', to simulate
- # the 'functools.partial' real call.
- new_params[arg_name] = param.replace(default=arg_value,
- _partial_kwarg=True)
-
- elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) and
- not param._partial_kwarg):
- new_params.pop(arg_name)
-
- return sig.replace(parameters=new_params.values())
-
- sig = None
- if isinstance(obj, type):
- # obj is a class or a metaclass
-
- # First, let's see if it has an overloaded __call__ defined
- # in its metaclass
- call = _get_user_defined_method(type(obj), '__call__')
- if call is not None:
- sig = signature(call)
- else:
- # Now we check if the 'obj' class has a '__new__' method
- new = _get_user_defined_method(obj, '__new__')
- if new is not None:
- sig = signature(new)
- else:
- # Finally, we should have at least __init__ implemented
- init = _get_user_defined_method(obj, '__init__')
- if init is not None:
- sig = signature(init)
- elif not isinstance(obj, _NonUserDefinedCallables):
- # An object with __call__
- # We also check that the 'obj' is not an instance of
- # _WrapperDescriptor or _MethodWrapper to avoid
- # infinite recursion (and even potential segfault)
- call = _get_user_defined_method(type(obj), '__call__', 'im_func')
- if call is not None:
- sig = signature(call)
-
- if sig is not None:
- # For classes and objects we skip the first parameter of their
- # __call__, __new__, or __init__ methods
- return sig.replace(parameters=tuple(sig.parameters.values())[1:])
-
- if isinstance(obj, types.BuiltinFunctionType):
- # Raise a nicer error message for builtins
- msg = 'no signature found for builtin function {0!r}'.format(obj)
- raise ValueError(msg)
-
- raise ValueError('callable {0!r} is not supported by signature'.format(obj))
-
-
-class _void(object):
- '''A private marker - used in Parameter & Signature'''
-
-
-class _empty(object):
- pass
-
-
-class _ParameterKind(int):
- def __new__(self, *args, **kwargs):
- obj = int.__new__(self, *args)
- obj._name = kwargs['name']
- return obj
-
- def __str__(self):
- return self._name
-
- def __repr__(self):
- return '<_ParameterKind: {0!r}>'.format(self._name)
-
-
-_POSITIONAL_ONLY = _ParameterKind(0, name='POSITIONAL_ONLY')
-_POSITIONAL_OR_KEYWORD = _ParameterKind(1, name='POSITIONAL_OR_KEYWORD')
-_VAR_POSITIONAL = _ParameterKind(2, name='VAR_POSITIONAL')
-_KEYWORD_ONLY = _ParameterKind(3, name='KEYWORD_ONLY')
-_VAR_KEYWORD = _ParameterKind(4, name='VAR_KEYWORD')
-
-
-class Parameter(object):
- '''Represents a parameter in a function signature.
-
- Has the following public attributes:
-
- * name : str
- The name of the parameter as a string.
- * default : object
- The default value for the parameter if specified. If the
- parameter has no default value, this attribute is not set.
- * annotation
- The annotation for the parameter if specified. If the
- parameter has no annotation, this attribute is not set.
- * kind : str
- Describes how argument values are bound to the parameter.
- Possible values: `Parameter.POSITIONAL_ONLY`,
- `Parameter.POSITIONAL_OR_KEYWORD`, `Parameter.VAR_POSITIONAL`,
- `Parameter.KEYWORD_ONLY`, `Parameter.VAR_KEYWORD`.
- '''
-
- __slots__ = ('_name', '_kind', '_default', '_annotation', '_partial_kwarg')
-
- POSITIONAL_ONLY = _POSITIONAL_ONLY
- POSITIONAL_OR_KEYWORD = _POSITIONAL_OR_KEYWORD
- VAR_POSITIONAL = _VAR_POSITIONAL
- KEYWORD_ONLY = _KEYWORD_ONLY
- VAR_KEYWORD = _VAR_KEYWORD
-
- empty = _empty
-
- def __init__(self, name, kind, default=_empty, annotation=_empty,
- _partial_kwarg=False):
-
- if kind not in (_POSITIONAL_ONLY, _POSITIONAL_OR_KEYWORD,
- _VAR_POSITIONAL, _KEYWORD_ONLY, _VAR_KEYWORD):
- raise ValueError("invalid value for 'Parameter.kind' attribute")
- self._kind = kind
-
- if default is not _empty:
- if kind in (_VAR_POSITIONAL, _VAR_KEYWORD):
- msg = '{0} parameters cannot have default values'.format(kind)
- raise ValueError(msg)
- self._default = default
- self._annotation = annotation
-
- if name is None:
- if kind != _POSITIONAL_ONLY:
- raise ValueError("None is not a valid name for a "
- "non-positional-only parameter")
- self._name = name
- else:
- name = str(name)
- if kind != _POSITIONAL_ONLY and not re.match(r'[a-z_]\w*$', name, re.I):
- msg = '{0!r} is not a valid parameter name'.format(name)
- raise ValueError(msg)
- self._name = name
-
- self._partial_kwarg = _partial_kwarg
-
- @property
- def name(self):
- return self._name
-
- @property
- def default(self):
- return self._default
-
- @property
- def annotation(self):
- return self._annotation
-
- @property
- def kind(self):
- return self._kind
-
- def replace(self, name=_void, kind=_void, annotation=_void,
- default=_void, _partial_kwarg=_void):
- '''Creates a customized copy of the Parameter.'''
-
- if name is _void:
- name = self._name
-
- if kind is _void:
- kind = self._kind
-
- if annotation is _void:
- annotation = self._annotation
-
- if default is _void:
- default = self._default
-
- if _partial_kwarg is _void:
- _partial_kwarg = self._partial_kwarg
-
- return type(self)(name, kind, default=default, annotation=annotation,
- _partial_kwarg=_partial_kwarg)
-
- def __str__(self):
- kind = self.kind
-
- formatted = self._name
- if kind == _POSITIONAL_ONLY:
- if formatted is None:
- formatted = ''
- formatted = '<{0}>'.format(formatted)
-
- # Add annotation and default value
- if self._annotation is not _empty:
- formatted = '{0}:{1}'.format(formatted,
- formatannotation(self._annotation))
-
- if self._default is not _empty:
- formatted = '{0}={1}'.format(formatted, repr(self._default))
-
- if kind == _VAR_POSITIONAL:
- formatted = '*' + formatted
- elif kind == _VAR_KEYWORD:
- formatted = '**' + formatted
-
- return formatted
-
- def __repr__(self):
- return '<{0} at {1:#x} {2!r}>'.format(self.__class__.__name__,
- id(self), self.name)
-
- def __hash__(self):
- msg = "unhashable type: '{0}'".format(self.__class__.__name__)
- raise TypeError(msg)
-
- def __eq__(self, other):
- return (issubclass(other.__class__, Parameter) and
- self._name == other._name and
- self._kind == other._kind and
- self._default == other._default and
- self._annotation == other._annotation)
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
-
-class BoundArguments(object):
- '''Result of `Signature.bind` call. Holds the mapping of arguments
- to the function's parameters.
-
- Has the following public attributes:
-
- * arguments : OrderedDict
- An ordered mutable mapping of parameters' names to arguments' values.
- Does not contain arguments' default values.
- * signature : Signature
- The Signature object that created this instance.
- * args : tuple
- Tuple of positional arguments values.
- * kwargs : dict
- Dict of keyword arguments values.
- '''
-
- def __init__(self, signature, arguments):
- self.arguments = arguments
- self._signature = signature
-
- @property
- def signature(self):
- return self._signature
-
- @property
- def args(self):
- args = []
- for param_name, param in self._signature.parameters.items():
- if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
- param._partial_kwarg):
- # Keyword arguments mapped by 'functools.partial'
- # (Parameter._partial_kwarg is True) are mapped
- # in 'BoundArguments.kwargs', along with VAR_KEYWORD &
- # KEYWORD_ONLY
- break
-
- try:
- arg = self.arguments[param_name]
- except KeyError:
- # We're done here. Other arguments
- # will be mapped in 'BoundArguments.kwargs'
- break
- else:
- if param.kind == _VAR_POSITIONAL:
- # *args
- args.extend(arg)
- else:
- # plain argument
- args.append(arg)
-
- return tuple(args)
-
- @property
- def kwargs(self):
- kwargs = {}
- kwargs_started = False
- for param_name, param in self._signature.parameters.items():
- if not kwargs_started:
- if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
- param._partial_kwarg):
- kwargs_started = True
- else:
- if param_name not in self.arguments:
- kwargs_started = True
- continue
-
- if not kwargs_started:
- continue
-
- try:
- arg = self.arguments[param_name]
- except KeyError:
- pass
- else:
- if param.kind == _VAR_KEYWORD:
- # **kwargs
- kwargs.update(arg)
- else:
- # plain keyword argument
- kwargs[param_name] = arg
-
- return kwargs
-
- def __hash__(self):
- msg = "unhashable type: '{0}'".format(self.__class__.__name__)
- raise TypeError(msg)
-
- def __eq__(self, other):
- return (issubclass(other.__class__, BoundArguments) and
- self.signature == other.signature and
- self.arguments == other.arguments)
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
-
-class Signature(object):
- '''A Signature object represents the overall signature of a function.
- It stores a Parameter object for each parameter accepted by the
- function, as well as information specific to the function itself.
-
- A Signature object has the following public attributes and methods:
-
- * parameters : OrderedDict
- An ordered mapping of parameters' names to the corresponding
- Parameter objects (keyword-only arguments are in the same order
- as listed in `code.co_varnames`).
- * return_annotation : object
- The annotation for the return type of the function if specified.
- If the function has no annotation for its return type, this
- attribute is not set.
- * bind(*args, **kwargs) -> BoundArguments
- Creates a mapping from positional and keyword arguments to
- parameters.
- * bind_partial(*args, **kwargs) -> BoundArguments
- Creates a partial mapping from positional and keyword arguments
- to parameters (simulating 'functools.partial' behavior.)
- '''
-
- __slots__ = ('_return_annotation', '_parameters')
-
- _parameter_cls = Parameter
- _bound_arguments_cls = BoundArguments
-
- empty = _empty
-
- def __init__(self, parameters=None, return_annotation=_empty,
- __validate_parameters__=True):
- '''Constructs Signature from the given list of Parameter
- objects and 'return_annotation'. All arguments are optional.
- '''
-
- if parameters is None:
- params = OrderedDict()
- else:
- if __validate_parameters__:
- params = OrderedDict()
- top_kind = _POSITIONAL_ONLY
-
- for idx, param in enumerate(parameters):
- kind = param.kind
- if kind < top_kind:
- msg = 'wrong parameter order: {0} before {1}'
- msg = msg.format(top_kind, param.kind)
- raise ValueError(msg)
- else:
- top_kind = kind
-
- name = param.name
- if name is None:
- name = str(idx)
- param = param.replace(name=name)
-
- if name in params:
- msg = 'duplicate parameter name: {0!r}'.format(name)
- raise ValueError(msg)
- params[name] = param
- else:
- params = OrderedDict(((param.name, param)
- for param in parameters))
-
- self._parameters = params
- self._return_annotation = return_annotation
-
- @classmethod
- def from_function(cls, func):
- '''Constructs Signature for the given python function'''
-
- if not isinstance(func, types.FunctionType):
- raise TypeError('{0!r} is not a Python function'.format(func))
-
- Parameter = cls._parameter_cls
-
- # Parameter information.
- func_code = func.__code__
- pos_count = func_code.co_argcount
- arg_names = func_code.co_varnames
- positional = tuple(arg_names[:pos_count])
- keyword_only_count = getattr(func_code, 'co_kwonlyargcount', 0)
- keyword_only = arg_names[pos_count:(pos_count + keyword_only_count)]
- annotations = getattr(func, '__annotations__', {})
- defaults = func.__defaults__
- kwdefaults = getattr(func, '__kwdefaults__', None)
-
- if defaults:
- pos_default_count = len(defaults)
- else:
- pos_default_count = 0
-
- parameters = []
-
- # Non-keyword-only parameters w/o defaults.
- non_default_count = pos_count - pos_default_count
- for name in positional[:non_default_count]:
- annotation = annotations.get(name, _empty)
- parameters.append(Parameter(name, annotation=annotation,
- kind=_POSITIONAL_OR_KEYWORD))
-
- # ... w/ defaults.
- for offset, name in enumerate(positional[non_default_count:]):
- annotation = annotations.get(name, _empty)
- parameters.append(Parameter(name, annotation=annotation,
- kind=_POSITIONAL_OR_KEYWORD,
- default=defaults[offset]))
-
- # *args
- if func_code.co_flags & 0x04:
- name = arg_names[pos_count + keyword_only_count]
- annotation = annotations.get(name, _empty)
- parameters.append(Parameter(name, annotation=annotation,
- kind=_VAR_POSITIONAL))
-
- # Keyword-only parameters.
- for name in keyword_only:
- default = _empty
- if kwdefaults is not None:
- default = kwdefaults.get(name, _empty)
-
- annotation = annotations.get(name, _empty)
- parameters.append(Parameter(name, annotation=annotation,
- kind=_KEYWORD_ONLY,
- default=default))
- # **kwargs
- if func_code.co_flags & 0x08:
- index = pos_count + keyword_only_count
- if func_code.co_flags & 0x04:
- index += 1
-
- name = arg_names[index]
- annotation = annotations.get(name, _empty)
- parameters.append(Parameter(name, annotation=annotation,
- kind=_VAR_KEYWORD))
-
- return cls(parameters,
- return_annotation=annotations.get('return', _empty),
- __validate_parameters__=False)
-
- @property
- def parameters(self):
- try:
- return types.MappingProxyType(self._parameters)
- except AttributeError:
- return OrderedDict(self._parameters.items())
-
- @property
- def return_annotation(self):
- return self._return_annotation
-
- def replace(self, parameters=_void, return_annotation=_void):
- '''Creates a customized copy of the Signature.
- Pass 'parameters' and/or 'return_annotation' arguments
- to override them in the new copy.
- '''
-
- if parameters is _void:
- parameters = self.parameters.values()
-
- if return_annotation is _void:
- return_annotation = self._return_annotation
-
- return type(self)(parameters,
- return_annotation=return_annotation)
-
- def __hash__(self):
- msg = "unhashable type: '{0}'".format(self.__class__.__name__)
- raise TypeError(msg)
-
- def __eq__(self, other):
- if (not issubclass(type(other), Signature) or
- self.return_annotation != other.return_annotation or
- len(self.parameters) != len(other.parameters)):
- return False
-
- other_positions = dict((param, idx)
- for idx, param in enumerate(other.parameters.keys()))
-
- for idx, (param_name, param) in enumerate(self.parameters.items()):
- if param.kind == _KEYWORD_ONLY:
- try:
- other_param = other.parameters[param_name]
- except KeyError:
- return False
- else:
- if param != other_param:
- return False
- else:
- try:
- other_idx = other_positions[param_name]
- except KeyError:
- return False
- else:
- if (idx != other_idx or
- param != other.parameters[param_name]):
- return False
-
- return True
-
- def __ne__(self, other):
- return not self.__eq__(other)
-
- def _bind(self, args, kwargs, partial=False):
- '''Private method. Don't use directly.'''
-
- arguments = OrderedDict()
-
- parameters = iter(self.parameters.values())
- parameters_ex = ()
- arg_vals = iter(args)
-
- if partial:
- # Support for binding arguments to 'functools.partial' objects.
- # See 'functools.partial' case in 'signature()' implementation
- # for details.
- for param_name, param in self.parameters.items():
- if (param._partial_kwarg and param_name not in kwargs):
- # Simulating 'functools.partial' behavior
- kwargs[param_name] = param.default
-
- while True:
- # Let's iterate through the positional arguments and corresponding
- # parameters
- try:
- arg_val = next(arg_vals)
- except StopIteration:
- # No more positional arguments
- try:
- param = next(parameters)
- except StopIteration:
- # No more parameters. That's it. Just need to check that
- # we have no `kwargs` after this while loop
- break
- else:
- if param.kind == _VAR_POSITIONAL:
- # That's OK, just empty *args. Let's start parsing
- # kwargs
- break
- elif param.name in kwargs:
- if param.kind == _POSITIONAL_ONLY:
- msg = '{arg!r} parameter is positional only, ' \
- 'but was passed as a keyword'
- msg = msg.format(arg=param.name)
- raise TypeError(msg)
- parameters_ex = (param,)
- break
- elif (param.kind == _VAR_KEYWORD or
- param.default is not _empty):
- # That's fine too - we have a default value for this
- # parameter. So, lets start parsing `kwargs`, starting
- # with the current parameter
- parameters_ex = (param,)
- break
- else:
- if partial:
- parameters_ex = (param,)
- break
- else:
- msg = '{arg!r} parameter lacking default value'
- msg = msg.format(arg=param.name)
- raise TypeError(msg)
- else:
- # We have a positional argument to process
- try:
- param = next(parameters)
- except StopIteration:
- raise TypeError('too many positional arguments')
- else:
- if param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY):
- # Looks like we have no parameter for this positional
- # argument
- raise TypeError('too many positional arguments')
-
- if param.kind == _VAR_POSITIONAL:
- # We have an '*args'-like argument, let's fill it with
- # all positional arguments we have left and move on to
- # the next phase
- values = [arg_val]
- values.extend(arg_vals)
- arguments[param.name] = tuple(values)
- break
-
- if param.name in kwargs:
- raise TypeError('multiple values for argument '
- '{arg!r}'.format(arg=param.name))
-
- arguments[param.name] = arg_val
-
- # Now, we iterate through the remaining parameters to process
- # keyword arguments
- kwargs_param = None
- for param in itertools.chain(parameters_ex, parameters):
- if param.kind == _POSITIONAL_ONLY:
- # This should never happen in case of a properly built
- # Signature object (but let's have this check here
- # to ensure correct behaviour just in case)
- raise TypeError('{arg!r} parameter is positional only, '
- 'but was passed as a keyword'. \
- format(arg=param.name))
-
- if param.kind == _VAR_KEYWORD:
- # Memorize that we have a '**kwargs'-like parameter
- kwargs_param = param
- continue
-
- param_name = param.name
- try:
- arg_val = kwargs.pop(param_name)
- except KeyError:
- # We have no value for this parameter. It's fine though,
- # if it has a default value, or it is an '*args'-like
- # parameter, left alone by the processing of positional
- # arguments.
- if (not partial and param.kind != _VAR_POSITIONAL and
- param.default is _empty):
- raise TypeError('{arg!r} parameter lacking default value'. \
- format(arg=param_name))
-
- else:
- arguments[param_name] = arg_val
-
- if kwargs:
- if kwargs_param is not None:
- # Process our '**kwargs'-like parameter
- arguments[kwargs_param.name] = kwargs
- else:
- raise TypeError('too many keyword arguments')
-
- return self._bound_arguments_cls(self, arguments)
-
- def bind(self, *args, **kwargs):
- '''Get a BoundArguments object, that maps the passed `args`
- and `kwargs` to the function's signature. Raises `TypeError`
- if the passed arguments can not be bound.
- '''
- return self._bind(args, kwargs)
-
- def bind_partial(self, *args, **kwargs):
- '''Get a BoundArguments object, that partially maps the
- passed `args` and `kwargs` to the function's signature.
- Raises `TypeError` if the passed arguments can not be bound.
- '''
- return self._bind(args, kwargs, partial=True)
-
- def __str__(self):
- result = []
- render_kw_only_separator = True
- for idx, param in enumerate(self.parameters.values()):
- formatted = str(param)
-
- kind = param.kind
- if kind == _VAR_POSITIONAL:
- # OK, we have an '*args'-like parameter, so we won't need
- # a '*' to separate keyword-only arguments
- render_kw_only_separator = False
- elif kind == _KEYWORD_ONLY and render_kw_only_separator:
- # We have a keyword-only parameter to render and we haven't
- # rendered an '*args'-like parameter before, so add a '*'
- # separator to the parameters list ("foo(arg1, *, arg2)" case)
- result.append('*')
- # This condition should be only triggered once, so
- # reset the flag
- render_kw_only_separator = False
-
- result.append(formatted)
-
- rendered = '({0})'.format(', '.join(result))
-
- if self.return_annotation is not _empty:
- anno = formatannotation(self.return_annotation)
- rendered += ' -> {0}'.format(anno)
-
- return rendered
diff --git a/mloop/localsklearn/externals/joblib/__init__.py b/mloop/localsklearn/externals/joblib/__init__.py
deleted file mode 100644
index 970ccd6..0000000
--- a/mloop/localsklearn/externals/joblib/__init__.py
+++ /dev/null
@@ -1,137 +0,0 @@
-""" Joblib is a set of tools to provide **lightweight pipelining in
-Python**. In particular, joblib offers:
-
- 1. transparent disk-caching of the output values and lazy re-evaluation
- (memoize pattern)
-
- 2. easy simple parallel computing
-
- 3. logging and tracing of the execution
-
-Joblib is optimized to be **fast** and **robust** in particular on large
-data and has specific optimizations for `numpy` arrays. It is
-**BSD-licensed**.
-
-
- ============================== ============================================
- **User documentation**: http://pythonhosted.org/joblib
-
- **Download packages**: http://pypi.python.org/pypi/joblib#downloads
-
- **Source code**: http://github.com/joblib/joblib
-
- **Report issues**: http://github.com/joblib/joblib/issues
- ============================== ============================================
-
-
-Vision
---------
-
-The vision is to provide tools to easily achieve better performance and
-reproducibility when working with long running jobs.
-
- * **Avoid computing twice the same thing**: code is rerun over an
- over, for instance when prototyping computational-heavy jobs (as in
- scientific development), but hand-crafted solution to alleviate this
- issue is error-prone and often leads to unreproducible results
-
- * **Persist to disk transparently**: persisting in an efficient way
- arbitrary objects containing large data is hard. Using
- joblib's caching mechanism avoids hand-written persistence and
- implicitly links the file on disk to the execution context of
- the original Python object. As a result, joblib's persistence is
- good for resuming an application status or computational job, eg
- after a crash.
-
-Joblib strives to address these problems while **leaving your code and
-your flow control as unmodified as possible** (no framework, no new
-paradigms).
-
-Main features
-------------------
-
-1) **Transparent and fast disk-caching of output value:** a memoize or
- make-like functionality for Python functions that works well for
- arbitrary Python objects, including very large numpy arrays. Separate
- persistence and flow-execution logic from domain logic or algorithmic
- code by writing the operations as a set of steps with well-defined
- inputs and outputs: Python functions. Joblib can save their
- computation to disk and rerun it only if necessary::
-
- >>> from sklearn.externals.joblib import Memory
- >>> mem = Memory(cachedir='/tmp/joblib')
- >>> import numpy as np
- >>> a = np.vander(np.arange(3)).astype(np.float)
- >>> square = mem.cache(np.square)
- >>> b = square(a) # doctest: +ELLIPSIS
- ________________________________________________________________________________
- [Memory] Calling square...
- square(array([[ 0., 0., 1.],
- [ 1., 1., 1.],
- [ 4., 2., 1.]]))
- ___________________________________________________________square - 0...s, 0.0min
-
- >>> c = square(a)
- >>> # The above call did not trigger an evaluation
-
-2) **Embarrassingly parallel helper:** to make it easy to write readable
- parallel code and debug it quickly::
-
- >>> from sklearn.externals.joblib import Parallel, delayed
- >>> from math import sqrt
- >>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
- [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
-
-
-3) **Logging/tracing:** The different functionalities will
- progressively acquire better logging mechanism to help track what
- has been ran, and capture I/O easily. In addition, Joblib will
- provide a few I/O primitives, to easily define logging and
- display streams, and provide a way of compiling a report.
- We want to be able to quickly inspect what has been run.
-
-4) **Fast compressed Persistence**: a replacement for pickle to work
- efficiently on Python objects containing large data (
- *joblib.dump* & *joblib.load* ).
-
-..
- >>> import shutil ; shutil.rmtree('/tmp/joblib/')
-
-"""
-
-# PEP0440 compatible formatted version, see:
-# https://www.python.org/dev/peps/pep-0440/
-#
-# Generic release markers:
-# X.Y
-# X.Y.Z # For bugfix releases
-#
-# Admissible pre-release markers:
-# X.YaN # Alpha release
-# X.YbN # Beta release
-# X.YrcN # Release Candidate
-# X.Y # Final release
-#
-# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
-# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
-#
-__version__ = '0.10.0'
-
-
-from .memory import Memory, MemorizedResult
-from .logger import PrintTime
-from .logger import Logger
-from .hashing import hash
-from .numpy_pickle import dump
-from .numpy_pickle import load
-from .parallel import Parallel
-from .parallel import delayed
-from .parallel import cpu_count
-from .parallel import register_parallel_backend
-from .parallel import parallel_backend
-from .parallel import effective_n_jobs
-
-
-__all__ = ['Memory', 'MemorizedResult', 'PrintTime', 'Logger', 'hash', 'dump',
- 'load', 'Parallel', 'delayed', 'cpu_count', 'effective_n_jobs',
- 'register_parallel_backend', 'parallel_backend']
diff --git a/mloop/localsklearn/externals/joblib/_compat.py b/mloop/localsklearn/externals/joblib/_compat.py
deleted file mode 100644
index 6309fa5..0000000
--- a/mloop/localsklearn/externals/joblib/_compat.py
+++ /dev/null
@@ -1,20 +0,0 @@
-"""
-Compatibility layer for Python 3/Python 2 single codebase
-"""
-import sys
-
-PY3_OR_LATER = sys.version_info[0] >= 3
-PY26 = sys.version_info[:2] == (2, 6)
-PY27 = sys.version_info[:2] == (2, 7)
-
-try:
- _basestring = basestring
- _bytes_or_unicode = (str, unicode)
-except NameError:
- _basestring = str
- _bytes_or_unicode = (bytes, str)
-
-
-def with_metaclass(meta, *bases):
- """Create a base class with a metaclass."""
- return meta("NewBase", bases, {})
diff --git a/mloop/localsklearn/externals/joblib/_memory_helpers.py b/mloop/localsklearn/externals/joblib/_memory_helpers.py
deleted file mode 100644
index 857ad29..0000000
--- a/mloop/localsklearn/externals/joblib/_memory_helpers.py
+++ /dev/null
@@ -1,105 +0,0 @@
-try:
- # Available in Python 3
- from tokenize import open as open_py_source
-
-except ImportError:
- # Copied from python3 tokenize
- from codecs import lookup, BOM_UTF8
- import re
- from io import TextIOWrapper, open
- cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
-
- def _get_normal_name(orig_enc):
- """Imitates get_normal_name in tokenizer.c."""
- # Only care about the first 12 characters.
- enc = orig_enc[:12].lower().replace("_", "-")
- if enc == "utf-8" or enc.startswith("utf-8-"):
- return "utf-8"
- if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
- enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
- return "iso-8859-1"
- return orig_enc
-
- def _detect_encoding(readline):
- """
- The detect_encoding() function is used to detect the encoding that
- should be used to decode a Python source file. It requires one
- argment, readline, in the same way as the tokenize() generator.
-
- It will call readline a maximum of twice, and return the encoding used
- (as a string) and a list of any lines (left as bytes) it has read in.
-
- It detects the encoding from the presence of a utf-8 bom or an encoding
- cookie as specified in pep-0263. If both a bom and a cookie are
- present, but disagree, a SyntaxError will be raised. If the encoding
- cookie is an invalid charset, raise a SyntaxError. Note that if a
- utf-8 bom is found, 'utf-8-sig' is returned.
-
- If no encoding is specified, then the default of 'utf-8' will be
- returned.
- """
- bom_found = False
- encoding = None
- default = 'utf-8'
-
- def read_or_stop():
- try:
- return readline()
- except StopIteration:
- return b''
-
- def find_cookie(line):
- try:
- line_string = line.decode('ascii')
- except UnicodeDecodeError:
- return None
-
- matches = cookie_re.findall(line_string)
- if not matches:
- return None
- encoding = _get_normal_name(matches[0])
- try:
- codec = lookup(encoding)
- except LookupError:
- # This behaviour mimics the Python interpreter
- raise SyntaxError("unknown encoding: " + encoding)
-
- if bom_found:
- if codec.name != 'utf-8':
- # This behaviour mimics the Python interpreter
- raise SyntaxError('encoding problem: utf-8')
- encoding += '-sig'
- return encoding
-
- first = read_or_stop()
- if first.startswith(BOM_UTF8):
- bom_found = True
- first = first[3:]
- default = 'utf-8-sig'
- if not first:
- return default, []
-
- encoding = find_cookie(first)
- if encoding:
- return encoding, [first]
-
- second = read_or_stop()
- if not second:
- return default, [first]
-
- encoding = find_cookie(second)
- if encoding:
- return encoding, [first, second]
-
- return default, [first, second]
-
- def open_py_source(filename):
- """Open a file in read only mode using the encoding detected by
- detect_encoding().
- """
- buffer = open(filename, 'rb')
- encoding, lines = _detect_encoding(buffer.readline)
- buffer.seek(0)
- text = TextIOWrapper(buffer, encoding, line_buffering=True)
- text.mode = 'r'
- return text
diff --git a/mloop/localsklearn/externals/joblib/_multiprocessing_helpers.py b/mloop/localsklearn/externals/joblib/_multiprocessing_helpers.py
deleted file mode 100644
index 4111a26..0000000
--- a/mloop/localsklearn/externals/joblib/_multiprocessing_helpers.py
+++ /dev/null
@@ -1,39 +0,0 @@
-"""Helper module to factorize the conditional multiprocessing import logic
-
-We use a distinct module to simplify import statements and avoid introducing
-circular dependencies (for instance for the assert_spawning name).
-"""
-import os
-import warnings
-
-
-# Obtain possible configuration from the environment, assuming 1 (on)
-# by default, upon 0 set to None. Should instructively fail if some non
-# 0/1 value is set.
-mp = int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)) or None
-if mp:
- try:
- import multiprocessing as mp
- except ImportError:
- mp = None
-
-# 2nd stage: validate that locking is available on the system and
-# issue a warning if not
-if mp is not None:
- try:
- _sem = mp.Semaphore()
- del _sem # cleanup
- except (ImportError, OSError) as e:
- mp = None
- warnings.warn('%s. joblib will operate in serial mode' % (e,))
-
-
-# 3rd stage: backward compat for the assert_spawning helper
-if mp is not None:
- try:
- # Python 3.4+
- from multiprocessing.context import assert_spawning
- except ImportError:
- from multiprocessing.forking import assert_spawning
-else:
- assert_spawning = None
diff --git a/mloop/localsklearn/externals/joblib/_parallel_backends.py b/mloop/localsklearn/externals/joblib/_parallel_backends.py
deleted file mode 100644
index e281bd5..0000000
--- a/mloop/localsklearn/externals/joblib/_parallel_backends.py
+++ /dev/null
@@ -1,356 +0,0 @@
-"""
-Backends for embarrassingly parallel code.
-"""
-
-import gc
-import os
-import sys
-import warnings
-import threading
-from abc import ABCMeta, abstractmethod
-
-from .format_stack import format_exc
-from .my_exceptions import WorkerInterrupt, TransportableException
-from ._multiprocessing_helpers import mp
-from ._compat import with_metaclass
-if mp is not None:
- from .pool import MemmapingPool
- from multiprocessing.pool import ThreadPool
-
-
-class ParallelBackendBase(with_metaclass(ABCMeta)):
- """Helper abc which defines all methods a ParallelBackend must implement"""
-
- @abstractmethod
- def effective_n_jobs(self, n_jobs):
- """Determine the number of jobs that can actually run in parallel
-
- n_jobs is the is the number of workers requested by the callers.
- Passing n_jobs=-1 means requesting all available workers for instance
- matching the number of CPU cores on the worker host(s).
-
- This method should return a guesstimate of the number of workers that
- can actually perform work concurrently. The primary use case is to make
- it possible for the caller to know in how many chunks to slice the
- work.
-
- In general working on larger data chunks is more efficient (less
- scheduling overhead and better use of CPU cache prefetching heuristics)
- as long as all the workers have enough work to do.
- """
-
- @abstractmethod
- def apply_async(self, func, callback=None):
- """Schedule a func to be run"""
-
- def configure(self, n_jobs=1, parallel=None, **backend_args):
- """Reconfigure the backend and return the number of workers.
-
- This makes it possible to reuse an existing backend instance for
- successive independent calls to Parallel with different parameters.
- """
- self.parallel = parallel
- return self.effective_n_jobs(n_jobs)
-
- def terminate(self):
- """Shutdown the process or thread pool"""
-
- def compute_batch_size(self):
- """Determine the optimal batch size"""
- return 1
-
- def batch_completed(self, batch_size, duration):
- """Callback indicate how long it took to run a batch"""
-
- def get_exceptions(self):
- """List of exception types to be captured."""
- return []
-
- def abort_everything(self, ensure_ready=True):
- """Abort any running tasks
-
- This is called when an exception has been raised when executing a tasks
- and all the remaining tasks will be ignored and can therefore be
- aborted to spare computation resources.
-
- If ensure_ready is True, the backend should be left in an operating
- state as future tasks might be re-submitted via that same backend
- instance.
-
- If ensure_ready is False, the implementer of this method can decide
- to leave the backend in a closed / terminated state as no new task
- are expected to be submitted to this backend.
-
- Setting ensure_ready to False is an optimization that can be leveraged
- when aborting tasks via killing processes from a local process pool
- managed by the backend it-self: if we expect no new tasks, there is no
- point in re-creating a new working pool.
- """
- # Does nothing by default: to be overriden in subclasses when canceling
- # tasks is possible.
- pass
-
-
-class SequentialBackend(ParallelBackendBase):
- """A ParallelBackend which will execute all batches sequentially.
-
- Does not use/create any threading objects, and hence has minimal
- overhead. Used when n_jobs == 1.
- """
-
- def effective_n_jobs(self, n_jobs):
- """Determine the number of jobs which are going to run in parallel"""
- if n_jobs == 0:
- raise ValueError('n_jobs == 0 in Parallel has no meaning')
- return 1
-
- def apply_async(self, func, callback=None):
- """Schedule a func to be run"""
- result = ImmediateResult(func)
- if callback:
- callback(result)
- return result
-
-
-class PoolManagerMixin(object):
- """A helper class for managing pool of workers."""
-
- def effective_n_jobs(self, n_jobs):
- """Determine the number of jobs which are going to run in parallel"""
- if n_jobs == 0:
- raise ValueError('n_jobs == 0 in Parallel has no meaning')
- elif mp is None or n_jobs is None:
- # multiprocessing is not available or disabled, fallback
- # to sequential mode
- return 1
- elif n_jobs < 0:
- n_jobs = max(mp.cpu_count() + 1 + n_jobs, 1)
- return n_jobs
-
- def terminate(self):
- """Shutdown the process or thread pool"""
- if self._pool is not None:
- self._pool.close()
- self._pool.terminate() # terminate does a join()
- self._pool = None
-
- def apply_async(self, func, callback=None):
- """Schedule a func to be run"""
- return self._pool.apply_async(SafeFunction(func), callback=callback)
-
- def abort_everything(self, ensure_ready=True):
- """Shutdown the pool and restart a new one with the same parameters"""
- self.terminate()
- if ensure_ready:
- self.configure(n_jobs=self.parallel.n_jobs, parallel=self.parallel,
- **self.parallel._backend_args)
-
-
-class AutoBatchingMixin(object):
- """A helper class for automagically batching jobs."""
-
- # In seconds, should be big enough to hide multiprocessing dispatching
- # overhead.
- # This settings was found by running benchmarks/bench_auto_batching.py
- # with various parameters on various platforms.
- MIN_IDEAL_BATCH_DURATION = .2
-
- # Should not be too high to avoid stragglers: long jobs running alone
- # on a single worker while other workers have no work to process any more.
- MAX_IDEAL_BATCH_DURATION = 2
-
- # Batching counters
- _effective_batch_size = 1
- _smoothed_batch_duration = 0.0
-
- def compute_batch_size(self):
- """Determine the optimal batch size"""
- old_batch_size = self._effective_batch_size
- batch_duration = self._smoothed_batch_duration
- if (batch_duration > 0 and
- batch_duration < self.MIN_IDEAL_BATCH_DURATION):
- # The current batch size is too small: the duration of the
- # processing of a batch of task is not large enough to hide
- # the scheduling overhead.
- ideal_batch_size = int(old_batch_size *
- self.MIN_IDEAL_BATCH_DURATION /
- batch_duration)
- # Multiply by two to limit oscilations between min and max.
- batch_size = max(2 * ideal_batch_size, 1)
- self._effective_batch_size = batch_size
- if self.parallel.verbose >= 10:
- self.parallel._print(
- "Batch computation too fast (%.4fs.) "
- "Setting batch_size=%d.", (batch_duration, batch_size))
- elif (batch_duration > self.MAX_IDEAL_BATCH_DURATION and
- old_batch_size >= 2):
- # The current batch size is too big. If we schedule overly long
- # running batches some CPUs might wait with nothing left to do
- # while a couple of CPUs a left processing a few long running
- # batches. Better reduce the batch size a bit to limit the
- # likelihood of scheduling such stragglers.
- batch_size = old_batch_size // 2
- self._effective_batch_size = batch_size
- if self.parallel.verbose >= 10:
- self.parallel._print(
- "Batch computation too slow (%.4fs.) "
- "Setting batch_size=%d.", (batch_duration, batch_size))
- else:
- # No batch size adjustment
- batch_size = old_batch_size
-
- if batch_size != old_batch_size:
- # Reset estimation of the smoothed mean batch duration: this
- # estimate is updated in the multiprocessing apply_async
- # CallBack as long as the batch_size is constant. Therefore
- # we need to reset the estimate whenever we re-tune the batch
- # size.
- self._smoothed_batch_duration = 0
-
- return batch_size
-
- def batch_completed(self, batch_size, duration):
- """Callback indicate how long it took to run a batch"""
- if batch_size == self._effective_batch_size:
- # Update the smoothed streaming estimate of the duration of a batch
- # from dispatch to completion
- old_duration = self._smoothed_batch_duration
- if old_duration == 0:
- # First record of duration for this batch size after the last
- # reset.
- new_duration = duration
- else:
- # Update the exponentially weighted average of the duration of
- # batch for the current effective size.
- new_duration = 0.8 * old_duration + 0.2 * duration
- self._smoothed_batch_duration = new_duration
-
-
-class ThreadingBackend(PoolManagerMixin, ParallelBackendBase):
- """A ParallelBackend which will use a thread pool to execute batches in.
-
- This is a low-overhead backend but it suffers from the Python Global
- Interpreter Lock if the called function relies a lot on Python objects.
- Mostly useful when the execution bottleneck is a compiled extension that
- explicitly releases the GIL (for instance a Cython loop wrapped in a
- "with nogil" block or an expensive call to a library such as NumPy).
- """
-
- def configure(self, n_jobs=1, parallel=None, **backend_args):
- """Build a process or thread pool and return the number of workers"""
- n_jobs = self.effective_n_jobs(n_jobs)
- if n_jobs == 1:
- # Avoid unnecessary overhead and use sequential backend instead.
- raise FallbackToBackend(SequentialBackend())
- self.parallel = parallel
- self._pool = ThreadPool(n_jobs)
- return n_jobs
-
-
-class MultiprocessingBackend(PoolManagerMixin, AutoBatchingMixin,
- ParallelBackendBase):
- """A ParallelBackend which will use a multiprocessing.Pool.
-
- Will introduce some communication and memory overhead when exchanging
- input and output data with the with the worker Python processes.
- However, does not suffer from the Python Global Interpreter Lock.
- """
-
- # Environment variables to protect against bad situations when nesting
- JOBLIB_SPAWNED_PROCESS = "__JOBLIB_SPAWNED_PARALLEL__"
-
- def effective_n_jobs(self, n_jobs):
- """Determine the number of jobs which are going to run in parallel.
-
- This also checks if we are attempting to create a nested parallel
- loop.
- """
- if mp.current_process().daemon:
- # Daemonic processes cannot have children
- warnings.warn(
- 'Multiprocessing-backed parallel loops cannot be nested,'
- ' setting n_jobs=1',
- stacklevel=3)
- return 1
-
- elif threading.current_thread().name != 'MainThread':
- # Prevent posix fork inside in non-main posix threads
- warnings.warn(
- 'Multiprocessing backed parallel loops cannot be nested'
- ' below threads, setting n_jobs=1',
- stacklevel=3)
- return 1
-
- return super(MultiprocessingBackend, self).effective_n_jobs(n_jobs)
-
- def configure(self, n_jobs=1, parallel=None, **backend_args):
- """Build a process or thread pool and return the number of workers"""
- n_jobs = self.effective_n_jobs(n_jobs)
- if n_jobs == 1:
- raise FallbackToBackend(SequentialBackend())
-
- already_forked = int(os.environ.get(self.JOBLIB_SPAWNED_PROCESS, 0))
- if already_forked:
- raise ImportError(
- '[joblib] Attempting to do parallel computing '
- 'without protecting your import on a system that does '
- 'not support forking. To use parallel-computing in a '
- 'script, you must protect your main loop using "if '
- "__name__ == '__main__'"
- '". Please see the joblib documentation on Parallel '
- 'for more information')
- # Set an environment variable to avoid infinite loops
- os.environ[self.JOBLIB_SPAWNED_PROCESS] = '1'
-
- # Make sure to free as much memory as possible before forking
- gc.collect()
- self._pool = MemmapingPool(n_jobs, **backend_args)
- self.parallel = parallel
- return n_jobs
-
- def terminate(self):
- """Shutdown the process or thread pool"""
- super(MultiprocessingBackend, self).terminate()
- if self.JOBLIB_SPAWNED_PROCESS in os.environ:
- del os.environ[self.JOBLIB_SPAWNED_PROCESS]
-
-
-class ImmediateResult(object):
- def __init__(self, batch):
- # Don't delay the application, to avoid keeping the input
- # arguments in memory
- self.results = batch()
-
- def get(self):
- return self.results
-
-
-class SafeFunction(object):
- """Wrapper that handles the serialization of exception tracebacks.
-
- If an exception is triggered when calling the inner function, a copy of
- the full traceback is captured to make it possible to serialize
- it so that it can be rendered in a different Python process.
- """
- def __init__(self, func):
- self.func = func
-
- def __call__(self, *args, **kwargs):
- try:
- return self.func(*args, **kwargs)
- except KeyboardInterrupt:
- # We capture the KeyboardInterrupt and reraise it as
- # something different, as multiprocessing does not
- # interrupt processing for a KeyboardInterrupt
- raise WorkerInterrupt()
- except:
- e_type, e_value, e_tb = sys.exc_info()
- text = format_exc(e_type, e_value, e_tb, context=10, tb_offset=1)
- raise TransportableException(text, e_type)
-
-
-class FallbackToBackend(Exception):
- """Raised when configuration should fallback to another backend"""
-
- def __init__(self, backend):
- self.backend = backend
diff --git a/mloop/localsklearn/externals/joblib/disk.py b/mloop/localsklearn/externals/joblib/disk.py
deleted file mode 100644
index 30ad100..0000000
--- a/mloop/localsklearn/externals/joblib/disk.py
+++ /dev/null
@@ -1,106 +0,0 @@
-"""
-Disk management utilities.
-"""
-
-# Authors: Gael Varoquaux
-# Lars Buitinck
-# Copyright (c) 2010 Gael Varoquaux
-# License: BSD Style, 3 clauses.
-
-
-import errno
-import os
-import shutil
-import sys
-import time
-
-
-def disk_used(path):
- """ Return the disk usage in a directory."""
- size = 0
- for file in os.listdir(path) + ['.']:
- stat = os.stat(os.path.join(path, file))
- if hasattr(stat, 'st_blocks'):
- size += stat.st_blocks * 512
- else:
- # on some platform st_blocks is not available (e.g., Windows)
- # approximate by rounding to next multiple of 512
- size += (stat.st_size // 512 + 1) * 512
- # We need to convert to int to avoid having longs on some systems (we
- # don't want longs to avoid problems we SQLite)
- return int(size / 1024.)
-
-
-def memstr_to_bytes(text):
- """ Convert a memory text to its value in bytes.
- """
- kilo = 1024
- units = dict(K=kilo, M=kilo ** 2, G=kilo ** 3)
- try:
- size = int(units[text[-1]] * float(text[:-1]))
- except (KeyError, ValueError):
- raise ValueError(
- "Invalid literal for size give: %s (type %s) should be "
- "alike '10G', '500M', '50K'." % (text, type(text)))
- return size
-
-
-def mkdirp(d):
- """Ensure directory d exists (like mkdir -p on Unix)
- No guarantee that the directory is writable.
- """
- try:
- os.makedirs(d)
- except OSError as e:
- if e.errno != errno.EEXIST:
- raise
-
-
-# if a rmtree operation fails in rm_subdirs, wait for this much time (in secs),
-# then retry once. if it still fails, raise the exception
-RM_SUBDIRS_RETRY_TIME = 0.1
-
-
-def rm_subdirs(path, onerror=None):
- """Remove all subdirectories in this path.
-
- The directory indicated by `path` is left in place, and its subdirectories
- are erased.
-
- If onerror is set, it is called to handle the error with arguments (func,
- path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
- path is the argument to that function that caused it to fail; and
- exc_info is a tuple returned by sys.exc_info(). If onerror is None,
- an exception is raised.
- """
-
- # NOTE this code is adapted from the one in shutil.rmtree, and is
- # just as fast
-
- names = []
- try:
- names = os.listdir(path)
- except os.error as err:
- if onerror is not None:
- onerror(os.listdir, path, sys.exc_info())
- else:
- raise
-
- for name in names:
- fullname = os.path.join(path, name)
- if os.path.isdir(fullname):
- if onerror is not None:
- shutil.rmtree(fullname, False, onerror)
- else:
- # allow the rmtree to fail once, wait and re-try.
- # if the error is raised again, fail
- err_count = 0
- while True:
- try:
- shutil.rmtree(fullname, False, None)
- break
- except os.error:
- if err_count > 0:
- raise
- err_count += 1
- time.sleep(RM_SUBDIRS_RETRY_TIME)
diff --git a/mloop/localsklearn/externals/joblib/format_stack.py b/mloop/localsklearn/externals/joblib/format_stack.py
deleted file mode 100644
index ad28a86..0000000
--- a/mloop/localsklearn/externals/joblib/format_stack.py
+++ /dev/null
@@ -1,415 +0,0 @@
-"""
-Represent an exception with a lot of information.
-
-Provides 2 useful functions:
-
-format_exc: format an exception into a complete traceback, with full
- debugging instruction.
-
-format_outer_frames: format the current position in the stack call.
-
-Adapted from IPython's VerboseTB.
-"""
-# Authors: Gael Varoquaux < gael dot varoquaux at normalesup dot org >
-# Nathaniel Gray
-# Fernando Perez
-# Copyright: 2010, Gael Varoquaux
-# 2001-2004, Fernando Perez
-# 2001 Nathaniel Gray
-# License: BSD 3 clause
-
-
-import inspect
-import keyword
-import linecache
-import os
-import pydoc
-import sys
-import time
-import tokenize
-import traceback
-
-try: # Python 2
- generate_tokens = tokenize.generate_tokens
-except AttributeError: # Python 3
- generate_tokens = tokenize.tokenize
-
-INDENT = ' ' * 8
-
-
-###############################################################################
-# some internal-use functions
-def safe_repr(value):
- """Hopefully pretty robust repr equivalent."""
- # this is pretty horrible but should always return *something*
- try:
- return pydoc.text.repr(value)
- except KeyboardInterrupt:
- raise
- except:
- try:
- return repr(value)
- except KeyboardInterrupt:
- raise
- except:
- try:
- # all still in an except block so we catch
- # getattr raising
- name = getattr(value, '__name__', None)
- if name:
- # ick, recursion
- return safe_repr(name)
- klass = getattr(value, '__class__', None)
- if klass:
- return '%s instance' % safe_repr(klass)
- except KeyboardInterrupt:
- raise
- except:
- return 'UNRECOVERABLE REPR FAILURE'
-
-
-def eq_repr(value, repr=safe_repr):
- return '=%s' % repr(value)
-
-
-###############################################################################
-def uniq_stable(elems):
- """uniq_stable(elems) -> list
-
- Return from an iterable, a list of all the unique elements in the input,
- but maintaining the order in which they first appear.
-
- A naive solution to this problem which just makes a dictionary with the
- elements as keys fails to respect the stability condition, since
- dictionaries are unsorted by nature.
-
- Note: All elements in the input must be hashable.
- """
- unique = []
- unique_set = set()
- for nn in elems:
- if nn not in unique_set:
- unique.append(nn)
- unique_set.add(nn)
- return unique
-
-
-###############################################################################
-def fix_frame_records_filenames(records):
- """Try to fix the filenames in each record from inspect.getinnerframes().
-
- Particularly, modules loaded from within zip files have useless filenames
- attached to their code object, and inspect.getinnerframes() just uses it.
- """
- fixed_records = []
- for frame, filename, line_no, func_name, lines, index in records:
- # Look inside the frame's globals dictionary for __file__, which should
- # be better.
- better_fn = frame.f_globals.get('__file__', None)
- if isinstance(better_fn, str):
- # Check the type just in case someone did something weird with
- # __file__. It might also be None if the error occurred during
- # import.
- filename = better_fn
- fixed_records.append((frame, filename, line_no, func_name, lines,
- index))
- return fixed_records
-
-
-def _fixed_getframes(etb, context=1, tb_offset=0):
- LNUM_POS, LINES_POS, INDEX_POS = 2, 4, 5
-
- records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
-
- # If the error is at the console, don't build any context, since it would
- # otherwise produce 5 blank lines printed out (there is no file at the
- # console)
- rec_check = records[tb_offset:]
- try:
- rname = rec_check[0][1]
- if rname == '' or rname.endswith(''):
- return rec_check
- except IndexError:
- pass
-
- aux = traceback.extract_tb(etb)
- assert len(records) == len(aux)
- for i, (file, lnum, _, _) in enumerate(aux):
- maybeStart = lnum - 1 - context // 2
- start = max(maybeStart, 0)
- end = start + context
- lines = linecache.getlines(file)[start:end]
- # pad with empty lines if necessary
- if maybeStart < 0:
- lines = (['\n'] * -maybeStart) + lines
- if len(lines) < context:
- lines += ['\n'] * (context - len(lines))
- buf = list(records[i])
- buf[LNUM_POS] = lnum
- buf[INDEX_POS] = lnum - 1 - start
- buf[LINES_POS] = lines
- records[i] = tuple(buf)
- return records[tb_offset:]
-
-
-def _format_traceback_lines(lnum, index, lines, lvals=None):
- numbers_width = 7
- res = []
- i = lnum - index
-
- for line in lines:
- if i == lnum:
- # This is the line with the error
- pad = numbers_width - len(str(i))
- if pad >= 3:
- marker = '-' * (pad - 3) + '-> '
- elif pad == 2:
- marker = '> '
- elif pad == 1:
- marker = '>'
- else:
- marker = ''
- num = marker + str(i)
- else:
- num = '%*s' % (numbers_width, i)
- line = '%s %s' % (num, line)
-
- res.append(line)
- if lvals and i == lnum:
- res.append(lvals + '\n')
- i = i + 1
- return res
-
-
-def format_records(records): # , print_globals=False):
- # Loop over all records printing context and info
- frames = []
- abspath = os.path.abspath
- for frame, file, lnum, func, lines, index in records:
- try:
- file = file and abspath(file) or '?'
- except OSError:
- # if file is '' or something not in the filesystem,
- # the abspath call will throw an OSError. Just ignore it and
- # keep the original file string.
- pass
-
- if file.endswith('.pyc'):
- file = file[:-4] + '.py'
-
- link = file
-
- args, varargs, varkw, locals = inspect.getargvalues(frame)
-
- if func == '?':
- call = ''
- else:
- # Decide whether to include variable details or not
- try:
- call = 'in %s%s' % (func, inspect.formatargvalues(args,
- varargs, varkw, locals,
- formatvalue=eq_repr))
- except KeyError:
- # Very odd crash from inspect.formatargvalues(). The
- # scenario under which it appeared was a call to
- # view(array,scale) in NumTut.view.view(), where scale had
- # been defined as a scalar (it should be a tuple). Somehow
- # inspect messes up resolving the argument list of view()
- # and barfs out. At some point I should dig into this one
- # and file a bug report about it.
- print("\nJoblib's exception reporting continues...\n")
- call = 'in %s(***failed resolving arguments***)' % func
-
- # Initialize a list of names on the current line, which the
- # tokenizer below will populate.
- names = []
-
- def tokeneater(token_type, token, start, end, line):
- """Stateful tokeneater which builds dotted names.
-
- The list of names it appends to (from the enclosing scope) can
- contain repeated composite names. This is unavoidable, since
- there is no way to disambiguate partial dotted structures until
- the full list is known. The caller is responsible for pruning
- the final list of duplicates before using it."""
-
- # build composite names
- if token == '.':
- try:
- names[-1] += '.'
- # store state so the next token is added for x.y.z names
- tokeneater.name_cont = True
- return
- except IndexError:
- pass
- if token_type == tokenize.NAME and token not in keyword.kwlist:
- if tokeneater.name_cont:
- # Dotted names
- names[-1] += token
- tokeneater.name_cont = False
- else:
- # Regular new names. We append everything, the caller
- # will be responsible for pruning the list later. It's
- # very tricky to try to prune as we go, b/c composite
- # names can fool us. The pruning at the end is easy
- # to do (or the caller can print a list with repeated
- # names if so desired.
- names.append(token)
- elif token_type == tokenize.NEWLINE:
- raise IndexError
- # we need to store a bit of state in the tokenizer to build
- # dotted names
- tokeneater.name_cont = False
-
- def linereader(file=file, lnum=[lnum], getline=linecache.getline):
- line = getline(file, lnum[0])
- lnum[0] += 1
- return line
-
- # Build the list of names on this line of code where the exception
- # occurred.
- try:
- # This builds the names list in-place by capturing it from the
- # enclosing scope.
- for token in generate_tokens(linereader):
- tokeneater(*token)
- except (IndexError, UnicodeDecodeError):
- # signals exit of tokenizer
- pass
- except tokenize.TokenError as msg:
- _m = ("An unexpected error occurred while tokenizing input file %s\n"
- "The following traceback may be corrupted or invalid\n"
- "The error message is: %s\n" % (file, msg))
- print(_m)
-
- # prune names list of duplicates, but keep the right order
- unique_names = uniq_stable(names)
-
- # Start loop over vars
- lvals = []
- for name_full in unique_names:
- name_base = name_full.split('.', 1)[0]
- if name_base in frame.f_code.co_varnames:
- if name_base in locals.keys():
- try:
- value = safe_repr(eval(name_full, locals))
- except:
- value = "undefined"
- else:
- value = "undefined"
- name = name_full
- lvals.append('%s = %s' % (name, value))
- #elif print_globals:
- # if frame.f_globals.has_key(name_base):
- # try:
- # value = safe_repr(eval(name_full,frame.f_globals))
- # except:
- # value = "undefined"
- # else:
- # value = "undefined"
- # name = 'global %s' % name_full
- # lvals.append('%s = %s' % (name,value))
- if lvals:
- lvals = '%s%s' % (INDENT, ('\n%s' % INDENT).join(lvals))
- else:
- lvals = ''
-
- level = '%s\n%s %s\n' % (75 * '.', link, call)
-
- if index is None:
- frames.append(level)
- else:
- frames.append('%s%s' % (level, ''.join(
- _format_traceback_lines(lnum, index, lines, lvals))))
-
- return frames
-
-
-###############################################################################
-def format_exc(etype, evalue, etb, context=5, tb_offset=0):
- """ Return a nice text document describing the traceback.
-
- Parameters
- -----------
- etype, evalue, etb: as returned by sys.exc_info
- context: number of lines of the source file to plot
- tb_offset: the number of stack frame not to use (0 = use all)
-
- """
- # some locals
- try:
- etype = etype.__name__
- except AttributeError:
- pass
-
- # Header with the exception type, python version, and date
- pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable
- date = time.ctime(time.time())
- pid = 'PID: %i' % os.getpid()
-
- head = '%s%s%s\n%s%s%s' % (
- etype, ' ' * (75 - len(str(etype)) - len(date)),
- date, pid, ' ' * (75 - len(str(pid)) - len(pyver)),
- pyver)
-
- # Drop topmost frames if requested
- try:
- records = _fixed_getframes(etb, context, tb_offset)
- except:
- raise
- print('\nUnfortunately, your original traceback can not be '
- 'constructed.\n')
- return ''
-
- # Get (safely) a string form of the exception info
- try:
- etype_str, evalue_str = map(str, (etype, evalue))
- except:
- # User exception is improperly defined.
- etype, evalue = str, sys.exc_info()[:2]
- etype_str, evalue_str = map(str, (etype, evalue))
- # ... and format it
- exception = ['%s: %s' % (etype_str, evalue_str)]
- frames = format_records(records)
- return '%s\n%s\n%s' % (head, '\n'.join(frames), ''.join(exception[0]))
-
-
-###############################################################################
-def format_outer_frames(context=5, stack_start=None, stack_end=None,
- ignore_ipython=True):
- LNUM_POS, LINES_POS, INDEX_POS = 2, 4, 5
- records = inspect.getouterframes(inspect.currentframe())
- output = list()
-
- for i, (frame, filename, line_no, func_name, lines, index) \
- in enumerate(records):
- # Look inside the frame's globals dictionary for __file__, which should
- # be better.
- better_fn = frame.f_globals.get('__file__', None)
- if isinstance(better_fn, str):
- # Check the type just in case someone did something weird with
- # __file__. It might also be None if the error occurred during
- # import.
- filename = better_fn
- if filename.endswith('.pyc'):
- filename = filename[:-4] + '.py'
- if ignore_ipython:
- # Hack to avoid printing the internals of IPython
- if (os.path.basename(filename) == 'iplib.py'
- and func_name in ('safe_execfile', 'runcode')):
- break
- maybeStart = line_no - 1 - context // 2
- start = max(maybeStart, 0)
- end = start + context
- lines = linecache.getlines(filename)[start:end]
- # pad with empty lines if necessary
- if maybeStart < 0:
- lines = (['\n'] * -maybeStart) + lines
- if len(lines) < context:
- lines += ['\n'] * (context - len(lines))
- buf = list(records[i])
- buf[LNUM_POS] = line_no
- buf[INDEX_POS] = line_no - 1 - start
- buf[LINES_POS] = lines
- output.append(tuple(buf))
- return '\n'.join(format_records(output[stack_end:stack_start:-1]))
diff --git a/mloop/localsklearn/externals/joblib/func_inspect.py b/mloop/localsklearn/externals/joblib/func_inspect.py
deleted file mode 100644
index 9fb67f0..0000000
--- a/mloop/localsklearn/externals/joblib/func_inspect.py
+++ /dev/null
@@ -1,355 +0,0 @@
-"""
-My own variation on function-specific inspect-like features.
-"""
-
-# Author: Gael Varoquaux
-# Copyright (c) 2009 Gael Varoquaux
-# License: BSD Style, 3 clauses.
-
-from itertools import islice
-import inspect
-import warnings
-import re
-import os
-
-from ._compat import _basestring
-from .logger import pformat
-from ._memory_helpers import open_py_source
-from ._compat import PY3_OR_LATER
-
-
-def get_func_code(func):
- """ Attempts to retrieve a reliable function code hash.
-
- The reason we don't use inspect.getsource is that it caches the
- source, whereas we want this to be modified on the fly when the
- function is modified.
-
- Returns
- -------
- func_code: string
- The function code
- source_file: string
- The path to the file in which the function is defined.
- first_line: int
- The first line of the code in the source file.
-
- Notes
- ------
- This function does a bit more magic than inspect, and is thus
- more robust.
- """
- source_file = None
- try:
- code = func.__code__
- source_file = code.co_filename
- if not os.path.exists(source_file):
- # Use inspect for lambda functions and functions defined in an
- # interactive shell, or in doctests
- source_code = ''.join(inspect.getsourcelines(func)[0])
- line_no = 1
- if source_file.startswith('',
- source_file).groups()
- line_no = int(line_no)
- source_file = '' % source_file
- return source_code, source_file, line_no
- # Try to retrieve the source code.
- with open_py_source(source_file) as source_file_obj:
- first_line = code.co_firstlineno
- # All the lines after the function definition:
- source_lines = list(islice(source_file_obj, first_line - 1, None))
- return ''.join(inspect.getblock(source_lines)), source_file, first_line
- except:
- # If the source code fails, we use the hash. This is fragile and
- # might change from one session to another.
- if hasattr(func, '__code__'):
- # Python 3.X
- return str(func.__code__.__hash__()), source_file, -1
- else:
- # Weird objects like numpy ufunc don't have __code__
- # This is fragile, as quite often the id of the object is
- # in the repr, so it might not persist across sessions,
- # however it will work for ufuncs.
- return repr(func), source_file, -1
-
-
-def _clean_win_chars(string):
- """Windows cannot encode some characters in filename."""
- import urllib
- if hasattr(urllib, 'quote'):
- quote = urllib.quote
- else:
- # In Python 3, quote is elsewhere
- import urllib.parse
- quote = urllib.parse.quote
- for char in ('<', '>', '!', ':', '\\'):
- string = string.replace(char, quote(char))
- return string
-
-
-def get_func_name(func, resolv_alias=True, win_characters=True):
- """ Return the function import path (as a list of module names), and
- a name for the function.
-
- Parameters
- ----------
- func: callable
- The func to inspect
- resolv_alias: boolean, optional
- If true, possible local aliases are indicated.
- win_characters: boolean, optional
- If true, substitute special characters using urllib.quote
- This is useful in Windows, as it cannot encode some filenames
- """
- if hasattr(func, '__module__'):
- module = func.__module__
- else:
- try:
- module = inspect.getmodule(func)
- except TypeError:
- if hasattr(func, '__class__'):
- module = func.__class__.__module__
- else:
- module = 'unknown'
- if module is None:
- # Happens in doctests, eg
- module = ''
- if module == '__main__':
- try:
- filename = os.path.abspath(inspect.getsourcefile(func))
- except:
- filename = None
- if filename is not None:
- # mangling of full path to filename
- parts = filename.split(os.sep)
- if parts[-1].startswith(' 1500:
- arg = '%s...' % arg[:700]
- if previous_length > 80:
- arg = '\n%s' % arg
- previous_length = len(arg)
- arg_str.append(arg)
- arg_str.extend(['%s=%s' % (v, pformat(i)) for v, i in kwargs.items()])
- arg_str = ', '.join(arg_str)
-
- signature = '%s(%s)' % (name, arg_str)
- return module_path, signature
-
-
-def format_call(func, args, kwargs, object_name="Memory"):
- """ Returns a nicely formatted statement displaying the function
- call with the given arguments.
- """
- path, signature = format_signature(func, *args, **kwargs)
- msg = '%s\n[%s] Calling %s...\n%s' % (80 * '_', object_name,
- path, signature)
- return msg
- # XXX: Not using logging framework
- #self.debug(msg)
diff --git a/mloop/localsklearn/externals/joblib/hashing.py b/mloop/localsklearn/externals/joblib/hashing.py
deleted file mode 100644
index ced817b..0000000
--- a/mloop/localsklearn/externals/joblib/hashing.py
+++ /dev/null
@@ -1,262 +0,0 @@
-"""
-Fast cryptographic hash of Python objects, with a special case for fast
-hashing of numpy arrays.
-"""
-
-# Author: Gael Varoquaux
-# Copyright (c) 2009 Gael Varoquaux
-# License: BSD Style, 3 clauses.
-
-import pickle
-import hashlib
-import sys
-import types
-import struct
-import io
-
-from ._compat import _bytes_or_unicode, PY3_OR_LATER
-
-
-if PY3_OR_LATER:
- Pickler = pickle._Pickler
-else:
- Pickler = pickle.Pickler
-
-
-class _ConsistentSet(object):
- """ Class used to ensure the hash of Sets is preserved
- whatever the order of its items.
- """
- def __init__(self, set_sequence):
- # Forces order of elements in set to ensure consistent hash.
- try:
- # Trying first to order the set assuming the type of elements is
- # consistent and orderable.
- # This fails on python 3 when elements are unorderable
- # but we keep it in a try as it's faster.
- self._sequence = sorted(set_sequence)
- except TypeError:
- # If elements are unorderable, sorting them using their hash.
- # This is slower but works in any case.
- self._sequence = sorted((hash(e) for e in set_sequence))
-
-
-class _MyHash(object):
- """ Class used to hash objects that won't normally pickle """
-
- def __init__(self, *args):
- self.args = args
-
-
-class Hasher(Pickler):
- """ A subclass of pickler, to do cryptographic hashing, rather than
- pickling.
- """
-
- def __init__(self, hash_name='md5'):
- self.stream = io.BytesIO()
- # By default we want a pickle protocol that only changes with
- # the major python version and not the minor one
- protocol = (pickle.DEFAULT_PROTOCOL if PY3_OR_LATER
- else pickle.HIGHEST_PROTOCOL)
- Pickler.__init__(self, self.stream, protocol=protocol)
- # Initialise the hash obj
- self._hash = hashlib.new(hash_name)
-
- def hash(self, obj, return_digest=True):
- try:
- self.dump(obj)
- except pickle.PicklingError as e:
- e.args += ('PicklingError while hashing %r: %r' % (obj, e),)
- raise
- dumps = self.stream.getvalue()
- self._hash.update(dumps)
- if return_digest:
- return self._hash.hexdigest()
-
- def save(self, obj):
- if isinstance(obj, (types.MethodType, type({}.pop))):
- # the Pickler cannot pickle instance methods; here we decompose
- # them into components that make them uniquely identifiable
- if hasattr(obj, '__func__'):
- func_name = obj.__func__.__name__
- else:
- func_name = obj.__name__
- inst = obj.__self__
- if type(inst) == type(pickle):
- obj = _MyHash(func_name, inst.__name__)
- elif inst is None:
- # type(None) or type(module) do not pickle
- obj = _MyHash(func_name, inst)
- else:
- cls = obj.__self__.__class__
- obj = _MyHash(func_name, inst, cls)
- Pickler.save(self, obj)
-
- def memoize(self, obj):
- # We want hashing to be sensitive to value instead of reference.
- # For example we want ['aa', 'aa'] and ['aa', 'aaZ'[:2]]
- # to hash to the same value and that's why we disable memoization
- # for strings
- if isinstance(obj, _bytes_or_unicode):
- return
- Pickler.memoize(self, obj)
-
- # The dispatch table of the pickler is not accessible in Python
- # 3, as these lines are only bugware for IPython, we skip them.
- def save_global(self, obj, name=None, pack=struct.pack):
- # We have to override this method in order to deal with objects
- # defined interactively in IPython that are not injected in
- # __main__
- kwargs = dict(name=name, pack=pack)
- if sys.version_info >= (3, 4):
- del kwargs['pack']
- try:
- Pickler.save_global(self, obj, **kwargs)
- except pickle.PicklingError:
- Pickler.save_global(self, obj, **kwargs)
- module = getattr(obj, "__module__", None)
- if module == '__main__':
- my_name = name
- if my_name is None:
- my_name = obj.__name__
- mod = sys.modules[module]
- if not hasattr(mod, my_name):
- # IPython doesn't inject the variables define
- # interactively in __main__
- setattr(mod, my_name, obj)
-
- dispatch = Pickler.dispatch.copy()
- # builtin
- dispatch[type(len)] = save_global
- # type
- dispatch[type(object)] = save_global
- # classobj
- dispatch[type(Pickler)] = save_global
- # function
- dispatch[type(pickle.dump)] = save_global
-
- def _batch_setitems(self, items):
- # forces order of keys in dict to ensure consistent hash.
- try:
- # Trying first to compare dict assuming the type of keys is
- # consistent and orderable.
- # This fails on python 3 when keys are unorderable
- # but we keep it in a try as it's faster.
- Pickler._batch_setitems(self, iter(sorted(items)))
- except TypeError:
- # If keys are unorderable, sorting them using their hash. This is
- # slower but works in any case.
- Pickler._batch_setitems(self, iter(sorted((hash(k), v)
- for k, v in items)))
-
- def save_set(self, set_items):
- # forces order of items in Set to ensure consistent hash
- Pickler.save(self, _ConsistentSet(set_items))
-
- dispatch[type(set())] = save_set
-
-
-class NumpyHasher(Hasher):
- """ Special case the hasher for when numpy is loaded.
- """
-
- def __init__(self, hash_name='md5', coerce_mmap=False):
- """
- Parameters
- ----------
- hash_name: string
- The hash algorithm to be used
- coerce_mmap: boolean
- Make no difference between np.memmap and np.ndarray
- objects.
- """
- self.coerce_mmap = coerce_mmap
- Hasher.__init__(self, hash_name=hash_name)
- # delayed import of numpy, to avoid tight coupling
- import numpy as np
- self.np = np
- if hasattr(np, 'getbuffer'):
- self._getbuffer = np.getbuffer
- else:
- self._getbuffer = memoryview
-
- def save(self, obj):
- """ Subclass the save method, to hash ndarray subclass, rather
- than pickling them. Off course, this is a total abuse of
- the Pickler class.
- """
- if isinstance(obj, self.np.ndarray) and not obj.dtype.hasobject:
- # Compute a hash of the object
- # The update function of the hash requires a c_contiguous buffer.
- if obj.shape == ():
- # 0d arrays need to be flattened because viewing them as bytes
- # raises a ValueError exception.
- obj_c_contiguous = obj.flatten()
- elif obj.flags.c_contiguous:
- obj_c_contiguous = obj
- elif obj.flags.f_contiguous:
- obj_c_contiguous = obj.T
- else:
- # Cater for non-single-segment arrays: this creates a
- # copy, and thus aleviates this issue.
- # XXX: There might be a more efficient way of doing this
- obj_c_contiguous = obj.flatten()
-
- # memoryview is not supported for some dtypes, e.g. datetime64, see
- # https://github.com/numpy/numpy/issues/4983. The
- # workaround is to view the array as bytes before
- # taking the memoryview.
- self._hash.update(
- self._getbuffer(obj_c_contiguous.view(self.np.uint8)))
-
- # We store the class, to be able to distinguish between
- # Objects with the same binary content, but different
- # classes.
- if self.coerce_mmap and isinstance(obj, self.np.memmap):
- # We don't make the difference between memmap and
- # normal ndarrays, to be able to reload previously
- # computed results with memmap.
- klass = self.np.ndarray
- else:
- klass = obj.__class__
- # We also return the dtype and the shape, to distinguish
- # different views on the same data with different dtypes.
-
- # The object will be pickled by the pickler hashed at the end.
- obj = (klass, ('HASHED', obj.dtype, obj.shape, obj.strides))
- elif isinstance(obj, self.np.dtype):
- # Atomic dtype objects are interned by their default constructor:
- # np.dtype('f8') is np.dtype('f8')
- # This interning is not maintained by a
- # pickle.loads + pickle.dumps cycle, because __reduce__
- # uses copy=True in the dtype constructor. This
- # non-deterministic behavior causes the internal memoizer
- # of the hasher to generate different hash values
- # depending on the history of the dtype object.
- # To prevent the hash from being sensitive to this, we use
- # .descr which is a full (and never interned) description of
- # the array dtype according to the numpy doc.
- klass = obj.__class__
- obj = (klass, ('HASHED', obj.descr))
- Hasher.save(self, obj)
-
-
-def hash(obj, hash_name='md5', coerce_mmap=False):
- """ Quick calculation of a hash to identify uniquely Python objects
- containing numpy arrays.
-
-
- Parameters
- -----------
- hash_name: 'md5' or 'sha1'
- Hashing algorithm used. sha1 is supposedly safer, but md5 is
- faster.
- coerce_mmap: boolean
- Make no difference between np.memmap and np.ndarray
- """
- if 'numpy' in sys.modules:
- hasher = NumpyHasher(hash_name=hash_name, coerce_mmap=coerce_mmap)
- else:
- hasher = Hasher(hash_name=hash_name)
- return hasher.hash(obj)
diff --git a/mloop/localsklearn/externals/joblib/logger.py b/mloop/localsklearn/externals/joblib/logger.py
deleted file mode 100644
index 41b5864..0000000
--- a/mloop/localsklearn/externals/joblib/logger.py
+++ /dev/null
@@ -1,157 +0,0 @@
-"""
-Helpers for logging.
-
-This module needs much love to become useful.
-"""
-
-# Author: Gael Varoquaux
-# Copyright (c) 2008 Gael Varoquaux
-# License: BSD Style, 3 clauses.
-
-from __future__ import print_function
-
-import time
-import sys
-import os
-import shutil
-import logging
-import pprint
-
-from .disk import mkdirp
-
-
-def _squeeze_time(t):
- """Remove .1s to the time under Windows: this is the time it take to
- stat files. This is needed to make results similar to timings under
- Unix, for tests
- """
- if sys.platform.startswith('win'):
- return max(0, t - .1)
- else:
- return t
-
-
-def format_time(t):
- t = _squeeze_time(t)
- return "%.1fs, %.1fmin" % (t, t / 60.)
-
-
-def short_format_time(t):
- t = _squeeze_time(t)
- if t > 60:
- return "%4.1fmin" % (t / 60.)
- else:
- return " %5.1fs" % (t)
-
-
-def pformat(obj, indent=0, depth=3):
- if 'numpy' in sys.modules:
- import numpy as np
- print_options = np.get_printoptions()
- np.set_printoptions(precision=6, threshold=64, edgeitems=1)
- else:
- print_options = None
- out = pprint.pformat(obj, depth=depth, indent=indent)
- if print_options:
- np.set_printoptions(**print_options)
- return out
-
-
-###############################################################################
-# class `Logger`
-###############################################################################
-class Logger(object):
- """ Base class for logging messages.
- """
-
- def __init__(self, depth=3):
- """
- Parameters
- ----------
- depth: int, optional
- The depth of objects printed.
- """
- self.depth = depth
-
- def warn(self, msg):
- logging.warn("[%s]: %s" % (self, msg))
-
- def debug(self, msg):
- # XXX: This conflicts with the debug flag used in children class
- logging.debug("[%s]: %s" % (self, msg))
-
- def format(self, obj, indent=0):
- """ Return the formated representation of the object.
- """
- return pformat(obj, indent=indent, depth=self.depth)
-
-
-###############################################################################
-# class `PrintTime`
-###############################################################################
-class PrintTime(object):
- """ Print and log messages while keeping track of time.
- """
-
- def __init__(self, logfile=None, logdir=None):
- if logfile is not None and logdir is not None:
- raise ValueError('Cannot specify both logfile and logdir')
- # XXX: Need argument docstring
- self.last_time = time.time()
- self.start_time = self.last_time
- if logdir is not None:
- logfile = os.path.join(logdir, 'joblib.log')
- self.logfile = logfile
- if logfile is not None:
- mkdirp(os.path.dirname(logfile))
- if os.path.exists(logfile):
- # Rotate the logs
- for i in range(1, 9):
- try:
- shutil.move(logfile + '.%i' % i,
- logfile + '.%i' % (i + 1))
- except:
- "No reason failing here"
- # Use a copy rather than a move, so that a process
- # monitoring this file does not get lost.
- try:
- shutil.copy(logfile, logfile + '.1')
- except:
- "No reason failing here"
- try:
- with open(logfile, 'w') as logfile:
- logfile.write('\nLogging joblib python script\n')
- logfile.write('\n---%s---\n' % time.ctime(self.last_time))
- except:
- """ Multiprocessing writing to files can create race
- conditions. Rather fail silently than crash the
- computation.
- """
- # XXX: We actually need a debug flag to disable this
- # silent failure.
-
- def __call__(self, msg='', total=False):
- """ Print the time elapsed between the last call and the current
- call, with an optional message.
- """
- if not total:
- time_lapse = time.time() - self.last_time
- full_msg = "%s: %s" % (msg, format_time(time_lapse))
- else:
- # FIXME: Too much logic duplicated
- time_lapse = time.time() - self.start_time
- full_msg = "%s: %.2fs, %.1f min" % (msg, time_lapse,
- time_lapse / 60)
- print(full_msg, file=sys.stderr)
- if self.logfile is not None:
- try:
- with open(self.logfile, 'a') as f:
- print(full_msg, file=f)
- except:
- """ Multiprocessing writing to files can create race
- conditions. Rather fail silently than crash the
- calculation.
- """
- # XXX: We actually need a debug flag to disable this
- # silent failure.
- self.last_time = time.time()
diff --git a/mloop/localsklearn/externals/joblib/memory.py b/mloop/localsklearn/externals/joblib/memory.py
deleted file mode 100644
index fff84ad..0000000
--- a/mloop/localsklearn/externals/joblib/memory.py
+++ /dev/null
@@ -1,918 +0,0 @@
-"""
-A context object for caching a function's return value each time it
-is called with the same input arguments.
-
-"""
-
-# Author: Gael Varoquaux
-# Copyright (c) 2009 Gael Varoquaux
-# License: BSD Style, 3 clauses.
-
-
-from __future__ import with_statement
-import os
-import shutil
-import time
-import pydoc
-import re
-import sys
-try:
- import cPickle as pickle
-except ImportError:
- import pickle
-import functools
-import traceback
-import warnings
-import inspect
-import json
-import weakref
-import io
-
-# Local imports
-from . import hashing
-from .func_inspect import get_func_code, get_func_name, filter_args
-from .func_inspect import format_signature, format_call
-from ._memory_helpers import open_py_source
-from .logger import Logger, format_time, pformat
-from . import numpy_pickle
-from .disk import mkdirp, rm_subdirs
-from ._compat import _basestring, PY3_OR_LATER
-
-FIRST_LINE_TEXT = "# first line:"
-
-# TODO: The following object should have a data store object as a sub
-# object, and the interface to persist and query should be separated in
-# the data store.
-#
-# This would enable creating 'Memory' objects with a different logic for
-# pickling that would simply span a MemorizedFunc with the same
-# store (or do we want to copy it to avoid cross-talks?), for instance to
-# implement HDF5 pickling.
-
-# TODO: Same remark for the logger, and probably use the Python logging
-# mechanism.
-
-
-def extract_first_line(func_code):
- """ Extract the first line information from the function code
- text if available.
- """
- if func_code.startswith(FIRST_LINE_TEXT):
- func_code = func_code.split('\n')
- first_line = int(func_code[0][len(FIRST_LINE_TEXT):])
- func_code = '\n'.join(func_code[1:])
- else:
- first_line = -1
- return func_code, first_line
-
-
-class JobLibCollisionWarning(UserWarning):
- """ Warn that there might be a collision between names of functions.
- """
-
-
-def _get_func_fullname(func):
- """Compute the part of part associated with a function.
-
- See code of_cache_key_to_dir() for details
- """
- modules, funcname = get_func_name(func)
- modules.append(funcname)
- return os.path.join(*modules)
-
-
-def _cache_key_to_dir(cachedir, func, argument_hash):
- """Compute directory associated with a given cache key.
-
- func can be a function or a string as returned by _get_func_fullname().
- """
- parts = [cachedir]
- if isinstance(func, _basestring):
- parts.append(func)
- else:
- parts.append(_get_func_fullname(func))
-
- if argument_hash is not None:
- parts.append(argument_hash)
- return os.path.join(*parts)
-
-
-def _load_output(output_dir, func_name, timestamp=None, metadata=None,
- mmap_mode=None, verbose=0):
- """Load output of a computation."""
- if verbose > 1:
- signature = ""
- try:
- if metadata is not None:
- args = ", ".join(['%s=%s' % (name, value)
- for name, value
- in metadata['input_args'].items()])
- signature = "%s(%s)" % (os.path.basename(func_name),
- args)
- else:
- signature = os.path.basename(func_name)
- except KeyError:
- pass
-
- if timestamp is not None:
- t = "% 16s" % format_time(time.time() - timestamp)
- else:
- t = ""
-
- if verbose < 10:
- print('[Memory]%s: Loading %s...' % (t, str(signature)))
- else:
- print('[Memory]%s: Loading %s from %s' % (
- t, str(signature), output_dir))
-
- filename = os.path.join(output_dir, 'output.pkl')
- if not os.path.isfile(filename):
- raise KeyError(
- "Non-existing cache value (may have been cleared).\n"
- "File %s does not exist" % filename)
- return numpy_pickle.load(filename, mmap_mode=mmap_mode)
-
-
-# An in-memory store to avoid looking at the disk-based function
-# source code to check if a function definition has changed
-_FUNCTION_HASHES = weakref.WeakKeyDictionary()
-
-
-###############################################################################
-# class `MemorizedResult`
-###############################################################################
-class MemorizedResult(Logger):
- """Object representing a cached value.
-
- Attributes
- ----------
- cachedir: string
- path to root of joblib cache
-
- func: function or string
- function whose output is cached. The string case is intended only for
- instanciation based on the output of repr() on another instance.
- (namely eval(repr(memorized_instance)) works).
-
- argument_hash: string
- hash of the function arguments
-
- mmap_mode: {None, 'r+', 'r', 'w+', 'c'}
- The memmapping mode used when loading from cache numpy arrays. See
- numpy.load for the meaning of the different values.
-
- verbose: int
- verbosity level (0 means no message)
-
- timestamp, metadata: string
- for internal use only
- """
- def __init__(self, cachedir, func, argument_hash,
- mmap_mode=None, verbose=0, timestamp=None, metadata=None):
- Logger.__init__(self)
- if isinstance(func, _basestring):
- self.func = func
- else:
- self.func = _get_func_fullname(func)
- self.argument_hash = argument_hash
- self.cachedir = cachedir
- self.mmap_mode = mmap_mode
-
- self._output_dir = _cache_key_to_dir(cachedir, self.func,
- argument_hash)
-
- if metadata is not None:
- self.metadata = metadata
- else:
- self.metadata = {}
- # No error is relevant here.
- try:
- with open(os.path.join(self._output_dir, 'metadata.json'),
- 'rb') as f:
- self.metadata = json.load(f)
- except:
- pass
-
- self.duration = self.metadata.get('duration', None)
- self.verbose = verbose
- self.timestamp = timestamp
-
- def get(self):
- """Read value from cache and return it."""
- return _load_output(self._output_dir, _get_func_fullname(self.func),
- timestamp=self.timestamp,
- metadata=self.metadata, mmap_mode=self.mmap_mode,
- verbose=self.verbose)
-
- def clear(self):
- """Clear value from cache"""
- shutil.rmtree(self._output_dir, ignore_errors=True)
-
- def __repr__(self):
- return ('{class_name}(cachedir="{cachedir}", func="{func}", '
- 'argument_hash="{argument_hash}")'.format(
- class_name=self.__class__.__name__,
- cachedir=self.cachedir,
- func=self.func,
- argument_hash=self.argument_hash
- ))
-
- def __reduce__(self):
- return (self.__class__, (self.cachedir, self.func, self.argument_hash),
- {'mmap_mode': self.mmap_mode})
-
-
-class NotMemorizedResult(object):
- """Class representing an arbitrary value.
-
- This class is a replacement for MemorizedResult when there is no cache.
- """
- __slots__ = ('value', 'valid')
-
- def __init__(self, value):
- self.value = value
- self.valid = True
-
- def get(self):
- if self.valid:
- return self.value
- else:
- raise KeyError("No value stored.")
-
- def clear(self):
- self.valid = False
- self.value = None
-
- def __repr__(self):
- if self.valid:
- return '{class_name}({value})'.format(
- class_name=self.__class__.__name__,
- value=pformat(self.value)
- )
- else:
- return self.__class__.__name__ + ' with no value'
-
- # __getstate__ and __setstate__ are required because of __slots__
- def __getstate__(self):
- return {"valid": self.valid, "value": self.value}
-
- def __setstate__(self, state):
- self.valid = state["valid"]
- self.value = state["value"]
-
-
-###############################################################################
-# class `NotMemorizedFunc`
-###############################################################################
-class NotMemorizedFunc(object):
- """No-op object decorating a function.
-
- This class replaces MemorizedFunc when there is no cache. It provides an
- identical API but does not write anything on disk.
-
- Attributes
- ----------
- func: callable
- Original undecorated function.
- """
- # Should be a light as possible (for speed)
- def __init__(self, func):
- self.func = func
-
- def __call__(self, *args, **kwargs):
- return self.func(*args, **kwargs)
-
- def call_and_shelve(self, *args, **kwargs):
- return NotMemorizedResult(self.func(*args, **kwargs))
-
- def __reduce__(self):
- return (self.__class__, (self.func,))
-
- def __repr__(self):
- return '%s(func=%s)' % (
- self.__class__.__name__,
- self.func
- )
-
- def clear(self, warn=True):
- # Argument "warn" is for compatibility with MemorizedFunc.clear
- pass
-
-
-###############################################################################
-# class `MemorizedFunc`
-###############################################################################
-class MemorizedFunc(Logger):
- """ Callable object decorating a function for caching its return value
- each time it is called.
-
- All values are cached on the filesystem, in a deep directory
- structure. Methods are provided to inspect the cache or clean it.
-
- Attributes
- ----------
- func: callable
- The original, undecorated, function.
-
- cachedir: string
- Path to the base cache directory of the memory context.
-
- ignore: list or None
- List of variable names to ignore when choosing whether to
- recompute.
-
- mmap_mode: {None, 'r+', 'r', 'w+', 'c'}
- The memmapping mode used when loading from cache
- numpy arrays. See numpy.load for the meaning of the different
- values.
-
- compress: boolean, or integer
- Whether to zip the stored data on disk. If an integer is
- given, it should be between 1 and 9, and sets the amount
- of compression. Note that compressed arrays cannot be
- read by memmapping.
-
- verbose: int, optional
- The verbosity flag, controls messages that are issued as
- the function is evaluated.
- """
- #-------------------------------------------------------------------------
- # Public interface
- #-------------------------------------------------------------------------
-
- def __init__(self, func, cachedir, ignore=None, mmap_mode=None,
- compress=False, verbose=1, timestamp=None):
- """
- Parameters
- ----------
- func: callable
- The function to decorate
- cachedir: string
- The path of the base directory to use as a data store
- ignore: list or None
- List of variable names to ignore.
- mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
- The memmapping mode used when loading from cache
- numpy arrays. See numpy.load for the meaning of the
- arguments.
- compress : boolean, or integer
- Whether to zip the stored data on disk. If an integer is
- given, it should be between 1 and 9, and sets the amount
- of compression. Note that compressed arrays cannot be
- read by memmapping.
- verbose: int, optional
- Verbosity flag, controls the debug messages that are issued
- as functions are evaluated. The higher, the more verbose
- timestamp: float, optional
- The reference time from which times in tracing messages
- are reported.
- """
- Logger.__init__(self)
- self.mmap_mode = mmap_mode
- self.func = func
- if ignore is None:
- ignore = []
- self.ignore = ignore
-
- self._verbose = verbose
- self.cachedir = cachedir
- self.compress = compress
- if compress and self.mmap_mode is not None:
- warnings.warn('Compressed results cannot be memmapped',
- stacklevel=2)
- if timestamp is None:
- timestamp = time.time()
- self.timestamp = timestamp
- mkdirp(self.cachedir)
- try:
- functools.update_wrapper(self, func)
- except:
- " Objects like ufunc don't like that "
- if inspect.isfunction(func):
- doc = pydoc.TextDoc().document(func)
- # Remove blank line
- doc = doc.replace('\n', '\n\n', 1)
- # Strip backspace-overprints for compatibility with autodoc
- doc = re.sub('\x08.', '', doc)
- else:
- # Pydoc does a poor job on other objects
- doc = func.__doc__
- self.__doc__ = 'Memoized version of %s' % doc
-
- def _cached_call(self, args, kwargs):
- """Call wrapped function and cache result, or read cache if available.
-
- This function returns the wrapped function output and some metadata.
-
- Returns
- -------
- output: value or tuple
- what is returned by wrapped function
-
- argument_hash: string
- hash of function arguments
-
- metadata: dict
- some metadata about wrapped function call (see _persist_input())
- """
- # Compare the function code with the previous to see if the
- # function code has changed
- output_dir, argument_hash = self._get_output_dir(*args, **kwargs)
- metadata = None
- # FIXME: The statements below should be try/excepted
- if not (self._check_previous_func_code(stacklevel=4) and
- os.path.exists(output_dir)):
- if self._verbose > 10:
- _, name = get_func_name(self.func)
- self.warn('Computing func %s, argument hash %s in '
- 'directory %s'
- % (name, argument_hash, output_dir))
- out, metadata = self.call(*args, **kwargs)
- if self.mmap_mode is not None:
- # Memmap the output at the first call to be consistent with
- # later calls
- out = _load_output(output_dir, _get_func_fullname(self.func),
- timestamp=self.timestamp,
- mmap_mode=self.mmap_mode,
- verbose=self._verbose)
- else:
- try:
- t0 = time.time()
- out = _load_output(output_dir, _get_func_fullname(self.func),
- timestamp=self.timestamp,
- metadata=metadata, mmap_mode=self.mmap_mode,
- verbose=self._verbose)
- if self._verbose > 4:
- t = time.time() - t0
- _, name = get_func_name(self.func)
- msg = '%s cache loaded - %s' % (name, format_time(t))
- print(max(0, (80 - len(msg))) * '_' + msg)
- except Exception:
- # XXX: Should use an exception logger
- self.warn('Exception while loading results for '
- '(args=%s, kwargs=%s)\n %s' %
- (args, kwargs, traceback.format_exc()))
-
- shutil.rmtree(output_dir, ignore_errors=True)
- out, metadata = self.call(*args, **kwargs)
- argument_hash = None
- return (out, argument_hash, metadata)
-
- def call_and_shelve(self, *args, **kwargs):
- """Call wrapped function, cache result and return a reference.
-
- This method returns a reference to the cached result instead of the
- result itself. The reference object is small and pickeable, allowing
- to send or store it easily. Call .get() on reference object to get
- result.
-
- Returns
- -------
- cached_result: MemorizedResult or NotMemorizedResult
- reference to the value returned by the wrapped function. The
- class "NotMemorizedResult" is used when there is no cache
- activated (e.g. cachedir=None in Memory).
- """
- _, argument_hash, metadata = self._cached_call(args, kwargs)
-
- return MemorizedResult(self.cachedir, self.func, argument_hash,
- metadata=metadata, verbose=self._verbose - 1,
- timestamp=self.timestamp)
-
- def __call__(self, *args, **kwargs):
- return self._cached_call(args, kwargs)[0]
-
- def __reduce__(self):
- """ We don't store the timestamp when pickling, to avoid the hash
- depending from it.
- In addition, when unpickling, we run the __init__
- """
- return (self.__class__, (self.func, self.cachedir, self.ignore,
- self.mmap_mode, self.compress, self._verbose))
-
- def format_signature(self, *args, **kwargs):
- warnings.warn("MemorizedFunc.format_signature will be removed in a "
- "future version of joblib.", DeprecationWarning)
- return format_signature(self.func, *args, **kwargs)
-
- def format_call(self, *args, **kwargs):
- warnings.warn("MemorizedFunc.format_call will be removed in a "
- "future version of joblib.", DeprecationWarning)
- return format_call(self.func, args, kwargs)
-
- #-------------------------------------------------------------------------
- # Private interface
- #-------------------------------------------------------------------------
-
- def _get_argument_hash(self, *args, **kwargs):
- return hashing.hash(filter_args(self.func, self.ignore,
- args, kwargs),
- coerce_mmap=(self.mmap_mode is not None))
-
- def _get_output_dir(self, *args, **kwargs):
- """ Return the directory in which are persisted the result
- of the function called with the given arguments.
- """
- argument_hash = self._get_argument_hash(*args, **kwargs)
- output_dir = os.path.join(self._get_func_dir(self.func),
- argument_hash)
- return output_dir, argument_hash
-
- get_output_dir = _get_output_dir # backward compatibility
-
- def _get_func_dir(self, mkdir=True):
- """ Get the directory corresponding to the cache for the
- function.
- """
- func_dir = _cache_key_to_dir(self.cachedir, self.func, None)
- if mkdir:
- mkdirp(func_dir)
- return func_dir
-
- def _hash_func(self):
- """Hash a function to key the online cache"""
- func_code_h = hash(getattr(self.func, '__code__', None))
- return id(self.func), hash(self.func), func_code_h
-
- def _write_func_code(self, filename, func_code, first_line):
- """ Write the function code and the filename to a file.
- """
- # We store the first line because the filename and the function
- # name is not always enough to identify a function: people
- # sometimes have several functions named the same way in a
- # file. This is bad practice, but joblib should be robust to bad
- # practice.
- func_code = u'%s %i\n%s' % (FIRST_LINE_TEXT, first_line, func_code)
- with io.open(filename, 'w', encoding="UTF-8") as out:
- out.write(func_code)
- # Also store in the in-memory store of function hashes
- is_named_callable = False
- if PY3_OR_LATER:
- is_named_callable = (hasattr(self.func, '__name__')
- and self.func.__name__ != '<lambda>')
- else:
- is_named_callable = (hasattr(self.func, 'func_name')
- and self.func.func_name != '<lambda>')
- if is_named_callable:
- # Don't do this for lambda functions or strange callable
- # objects, as it ends up being too fragile
- func_hash = self._hash_func()
- try:
- _FUNCTION_HASHES[self.func] = func_hash
- except TypeError:
- # Some callable are not hashable
- pass
-
- def _check_previous_func_code(self, stacklevel=2):
- """
- stacklevel is the depth a which this function is called, to
- issue useful warnings to the user.
- """
- # First check if our function is in the in-memory store.
- # Using the in-memory store not only makes things faster, but it
- # also renders us robust to variations of the files when the
- # in-memory version of the code does not vary
- try:
- if self.func in _FUNCTION_HASHES:
- # We use as an identifier the id of the function and its
- # hash. This is more likely to falsely change than have hash
- # collisions, thus we are on the safe side.
- func_hash = self._hash_func()
- if func_hash == _FUNCTION_HASHES[self.func]:
- return True
- except TypeError:
- # Some callables are not hashable
- pass
-
- # Here, we go through some effort to be robust to dynamically
- # changing code and collision. We cannot inspect.getsource
- # because it is not reliable when using IPython's magic "%run".
- func_code, source_file, first_line = get_func_code(self.func)
- func_dir = self._get_func_dir()
- func_code_file = os.path.join(func_dir, 'func_code.py')
-
- try:
- with io.open(func_code_file, encoding="UTF-8") as infile:
- old_func_code, old_first_line = \
- extract_first_line(infile.read())
- except IOError:
- self._write_func_code(func_code_file, func_code, first_line)
- return False
- if old_func_code == func_code:
- return True
-
- # We have differing code, is this because we are referring to
- # different functions, or because the function we are referring to has
- # changed?
-
- _, func_name = get_func_name(self.func, resolv_alias=False,
- win_characters=False)
- if old_first_line == first_line == -1 or func_name == '<lambda>':
- if not first_line == -1:
- func_description = '%s (%s:%i)' % (func_name,
- source_file, first_line)
- else:
- func_description = func_name
- warnings.warn(JobLibCollisionWarning(
- "Cannot detect name collisions for function '%s'"
- % func_description), stacklevel=stacklevel)
-
- # Fetch the code at the old location and compare it. If it is the
- # same than the code store, we have a collision: the code in the
- # file has not changed, but the name we have is pointing to a new
- # code block.
- if not old_first_line == first_line and source_file is not None:
- possible_collision = False
- if os.path.exists(source_file):
- _, func_name = get_func_name(self.func, resolv_alias=False)
- num_lines = len(func_code.split('\n'))
- with open_py_source(source_file) as f:
- on_disk_func_code = f.readlines()[
- old_first_line - 1:old_first_line - 1 + num_lines - 1]
- on_disk_func_code = ''.join(on_disk_func_code)
- possible_collision = (on_disk_func_code.rstrip()
- == old_func_code.rstrip())
- else:
- possible_collision = source_file.startswith('<doctest ')
- if possible_collision:
- warnings.warn(JobLibCollisionWarning(
- 'Possible name collisions between functions '
- "'%s' (%s:%i) and '%s' (%s:%i)" %
- (func_name, source_file, old_first_line,
- func_name, source_file, first_line)),
- stacklevel=stacklevel)
-
- # The function has changed, wipe the cache directory.
- # XXX: Should be using warnings, and giving stacklevel
- if self._verbose > 10:
- _, func_name = get_func_name(self.func, resolv_alias=False)
- self.warn("Function %s (stored in %s) has changed." %
- (func_name, func_dir))
- self.clear(warn=True)
- return False
-
- def clear(self, warn=True):
- """ Empty the function's cache.
- """
- func_dir = self._get_func_dir(mkdir=False)
- if self._verbose > 0 and warn:
- self.warn("Clearing cache %s" % func_dir)
- if os.path.exists(func_dir):
- shutil.rmtree(func_dir, ignore_errors=True)
- mkdirp(func_dir)
- func_code, _, first_line = get_func_code(self.func)
- func_code_file = os.path.join(func_dir, 'func_code.py')
- self._write_func_code(func_code_file, func_code, first_line)
-
- def call(self, *args, **kwargs):
- """ Force the execution of the function with the given arguments and
- persist the output values.
- """
- start_time = time.time()
- output_dir, _ = self._get_output_dir(*args, **kwargs)
- if self._verbose > 0:
- print(format_call(self.func, args, kwargs))
- output = self.func(*args, **kwargs)
- self._persist_output(output, output_dir)
- duration = time.time() - start_time
- metadata = self._persist_input(output_dir, duration, args, kwargs)
-
- if self._verbose > 0:
- _, name = get_func_name(self.func)
- msg = '%s - %s' % (name, format_time(duration))
- print(max(0, (80 - len(msg))) * '_' + msg)
- return output, metadata
-
- # Make public
- def _persist_output(self, output, dir):
- """ Persist the given output tuple in the directory.
- """
- try:
- mkdirp(dir)
- filename = os.path.join(dir, 'output.pkl')
- numpy_pickle.dump(output, filename, compress=self.compress)
- if self._verbose > 10:
- print('Persisting in %s' % dir)
- except OSError:
- " Race condition in the creation of the directory "
-
- def _persist_input(self, output_dir, duration, args, kwargs,
- this_duration_limit=0.5):
- """ Save a small summary of the call using json format in the
- output directory.
-
- output_dir: string
- directory where to write metadata.
-
- duration: float
- time taken by hashing input arguments, calling the wrapped
- function and persisting its output.
-
- args, kwargs: list and dict
- input arguments for wrapped function
-
- this_duration_limit: float
- Max execution time for this function before issuing a warning.
- """
- start_time = time.time()
- argument_dict = filter_args(self.func, self.ignore,
- args, kwargs)
-
- input_repr = dict((k, repr(v)) for k, v in argument_dict.items())
- # This can fail due to race-conditions with multiple
- # concurrent joblibs removing the file or the directory
- metadata = {"duration": duration, "input_args": input_repr}
- try:
- mkdirp(output_dir)
- with open(os.path.join(output_dir, 'metadata.json'), 'w') as f:
- json.dump(metadata, f)
- except:
- pass
-
- this_duration = time.time() - start_time
- if this_duration > this_duration_limit:
- # This persistence should be fast. It will not be if repr() takes
- # time and its output is large, because json.dump will have to
- # write a large file. This should not be an issue with numpy arrays
- # for which repr() always output a short representation, but can
- # be with complex dictionaries. Fixing the problem should be a
- # matter of replacing repr() above by something smarter.
- warnings.warn("Persisting input arguments took %.2fs to run.\n"
- "If this happens often in your code, it can cause "
- "performance problems \n"
- "(results will be correct in all cases). \n"
- "The reason for this is probably some large input "
- "arguments for a wrapped\n"
- " function (e.g. large strings).\n"
- "THIS IS A JOBLIB ISSUE. If you can, kindly provide "
- "the joblib's team with an\n"
- " example so that they can fix the problem."
- % this_duration, stacklevel=5)
- return metadata
-
- def load_output(self, output_dir):
- """ Read the results of a previous calculation from the directory
- it was cached in.
- """
- warnings.warn("MemorizedFunc.load_output is deprecated and will be "
- "removed in a future version\n"
- "of joblib. A MemorizedResult provides similar features",
- DeprecationWarning)
- # No metadata available here.
- return _load_output(output_dir, _get_func_fullname(self.func),
- timestamp=self.timestamp,
- mmap_mode=self.mmap_mode, verbose=self._verbose)
-
- # XXX: Need a method to check if results are available.
-
- #-------------------------------------------------------------------------
- # Private `object` interface
- #-------------------------------------------------------------------------
-
- def __repr__(self):
- return '%s(func=%s, cachedir=%s)' % (
- self.__class__.__name__,
- self.func,
- repr(self.cachedir),
- )
-
-
-###############################################################################
-# class `Memory`
-###############################################################################
-class Memory(Logger):
- """ A context object for caching a function's return value each time it
- is called with the same input arguments.
-
- All values are cached on the filesystem, in a deep directory
- structure.
-
- see :ref:`memory_reference`
- """
- #-------------------------------------------------------------------------
- # Public interface
- #-------------------------------------------------------------------------
-
- def __init__(self, cachedir, mmap_mode=None, compress=False, verbose=1):
- """
- Parameters
- ----------
- cachedir: string or None
- The path of the base directory to use as a data store
- or None. If None is given, no caching is done and
- the Memory object is completely transparent.
- mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
- The memmapping mode used when loading from cache
- numpy arrays. See numpy.load for the meaning of the
- arguments.
- compress: boolean, or integer
- Whether to zip the stored data on disk. If an integer is
- given, it should be between 1 and 9, and sets the amount
- of compression. Note that compressed arrays cannot be
- read by memmapping.
- verbose: int, optional
- Verbosity flag, controls the debug messages that are issued
- as functions are evaluated.
- """
- # XXX: Bad explanation of the None value of cachedir
- Logger.__init__(self)
- self._verbose = verbose
- self.mmap_mode = mmap_mode
- self.timestamp = time.time()
- self.compress = compress
- if compress and mmap_mode is not None:
- warnings.warn('Compressed results cannot be memmapped',
- stacklevel=2)
- if cachedir is None:
- self.cachedir = None
- else:
- self.cachedir = os.path.join(cachedir, 'joblib')
- mkdirp(self.cachedir)
-
- def cache(self, func=None, ignore=None, verbose=None,
- mmap_mode=False):
- """ Decorates the given function func to only compute its return
- value for input arguments not cached on disk.
-
- Parameters
- ----------
- func: callable, optional
- The function to be decorated
- ignore: list of strings
- A list of arguments name to ignore in the hashing
- verbose: integer, optional
- The verbosity mode of the function. By default that
- of the memory object is used.
- mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
- The memmapping mode used when loading from cache
- numpy arrays. See numpy.load for the meaning of the
- arguments. By default that of the memory object is used.
-
- Returns
- -------
- decorated_func: MemorizedFunc object
- The returned object is a MemorizedFunc object, that is
- callable (behaves like a function), but offers extra
- methods for cache lookup and management. See the
- documentation for :class:`joblib.memory.MemorizedFunc`.
- """
- if func is None:
- # Partial application, to be able to specify extra keyword
- # arguments in decorators
- return functools.partial(self.cache, ignore=ignore,
- verbose=verbose, mmap_mode=mmap_mode)
- if self.cachedir is None:
- return NotMemorizedFunc(func)
- if verbose is None:
- verbose = self._verbose
- if mmap_mode is False:
- mmap_mode = self.mmap_mode
- if isinstance(func, MemorizedFunc):
- func = func.func
- return MemorizedFunc(func, cachedir=self.cachedir,
- mmap_mode=mmap_mode,
- ignore=ignore,
- compress=self.compress,
- verbose=verbose,
- timestamp=self.timestamp)
-
- def clear(self, warn=True):
- """ Erase the complete cache directory.
- """
- if warn:
- self.warn('Flushing completely the cache')
- if self.cachedir is not None:
- rm_subdirs(self.cachedir)
-
- def eval(self, func, *args, **kwargs):
- """ Eval function func with arguments `*args` and `**kwargs`,
- in the context of the memory.
-
- This method works similarly to the builtin `apply`, except
- that the function is called only if the cache is not
- up to date.
-
- """
- if self.cachedir is None:
- return func(*args, **kwargs)
- return self.cache(func)(*args, **kwargs)
-
- #-------------------------------------------------------------------------
- # Private `object` interface
- #-------------------------------------------------------------------------
-
- def __repr__(self):
- return '%s(cachedir=%s)' % (
- self.__class__.__name__,
- repr(self.cachedir),
- )
-
- def __reduce__(self):
- """ We don't store the timestamp when pickling, to avoid the hash
- depending from it.
- In addition, when unpickling, we run the __init__
- """
- # We need to remove 'joblib' from the end of cachedir
- cachedir = self.cachedir[:-7] if self.cachedir is not None else None
- return (self.__class__, (cachedir,
- self.mmap_mode, self.compress, self._verbose))
diff --git a/mloop/localsklearn/externals/joblib/my_exceptions.py b/mloop/localsklearn/externals/joblib/my_exceptions.py
deleted file mode 100644
index 28f31dd..0000000
--- a/mloop/localsklearn/externals/joblib/my_exceptions.py
+++ /dev/null
@@ -1,112 +0,0 @@
-"""
-Exceptions
-"""
-# Author: Gael Varoquaux < gael dot varoquaux at normalesup dot org >
-# Copyright: 2010, Gael Varoquaux
-# License: BSD 3 clause
-
-import sys
-
-from ._compat import PY3_OR_LATER
-
-class JoblibException(Exception):
- """A simple exception with an error message that you can get to."""
- def __init__(self, *args):
- # We need to implement __init__ so that it is picked in the
- # multiple heritance hierarchy in the class created in
- # _mk_exception. Note: in Python 2, if you implement __init__
- # in your exception class you need to set .args correctly,
- # otherwise you can dump an exception instance with pickle but
- # not load it (at load time an empty .args will be passed to
- # the constructor). Also we want to be explicit and not use
- # 'super' here. Using 'super' can cause a sibling class method
- # to be called and we have no control the sibling class method
- # constructor signature in the exception returned by
- # _mk_exception.
- Exception.__init__(self, *args)
-
- def __repr__(self):
- if hasattr(self, 'args') and len(self.args) > 0:
- message = self.args[0]
- else:
- message = ''
-
- name = self.__class__.__name__
- return '%s\n%s\n%s\n%s' % (name, 75 * '_', message, 75 * '_')
-
- __str__ = __repr__
-
-
-class TransportableException(JoblibException):
- """An exception containing all the info to wrap an original
- exception and recreate it.
- """
-
- def __init__(self, message, etype):
- # The next line set the .args correctly. This is needed to
- # make the exception loadable with pickle
- JoblibException.__init__(self, message, etype)
- self.message = message
- self.etype = etype
-
-
-class WorkerInterrupt(Exception):
- """ An exception that is not KeyboardInterrupt to allow subprocesses
- to be interrupted.
- """
- pass
-
-
-_exception_mapping = dict()
-
-
-def _mk_exception(exception, name=None):
- # Create an exception inheriting from both JoblibException
- # and that exception
- if name is None:
- name = exception.__name__
- this_name = 'Joblib%s' % name
- if this_name in _exception_mapping:
- # Avoid creating twice the same exception
- this_exception = _exception_mapping[this_name]
- else:
- if exception is Exception:
- # JoblibException is already a subclass of Exception. No
- # need to use multiple inheritance
- return JoblibException, this_name
- try:
- this_exception = type(
- this_name, (JoblibException, exception), {})
- _exception_mapping[this_name] = this_exception
- except TypeError:
- # This happens if "Cannot create a consistent method
- # resolution order", e.g. because 'exception' is a
- # subclass of JoblibException or 'exception' is not an
- # acceptable base class
- this_exception = JoblibException
-
- return this_exception, this_name
-
-
-def _mk_common_exceptions():
- namespace = dict()
- if PY3_OR_LATER:
- import builtins as _builtin_exceptions
- common_exceptions = filter(
- lambda x: x.endswith('Error'),
- dir(_builtin_exceptions))
- else:
- import exceptions as _builtin_exceptions
- common_exceptions = dir(_builtin_exceptions)
-
- for name in common_exceptions:
- obj = getattr(_builtin_exceptions, name)
- if isinstance(obj, type) and issubclass(obj, BaseException):
- this_obj, this_name = _mk_exception(obj, name=name)
- namespace[this_name] = this_obj
- return namespace
-
-
-# Updating module locals so that the exceptions pickle right. AFAIK this
-# works only at module-creation time
-locals().update(_mk_common_exceptions())
diff --git a/mloop/localsklearn/externals/joblib/numpy_pickle.py b/mloop/localsklearn/externals/joblib/numpy_pickle.py
deleted file mode 100644
index 0cf88a2..0000000
--- a/mloop/localsklearn/externals/joblib/numpy_pickle.py
+++ /dev/null
@@ -1,577 +0,0 @@
-"""Utilities for fast persistence of big data, with optional compression."""
-
-# Author: Gael Varoquaux
-# Copyright (c) 2009 Gael Varoquaux
-# License: BSD Style, 3 clauses.
-
-import pickle
-import os
-import sys
-import warnings
-try:
- from pathlib import Path
-except ImportError:
- Path = None
-
-from .numpy_pickle_utils import _COMPRESSORS
-from .numpy_pickle_utils import BinaryZlibFile
-from .numpy_pickle_utils import Unpickler, Pickler
-from .numpy_pickle_utils import _read_fileobject, _write_fileobject
-from .numpy_pickle_utils import _read_bytes, BUFFER_SIZE
-from .numpy_pickle_compat import load_compatibility
-from .numpy_pickle_compat import NDArrayWrapper
-# For compatibility with old versions of joblib, we need ZNDArrayWrapper
-# to be visible in the current namespace.
-# Explicitly skipping next line from flake8 as it triggers an F401 warning
-# which we don't care.
-from .numpy_pickle_compat import ZNDArrayWrapper # noqa
-from ._compat import _basestring, PY3_OR_LATER
-
-###############################################################################
-# Utility objects for persistence.
-
-
-class NumpyArrayWrapper(object):
- """An object to be persisted instead of numpy arrays.
-
- This object is used to hack into the pickle machinery and read numpy
- array data from our custom persistence format.
- More precisely, this object is used for:
- * carrying the information of the persisted array: subclass, shape, order,
- dtype. Those ndarray metadata are used to correctly reconstruct the array
- with low level numpy functions.
- * determining if memmap is allowed on the array.
- * reading the array bytes from a file.
- * reading the array using memorymap from a file.
- * writing the array bytes to a file.
-
- Attributes
- ----------
- subclass: numpy.ndarray subclass
- Determine the subclass of the wrapped array.
- shape: numpy.ndarray shape
- Determine the shape of the wrapped array.
- order: {'C', 'F'}
- Determine the order of wrapped array data. 'C' is for C order, 'F' is
- for fortran order.
- dtype: numpy.ndarray dtype
- Determine the data type of the wrapped array.
- allow_mmap: bool
- Determine if memory mapping is allowed on the wrapped array.
- Default: False.
- """
-
- def __init__(self, subclass, shape, order, dtype, allow_mmap=False):
- """Constructor. Store the useful information for later."""
- self.subclass = subclass
- self.shape = shape
- self.order = order
- self.dtype = dtype
- self.allow_mmap = allow_mmap
-
- def write_array(self, array, pickler):
- """Write array bytes to pickler file handle.
-
- This function is an adaptation of the numpy write_array function
- available in version 1.10.1 in numpy/lib/format.py.
- """
- # Set buffer size to 16 MiB to hide the Python loop overhead.
- buffersize = max(16 * 1024 ** 2 // array.itemsize, 1)
- if array.dtype.hasobject:
- # We contain Python objects so we cannot write out the data
- # directly. Instead, we will pickle it out with version 2 of the
- # pickle protocol.
- pickle.dump(array, pickler.file_handle, protocol=2)
- else:
- for chunk in pickler.np.nditer(array,
- flags=['external_loop',
- 'buffered',
- 'zerosize_ok'],
- buffersize=buffersize,
- order=self.order):
- pickler.file_handle.write(chunk.tostring('C'))
-
- def read_array(self, unpickler):
- """Read array from unpickler file handle.
-
- This function is an adaptation of the numpy read_array function
- available in version 1.10.1 in numpy/lib/format.py.
- """
- if len(self.shape) == 0:
- count = 1
- else:
- count = unpickler.np.multiply.reduce(self.shape)
- # Now read the actual data.
- if self.dtype.hasobject:
- # The array contained Python objects. We need to unpickle the data.
- array = pickle.load(unpickler.file_handle)
- else:
- if (not PY3_OR_LATER and
- unpickler.np.compat.isfileobj(unpickler.file_handle)):
- # In python 2, gzip.GzipFile is considered as a file so one
- # can use numpy.fromfile().
- # For file objects, use np.fromfile function.
- # This function is faster than the memory-intensive
- # method below.
- array = unpickler.np.fromfile(unpickler.file_handle,
- dtype=self.dtype, count=count)
- else:
- # This is not a real file. We have to read it the
- # memory-intensive way.
- # crc32 module fails on reads greater than 2 ** 32 bytes,
- # breaking large reads from gzip streams. Chunk reads to
- # BUFFER_SIZE bytes to avoid issue and reduce memory overhead
- # of the read. In non-chunked case count < max_read_count, so
- # only one read is performed.
- max_read_count = BUFFER_SIZE // min(BUFFER_SIZE,
- self.dtype.itemsize)
-
- array = unpickler.np.empty(count, dtype=self.dtype)
- for i in range(0, count, max_read_count):
- read_count = min(max_read_count, count - i)
- read_size = int(read_count * self.dtype.itemsize)
- data = _read_bytes(unpickler.file_handle,
- read_size, "array data")
- array[i:i + read_count] = \
- unpickler.np.frombuffer(data, dtype=self.dtype,
- count=read_count)
- del data
-
- if self.order == 'F':
- array.shape = self.shape[::-1]
- array = array.transpose()
- else:
- array.shape = self.shape
-
- return array
-
- def read_mmap(self, unpickler):
- """Read an array using numpy memmap."""
- offset = unpickler.file_handle.tell()
- if unpickler.mmap_mode == 'w+':
- unpickler.mmap_mode = 'r+'
-
- marray = unpickler.np.memmap(unpickler.filename,
- dtype=self.dtype,
- shape=self.shape,
- order=self.order,
- mode=unpickler.mmap_mode,
- offset=offset)
- # update the offset so that it corresponds to the end of the read array
- unpickler.file_handle.seek(offset + marray.nbytes)
-
- return marray
-
- def read(self, unpickler):
- """Read the array corresponding to this wrapper.
-
- Use the unpickler to get all information to correctly read the array.
-
- Parameters
- ----------
- unpickler: NumpyUnpickler
-
- Returns
- -------
- array: numpy.ndarray
-
- """
- # When requested, only use memmap mode if allowed.
- if unpickler.mmap_mode is not None and self.allow_mmap:
- array = self.read_mmap(unpickler)
- else:
- array = self.read_array(unpickler)
-
- # Manage array subclass case
- if (hasattr(array, '__array_prepare__') and
- self.subclass not in (unpickler.np.ndarray,
- unpickler.np.memmap)):
- # We need to reconstruct another subclass
- new_array = unpickler.np.core.multiarray._reconstruct(
- self.subclass, (0,), 'b')
- return new_array.__array_prepare__(array)
- else:
- return array
-
-###############################################################################
-# Pickler classes
-
-
-class NumpyPickler(Pickler):
- """A pickler to persist big data efficiently.
-
- The main features of this object are:
- * persistence of numpy arrays in a single file.
- * optional compression with a special care on avoiding memory copies.
-
- Attributes
- ----------
- fp: file
- File object handle used for serializing the input object.
- protocol: int
- Pickle protocol used. Default is pickle.DEFAULT_PROTOCOL under
- python 3, pickle.HIGHEST_PROTOCOL otherwise.
- """
-
- dispatch = Pickler.dispatch.copy()
-
- def __init__(self, fp, protocol=None):
- self.file_handle = fp
- self.buffered = isinstance(self.file_handle, BinaryZlibFile)
-
- # By default we want a pickle protocol that only changes with
- # the major python version and not the minor one
- if protocol is None:
- protocol = (pickle.DEFAULT_PROTOCOL if PY3_OR_LATER
- else pickle.HIGHEST_PROTOCOL)
-
- Pickler.__init__(self, self.file_handle, protocol=protocol)
- # delayed import of numpy, to avoid tight coupling
- try:
- import numpy as np
- except ImportError:
- np = None
- self.np = np
-
- def _create_array_wrapper(self, array):
- """Create and returns a numpy array wrapper from a numpy array."""
- order = 'F' if (array.flags.f_contiguous and
- not array.flags.c_contiguous) else 'C'
- allow_mmap = not self.buffered and not array.dtype.hasobject
- wrapper = NumpyArrayWrapper(type(array),
- array.shape, order, array.dtype,
- allow_mmap=allow_mmap)
-
- return wrapper
-
- def save(self, obj):
- """Subclass the Pickler `save` method.
-
- This is a total abuse of the Pickler class in order to use the numpy
- persistence function `save` instead of the default pickle
- implementation. The numpy array is replaced by a custom wrapper in the
- pickle persistence stack and the serialized array is written right
- after in the file. Warning: the file produced does not follow the
- pickle format. As such it can not be read with `pickle.load`.
- """
- if self.np is not None and type(obj) in (self.np.ndarray,
- self.np.matrix,
- self.np.memmap):
- if type(obj) is self.np.memmap:
- # Pickling doesn't work with memmapped arrays
- obj = self.np.asanyarray(obj)
-
- # The array wrapper is pickled instead of the real array.
- wrapper = self._create_array_wrapper(obj)
- Pickler.save(self, wrapper)
-
- # A framer was introduced with pickle protocol 4 and we want to
- # ensure the wrapper object is written before the numpy array
- # buffer in the pickle file.
- # See https://www.python.org/dev/peps/pep-3154/#framing to get
- # more information on the framer behavior.
- if self.proto >= 4:
- self.framer.commit_frame(force=True)
-
- # And then array bytes are written right after the wrapper.
- wrapper.write_array(obj, self)
- return
-
- return Pickler.save(self, obj)
-
-
-class NumpyUnpickler(Unpickler):
- """A subclass of the Unpickler to unpickle our numpy pickles.
-
- Attributes
- ----------
- mmap_mode: str
- The memorymap mode to use for reading numpy arrays.
- file_handle: file_like
- File object to unpickle from.
- filename: str
- Name of the file to unpickle from. It should correspond to file_handle.
- This parameter is required when using mmap_mode.
- np: module
- Reference to numpy module if numpy is installed else None.
-
- """
-
- dispatch = Unpickler.dispatch.copy()
-
- def __init__(self, filename, file_handle, mmap_mode=None):
- # The next line is for backward compatibility with pickle generated
- # with joblib versions less than 0.10.
- self._dirname = os.path.dirname(filename)
-
- self.mmap_mode = mmap_mode
- self.file_handle = file_handle
- # filename is required for numpy mmap mode.
- self.filename = filename
- self.compat_mode = False
- Unpickler.__init__(self, self.file_handle)
- try:
- import numpy as np
- except ImportError:
- np = None
- self.np = np
-
- def load_build(self):
- """Called to set the state of a newly created object.
-
- We capture it to replace our place-holder objects, NDArrayWrapper or
- NumpyArrayWrapper, by the array we are interested in. We
- replace them directly in the stack of pickler.
- NDArrayWrapper is used for backward compatibility with joblib <= 0.9.
- """
- Unpickler.load_build(self)
-
- # For backward compatibility, we support NDArrayWrapper objects.
- if isinstance(self.stack[-1], (NDArrayWrapper, NumpyArrayWrapper)):
- if self.np is None:
- raise ImportError("Trying to unpickle an ndarray, "
- "but numpy didn't import correctly")
- array_wrapper = self.stack.pop()
- # If any NDArrayWrapper is found, we switch to compatibility mode,
- # this will be used to raise a DeprecationWarning to the user at
- # the end of the unpickling.
- if isinstance(array_wrapper, NDArrayWrapper):
- self.compat_mode = True
- self.stack.append(array_wrapper.read(self))
-
- # Be careful to register our new method.
- if PY3_OR_LATER:
- dispatch[pickle.BUILD[0]] = load_build
- else:
- dispatch[pickle.BUILD] = load_build
-
-
-###############################################################################
-# Utility functions
-
-def dump(value, filename, compress=0, protocol=None, cache_size=None):
- """Persist an arbitrary Python object into one file.
-
- Parameters
- -----------
- value: any Python object
- The object to store to disk.
- filename: str or pathlib.Path
- The path of the file in which it is to be stored. The compression
- method corresponding to one of the supported filename extensions ('.z',
- '.gz', '.bz2', '.xz' or '.lzma') will be used automatically.
- compress: int from 0 to 9 or bool or 2-tuple, optional
- Optional compression level for the data. 0 or False is no compression.
- Higher value means more compression, but also slower read and
- write times. Using a value of 3 is often a good compromise.
- See the notes for more details.
- If compress is True, the compression level used is 3.
- If compress is a 2-tuple, the first element must correspond to a string
- between supported compressors (e.g 'zlib', 'gzip', 'bz2', 'lzma'
- 'xz'), the second element must be an integer from 0 to 9, corresponding
- to the compression level.
- protocol: positive int
- Pickle protocol, see pickle.dump documentation for more details.
- cache_size: positive int, optional
- This option is deprecated in 0.10 and has no effect.
-
- Returns
- -------
- filenames: list of strings
- The list of file names in which the data is stored. If
- compress is false, each array is stored in a different file.
-
- See Also
- --------
- joblib.load : corresponding loader
-
- Notes
- -----
- Memmapping on load cannot be used for compressed files. Thus
- using compression can significantly slow down loading. In
- addition, compressed files take extra extra memory during
- dump and load.
-
- """
-
- if Path is not None and isinstance(filename, Path):
- filename = str(filename)
-
- is_filename = isinstance(filename, _basestring)
- is_fileobj = hasattr(filename, "write")
-
- compress_method = 'zlib' # zlib is the default compression method.
- if compress is True:
- # By default, if compress is enabled, we want to be using 3 by default
- compress_level = 3
- elif isinstance(compress, tuple):
- # a 2-tuple was set in compress
- if len(compress) != 2:
- raise ValueError(
- 'Compress argument tuple should contain exactly 2 elements: '
- '(compress method, compress level), you passed {0}'
- .format(compress))
- compress_method, compress_level = compress
- else:
- compress_level = compress
-
- if compress_level is not False and compress_level not in range(10):
- # Raising an error if a non valid compress level is given.
- raise ValueError(
- 'Non valid compress level given: "{0}". Possible values are '
- '{1}.'.format(compress_level, list(range(10))))
-
- if compress_method not in _COMPRESSORS:
- # Raising an error if an unsupported compression method is given.
- raise ValueError(
- 'Non valid compression method given: "{0}". Possible values are '
- '{1}.'.format(compress_method, _COMPRESSORS))
-
- if not is_filename and not is_fileobj:
- # People keep inverting arguments, and the resulting error is
- # incomprehensible
- raise ValueError(
- 'Second argument should be a filename or a file-like object, '
- '%s (type %s) was given.'
- % (filename, type(filename))
- )
-
- if is_filename and not isinstance(compress, tuple):
- # In case no explicit compression was requested using both compression
- # method and level in a tuple and the filename has an explicit
- # extension, we select the corresponding compressor.
- if filename.endswith('.z'):
- compress_method = 'zlib'
- elif filename.endswith('.gz'):
- compress_method = 'gzip'
- elif filename.endswith('.bz2'):
- compress_method = 'bz2'
- elif filename.endswith('.lzma'):
- compress_method = 'lzma'
- elif filename.endswith('.xz'):
- compress_method = 'xz'
- else:
- # no matching compression method found, we unset the variable to
- # be sure no compression level is set afterwards.
- compress_method = None
-
- if compress_method in _COMPRESSORS and compress_level == 0:
- # we choose a default compress_level of 3 in case it was not given
- # as an argument (using compress).
- compress_level = 3
-
- if not PY3_OR_LATER and compress_method in ('lzma', 'xz'):
- raise NotImplementedError("{0} compression is only available for "
- "python version >= 3.3. You are using "
- "{1}.{2}".format(compress_method,
- sys.version_info[0],
- sys.version_info[1]))
-
- if cache_size is not None:
- # Cache size is deprecated starting from version 0.10
- warnings.warn("Please do not set 'cache_size' in joblib.dump, "
- "this parameter has no effect and will be removed. "
- "You used 'cache_size={0}'".format(cache_size),
- DeprecationWarning, stacklevel=2)
-
- if compress_level != 0:
- with _write_fileobject(filename, compress=(compress_method,
- compress_level)) as f:
- NumpyPickler(f, protocol=protocol).dump(value)
- elif is_filename:
- with open(filename, 'wb') as f:
- NumpyPickler(f, protocol=protocol).dump(value)
- else:
- NumpyPickler(filename, protocol=protocol).dump(value)
-
- # If the target container is a file object, nothing is returned.
- if is_fileobj:
- return
-
- # For compatibility, the list of created filenames (e.g with one element
- # after 0.10.0) is returned by default.
- return [filename]
-
-
-def _unpickle(fobj, filename="", mmap_mode=None):
- """Internal unpickling function."""
- # We are careful to open the file handle early and keep it open to
- # avoid race-conditions on renames.
- # That said, if data is stored in companion files, which can be
- # the case with the old persistence format, moving the directory
- # will create a race when joblib tries to access the companion
- # files.
- unpickler = NumpyUnpickler(filename, fobj, mmap_mode=mmap_mode)
- obj = None
- try:
- obj = unpickler.load()
- if unpickler.compat_mode:
- warnings.warn("The file '%s' has been generated with a "
- "joblib version less than 0.10. "
- "Please regenerate this pickle file."
- % filename,
- DeprecationWarning, stacklevel=3)
- except UnicodeDecodeError as exc:
- # More user-friendly error message
- if PY3_OR_LATER:
- new_exc = ValueError(
- 'You may be trying to read with '
- 'python 3 a joblib pickle generated with python 2. '
- 'This feature is not supported by joblib.')
- new_exc.__cause__ = exc
- raise new_exc
- # Reraise exception with Python 2
- raise
-
- return obj
-
-
-def load(filename, mmap_mode=None):
- """Reconstruct a Python object from a file persisted with joblib.dump.
-
- Parameters
- -----------
- filename: str or pathlib.Path
- The path of the file from which to load the object
- mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
- If not None, the arrays are memory-mapped from the disk. This
- mode has no effect for compressed files. Note that in this
- case the reconstructed object might not longer match exactly
- the originally pickled object.
-
- Returns
- -------
- result: any Python object
- The object stored in the file.
-
- See Also
- --------
- joblib.dump : function to save an object
-
- Notes
- -----
-
- This function can load numpy array files saved separately during the
- dump. If the mmap_mode argument is given, it is passed to np.load and
- arrays are loaded as memmaps. As a consequence, the reconstructed
- object might not match the original pickled object. Note that if the
- file was saved with compression, the arrays cannot be memmaped.
- """
- if Path is not None and isinstance(filename, Path):
- filename = str(filename)
-
- if hasattr(filename, "read") and hasattr(filename, "seek"):
- with _read_fileobject(filename, "", mmap_mode) as fobj:
- obj = _unpickle(fobj)
- else:
- with open(filename, 'rb') as f:
- with _read_fileobject(f, filename, mmap_mode) as fobj:
- if isinstance(fobj, _basestring):
- # if the returned file object is a string, this means we
- # try to load a pickle file generated with an version of
- # Joblib so we load it with joblib compatibility function.
- return load_compatibility(fobj)
-
- obj = _unpickle(fobj, filename, mmap_mode)
-
- return obj
diff --git a/mloop/localsklearn/externals/joblib/numpy_pickle_compat.py b/mloop/localsklearn/externals/joblib/numpy_pickle_compat.py
deleted file mode 100644
index 150d8f4..0000000
--- a/mloop/localsklearn/externals/joblib/numpy_pickle_compat.py
+++ /dev/null
@@ -1,239 +0,0 @@
-"""Numpy pickle compatibility functions."""
-
-import pickle
-import os
-import zlib
-from io import BytesIO
-
-from ._compat import PY3_OR_LATER
-from .numpy_pickle_utils import _ZFILE_PREFIX
-from .numpy_pickle_utils import Unpickler
-
-
-def hex_str(an_int):
- """Convert an int to an hexadecimal string."""
- return '{0:#x}'.format(an_int)
-
-if PY3_OR_LATER:
- def asbytes(s):
- if isinstance(s, bytes):
- return s
- return s.encode('latin1')
-else:
- asbytes = str
-
-_MAX_LEN = len(hex_str(2 ** 64))
-_CHUNK_SIZE = 64 * 1024
-
-
-def read_zfile(file_handle):
- """Read the z-file and return the content as a string.
-
- Z-files are raw data compressed with zlib used internally by joblib
- for persistence. Backward compatibility is not guaranteed. Do not
- use for external purposes.
- """
- file_handle.seek(0)
- header_length = len(_ZFILE_PREFIX) + _MAX_LEN
- length = file_handle.read(header_length)
- length = length[len(_ZFILE_PREFIX):]
- length = int(length, 16)
-
- # With python2 and joblib version <= 0.8.4 compressed pickle header is one
- # character wider so we need to ignore an additional space if present.
- # Note: the first byte of the zlib data is guaranteed not to be a
- # space according to
- # https://tools.ietf.org/html/rfc6713#section-2.1
- next_byte = file_handle.read(1)
- if next_byte != b' ':
- # The zlib compressed data has started and we need to go back
- # one byte
- file_handle.seek(header_length)
-
- # We use the known length of the data to tell Zlib the size of the
- # buffer to allocate.
- data = zlib.decompress(file_handle.read(), 15, length)
- assert len(data) == length, (
- "Incorrect data length while decompressing %s."
- "The file could be corrupted." % file_handle)
- return data
-
-
-def write_zfile(file_handle, data, compress=1):
- """Write the data in the given file as a Z-file.
-
- Z-files are raw data compressed with zlib used internally by joblib
- for persistence. Backward compatibility is not guarantied. Do not
- use for external purposes.
- """
- file_handle.write(_ZFILE_PREFIX)
- length = hex_str(len(data))
- # Store the length of the data
- file_handle.write(asbytes(length.ljust(_MAX_LEN)))
- file_handle.write(zlib.compress(asbytes(data), compress))
-
-###############################################################################
-# Utility objects for persistence.
-
-
-class NDArrayWrapper(object):
- """An object to be persisted instead of numpy arrays.
-
- The only thing this object does, is to carry the filename in which
- the array has been persisted, and the array subclass.
- """
-
- def __init__(self, filename, subclass, allow_mmap=True):
- """Constructor. Store the useful information for later."""
- self.filename = filename
- self.subclass = subclass
- self.allow_mmap = allow_mmap
-
- def read(self, unpickler):
- """Reconstruct the array."""
- filename = os.path.join(unpickler._dirname, self.filename)
- # Load the array from the disk
- # use getattr instead of self.allow_mmap to ensure backward compat
- # with NDArrayWrapper instances pickled with joblib < 0.9.0
- allow_mmap = getattr(self, 'allow_mmap', True)
- memmap_kwargs = ({} if not allow_mmap
- else {'mmap_mode': unpickler.mmap_mode})
- array = unpickler.np.load(filename, **memmap_kwargs)
- # Reconstruct subclasses. This does not work with old
- # versions of numpy
- if (hasattr(array, '__array_prepare__') and
- self.subclass not in (unpickler.np.ndarray,
- unpickler.np.memmap)):
- # We need to reconstruct another subclass
- new_array = unpickler.np.core.multiarray._reconstruct(
- self.subclass, (0,), 'b')
- return new_array.__array_prepare__(array)
- else:
- return array
-
-
-class ZNDArrayWrapper(NDArrayWrapper):
- """An object to be persisted instead of numpy arrays.
-
- This object store the Zfile filename in which
- the data array has been persisted, and the meta information to
- retrieve it.
- The reason that we store the raw buffer data of the array and
- the meta information, rather than array representation routine
- (tostring) is that it enables us to use completely the strided
- model to avoid memory copies (a and a.T store as fast). In
- addition saving the heavy information separately can avoid
- creating large temporary buffers when unpickling data with
- large arrays.
- """
-
- def __init__(self, filename, init_args, state):
- """Constructor. Store the useful information for later."""
- self.filename = filename
- self.state = state
- self.init_args = init_args
-
- def read(self, unpickler):
- """Reconstruct the array from the meta-information and the z-file."""
- # Here we a simply reproducing the unpickling mechanism for numpy
- # arrays
- filename = os.path.join(unpickler._dirname, self.filename)
- array = unpickler.np.core.multiarray._reconstruct(*self.init_args)
- with open(filename, 'rb') as f:
- data = read_zfile(f)
- state = self.state + (data,)
- array.__setstate__(state)
- return array
-
-
-class ZipNumpyUnpickler(Unpickler):
- """A subclass of the Unpickler to unpickle our numpy pickles."""
-
- dispatch = Unpickler.dispatch.copy()
-
- def __init__(self, filename, file_handle, mmap_mode=None):
- """Constructor."""
- self._filename = os.path.basename(filename)
- self._dirname = os.path.dirname(filename)
- self.mmap_mode = mmap_mode
- self.file_handle = self._open_pickle(file_handle)
- Unpickler.__init__(self, self.file_handle)
- try:
- import numpy as np
- except ImportError:
- np = None
- self.np = np
-
- def _open_pickle(self, file_handle):
- return BytesIO(read_zfile(file_handle))
-
- def load_build(self):
- """Set the state of a newly created object.
-
- We capture it to replace our place-holder objects,
- NDArrayWrapper, by the array we are interested in. We
- replace them directly in the stack of pickler.
- """
- Unpickler.load_build(self)
- if isinstance(self.stack[-1], NDArrayWrapper):
- if self.np is None:
- raise ImportError("Trying to unpickle an ndarray, "
- "but numpy didn't import correctly")
- nd_array_wrapper = self.stack.pop()
- array = nd_array_wrapper.read(self)
- self.stack.append(array)
-
- # Be careful to register our new method.
- if PY3_OR_LATER:
- dispatch[pickle.BUILD[0]] = load_build
- else:
- dispatch[pickle.BUILD] = load_build
-
-
-def load_compatibility(filename):
- """Reconstruct a Python object from a file persisted with joblib.dump.
-
- This function ensures the compatibility with joblib old persistence format
- (<= 0.9.3).
-
- Parameters
- -----------
- filename: string
- The name of the file from which to load the object
-
- Returns
- -------
- result: any Python object
- The object stored in the file.
-
- See Also
- --------
- joblib.dump : function to save an object
-
- Notes
- -----
-
- This function can load numpy array files saved separately during the
- dump.
- """
- with open(filename, 'rb') as file_handle:
- # We are careful to open the file handle early and keep it open to
- # avoid race-conditions on renames. That said, if data is stored in
- # companion files, moving the directory will create a race when
- # joblib tries to access the companion files.
- unpickler = ZipNumpyUnpickler(filename, file_handle=file_handle)
- try:
- obj = unpickler.load()
- except UnicodeDecodeError as exc:
- # More user-friendly error message
- if PY3_OR_LATER:
- new_exc = ValueError(
- 'You may be trying to read with '
- 'python 3 a joblib pickle generated with python 2. '
- 'This feature is not supported by joblib.')
- new_exc.__cause__ = exc
- raise new_exc
- finally:
- if hasattr(unpickler, 'file_handle'):
- unpickler.file_handle.close()
- return obj
diff --git a/mloop/localsklearn/externals/joblib/numpy_pickle_utils.py b/mloop/localsklearn/externals/joblib/numpy_pickle_utils.py
deleted file mode 100644
index ee879a6..0000000
--- a/mloop/localsklearn/externals/joblib/numpy_pickle_utils.py
+++ /dev/null
@@ -1,623 +0,0 @@
-"""Utilities for fast persistence of big data, with optional compression."""
-
-# Author: Gael Varoquaux
-# Copyright (c) 2009 Gael Varoquaux
-# License: BSD Style, 3 clauses.
-
-import pickle
-import sys
-import io
-import zlib
-import gzip
-import bz2
-import warnings
-import contextlib
-from contextlib import closing
-
-from ._compat import PY3_OR_LATER, PY26, PY27
-
-try:
- from threading import RLock
-except ImportError:
- from dummy_threading import RLock
-
-if PY3_OR_LATER:
- Unpickler = pickle._Unpickler
- Pickler = pickle._Pickler
- xrange = range
-else:
- Unpickler = pickle.Unpickler
- Pickler = pickle.Pickler
-
-try:
- import numpy as np
-except ImportError:
- np = None
-
-try:
- import lzma
-except ImportError:
- lzma = None
-
-
-# Magic numbers of supported compression file formats. '
-_ZFILE_PREFIX = b'ZF' # used with pickle files created before 0.9.3.
-_ZLIB_PREFIX = b'\x78'
-_GZIP_PREFIX = b'\x1f\x8b'
-_BZ2_PREFIX = b'BZ'
-_XZ_PREFIX = b'\xfd\x37\x7a\x58\x5a'
-_LZMA_PREFIX = b'\x5d\x00'
-
-# Supported compressors
-_COMPRESSORS = ('zlib', 'bz2', 'lzma', 'xz', 'gzip')
-_COMPRESSOR_CLASSES = [gzip.GzipFile, bz2.BZ2File]
-if lzma is not None:
- _COMPRESSOR_CLASSES.append(lzma.LZMAFile)
-
-# The max magic number length of supported compression file types.
-_MAX_PREFIX_LEN = max(len(prefix)
- for prefix in (_ZFILE_PREFIX, _GZIP_PREFIX, _BZ2_PREFIX,
- _XZ_PREFIX, _LZMA_PREFIX))
-
-# Buffer size used in io.BufferedReader and io.BufferedWriter
-_IO_BUFFER_SIZE = 1024 ** 2
-
-
-###############################################################################
-# Cache file utilities
-def _detect_compressor(fileobj):
- """Return the compressor matching fileobj.
-
- Parameters
- ----------
- fileobj: file object
-
- Returns
- -------
- str in {'zlib', 'gzip', 'bz2', 'lzma', 'xz', 'compat', 'not-compressed'}
- """
- # Ensure we read the first bytes.
- fileobj.seek(0)
- first_bytes = fileobj.read(_MAX_PREFIX_LEN)
- fileobj.seek(0)
-
- if first_bytes.startswith(_ZLIB_PREFIX):
- return "zlib"
- elif first_bytes.startswith(_GZIP_PREFIX):
- return "gzip"
- elif first_bytes.startswith(_BZ2_PREFIX):
- return "bz2"
- elif first_bytes.startswith(_LZMA_PREFIX):
- return "lzma"
- elif first_bytes.startswith(_XZ_PREFIX):
- return "xz"
- elif first_bytes.startswith(_ZFILE_PREFIX):
- return "compat"
-
- return "not-compressed"
-
-
-def _buffered_read_file(fobj):
- """Return a buffered version of a read file object."""
- if PY26 or (PY27 and isinstance(fobj, bz2.BZ2File)):
- # Python 2.6 doesn't fully support io.BufferedReader.
- # Python 2.7 doesn't work with BZ2File through a buffer: "no
- # attribute 'readable'" error.
- return fobj
- else:
- return io.BufferedReader(fobj, buffer_size=_IO_BUFFER_SIZE)
-
-
-def _buffered_write_file(fobj):
- """Return a buffered version of a write file object."""
- if PY26 or (PY27 and isinstance(fobj, bz2.BZ2File)):
- # Python 2.6 doesn't fully support io.BufferedWriter.
- # Python 2.7 doesn't work with BZ2File through a buffer: no attribute
- # 'writable'.
- # BZ2File doesn't implement the file object context manager in python 2
- # so we wrap the fileobj using `closing`.
- return closing(fobj)
- else:
- return io.BufferedWriter(fobj, buffer_size=_IO_BUFFER_SIZE)
-
-
-@contextlib.contextmanager
-def _read_fileobject(fileobj, filename, mmap_mode=None):
- """Utility function opening the right fileobject from a filename.
-
- The magic number is used to choose between the type of file object to open:
- * regular file object (default)
- * zlib file object
- * gzip file object
- * bz2 file object
- * lzma file object (for xz and lzma compressor)
-
- Parameters
- ----------
- fileobj: file object
- compressor: str in {'zlib', 'gzip', 'bz2', 'lzma', 'xz', 'compat',
- 'not-compressed'}
- filename: str
- filename path corresponding to the fileobj parameter.
- mmap_mode: str
- memory map mode that should be used to open the pickle file. This
- parameter is useful to verify that the user is not trying to one with
- compression. Default: None.
-
- Returns
- -------
- a file like object
-
- """
- # Detect if the fileobj contains compressed data.
- compressor = _detect_compressor(fileobj)
- if isinstance(fileobj, tuple(_COMPRESSOR_CLASSES)):
- compressor = fileobj.__class__.__name__
- if compressor == 'compat':
- # Compatibility with old pickle mode: simply return the input
- # filename "as-is" and let the compatibility function be called by the
- # caller.
- warnings.warn("The file '%s' has been generated with a joblib "
- "version less than 0.10. "
- "Please regenerate this pickle file." % filename,
- DeprecationWarning, stacklevel=2)
- yield filename
- else:
- # Checking if incompatible load parameters with the type of file:
- # mmap_mode cannot be used with compressed file or in memory buffers
- # such as io.BytesIO.
- if ((compressor in _COMPRESSORS or
- isinstance(fileobj, tuple(_COMPRESSOR_CLASSES))) and
- mmap_mode is not None):
- warnings.warn('File "%(filename)s" is compressed using '
- '"%(compressor)s" which is not compatible with '
- 'mmap_mode "%(mmap_mode)s" flag passed. mmap_mode '
- 'option will be ignored.'
- % locals(), stacklevel=2)
- if isinstance(fileobj, io.BytesIO) and mmap_mode is not None:
- warnings.warn('In memory persistence is not compatible with '
- 'mmap_mode "%(mmap_mode)s" flag passed. mmap_mode '
- 'option will be ignored.'
- % locals(), stacklevel=2)
-
- # if the passed fileobj is in the supported list of decompressor
- # objects (GzipFile, BZ2File, LzmaFile), we simply return it.
- if isinstance(fileobj, tuple(_COMPRESSOR_CLASSES)):
- yield fileobj
- # otherwise, based on the compressor detected in the file, we open the
- # correct decompressor file object, wrapped in a buffer.
- elif compressor == 'zlib':
- yield _buffered_read_file(BinaryZlibFile(fileobj, 'rb'))
- elif compressor == 'gzip':
- yield _buffered_read_file(BinaryGzipFile(fileobj, 'rb'))
- elif compressor == 'bz2':
- if PY3_OR_LATER:
- yield _buffered_read_file(bz2.BZ2File(fileobj, 'rb'))
- else:
- # In python 2, BZ2File doesn't support a fileobj opened in
- # binary mode. In this case, we pass the filename.
- yield _buffered_read_file(bz2.BZ2File(fileobj.name, 'rb'))
- elif (compressor == 'lzma' or compressor == 'xz'):
- if lzma is not None:
- yield _buffered_read_file(lzma.LZMAFile(fileobj, 'rb'))
- else:
- raise NotImplementedError("Lzma decompression is not "
- "available for this version of "
- "python ({0}.{1})"
- .format(sys.version_info[0],
- sys.version_info[1]))
- # No compression detected => returning the input file object (open)
- else:
- yield fileobj
-
-
-def _write_fileobject(filename, compress=("zlib", 3)):
- """Return the right compressor file object in write mode."""
- compressmethod = compress[0]
- compresslevel = compress[1]
- if compressmethod == "gzip":
- return _buffered_write_file(BinaryGzipFile(filename, 'wb',
- compresslevel=compresslevel))
- elif compressmethod == "bz2":
- return _buffered_write_file(bz2.BZ2File(filename, 'wb',
- compresslevel=compresslevel))
- elif lzma is not None and compressmethod == "xz":
- return _buffered_write_file(lzma.LZMAFile(filename, 'wb',
- check=lzma.CHECK_NONE,
- preset=compresslevel))
- elif lzma is not None and compressmethod == "lzma":
- return _buffered_write_file(lzma.LZMAFile(filename, 'wb',
- preset=compresslevel,
- format=lzma.FORMAT_ALONE))
- else:
- return _buffered_write_file(BinaryZlibFile(filename, 'wb',
- compresslevel=compresslevel))
-
-
-###############################################################################
-# Joblib zlib compression file object definition
-
-_MODE_CLOSED = 0
-_MODE_READ = 1
-_MODE_READ_EOF = 2
-_MODE_WRITE = 3
-_BUFFER_SIZE = 8192
-
-
-class BinaryZlibFile(io.BufferedIOBase):
- """A file object providing transparent zlib (de)compression.
-
- A BinaryZlibFile can act as a wrapper for an existing file object, or refer
- directly to a named file on disk.
-
- Note that BinaryZlibFile provides only a *binary* file interface: data read
- is returned as bytes, and data to be written should be given as bytes.
-
- This object is an adaptation of the BZ2File object and is compatible with
- versions of python >= 2.6.
-
- If filename is a str or bytes object, it gives the name
- of the file to be opened. Otherwise, it should be a file object,
- which will be used to read or write the compressed data.
-
- mode can be 'rb' for reading (default) or 'wb' for (over)writing
-
- If mode is 'wb', compresslevel can be a number between 1
- and 9 specifying the level of compression: 1 produces the least
- compression, and 9 (default) produces the most compression.
- """
-
- wbits = zlib.MAX_WBITS
-
- def __init__(self, filename, mode="rb", compresslevel=9):
- # This lock must be recursive, so that BufferedIOBase's
- # readline(), readlines() and writelines() don't deadlock.
- self._lock = RLock()
- self._fp = None
- self._closefp = False
- self._mode = _MODE_CLOSED
- self._pos = 0
- self._size = -1
-
- if not isinstance(compresslevel, int) or not (1 <= compresslevel <= 9):
- raise ValueError("compresslevel must be between an integer "
- "between 1 and 9, you gave {0}"
- .format(compresslevel))
-
- if mode == "rb":
- mode_code = _MODE_READ
- self._decompressor = zlib.decompressobj(self.wbits)
- self._buffer = b""
- self._buffer_offset = 0
- elif mode == "wb":
- mode_code = _MODE_WRITE
- self._compressor = zlib.compressobj(compresslevel,
- zlib.DEFLATED,
- self.wbits,
- zlib.DEF_MEM_LEVEL,
- 0)
- else:
- raise ValueError("Invalid mode: %r" % (mode,))
-
- if isinstance(filename, (str, bytes)):
- self._fp = open(filename, mode)
- self._closefp = True
- self._mode = mode_code
- elif hasattr(filename, "read") or hasattr(filename, "write"):
- self._fp = filename
- self._mode = mode_code
- else:
- raise TypeError("filename must be a str or bytes object, "
- "or a file")
-
- def close(self):
- """Flush and close the file.
-
- May be called more than once without error. Once the file is
- closed, any other operation on it will raise a ValueError.
- """
- with self._lock:
- if self._mode == _MODE_CLOSED:
- return
- try:
- if self._mode in (_MODE_READ, _MODE_READ_EOF):
- self._decompressor = None
- elif self._mode == _MODE_WRITE:
- self._fp.write(self._compressor.flush())
- self._compressor = None
- finally:
- try:
- if self._closefp:
- self._fp.close()
- finally:
- self._fp = None
- self._closefp = False
- self._mode = _MODE_CLOSED
- self._buffer = b""
- self._buffer_offset = 0
-
- @property
- def closed(self):
- """True if this file is closed."""
- return self._mode == _MODE_CLOSED
-
- def fileno(self):
- """Return the file descriptor for the underlying file."""
- self._check_not_closed()
- return self._fp.fileno()
-
- def seekable(self):
- """Return whether the file supports seeking."""
- return self.readable() and self._fp.seekable()
-
- def readable(self):
- """Return whether the file was opened for reading."""
- self._check_not_closed()
- return self._mode in (_MODE_READ, _MODE_READ_EOF)
-
- def writable(self):
- """Return whether the file was opened for writing."""
- self._check_not_closed()
- return self._mode == _MODE_WRITE
-
- # Mode-checking helper functions.
-
- def _check_not_closed(self):
- if self.closed:
- fname = getattr(self._fp, 'name', None)
- msg = "I/O operation on closed file"
- if fname is not None:
- msg += " {0}".format(fname)
- msg += "."
- raise ValueError(msg)
-
- def _check_can_read(self):
- if self._mode not in (_MODE_READ, _MODE_READ_EOF):
- self._check_not_closed()
- raise io.UnsupportedOperation("File not open for reading")
-
- def _check_can_write(self):
- if self._mode != _MODE_WRITE:
- self._check_not_closed()
- raise io.UnsupportedOperation("File not open for writing")
-
- def _check_can_seek(self):
- if self._mode not in (_MODE_READ, _MODE_READ_EOF):
- self._check_not_closed()
- raise io.UnsupportedOperation("Seeking is only supported "
- "on files open for reading")
- if not self._fp.seekable():
- raise io.UnsupportedOperation("The underlying file object "
- "does not support seeking")
-
- # Fill the readahead buffer if it is empty. Returns False on EOF.
- def _fill_buffer(self):
- if self._mode == _MODE_READ_EOF:
- return False
- # Depending on the input data, our call to the decompressor may not
- # return any data. In this case, try again after reading another block.
- while self._buffer_offset == len(self._buffer):
- try:
- rawblock = (self._decompressor.unused_data or
- self._fp.read(_BUFFER_SIZE))
-
- if not rawblock:
- raise EOFError
- except EOFError:
- # End-of-stream marker and end of file. We're good.
- self._mode = _MODE_READ_EOF
- self._size = self._pos
- return False
- else:
- self._buffer = self._decompressor.decompress(rawblock)
- self._buffer_offset = 0
- return True
-
- # Read data until EOF.
- # If return_data is false, consume the data without returning it.
- def _read_all(self, return_data=True):
- # The loop assumes that _buffer_offset is 0. Ensure that this is true.
- self._buffer = self._buffer[self._buffer_offset:]
- self._buffer_offset = 0
-
- blocks = []
- while self._fill_buffer():
- if return_data:
- blocks.append(self._buffer)
- self._pos += len(self._buffer)
- self._buffer = b""
- if return_data:
- return b"".join(blocks)
-
- # Read a block of up to n bytes.
- # If return_data is false, consume the data without returning it.
- def _read_block(self, n_bytes, return_data=True):
- # If we have enough data buffered, return immediately.
- end = self._buffer_offset + n_bytes
- if end <= len(self._buffer):
- data = self._buffer[self._buffer_offset: end]
- self._buffer_offset = end
- self._pos += len(data)
- return data if return_data else None
-
- # The loop assumes that _buffer_offset is 0. Ensure that this is true.
- self._buffer = self._buffer[self._buffer_offset:]
- self._buffer_offset = 0
-
- blocks = []
- while n_bytes > 0 and self._fill_buffer():
- if n_bytes < len(self._buffer):
- data = self._buffer[:n_bytes]
- self._buffer_offset = n_bytes
- else:
- data = self._buffer
- self._buffer = b""
- if return_data:
- blocks.append(data)
- self._pos += len(data)
- n_bytes -= len(data)
- if return_data:
- return b"".join(blocks)
-
- def read(self, size=-1):
- """Read up to size uncompressed bytes from the file.
-
- If size is negative or omitted, read until EOF is reached.
- Returns b'' if the file is already at EOF.
- """
- with self._lock:
- self._check_can_read()
- if size == 0:
- return b""
- elif size < 0:
- return self._read_all()
- else:
- return self._read_block(size)
-
- def readinto(self, b):
- """Read up to len(b) bytes into b.
-
- Returns the number of bytes read (0 for EOF).
- """
- with self._lock:
- return io.BufferedIOBase.readinto(self, b)
-
- def write(self, data):
- """Write a byte string to the file.
-
- Returns the number of uncompressed bytes written, which is
- always len(data). Note that due to buffering, the file on disk
- may not reflect the data written until close() is called.
- """
- with self._lock:
- self._check_can_write()
- # Convert data type if called by io.BufferedWriter.
- if not PY26 and isinstance(data, memoryview):
- data = data.tobytes()
-
- compressed = self._compressor.compress(data)
- self._fp.write(compressed)
- self._pos += len(data)
- return len(data)
-
- # Rewind the file to the beginning of the data stream.
- def _rewind(self):
- self._fp.seek(0, 0)
- self._mode = _MODE_READ
- self._pos = 0
- self._decompressor = zlib.decompressobj(self.wbits)
- self._buffer = b""
- self._buffer_offset = 0
-
- def seek(self, offset, whence=0):
- """Change the file position.
-
- The new position is specified by offset, relative to the
- position indicated by whence. Values for whence are:
-
- 0: start of stream (default); offset must not be negative
- 1: current stream position
- 2: end of stream; offset must not be positive
-
- Returns the new file position.
-
- Note that seeking is emulated, so depending on the parameters,
- this operation may be extremely slow.
- """
- with self._lock:
- self._check_can_seek()
-
- # Recalculate offset as an absolute file position.
- if whence == 0:
- pass
- elif whence == 1:
- offset = self._pos + offset
- elif whence == 2:
- # Seeking relative to EOF - we need to know the file's size.
- if self._size < 0:
- self._read_all(return_data=False)
- offset = self._size + offset
- else:
- raise ValueError("Invalid value for whence: %s" % (whence,))
-
- # Make it so that offset is the number of bytes to skip forward.
- if offset < self._pos:
- self._rewind()
- else:
- offset -= self._pos
-
- # Read and discard data until we reach the desired position.
- self._read_block(offset, return_data=False)
-
- return self._pos
-
- def tell(self):
- """Return the current file position."""
- with self._lock:
- self._check_not_closed()
- return self._pos
-
-
-class BinaryGzipFile(BinaryZlibFile):
- """A file object providing transparent gzip (de)compression.
-
- If filename is a str or bytes object, it gives the name
- of the file to be opened. Otherwise, it should be a file object,
- which will be used to read or write the compressed data.
-
- mode can be 'rb' for reading (default) or 'wb' for (over)writing
-
- If mode is 'wb', compresslevel can be a number between 1
- and 9 specifying the level of compression: 1 produces the least
- compression, and 9 (default) produces the most compression.
- """
-
- wbits = 31 # zlib compressor/decompressor wbits value for gzip format.
-
-
-# Utility functions/variables from numpy required for writing arrays.
-# We need at least the functions introduced in version 1.9 of numpy. Here,
-# we use the ones from numpy 1.10.2.
-BUFFER_SIZE = 2 ** 18 # size of buffer for reading npz files in bytes
-
-
-def _read_bytes(fp, size, error_template="ran out of data"):
- """Read from file-like object until size bytes are read.
-
- Raises ValueError if not EOF is encountered before size bytes are read.
- Non-blocking objects only supported if they derive from io objects.
-
- Required as e.g. ZipExtFile in python 2.6 can return less data than
- requested.
-
- This function was taken from numpy/lib/format.py in version 1.10.2.
-
- Parameters
- ----------
- fp: file-like object
- size: int
- error_template: str
-
- Returns
- -------
- a bytes object
- The data read in bytes.
-
- """
- data = bytes()
- while True:
- # io files (default in python3) return None or raise on
- # would-block, python2 file will truncate, probably nothing can be
- # done about that. note that regular files can't be non-blocking
- try:
- r = fp.read(size - len(data))
- data += r
- if len(r) == 0 or len(data) == size:
- break
- except io.BlockingIOError:
- pass
- if len(data) != size:
- msg = "EOF: reading %s, expected %d bytes got %d"
- raise ValueError(msg % (error_template, size, len(data)))
- else:
- return data
diff --git a/mloop/localsklearn/externals/joblib/parallel.py b/mloop/localsklearn/externals/joblib/parallel.py
deleted file mode 100644
index 43913f3..0000000
--- a/mloop/localsklearn/externals/joblib/parallel.py
+++ /dev/null
@@ -1,779 +0,0 @@
-"""
-Helpers for embarrassingly parallel code.
-"""
-# Author: Gael Varoquaux < gael dot varoquaux at normalesup dot org >
-# Copyright: 2010, Gael Varoquaux
-# License: BSD 3 clause
-
-from __future__ import division
-
-import os
-import sys
-from math import sqrt
-import functools
-import time
-import threading
-import itertools
-from numbers import Integral
-from contextlib import contextmanager
-try:
- import cPickle as pickle
-except:
- import pickle
-
-from ._multiprocessing_helpers import mp
-
-from .format_stack import format_outer_frames
-from .logger import Logger, short_format_time
-from .my_exceptions import TransportableException, _mk_exception
-from .disk import memstr_to_bytes
-from ._parallel_backends import (FallbackToBackend, MultiprocessingBackend,
- ThreadingBackend, SequentialBackend)
-from ._compat import _basestring
-from .func_inspect import getfullargspec
-
-
-BACKENDS = {
- 'multiprocessing': MultiprocessingBackend,
- 'threading': ThreadingBackend,
- 'sequential': SequentialBackend,
-}
-
-# name of the backend used by default by Parallel outside of any context
-# managed by ``parallel_backend``.
-DEFAULT_BACKEND = 'multiprocessing'
-DEFAULT_N_JOBS = 1
-
-# Thread local value that can be overriden by the ``parallel_backend`` context
-# manager
-_backend = threading.local()
-
-
-def get_active_backend():
- """Return the active default backend"""
- active_backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
- if active_backend_and_jobs is not None:
- return active_backend_and_jobs
- # We are outside of the scope of any parallel_backend context manager,
- # create the default backend instance now
- active_backend = BACKENDS[DEFAULT_BACKEND]()
- return active_backend, DEFAULT_N_JOBS
-
-
-@contextmanager
-def parallel_backend(backend, n_jobs=-1, **backend_params):
- """Change the default backend used by Parallel inside a with block.
-
- If ``backend`` is a string it must match a previously registered
- implementation using the ``register_parallel_backend`` function.
-
- Alternatively backend can be passed directly as an instance.
-
- By default all available workers will be used (``n_jobs=-1``) unless the
- caller passes an explicit value for the ``n_jobs`` parameter.
-
- This is an alternative to passing a ``backend='backend_name'`` argument to
- the ``Parallel`` class constructor. It is particularly useful when calling
- into library code that uses joblib internally but does not expose the
- backend argument in its own API.
-
- >>> from operator import neg
- >>> with parallel_backend('threading'):
- ... print(Parallel()(delayed(neg)(i + 1) for i in range(5)))
- ...
- [-1, -2, -3, -4, -5]
-
- Warning: this function is experimental and subject to change in a future
- version of joblib.
-
- .. versionadded:: 0.10
-
- """
- if isinstance(backend, _basestring):
- backend = BACKENDS[backend](**backend_params)
- old_backend_and_jobs = getattr(_backend, 'backend_and_jobs', None)
- try:
- _backend.backend_and_jobs = (backend, n_jobs)
- # return the backend instance to make it easier to write tests
- yield backend, n_jobs
- finally:
- if old_backend_and_jobs is None:
- if getattr(_backend, 'backend_and_jobs', None) is not None:
- del _backend.backend_and_jobs
- else:
- _backend.backend_and_jobs = old_backend_and_jobs
-
-
-# Under Linux or OS X the default start method of multiprocessing
-# can cause third party libraries to crash. Under Python 3.4+ it is possible
-# to set an environment variable to switch the default start method from
-# 'fork' to 'forkserver' or 'spawn' to avoid this issue albeit at the cost
-# of causing semantic changes and some additional pool instantiation overhead.
-if hasattr(mp, 'get_context'):
- method = os.environ.get('JOBLIB_START_METHOD', '').strip() or None
- DEFAULT_MP_CONTEXT = mp.get_context(method=method)
-else:
- DEFAULT_MP_CONTEXT = None
-
-
-class BatchedCalls(object):
- """Wrap a sequence of (func, args, kwargs) tuples as a single callable"""
-
- def __init__(self, iterator_slice):
- self.items = list(iterator_slice)
- self._size = len(self.items)
-
- def __call__(self):
- return [func(*args, **kwargs) for func, args, kwargs in self.items]
-
- def __len__(self):
- return self._size
-
-
-###############################################################################
-# CPU count that works also when multiprocessing has been disabled via
-# the JOBLIB_MULTIPROCESSING environment variable
-def cpu_count():
- """Return the number of CPUs."""
- if mp is None:
- return 1
- return mp.cpu_count()
-
-
-###############################################################################
-# For verbosity
-
-def _verbosity_filter(index, verbose):
- """ Returns False for indices increasingly apart, the distance
- depending on the value of verbose.
-
- We use a lag increasing as the square of index
- """
- if not verbose:
- return True
- elif verbose > 10:
- return False
- if index == 0:
- return False
- verbose = .5 * (11 - verbose) ** 2
- scale = sqrt(index / verbose)
- next_scale = sqrt((index + 1) / verbose)
- return (int(next_scale) == int(scale))
-
-
-###############################################################################
-def delayed(function, check_pickle=True):
- """Decorator used to capture the arguments of a function.
-
- Pass `check_pickle=False` when:
-
- - performing a possibly repeated check is too costly and has been done
- already once outside of the call to delayed.
-
- - when used in conjunction `Parallel(backend='threading')`.
-
- """
- # Try to pickle the input function, to catch the problems early when
- # using with multiprocessing:
- if check_pickle:
- pickle.dumps(function)
-
- def delayed_function(*args, **kwargs):
- return function, args, kwargs
- try:
- delayed_function = functools.wraps(function)(delayed_function)
- except AttributeError:
- " functools.wraps fails on some callable objects "
- return delayed_function
-
-
-###############################################################################
-class BatchCompletionCallBack(object):
- """Callback used by joblib.Parallel's multiprocessing backend.
-
- This callable is executed by the parent process whenever a worker process
- has returned the results of a batch of tasks.
-
- It is used for progress reporting, to update estimate of the batch
- processing duration and to schedule the next batch of tasks to be
- processed.
-
- """
- def __init__(self, dispatch_timestamp, batch_size, parallel):
- self.dispatch_timestamp = dispatch_timestamp
- self.batch_size = batch_size
- self.parallel = parallel
-
- def __call__(self, out):
- self.parallel.n_completed_tasks += self.batch_size
- this_batch_duration = time.time() - self.dispatch_timestamp
-
- self.parallel._backend.batch_completed(self.batch_size,
- this_batch_duration)
- self.parallel.print_progress()
- if self.parallel._original_iterator is not None:
- self.parallel.dispatch_next()
-
-
-###############################################################################
-def register_parallel_backend(name, factory, make_default=False):
- """Register a new Parallel backend factory.
-
- The new backend can then be selected by passing its name as the backend
- argument to the Parallel class. Moreover, the default backend can be
- overwritten globally by setting make_default=True.
-
- The factory can be any callable that takes no argument and return an
- instance of ``ParallelBackendBase``.
-
- Warning: this function is experimental and subject to change in a future
- version of joblib.
-
- .. versionadded:: 0.10
-
- """
- BACKENDS[name] = factory
- if make_default:
- global DEFAULT_BACKEND
- DEFAULT_BACKEND = name
-
-
-def effective_n_jobs(n_jobs=-1):
- """Determine the number of jobs that can actually run in parallel
-
- n_jobs is the is the number of workers requested by the callers.
- Passing n_jobs=-1 means requesting all available workers for instance
- matching the number of CPU cores on the worker host(s).
-
- This method should return a guesstimate of the number of workers that can
- actually perform work concurrently with the currently enabled default
- backend. The primary use case is to make it possible for the caller to know
- in how many chunks to slice the work.
-
- In general working on larger data chunks is more efficient (less
- scheduling overhead and better use of CPU cache prefetching heuristics)
- as long as all the workers have enough work to do.
-
- Warning: this function is experimental and subject to change in a future
- version of joblib.
-
- .. versionadded:: 0.10
-
- """
- backend, _ = get_active_backend()
- return backend.effective_n_jobs(n_jobs=n_jobs)
-
-
-###############################################################################
-class Parallel(Logger):
- ''' Helper class for readable parallel mapping.
-
- Parameters
- -----------
- n_jobs: int, default: 1
- The maximum number of concurrently running jobs, such as the number
- of Python worker processes when backend="multiprocessing"
- or the size of the thread-pool when backend="threading".
- If -1 all CPUs are used. If 1 is given, no parallel computing code
- is used at all, which is useful for debugging. For n_jobs below -1,
- (n_cpus + 1 + n_jobs) are used. Thus for n_jobs = -2, all
- CPUs but one are used.
- backend: str or None, default: 'multiprocessing'
- Specify the parallelization backend implementation.
- Supported backends are:
- - "multiprocessing" used by default, can induce some
- communication and memory overhead when exchanging input and
- output data with the worker Python processes.
- - "threading" is a very low-overhead backend but it suffers
- from the Python Global Interpreter Lock if the called function
- relies a lot on Python objects. "threading" is mostly useful
- when the execution bottleneck is a compiled extension that
- explicitly releases the GIL (for instance a Cython loop wrapped
- in a "with nogil" block or an expensive call to a library such
- as NumPy).
- - finally, you can register backends by calling
- register_parallel_backend. This will allow you to implement
- a backend of your liking.
- verbose: int, optional
- The verbosity level: if non zero, progress messages are
- printed. Above 50, the output is sent to stdout.
- The frequency of the messages increases with the verbosity level.
- If it more than 10, all iterations are reported.
- timeout: float, optional
- Timeout limit for each task to complete. If any task takes longer
- a TimeOutError will be raised. Only applied when n_jobs != 1
- pre_dispatch: {'all', integer, or expression, as in '3*n_jobs'}
- The number of batches (of tasks) to be pre-dispatched.
- Default is '2*n_jobs'. When batch_size="auto" this is reasonable
- default and the multiprocessing workers should never starve.
- batch_size: int or 'auto', default: 'auto'
- The number of atomic tasks to dispatch at once to each
- worker. When individual evaluations are very fast, multiprocessing
- can be slower than sequential computation because of the overhead.
- Batching fast computations together can mitigate this.
- The ``'auto'`` strategy keeps track of the time it takes for a batch
- to complete, and dynamically adjusts the batch size to keep the time
- on the order of half a second, using a heuristic. The initial batch
- size is 1.
- ``batch_size="auto"`` with ``backend="threading"`` will dispatch
- batches of a single task at a time as the threading backend has
- very little overhead and using larger batch size has not proved to
- bring any gain in that case.
- temp_folder: str, optional
- Folder to be used by the pool for memmaping large arrays
- for sharing memory with worker processes. If None, this will try in
- order:
- - a folder pointed by the JOBLIB_TEMP_FOLDER environment variable,
- - /dev/shm if the folder exists and is writable: this is a RAMdisk
- filesystem available by default on modern Linux distributions,
- - the default system temporary folder that can be overridden
- with TMP, TMPDIR or TEMP environment variables, typically /tmp
- under Unix operating systems.
- Only active when backend="multiprocessing".
- max_nbytes int, str, or None, optional, 1M by default
- Threshold on the size of arrays passed to the workers that
- triggers automated memory mapping in temp_folder. Can be an int
- in Bytes, or a human-readable string, e.g., '1M' for 1 megabyte.
- Use None to disable memmaping of large arrays.
- Only active when backend="multiprocessing".
- mmap_mode: {None, 'r+', 'r', 'w+', 'c'}
- Memmapping mode for numpy arrays passed to workers.
- See 'max_nbytes' parameter documentation for more details.
-
- Notes
- -----
-
- This object uses the multiprocessing module to compute in
- parallel the application of a function to many different
- arguments. The main functionality it brings in addition to
- using the raw multiprocessing API are (see examples for details):
-
- * More readable code, in particular since it avoids
- constructing list of arguments.
-
- * Easier debugging:
- - informative tracebacks even when the error happens on
- the client side
- - using 'n_jobs=1' enables to turn off parallel computing
- for debugging without changing the codepath
- - early capture of pickling errors
-
- * An optional progress meter.
-
- * Interruption of multiprocesses jobs with 'Ctrl-C'
-
- * Flexible pickling control for the communication to and from
- the worker processes.
-
- * Ability to use shared memory efficiently with worker
- processes for large numpy-based datastructures.
-
- Examples
- --------
-
- A simple example:
-
- >>> from math import sqrt
- >>> from sklearn.externals.joblib import Parallel, delayed
- >>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
- [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
-
- Reshaping the output when the function has several return
- values:
-
- >>> from math import modf
- >>> from sklearn.externals.joblib import Parallel, delayed
- >>> r = Parallel(n_jobs=1)(delayed(modf)(i/2.) for i in range(10))
- >>> res, i = zip(*r)
- >>> res
- (0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5)
- >>> i
- (0.0, 0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0)
-
- The progress meter: the higher the value of `verbose`, the more
- messages::
-
- >>> from time import sleep
- >>> from sklearn.externals.joblib import Parallel, delayed
- >>> r = Parallel(n_jobs=2, verbose=5)(delayed(sleep)(.1) for _ in range(10)) #doctest: +SKIP
- [Parallel(n_jobs=2)]: Done 1 out of 10 | elapsed: 0.1s remaining: 0.9s
- [Parallel(n_jobs=2)]: Done 3 out of 10 | elapsed: 0.2s remaining: 0.5s
- [Parallel(n_jobs=2)]: Done 6 out of 10 | elapsed: 0.3s remaining: 0.2s
- [Parallel(n_jobs=2)]: Done 9 out of 10 | elapsed: 0.5s remaining: 0.1s
- [Parallel(n_jobs=2)]: Done 10 out of 10 | elapsed: 0.5s finished
-
- Traceback example, note how the line of the error is indicated
- as well as the values of the parameter passed to the function that
- triggered the exception, even though the traceback happens in the
- child process::
-
- >>> from heapq import nlargest
- >>> from sklearn.externals.joblib import Parallel, delayed
- >>> Parallel(n_jobs=2)(delayed(nlargest)(2, n) for n in (range(4), 'abcde', 3)) #doctest: +SKIP
- #...
- ---------------------------------------------------------------------------
- Sub-process traceback:
- ---------------------------------------------------------------------------
- TypeError Mon Nov 12 11:37:46 2012
- PID: 12934 Python 2.7.3: /usr/bin/python
- ...........................................................................
- /usr/lib/python2.7/heapq.pyc in nlargest(n=2, iterable=3, key=None)
- 419 if n >= size:
- 420 return sorted(iterable, key=key, reverse=True)[:n]
- 421
- 422 # When key is none, use simpler decoration
- 423 if key is None:
- --> 424 it = izip(iterable, count(0,-1)) # decorate
- 425 result = _nlargest(n, it)
- 426 return map(itemgetter(0), result) # undecorate
- 427
- 428 # General case, slowest method
-
- TypeError: izip argument #1 must support iteration
- ___________________________________________________________________________
-
-
- Using pre_dispatch in a producer/consumer situation, where the
- data is generated on the fly. Note how the producer is first
- called a 3 times before the parallel loop is initiated, and then
- called to generate new data on the fly. In this case the total
- number of iterations cannot be reported in the progress messages::
-
- >>> from math import sqrt
- >>> from sklearn.externals.joblib import Parallel, delayed
-
- >>> def producer():
- ... for i in range(6):
- ... print('Produced %s' % i)
- ... yield i
-
- >>> out = Parallel(n_jobs=2, verbose=100, pre_dispatch='1.5*n_jobs')(
- ... delayed(sqrt)(i) for i in producer()) #doctest: +SKIP
- Produced 0
- Produced 1
- Produced 2
- [Parallel(n_jobs=2)]: Done 1 jobs | elapsed: 0.0s
- Produced 3
- [Parallel(n_jobs=2)]: Done 2 jobs | elapsed: 0.0s
- Produced 4
- [Parallel(n_jobs=2)]: Done 3 jobs | elapsed: 0.0s
- Produced 5
- [Parallel(n_jobs=2)]: Done 4 jobs | elapsed: 0.0s
- [Parallel(n_jobs=2)]: Done 5 out of 6 | elapsed: 0.0s remaining: 0.0s
- [Parallel(n_jobs=2)]: Done 6 out of 6 | elapsed: 0.0s finished
- '''
- def __init__(self, n_jobs=1, backend=None, verbose=0, timeout=None,
- pre_dispatch='2 * n_jobs', batch_size='auto',
- temp_folder=None, max_nbytes='1M', mmap_mode='r'):
- active_backend, default_n_jobs = get_active_backend()
- if backend is None and n_jobs == 1:
- # If we are under a parallel_backend context manager, look up
- # the default number of jobs and use that instead:
- n_jobs = default_n_jobs
- self.n_jobs = n_jobs
- self.verbose = verbose
- self.timeout = timeout
- self.pre_dispatch = pre_dispatch
-
- if isinstance(max_nbytes, _basestring):
- max_nbytes = memstr_to_bytes(max_nbytes)
-
- self._backend_args = dict(
- max_nbytes=max_nbytes,
- mmap_mode=mmap_mode,
- temp_folder=temp_folder,
- verbose=max(0, self.verbose - 50),
- )
- if DEFAULT_MP_CONTEXT is not None:
- self._backend_args['context'] = DEFAULT_MP_CONTEXT
-
- if backend is None:
- backend = active_backend
- elif hasattr(backend, 'Pool') and hasattr(backend, 'Lock'):
- # Make it possible to pass a custom multiprocessing context as
- # backend to change the start method to forkserver or spawn or
- # preload modules on the forkserver helper process.
- self._backend_args['context'] = backend
- backend = MultiprocessingBackend()
- else:
- try:
- backend_factory = BACKENDS[backend]
- except KeyError:
- raise ValueError("Invalid backend: %s, expected one of %r"
- % (backend, sorted(BACKENDS.keys())))
- backend = backend_factory()
-
- if (batch_size == 'auto' or isinstance(batch_size, Integral) and
- batch_size > 0):
- self.batch_size = batch_size
- else:
- raise ValueError(
- "batch_size must be 'auto' or a positive integer, got: %r"
- % batch_size)
-
- self._backend = backend
- self._output = None
- self._jobs = list()
- self._managed_backend = False
-
- # This lock is used coordinate the main thread of this process with
- # the async callback thread of our the pool.
- self._lock = threading.Lock()
-
- def __enter__(self):
- self._managed_backend = True
- self._initialize_backend()
- return self
-
- def __exit__(self, exc_type, exc_value, traceback):
- self._terminate_backend()
- self._managed_backend = False
-
- def _initialize_backend(self):
- """Build a process or thread pool and return the number of workers"""
- try:
- return self._backend.configure(n_jobs=self.n_jobs, parallel=self,
- **self._backend_args)
- except FallbackToBackend as e:
- # Recursively initialize the backend in case of requested fallback.
- self._backend = e.backend
- return self._initialize_backend()
-
- def _effective_n_jobs(self):
- if self._backend:
- return self._backend.effective_n_jobs(self.n_jobs)
- return 1
-
- def _terminate_backend(self):
- if self._backend is not None:
- self._backend.terminate()
-
- def _dispatch(self, batch):
- """Queue the batch for computing, with or without multiprocessing
-
- WARNING: this method is not thread-safe: it should be only called
- indirectly via dispatch_one_batch.
-
- """
- # If job.get() catches an exception, it closes the queue:
- if self._aborting:
- return
-
- self.n_dispatched_tasks += len(batch)
- self.n_dispatched_batches += 1
-
- dispatch_timestamp = time.time()
- cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
- job = self._backend.apply_async(batch, callback=cb)
- self._jobs.append(job)
-
- def dispatch_next(self):
- """Dispatch more data for parallel processing
-
- This method is meant to be called concurrently by the multiprocessing
- callback. We rely on the thread-safety of dispatch_one_batch to protect
- against concurrent consumption of the unprotected iterator.
-
- """
- if not self.dispatch_one_batch(self._original_iterator):
- self._iterating = False
- self._original_iterator = None
-
- def dispatch_one_batch(self, iterator):
- """Prefetch the tasks for the next batch and dispatch them.
-
- The effective size of the batch is computed here.
- If there are no more jobs to dispatch, return False, else return True.
-
- The iterator consumption and dispatching is protected by the same
- lock so calling this function should be thread safe.
-
- """
- if self.batch_size == 'auto':
- batch_size = self._backend.compute_batch_size()
- else:
- # Fixed batch size strategy
- batch_size = self.batch_size
-
- with self._lock:
- tasks = BatchedCalls(itertools.islice(iterator, batch_size))
- if len(tasks) == 0:
- # No more tasks available in the iterator: tell caller to stop.
- return False
- else:
- self._dispatch(tasks)
- return True
-
- def _print(self, msg, msg_args):
- """Display the message on stout or stderr depending on verbosity"""
- # XXX: Not using the logger framework: need to
- # learn to use logger better.
- if not self.verbose:
- return
- if self.verbose < 50:
- writer = sys.stderr.write
- else:
- writer = sys.stdout.write
- msg = msg % msg_args
- writer('[%s]: %s\n' % (self, msg))
-
- def print_progress(self):
- """Display the process of the parallel execution only a fraction
- of time, controlled by self.verbose.
- """
- if not self.verbose:
- return
- elapsed_time = time.time() - self._start_time
-
- # Original job iterator becomes None once it has been fully
- # consumed : at this point we know the total number of jobs and we are
- # able to display an estimation of the remaining time based on already
- # completed jobs. Otherwise, we simply display the number of completed
- # tasks.
- if self._original_iterator is not None:
- if _verbosity_filter(self.n_dispatched_batches, self.verbose):
- return
- self._print('Done %3i tasks | elapsed: %s',
- (self.n_completed_tasks,
- short_format_time(elapsed_time), ))
- else:
- index = self.n_completed_tasks
- # We are finished dispatching
- total_tasks = self.n_dispatched_tasks
- # We always display the first loop
- if not index == 0:
- # Display depending on the number of remaining items
- # A message as soon as we finish dispatching, cursor is 0
- cursor = (total_tasks - index + 1 -
- self._pre_dispatch_amount)
- frequency = (total_tasks // self.verbose) + 1
- is_last_item = (index + 1 == total_tasks)
- if (is_last_item or cursor % frequency):
- return
- remaining_time = (elapsed_time / index) * \
- (self.n_dispatched_tasks - index * 1.0)
- # only display status if remaining time is greater or equal to 0
- self._print('Done %3i out of %3i | elapsed: %s remaining: %s',
- (index,
- total_tasks,
- short_format_time(elapsed_time),
- short_format_time(remaining_time),
- ))
-
- def retrieve(self):
- self._output = list()
- while self._iterating or len(self._jobs) > 0:
- if len(self._jobs) == 0:
- # Wait for an async callback to dispatch new jobs
- time.sleep(0.01)
- continue
- # We need to be careful: the job list can be filling up as
- # we empty it and Python list are not thread-safe by default hence
- # the use of the lock
- with self._lock:
- job = self._jobs.pop(0)
- try:
- # check if timeout supported in backend future implementation
- if 'timeout' in getfullargspec(job.get).args:
- self._output.extend(job.get(timeout=self.timeout))
- else:
- self._output.extend(job.get())
- except BaseException as exception:
- # Note: we catch any BaseException instead of just Exception
- # instances to also include KeyboardInterrupt.
-
- # Stop dispatching any new job in the async callback thread
- self._aborting = True
-
- if isinstance(exception, TransportableException):
- # Capture exception to add information on the local
- # stack in addition to the distant stack
- this_report = format_outer_frames(context=10,
- stack_start=1)
- report = """Multiprocessing exception:
-%s
----------------------------------------------------------------------------
-Sub-process traceback:
----------------------------------------------------------------------------
-%s""" % (this_report, exception.message)
- # Convert this to a JoblibException
- exception_type = _mk_exception(exception.etype)[0]
- exception = exception_type(report)
-
- # If the backends allows it, cancel or kill remaining running
- # tasks without waiting for the results as we will raise
- # the exception we got back to the caller instead of returning
- # any result.
- backend = self._backend
- if (backend is not None and
- hasattr(backend, 'abort_everything')):
- # If the backend is managed externally we need to make sure
- # to leave it in a working state to allow for future jobs
- # scheduling.
- ensure_ready = self._managed_backend
- backend.abort_everything(ensure_ready=ensure_ready)
- raise exception
-
- def __call__(self, iterable):
- if self._jobs:
- raise ValueError('This Parallel instance is already running')
- # A flag used to abort the dispatching of jobs in case an
- # exception is found
- self._aborting = False
- if not self._managed_backend:
- n_jobs = self._initialize_backend()
- else:
- n_jobs = self._effective_n_jobs()
-
- iterator = iter(iterable)
- pre_dispatch = self.pre_dispatch
-
- if pre_dispatch == 'all' or n_jobs == 1:
- # prevent further dispatch via multiprocessing callback thread
- self._original_iterator = None
- self._pre_dispatch_amount = 0
- else:
- self._original_iterator = iterator
- if hasattr(pre_dispatch, 'endswith'):
- pre_dispatch = eval(pre_dispatch)
- self._pre_dispatch_amount = pre_dispatch = int(pre_dispatch)
-
- # The main thread will consume the first pre_dispatch items and
- # the remaining items will later be lazily dispatched by async
- # callbacks upon task completions.
- iterator = itertools.islice(iterator, pre_dispatch)
-
- self._start_time = time.time()
- self.n_dispatched_batches = 0
- self.n_dispatched_tasks = 0
- self.n_completed_tasks = 0
- try:
- # Only set self._iterating to True if at least a batch
- # was dispatched. In particular this covers the edge
- # case of Parallel used with an exhausted iterator.
- while self.dispatch_one_batch(iterator):
- self._iterating = True
- else:
- self._iterating = False
-
- if pre_dispatch == "all" or n_jobs == 1:
- # The iterable was consumed all at once by the above for loop.
- # No need to wait for async callbacks to trigger to
- # consumption.
- self._iterating = False
- self.retrieve()
- # Make sure that we get a last message telling us we are done
- elapsed_time = time.time() - self._start_time
- self._print('Done %3i out of %3i | elapsed: %s finished',
- (len(self._output), len(self._output),
- short_format_time(elapsed_time)))
- finally:
- if not self._managed_backend:
- self._terminate_backend()
- self._jobs = list()
- output = self._output
- self._output = None
- return output
-
- def __repr__(self):
- return '%s(n_jobs=%s)' % (self.__class__.__name__, self.n_jobs)
diff --git a/mloop/localsklearn/externals/joblib/pool.py b/mloop/localsklearn/externals/joblib/pool.py
deleted file mode 100644
index e0682c1..0000000
--- a/mloop/localsklearn/externals/joblib/pool.py
+++ /dev/null
@@ -1,615 +0,0 @@
-"""Custom implementation of multiprocessing.Pool with custom pickler.
-
-This module provides efficient ways of working with data stored in
-shared memory with numpy.memmap arrays without inducing any memory
-copy between the parent and child processes.
-
-This module should not be imported if multiprocessing is not
-available as it implements subclasses of multiprocessing Pool
-that uses a custom alternative to SimpleQueue.
-
-"""
-# Author: Olivier Grisel
-# Copyright: 2012, Olivier Grisel
-# License: BSD 3 clause
-
-from mmap import mmap
-import errno
-import os
-import stat
-import sys
-import threading
-import atexit
-import tempfile
-import shutil
-import warnings
-from time import sleep
-
-try:
- WindowsError
-except NameError:
- WindowsError = None
-
-from pickle import whichmodule
-try:
- # Python 2 compat
- from cPickle import loads
- from cPickle import dumps
-except ImportError:
- from pickle import loads
- from pickle import dumps
- import copyreg
-
-# Customizable pure Python pickler in Python 2
-# customizable C-optimized pickler under Python 3.3+
-from pickle import Pickler
-
-from pickle import HIGHEST_PROTOCOL
-from io import BytesIO
-
-from ._multiprocessing_helpers import mp, assert_spawning
-# We need the class definition to derive from it not the multiprocessing.Pool
-# factory function
-from multiprocessing.pool import Pool
-
-try:
- import numpy as np
- from numpy.lib.stride_tricks import as_strided
-except ImportError:
- np = None
-
-from .numpy_pickle import load
-from .numpy_pickle import dump
-from .hashing import hash
-
-# Some system have a ramdisk mounted by default, we can use it instead of /tmp
-# as the default folder to dump big arrays to share with subprocesses
-SYSTEM_SHARED_MEM_FS = '/dev/shm'
-
-# Folder and file permissions to chmod temporary files generated by the
-# memmaping pool. Only the owner of the Python process can access the
-# temporary files and folder.
-FOLDER_PERMISSIONS = stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
-FILE_PERMISSIONS = stat.S_IRUSR | stat.S_IWUSR
-
-###############################################################################
-# Support for efficient transient pickling of numpy data structures
-
-
-def _get_backing_memmap(a):
- """Recursively look up the original np.memmap instance base if any."""
- b = getattr(a, 'base', None)
- if b is None:
- # TODO: check scipy sparse datastructure if scipy is installed
- # a nor its descendants do not have a memmap base
- return None
-
- elif isinstance(b, mmap):
- # a is already a real memmap instance.
- return a
-
- else:
- # Recursive exploration of the base ancestry
- return _get_backing_memmap(b)
-
-
-def has_shareable_memory(a):
- """Return True if a is backed by some mmap buffer directly or not."""
- return _get_backing_memmap(a) is not None
-
-
-def _strided_from_memmap(filename, dtype, mode, offset, order, shape, strides,
- total_buffer_len):
- """Reconstruct an array view on a memory mapped file."""
- if mode == 'w+':
- # Do not zero the original data when unpickling
- mode = 'r+'
-
- if strides is None:
- # Simple, contiguous memmap
- return np.memmap(filename, dtype=dtype, shape=shape, mode=mode,
- offset=offset, order=order)
- else:
- # For non-contiguous data, memmap the total enclosing buffer and then
- # extract the non-contiguous view with the stride-tricks API
- base = np.memmap(filename, dtype=dtype, shape=total_buffer_len,
- mode=mode, offset=offset, order=order)
- return as_strided(base, shape=shape, strides=strides)
-
-
-def _reduce_memmap_backed(a, m):
- """Pickling reduction for memmap backed arrays.
-
- a is expected to be an instance of np.ndarray (or np.memmap)
- m is expected to be an instance of np.memmap on the top of the ``base``
- attribute ancestry of a. ``m.base`` should be the real python mmap object.
- """
- # offset that comes from the striding differences between a and m
- a_start, a_end = np.byte_bounds(a)
- m_start = np.byte_bounds(m)[0]
- offset = a_start - m_start
-
- # offset from the backing memmap
- offset += m.offset
-
- if m.flags['F_CONTIGUOUS']:
- order = 'F'
- else:
- # The backing memmap buffer is necessarily contiguous hence C if not
- # Fortran
- order = 'C'
-
- if a.flags['F_CONTIGUOUS'] or a.flags['C_CONTIGUOUS']:
- # If the array is a contiguous view, no need to pass the strides
- strides = None
- total_buffer_len = None
- else:
- # Compute the total number of items to map from which the strided
- # view will be extracted.
- strides = a.strides
- total_buffer_len = (a_end - a_start) // a.itemsize
- return (_strided_from_memmap,
- (m.filename, a.dtype, m.mode, offset, order, a.shape, strides,
- total_buffer_len))
-
-
-def reduce_memmap(a):
- """Pickle the descriptors of a memmap instance to reopen on same file."""
- m = _get_backing_memmap(a)
- if m is not None:
- # m is a real mmap backed memmap instance, reduce a preserving striding
- # information
- return _reduce_memmap_backed(a, m)
- else:
- # This memmap instance is actually backed by a regular in-memory
- # buffer: this can happen when using binary operators on numpy.memmap
- # instances
- return (loads, (dumps(np.asarray(a), protocol=HIGHEST_PROTOCOL),))
-
-
-class ArrayMemmapReducer(object):
- """Reducer callable to dump large arrays to memmap files.
-
- Parameters
- ----------
- max_nbytes: int
- Threshold to trigger memmaping of large arrays to files created
- a folder.
- temp_folder: str
- Path of a folder where files for backing memmaped arrays are created.
- mmap_mode: 'r', 'r+' or 'c'
- Mode for the created memmap datastructure. See the documentation of
- numpy.memmap for more details. Note: 'w+' is coerced to 'r+'
- automatically to avoid zeroing the data on unpickling.
- verbose: int, optional, 0 by default
- If verbose > 0, memmap creations are logged.
- If verbose > 1, both memmap creations, reuse and array pickling are
- logged.
- prewarm: bool, optional, False by default.
- Force a read on newly memmaped array to make sure that OS pre-cache it
- memory. This can be useful to avoid concurrent disk access when the
- same data array is passed to different worker processes.
- """
-
- def __init__(self, max_nbytes, temp_folder, mmap_mode, verbose=0,
- context_id=None, prewarm=True):
- self._max_nbytes = max_nbytes
- self._temp_folder = temp_folder
- self._mmap_mode = mmap_mode
- self.verbose = int(verbose)
- self._prewarm = prewarm
- if context_id is not None:
- warnings.warn('context_id is deprecated and ignored in joblib'
- ' 0.9.4 and will be removed in 0.11',
- DeprecationWarning)
-
- def __call__(self, a):
- m = _get_backing_memmap(a)
- if m is not None:
- # a is already backed by a memmap file, let's reuse it directly
- return _reduce_memmap_backed(a, m)
-
- if (not a.dtype.hasobject
- and self._max_nbytes is not None
- and a.nbytes > self._max_nbytes):
- # check that the folder exists (lazily create the pool temp folder
- # if required)
- try:
- os.makedirs(self._temp_folder)
- os.chmod(self._temp_folder, FOLDER_PERMISSIONS)
- except OSError as e:
- if e.errno != errno.EEXIST:
- raise e
-
- # Find a unique, concurrent safe filename for writing the
- # content of this array only once.
- basename = "%d-%d-%s.pkl" % (
- os.getpid(), id(threading.current_thread()), hash(a))
- filename = os.path.join(self._temp_folder, basename)
-
- # In case the same array with the same content is passed several
- # times to the pool subprocess children, serialize it only once
-
- # XXX: implement an explicit reference counting scheme to make it
- # possible to delete temporary files as soon as the workers are
- # done processing this data.
- if not os.path.exists(filename):
- if self.verbose > 0:
- print("Memmaping (shape=%r, dtype=%s) to new file %s" % (
- a.shape, a.dtype, filename))
- for dumped_filename in dump(a, filename):
- os.chmod(dumped_filename, FILE_PERMISSIONS)
-
- if self._prewarm:
- # Warm up the data to avoid concurrent disk access in
- # multiple children processes
- load(filename, mmap_mode=self._mmap_mode).max()
- elif self.verbose > 1:
- print("Memmaping (shape=%s, dtype=%s) to old file %s" % (
- a.shape, a.dtype, filename))
-
- # The worker process will use joblib.load to memmap the data
- return (load, (filename, self._mmap_mode))
- else:
- # do not convert a into memmap, let pickler do its usual copy with
- # the default system pickler
- if self.verbose > 1:
- print("Pickling array (shape=%r, dtype=%s)." % (
- a.shape, a.dtype))
- return (loads, (dumps(a, protocol=HIGHEST_PROTOCOL),))
-
-
-###############################################################################
-# Enable custom pickling in Pool queues
-
-class CustomizablePickler(Pickler):
- """Pickler that accepts custom reducers.
-
- HIGHEST_PROTOCOL is selected by default as this pickler is used
- to pickle ephemeral datastructures for interprocess communication
- hence no backward compatibility is required.
-
- `reducers` is expected to be a dictionary with key/values
- being `(type, callable)` pairs where `callable` is a function that
- give an instance of `type` will return a tuple `(constructor,
- tuple_of_objects)` to rebuild an instance out of the pickled
- `tuple_of_objects` as would return a `__reduce__` method. See the
- standard library documentation on pickling for more details.
-
- """
-
- # We override the pure Python pickler as its the only way to be able to
- # customize the dispatch table without side effects in Python 2.6
- # to 3.2. For Python 3.3+ leverage the new dispatch_table
- # feature from http://bugs.python.org/issue14166 that makes it possible
- # to use the C implementation of the Pickler which is faster.
-
- def __init__(self, writer, reducers=None, protocol=HIGHEST_PROTOCOL):
- Pickler.__init__(self, writer, protocol=protocol)
- if reducers is None:
- reducers = {}
- if hasattr(Pickler, 'dispatch'):
- # Make the dispatch registry an instance level attribute instead of
- # a reference to the class dictionary under Python 2
- self.dispatch = Pickler.dispatch.copy()
- else:
- # Under Python 3 initialize the dispatch table with a copy of the
- # default registry
- self.dispatch_table = copyreg.dispatch_table.copy()
- for type, reduce_func in reducers.items():
- self.register(type, reduce_func)
-
- def register(self, type, reduce_func):
- """Attach a reducer function to a given type in the dispatch table."""
- if hasattr(Pickler, 'dispatch'):
- # Python 2 pickler dispatching is not explicitly customizable.
- # Let us use a closure to workaround this limitation.
- def dispatcher(self, obj):
- reduced = reduce_func(obj)
- self.save_reduce(obj=obj, *reduced)
- self.dispatch[type] = dispatcher
- else:
- self.dispatch_table[type] = reduce_func
-
-
-class CustomizablePicklingQueue(object):
- """Locked Pipe implementation that uses a customizable pickler.
-
- This class is an alternative to the multiprocessing implementation
- of SimpleQueue in order to make it possible to pass custom
- pickling reducers, for instance to avoid memory copy when passing
- memory mapped datastructures.
-
- `reducers` is expected to be a dict with key / values being
- `(type, callable)` pairs where `callable` is a function that, given an
- instance of `type`, will return a tuple `(constructor, tuple_of_objects)`
- to rebuild an instance out of the pickled `tuple_of_objects` as would
- return a `__reduce__` method.
-
- See the standard library documentation on pickling for more details.
- """
-
- def __init__(self, context, reducers=None):
- self._reducers = reducers
- self._reader, self._writer = context.Pipe(duplex=False)
- self._rlock = context.Lock()
- if sys.platform == 'win32':
- self._wlock = None
- else:
- self._wlock = context.Lock()
- self._make_methods()
-
- def __getstate__(self):
- assert_spawning(self)
- return (self._reader, self._writer, self._rlock, self._wlock,
- self._reducers)
-
- def __setstate__(self, state):
- (self._reader, self._writer, self._rlock, self._wlock,
- self._reducers) = state
- self._make_methods()
-
- def empty(self):
- return not self._reader.poll()
-
- def _make_methods(self):
- self._recv = recv = self._reader.recv
- racquire, rrelease = self._rlock.acquire, self._rlock.release
-
- def get():
- racquire()
- try:
- return recv()
- finally:
- rrelease()
-
- self.get = get
-
- if self._reducers:
- def send(obj):
- buffer = BytesIO()
- CustomizablePickler(buffer, self._reducers).dump(obj)
- self._writer.send_bytes(buffer.getvalue())
- self._send = send
- else:
- self._send = send = self._writer.send
- if self._wlock is None:
- # writes to a message oriented win32 pipe are atomic
- self.put = send
- else:
- wlock_acquire, wlock_release = (
- self._wlock.acquire, self._wlock.release)
-
- def put(obj):
- wlock_acquire()
- try:
- return send(obj)
- finally:
- wlock_release()
-
- self.put = put
-
-
-class PicklingPool(Pool):
- """Pool implementation with customizable pickling reducers.
-
- This is useful to control how data is shipped between processes
- and makes it possible to use shared memory without useless
- copies induces by the default pickling methods of the original
- objects passed as arguments to dispatch.
-
- `forward_reducers` and `backward_reducers` are expected to be
- dictionaries with key/values being `(type, callable)` pairs where
- `callable` is a function that, given an instance of `type`, will return a
- tuple `(constructor, tuple_of_objects)` to rebuild an instance out of the
- pickled `tuple_of_objects` as would return a `__reduce__` method.
- See the standard library documentation about pickling for more details.
-
- """
-
- def __init__(self, processes=None, forward_reducers=None,
- backward_reducers=None, **kwargs):
- if forward_reducers is None:
- forward_reducers = dict()
- if backward_reducers is None:
- backward_reducers = dict()
- self._forward_reducers = forward_reducers
- self._backward_reducers = backward_reducers
- poolargs = dict(processes=processes)
- poolargs.update(kwargs)
- super(PicklingPool, self).__init__(**poolargs)
-
- def _setup_queues(self):
- context = getattr(self, '_ctx', mp)
- self._inqueue = CustomizablePicklingQueue(context,
- self._forward_reducers)
- self._outqueue = CustomizablePicklingQueue(context,
- self._backward_reducers)
- self._quick_put = self._inqueue._send
- self._quick_get = self._outqueue._recv
-
-
-def delete_folder(folder_path):
- """Utility function to cleanup a temporary folder if still existing."""
- try:
- if os.path.exists(folder_path):
- shutil.rmtree(folder_path)
- except WindowsError:
- warnings.warn("Failed to clean temporary folder: %s" % folder_path)
-
-
-class MemmapingPool(PicklingPool):
- """Process pool that shares large arrays to avoid memory copy.
-
- This drop-in replacement for `multiprocessing.pool.Pool` makes
- it possible to work efficiently with shared memory in a numpy
- context.
-
- Existing instances of numpy.memmap are preserved: the child
- suprocesses will have access to the same shared memory in the
- original mode except for the 'w+' mode that is automatically
- transformed as 'r+' to avoid zeroing the original data upon
- instantiation.
-
- Furthermore large arrays from the parent process are automatically
- dumped to a temporary folder on the filesystem such as child
- processes to access their content via memmaping (file system
- backed shared memory).
-
- Note: it is important to call the terminate method to collect
- the temporary folder used by the pool.
-
- Parameters
- ----------
- processes: int, optional
- Number of worker processes running concurrently in the pool.
- initializer: callable, optional
- Callable executed on worker process creation.
- initargs: tuple, optional
- Arguments passed to the initializer callable.
- temp_folder: str, optional
- Folder to be used by the pool for memmaping large arrays
- for sharing memory with worker processes. If None, this will try in
- order:
- - a folder pointed by the JOBLIB_TEMP_FOLDER environment variable,
- - /dev/shm if the folder exists and is writable: this is a RAMdisk
- filesystem available by default on modern Linux distributions,
- - the default system temporary folder that can be overridden
- with TMP, TMPDIR or TEMP environment variables, typically /tmp
- under Unix operating systems.
- max_nbytes int or None, optional, 1e6 by default
- Threshold on the size of arrays passed to the workers that
- triggers automated memory mapping in temp_folder.
- Use None to disable memmaping of large arrays.
- mmap_mode: {'r+', 'r', 'w+', 'c'}
- Memmapping mode for numpy arrays passed to workers.
- See 'max_nbytes' parameter documentation for more details.
- forward_reducers: dictionary, optional
- Reducers used to pickle objects passed from master to worker
- processes: see below.
- backward_reducers: dictionary, optional
- Reducers used to pickle return values from workers back to the
- master process.
- verbose: int, optional
- Make it possible to monitor how the communication of numpy arrays
- with the subprocess is handled (pickling or memmaping)
- prewarm: bool or str, optional, "auto" by default.
- If True, force a read on newly memmaped array to make sure that OS pre-
- cache it in memory. This can be useful to avoid concurrent disk access
- when the same data array is passed to different worker processes.
- If "auto" (by default), prewarm is set to True, unless the Linux shared
- memory partition /dev/shm is available and used as temp_folder.
-
- `forward_reducers` and `backward_reducers` are expected to be
- dictionaries with key/values being `(type, callable)` pairs where
- `callable` is a function that give an instance of `type` will return
- a tuple `(constructor, tuple_of_objects)` to rebuild an instance out
- of the pickled `tuple_of_objects` as would return a `__reduce__`
- method. See the standard library documentation on pickling for more
- details.
-
- """
-
- def __init__(self, processes=None, temp_folder=None, max_nbytes=1e6,
- mmap_mode='r', forward_reducers=None, backward_reducers=None,
- verbose=0, context_id=None, prewarm=False, **kwargs):
- if forward_reducers is None:
- forward_reducers = dict()
- if backward_reducers is None:
- backward_reducers = dict()
- if context_id is not None:
- warnings.warn('context_id is deprecated and ignored in joblib'
- ' 0.9.4 and will be removed in 0.11',
- DeprecationWarning)
-
- # Prepare a sub-folder name for the serialization of this particular
- # pool instance (do not create in advance to spare FS write access if
- # no array is to be dumped):
- use_shared_mem = False
- pool_folder_name = "joblib_memmaping_pool_%d_%d" % (
- os.getpid(), id(self))
- if temp_folder is None:
- temp_folder = os.environ.get('JOBLIB_TEMP_FOLDER', None)
- if temp_folder is None:
- if os.path.exists(SYSTEM_SHARED_MEM_FS):
- try:
- temp_folder = SYSTEM_SHARED_MEM_FS
- pool_folder = os.path.join(temp_folder, pool_folder_name)
- if not os.path.exists(pool_folder):
- os.makedirs(pool_folder)
- use_shared_mem = True
- except IOError:
- # Missing rights in the the /dev/shm partition,
- # fallback to regular temp folder.
- temp_folder = None
- if temp_folder is None:
- # Fallback to the default tmp folder, typically /tmp
- temp_folder = tempfile.gettempdir()
- temp_folder = os.path.abspath(os.path.expanduser(temp_folder))
- pool_folder = os.path.join(temp_folder, pool_folder_name)
- self._temp_folder = pool_folder
-
- # Register the garbage collector at program exit in case caller forgets
- # to call terminate explicitly: note we do not pass any reference to
- # self to ensure that this callback won't prevent garbage collection of
- # the pool instance and related file handler resources such as POSIX
- # semaphores and pipes
- pool_module_name = whichmodule(delete_folder, 'delete_folder')
-
- def _cleanup():
- # In some cases the Python runtime seems to set delete_folder to
- # None just before exiting when accessing the delete_folder
- # function from the closure namespace. So instead we reimport
- # the delete_folder function explicitly.
- # https://github.com/joblib/joblib/issues/328
- # We cannot just use from 'joblib.pool import delete_folder'
- # because joblib should only use relative imports to allow
- # easy vendoring.
- delete_folder = __import__(
- pool_module_name, fromlist=['delete_folder']).delete_folder
- delete_folder(pool_folder)
-
- atexit.register(_cleanup)
-
- if np is not None:
- # Register smart numpy.ndarray reducers that detects memmap backed
- # arrays and that is alse able to dump to memmap large in-memory
- # arrays over the max_nbytes threshold
- if prewarm == "auto":
- prewarm = not use_shared_mem
- forward_reduce_ndarray = ArrayMemmapReducer(
- max_nbytes, pool_folder, mmap_mode, verbose,
- prewarm=prewarm)
- forward_reducers[np.ndarray] = forward_reduce_ndarray
- forward_reducers[np.memmap] = reduce_memmap
-
- # Communication from child process to the parent process always
- # pickles in-memory numpy.ndarray without dumping them as memmap
- # to avoid confusing the caller and make it tricky to collect the
- # temporary folder
- backward_reduce_ndarray = ArrayMemmapReducer(
- None, pool_folder, mmap_mode, verbose)
- backward_reducers[np.ndarray] = backward_reduce_ndarray
- backward_reducers[np.memmap] = reduce_memmap
-
- poolargs = dict(
- processes=processes,
- forward_reducers=forward_reducers,
- backward_reducers=backward_reducers)
- poolargs.update(kwargs)
- super(MemmapingPool, self).__init__(**poolargs)
-
- def terminate(self):
- n_retries = 10
- for i in range(n_retries):
- try:
- super(MemmapingPool, self).terminate()
- break
- except WindowsError as e:
- # Workaround occasional "[Error 5] Access is denied" issue
- # when trying to terminate a process under windows.
- sleep(0.1)
- if i + 1 == n_retries:
- warnings.warn("Failed to terminate worker processes in "
- " multiprocessing pool: %r" % e)
- delete_folder(self._temp_folder)
diff --git a/mloop/localsklearn/externals/joblib/testing.py b/mloop/localsklearn/externals/joblib/testing.py
deleted file mode 100644
index 21dfbc8..0000000
--- a/mloop/localsklearn/externals/joblib/testing.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""
-Helper for testing.
-"""
-
-import sys
-import warnings
-import os.path
-import re
-import subprocess
-import threading
-
-from sklearn.externals.joblib._compat import PY3_OR_LATER
-
-
-def warnings_to_stdout():
- """ Redirect all warnings to stdout.
- """
- showwarning_orig = warnings.showwarning
-
- def showwarning(msg, cat, fname, lno, file=None, line=0):
- showwarning_orig(msg, cat, os.path.basename(fname), line, sys.stdout)
-
- warnings.showwarning = showwarning
- #warnings.simplefilter('always')
-
-
-try:
- from nose.tools import assert_raises_regex
-except ImportError:
- # For Python 2.7
- try:
- from nose.tools import assert_raises_regexp as assert_raises_regex
- except ImportError:
- # for Python 2.6
- def assert_raises_regex(expected_exception, expected_regexp,
- callable_obj=None, *args, **kwargs):
- """Helper function to check for message patterns in exceptions"""
-
- not_raised = False
- try:
- callable_obj(*args, **kwargs)
- not_raised = True
- except Exception as e:
- error_message = str(e)
- if not re.compile(expected_regexp).search(error_message):
- raise AssertionError("Error message should match pattern "
- "%r. %r does not." %
- (expected_regexp, error_message))
- if not_raised:
- raise AssertionError("Should have raised %r" %
- expected_exception(expected_regexp))
-
-
-def check_subprocess_call(cmd, timeout=1, stdout_regex=None):
- """Runs a command in a subprocess with timeout in seconds.
-
- Also checks returncode is zero and stdout if stdout_regex is set.
- """
- proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
-
- def kill_process():
- proc.kill()
-
- timer = threading.Timer(timeout, kill_process)
- try:
- timer.start()
- stdout, stderr = proc.communicate()
-
- if PY3_OR_LATER:
- stdout, stderr = stdout.decode(), stderr.decode()
- if proc.returncode != 0:
- message = (
- 'Non-zero return code: {0}.\nStdout:\n{1}\n'
- 'Stderr:\n{2}').format(
- proc.returncode, stdout, stderr)
- raise ValueError(message)
-
- if (stdout_regex is not None and
- not re.search(stdout_regex, stdout)):
- raise ValueError(
- "Unexpected output: '{0!r}' does not match:\n{1!r}".format(
- stdout_regex, stdout))
- finally:
- timer.cancel()
diff --git a/mloop/localsklearn/externals/odict.py b/mloop/localsklearn/externals/odict.py
deleted file mode 100644
index 2880863..0000000
--- a/mloop/localsklearn/externals/odict.py
+++ /dev/null
@@ -1,266 +0,0 @@
-# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
-# Passes Python2.7's test suite and incorporates all the latest updates.
-# Copyright 2009 Raymond Hettinger
-# http://code.activestate.com/recipes/576693/
-"Ordered dictionary"
-
-try:
- from thread import get_ident as _get_ident
-except ImportError:
- try:
- from dummy_thread import get_ident as _get_ident
- except ImportError:
- # Ensure that this module is still importable under Python3 to avoid
- # crashing code-inspecting tools like nose.
- from _dummy_thread import get_ident as _get_ident
-
-try:
- from _abcoll import KeysView, ValuesView, ItemsView
-except ImportError:
- pass
-
-
-class OrderedDict(dict):
- 'Dictionary that remembers insertion order'
- # An inherited dict maps keys to values.
- # The inherited dict provides __getitem__, __len__, __contains__, and get.
- # The remaining methods are order-aware.
- # Big-O running times for all methods are the same as for regular dictionaries.
-
- # The internal self.__map dictionary maps keys to links in a doubly linked list.
- # The circular doubly linked list starts and ends with a sentinel element.
- # The sentinel element never gets deleted (this simplifies the algorithm).
- # Each link is stored as a list of length three: [PREV, NEXT, KEY].
-
- def __init__(self, *args, **kwds):
- '''Initialize an ordered dictionary. Signature is the same as for
- regular dictionaries, but keyword arguments are not recommended
- because their insertion order is arbitrary.
-
- '''
- if len(args) > 1:
- raise TypeError('expected at most 1 arguments, got %d' % len(args))
- try:
- self.__root
- except AttributeError:
- self.__root = root = [] # sentinel node
- root[:] = [root, root, None]
- self.__map = {}
- self.__update(*args, **kwds)
-
- def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
- 'od.__setitem__(i, y) <==> od[i]=y'
- # Setting a new item creates a new link which goes at the end of the linked
- # list, and the inherited dictionary is updated with the new key/value pair.
- if key not in self:
- root = self.__root
- last = root[0]
- last[1] = root[0] = self.__map[key] = [last, root, key]
- dict_setitem(self, key, value)
-
- def __delitem__(self, key, dict_delitem=dict.__delitem__):
- 'od.__delitem__(y) <==> del od[y]'
- # Deleting an existing item uses self.__map to find the link which is
- # then removed by updating the links in the predecessor and successor nodes.
- dict_delitem(self, key)
- link_prev, link_next, key = self.__map.pop(key)
- link_prev[1] = link_next
- link_next[0] = link_prev
-
- def __iter__(self):
- 'od.__iter__() <==> iter(od)'
- root = self.__root
- curr = root[1]
- while curr is not root:
- yield curr[2]
- curr = curr[1]
-
- def __reversed__(self):
- 'od.__reversed__() <==> reversed(od)'
- root = self.__root
- curr = root[0]
- while curr is not root:
- yield curr[2]
- curr = curr[0]
-
- def clear(self):
- 'od.clear() -> None. Remove all items from od.'
- try:
- for node in self.__map.itervalues():
- del node[:]
- root = self.__root
- root[:] = [root, root, None]
- self.__map.clear()
- except AttributeError:
- pass
- dict.clear(self)
-
- def popitem(self, last=True):
- '''od.popitem() -> (k, v), return and remove a (key, value) pair.
- Pairs are returned in LIFO order if last is true or FIFO order if false.
-
- '''
- if not self:
- raise KeyError('dictionary is empty')
- root = self.__root
- if last:
- link = root[0]
- link_prev = link[0]
- link_prev[1] = root
- root[0] = link_prev
- else:
- link = root[1]
- link_next = link[1]
- root[1] = link_next
- link_next[0] = root
- key = link[2]
- del self.__map[key]
- value = dict.pop(self, key)
- return key, value
-
- # -- the following methods do not depend on the internal structure --
-
- def keys(self):
- 'od.keys() -> list of keys in od'
- return list(self)
-
- def values(self):
- 'od.values() -> list of values in od'
- return [self[key] for key in self]
-
- def items(self):
- 'od.items() -> list of (key, value) pairs in od'
- return [(key, self[key]) for key in self]
-
- def iterkeys(self):
- 'od.iterkeys() -> an iterator over the keys in od'
- return iter(self)
-
- def itervalues(self):
- 'od.itervalues -> an iterator over the values in od'
- for k in self:
- yield self[k]
-
- def iteritems(self):
- 'od.iteritems -> an iterator over the (key, value) items in od'
- for k in self:
- yield (k, self[k])
-
- def update(*args, **kwds):
- '''od.update(E, **F) -> None. Update od from dict/iterable E and F.
-
- If E is a dict instance, does: for k in E: od[k] = E[k]
- If E has a .keys() method, does: for k in E.keys(): od[k] = E[k]
- Or if E is an iterable of items, does: for k, v in E: od[k] = v
- In either case, this is followed by: for k, v in F.items(): od[k] = v
-
- '''
- if len(args) > 2:
- raise TypeError('update() takes at most 2 positional '
- 'arguments (%d given)' % (len(args),))
- elif not args:
- raise TypeError('update() takes at least 1 argument (0 given)')
- self = args[0]
- # Make progressively weaker assumptions about "other"
- other = ()
- if len(args) == 2:
- other = args[1]
- if isinstance(other, dict):
- for key in other:
- self[key] = other[key]
- elif hasattr(other, 'keys'):
- for key in other.keys():
- self[key] = other[key]
- else:
- for key, value in other:
- self[key] = value
- for key, value in kwds.items():
- self[key] = value
-
- __update = update # let subclasses override update without breaking __init__
-
- __marker = object()
-
- def pop(self, key, default=__marker):
- '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
- If key is not found, d is returned if given, otherwise KeyError is raised.
-
- '''
- if key in self:
- result = self[key]
- del self[key]
- return result
- if default is self.__marker:
- raise KeyError(key)
- return default
-
- def setdefault(self, key, default=None):
- 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
- if key in self:
- return self[key]
- self[key] = default
- return default
-
- def __repr__(self, _repr_running={}):
- 'od.__repr__() <==> repr(od)'
- call_key = id(self), _get_ident()
- if call_key in _repr_running:
- return '...'
- _repr_running[call_key] = 1
- try:
- if not self:
- return '%s()' % (self.__class__.__name__,)
- return '%s(%r)' % (self.__class__.__name__, self.items())
- finally:
- del _repr_running[call_key]
-
- def __reduce__(self):
- 'Return state information for pickling'
- items = [[k, self[k]] for k in self]
- inst_dict = vars(self).copy()
- for k in vars(OrderedDict()):
- inst_dict.pop(k, None)
- if inst_dict:
- return (self.__class__, (items,), inst_dict)
- return self.__class__, (items,)
-
- def copy(self):
- 'od.copy() -> a shallow copy of od'
- return self.__class__(self)
-
- @classmethod
- def fromkeys(cls, iterable, value=None):
- '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
- and values equal to v (which defaults to None).
-
- '''
- d = cls()
- for key in iterable:
- d[key] = value
- return d
-
- def __eq__(self, other):
- '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
- while comparison to a regular mapping is order-insensitive.
-
- '''
- if isinstance(other, OrderedDict):
- return len(self)==len(other) and self.items() == other.items()
- return dict.__eq__(self, other)
-
- def __ne__(self, other):
- return not self == other
-
- # -- the following methods are only used in Python 2.7 --
-
- def viewkeys(self):
- "od.viewkeys() -> a set-like object providing a view on od's keys"
- return KeysView(self)
-
- def viewvalues(self):
- "od.viewvalues() -> an object providing a view on od's values"
- return ValuesView(self)
-
- def viewitems(self):
- "od.viewitems() -> a set-like object providing a view on od's items"
- return ItemsView(self)
diff --git a/mloop/localsklearn/externals/setup.py b/mloop/localsklearn/externals/setup.py
deleted file mode 100644
index 936f032..0000000
--- a/mloop/localsklearn/externals/setup.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# -*- coding: utf-8 -*-
-
-
-def configuration(parent_package='', top_path=None):
- from numpy.distutils.misc_util import Configuration
- config = Configuration('externals', parent_package, top_path)
- config.add_subpackage('joblib')
-
- return config
diff --git a/mloop/localsklearn/externals/six.py b/mloop/localsklearn/externals/six.py
deleted file mode 100644
index 85898ec..0000000
--- a/mloop/localsklearn/externals/six.py
+++ /dev/null
@@ -1,577 +0,0 @@
-"""Utilities for writing code that runs on Python 2 and 3"""
-
-# Copyright (c) 2010-2013 Benjamin Peterson
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import operator
-import sys
-import types
-
-__author__ = "Benjamin Peterson "
-__version__ = "1.4.1"
-
-
-# Useful for very coarse version differentiation.
-PY2 = sys.version_info[0] == 2
-PY3 = sys.version_info[0] == 3
-
-if PY3:
- string_types = str,
- integer_types = int,
- class_types = type,
- text_type = str
- binary_type = bytes
-
- MAXSIZE = sys.maxsize
-else:
- string_types = basestring,
- integer_types = (int, long)
- class_types = (type, types.ClassType)
- text_type = unicode
- binary_type = str
-
- if sys.platform.startswith("java"):
- # Jython always uses 32 bits.
- MAXSIZE = int((1 << 31) - 1)
- else:
- # It's possible to have sizeof(long) != sizeof(Py_ssize_t).
- class X(object):
- def __len__(self):
- return 1 << 31
- try:
- len(X())
- except OverflowError:
- # 32-bit
- MAXSIZE = int((1 << 31) - 1)
- else:
- # 64-bit
- MAXSIZE = int((1 << 63) - 1)
- del X
-
-
-def _add_doc(func, doc):
- """Add documentation to a function."""
- func.__doc__ = doc
-
-
-def _import_module(name):
- """Import module, returning the module after the last dot."""
- __import__(name)
- return sys.modules[name]
-
-
-class _LazyDescr(object):
-
- def __init__(self, name):
- self.name = name
-
- def __get__(self, obj, tp):
- result = self._resolve()
- setattr(obj, self.name, result)
- # This is a bit ugly, but it avoids running this again.
- delattr(tp, self.name)
- return result
-
-
-class MovedModule(_LazyDescr):
-
- def __init__(self, name, old, new=None):
- super(MovedModule, self).__init__(name)
- if PY3:
- if new is None:
- new = name
- self.mod = new
- else:
- self.mod = old
-
- def _resolve(self):
- return _import_module(self.mod)
-
-
-class MovedAttribute(_LazyDescr):
-
- def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
- super(MovedAttribute, self).__init__(name)
- if PY3:
- if new_mod is None:
- new_mod = name
- self.mod = new_mod
- if new_attr is None:
- if old_attr is None:
- new_attr = name
- else:
- new_attr = old_attr
- self.attr = new_attr
- else:
- self.mod = old_mod
- if old_attr is None:
- old_attr = name
- self.attr = old_attr
-
- def _resolve(self):
- module = _import_module(self.mod)
- return getattr(module, self.attr)
-
-
-
-class _MovedItems(types.ModuleType):
- """Lazy loading of moved objects"""
-
-
-_moved_attributes = [
- MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
- MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
- MovedAttribute("filterfalse", "itertools", "itertools", "ifilterfalse", "filterfalse"),
- MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
- MovedAttribute("map", "itertools", "builtins", "imap", "map"),
- MovedAttribute("range", "__builtin__", "builtins", "xrange", "range"),
- MovedAttribute("reload_module", "__builtin__", "imp", "reload"),
- MovedAttribute("reduce", "__builtin__", "functools"),
- MovedAttribute("StringIO", "StringIO", "io"),
- MovedAttribute("UserString", "UserString", "collections"),
- MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
- MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),
- MovedAttribute("zip_longest", "itertools", "itertools", "izip_longest", "zip_longest"),
-
- MovedModule("builtins", "__builtin__"),
- MovedModule("configparser", "ConfigParser"),
- MovedModule("copyreg", "copy_reg"),
- MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
- MovedModule("http_cookies", "Cookie", "http.cookies"),
- MovedModule("html_entities", "htmlentitydefs", "html.entities"),
- MovedModule("html_parser", "HTMLParser", "html.parser"),
- MovedModule("http_client", "httplib", "http.client"),
- MovedModule("email_mime_multipart", "email.MIMEMultipart", "email.mime.multipart"),
- MovedModule("email_mime_text", "email.MIMEText", "email.mime.text"),
- MovedModule("email_mime_base", "email.MIMEBase", "email.mime.base"),
- MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
- MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
- MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
- MovedModule("cPickle", "cPickle", "pickle"),
- MovedModule("queue", "Queue"),
- MovedModule("reprlib", "repr"),
- MovedModule("socketserver", "SocketServer"),
- MovedModule("tkinter", "Tkinter"),
- MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
- MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
- MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
- MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
- MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
- MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
- MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
- MovedModule("tkinter_colorchooser", "tkColorChooser",
- "tkinter.colorchooser"),
- MovedModule("tkinter_commondialog", "tkCommonDialog",
- "tkinter.commondialog"),
- MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"),
- MovedModule("tkinter_font", "tkFont", "tkinter.font"),
- MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
- MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
- "tkinter.simpledialog"),
- MovedModule("urllib_parse", __name__ + ".moves.urllib_parse", "urllib.parse"),
- MovedModule("urllib_error", __name__ + ".moves.urllib_error", "urllib.error"),
- MovedModule("urllib", __name__ + ".moves.urllib", __name__ + ".moves.urllib"),
- MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
- MovedModule("winreg", "_winreg"),
-]
-for attr in _moved_attributes:
- setattr(_MovedItems, attr.name, attr)
-del attr
-
-moves = sys.modules[__name__ + ".moves"] = _MovedItems(__name__ + ".moves")
-
-
-
-class Module_six_moves_urllib_parse(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_parse"""
-
-
-_urllib_parse_moved_attributes = [
- MovedAttribute("ParseResult", "urlparse", "urllib.parse"),
- MovedAttribute("parse_qs", "urlparse", "urllib.parse"),
- MovedAttribute("parse_qsl", "urlparse", "urllib.parse"),
- MovedAttribute("urldefrag", "urlparse", "urllib.parse"),
- MovedAttribute("urljoin", "urlparse", "urllib.parse"),
- MovedAttribute("urlparse", "urlparse", "urllib.parse"),
- MovedAttribute("urlsplit", "urlparse", "urllib.parse"),
- MovedAttribute("urlunparse", "urlparse", "urllib.parse"),
- MovedAttribute("urlunsplit", "urlparse", "urllib.parse"),
- MovedAttribute("quote", "urllib", "urllib.parse"),
- MovedAttribute("quote_plus", "urllib", "urllib.parse"),
- MovedAttribute("unquote", "urllib", "urllib.parse"),
- MovedAttribute("unquote_plus", "urllib", "urllib.parse"),
- MovedAttribute("urlencode", "urllib", "urllib.parse"),
-]
-for attr in _urllib_parse_moved_attributes:
- setattr(Module_six_moves_urllib_parse, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_parse"] = Module_six_moves_urllib_parse(__name__ + ".moves.urllib_parse")
-sys.modules[__name__ + ".moves.urllib.parse"] = Module_six_moves_urllib_parse(__name__ + ".moves.urllib.parse")
-
-
-class Module_six_moves_urllib_error(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_error"""
-
-
-_urllib_error_moved_attributes = [
- MovedAttribute("URLError", "urllib2", "urllib.error"),
- MovedAttribute("HTTPError", "urllib2", "urllib.error"),
- MovedAttribute("ContentTooShortError", "urllib", "urllib.error"),
-]
-for attr in _urllib_error_moved_attributes:
- setattr(Module_six_moves_urllib_error, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_error"] = Module_six_moves_urllib_error(__name__ + ".moves.urllib_error")
-sys.modules[__name__ + ".moves.urllib.error"] = Module_six_moves_urllib_error(__name__ + ".moves.urllib.error")
-
-
-class Module_six_moves_urllib_request(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_request"""
-
-
-_urllib_request_moved_attributes = [
- MovedAttribute("urlopen", "urllib2", "urllib.request"),
- MovedAttribute("install_opener", "urllib2", "urllib.request"),
- MovedAttribute("build_opener", "urllib2", "urllib.request"),
- MovedAttribute("pathname2url", "urllib", "urllib.request"),
- MovedAttribute("url2pathname", "urllib", "urllib.request"),
- MovedAttribute("getproxies", "urllib", "urllib.request"),
- MovedAttribute("Request", "urllib2", "urllib.request"),
- MovedAttribute("OpenerDirector", "urllib2", "urllib.request"),
- MovedAttribute("HTTPDefaultErrorHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPRedirectHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPCookieProcessor", "urllib2", "urllib.request"),
- MovedAttribute("ProxyHandler", "urllib2", "urllib.request"),
- MovedAttribute("BaseHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPPasswordMgr", "urllib2", "urllib.request"),
- MovedAttribute("HTTPPasswordMgrWithDefaultRealm", "urllib2", "urllib.request"),
- MovedAttribute("AbstractBasicAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPBasicAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("ProxyBasicAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("AbstractDigestAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPDigestAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("ProxyDigestAuthHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPSHandler", "urllib2", "urllib.request"),
- MovedAttribute("FileHandler", "urllib2", "urllib.request"),
- MovedAttribute("FTPHandler", "urllib2", "urllib.request"),
- MovedAttribute("CacheFTPHandler", "urllib2", "urllib.request"),
- MovedAttribute("UnknownHandler", "urllib2", "urllib.request"),
- MovedAttribute("HTTPErrorProcessor", "urllib2", "urllib.request"),
- MovedAttribute("urlretrieve", "urllib", "urllib.request"),
- MovedAttribute("urlcleanup", "urllib", "urllib.request"),
- MovedAttribute("URLopener", "urllib", "urllib.request"),
- MovedAttribute("FancyURLopener", "urllib", "urllib.request"),
-]
-for attr in _urllib_request_moved_attributes:
- setattr(Module_six_moves_urllib_request, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_request"] = Module_six_moves_urllib_request(__name__ + ".moves.urllib_request")
-sys.modules[__name__ + ".moves.urllib.request"] = Module_six_moves_urllib_request(__name__ + ".moves.urllib.request")
-
-
-class Module_six_moves_urllib_response(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_response"""
-
-
-_urllib_response_moved_attributes = [
- MovedAttribute("addbase", "urllib", "urllib.response"),
- MovedAttribute("addclosehook", "urllib", "urllib.response"),
- MovedAttribute("addinfo", "urllib", "urllib.response"),
- MovedAttribute("addinfourl", "urllib", "urllib.response"),
-]
-for attr in _urllib_response_moved_attributes:
- setattr(Module_six_moves_urllib_response, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_response"] = Module_six_moves_urllib_response(__name__ + ".moves.urllib_response")
-sys.modules[__name__ + ".moves.urllib.response"] = Module_six_moves_urllib_response(__name__ + ".moves.urllib.response")
-
-
-class Module_six_moves_urllib_robotparser(types.ModuleType):
- """Lazy loading of moved objects in six.moves.urllib_robotparser"""
-
-
-_urllib_robotparser_moved_attributes = [
- MovedAttribute("RobotFileParser", "robotparser", "urllib.robotparser"),
-]
-for attr in _urllib_robotparser_moved_attributes:
- setattr(Module_six_moves_urllib_robotparser, attr.name, attr)
-del attr
-
-sys.modules[__name__ + ".moves.urllib_robotparser"] = Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib_robotparser")
-sys.modules[__name__ + ".moves.urllib.robotparser"] = Module_six_moves_urllib_robotparser(__name__ + ".moves.urllib.robotparser")
-
-
-class Module_six_moves_urllib(types.ModuleType):
- """Create a six.moves.urllib namespace that resembles the Python 3 namespace"""
- parse = sys.modules[__name__ + ".moves.urllib_parse"]
- error = sys.modules[__name__ + ".moves.urllib_error"]
- request = sys.modules[__name__ + ".moves.urllib_request"]
- response = sys.modules[__name__ + ".moves.urllib_response"]
- robotparser = sys.modules[__name__ + ".moves.urllib_robotparser"]
-
-
-sys.modules[__name__ + ".moves.urllib"] = Module_six_moves_urllib(__name__ + ".moves.urllib")
-
-
-def add_move(move):
- """Add an item to six.moves."""
- setattr(_MovedItems, move.name, move)
-
-
-def remove_move(name):
- """Remove item from six.moves."""
- try:
- delattr(_MovedItems, name)
- except AttributeError:
- try:
- del moves.__dict__[name]
- except KeyError:
- raise AttributeError("no such move, %r" % (name,))
-
-
-if PY3:
- _meth_func = "__func__"
- _meth_self = "__self__"
-
- _func_closure = "__closure__"
- _func_code = "__code__"
- _func_defaults = "__defaults__"
- _func_globals = "__globals__"
-
- _iterkeys = "keys"
- _itervalues = "values"
- _iteritems = "items"
- _iterlists = "lists"
-else:
- _meth_func = "im_func"
- _meth_self = "im_self"
-
- _func_closure = "func_closure"
- _func_code = "func_code"
- _func_defaults = "func_defaults"
- _func_globals = "func_globals"
-
- _iterkeys = "iterkeys"
- _itervalues = "itervalues"
- _iteritems = "iteritems"
- _iterlists = "iterlists"
-
-
-try:
- advance_iterator = next
-except NameError:
- def advance_iterator(it):
- return it.next()
-next = advance_iterator
-
-
-try:
- callable = callable
-except NameError:
- def callable(obj):
- return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
-
-
-if PY3:
- def get_unbound_function(unbound):
- return unbound
-
- create_bound_method = types.MethodType
-
- Iterator = object
-else:
- def get_unbound_function(unbound):
- return unbound.im_func
-
- def create_bound_method(func, obj):
- return types.MethodType(func, obj, obj.__class__)
-
- class Iterator(object):
-
- def next(self):
- return type(self).__next__(self)
-
- callable = callable
-_add_doc(get_unbound_function,
- """Get the function out of a possibly unbound function""")
-
-
-get_method_function = operator.attrgetter(_meth_func)
-get_method_self = operator.attrgetter(_meth_self)
-get_function_closure = operator.attrgetter(_func_closure)
-get_function_code = operator.attrgetter(_func_code)
-get_function_defaults = operator.attrgetter(_func_defaults)
-get_function_globals = operator.attrgetter(_func_globals)
-
-
-def iterkeys(d, **kw):
- """Return an iterator over the keys of a dictionary."""
- return iter(getattr(d, _iterkeys)(**kw))
-
-def itervalues(d, **kw):
- """Return an iterator over the values of a dictionary."""
- return iter(getattr(d, _itervalues)(**kw))
-
-def iteritems(d, **kw):
- """Return an iterator over the (key, value) pairs of a dictionary."""
- return iter(getattr(d, _iteritems)(**kw))
-
-def iterlists(d, **kw):
- """Return an iterator over the (key, [values]) pairs of a dictionary."""
- return iter(getattr(d, _iterlists)(**kw))
-
-
-if PY3:
- def b(s):
- return s.encode("latin-1")
- def u(s):
- return s
- unichr = chr
- if sys.version_info[1] <= 1:
- def int2byte(i):
- return bytes((i,))
- else:
- # This is about 2x faster than the implementation above on 3.2+
- int2byte = operator.methodcaller("to_bytes", 1, "big")
- byte2int = operator.itemgetter(0)
- indexbytes = operator.getitem
- iterbytes = iter
- import io
- StringIO = io.StringIO
- BytesIO = io.BytesIO
-else:
- def b(s):
- return s
- def u(s):
- return unicode(s, "unicode_escape")
- unichr = unichr
- int2byte = chr
- def byte2int(bs):
- return ord(bs[0])
- def indexbytes(buf, i):
- return ord(buf[i])
- def iterbytes(buf):
- return (ord(byte) for byte in buf)
- import StringIO
- StringIO = BytesIO = StringIO.StringIO
-_add_doc(b, """Byte literal""")
-_add_doc(u, """Text literal""")
-
-
-if PY3:
- import builtins
- exec_ = getattr(builtins, "exec")
-
-
- def reraise(tp, value, tb=None):
- if value.__traceback__ is not tb:
- raise value.with_traceback(tb)
- raise value
-
-
- print_ = getattr(builtins, "print")
- del builtins
-
-else:
- def exec_(_code_, _globs_=None, _locs_=None):
- """Execute code in a namespace."""
- if _globs_ is None:
- frame = sys._getframe(1)
- _globs_ = frame.f_globals
- if _locs_ is None:
- _locs_ = frame.f_locals
- del frame
- elif _locs_ is None:
- _locs_ = _globs_
- exec("""exec _code_ in _globs_, _locs_""")
-
-
- exec_("""def reraise(tp, value, tb=None):
- raise tp, value, tb
-""")
-
-
- def print_(*args, **kwargs):
- """The new-style print function."""
- fp = kwargs.pop("file", sys.stdout)
- if fp is None:
- return
- def write(data):
- if not isinstance(data, basestring):
- data = str(data)
- fp.write(data)
- want_unicode = False
- sep = kwargs.pop("sep", None)
- if sep is not None:
- if isinstance(sep, unicode):
- want_unicode = True
- elif not isinstance(sep, str):
- raise TypeError("sep must be None or a string")
- end = kwargs.pop("end", None)
- if end is not None:
- if isinstance(end, unicode):
- want_unicode = True
- elif not isinstance(end, str):
- raise TypeError("end must be None or a string")
- if kwargs:
- raise TypeError("invalid keyword arguments to print()")
- if not want_unicode:
- for arg in args:
- if isinstance(arg, unicode):
- want_unicode = True
- break
- if want_unicode:
- newline = unicode("\n")
- space = unicode(" ")
- else:
- newline = "\n"
- space = " "
- if sep is None:
- sep = space
- if end is None:
- end = newline
- for i, arg in enumerate(args):
- if i:
- write(sep)
- write(arg)
- write(end)
-
-_add_doc(reraise, """Reraise an exception.""")
-
-
-def with_metaclass(meta, *bases):
- """Create a base class with a metaclass."""
- return meta("NewBase", bases, {})
-
-def add_metaclass(metaclass):
- """Class decorator for creating a class with a metaclass."""
- def wrapper(cls):
- orig_vars = cls.__dict__.copy()
- orig_vars.pop('__dict__', None)
- orig_vars.pop('__weakref__', None)
- for slots_var in orig_vars.get('__slots__', ()):
- orig_vars.pop(slots_var)
- return metaclass(cls.__name__, cls.__bases__, orig_vars)
- return wrapper
diff --git a/mloop/localsklearn/gaussian_process/__init__.py b/mloop/localsklearn/gaussian_process/__init__.py
deleted file mode 100644
index aaeb4a9..0000000
--- a/mloop/localsklearn/gaussian_process/__init__.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Author: Jan Hendrik Metzen
-# Vincent Dubourg
-# (mostly translation, see implementation details)
-# License: BSD 3 clause
-
-"""
-The :mod:`sklearn.gaussian_process` module implements Gaussian Process
-based regression and classification.
-"""
-
-from .gpr import GaussianProcessRegressor
-#from .gpc import GaussianProcessClassifier
-from . import kernels
-
-from .gaussian_process import GaussianProcess
-from . import correlation_models
-from . import regression_models
-
-__all__ = ['GaussianProcess', 'correlation_models', 'regression_models',
- 'GaussianProcessRegressor',
- 'kernels']
diff --git a/mloop/localsklearn/gaussian_process/correlation_models.py b/mloop/localsklearn/gaussian_process/correlation_models.py
deleted file mode 100644
index 1678e70..0000000
--- a/mloop/localsklearn/gaussian_process/correlation_models.py
+++ /dev/null
@@ -1,284 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Author: Vincent Dubourg
-# (mostly translation, see implementation details)
-# License: BSD 3 clause
-
-"""
-The built-in correlation models submodule for the gaussian_process module.
-"""
-
-
-import numpy as np
-
-
-def absolute_exponential(theta, d):
- """
- Absolute exponential autocorrelation model.
- (Ornstein-Uhlenbeck stochastic process)::
-
- n
- theta, d --> r(theta, d) = exp( sum - theta_i * |d_i| )
- i = 1
-
- Parameters
- ----------
- theta : array_like
- An array with shape 1 (isotropic) or n (anisotropic) giving the
- autocorrelation parameter(s).
-
- d : array_like
- An array with shape (n_eval, n_features) giving the componentwise
- distances between locations x and x' at which the correlation model
- should be evaluated.
-
- Returns
- -------
- r : array_like
- An array with shape (n_eval, ) containing the values of the
- autocorrelation model.
- """
- theta = np.asarray(theta, dtype=np.float64)
- d = np.abs(np.asarray(d, dtype=np.float64))
-
- if d.ndim > 1:
- n_features = d.shape[1]
- else:
- n_features = 1
-
- if theta.size == 1:
- return np.exp(- theta[0] * np.sum(d, axis=1))
- elif theta.size != n_features:
- raise ValueError("Length of theta must be 1 or %s" % n_features)
- else:
- return np.exp(- np.sum(theta.reshape(1, n_features) * d, axis=1))
-
-
-def squared_exponential(theta, d):
- """
- Squared exponential correlation model (Radial Basis Function).
- (Infinitely differentiable stochastic process, very smooth)::
-
- n
- theta, d --> r(theta, d) = exp( sum - theta_i * (d_i)^2 )
- i = 1
-
- Parameters
- ----------
- theta : array_like
- An array with shape 1 (isotropic) or n (anisotropic) giving the
- autocorrelation parameter(s).
-
- d : array_like
- An array with shape (n_eval, n_features) giving the componentwise
- distances between locations x and x' at which the correlation model
- should be evaluated.
-
- Returns
- -------
- r : array_like
- An array with shape (n_eval, ) containing the values of the
- autocorrelation model.
- """
-
- theta = np.asarray(theta, dtype=np.float64)
- d = np.asarray(d, dtype=np.float64)
-
- if d.ndim > 1:
- n_features = d.shape[1]
- else:
- n_features = 1
-
- if theta.size == 1:
- return np.exp(-theta[0] * np.sum(d ** 2, axis=1))
- elif theta.size != n_features:
- raise ValueError("Length of theta must be 1 or %s" % n_features)
- else:
- return np.exp(-np.sum(theta.reshape(1, n_features) * d ** 2, axis=1))
-
-
-def generalized_exponential(theta, d):
- """
- Generalized exponential correlation model.
- (Useful when one does not know the smoothness of the function to be
- predicted.)::
-
- n
- theta, d --> r(theta, d) = exp( sum - theta_i * |d_i|^p )
- i = 1
-
- Parameters
- ----------
- theta : array_like
- An array with shape 1+1 (isotropic) or n+1 (anisotropic) giving the
- autocorrelation parameter(s) (theta, p).
-
- d : array_like
- An array with shape (n_eval, n_features) giving the componentwise
- distances between locations x and x' at which the correlation model
- should be evaluated.
-
- Returns
- -------
- r : array_like
- An array with shape (n_eval, ) with the values of the autocorrelation
- model.
- """
-
- theta = np.asarray(theta, dtype=np.float64)
- d = np.asarray(d, dtype=np.float64)
-
- if d.ndim > 1:
- n_features = d.shape[1]
- else:
- n_features = 1
-
- lth = theta.size
- if n_features > 1 and lth == 2:
- theta = np.hstack([np.repeat(theta[0], n_features), theta[1]])
- elif lth != n_features + 1:
- raise Exception("Length of theta must be 2 or %s" % (n_features + 1))
- else:
- theta = theta.reshape(1, lth)
-
- td = theta[:, 0:-1].reshape(1, n_features) * np.abs(d) ** theta[:, -1]
- r = np.exp(- np.sum(td, 1))
-
- return r
-
-
-def pure_nugget(theta, d):
- """
- Spatial independence correlation model (pure nugget).
- (Useful when one wants to solve an ordinary least squares problem!)::
-
- n
- theta, d --> r(theta, d) = 1 if sum |d_i| == 0
- i = 1
- 0 otherwise
-
- Parameters
- ----------
- theta : array_like
- None.
-
- d : array_like
- An array with shape (n_eval, n_features) giving the componentwise
- distances between locations x and x' at which the correlation model
- should be evaluated.
-
- Returns
- -------
- r : array_like
- An array with shape (n_eval, ) with the values of the autocorrelation
- model.
- """
-
- theta = np.asarray(theta, dtype=np.float64)
- d = np.asarray(d, dtype=np.float64)
-
- n_eval = d.shape[0]
- r = np.zeros(n_eval)
- r[np.all(d == 0., axis=1)] = 1.
-
- return r
-
-
-def cubic(theta, d):
- """
- Cubic correlation model::
-
- theta, d --> r(theta, d) =
- n
- prod max(0, 1 - 3(theta_j*d_ij)^2 + 2(theta_j*d_ij)^3) , i = 1,...,m
- j = 1
-
- Parameters
- ----------
- theta : array_like
- An array with shape 1 (isotropic) or n (anisotropic) giving the
- autocorrelation parameter(s).
-
- d : array_like
- An array with shape (n_eval, n_features) giving the componentwise
- distances between locations x and x' at which the correlation model
- should be evaluated.
-
- Returns
- -------
- r : array_like
- An array with shape (n_eval, ) with the values of the autocorrelation
- model.
- """
-
- theta = np.asarray(theta, dtype=np.float64)
- d = np.asarray(d, dtype=np.float64)
-
- if d.ndim > 1:
- n_features = d.shape[1]
- else:
- n_features = 1
-
- lth = theta.size
- if lth == 1:
- td = np.abs(d) * theta
- elif lth != n_features:
- raise Exception("Length of theta must be 1 or " + str(n_features))
- else:
- td = np.abs(d) * theta.reshape(1, n_features)
-
- td[td > 1.] = 1.
- ss = 1. - td ** 2. * (3. - 2. * td)
- r = np.prod(ss, 1)
-
- return r
-
-
-def linear(theta, d):
- """
- Linear correlation model::
-
- theta, d --> r(theta, d) =
- n
- prod max(0, 1 - theta_j*d_ij) , i = 1,...,m
- j = 1
-
- Parameters
- ----------
- theta : array_like
- An array with shape 1 (isotropic) or n (anisotropic) giving the
- autocorrelation parameter(s).
-
- d : array_like
- An array with shape (n_eval, n_features) giving the componentwise
- distances between locations x and x' at which the correlation model
- should be evaluated.
-
- Returns
- -------
- r : array_like
- An array with shape (n_eval, ) with the values of the autocorrelation
- model.
- """
-
- theta = np.asarray(theta, dtype=np.float64)
- d = np.asarray(d, dtype=np.float64)
-
- if d.ndim > 1:
- n_features = d.shape[1]
- else:
- n_features = 1
-
- lth = theta.size
- if lth == 1:
- td = np.abs(d) * theta
- elif lth != n_features:
- raise Exception("Length of theta must be 1 or %s" % n_features)
- else:
- td = np.abs(d) * theta.reshape(1, n_features)
-
- td[td > 1.] = 1.
- ss = 1. - td
- r = np.prod(ss, 1)
-
- return r
diff --git a/mloop/localsklearn/gaussian_process/gaussian_process.py b/mloop/localsklearn/gaussian_process/gaussian_process.py
deleted file mode 100644
index 19a6820..0000000
--- a/mloop/localsklearn/gaussian_process/gaussian_process.py
+++ /dev/null
@@ -1,896 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Author: Vincent Dubourg
-# (mostly translation, see implementation details)
-# License: BSD 3 clause
-
-from __future__ import print_function
-
-import numpy as np
-from scipy import linalg, optimize
-
-from ..base import BaseEstimator, RegressorMixin
-from ..metrics.pairwise import manhattan_distances
-from ..utils import check_random_state, check_array, check_X_y
-from ..utils.validation import check_is_fitted
-from . import regression_models as regression
-from . import correlation_models as correlation
-from ..utils import deprecated
-
-MACHINE_EPSILON = np.finfo(np.double).eps
-
-
-@deprecated("l1_cross_distances is deprecated and will be removed in 0.20.")
-def l1_cross_distances(X):
- """
- Computes the nonzero componentwise L1 cross-distances between the vectors
- in X.
-
- Parameters
- ----------
-
- X: array_like
- An array with shape (n_samples, n_features)
-
- Returns
- -------
-
- D: array with shape (n_samples * (n_samples - 1) / 2, n_features)
- The array of componentwise L1 cross-distances.
-
- ij: arrays with shape (n_samples * (n_samples - 1) / 2, 2)
- The indices i and j of the vectors in X associated to the cross-
- distances in D: D[k] = np.abs(X[ij[k, 0]] - Y[ij[k, 1]]).
- """
- X = check_array(X)
- n_samples, n_features = X.shape
- n_nonzero_cross_dist = n_samples * (n_samples - 1) // 2
- ij = np.zeros((n_nonzero_cross_dist, 2), dtype=np.int)
- D = np.zeros((n_nonzero_cross_dist, n_features))
- ll_1 = 0
- for k in range(n_samples - 1):
- ll_0 = ll_1
- ll_1 = ll_0 + n_samples - k - 1
- ij[ll_0:ll_1, 0] = k
- ij[ll_0:ll_1, 1] = np.arange(k + 1, n_samples)
- D[ll_0:ll_1] = np.abs(X[k] - X[(k + 1):n_samples])
-
- return D, ij
-
-
-@deprecated("GaussianProcess is deprecated and will be removed in 0.20. "
- "Use the GaussianProcessRegressor instead.")
-class GaussianProcess(BaseEstimator, RegressorMixin):
- """The legacy Gaussian Process model class.
-
- Note that this class is deprecated and will be removed in 0.20.
- Use the GaussianProcessRegressor instead.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- regr : string or callable, optional
- A regression function returning an array of outputs of the linear
- regression functional basis. The number of observations n_samples
- should be greater than the size p of this basis.
- Default assumes a simple constant regression trend.
- Available built-in regression models are::
-
- 'constant', 'linear', 'quadratic'
-
- corr : string or callable, optional
- A stationary autocorrelation function returning the autocorrelation
- between two points x and x'.
- Default assumes a squared-exponential autocorrelation model.
- Built-in correlation models are::
-
- 'absolute_exponential', 'squared_exponential',
- 'generalized_exponential', 'cubic', 'linear'
-
- beta0 : double array_like, optional
- The regression weight vector to perform Ordinary Kriging (OK).
- Default assumes Universal Kriging (UK) so that the vector beta of
- regression weights is estimated using the maximum likelihood
- principle.
-
- storage_mode : string, optional
- A string specifying whether the Cholesky decomposition of the
- correlation matrix should be stored in the class (storage_mode =
- 'full') or not (storage_mode = 'light').
- Default assumes storage_mode = 'full', so that the
- Cholesky decomposition of the correlation matrix is stored.
- This might be a useful parameter when one is not interested in the
- MSE and only plan to estimate the BLUP, for which the correlation
- matrix is not required.
-
- verbose : boolean, optional
- A boolean specifying the verbose level.
- Default is verbose = False.
-
- theta0 : double array_like, optional
- An array with shape (n_features, ) or (1, ).
- The parameters in the autocorrelation model.
- If thetaL and thetaU are also specified, theta0 is considered as
- the starting point for the maximum likelihood estimation of the
- best set of parameters.
- Default assumes isotropic autocorrelation model with theta0 = 1e-1.
-
- thetaL : double array_like, optional
- An array with shape matching theta0's.
- Lower bound on the autocorrelation parameters for maximum
- likelihood estimation.
- Default is None, so that it skips maximum likelihood estimation and
- it uses theta0.
-
- thetaU : double array_like, optional
- An array with shape matching theta0's.
- Upper bound on the autocorrelation parameters for maximum
- likelihood estimation.
- Default is None, so that it skips maximum likelihood estimation and
- it uses theta0.
-
- normalize : boolean, optional
- Input X and observations y are centered and reduced wrt
- means and standard deviations estimated from the n_samples
- observations provided.
- Default is normalize = True so that data is normalized to ease
- maximum likelihood estimation.
-
- nugget : double or ndarray, optional
- Introduce a nugget effect to allow smooth predictions from noisy
- data. If nugget is an ndarray, it must be the same length as the
- number of data points used for the fit.
- The nugget is added to the diagonal of the assumed training covariance;
- in this way it acts as a Tikhonov regularization in the problem. In
- the special case of the squared exponential correlation function, the
- nugget mathematically represents the variance of the input values.
- Default assumes a nugget close to machine precision for the sake of
- robustness (nugget = 10. * MACHINE_EPSILON).
-
- optimizer : string, optional
- A string specifying the optimization algorithm to be used.
- Default uses 'fmin_cobyla' algorithm from scipy.optimize.
- Available optimizers are::
-
- 'fmin_cobyla', 'Welch'
-
- 'Welch' optimizer is dued to Welch et al., see reference [WBSWM1992]_.
- It consists in iterating over several one-dimensional optimizations
- instead of running one single multi-dimensional optimization.
-
- random_start : int, optional
- The number of times the Maximum Likelihood Estimation should be
- performed from a random starting point.
- The first MLE always uses the specified starting point (theta0),
- the next starting points are picked at random according to an
- exponential distribution (log-uniform on [thetaL, thetaU]).
- Default does not use random starting point (random_start = 1).
-
- random_state: integer or numpy.RandomState, optional
- The generator used to shuffle the sequence of coordinates of theta in
- the Welch optimizer. If an integer is given, it fixes the seed.
- Defaults to the global numpy random number generator.
-
-
- Attributes
- ----------
- theta_ : array
- Specified theta OR the best set of autocorrelation parameters (the \
- sought maximizer of the reduced likelihood function).
-
- reduced_likelihood_function_value_ : array
- The optimal reduced likelihood function value.
-
- Examples
- --------
- >>> import numpy as np
- >>> from sklearn.gaussian_process import GaussianProcess
- >>> X = np.array([[1., 3., 5., 6., 7., 8.]]).T
- >>> y = (X * np.sin(X)).ravel()
- >>> gp = GaussianProcess(theta0=0.1, thetaL=.001, thetaU=1.)
- >>> gp.fit(X, y) # doctest: +ELLIPSIS
- GaussianProcess(beta0=None...
- ...
-
- Notes
- -----
- The presentation implementation is based on a translation of the DACE
- Matlab toolbox, see reference [NLNS2002]_.
-
- References
- ----------
-
- .. [NLNS2002] `H.B. Nielsen, S.N. Lophaven, H. B. Nielsen and J.
- Sondergaard. DACE - A MATLAB Kriging Toolbox.` (2002)
- http://imedea.uib-csic.es/master/cambioglobal/Modulo_V_cod101615/Lab/lab_maps/krigging/DACE-krigingsoft/dace/dace.pdf
-
- .. [WBSWM1992] `W.J. Welch, R.J. Buck, J. Sacks, H.P. Wynn, T.J. Mitchell,
- and M.D. Morris (1992). Screening, predicting, and computer
- experiments. Technometrics, 34(1) 15--25.`
- http://www.jstor.org/pss/1269548
- """
-
- _regression_types = {
- 'constant': regression.constant,
- 'linear': regression.linear,
- 'quadratic': regression.quadratic}
-
- _correlation_types = {
- 'absolute_exponential': correlation.absolute_exponential,
- 'squared_exponential': correlation.squared_exponential,
- 'generalized_exponential': correlation.generalized_exponential,
- 'cubic': correlation.cubic,
- 'linear': correlation.linear}
-
- _optimizer_types = [
- 'fmin_cobyla',
- 'Welch']
-
- def __init__(self, regr='constant', corr='squared_exponential', beta0=None,
- storage_mode='full', verbose=False, theta0=1e-1,
- thetaL=None, thetaU=None, optimizer='fmin_cobyla',
- random_start=1, normalize=True,
- nugget=10. * MACHINE_EPSILON, random_state=None):
-
- self.regr = regr
- self.corr = corr
- self.beta0 = beta0
- self.storage_mode = storage_mode
- self.verbose = verbose
- self.theta0 = theta0
- self.thetaL = thetaL
- self.thetaU = thetaU
- self.normalize = normalize
- self.nugget = nugget
- self.optimizer = optimizer
- self.random_start = random_start
- self.random_state = random_state
-
- def fit(self, X, y):
- """
- The Gaussian Process model fitting method.
-
- Parameters
- ----------
- X : double array_like
- An array with shape (n_samples, n_features) with the input at which
- observations were made.
-
- y : double array_like
- An array with shape (n_samples, ) or shape (n_samples, n_targets)
- with the observations of the output to be predicted.
-
- Returns
- -------
- gp : self
- A fitted Gaussian Process model object awaiting data to perform
- predictions.
- """
- # Run input checks
- self._check_params()
-
- self.random_state = check_random_state(self.random_state)
-
- # Force data to 2D numpy.array
- X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
- self.y_ndim_ = y.ndim
- if y.ndim == 1:
- y = y[:, np.newaxis]
-
- # Check shapes of DOE & observations
- n_samples, n_features = X.shape
- _, n_targets = y.shape
-
- # Run input checks
- self._check_params(n_samples)
-
- # Normalize data or don't
- if self.normalize:
- X_mean = np.mean(X, axis=0)
- X_std = np.std(X, axis=0)
- y_mean = np.mean(y, axis=0)
- y_std = np.std(y, axis=0)
- X_std[X_std == 0.] = 1.
- y_std[y_std == 0.] = 1.
- # center and scale X if necessary
- X = (X - X_mean) / X_std
- y = (y - y_mean) / y_std
- else:
- X_mean = np.zeros(1)
- X_std = np.ones(1)
- y_mean = np.zeros(1)
- y_std = np.ones(1)
-
- # Calculate matrix of distances D between samples
- D, ij = l1_cross_distances(X)
- if (np.min(np.sum(D, axis=1)) == 0.
- and self.corr != correlation.pure_nugget):
- raise Exception("Multiple input features cannot have the same"
- " target value.")
-
- # Regression matrix and parameters
- F = self.regr(X)
- n_samples_F = F.shape[0]
- if F.ndim > 1:
- p = F.shape[1]
- else:
- p = 1
- if n_samples_F != n_samples:
- raise Exception("Number of rows in F and X do not match. Most "
- "likely something is going wrong with the "
- "regression model.")
- if p > n_samples_F:
- raise Exception(("Ordinary least squares problem is undetermined "
- "n_samples=%d must be greater than the "
- "regression model size p=%d.") % (n_samples, p))
- if self.beta0 is not None:
- if self.beta0.shape[0] != p:
- raise Exception("Shapes of beta0 and F do not match.")
-
- # Set attributes
- self.X = X
- self.y = y
- self.D = D
- self.ij = ij
- self.F = F
- self.X_mean, self.X_std = X_mean, X_std
- self.y_mean, self.y_std = y_mean, y_std
-
- # Determine Gaussian Process model parameters
- if self.thetaL is not None and self.thetaU is not None:
- # Maximum Likelihood Estimation of the parameters
- if self.verbose:
- print("Performing Maximum Likelihood Estimation of the "
- "autocorrelation parameters...")
- self.theta_, self.reduced_likelihood_function_value_, par = \
- self._arg_max_reduced_likelihood_function()
- if np.isinf(self.reduced_likelihood_function_value_):
- raise Exception("Bad parameter region. "
- "Try increasing upper bound")
-
- else:
- # Given parameters
- if self.verbose:
- print("Given autocorrelation parameters. "
- "Computing Gaussian Process model parameters...")
- self.theta_ = self.theta0
- self.reduced_likelihood_function_value_, par = \
- self.reduced_likelihood_function()
- if np.isinf(self.reduced_likelihood_function_value_):
- raise Exception("Bad point. Try increasing theta0.")
-
- self.beta = par['beta']
- self.gamma = par['gamma']
- self.sigma2 = par['sigma2']
- self.C = par['C']
- self.Ft = par['Ft']
- self.G = par['G']
-
- if self.storage_mode == 'light':
- # Delete heavy data (it will be computed again if required)
- # (it is required only when MSE is wanted in self.predict)
- if self.verbose:
- print("Light storage mode specified. "
- "Flushing autocorrelation matrix...")
- self.D = None
- self.ij = None
- self.F = None
- self.C = None
- self.Ft = None
- self.G = None
-
- return self
-
- def predict(self, X, eval_MSE=False, batch_size=None):
- """
- This function evaluates the Gaussian Process model at x.
-
- Parameters
- ----------
- X : array_like
- An array with shape (n_eval, n_features) giving the point(s) at
- which the prediction(s) should be made.
-
- eval_MSE : boolean, optional
- A boolean specifying whether the Mean Squared Error should be
- evaluated or not.
- Default assumes evalMSE = False and evaluates only the BLUP (mean
- prediction).
-
- batch_size : integer, optional
- An integer giving the maximum number of points that can be
- evaluated simultaneously (depending on the available memory).
- Default is None so that all given points are evaluated at the same
- time.
-
- Returns
- -------
- y : array_like, shape (n_samples, ) or (n_samples, n_targets)
- An array with shape (n_eval, ) if the Gaussian Process was trained
- on an array of shape (n_samples, ) or an array with shape
- (n_eval, n_targets) if the Gaussian Process was trained on an array
- of shape (n_samples, n_targets) with the Best Linear Unbiased
- Prediction at x.
-
- MSE : array_like, optional (if eval_MSE == True)
- An array with shape (n_eval, ) or (n_eval, n_targets) as with y,
- with the Mean Squared Error at x.
- """
- check_is_fitted(self, "X")
-
- # Check input shapes
- X = check_array(X)
- n_eval, _ = X.shape
- n_samples, n_features = self.X.shape
- n_samples_y, n_targets = self.y.shape
-
- # Run input checks
- self._check_params(n_samples)
-
- if X.shape[1] != n_features:
- raise ValueError(("The number of features in X (X.shape[1] = %d) "
- "should match the number of features used "
- "for fit() "
- "which is %d.") % (X.shape[1], n_features))
-
- if batch_size is None:
- # No memory management
- # (evaluates all given points in a single batch run)
-
- # Normalize input
- X = (X - self.X_mean) / self.X_std
-
- # Initialize output
- y = np.zeros(n_eval)
- if eval_MSE:
- MSE = np.zeros(n_eval)
-
- # Get pairwise componentwise L1-distances to the input training set
- dx = manhattan_distances(X, Y=self.X, sum_over_features=False)
- # Get regression function and correlation
- f = self.regr(X)
- r = self.corr(self.theta_, dx).reshape(n_eval, n_samples)
-
- # Scaled predictor
- y_ = np.dot(f, self.beta) + np.dot(r, self.gamma)
-
- # Predictor
- y = (self.y_mean + self.y_std * y_).reshape(n_eval, n_targets)
-
- if self.y_ndim_ == 1:
- y = y.ravel()
-
- # Mean Squared Error
- if eval_MSE:
- C = self.C
- if C is None:
- # Light storage mode (need to recompute C, F, Ft and G)
- if self.verbose:
- print("This GaussianProcess used 'light' storage mode "
- "at instantiation. Need to recompute "
- "autocorrelation matrix...")
- reduced_likelihood_function_value, par = \
- self.reduced_likelihood_function()
- self.C = par['C']
- self.Ft = par['Ft']
- self.G = par['G']
-
- rt = linalg.solve_triangular(self.C, r.T, lower=True)
-
- if self.beta0 is None:
- # Universal Kriging
- u = linalg.solve_triangular(self.G.T,
- np.dot(self.Ft.T, rt) - f.T,
- lower=True)
- else:
- # Ordinary Kriging
- u = np.zeros((n_targets, n_eval))
-
- MSE = np.dot(self.sigma2.reshape(n_targets, 1),
- (1. - (rt ** 2.).sum(axis=0)
- + (u ** 2.).sum(axis=0))[np.newaxis, :])
- MSE = np.sqrt((MSE ** 2.).sum(axis=0) / n_targets)
-
- # Mean Squared Error might be slightly negative depending on
- # machine precision: force to zero!
- MSE[MSE < 0.] = 0.
-
- if self.y_ndim_ == 1:
- MSE = MSE.ravel()
-
- return y, MSE
-
- else:
-
- return y
-
- else:
- # Memory management
-
- if type(batch_size) is not int or batch_size <= 0:
- raise Exception("batch_size must be a positive integer")
-
- if eval_MSE:
-
- y, MSE = np.zeros(n_eval), np.zeros(n_eval)
- for k in range(max(1, n_eval / batch_size)):
- batch_from = k * batch_size
- batch_to = min([(k + 1) * batch_size + 1, n_eval + 1])
- y[batch_from:batch_to], MSE[batch_from:batch_to] = \
- self.predict(X[batch_from:batch_to],
- eval_MSE=eval_MSE, batch_size=None)
-
- return y, MSE
-
- else:
-
- y = np.zeros(n_eval)
- for k in range(max(1, n_eval / batch_size)):
- batch_from = k * batch_size
- batch_to = min([(k + 1) * batch_size + 1, n_eval + 1])
- y[batch_from:batch_to] = \
- self.predict(X[batch_from:batch_to],
- eval_MSE=eval_MSE, batch_size=None)
-
- return y
-
- def reduced_likelihood_function(self, theta=None):
- """
- This function determines the BLUP parameters and evaluates the reduced
- likelihood function for the given autocorrelation parameters theta.
-
- Maximizing this function wrt the autocorrelation parameters theta is
- equivalent to maximizing the likelihood of the assumed joint Gaussian
- distribution of the observations y evaluated onto the design of
- experiments X.
-
- Parameters
- ----------
- theta : array_like, optional
- An array containing the autocorrelation parameters at which the
- Gaussian Process model parameters should be determined.
- Default uses the built-in autocorrelation parameters
- (ie ``theta = self.theta_``).
-
- Returns
- -------
- reduced_likelihood_function_value : double
- The value of the reduced likelihood function associated to the
- given autocorrelation parameters theta.
-
- par : dict
- A dictionary containing the requested Gaussian Process model
- parameters:
-
- sigma2
- Gaussian Process variance.
- beta
- Generalized least-squares regression weights for
- Universal Kriging or given beta0 for Ordinary
- Kriging.
- gamma
- Gaussian Process weights.
- C
- Cholesky decomposition of the correlation matrix [R].
- Ft
- Solution of the linear equation system : [R] x Ft = F
- G
- QR decomposition of the matrix Ft.
- """
- check_is_fitted(self, "X")
-
- if theta is None:
- # Use built-in autocorrelation parameters
- theta = self.theta_
-
- # Initialize output
- reduced_likelihood_function_value = - np.inf
- par = {}
-
- # Retrieve data
- n_samples = self.X.shape[0]
- D = self.D
- ij = self.ij
- F = self.F
-
- if D is None:
- # Light storage mode (need to recompute D, ij and F)
- D, ij = l1_cross_distances(self.X)
- if (np.min(np.sum(D, axis=1)) == 0.
- and self.corr != correlation.pure_nugget):
- raise Exception("Multiple X are not allowed")
- F = self.regr(self.X)
-
- # Set up R
- r = self.corr(theta, D)
- R = np.eye(n_samples) * (1. + self.nugget)
- R[ij[:, 0], ij[:, 1]] = r
- R[ij[:, 1], ij[:, 0]] = r
-
- # Cholesky decomposition of R
- try:
- C = linalg.cholesky(R, lower=True)
- except linalg.LinAlgError:
- return reduced_likelihood_function_value, par
-
- # Get generalized least squares solution
- Ft = linalg.solve_triangular(C, F, lower=True)
- try:
- Q, G = linalg.qr(Ft, econ=True)
- except:
- #/usr/lib/python2.6/dist-packages/scipy/linalg/decomp.py:1177:
- # DeprecationWarning: qr econ argument will be removed after scipy
- # 0.7. The economy transform will then be available through the
- # mode='economic' argument.
- Q, G = linalg.qr(Ft, mode='economic')
-
- sv = linalg.svd(G, compute_uv=False)
- rcondG = sv[-1] / sv[0]
- if rcondG < 1e-10:
- # Check F
- sv = linalg.svd(F, compute_uv=False)
- condF = sv[0] / sv[-1]
- if condF > 1e15:
- raise Exception("F is too ill conditioned. Poor combination "
- "of regression model and observations.")
- else:
- # Ft is too ill conditioned, get out (try different theta)
- return reduced_likelihood_function_value, par
-
- Yt = linalg.solve_triangular(C, self.y, lower=True)
- if self.beta0 is None:
- # Universal Kriging
- beta = linalg.solve_triangular(G, np.dot(Q.T, Yt))
- else:
- # Ordinary Kriging
- beta = np.array(self.beta0)
-
- rho = Yt - np.dot(Ft, beta)
- sigma2 = (rho ** 2.).sum(axis=0) / n_samples
- # The determinant of R is equal to the squared product of the diagonal
- # elements of its Cholesky decomposition C
- detR = (np.diag(C) ** (2. / n_samples)).prod()
-
- # Compute/Organize output
- reduced_likelihood_function_value = - sigma2.sum() * detR
- par['sigma2'] = sigma2 * self.y_std ** 2.
- par['beta'] = beta
- par['gamma'] = linalg.solve_triangular(C.T, rho)
- par['C'] = C
- par['Ft'] = Ft
- par['G'] = G
-
- return reduced_likelihood_function_value, par
-
- def _arg_max_reduced_likelihood_function(self):
- """
- This function estimates the autocorrelation parameters theta as the
- maximizer of the reduced likelihood function.
- (Minimization of the opposite reduced likelihood function is used for
- convenience)
-
- Parameters
- ----------
- self : All parameters are stored in the Gaussian Process model object.
-
- Returns
- -------
- optimal_theta : array_like
- The best set of autocorrelation parameters (the sought maximizer of
- the reduced likelihood function).
-
- optimal_reduced_likelihood_function_value : double
- The optimal reduced likelihood function value.
-
- optimal_par : dict
- The BLUP parameters associated to thetaOpt.
- """
-
- # Initialize output
- best_optimal_theta = []
- best_optimal_rlf_value = []
- best_optimal_par = []
-
- if self.verbose:
- print("The chosen optimizer is: " + str(self.optimizer))
- if self.random_start > 1:
- print(str(self.random_start) + " random starts are required.")
-
- percent_completed = 0.
-
- # Force optimizer to fmin_cobyla if the model is meant to be isotropic
- if self.optimizer == 'Welch' and self.theta0.size == 1:
- self.optimizer = 'fmin_cobyla'
-
- if self.optimizer == 'fmin_cobyla':
-
- def minus_reduced_likelihood_function(log10t):
- return - self.reduced_likelihood_function(
- theta=10. ** log10t)[0]
-
- constraints = []
- for i in range(self.theta0.size):
- constraints.append(lambda log10t, i=i:
- log10t[i] - np.log10(self.thetaL[0, i]))
- constraints.append(lambda log10t, i=i:
- np.log10(self.thetaU[0, i]) - log10t[i])
-
- for k in range(self.random_start):
-
- if k == 0:
- # Use specified starting point as first guess
- theta0 = self.theta0
- else:
- # Generate a random starting point log10-uniformly
- # distributed between bounds
- log10theta0 = (np.log10(self.thetaL)
- + self.random_state.rand(*self.theta0.shape)
- * np.log10(self.thetaU / self.thetaL))
- theta0 = 10. ** log10theta0
-
- # Run Cobyla
- try:
- log10_optimal_theta = \
- optimize.fmin_cobyla(minus_reduced_likelihood_function,
- np.log10(theta0).ravel(), constraints,
- iprint=0)
- except ValueError as ve:
- print("Optimization failed. Try increasing the ``nugget``")
- raise ve
-
- optimal_theta = 10. ** log10_optimal_theta
- optimal_rlf_value, optimal_par = \
- self.reduced_likelihood_function(theta=optimal_theta)
-
- # Compare the new optimizer to the best previous one
- if k > 0:
- if optimal_rlf_value > best_optimal_rlf_value:
- best_optimal_rlf_value = optimal_rlf_value
- best_optimal_par = optimal_par
- best_optimal_theta = optimal_theta
- else:
- best_optimal_rlf_value = optimal_rlf_value
- best_optimal_par = optimal_par
- best_optimal_theta = optimal_theta
- if self.verbose and self.random_start > 1:
- if (20 * k) / self.random_start > percent_completed:
- percent_completed = (20 * k) / self.random_start
- print("%s completed" % (5 * percent_completed))
-
- optimal_rlf_value = best_optimal_rlf_value
- optimal_par = best_optimal_par
- optimal_theta = best_optimal_theta
-
- elif self.optimizer == 'Welch':
-
- # Backup of the given attributes
- theta0, thetaL, thetaU = self.theta0, self.thetaL, self.thetaU
- corr = self.corr
- verbose = self.verbose
-
- # This will iterate over fmin_cobyla optimizer
- self.optimizer = 'fmin_cobyla'
- self.verbose = False
-
- # Initialize under isotropy assumption
- if verbose:
- print("Initialize under isotropy assumption...")
- self.theta0 = check_array(self.theta0.min())
- self.thetaL = check_array(self.thetaL.min())
- self.thetaU = check_array(self.thetaU.max())
- theta_iso, optimal_rlf_value_iso, par_iso = \
- self._arg_max_reduced_likelihood_function()
- optimal_theta = theta_iso + np.zeros(theta0.shape)
-
- # Iterate over all dimensions of theta allowing for anisotropy
- if verbose:
- print("Now improving allowing for anisotropy...")
- for i in self.random_state.permutation(theta0.size):
- if verbose:
- print("Proceeding along dimension %d..." % (i + 1))
- self.theta0 = check_array(theta_iso)
- self.thetaL = check_array(thetaL[0, i])
- self.thetaU = check_array(thetaU[0, i])
-
- def corr_cut(t, d):
- return corr(check_array(np.hstack([optimal_theta[0][0:i],
- t[0],
- optimal_theta[0][(i +
- 1)::]])),
- d)
-
- self.corr = corr_cut
- optimal_theta[0, i], optimal_rlf_value, optimal_par = \
- self._arg_max_reduced_likelihood_function()
-
- # Restore the given attributes
- self.theta0, self.thetaL, self.thetaU = theta0, thetaL, thetaU
- self.corr = corr
- self.optimizer = 'Welch'
- self.verbose = verbose
-
- else:
-
- raise NotImplementedError("This optimizer ('%s') is not "
- "implemented yet. Please contribute!"
- % self.optimizer)
-
- return optimal_theta, optimal_rlf_value, optimal_par
-
- def _check_params(self, n_samples=None):
-
- # Check regression model
- if not callable(self.regr):
- if self.regr in self._regression_types:
- self.regr = self._regression_types[self.regr]
- else:
- raise ValueError("regr should be one of %s or callable, "
- "%s was given."
- % (self._regression_types.keys(), self.regr))
-
- # Check regression weights if given (Ordinary Kriging)
- if self.beta0 is not None:
- self.beta0 = np.atleast_2d(self.beta0)
- if self.beta0.shape[1] != 1:
- # Force to column vector
- self.beta0 = self.beta0.T
-
- # Check correlation model
- if not callable(self.corr):
- if self.corr in self._correlation_types:
- self.corr = self._correlation_types[self.corr]
- else:
- raise ValueError("corr should be one of %s or callable, "
- "%s was given."
- % (self._correlation_types.keys(), self.corr))
-
- # Check storage mode
- if self.storage_mode != 'full' and self.storage_mode != 'light':
- raise ValueError("Storage mode should either be 'full' or "
- "'light', %s was given." % self.storage_mode)
-
- # Check correlation parameters
- self.theta0 = np.atleast_2d(self.theta0)
- lth = self.theta0.size
-
- if self.thetaL is not None and self.thetaU is not None:
- self.thetaL = np.atleast_2d(self.thetaL)
- self.thetaU = np.atleast_2d(self.thetaU)
- if self.thetaL.size != lth or self.thetaU.size != lth:
- raise ValueError("theta0, thetaL and thetaU must have the "
- "same length.")
- if np.any(self.thetaL <= 0) or np.any(self.thetaU < self.thetaL):
- raise ValueError("The bounds must satisfy O < thetaL <= "
- "thetaU.")
-
- elif self.thetaL is None and self.thetaU is None:
- if np.any(self.theta0 <= 0):
- raise ValueError("theta0 must be strictly positive.")
-
- elif self.thetaL is None or self.thetaU is None:
- raise ValueError("thetaL and thetaU should either be both or "
- "neither specified.")
-
- # Force verbose type to bool
- self.verbose = bool(self.verbose)
-
- # Force normalize type to bool
- self.normalize = bool(self.normalize)
-
- # Check nugget value
- self.nugget = np.asarray(self.nugget)
- if np.any(self.nugget) < 0.:
- raise ValueError("nugget must be positive or zero.")
- if (n_samples is not None
- and self.nugget.shape not in [(), (n_samples,)]):
- raise ValueError("nugget must be either a scalar "
- "or array of length n_samples.")
-
- # Check optimizer
- if self.optimizer not in self._optimizer_types:
- raise ValueError("optimizer should be one of %s"
- % self._optimizer_types)
-
- # Force random_start type to int
- self.random_start = int(self.random_start)
diff --git a/mloop/localsklearn/gaussian_process/gpc.py b/mloop/localsklearn/gaussian_process/gpc.py
deleted file mode 100644
index 4823c3e..0000000
--- a/mloop/localsklearn/gaussian_process/gpc.py
+++ /dev/null
@@ -1,729 +0,0 @@
-"""Gaussian processes classification."""
-
-# Authors: Jan Hendrik Metzen
-#
-# License: BSD 3 clause
-
-import warnings
-from operator import itemgetter
-
-import numpy as np
-from scipy.linalg import cholesky, cho_solve, solve
-from scipy.optimize import fmin_l_bfgs_b
-from scipy.special import erf
-
-from ..base import BaseEstimator, ClassifierMixin, clone
-from .kernels \
- import RBF, CompoundKernel, ConstantKernel as C
-from ..utils.validation import check_X_y, check_is_fitted, check_array
-from ..utils import check_random_state
-from ..preprocessing import LabelEncoder
-from ..multiclass import OneVsRestClassifier, OneVsOneClassifier
-
-
-# Values required for approximating the logistic sigmoid by
-# error functions. coefs are obtained via:
-# x = np.array([0, 0.6, 2, 3.5, 4.5, np.inf])
-# b = logistic(x)
-# A = (erf(np.dot(x, self.lambdas)) + 1) / 2
-# coefs = lstsq(A, b)[0]
-LAMBDAS = np.array([0.41, 0.4, 0.37, 0.44, 0.39])[:, np.newaxis]
-COEFS = np.array([-1854.8214151, 3516.89893646, 221.29346712,
- 128.12323805, -2010.49422654])[:, np.newaxis]
-
-
-class _BinaryGaussianProcessClassifierLaplace(BaseEstimator):
- """Binary Gaussian process classification based on Laplace approximation.
-
- The implementation is based on Algorithm 3.1, 3.2, and 5.1 of
- ``Gaussian Processes for Machine Learning'' (GPML) by Rasmussen and
- Williams.
-
- Internally, the Laplace approximation is used for approximating the
- non-Gaussian posterior by a Gaussian.
-
- Currently, the implementation is restricted to using the logistic link
- function.
-
- Parameters
- ----------
- kernel : kernel object
- The kernel specifying the covariance function of the GP. If None is
- passed, the kernel "1.0 * RBF(1.0)" is used as default. Note that
- the kernel's hyperparameters are optimized during fitting.
-
- optimizer : string or callable, optional (default: "fmin_l_bfgs_b")
- Can either be one of the internally supported optimizers for optimizing
- the kernel's parameters, specified by a string, or an externally
- defined optimizer passed as a callable. If a callable is passed, it
- must have the signature::
-
- def optimizer(obj_func, initial_theta, bounds):
- # * 'obj_func' is the objective function to be maximized, which
- # takes the hyperparameters theta as parameter and an
- # optional flag eval_gradient, which determines if the
- # gradient is returned additionally to the function value
- # * 'initial_theta': the initial value for theta, which can be
- # used by local optimizers
- # * 'bounds': the bounds on the values of theta
- ....
- # Returned are the best found hyperparameters theta and
- # the corresponding value of the target function.
- return theta_opt, func_min
-
- Per default, the 'fmin_l_bfgs_b' algorithm from scipy.optimize
- is used. If None is passed, the kernel's parameters are kept fixed.
- Available internal optimizers are::
-
- 'fmin_l_bfgs_b'
-
- n_restarts_optimizer: int, optional (default: 0)
- The number of restarts of the optimizer for finding the kernel's
- parameters which maximize the log-marginal likelihood. The first run
- of the optimizer is performed from the kernel's initial parameters,
- the remaining ones (if any) from thetas sampled log-uniform randomly
- from the space of allowed theta-values. If greater than 0, all bounds
- must be finite. Note that n_restarts_optimizer=0 implies that one
- run is performed.
-
- max_iter_predict: int, optional (default: 100)
- The maximum number of iterations in Newton's method for approximating
- the posterior during predict. Smaller values will reduce computation
- time at the cost of worse results.
-
- warm_start : bool, optional (default: False)
- If warm-starts are enabled, the solution of the last Newton iteration
- on the Laplace approximation of the posterior mode is used as
- initialization for the next call of _posterior_mode(). This can speed
- up convergence when _posterior_mode is called several times on similar
- problems as in hyperparameter optimization.
-
- copy_X_train : bool, optional (default: True)
- If True, a persistent copy of the training data is stored in the
- object. Otherwise, just a reference to the training data is stored,
- which might cause predictions to change if the data is modified
- externally.
-
- random_state : integer or numpy.RandomState, optional
- The generator used to initialize the centers. If an integer is
- given, it fixes the seed. Defaults to the global numpy random
- number generator.
-
- Attributes
- ----------
- X_train_ : array-like, shape = (n_samples, n_features)
- Feature values in training data (also required for prediction)
-
- y_train_: array-like, shape = (n_samples,)
- Target values in training data (also required for prediction)
-
- classes_ : array-like, shape = (n_classes,)
- Unique class labels.
-
- kernel_: kernel object
- The kernel used for prediction. The structure of the kernel is the
- same as the one passed as parameter but with optimized hyperparameters
-
- L_: array-like, shape = (n_samples, n_samples)
- Lower-triangular Cholesky decomposition of the kernel in X_train_
-
- pi_: array-like, shape = (n_samples,)
- The probabilities of the positive class for the training points
- X_train_
-
- W_sr_: array-like, shape = (n_samples,)
- Square root of W, the Hessian of log-likelihood of the latent function
- values for the observed labels. Since W is diagonal, only the diagonal
- of sqrt(W) is stored.
-
- log_marginal_likelihood_value_: float
- The log-marginal-likelihood of ``self.kernel_.theta``
- """
- def __init__(self, kernel=None, optimizer="fmin_l_bfgs_b",
- n_restarts_optimizer=0, max_iter_predict=100,
- warm_start=False, copy_X_train=True, random_state=None):
- self.kernel = kernel
- self.optimizer = optimizer
- self.n_restarts_optimizer = n_restarts_optimizer
- self.max_iter_predict = max_iter_predict
- self.warm_start = warm_start
- self.copy_X_train = copy_X_train
- self.random_state = random_state
-
- def fit(self, X, y):
- """Fit Gaussian process classification model
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
- Training data
-
- y : array-like, shape = (n_samples,)
- Target values, must be binary
-
- Returns
- -------
- self : returns an instance of self.
- """
- if self.kernel is None: # Use an RBF kernel as default
- self.kernel_ = C(1.0, constant_value_bounds="fixed") \
- * RBF(1.0, length_scale_bounds="fixed")
- else:
- self.kernel_ = clone(self.kernel)
-
- self.rng = check_random_state(self.random_state)
-
- self.X_train_ = np.copy(X) if self.copy_X_train else X
-
- # Encode class labels and check that it is a binary classification
- # problem
- label_encoder = LabelEncoder()
- self.y_train_ = label_encoder.fit_transform(y)
- self.classes_ = label_encoder.classes_
- if self.classes_.size > 2:
- raise ValueError("%s supports only binary classification. "
- "y contains classes %s"
- % (self.__class__.__name__, self.classes_))
- elif self.classes_.size == 1:
- raise ValueError("{0:s} requires 2 classes.".format(
- self.__class__.__name__))
-
- if self.optimizer is not None and self.kernel_.n_dims > 0:
- # Choose hyperparameters based on maximizing the log-marginal
- # likelihood (potentially starting from several initial values)
- def obj_func(theta, eval_gradient=True):
- if eval_gradient:
- lml, grad = self.log_marginal_likelihood(
- theta, eval_gradient=True)
- return -lml, -grad
- else:
- return -self.log_marginal_likelihood(theta)
-
- # First optimize starting from theta specified in kernel
- optima = [self._constrained_optimization(obj_func,
- self.kernel_.theta,
- self.kernel_.bounds)]
-
- # Additional runs are performed from log-uniform chosen initial
- # theta
- if self.n_restarts_optimizer > 0:
- if not np.isfinite(self.kernel_.bounds).all():
- raise ValueError(
- "Multiple optimizer restarts (n_restarts_optimizer>0) "
- "requires that all bounds are finite.")
- bounds = self.kernel_.bounds
- for iteration in range(self.n_restarts_optimizer):
- theta_initial = np.exp(self.rng.uniform(bounds[:, 0],
- bounds[:, 1]))
- optima.append(
- self._constrained_optimization(obj_func, theta_initial,
- bounds))
- # Select result from run with minimal (negative) log-marginal
- # likelihood
- lml_values = list(map(itemgetter(1), optima))
- self.kernel_.theta = optima[np.argmin(lml_values)][0]
- self.log_marginal_likelihood_value_ = -np.min(lml_values)
- else:
- self.log_marginal_likelihood_value_ = \
- self.log_marginal_likelihood(self.kernel_.theta)
-
- # Precompute quantities required for predictions which are independent
- # of actual query points
- K = self.kernel_(self.X_train_)
-
- _, (self.pi_, self.W_sr_, self.L_, _, _) = \
- self._posterior_mode(K, return_temporaries=True)
-
- return self
-
- def predict(self, X):
- """Perform classification on an array of test vectors X.
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
-
- Returns
- -------
- C : array, shape = (n_samples,)
- Predicted target values for X, values are from ``classes_``
- """
- check_is_fitted(self, ["X_train_", "y_train_", "pi_", "W_sr_", "L_"])
-
- # As discussed on Section 3.4.2 of GPML, for making hard binary
- # decisions, it is enough to compute the MAP of the posterior and
- # pass it through the link function
- K_star = self.kernel_(self.X_train_, X) # K_star =k(x_star)
- f_star = K_star.T.dot(self.y_train_ - self.pi_) # Algorithm 3.2,Line 4
-
- return np.where(f_star > 0, self.classes_[1], self.classes_[0])
-
- def predict_proba(self, X):
- """Return probability estimates for the test vector X.
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
-
- Returns
- -------
- C : array-like, shape = (n_samples, n_classes)
- Returns the probability of the samples for each class in
- the model. The columns correspond to the classes in sorted
- order, as they appear in the attribute ``classes_``.
- """
- check_is_fitted(self, ["X_train_", "y_train_", "pi_", "W_sr_", "L_"])
-
- # Based on Algorithm 3.2 of GPML
- K_star = self.kernel_(self.X_train_, X) # K_star =k(x_star)
- f_star = K_star.T.dot(self.y_train_ - self.pi_) # Line 4
- v = solve(self.L_, self.W_sr_[:, np.newaxis] * K_star) # Line 5
- # Line 6 (compute np.diag(v.T.dot(v)) via einsum)
- var_f_star = self.kernel_.diag(X) - np.einsum("ij,ij->j", v, v)
-
- # Line 7:
- # Approximate \int log(z) * N(z | f_star, var_f_star)
- # Approximation is due to Williams & Barber, "Bayesian Classification
- # with Gaussian Processes", Appendix A: Approximate the logistic
- # sigmoid by a linear combination of 5 error functions.
- # For information on how this integral can be computed see
- # blitiri.blogspot.de/2012/11/gaussian-integral-of-error-function.html
- alpha = 1 / (2 * var_f_star)
- gamma = LAMBDAS * f_star
- integrals = np.sqrt(np.pi / alpha) \
- * erf(gamma * np.sqrt(alpha / (alpha + LAMBDAS**2))) \
- / (2 * np.sqrt(var_f_star * 2 * np.pi))
- pi_star = (COEFS * integrals).sum(axis=0) + .5 * COEFS.sum()
-
- return np.vstack((1 - pi_star, pi_star)).T
-
- def log_marginal_likelihood(self, theta=None, eval_gradient=False):
- """Returns log-marginal likelihood of theta for training data.
-
- Parameters
- ----------
- theta : array-like, shape = (n_kernel_params,) or None
- Kernel hyperparameters for which the log-marginal likelihood is
- evaluated. If None, the precomputed log_marginal_likelihood
- of ``self.kernel_.theta`` is returned.
-
- eval_gradient : bool, default: False
- If True, the gradient of the log-marginal likelihood with respect
- to the kernel hyperparameters at position theta is returned
- additionally. If True, theta must not be None.
-
- Returns
- -------
- log_likelihood : float
- Log-marginal likelihood of theta for training data.
-
- log_likelihood_gradient : array, shape = (n_kernel_params,), optional
- Gradient of the log-marginal likelihood with respect to the kernel
- hyperparameters at position theta.
- Only returned when eval_gradient is True.
- """
- if theta is None:
- if eval_gradient:
- raise ValueError(
- "Gradient can only be evaluated for theta!=None")
- return self.log_marginal_likelihood_value_
-
- kernel = self.kernel_.clone_with_theta(theta)
-
- if eval_gradient:
- K, K_gradient = kernel(self.X_train_, eval_gradient=True)
- else:
- K = kernel(self.X_train_)
-
- # Compute log-marginal-likelihood Z and also store some temporaries
- # which can be reused for computing Z's gradient
- Z, (pi, W_sr, L, b, a) = \
- self._posterior_mode(K, return_temporaries=True)
-
- if not eval_gradient:
- return Z
-
- # Compute gradient based on Algorithm 5.1 of GPML
- d_Z = np.empty(theta.shape[0])
- # XXX: Get rid of the np.diag() in the next line
- R = W_sr[:, np.newaxis] * cho_solve((L, True), np.diag(W_sr)) # Line 7
- C = solve(L, W_sr[:, np.newaxis] * K) # Line 8
- # Line 9: (use einsum to compute np.diag(C.T.dot(C))))
- s_2 = -0.5 * (np.diag(K) - np.einsum('ij, ij -> j', C, C)) \
- * (pi * (1 - pi) * (1 - 2 * pi)) # third derivative
-
- for j in range(d_Z.shape[0]):
- C = K_gradient[:, :, j] # Line 11
- # Line 12: (R.T.ravel().dot(C.ravel()) = np.trace(R.dot(C)))
- s_1 = .5 * a.T.dot(C).dot(a) - .5 * R.T.ravel().dot(C.ravel())
-
- b = C.dot(self.y_train_ - pi) # Line 13
- s_3 = b - K.dot(R.dot(b)) # Line 14
-
- d_Z[j] = s_1 + s_2.T.dot(s_3) # Line 15
-
- return Z, d_Z
-
- def _posterior_mode(self, K, return_temporaries=False):
- """Mode-finding for binary Laplace GPC and fixed kernel.
-
- This approximates the posterior of the latent function values for given
- inputs and target observations with a Gaussian approximation and uses
- Newton's iteration to find the mode of this approximation.
- """
- # Based on Algorithm 3.1 of GPML
-
- # If warm_start are enabled, we reuse the last solution for the
- # posterior mode as initialization; otherwise, we initialize with 0
- if self.warm_start and hasattr(self, "f_cached") \
- and self.f_cached.shape == self.y_train_.shape:
- f = self.f_cached
- else:
- f = np.zeros_like(self.y_train_, dtype=np.float64)
-
- # Use Newton's iteration method to find mode of Laplace approximation
- log_marginal_likelihood = -np.inf
- for _ in range(self.max_iter_predict):
- # Line 4
- pi = 1 / (1 + np.exp(-f))
- W = pi * (1 - pi)
- # Line 5
- W_sr = np.sqrt(W)
- W_sr_K = W_sr[:, np.newaxis] * K
- B = np.eye(W.shape[0]) + W_sr_K * W_sr
- L = cholesky(B, lower=True)
- # Line 6
- b = W * f + (self.y_train_ - pi)
- # Line 7
- a = b - W_sr * cho_solve((L, True), W_sr_K.dot(b))
- # Line 8
- f = K.dot(a)
-
- # Line 10: Compute log marginal likelihood in loop and use as
- # convergence criterion
- lml = -0.5 * a.T.dot(f) \
- - np.log(1 + np.exp(-(self.y_train_ * 2 - 1) * f)).sum() \
- - np.log(np.diag(L)).sum()
- # Check if we have converged (log marginal likelihood does
- # not decrease)
- # XXX: more complex convergence criterion
- if lml - log_marginal_likelihood < 1e-10:
- break
- log_marginal_likelihood = lml
-
- self.f_cached = f # Remember solution for later warm-starts
- if return_temporaries:
- return log_marginal_likelihood, (pi, W_sr, L, b, a)
- else:
- return log_marginal_likelihood
-
- def _constrained_optimization(self, obj_func, initial_theta, bounds):
- if self.optimizer == "fmin_l_bfgs_b":
- theta_opt, func_min, convergence_dict = \
- fmin_l_bfgs_b(obj_func, initial_theta, bounds=bounds)
- if convergence_dict["warnflag"] != 0:
- warnings.warn("fmin_l_bfgs_b terminated abnormally with the "
- " state: %s" % convergence_dict)
- elif callable(self.optimizer):
- theta_opt, func_min = \
- self.optimizer(obj_func, initial_theta, bounds=bounds)
- else:
- raise ValueError("Unknown optimizer %s." % self.optimizer)
-
- return theta_opt, func_min
-
-
-class GaussianProcessClassifier(BaseEstimator, ClassifierMixin):
- """Gaussian process classification (GPC) based on Laplace approximation.
-
- The implementation is based on Algorithm 3.1, 3.2, and 5.1 of
- Gaussian Processes for Machine Learning (GPML) by Rasmussen and
- Williams.
-
- Internally, the Laplace approximation is used for approximating the
- non-Gaussian posterior by a Gaussian.
-
- Currently, the implementation is restricted to using the logistic link
- function. For multi-class classification, several binary one-versus rest
- classifiers are fitted. Note that this class thus does not implement
- a true multi-class Laplace approximation.
-
- Parameters
- ----------
- kernel : kernel object
- The kernel specifying the covariance function of the GP. If None is
- passed, the kernel "1.0 * RBF(1.0)" is used as default. Note that
- the kernel's hyperparameters are optimized during fitting.
-
- optimizer : string or callable, optional (default: "fmin_l_bfgs_b")
- Can either be one of the internally supported optimizers for optimizing
- the kernel's parameters, specified by a string, or an externally
- defined optimizer passed as a callable. If a callable is passed, it
- must have the signature::
-
- def optimizer(obj_func, initial_theta, bounds):
- # * 'obj_func' is the objective function to be maximized, which
- # takes the hyperparameters theta as parameter and an
- # optional flag eval_gradient, which determines if the
- # gradient is returned additionally to the function value
- # * 'initial_theta': the initial value for theta, which can be
- # used by local optimizers
- # * 'bounds': the bounds on the values of theta
- ....
- # Returned are the best found hyperparameters theta and
- # the corresponding value of the target function.
- return theta_opt, func_min
-
- Per default, the 'fmin_l_bfgs_b' algorithm from scipy.optimize
- is used. If None is passed, the kernel's parameters are kept fixed.
- Available internal optimizers are::
-
- 'fmin_l_bfgs_b'
-
- n_restarts_optimizer: int, optional (default: 0)
- The number of restarts of the optimizer for finding the kernel's
- parameters which maximize the log-marginal likelihood. The first run
- of the optimizer is performed from the kernel's initial parameters,
- the remaining ones (if any) from thetas sampled log-uniform randomly
- from the space of allowed theta-values. If greater than 0, all bounds
- must be finite. Note that n_restarts_optimizer=0 implies that one
- run is performed.
-
- max_iter_predict: int, optional (default: 100)
- The maximum number of iterations in Newton's method for approximating
- the posterior during predict. Smaller values will reduce computation
- time at the cost of worse results.
-
- warm_start : bool, optional (default: False)
- If warm-starts are enabled, the solution of the last Newton iteration
- on the Laplace approximation of the posterior mode is used as
- initialization for the next call of _posterior_mode(). This can speed
- up convergence when _posterior_mode is called several times on similar
- problems as in hyperparameter optimization.
-
- copy_X_train : bool, optional (default: True)
- If True, a persistent copy of the training data is stored in the
- object. Otherwise, just a reference to the training data is stored,
- which might cause predictions to change if the data is modified
- externally.
-
- random_state : integer or numpy.RandomState, optional
- The generator used to initialize the centers. If an integer is
- given, it fixes the seed. Defaults to the global numpy random
- number generator.
-
- multi_class: string, default: "one_vs_rest"
- Specifies how multi-class classification problems are handled.
- Supported are "one_vs_rest" and "one_vs_one". In "one_vs_rest",
- one binary Gaussian process classifier is fitted for each class, which
- is trained to separate this class from the rest. In "one_vs_one", one
- binary Gaussian process classifier is fitted for each pair of classes,
- which is trained to separate these two classes. The predictions of
- these binary predictors are combined into multi-class predictions.
- Note that "one_vs_one" does not support predicting probability
- estimates.
-
- n_jobs : int, optional, default: 1
- The number of jobs to use for the computation. If -1 all CPUs are used.
- If 1 is given, no parallel computing code is used at all, which is
- useful for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are
- used. Thus for n_jobs = -2, all CPUs but one are used.
-
- Attributes
- ----------
- kernel_ : kernel object
- The kernel used for prediction. In case of binary classification,
- the structure of the kernel is the same as the one passed as parameter
- but with optimized hyperparameters. In case of multi-class
- classification, a CompoundKernel is returned which consists of the
- different kernels used in the one-versus-rest classifiers.
-
- log_marginal_likelihood_value_: float
- The log-marginal-likelihood of ``self.kernel_.theta``
-
- classes_ : array-like, shape = (n_classes,)
- Unique class labels.
-
- n_classes_ : int
- The number of classes in the training data
- """
- def __init__(self, kernel=None, optimizer="fmin_l_bfgs_b",
- n_restarts_optimizer=0, max_iter_predict=100,
- warm_start=False, copy_X_train=True, random_state=None,
- multi_class="one_vs_rest", n_jobs=1):
- self.kernel = kernel
- self.optimizer = optimizer
- self.n_restarts_optimizer = n_restarts_optimizer
- self.max_iter_predict = max_iter_predict
- self.warm_start = warm_start
- self.copy_X_train = copy_X_train
- self.random_state = random_state
- self.multi_class = multi_class
- self.n_jobs = n_jobs
-
- def fit(self, X, y):
- """Fit Gaussian process classification model
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
- Training data
-
- y : array-like, shape = (n_samples,)
- Target values, must be binary
-
- Returns
- -------
- self : returns an instance of self.
- """
- X, y = check_X_y(X, y, multi_output=False)
-
- self.base_estimator_ = _BinaryGaussianProcessClassifierLaplace(
- self.kernel, self.optimizer, self.n_restarts_optimizer,
- self.max_iter_predict, self.warm_start, self.copy_X_train,
- self.random_state)
-
- self.classes_ = np.unique(y)
- self.n_classes_ = self.classes_.size
- if self.n_classes_ == 1:
- raise ValueError("GaussianProcessClassifier requires 2 or more "
- "distinct classes. Only class %s present."
- % self.classes_[0])
- if self.n_classes_ > 2:
- if self.multi_class == "one_vs_rest":
- self.base_estimator_ = \
- OneVsRestClassifier(self.base_estimator_,
- n_jobs=self.n_jobs)
- elif self.multi_class == "one_vs_one":
- self.base_estimator_ = \
- OneVsOneClassifier(self.base_estimator_,
- n_jobs=self.n_jobs)
- else:
- raise ValueError("Unknown multi-class mode %s"
- % self.multi_class)
-
- self.base_estimator_.fit(X, y)
-
- if self.n_classes_ > 2:
- self.log_marginal_likelihood_value_ = np.mean(
- [estimator.log_marginal_likelihood()
- for estimator in self.base_estimator_.estimators_])
- else:
- self.log_marginal_likelihood_value_ = \
- self.base_estimator_.log_marginal_likelihood()
-
- return self
-
- def predict(self, X):
- """Perform classification on an array of test vectors X.
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
-
- Returns
- -------
- C : array, shape = (n_samples,)
- Predicted target values for X, values are from ``classes_``
- """
- check_is_fitted(self, ["classes_", "n_classes_"])
- X = check_array(X)
- return self.base_estimator_.predict(X)
-
- def predict_proba(self, X):
- """Return probability estimates for the test vector X.
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
-
- Returns
- -------
- C : array-like, shape = (n_samples, n_classes)
- Returns the probability of the samples for each class in
- the model. The columns correspond to the classes in sorted
- order, as they appear in the attribute `classes_`.
- """
- check_is_fitted(self, ["classes_", "n_classes_"])
- if self.n_classes_ > 2 and self.multi_class == "one_vs_one":
- raise ValueError("one_vs_one multi-class mode does not support "
- "predicting probability estimates. Use "
- "one_vs_rest mode instead.")
- X = check_array(X)
- return self.base_estimator_.predict_proba(X)
-
- @property
- def kernel_(self):
- if self.n_classes_ == 2:
- return self.base_estimator_.kernel_
- else:
- return CompoundKernel(
- [estimator.kernel_
- for estimator in self.base_estimator_.estimators_])
-
- def log_marginal_likelihood(self, theta=None, eval_gradient=False):
- """Returns log-marginal likelihood of theta for training data.
-
- In the case of multi-class classification, the mean log-marginal
- likelihood of the one-versus-rest classifiers are returned.
-
- Parameters
- ----------
- theta : array-like, shape = (n_kernel_params,) or none
- Kernel hyperparameters for which the log-marginal likelihood is
- evaluated. In the case of multi-class classification, theta may
- be the hyperparameters of the compound kernel or of an individual
- kernel. In the latter case, all individual kernel get assigned the
- same theta values. If None, the precomputed log_marginal_likelihood
- of ``self.kernel_.theta`` is returned.
-
- eval_gradient : bool, default: False
- If True, the gradient of the log-marginal likelihood with respect
- to the kernel hyperparameters at position theta is returned
- additionally. Note that gradient computation is not supported
- for non-binary classification. If True, theta must not be None.
-
- Returns
- -------
- log_likelihood : float
- Log-marginal likelihood of theta for training data.
-
- log_likelihood_gradient : array, shape = (n_kernel_params,), optional
- Gradient of the log-marginal likelihood with respect to the kernel
- hyperparameters at position theta.
- Only returned when eval_gradient is True.
- """
- check_is_fitted(self, ["classes_", "n_classes_"])
-
- if theta is None:
- if eval_gradient:
- raise ValueError(
- "Gradient can only be evaluated for theta!=None")
- return self.log_marginal_likelihood_value_
-
- theta = np.asarray(theta)
- if self.n_classes_ == 2:
- return self.base_estimator_.log_marginal_likelihood(
- theta, eval_gradient)
- else:
- if eval_gradient:
- raise NotImplementedError(
- "Gradient of log-marginal-likelihood not implemented for "
- "multi-class GPC.")
- estimators = self.base_estimator_.estimators_
- n_dims = estimators[0].kernel_.n_dims
- if theta.shape[0] == n_dims: # use same theta for all sub-kernels
- return np.mean(
- [estimator.log_marginal_likelihood(theta)
- for i, estimator in enumerate(estimators)])
- elif theta.shape[0] == n_dims * self.classes_.shape[0]:
- # theta for compound kernel
- return np.mean(
- [estimator.log_marginal_likelihood(
- theta[n_dims * i:n_dims * (i + 1)])
- for i, estimator in enumerate(estimators)])
- else:
- raise ValueError("Shape of theta must be either %d or %d. "
- "Obtained theta with shape %d."
- % (n_dims, n_dims * self.classes_.shape[0],
- theta.shape[0]))
diff --git a/mloop/localsklearn/gaussian_process/gpr.py b/mloop/localsklearn/gaussian_process/gpr.py
deleted file mode 100644
index a765279..0000000
--- a/mloop/localsklearn/gaussian_process/gpr.py
+++ /dev/null
@@ -1,430 +0,0 @@
-"""Gaussian processes regression. """
-
-# Authors: Jan Hendrik Metzen
-#
-# License: BSD 3 clause
-
-import warnings
-from operator import itemgetter
-
-import numpy as np
-from scipy.linalg import cholesky, cho_solve, solve_triangular
-from scipy.optimize import fmin_l_bfgs_b
-
-from ..base import BaseEstimator, RegressorMixin, clone
-from .kernels import RBF, ConstantKernel as C
-from ..utils import check_random_state
-from ..utils.validation import check_X_y, check_array
-
-
-class GaussianProcessRegressor(BaseEstimator, RegressorMixin):
- """Gaussian process regression (GPR).
-
- The implementation is based on Algorithm 2.1 of Gaussian Processes
- for Machine Learning (GPML) by Rasmussen and Williams.
-
- In addition to standard sklearn estimator API, GaussianProcessRegressor:
-
- * allows prediction without prior fitting (based on the GP prior)
- * provides an additional method sample_y(X), which evaluates samples
- drawn from the GPR (prior or posterior) at given inputs
- * exposes a method log_marginal_likelihood(theta), which can be used
- externally for other ways of selecting hyperparameters, e.g., via
- Markov chain Monte Carlo.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- kernel : kernel object
- The kernel specifying the covariance function of the GP. If None is
- passed, the kernel "1.0 * RBF(1.0)" is used as default. Note that
- the kernel's hyperparameters are optimized during fitting.
-
- alpha : float or array-like, optional (default: 1e-10)
- Value added to the diagonal of the kernel matrix during fitting.
- Larger values correspond to increased noise level in the observations
- and reduce potential numerical issue during fitting. If an array is
- passed, it must have the same number of entries as the data used for
- fitting and is used as datapoint-dependent noise level. Note that this
- is equivalent to adding a WhiteKernel with c=alpha. Allowing to specify
- the noise level directly as a parameter is mainly for convenience and
- for consistency with Ridge.
-
- optimizer : string or callable, optional (default: "fmin_l_bfgs_b")
- Can either be one of the internally supported optimizers for optimizing
- the kernel's parameters, specified by a string, or an externally
- defined optimizer passed as a callable. If a callable is passed, it
- must have the signature::
-
- def optimizer(obj_func, initial_theta, bounds):
- # * 'obj_func' is the objective function to be maximized, which
- # takes the hyperparameters theta as parameter and an
- # optional flag eval_gradient, which determines if the
- # gradient is returned additionally to the function value
- # * 'initial_theta': the initial value for theta, which can be
- # used by local optimizers
- # * 'bounds': the bounds on the values of theta
- ....
- # Returned are the best found hyperparameters theta and
- # the corresponding value of the target function.
- return theta_opt, func_min
-
- Per default, the 'fmin_l_bfgs_b' algorithm from scipy.optimize
- is used. If None is passed, the kernel's parameters are kept fixed.
- Available internal optimizers are::
-
- 'fmin_l_bfgs_b'
-
- n_restarts_optimizer: int, optional (default: 0)
- The number of restarts of the optimizer for finding the kernel's
- parameters which maximize the log-marginal likelihood. The first run
- of the optimizer is performed from the kernel's initial parameters,
- the remaining ones (if any) from thetas sampled log-uniform randomly
- from the space of allowed theta-values. If greater than 0, all bounds
- must be finite. Note that n_restarts_optimizer == 0 implies that one
- run is performed.
-
- normalize_y: boolean, optional (default: False)
- Whether the target values y are normalized, i.e., the mean of the
- observed target values become zero. This parameter should be set to
- True if the target values' mean is expected to differ considerable from
- zero. When enabled, the normalization effectively modifies the GP's
- prior based on the data, which contradicts the likelihood principle;
- normalization is thus disabled per default.
-
- copy_X_train : bool, optional (default: True)
- If True, a persistent copy of the training data is stored in the
- object. Otherwise, just a reference to the training data is stored,
- which might cause predictions to change if the data is modified
- externally.
-
- random_state : integer or numpy.RandomState, optional
- The generator used to initialize the centers. If an integer is
- given, it fixes the seed. Defaults to the global numpy random
- number generator.
-
- Attributes
- ----------
- X_train_ : array-like, shape = (n_samples, n_features)
- Feature values in training data (also required for prediction)
-
- y_train_: array-like, shape = (n_samples, [n_output_dims])
- Target values in training data (also required for prediction)
-
- kernel_: kernel object
- The kernel used for prediction. The structure of the kernel is the
- same as the one passed as parameter but with optimized hyperparameters
-
- L_: array-like, shape = (n_samples, n_samples)
- Lower-triangular Cholesky decomposition of the kernel in ``X_train_``
-
- alpha_: array-like, shape = (n_samples,)
- Dual coefficients of training data points in kernel space
-
- log_marginal_likelihood_value_: float
- The log-marginal-likelihood of ``self.kernel_.theta``
- """
- def __init__(self, kernel=None, alpha=1e-10,
- optimizer="fmin_l_bfgs_b", n_restarts_optimizer=0,
- normalize_y=False, copy_X_train=True, random_state=None):
- self.kernel = kernel
- self.alpha = alpha
- self.optimizer = optimizer
- self.n_restarts_optimizer = n_restarts_optimizer
- self.normalize_y = normalize_y
- self.copy_X_train = copy_X_train
- self.random_state = random_state
-
- def fit(self, X, y):
- """Fit Gaussian process regression model
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
- Training data
-
- y : array-like, shape = (n_samples, [n_output_dims])
- Target values
-
- Returns
- -------
- self : returns an instance of self.
- """
- if self.kernel is None: # Use an RBF kernel as default
- self.kernel_ = C(1.0, constant_value_bounds="fixed") \
- * RBF(1.0, length_scale_bounds="fixed")
- else:
- self.kernel_ = clone(self.kernel)
-
- self.rng = check_random_state(self.random_state)
-
- X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
-
- # Normalize target value
- if self.normalize_y:
- self.y_train_mean = np.mean(y, axis=0)
- # demean y
- y = y - self.y_train_mean
- else:
- self.y_train_mean = np.zeros(1)
-
- if np.iterable(self.alpha) \
- and self.alpha.shape[0] != y.shape[0]:
- if self.alpha.shape[0] == 1:
- self.alpha = self.alpha[0]
- else:
- raise ValueError("alpha must be a scalar or an array"
- " with same number of entries as y.(%d != %d)"
- % (self.alpha.shape[0], y.shape[0]))
-
- self.X_train_ = np.copy(X) if self.copy_X_train else X
- self.y_train_ = np.copy(y) if self.copy_X_train else y
-
- if self.optimizer is not None and self.kernel_.n_dims > 0:
- # Choose hyperparameters based on maximizing the log-marginal
- # likelihood (potentially starting from several initial values)
- def obj_func(theta, eval_gradient=True):
- if eval_gradient:
- lml, grad = self.log_marginal_likelihood(
- theta, eval_gradient=True)
- return -lml, -grad
- else:
- return -self.log_marginal_likelihood(theta)
-
- # First optimize starting from theta specified in kernel
- optima = [(self._constrained_optimization(obj_func,
- self.kernel_.theta,
- self.kernel_.bounds))]
-
- # Additional runs are performed from log-uniform chosen initial
- # theta
- if self.n_restarts_optimizer > 0:
- if not np.isfinite(self.kernel_.bounds).all():
- raise ValueError(
- "Multiple optimizer restarts (n_restarts_optimizer>0) "
- "requires that all bounds are finite.")
- bounds = self.kernel_.bounds
- for iteration in range(self.n_restarts_optimizer):
- theta_initial = \
- self.rng.uniform(bounds[:, 0], bounds[:, 1])
- optima.append(
- self._constrained_optimization(obj_func, theta_initial,
- bounds))
- # Select result from run with minimal (negative) log-marginal
- # likelihood
- lml_values = list(map(itemgetter(1), optima))
- self.kernel_.theta = optima[np.argmin(lml_values)][0]
- self.log_marginal_likelihood_value_ = -np.min(lml_values)
- else:
- self.log_marginal_likelihood_value_ = \
- self.log_marginal_likelihood(self.kernel_.theta)
-
- # Precompute quantities required for predictions which are independent
- # of actual query points
- K = self.kernel_(self.X_train_)
- K[np.diag_indices_from(K)] += self.alpha
- self.L_ = cholesky(K, lower=True) # Line 2
- self.alpha_ = cho_solve((self.L_, True), self.y_train_) # Line 3
-
- return self
-
- def predict(self, X, return_std=False, return_cov=False):
- """Predict using the Gaussian process regression model
-
- We can also predict based on an unfitted model by using the GP prior.
- In addition to the mean of the predictive distribution, also its
- standard deviation (return_std=True) or covariance (return_cov=True).
- Note that at most one of the two can be requested.
-
- Parameters
- ----------
- X : array-like, shape = (n_samples, n_features)
- Query points where the GP is evaluated
-
- return_std : bool, default: False
- If True, the standard-deviation of the predictive distribution at
- the query points is returned along with the mean.
-
- return_cov : bool, default: False
- If True, the covariance of the joint predictive distribution at
- the query points is returned along with the mean
-
- Returns
- -------
- y_mean : array, shape = (n_samples, [n_output_dims])
- Mean of predictive distribution a query points
-
- y_std : array, shape = (n_samples,), optional
- Standard deviation of predictive distribution at query points.
- Only returned when return_std is True.
-
- y_cov : array, shape = (n_samples, n_samples), optional
- Covariance of joint predictive distribution a query points.
- Only returned when return_cov is True.
- """
- if return_std and return_cov:
- raise RuntimeError(
- "Not returning standard deviation of predictions when "
- "returning full covariance.")
-
- X = check_array(X)
-
- if not hasattr(self, "X_train_"): # Unfitted;predict based on GP prior
- y_mean = np.zeros(X.shape[0])
- if return_cov:
- y_cov = self.kernel(X)
- return y_mean, y_cov
- elif return_std:
- y_var = self.kernel.diag(X)
- return y_mean, np.sqrt(y_var)
- else:
- return y_mean
- else: # Predict based on GP posterior
- K_trans = self.kernel_(X, self.X_train_)
- y_mean = K_trans.dot(self.alpha_) # Line 4 (y_mean = f_star)
- y_mean = self.y_train_mean + y_mean # undo normal.
- if return_cov:
- v = cho_solve((self.L_, True), K_trans.T) # Line 5
- y_cov = self.kernel_(X) - K_trans.dot(v) # Line 6
- return y_mean, y_cov
- elif return_std:
- # compute inverse K_inv of K based on its Cholesky
- # decomposition L and its inverse L_inv
- L_inv = solve_triangular(self.L_.T, np.eye(self.L_.shape[0]))
- K_inv = L_inv.dot(L_inv.T)
- # Compute variance of predictive distribution
- y_var = self.kernel_.diag(X)
- y_var -= np.einsum("ki,kj,ij->k", K_trans, K_trans, K_inv)
-
- # Check if any of the variances is negative because of
- # numerical issues. If yes: set the variance to 0.
- y_var_negative = y_var < 0
- if np.any(y_var_negative):
- warnings.warn("Predicted variances smaller than 0. "
- "Setting those variances to 0.")
- y_var[y_var_negative] = 0.0
- return y_mean, np.sqrt(y_var)
- else:
- return y_mean
-
- def sample_y(self, X, n_samples=1, random_state=0):
- """Draw samples from Gaussian process and evaluate at X.
-
- Parameters
- ----------
- X : array-like, shape = (n_samples_X, n_features)
- Query points where the GP samples are evaluated
-
- n_samples : int, default: 1
- The number of samples drawn from the Gaussian process
-
- random_state: RandomState or an int seed (0 by default)
- A random number generator instance
-
- Returns
- -------
- y_samples : array, shape = (n_samples_X, [n_output_dims], n_samples)
- Values of n_samples samples drawn from Gaussian process and
- evaluated at query points.
- """
- rng = check_random_state(random_state)
-
- y_mean, y_cov = self.predict(X, return_cov=True)
- if y_mean.ndim == 1:
- y_samples = rng.multivariate_normal(y_mean, y_cov, n_samples).T
- else:
- y_samples = \
- [rng.multivariate_normal(y_mean[:, i], y_cov,
- n_samples).T[:, np.newaxis]
- for i in range(y_mean.shape[1])]
- y_samples = np.hstack(y_samples)
- return y_samples
-
- def log_marginal_likelihood(self, theta=None, eval_gradient=False):
- """Returns log-marginal likelihood of theta for training data.
-
- Parameters
- ----------
- theta : array-like, shape = (n_kernel_params,) or None
- Kernel hyperparameters for which the log-marginal likelihood is
- evaluated. If None, the precomputed log_marginal_likelihood
- of ``self.kernel_.theta`` is returned.
-
- eval_gradient : bool, default: False
- If True, the gradient of the log-marginal likelihood with respect
- to the kernel hyperparameters at position theta is returned
- additionally. If True, theta must not be None.
-
- Returns
- -------
- log_likelihood : float
- Log-marginal likelihood of theta for training data.
-
- log_likelihood_gradient : array, shape = (n_kernel_params,), optional
- Gradient of the log-marginal likelihood with respect to the kernel
- hyperparameters at position theta.
- Only returned when eval_gradient is True.
- """
- if theta is None:
- if eval_gradient:
- raise ValueError(
- "Gradient can only be evaluated for theta!=None")
- return self.log_marginal_likelihood_value_
-
- kernel = self.kernel_.clone_with_theta(theta)
-
- if eval_gradient:
- K, K_gradient = kernel(self.X_train_, eval_gradient=True)
- else:
- K = kernel(self.X_train_)
-
- K[np.diag_indices_from(K)] += self.alpha
- try:
- L = cholesky(K, lower=True) # Line 2
- except np.linalg.LinAlgError:
- return (-np.inf, np.zeros_like(theta)) \
- if eval_gradient else -np.inf
-
- # Support multi-dimensional output of self.y_train_
- y_train = self.y_train_
- if y_train.ndim == 1:
- y_train = y_train[:, np.newaxis]
-
- alpha = cho_solve((L, True), y_train) # Line 3
-
- # Compute log-likelihood (compare line 7)
- log_likelihood_dims = -0.5 * np.einsum("ik,ik->k", y_train, alpha)
- log_likelihood_dims -= np.log(np.diag(L)).sum()
- log_likelihood_dims -= K.shape[0] / 2 * np.log(2 * np.pi)
- log_likelihood = log_likelihood_dims.sum(-1) # sum over dimensions
-
- if eval_gradient: # compare Equation 5.9 from GPML
- tmp = np.einsum("ik,jk->ijk", alpha, alpha) # k: output-dimension
- tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
- # Compute "0.5 * trace(tmp.dot(K_gradient))" without
- # constructing the full matrix tmp.dot(K_gradient) since only
- # its diagonal is required
- log_likelihood_gradient_dims = \
- 0.5 * np.einsum("ijl,ijk->kl", tmp, K_gradient)
- log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)
-
- if eval_gradient:
- return log_likelihood, log_likelihood_gradient
- else:
- return log_likelihood
-
- def _constrained_optimization(self, obj_func, initial_theta, bounds):
- if self.optimizer == "fmin_l_bfgs_b":
- theta_opt, func_min, convergence_dict = \
- fmin_l_bfgs_b(obj_func, initial_theta, bounds=bounds)
- if convergence_dict["warnflag"] != 0:
- warnings.warn("fmin_l_bfgs_b terminated abnormally with the "
- " state: %s" % convergence_dict)
- elif callable(self.optimizer):
- theta_opt, func_min = \
- self.optimizer(obj_func, initial_theta, bounds=bounds)
- else:
- raise ValueError("Unknown optimizer %s." % self.optimizer)
-
- return theta_opt, func_min
diff --git a/mloop/localsklearn/gaussian_process/kernels.py b/mloop/localsklearn/gaussian_process/kernels.py
deleted file mode 100644
index 251bc10..0000000
--- a/mloop/localsklearn/gaussian_process/kernels.py
+++ /dev/null
@@ -1,1789 +0,0 @@
-"""Kernels for Gaussian process regression and classification.
-
-The kernels in this module allow kernel-engineering, i.e., they can be
-combined via the "+" and "*" operators or be exponentiated with a scalar
-via "**". These sum and product expressions can also contain scalar values,
-which are automatically converted to a constant kernel.
-
-All kernels allow (analytic) gradient-based hyperparameter optimization.
-The space of hyperparameters can be specified by giving lower und upper
-boundaries for the value of each hyperparameter (the search space is thus
-rectangular). Instead of specifying bounds, hyperparameters can also be
-declared to be "fixed", which causes these hyperparameters to be excluded from
-optimization.
-"""
-
-# Author: Jan Hendrik Metzen
-# License: BSD 3 clause
-
-# Note: this module is strongly inspired by the kernel module of the george
-# package.
-
-from abc import ABCMeta, abstractmethod
-from collections import namedtuple
-import math
-
-import numpy as np
-from scipy.special import kv, gamma
-from scipy.spatial.distance import pdist, cdist, squareform
-
-from ..metrics.pairwise import pairwise_kernels
-from ..externals import six
-from ..base import clone
-from sklearn.externals.funcsigs import signature
-
-
-class Hyperparameter(namedtuple('Hyperparameter',
- ('name', 'value_type', 'bounds',
- 'n_elements', 'fixed'))):
- """A kernel hyperparameter's specification in form of a namedtuple.
-
- Attributes
- ----------
- name : string
- The name of the hyperparameter. Note that a kernel using a
- hyperparameter with name "x" must have the attributes self.x and
- self.x_bounds
-
- value_type : string
- The type of the hyperparameter. Currently, only "numeric"
- hyperparameters are supported.
-
- bounds : pair of floats >= 0 or "fixed"
- The lower and upper bound on the parameter. If n_elements>1, a pair
- of 1d array with n_elements each may be given alternatively. If
- the string "fixed" is passed as bounds, the hyperparameter's value
- cannot be changed.
-
- n_elements : int, default=1
- The number of elements of the hyperparameter value. Defaults to 1,
- which corresponds to a scalar hyperparameter. n_elements > 1
- corresponds to a hyperparameter which is vector-valued,
- such as, e.g., anisotropic length-scales.
-
- fixed : bool, default: None
- Whether the value of this hyperparameter is fixed, i.e., cannot be
- changed during hyperparameter tuning. If None is passed, the "fixed" is
- derived based on the given bounds.
- """
- # A raw namedtuple is very memory efficient as it packs the attributes
- # in a struct to get rid of the __dict__ of attributes in particular it
- # does not copy the string for the keys on each instance.
- # By deriving a namedtuple class just to introduce the __init__ method we
- # would also reintroduce the __dict__ on the instance. By telling the
- # Python interpreter that this subclass uses static __slots__ instead of
- # dynamic attributes. Furthermore we don't need any additional slot in the
- # subclass so we set __slots__ to the empty tuple.
- __slots__ = ()
-
- def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None):
- if not isinstance(bounds, six.string_types) or bounds != "fixed":
- bounds = np.atleast_2d(bounds)
- if n_elements > 1: # vector-valued parameter
- if bounds.shape[0] == 1:
- bounds = np.repeat(bounds, n_elements, 0)
- elif bounds.shape[0] != n_elements:
- raise ValueError("Bounds on %s should have either 1 or "
- "%d dimensions. Given are %d"
- % (name, n_elements, bounds.shape[0]))
-
- if fixed is None:
- fixed = isinstance(bounds, six.string_types) and bounds == "fixed"
- return super(Hyperparameter, cls).__new__(
- cls, name, value_type, bounds, n_elements, fixed)
-
-
-class Kernel(six.with_metaclass(ABCMeta)):
- """Base class for all kernels."""
-
- def get_params(self, deep=True):
- """Get parameters of this kernel.
-
- Parameters
- ----------
- deep: boolean, optional
- If True, will return the parameters for this estimator and
- contained subobjects that are estimators.
-
- Returns
- -------
- params : mapping of string to any
- Parameter names mapped to their values.
- """
- params = dict()
-
- # introspect the constructor arguments to find the model parameters
- # to represent
- cls = self.__class__
- init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
- init_sign = signature(init)
- args, varargs = [], []
- for parameter in init_sign.parameters.values():
- if (parameter.kind != parameter.VAR_KEYWORD and
- parameter.name != 'self'):
- args.append(parameter.name)
- if parameter.kind == parameter.VAR_POSITIONAL:
- varargs.append(parameter.name)
-
- if len(varargs) != 0:
- raise RuntimeError("scikit-learn kernels should always "
- "specify their parameters in the signature"
- " of their __init__ (no varargs)."
- " %s doesn't follow this convention."
- % (cls, ))
- for arg in args:
- params[arg] = getattr(self, arg, None)
- return params
-
- def set_params(self, **params):
- """Set the parameters of this kernel.
-
- The method works on simple kernels as well as on nested kernels.
- The latter have parameters of the form ``__``
- so that it's possible to update each component of a nested object.
-
- Returns
- -------
- self
- """
- if not params:
- # Simple optimisation to gain speed (inspect is slow)
- return self
- valid_params = self.get_params(deep=True)
- for key, value in six.iteritems(params):
- split = key.split('__', 1)
- if len(split) > 1:
- # nested objects case
- name, sub_name = split
- if name not in valid_params:
- raise ValueError('Invalid parameter %s for kernel %s. '
- 'Check the list of available parameters '
- 'with `kernel.get_params().keys()`.' %
- (name, self))
- sub_object = valid_params[name]
- sub_object.set_params(**{sub_name: value})
- else:
- # simple objects case
- if key not in valid_params:
- raise ValueError('Invalid parameter %s for kernel %s. '
- 'Check the list of available parameters '
- 'with `kernel.get_params().keys()`.' %
- (key, self.__class__.__name__))
- setattr(self, key, value)
- return self
-
- def clone_with_theta(self, theta):
- """Returns a clone of self with given hyperparameters theta. """
- cloned = clone(self)
- cloned.theta = theta
- return cloned
-
- @property
- def n_dims(self):
- """Returns the number of non-fixed hyperparameters of the kernel."""
- return self.theta.shape[0]
-
- @property
- def hyperparameters(self):
- """Returns a list of all hyperparameter specifications."""
- r = []
- for attr, value in sorted(self.__dict__.items()):
- if attr.startswith("hyperparameter_"):
- r.append(value)
- return r
-
- @property
- def theta(self):
- """Returns the (flattened, log-transformed) non-fixed hyperparameters.
-
- Note that theta are typically the log-transformed values of the
- kernel's hyperparameters as this representation of the search space
- is more amenable for hyperparameter search, as hyperparameters like
- length-scales naturally live on a log-scale.
-
- Returns
- -------
- theta : array, shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- theta = []
- for hyperparameter in self.hyperparameters:
- if not hyperparameter.fixed:
- theta.append(getattr(self, hyperparameter.name))
- if len(theta) > 0:
- return np.log(np.hstack(theta))
- else:
- return np.array([])
-
- @theta.setter
- def theta(self, theta):
- """Sets the (flattened, log-transformed) non-fixed hyperparameters.
-
- Parameters
- ----------
- theta : array, shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- i = 0
- for hyperparameter in self.hyperparameters:
- if hyperparameter.fixed:
- continue
- if hyperparameter.n_elements > 1:
- # vector-valued parameter
- setattr(self, hyperparameter.name,
- np.exp(theta[i:i + hyperparameter.n_elements]))
- i += hyperparameter.n_elements
- else:
- setattr(self, hyperparameter.name, np.exp(theta[i]))
- i += 1
-
- if i != len(theta):
- raise ValueError("theta has not the correct number of entries."
- " Should be %d; given are %d"
- % (i, len(theta)))
-
- @property
- def bounds(self):
- """Returns the log-transformed bounds on the theta.
-
- Returns
- -------
- bounds : array, shape (n_dims, 2)
- The log-transformed bounds on the kernel's hyperparameters theta
- """
- bounds = []
- for hyperparameter in self.hyperparameters:
- if not hyperparameter.fixed:
- bounds.append(hyperparameter.bounds)
- if len(bounds) > 0:
- return np.log(np.vstack(bounds))
- else:
- return np.array([])
-
- def __add__(self, b):
- if not isinstance(b, Kernel):
- return Sum(self, ConstantKernel(b))
- return Sum(self, b)
-
- def __radd__(self, b):
- if not isinstance(b, Kernel):
- return Sum(ConstantKernel(b), self)
- return Sum(b, self)
-
- def __mul__(self, b):
- if not isinstance(b, Kernel):
- return Product(self, ConstantKernel(b))
- return Product(self, b)
-
- def __rmul__(self, b):
- if not isinstance(b, Kernel):
- return Product(ConstantKernel(b), self)
- return Product(b, self)
-
- def __pow__(self, b):
- return Exponentiation(self, b)
-
- def __eq__(self, b):
- if type(self) != type(b):
- return False
- params_a = self.get_params()
- params_b = b.get_params()
- for key in set(list(params_a.keys()) + list(params_b.keys())):
- if np.any(params_a.get(key, None) != params_b.get(key, None)):
- return False
- return True
-
- def __repr__(self):
- return "{0}({1})".format(self.__class__.__name__,
- ", ".join(map("{0:.3g}".format, self.theta)))
-
- @abstractmethod
- def __call__(self, X, Y=None, eval_gradient=False):
- """Evaluate the kernel."""
-
- @abstractmethod
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
-
- @abstractmethod
- def is_stationary(self):
- """Returns whether the kernel is stationary. """
-
-
-class NormalizedKernelMixin(object):
- """Mixin for kernels which are normalized: k(X, X)=1."""
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return np.ones(X.shape[0])
-
-
-class StationaryKernelMixin(object):
- """Mixin for kernels which are stationary: k(X, Y)= f(X-Y)."""
-
- def is_stationary(self):
- """Returns whether the kernel is stationary. """
- return True
-
-
-class CompoundKernel(Kernel):
- """Kernel which is composed of a set of other kernels."""
-
- def __init__(self, kernels):
- self.kernels = kernels
-
- def get_params(self, deep=True):
- """Get parameters of this kernel.
-
- Parameters
- ----------
- deep: boolean, optional
- If True, will return the parameters for this estimator and
- contained subobjects that are estimators.
-
- Returns
- -------
- params : mapping of string to any
- Parameter names mapped to their values.
- """
- return dict(kernels=self.kernels)
-
- @property
- def theta(self):
- """Returns the (flattened, log-transformed) non-fixed hyperparameters.
-
- Note that theta are typically the log-transformed values of the
- kernel's hyperparameters as this representation of the search space
- is more amenable for hyperparameter search, as hyperparameters like
- length-scales naturally live on a log-scale.
-
- Returns
- -------
- theta : array, shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- return np.hstack([kernel.theta for kernel in self.kernels])
-
- @theta.setter
- def theta(self, theta):
- """Sets the (flattened, log-transformed) non-fixed hyperparameters.
-
- Parameters
- ----------
- theta : array, shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- k_dims = self.k1.n_dims
- for i, kernel in enumerate(self.kernels):
- kernel.theta = theta[i * k_dims:(i + 1) * k_dims]
-
- @property
- def bounds(self):
- """Returns the log-transformed bounds on the theta.
-
- Returns
- -------
- bounds : array, shape (n_dims, 2)
- The log-transformed bounds on the kernel's hyperparameters theta
- """
- return np.vstack([kernel.bounds for kernel in self.kernels])
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Note that this compound kernel returns the results of all simple kernel
- stacked along an additional axis.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y, n_kernels)
- Kernel k(X, Y)
-
- K_gradient : array, shape (n_samples_X, n_samples_X, n_dims, n_kernels)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- if eval_gradient:
- K = []
- K_grad = []
- for kernel in self.kernels:
- K_single, K_grad_single = kernel(X, Y, eval_gradient)
- K.append(K_single)
- K_grad.append(K_grad_single[..., np.newaxis])
- return np.dstack(K), np.concatenate(K_grad, 3)
- else:
- return np.dstack([kernel(X, Y, eval_gradient)
- for kernel in self.kernels])
-
- def __eq__(self, b):
- if type(self) != type(b) or len(self.kernels) != len(b.kernels):
- return False
- return np.all([self.kernels[i] == b.kernels[i]
- for i in range(len(self.kernels))])
-
- def is_stationary(self):
- """Returns whether the kernel is stationary. """
- return np.all([kernel.is_stationary() for kernel in self.kernels])
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X, n_kernels)
- Diagonal of kernel k(X, X)
- """
- return np.vstack([kernel.diag(X) for kernel in self.kernels]).T
-
-
-class KernelOperator(Kernel):
- """Base class for all kernel operators. """
-
- def __init__(self, k1, k2):
- self.k1 = k1
- self.k2 = k2
-
- def get_params(self, deep=True):
- """Get parameters of this kernel.
-
- Parameters
- ----------
- deep: boolean, optional
- If True, will return the parameters for this estimator and
- contained subobjects that are estimators.
-
- Returns
- -------
- params : mapping of string to any
- Parameter names mapped to their values.
- """
- params = dict(k1=self.k1, k2=self.k2)
- if deep:
- deep_items = self.k1.get_params().items()
- params.update(('k1__' + k, val) for k, val in deep_items)
- deep_items = self.k2.get_params().items()
- params.update(('k2__' + k, val) for k, val in deep_items)
-
- return params
-
- @property
- def hyperparameters(self):
- """Returns a list of all hyperparameter."""
- r = []
- for hyperparameter in self.k1.hyperparameters:
- r.append(Hyperparameter("k1__" + hyperparameter.name,
- hyperparameter.value_type,
- hyperparameter.bounds,
- hyperparameter.n_elements))
- for hyperparameter in self.k2.hyperparameters:
- r.append(Hyperparameter("k2__" + hyperparameter.name,
- hyperparameter.value_type,
- hyperparameter.bounds,
- hyperparameter.n_elements))
- return r
-
- @property
- def theta(self):
- """Returns the (flattened, log-transformed) non-fixed hyperparameters.
-
- Note that theta are typically the log-transformed values of the
- kernel's hyperparameters as this representation of the search space
- is more amenable for hyperparameter search, as hyperparameters like
- length-scales naturally live on a log-scale.
-
- Returns
- -------
- theta : array, shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- return np.append(self.k1.theta, self.k2.theta)
-
- @theta.setter
- def theta(self, theta):
- """Sets the (flattened, log-transformed) non-fixed hyperparameters.
-
- Parameters
- ----------
- theta : array, shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- k1_dims = self.k1.n_dims
- self.k1.theta = theta[:k1_dims]
- self.k2.theta = theta[k1_dims:]
-
- @property
- def bounds(self):
- """Returns the log-transformed bounds on the theta.
-
- Returns
- -------
- bounds : array, shape (n_dims, 2)
- The log-transformed bounds on the kernel's hyperparameters theta
- """
- if self.k1.bounds.size == 0:
- return self.k2.bounds
- if self.k2.bounds.size == 0:
- return self.k1.bounds
- return np.vstack((self.k1.bounds, self.k2.bounds))
-
- def __eq__(self, b):
- if type(self) != type(b):
- return False
- return (self.k1 == b.k1 and self.k2 == b.k2) \
- or (self.k1 == b.k2 and self.k2 == b.k1)
-
- def is_stationary(self):
- """Returns whether the kernel is stationary. """
- return self.k1.is_stationary() and self.k2.is_stationary()
-
-
-class Sum(KernelOperator):
- """Sum-kernel k1 + k2 of two kernels k1 and k2.
-
- The resulting kernel is defined as
- k_sum(X, Y) = k1(X, Y) + k2(X, Y)
-
- Parameters
- ----------
- k1 : Kernel object
- The first base-kernel of the sum-kernel
-
- k2 : Kernel object
- The second base-kernel of the sum-kernel
- """
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- if eval_gradient:
- K1, K1_gradient = self.k1(X, Y, eval_gradient=True)
- K2, K2_gradient = self.k2(X, Y, eval_gradient=True)
- return K1 + K2, np.dstack((K1_gradient, K2_gradient))
- else:
- return self.k1(X, Y) + self.k2(X, Y)
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return self.k1.diag(X) + self.k2.diag(X)
-
- def __repr__(self):
- return "{0} + {1}".format(self.k1, self.k2)
-
-
-class Product(KernelOperator):
- """Product-kernel k1 * k2 of two kernels k1 and k2.
-
- The resulting kernel is defined as
- k_prod(X, Y) = k1(X, Y) * k2(X, Y)
-
- Parameters
- ----------
- k1 : Kernel object
- The first base-kernel of the product-kernel
-
- k2 : Kernel object
- The second base-kernel of the product-kernel
- """
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- if eval_gradient:
- K1, K1_gradient = self.k1(X, Y, eval_gradient=True)
- K2, K2_gradient = self.k2(X, Y, eval_gradient=True)
- return K1 * K2, np.dstack((K1_gradient * K2[:, :, np.newaxis],
- K2_gradient * K1[:, :, np.newaxis]))
- else:
- return self.k1(X, Y) * self.k2(X, Y)
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return self.k1.diag(X) * self.k2.diag(X)
-
- def __repr__(self):
- return "{0} * {1}".format(self.k1, self.k2)
-
-
-class Exponentiation(Kernel):
- """Exponentiate kernel by given exponent.
-
- The resulting kernel is defined as
- k_exp(X, Y) = k(X, Y) ** exponent
-
- Parameters
- ----------
- kernel : Kernel object
- The base kernel
-
- exponent : float
- The exponent for the base kernel
-
- """
- def __init__(self, kernel, exponent):
- self.kernel = kernel
- self.exponent = exponent
-
- def get_params(self, deep=True):
- """Get parameters of this kernel.
-
- Parameters
- ----------
- deep: boolean, optional
- If True, will return the parameters for this estimator and
- contained subobjects that are estimators.
-
- Returns
- -------
- params : mapping of string to any
- Parameter names mapped to their values.
- """
- params = dict(kernel=self.kernel, exponent=self.exponent)
- if deep:
- deep_items = self.kernel.get_params().items()
- params.update(('kernel__' + k, val) for k, val in deep_items)
- return params
-
- @property
- def hyperparameters(self):
- """Returns a list of all hyperparameter."""
- r = []
- for hyperparameter in self.kernel.hyperparameters:
- r.append(Hyperparameter("kernel__" + hyperparameter.name,
- hyperparameter.value_type,
- hyperparameter.bounds,
- hyperparameter.n_elements))
- return r
-
- @property
- def theta(self):
- """Returns the (flattened, log-transformed) non-fixed hyperparameters.
-
- Note that theta are typically the log-transformed values of the
- kernel's hyperparameters as this representation of the search space
- is more amenable for hyperparameter search, as hyperparameters like
- length-scales naturally live on a log-scale.
-
- Returns
- -------
- theta : array, shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- return self.kernel.theta
-
- @theta.setter
- def theta(self, theta):
- """Sets the (flattened, log-transformed) non-fixed hyperparameters.
-
- Parameters
- ----------
- theta : array, shape (n_dims,)
- The non-fixed, log-transformed hyperparameters of the kernel
- """
- self.kernel.theta = theta
-
- @property
- def bounds(self):
- """Returns the log-transformed bounds on the theta.
-
- Returns
- -------
- bounds : array, shape (n_dims, 2)
- The log-transformed bounds on the kernel's hyperparameters theta
- """
- return self.kernel.bounds
-
- def __eq__(self, b):
- if type(self) != type(b):
- return False
- return (self.kernel == b.kernel and self.exponent == b.exponent)
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- if eval_gradient:
- K, K_gradient = self.kernel(X, Y, eval_gradient=True)
- K_gradient *= \
- self.exponent * K[:, :, np.newaxis] ** (self.exponent - 1)
- return K ** self.exponent, K_gradient
- else:
- K = self.kernel(X, Y, eval_gradient=False)
- return K ** self.exponent
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return self.kernel.diag(X) ** self.exponent
-
- def __repr__(self):
- return "{0} ** {1}".format(self.kernel, self.exponent)
-
- def is_stationary(self):
- """Returns whether the kernel is stationary. """
- return self.kernel.is_stationary()
-
-
-class ConstantKernel(StationaryKernelMixin, Kernel):
- """Constant kernel.
-
- Can be used as part of a product-kernel where it scales the magnitude of
- the other factor (kernel) or as part of a sum-kernel, where it modifies
- the mean of the Gaussian process.
-
- k(x_1, x_2) = constant_value for all x_1, x_2
-
- Parameters
- ----------
- constant_value : float, default: 1.0
- The constant value which defines the covariance:
- k(x_1, x_2) = constant_value
-
- constant_value_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on constant_value
- """
- def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5)):
- self.constant_value = constant_value
- self.constant_value_bounds = constant_value_bounds
-
- self.hyperparameter_constant_value = \
- Hyperparameter("constant_value", "numeric", constant_value_bounds)
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined. Only supported when Y is None.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- X = np.atleast_2d(X)
- if Y is None:
- Y = X
- elif eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
-
- K = self.constant_value * np.ones((X.shape[0], Y.shape[0]))
- if eval_gradient:
- if not self.hyperparameter_constant_value.fixed:
- return (K, self.constant_value
- * np.ones((X.shape[0], X.shape[0], 1)))
- else:
- return K, np.empty((X.shape[0], X.shape[0], 0))
- else:
- return K
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return self.constant_value * np.ones(X.shape[0])
-
- def __repr__(self):
- return "{0:.3g}**2".format(np.sqrt(self.constant_value))
-
-
-class WhiteKernel(StationaryKernelMixin, Kernel):
- """White kernel.
-
- The main use-case of this kernel is as part of a sum-kernel where it
- explains the noise-component of the signal. Tuning its parameter
- corresponds to estimating the noise-level.
-
- k(x_1, x_2) = noise_level if x_1 == x_2 else 0
-
- Parameters
- ----------
- noise_level : float, default: 1.0
- Parameter controlling the noise level
-
- noise_level_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on noise_level
- """
- def __init__(self, noise_level=1.0, noise_level_bounds=(1e-5, 1e5)):
- self.noise_level = noise_level
- self.noise_level_bounds = noise_level_bounds
-
- self.hyperparameter_noise_level = \
- Hyperparameter("noise_level", "numeric", noise_level_bounds)
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined. Only supported when Y is None.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- X = np.atleast_2d(X)
- if Y is not None and eval_gradient:
- raise ValueError("Gradient can only be evaluated when Y is None.")
-
- if Y is None:
- K = self.noise_level * np.eye(X.shape[0])
- if eval_gradient:
- if not self.hyperparameter_noise_level.fixed:
- return (K, self.noise_level
- * np.eye(X.shape[0])[:, :, np.newaxis])
- else:
- return K, np.empty((X.shape[0], X.shape[0], 0))
- else:
- return K
- else:
- return np.zeros((X.shape[0], Y.shape[0]))
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return self.noise_level * np.ones(X.shape[0])
-
- def __repr__(self):
- return "{0}(noise_level={1:.3g})".format(self.__class__.__name__,
- self.noise_level)
-
-
-class RBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
- """Radial-basis function kernel (aka squared-exponential kernel).
-
- The RBF kernel is a stationary kernel. It is also known as the
- "squared exponential" kernel. It is parameterized by a length-scale
- parameter length_scale>0, which can either be a scalar (isotropic variant
- of the kernel) or a vector with the same number of dimensions as the inputs
- X (anisotropic variant of the kernel). The kernel is given by:
-
- k(x_i, x_j) = exp(-1 / 2 d(x_i / length_scale, x_j / length_scale)^2)
-
- This kernel is infinitely differentiable, which implies that GPs with this
- kernel as covariance function have mean square derivatives of all orders,
- and are thus very smooth.
-
- Parameters
- -----------
- length_scale : float or array with shape (n_features,), default: 1.0
- The length scale of the kernel. If a float, an isotropic kernel is
- used. If an array, an anisotropic kernel is used where each dimension
- of l defines the length-scale of the respective feature dimension.
-
- length_scale_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on length_scale
- """
- def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)):
- if np.iterable(length_scale):
- if len(length_scale) > 1:
- self.anisotropic = True
- self.length_scale = np.asarray(length_scale, dtype=np.float)
- else:
- self.anisotropic = False
- self.length_scale = float(length_scale[0])
- else:
- self.anisotropic = False
- self.length_scale = float(length_scale)
- self.length_scale_bounds = length_scale_bounds
-
- if self.anisotropic: # anisotropic length_scale
- self.hyperparameter_length_scale = \
- Hyperparameter("length_scale", "numeric", length_scale_bounds,
- len(length_scale))
- else:
- self.hyperparameter_length_scale = \
- Hyperparameter("length_scale", "numeric", length_scale_bounds)
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined. Only supported when Y is None.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- X = np.atleast_2d(X)
- if self.anisotropic and X.shape[1] != self.length_scale.shape[0]:
- raise Exception("Anisotropic kernel must have the same number of "
- "dimensions as data (%d!=%d)"
- % (self.length_scale.shape[0], X.shape[1]))
-
- if Y is None:
- dists = pdist(X / self.length_scale, metric='sqeuclidean')
- K = np.exp(-.5 * dists)
- # convert from upper-triangular matrix to square matrix
- K = squareform(K)
- np.fill_diagonal(K, 1)
- else:
- if eval_gradient:
- raise ValueError(
- "Gradient can only be evaluated when Y is None.")
- dists = cdist(X / self.length_scale, Y / self.length_scale,
- metric='sqeuclidean')
- K = np.exp(-.5 * dists)
-
- if eval_gradient:
- if self.hyperparameter_length_scale.fixed:
- # Hyperparameter l kept fixed
- return K, np.empty((X.shape[0], X.shape[0], 0))
- elif not self.anisotropic or self.length_scale.shape[0] == 1:
- K_gradient = \
- (K * squareform(dists))[:, :, np.newaxis]
- return K, K_gradient
- elif self.anisotropic:
- # We need to recompute the pairwise dimension-wise distances
- K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 \
- / (self.length_scale ** 2)
- K_gradient *= K[..., np.newaxis]
- return K, K_gradient
- else:
- raise Exception("Anisotropic kernels require that the number "
- "of length scales and features match.")
- else:
- return K
-
- def __repr__(self):
- if self.anisotropic:
- return "{0}(length_scale=[{1}])".format(
- self.__class__.__name__, ", ".join(map("{0:.3g}".format,
- self.length_scale)))
- else: # isotropic
- return "{0}(length_scale={1:.3g})".format(
- self.__class__.__name__, self.length_scale)
-
-
-class Matern(RBF):
- """ Matern kernel.
-
- The class of Matern kernels is a generalization of the RBF and the
- absolute exponential kernel parameterized by an additional parameter
- nu. The smaller nu, the less smooth the approximated function is.
- For nu=inf, the kernel becomes equivalent to the RBF kernel and for nu=0.5
- to the absolute exponential kernel. Important intermediate values are
- nu=1.5 (once differentiable functions) and nu=2.5 (twice differentiable
- functions).
-
- See Rasmussen and Williams 2006, pp84 for details regarding the
- different variants of the Matern kernel.
-
- Parameters
- -----------
- length_scale : float or array with shape (n_features,), default: 1.0
- The length scale of the kernel. If a float, an isotropic kernel is
- used. If an array, an anisotropic kernel is used where each dimension
- of l defines the length-scale of the respective feature dimension.
-
- length_scale_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on length_scale
-
- nu: float, default: 1.5
- The parameter nu controlling the smoothness of the learned function.
- The smaller nu, the less smooth the approximated function is.
- For nu=inf, the kernel becomes equivalent to the RBF kernel and for
- nu=0.5 to the absolute exponential kernel. Important intermediate
- values are nu=1.5 (once differentiable functions) and nu=2.5
- (twice differentiable functions). Note that values of nu not in
- [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
- (appr. 10 times higher) since they require to evaluate the modified
- Bessel function. Furthermore, in contrast to l, nu is kept fixed to
- its initial value and not optimized.
- """
- def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5),
- nu=1.5):
- super(Matern, self).__init__(length_scale, length_scale_bounds)
- self.nu = nu
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined. Only supported when Y is None.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- X = np.atleast_2d(X)
- if self.anisotropic and X.shape[1] != self.length_scale.shape[0]:
- raise Exception("Anisotropic kernel must have the same number of "
- "dimensions as data (%d!=%d)"
- % (self.length_scale.shape[0], X.shape[1]))
-
- if Y is None:
- dists = pdist(X / self.length_scale, metric='euclidean')
- else:
- if eval_gradient:
- raise ValueError(
- "Gradient can only be evaluated when Y is None.")
- dists = cdist(X / self.length_scale, Y / self.length_scale,
- metric='euclidean')
-
- if self.nu == 0.5:
- K = np.exp(-dists)
- elif self.nu == 1.5:
- K = dists * math.sqrt(3)
- K = (1. + K) * np.exp(-K)
- elif self.nu == 2.5:
- K = dists * math.sqrt(5)
- K = (1. + K + K ** 2 / 3.0) * np.exp(-K)
- else: # general case; expensive to evaluate
- K = dists
- K[K == 0.0] += np.finfo(float).eps # strict zeros result in nan
- tmp = (math.sqrt(2 * self.nu) * K)
- K.fill((2 ** (1. - self.nu)) / gamma(self.nu))
- K *= tmp ** self.nu
- K *= kv(self.nu, tmp)
-
- if Y is None:
- # convert from upper-triangular matrix to square matrix
- K = squareform(K)
- np.fill_diagonal(K, 1)
-
- if eval_gradient:
- if self.hyperparameter_length_scale.fixed:
- # Hyperparameter l kept fixed
- K_gradient = np.empty((X.shape[0], X.shape[0], 0))
- return K, K_gradient
-
- # We need to recompute the pairwise dimension-wise distances
- if self.anisotropic:
- D = (X[:, np.newaxis, :] - X[np.newaxis, :, :])**2 \
- / (self.length_scale ** 2)
- else:
- D = squareform(dists**2)[:, :, np.newaxis]
-
- if self.nu == 0.5:
- K_gradient = K[..., np.newaxis] * D \
- / np.sqrt(D.sum(2))[:, :, np.newaxis]
- K_gradient[~np.isfinite(K_gradient)] = 0
- elif self.nu == 1.5:
- K_gradient = \
- 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]
- elif self.nu == 2.5:
- tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]
- K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)
- else:
- # approximate gradient numerically
- def f(theta): # helper function
- return self.clone_with_theta(theta)(X, Y)
- return K, _approx_fprime(self.theta, f, 1e-10)
-
- if not self.anisotropic:
- return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]
- else:
- return K, K_gradient
- else:
- return K
-
- def __repr__(self):
- if self.anisotropic:
- return "{0}(length_scale=[{1}], nu={2:.3g})".format(
- self.__class__.__name__,
- ", ".join(map("{0:.3g}".format, self.length_scale)),
- self.nu)
- else: # isotropic
- return "{0}(length_scale={1:.3g}, nu={2:.3g})".format(
- self.__class__.__name__, self.length_scale, self.nu)
-
-
-class RationalQuadratic(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
- """Rational Quadratic kernel.
-
- The RationalQuadratic kernel can be seen as a scale mixture (an infinite
- sum) of RBF kernels with different characteristic length-scales. It is
- parameterized by a length-scale parameter length_scale>0 and a scale
- mixture parameter alpha>0. Only the isotropic variant where length_scale is
- a scalar is supported at the moment. The kernel given by:
-
- k(x_i, x_j) = (1 + d(x_i, x_j)^2 / (2*alpha * length_scale^2))^-alpha
-
- Parameters
- ----------
- length_scale : float > 0, default: 1.0
- The length scale of the kernel.
-
- alpha : float > 0, default: 1.0
- Scale mixture parameter
-
- length_scale_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on length_scale
-
- alpha_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on alpha
- """
- def __init__(self, length_scale=1.0, alpha=1.0,
- length_scale_bounds=(1e-5, 1e5), alpha_bounds=(1e-5, 1e5)):
- self.length_scale = length_scale
- self.alpha = alpha
- self.length_scale_bounds = length_scale_bounds
- self.alpha_bounds = alpha_bounds
-
- self.hyperparameter_length_scale = \
- Hyperparameter("length_scale", "numeric", length_scale_bounds)
- self.hyperparameter_alpha = \
- Hyperparameter("alpha", "numeric", alpha_bounds)
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined. Only supported when Y is None.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- X = np.atleast_2d(X)
- if Y is None:
- dists = squareform(pdist(X, metric='sqeuclidean'))
- tmp = dists / (2 * self.alpha * self.length_scale ** 2)
- base = (1 + tmp)
- K = base ** -self.alpha
- np.fill_diagonal(K, 1)
- else:
- if eval_gradient:
- raise ValueError(
- "Gradient can only be evaluated when Y is None.")
- dists = cdist(X, Y, metric='sqeuclidean')
- K = (1 + dists / (2 * self.alpha * self.length_scale ** 2)) \
- ** -self.alpha
-
- if eval_gradient:
- # gradient with respect to length_scale
- if not self.hyperparameter_length_scale.fixed:
- length_scale_gradient = \
- dists * K / (self.length_scale ** 2 * base)
- length_scale_gradient = length_scale_gradient[:, :, np.newaxis]
- else: # l is kept fixed
- length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
-
- # gradient with respect to alpha
- if not self.hyperparameter_alpha.fixed:
- alpha_gradient = \
- K * (-self.alpha * np.log(base)
- + dists / (2 * self.length_scale ** 2 * base))
- alpha_gradient = alpha_gradient[:, :, np.newaxis]
- else: # alpha is kept fixed
- alpha_gradient = np.empty((K.shape[0], K.shape[1], 0))
-
- return K, np.dstack((alpha_gradient, length_scale_gradient))
- else:
- return K
-
- def __repr__(self):
- return "{0}(alpha={1:.3g}, length_scale={2:.3g})".format(
- self.__class__.__name__, self.alpha, self.length_scale)
-
-
-class ExpSineSquared(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
- """Exp-Sine-Squared kernel.
-
- The ExpSineSquared kernel allows modeling periodic functions. It is
- parameterized by a length-scale parameter length_scale>0 and a periodicity
- parameter periodicity>0. Only the isotropic variant where l is a scalar is
- supported at the moment. The kernel given by:
-
- k(x_i, x_j) = exp(-2 sin(\pi / periodicity * d(x_i, x_j)) / length_scale)^2
-
- Parameters
- ----------
- length_scale : float > 0, default: 1.0
- The length scale of the kernel.
-
- periodicity : float > 0, default: 1.0
- The periodicity of the kernel.
-
- length_scale_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on length_scale
-
- periodicity_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on periodicity
- """
- def __init__(self, length_scale=1.0, periodicity=1.0,
- length_scale_bounds=(1e-5, 1e5),
- periodicity_bounds=(1e-5, 1e5)):
- self.length_scale = length_scale
- self.periodicity = periodicity
- self.length_scale_bounds = length_scale_bounds
- self.periodicity_bounds = periodicity_bounds
-
- self.hyperparameter_length_scale = \
- Hyperparameter("length_scale", "numeric", length_scale_bounds)
- self.hyperparameter_periodicity = \
- Hyperparameter("periodicity", "numeric", periodicity_bounds)
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined. Only supported when Y is None.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- X = np.atleast_2d(X)
- if Y is None:
- dists = squareform(pdist(X, metric='euclidean'))
- arg = np.pi * dists / self.periodicity
- sin_of_arg = np.sin(arg)
- K = np.exp(- 2 * (sin_of_arg / self.length_scale) ** 2)
- else:
- if eval_gradient:
- raise ValueError(
- "Gradient can only be evaluated when Y is None.")
- dists = cdist(X, Y, metric='euclidean')
- K = np.exp(- 2 * (np.sin(np.pi / self.periodicity * dists)
- / self.length_scale) ** 2)
-
- if eval_gradient:
- cos_of_arg = np.cos(arg)
- # gradient with respect to length_scale
- if not self.hyperparameter_length_scale.fixed:
- length_scale_gradient = \
- 4 / self.length_scale**2 * sin_of_arg**2 * K
- length_scale_gradient = length_scale_gradient[:, :, np.newaxis]
- else: # length_scale is kept fixed
- length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
- # gradient with respect to p
- if not self.hyperparameter_periodicity.fixed:
- periodicity_gradient = \
- 4 * arg / self.length_scale**2 * cos_of_arg \
- * sin_of_arg * K
- periodicity_gradient = periodicity_gradient[:, :, np.newaxis]
- else: # p is kept fixed
- periodicity_gradient = np.empty((K.shape[0], K.shape[1], 0))
-
- return K, np.dstack((length_scale_gradient, periodicity_gradient))
- else:
- return K
-
- def __repr__(self):
- return "{0}(length_scale={1:.3g}, periodicity={2:.3g})".format(
- self.__class__.__name__, self.length_scale, self.periodicity)
-
-
-class DotProduct(Kernel):
- """Dot-Product kernel.
-
- The DotProduct kernel is non-stationary and can be obtained from linear
- regression by putting N(0, 1) priors on the coefficients of x_d (d = 1, . .
- . , D) and a prior of N(0, \sigma_0^2) on the bias. The DotProduct kernel
- is invariant to a rotation of the coordinates about the origin, but not
- translations. It is parameterized by a parameter sigma_0^2. For
- sigma_0^2 =0, the kernel is called the homogeneous linear kernel, otherwise
- it is inhomogeneous. The kernel is given by
-
- k(x_i, x_j) = sigma_0 ^ 2 + x_i \cdot x_j
-
- The DotProduct kernel is commonly combined with exponentiation.
-
- Parameters
- ----------
- sigma_0 : float >= 0, default: 1.0
- Parameter controlling the inhomogenity of the kernel. If sigma_0=0,
- the kernel is homogenous.
-
- sigma_0_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on l
- """
-
- def __init__(self, sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5)):
- self.sigma_0 = sigma_0
- self.sigma_0_bounds = sigma_0_bounds
-
- self.hyperparameter_sigma_0 = \
- Hyperparameter("sigma_0", "numeric", sigma_0_bounds)
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined. Only supported when Y is None.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- X = np.atleast_2d(X)
- if Y is None:
- K = np.inner(X, X) + self.sigma_0 ** 2
- else:
- if eval_gradient:
- raise ValueError(
- "Gradient can only be evaluated when Y is None.")
- K = np.inner(X, Y) + self.sigma_0 ** 2
-
- if eval_gradient:
- if not self.hyperparameter_sigma_0.fixed:
- K_gradient = np.empty((K.shape[0], K.shape[1], 1))
- K_gradient[..., 0] = 2 * self.sigma_0 ** 2
- return K, K_gradient
- else:
- return K, np.empty((X.shape[0], X.shape[0], 0))
- else:
- return K
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- return np.einsum('ij,ij->i', X, X) + self.sigma_0 ** 2
-
- def is_stationary(self):
- """Returns whether the kernel is stationary. """
- return False
-
- def __repr__(self):
- return "{0}(sigma_0={1:.3g})".format(
- self.__class__.__name__, self.sigma_0)
-
-
-# adapted from scipy/optimize/optimize.py for functions with 2d output
-def _approx_fprime(xk, f, epsilon, args=()):
- f0 = f(*((xk,) + args))
- grad = np.zeros((f0.shape[0], f0.shape[1], len(xk)), float)
- ei = np.zeros((len(xk), ), float)
- for k in range(len(xk)):
- ei[k] = 1.0
- d = epsilon * ei
- grad[:, :, k] = (f(*((xk + d,) + args)) - f0) / d[k]
- ei[k] = 0.0
- return grad
-
-
-class PairwiseKernel(Kernel):
- """Wrapper for kernels in sklearn.metrics.pairwise.
-
- A thin wrapper around the functionality of the kernels in
- sklearn.metrics.pairwise.
-
- Note: Evaluation of eval_gradient is not analytic but numeric and all
- kernels support only isotropic distances. The parameter gamma is
- considered to be a hyperparameter and may be optimized. The other
- kernel parameters are set directly at initialization and are kept
- fixed.
-
- Parameters
- ----------
- gamma: float >= 0, default: 1.0
- Parameter gamma of the pairwise kernel specified by metric
-
- gamma_bounds : pair of floats >= 0, default: (1e-5, 1e5)
- The lower and upper bound on gamma
-
- metric : string, or callable, default: "linear"
- The metric to use when calculating kernel between instances in a
- feature array. If metric is a string, it must be one of the metrics
- in pairwise.PAIRWISE_KERNEL_FUNCTIONS.
- If metric is "precomputed", X is assumed to be a kernel matrix.
- Alternatively, if metric is a callable function, it is called on each
- pair of instances (rows) and the resulting value recorded. The callable
- should take two arrays from X as input and return a value indicating
- the distance between them.
-
- pairwise_kernels_kwargs : dict, default: None
- All entries of this dict (if any) are passed as keyword arguments to
- the pairwise kernel function.
- """
-
- def __init__(self, gamma=1.0, gamma_bounds=(1e-5, 1e5), metric="linear",
- pairwise_kernels_kwargs=None):
- self.gamma = gamma
- self.gamma_bounds = gamma_bounds
-
- self.hyperparameter_gamma = \
- Hyperparameter("gamma", "numeric", gamma_bounds)
-
- self.metric = metric
- if pairwise_kernels_kwargs is not None:
- self.pairwise_kernels_kwargs = pairwise_kernels_kwargs
- else:
- self.pairwise_kernels_kwargs = {}
-
- def __call__(self, X, Y=None, eval_gradient=False):
- """Return the kernel k(X, Y) and optionally its gradient.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Y : array, shape (n_samples_Y, n_features), (optional, default=None)
- Right argument of the returned kernel k(X, Y). If None, k(X, X)
- if evaluated instead.
-
- eval_gradient : bool (optional, default=False)
- Determines whether the gradient with respect to the kernel
- hyperparameter is determined. Only supported when Y is None.
-
- Returns
- -------
- K : array, shape (n_samples_X, n_samples_Y)
- Kernel k(X, Y)
-
- K_gradient : array (opt.), shape (n_samples_X, n_samples_X, n_dims)
- The gradient of the kernel k(X, X) with respect to the
- hyperparameter of the kernel. Only returned when eval_gradient
- is True.
- """
- X = np.atleast_2d(X)
- K = pairwise_kernels(X, Y, metric=self.metric, gamma=self.gamma,
- filter_params=True,
- **self.pairwise_kernels_kwargs)
- if eval_gradient:
- if self.hyperparameter_gamma.fixed:
- return K, np.empty((X.shape[0], X.shape[0], 0))
- else:
- # approximate gradient numerically
- def f(gamma): # helper function
- return pairwise_kernels(
- X, Y, metric=self.metric, gamma=np.exp(gamma),
- filter_params=True, **self.pairwise_kernels_kwargs)
- return K, _approx_fprime(self.theta, f, 1e-10)
- else:
- return K
-
- def diag(self, X):
- """Returns the diagonal of the kernel k(X, X).
-
- The result of this method is identical to np.diag(self(X)); however,
- it can be evaluated more efficiently since only the diagonal is
- evaluated.
-
- Parameters
- ----------
- X : array, shape (n_samples_X, n_features)
- Left argument of the returned kernel k(X, Y)
-
- Returns
- -------
- K_diag : array, shape (n_samples_X,)
- Diagonal of kernel k(X, X)
- """
- # We have to fall back to slow way of computing diagonal
- return np.apply_along_axis(self, 1, X)[:, 0]
-
- def is_stationary(self):
- """Returns whether the kernel is stationary. """
- return self.metric in ["rbf"]
-
- def __repr__(self):
- return "{0}(gamma={1}, metric={2})".format(
- self.__class__.__name__, self.gamma, self.metric)
diff --git a/mloop/localsklearn/gaussian_process/regression_models.py b/mloop/localsklearn/gaussian_process/regression_models.py
deleted file mode 100644
index 041837e..0000000
--- a/mloop/localsklearn/gaussian_process/regression_models.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Author: Vincent Dubourg
-# (mostly translation, see implementation details)
-# License: BSD 3 clause
-
-"""
-The built-in regression models submodule for the gaussian_process module.
-"""
-
-
-import numpy as np
-
-
-def constant(x):
- """
- Zero order polynomial (constant, p = 1) regression model.
-
- x --> f(x) = 1
-
- Parameters
- ----------
- x : array_like
- An array with shape (n_eval, n_features) giving the locations x at
- which the regression model should be evaluated.
-
- Returns
- -------
- f : array_like
- An array with shape (n_eval, p) with the values of the regression
- model.
- """
- x = np.asarray(x, dtype=np.float64)
- n_eval = x.shape[0]
- f = np.ones([n_eval, 1])
- return f
-
-
-def linear(x):
- """
- First order polynomial (linear, p = n+1) regression model.
-
- x --> f(x) = [ 1, x_1, ..., x_n ].T
-
- Parameters
- ----------
- x : array_like
- An array with shape (n_eval, n_features) giving the locations x at
- which the regression model should be evaluated.
-
- Returns
- -------
- f : array_like
- An array with shape (n_eval, p) with the values of the regression
- model.
- """
- x = np.asarray(x, dtype=np.float64)
- n_eval = x.shape[0]
- f = np.hstack([np.ones([n_eval, 1]), x])
- return f
-
-
-def quadratic(x):
- """
- Second order polynomial (quadratic, p = n*(n-1)/2+n+1) regression model.
-
- x --> f(x) = [ 1, { x_i, i = 1,...,n }, { x_i * x_j, (i,j) = 1,...,n } ].T
- i > j
-
- Parameters
- ----------
- x : array_like
- An array with shape (n_eval, n_features) giving the locations x at
- which the regression model should be evaluated.
-
- Returns
- -------
- f : array_like
- An array with shape (n_eval, p) with the values of the regression
- model.
- """
-
- x = np.asarray(x, dtype=np.float64)
- n_eval, n_features = x.shape
- f = np.hstack([np.ones([n_eval, 1]), x])
- for k in range(n_features):
- f = np.hstack([f, x[:, k, np.newaxis] * x[:, k:]])
-
- return f
diff --git a/mloop/localsklearn/metrics/__init__.py b/mloop/localsklearn/metrics/__init__.py
deleted file mode 100644
index 4138319..0000000
--- a/mloop/localsklearn/metrics/__init__.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""
-The :mod:`sklearn.metrics` module includes score functions, performance metrics
-and pairwise metrics and distance computations.
-"""
-
-
-from .ranking import auc
-from .ranking import average_precision_score
-from .ranking import coverage_error
-from .ranking import label_ranking_average_precision_score
-from .ranking import label_ranking_loss
-from .ranking import precision_recall_curve
-from .ranking import roc_auc_score
-from .ranking import roc_curve
-
-from .classification import accuracy_score
-from .classification import classification_report
-from .classification import cohen_kappa_score
-from .classification import confusion_matrix
-from .classification import f1_score
-from .classification import fbeta_score
-from .classification import hamming_loss
-from .classification import hinge_loss
-from .classification import jaccard_similarity_score
-from .classification import log_loss
-from .classification import matthews_corrcoef
-from .classification import precision_recall_fscore_support
-from .classification import precision_score
-from .classification import recall_score
-from .classification import zero_one_loss
-from .classification import brier_score_loss
-
-from . import cluster
-from .cluster import adjusted_mutual_info_score
-from .cluster import adjusted_rand_score
-from .cluster import completeness_score
-from .cluster import consensus_score
-from .cluster import homogeneity_completeness_v_measure
-from .cluster import homogeneity_score
-from .cluster import mutual_info_score
-from .cluster import normalized_mutual_info_score
-from .cluster import fowlkes_mallows_score
-from .cluster import silhouette_samples
-from .cluster import silhouette_score
-from .cluster import calinski_harabaz_score
-from .cluster import v_measure_score
-
-from .pairwise import euclidean_distances
-from .pairwise import pairwise_distances
-from .pairwise import pairwise_distances_argmin
-from .pairwise import pairwise_distances_argmin_min
-from .pairwise import pairwise_kernels
-
-from .regression import explained_variance_score
-from .regression import mean_absolute_error
-from .regression import mean_squared_error
-from .regression import median_absolute_error
-from .regression import r2_score
-
-from .scorer import make_scorer
-from .scorer import SCORERS
-from .scorer import get_scorer
-
-__all__ = [
- 'accuracy_score',
- 'adjusted_mutual_info_score',
- 'adjusted_rand_score',
- 'auc',
- 'average_precision_score',
- 'classification_report',
- 'cluster',
- 'completeness_score',
- 'confusion_matrix',
- 'consensus_score',
- 'coverage_error',
- 'euclidean_distances',
- 'explained_variance_score',
- 'f1_score',
- 'fbeta_score',
- 'get_scorer',
- 'hamming_loss',
- 'hinge_loss',
- 'homogeneity_completeness_v_measure',
- 'homogeneity_score',
- 'jaccard_similarity_score',
- 'label_ranking_average_precision_score',
- 'label_ranking_loss',
- 'log_loss',
- 'make_scorer',
- 'matthews_corrcoef',
- 'mean_absolute_error',
- 'mean_squared_error',
- 'median_absolute_error',
- 'mutual_info_score',
- 'normalized_mutual_info_score',
- 'pairwise_distances',
- 'pairwise_distances_argmin',
- 'pairwise_distances_argmin_min',
- 'pairwise_distances_argmin_min',
- 'pairwise_kernels',
- 'precision_recall_curve',
- 'precision_recall_fscore_support',
- 'precision_score',
- 'r2_score',
- 'recall_score',
- 'roc_auc_score',
- 'roc_curve',
- 'SCORERS',
- 'silhouette_samples',
- 'silhouette_score',
- 'v_measure_score',
- 'zero_one_loss',
- 'brier_score_loss',
-]
diff --git a/mloop/localsklearn/metrics/base.py b/mloop/localsklearn/metrics/base.py
deleted file mode 100644
index 0ad96c1..0000000
--- a/mloop/localsklearn/metrics/base.py
+++ /dev/null
@@ -1,133 +0,0 @@
-"""
-Common code for all metrics
-
-"""
-# Authors: Alexandre Gramfort
-# Mathieu Blondel
-# Olivier Grisel
-# Arnaud Joly
-# Jochen Wersdorfer
-# Lars Buitinck
-# Joel Nothman
-# Noel Dawe
-# License: BSD 3 clause
-
-from __future__ import division
-
-import numpy as np
-
-from ..utils import check_array, check_consistent_length
-from ..utils.multiclass import type_of_target
-
-from ..exceptions import UndefinedMetricWarning as _UndefinedMetricWarning
-from ..utils import deprecated
-
-
-@deprecated("UndefinedMetricWarning has been moved into the sklearn.exceptions"
- " module. It will not be available here from version 0.19")
-class UndefinedMetricWarning(_UndefinedMetricWarning):
- pass
-
-
-def _average_binary_score(binary_metric, y_true, y_score, average,
- sample_weight=None):
- """Average a binary metric for multilabel classification
-
- Parameters
- ----------
- y_true : array, shape = [n_samples] or [n_samples, n_classes]
- True binary labels in binary label indicators.
-
- y_score : array, shape = [n_samples] or [n_samples, n_classes]
- Target scores, can either be probability estimates of the positive
- class, confidence values, or binary decisions.
-
- average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
- If ``None``, the scores for each class are returned. Otherwise,
- this determines the type of averaging performed on the data:
-
- ``'micro'``:
- Calculate metrics globally by considering each element of the label
- indicator matrix as a label.
- ``'macro'``:
- Calculate metrics for each label, and find their unweighted
- mean. This does not take label imbalance into account.
- ``'weighted'``:
- Calculate metrics for each label, and find their average, weighted
- by support (the number of true instances for each label).
- ``'samples'``:
- Calculate metrics for each instance, and find their average.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- binary_metric : callable, returns shape [n_classes]
- The binary metric function to use.
-
- Returns
- -------
- score : float or array of shape [n_classes]
- If not ``None``, average the score, else return the score for each
- classes.
-
- """
- average_options = (None, 'micro', 'macro', 'weighted', 'samples')
- if average not in average_options:
- raise ValueError('average has to be one of {0}'
- ''.format(average_options))
-
- y_type = type_of_target(y_true)
- if y_type not in ("binary", "multilabel-indicator"):
- raise ValueError("{0} format is not supported".format(y_type))
-
- if y_type == "binary":
- return binary_metric(y_true, y_score, sample_weight=sample_weight)
-
- check_consistent_length(y_true, y_score, sample_weight)
- y_true = check_array(y_true)
- y_score = check_array(y_score)
-
- not_average_axis = 1
- score_weight = sample_weight
- average_weight = None
-
- if average == "micro":
- if score_weight is not None:
- score_weight = np.repeat(score_weight, y_true.shape[1])
- y_true = y_true.ravel()
- y_score = y_score.ravel()
-
- elif average == 'weighted':
- if score_weight is not None:
- average_weight = np.sum(np.multiply(
- y_true, np.reshape(score_weight, (-1, 1))), axis=0)
- else:
- average_weight = np.sum(y_true, axis=0)
- if average_weight.sum() == 0:
- return 0
-
- elif average == 'samples':
- # swap average_weight <-> score_weight
- average_weight = score_weight
- score_weight = None
- not_average_axis = 0
-
- if y_true.ndim == 1:
- y_true = y_true.reshape((-1, 1))
-
- if y_score.ndim == 1:
- y_score = y_score.reshape((-1, 1))
-
- n_classes = y_score.shape[not_average_axis]
- score = np.zeros((n_classes,))
- for c in range(n_classes):
- y_true_c = y_true.take([c], axis=not_average_axis).ravel()
- y_score_c = y_score.take([c], axis=not_average_axis).ravel()
- score[c] = binary_metric(y_true_c, y_score_c,
- sample_weight=score_weight)
-
- # Average the results
- if average is not None:
- return np.average(score, weights=average_weight)
- else:
- return score
diff --git a/mloop/localsklearn/metrics/classification.py b/mloop/localsklearn/metrics/classification.py
deleted file mode 100644
index 7cac2ec..0000000
--- a/mloop/localsklearn/metrics/classification.py
+++ /dev/null
@@ -1,1848 +0,0 @@
-"""Metrics to assess performance on classification task given class prediction
-
-Functions named as ``*_score`` return a scalar value to maximize: the higher
-the better
-
-Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
-the lower the better
-"""
-
-# Authors: Alexandre Gramfort
-# Mathieu Blondel
-# Olivier Grisel
-# Arnaud Joly
-# Jochen Wersdorfer
-# Lars Buitinck
-# Joel Nothman
-# Noel Dawe
-# Jatin Shah
-# Saurabh Jha
-# Bernardo Stein
-# License: BSD 3 clause
-
-from __future__ import division
-
-import warnings
-import numpy as np
-
-from scipy.sparse import coo_matrix
-from scipy.sparse import csr_matrix
-
-from ..preprocessing import LabelBinarizer, label_binarize
-from ..preprocessing import LabelEncoder
-from ..utils import check_array
-from ..utils import check_consistent_length
-from ..utils import column_or_1d
-from ..utils.multiclass import unique_labels
-from ..utils.multiclass import type_of_target
-from ..utils.validation import _num_samples
-from ..utils.sparsefuncs import count_nonzero
-from ..utils.fixes import bincount
-from ..exceptions import UndefinedMetricWarning
-
-
-def _check_targets(y_true, y_pred):
- """Check that y_true and y_pred belong to the same classification task
-
- This converts multiclass or binary types to a common shape, and raises a
- ValueError for a mix of multilabel and multiclass targets, a mix of
- multilabel formats, for the presence of continuous-valued or multioutput
- targets, or for targets of different lengths.
-
- Column vectors are squeezed to 1d, while multilabel formats are returned
- as CSR sparse label indicators.
-
- Parameters
- ----------
- y_true : array-like
-
- y_pred : array-like
-
- Returns
- -------
- type_true : one of {'multilabel-indicator', 'multiclass', 'binary'}
- The type of the true target data, as output by
- ``utils.multiclass.type_of_target``
-
- y_true : array or indicator matrix
-
- y_pred : array or indicator matrix
- """
- check_consistent_length(y_true, y_pred)
- type_true = type_of_target(y_true)
- type_pred = type_of_target(y_pred)
-
- y_type = set([type_true, type_pred])
- if y_type == set(["binary", "multiclass"]):
- y_type = set(["multiclass"])
-
- if len(y_type) > 1:
- raise ValueError("Can't handle mix of {0} and {1}"
- "".format(type_true, type_pred))
-
- # We can't have more than one value on y_type => The set is no more needed
- y_type = y_type.pop()
-
- # No metrics support "multiclass-multioutput" format
- if (y_type not in ["binary", "multiclass", "multilabel-indicator"]):
- raise ValueError("{0} is not supported".format(y_type))
-
- if y_type in ["binary", "multiclass"]:
- y_true = column_or_1d(y_true)
- y_pred = column_or_1d(y_pred)
-
- if y_type.startswith('multilabel'):
- y_true = csr_matrix(y_true)
- y_pred = csr_matrix(y_pred)
- y_type = 'multilabel-indicator'
-
- return y_type, y_true, y_pred
-
-
-def _weighted_sum(sample_score, sample_weight, normalize=False):
- if normalize:
- return np.average(sample_score, weights=sample_weight)
- elif sample_weight is not None:
- return np.dot(sample_score, sample_weight)
- else:
- return sample_score.sum()
-
-
-def accuracy_score(y_true, y_pred, normalize=True, sample_weight=None):
- """Accuracy classification score.
-
- In multilabel classification, this function computes subset accuracy:
- the set of labels predicted for a sample must *exactly* match the
- corresponding set of labels in y_true.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) labels.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Predicted labels, as returned by a classifier.
-
- normalize : bool, optional (default=True)
- If ``False``, return the number of correctly classified samples.
- Otherwise, return the fraction of correctly classified samples.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- score : float
- If ``normalize == True``, return the correctly classified samples
- (float), else it returns the number of correctly classified samples
- (int).
-
- The best performance is 1 with ``normalize == True`` and the number
- of samples with ``normalize == False``.
-
- See also
- --------
- jaccard_similarity_score, hamming_loss, zero_one_loss
-
- Notes
- -----
- In binary and multiclass classification, this function is equal
- to the ``jaccard_similarity_score`` function.
-
- Examples
- --------
- >>> import numpy as np
- >>> from sklearn.metrics import accuracy_score
- >>> y_pred = [0, 2, 1, 3]
- >>> y_true = [0, 1, 2, 3]
- >>> accuracy_score(y_true, y_pred)
- 0.5
- >>> accuracy_score(y_true, y_pred, normalize=False)
- 2
-
- In the multilabel case with binary label indicators:
- >>> accuracy_score(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
- 0.5
- """
-
- # Compute accuracy for each possible representation
- y_type, y_true, y_pred = _check_targets(y_true, y_pred)
- if y_type.startswith('multilabel'):
- differing_labels = count_nonzero(y_true - y_pred, axis=1)
- score = differing_labels == 0
- else:
- score = y_true == y_pred
-
- return _weighted_sum(score, sample_weight, normalize)
-
-
-def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None):
- """Compute confusion matrix to evaluate the accuracy of a classification
-
- By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}`
- is equal to the number of observations known to be in group :math:`i` but
- predicted to be in group :math:`j`.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : array, shape = [n_samples]
- Ground truth (correct) target values.
-
- y_pred : array, shape = [n_samples]
- Estimated targets as returned by a classifier.
-
- labels : array, shape = [n_classes], optional
- List of labels to index the matrix. This may be used to reorder
- or select a subset of labels.
- If none is given, those that appear at least once
- in ``y_true`` or ``y_pred`` are used in sorted order.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- C : array, shape = [n_classes, n_classes]
- Confusion matrix
-
- References
- ----------
- .. [1] `Wikipedia entry for the Confusion matrix
- `_
-
- Examples
- --------
- >>> from sklearn.metrics import confusion_matrix
- >>> y_true = [2, 0, 2, 2, 0, 1]
- >>> y_pred = [0, 0, 2, 2, 0, 2]
- >>> confusion_matrix(y_true, y_pred)
- array([[2, 0, 0],
- [0, 0, 1],
- [1, 0, 2]])
-
- >>> y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
- >>> y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
- >>> confusion_matrix(y_true, y_pred, labels=["ant", "bird", "cat"])
- array([[2, 0, 0],
- [0, 0, 1],
- [1, 0, 2]])
-
- """
- y_type, y_true, y_pred = _check_targets(y_true, y_pred)
- if y_type not in ("binary", "multiclass"):
- raise ValueError("%s is not supported" % y_type)
-
- if labels is None:
- labels = unique_labels(y_true, y_pred)
- else:
- labels = np.asarray(labels)
-
- if sample_weight is None:
- sample_weight = np.ones(y_true.shape[0], dtype=np.int)
- else:
- sample_weight = np.asarray(sample_weight)
-
- check_consistent_length(sample_weight, y_true, y_pred)
-
- n_labels = labels.size
- label_to_ind = dict((y, x) for x, y in enumerate(labels))
- # convert yt, yp into index
- y_pred = np.array([label_to_ind.get(x, n_labels + 1) for x in y_pred])
- y_true = np.array([label_to_ind.get(x, n_labels + 1) for x in y_true])
-
- # intersect y_pred, y_true with labels, eliminate items not in labels
- ind = np.logical_and(y_pred < n_labels, y_true < n_labels)
- y_pred = y_pred[ind]
- y_true = y_true[ind]
- # also eliminate weights of eliminated items
- sample_weight = sample_weight[ind]
-
- CM = coo_matrix((sample_weight, (y_true, y_pred)),
- shape=(n_labels, n_labels)
- ).toarray()
-
- return CM
-
-
-def cohen_kappa_score(y1, y2, labels=None, weights=None):
- """Cohen's kappa: a statistic that measures inter-annotator agreement.
-
- This function computes Cohen's kappa [1]_, a score that expresses the level
- of agreement between two annotators on a classification problem. It is
- defined as
-
- .. math::
- \kappa = (p_o - p_e) / (1 - p_e)
-
- where :math:`p_o` is the empirical probability of agreement on the label
- assigned to any sample (the observed agreement ratio), and :math:`p_e` is
- the expected agreement when both annotators assign labels randomly.
- :math:`p_e` is estimated using a per-annotator empirical prior over the
- class labels [2]_.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y1 : array, shape = [n_samples]
- Labels assigned by the first annotator.
-
- y2 : array, shape = [n_samples]
- Labels assigned by the second annotator. The kappa statistic is
- symmetric, so swapping ``y1`` and ``y2`` doesn't change the value.
-
- labels : array, shape = [n_classes], optional
- List of labels to index the matrix. This may be used to select a
- subset of labels. If None, all labels that appear at least once in
- ``y1`` or ``y2`` are used.
-
- weights : str, optional
- List of weighting type to calculate the score. None means no weighted;
- "linear" means linear weighted; "quadratic" means quadratic weighted.
-
- Returns
- -------
- kappa : float
- The kappa statistic, which is a number between -1 and 1. The maximum
- value means complete agreement; zero or lower means chance agreement.
-
- References
- ----------
- .. [1] J. Cohen (1960). "A coefficient of agreement for nominal scales".
- Educational and Psychological Measurement 20(1):37-46.
- doi:10.1177/001316446002000104.
- .. [2] `R. Artstein and M. Poesio (2008). "Inter-coder agreement for
- computational linguistics". Computational Linguistics 34(4):555-596.
- `_
- .. [3] `Wikipedia entry for the Cohen's kappa.
- `_
- """
- confusion = confusion_matrix(y1, y2, labels=labels)
- n_classes = confusion.shape[0]
- sum0 = np.sum(confusion, axis=0)
- sum1 = np.sum(confusion, axis=1)
- expected = np.outer(sum0, sum1) / np.sum(sum0)
-
- if weights is None:
- w_mat = np.ones([n_classes, n_classes], dtype=np.int)
- w_mat.flat[:: n_classes + 1] = 0
- elif weights == "linear" or weights == "quadratic":
- w_mat = np.zeros([n_classes, n_classes], dtype=np.int)
- w_mat += np.arange(n_classes)
- if weights == "linear":
- w_mat = np.abs(w_mat - w_mat.T)
- else:
- w_mat = (w_mat - w_mat.T) ** 2
- else:
- raise ValueError("Unknown kappa weighting type.")
-
- k = np.sum(w_mat * confusion) / np.sum(w_mat * expected)
- return 1 - k
-
-
-def jaccard_similarity_score(y_true, y_pred, normalize=True,
- sample_weight=None):
- """Jaccard similarity coefficient score
-
- The Jaccard index [1], or Jaccard similarity coefficient, defined as
- the size of the intersection divided by the size of the union of two label
- sets, is used to compare set of predicted labels for a sample to the
- corresponding set of labels in ``y_true``.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) labels.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Predicted labels, as returned by a classifier.
-
- normalize : bool, optional (default=True)
- If ``False``, return the sum of the Jaccard similarity coefficient
- over the sample set. Otherwise, return the average of Jaccard
- similarity coefficient.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- score : float
- If ``normalize == True``, return the average Jaccard similarity
- coefficient, else it returns the sum of the Jaccard similarity
- coefficient over the sample set.
-
- The best performance is 1 with ``normalize == True`` and the number
- of samples with ``normalize == False``.
-
- See also
- --------
- accuracy_score, hamming_loss, zero_one_loss
-
- Notes
- -----
- In binary and multiclass classification, this function is equivalent
- to the ``accuracy_score``. It differs in the multilabel classification
- problem.
-
- References
- ----------
- .. [1] `Wikipedia entry for the Jaccard index
- `_
-
-
- Examples
- --------
- >>> import numpy as np
- >>> from sklearn.metrics import jaccard_similarity_score
- >>> y_pred = [0, 2, 1, 3]
- >>> y_true = [0, 1, 2, 3]
- >>> jaccard_similarity_score(y_true, y_pred)
- 0.5
- >>> jaccard_similarity_score(y_true, y_pred, normalize=False)
- 2
-
- In the multilabel case with binary label indicators:
-
- >>> jaccard_similarity_score(np.array([[0, 1], [1, 1]]),\
- np.ones((2, 2)))
- 0.75
- """
-
- # Compute accuracy for each possible representation
- y_type, y_true, y_pred = _check_targets(y_true, y_pred)
- if y_type.startswith('multilabel'):
- with np.errstate(divide='ignore', invalid='ignore'):
- # oddly, we may get an "invalid" rather than a "divide" error here
- pred_or_true = count_nonzero(y_true + y_pred, axis=1)
- pred_and_true = count_nonzero(y_true.multiply(y_pred), axis=1)
- score = pred_and_true / pred_or_true
-
- # If there is no label, it results in a Nan instead, we set
- # the jaccard to 1: lim_{x->0} x/x = 1
- # Note with py2.6 and np 1.3: we can't check safely for nan.
- score[pred_or_true == 0.0] = 1.0
- else:
- score = y_true == y_pred
-
- return _weighted_sum(score, sample_weight, normalize)
-
-
-def matthews_corrcoef(y_true, y_pred, sample_weight=None):
- """Compute the Matthews correlation coefficient (MCC) for binary classes
-
- The Matthews correlation coefficient is used in machine learning as a
- measure of the quality of binary (two-class) classifications. It takes into
- account true and false positives and negatives and is generally regarded as
- a balanced measure which can be used even if the classes are of very
- different sizes. The MCC is in essence a correlation coefficient value
- between -1 and +1. A coefficient of +1 represents a perfect prediction, 0
- an average random prediction and -1 an inverse prediction. The statistic
- is also known as the phi coefficient. [source: Wikipedia]
-
- Only in the binary case does this relate to information about true and
- false positives and negatives. See references below.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : array, shape = [n_samples]
- Ground truth (correct) target values.
-
- y_pred : array, shape = [n_samples]
- Estimated targets as returned by a classifier.
-
- sample_weight : array-like of shape = [n_samples], default None
- Sample weights.
-
- Returns
- -------
- mcc : float
- The Matthews correlation coefficient (+1 represents a perfect
- prediction, 0 an average random prediction and -1 and inverse
- prediction).
-
- References
- ----------
- .. [1] `Baldi, Brunak, Chauvin, Andersen and Nielsen, (2000). Assessing the
- accuracy of prediction algorithms for classification: an overview
- `_
-
- .. [2] `Wikipedia entry for the Matthews Correlation Coefficient
- `_
-
- Examples
- --------
- >>> from sklearn.metrics import matthews_corrcoef
- >>> y_true = [+1, +1, +1, -1]
- >>> y_pred = [+1, -1, +1, +1]
- >>> matthews_corrcoef(y_true, y_pred) # doctest: +ELLIPSIS
- -0.33...
-
- """
- y_type, y_true, y_pred = _check_targets(y_true, y_pred)
-
- if y_type != "binary":
- raise ValueError("%s is not supported" % y_type)
-
- lb = LabelEncoder()
- lb.fit(np.hstack([y_true, y_pred]))
- y_true = lb.transform(y_true)
- y_pred = lb.transform(y_pred)
- mean_yt = np.average(y_true, weights=sample_weight)
- mean_yp = np.average(y_pred, weights=sample_weight)
-
- y_true_u_cent = y_true - mean_yt
- y_pred_u_cent = y_pred - mean_yp
-
- cov_ytyp = np.average(y_true_u_cent * y_pred_u_cent, weights=sample_weight)
- var_yt = np.average(y_true_u_cent ** 2, weights=sample_weight)
- var_yp = np.average(y_pred_u_cent ** 2, weights=sample_weight)
-
- mcc = cov_ytyp / np.sqrt(var_yt * var_yp)
-
- if np.isnan(mcc):
- return 0.
- else:
- return mcc
-
-
-def zero_one_loss(y_true, y_pred, normalize=True, sample_weight=None):
- """Zero-one classification loss.
-
- If normalize is ``True``, return the fraction of misclassifications
- (float), else it returns the number of misclassifications (int). The best
- performance is 0.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) labels.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Predicted labels, as returned by a classifier.
-
- normalize : bool, optional (default=True)
- If ``False``, return the number of misclassifications.
- Otherwise, return the fraction of misclassifications.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- loss : float or int,
- If ``normalize == True``, return the fraction of misclassifications
- (float), else it returns the number of misclassifications (int).
-
- Notes
- -----
- In multilabel classification, the zero_one_loss function corresponds to
- the subset zero-one loss: for each sample, the entire set of labels must be
- correctly predicted, otherwise the loss for that sample is equal to one.
-
- See also
- --------
- accuracy_score, hamming_loss, jaccard_similarity_score
-
- Examples
- --------
- >>> from sklearn.metrics import zero_one_loss
- >>> y_pred = [1, 2, 3, 4]
- >>> y_true = [2, 2, 3, 4]
- >>> zero_one_loss(y_true, y_pred)
- 0.25
- >>> zero_one_loss(y_true, y_pred, normalize=False)
- 1
-
- In the multilabel case with binary label indicators:
-
- >>> zero_one_loss(np.array([[0, 1], [1, 1]]), np.ones((2, 2)))
- 0.5
- """
- score = accuracy_score(y_true, y_pred,
- normalize=normalize,
- sample_weight=sample_weight)
-
- if normalize:
- return 1 - score
- else:
- if sample_weight is not None:
- n_samples = np.sum(sample_weight)
- else:
- n_samples = _num_samples(y_true)
- return n_samples - score
-
-
-def f1_score(y_true, y_pred, labels=None, pos_label=1, average='binary',
- sample_weight=None):
- """Compute the F1 score, also known as balanced F-score or F-measure
-
- The F1 score can be interpreted as a weighted average of the precision and
- recall, where an F1 score reaches its best value at 1 and worst score at 0.
- The relative contribution of precision and recall to the F1 score are
- equal. The formula for the F1 score is::
-
- F1 = 2 * (precision * recall) / (precision + recall)
-
- In the multi-class and multi-label case, this is the weighted average of
- the F1 score of each class.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) target values.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Estimated targets as returned by a classifier.
-
- labels : list, optional
- The set of labels to include when ``average != 'binary'``, and their
- order if ``average is None``. Labels present in the data can be
- excluded, for example to calculate a multiclass average ignoring a
- majority negative class, while labels not present in the data will
- result in 0 components in a macro average. For multilabel targets,
- labels are column indices. By default, all labels in ``y_true`` and
- ``y_pred`` are used in sorted order.
-
- .. versionchanged:: 0.17
- parameter *labels* improved for multiclass problem.
-
- pos_label : str or int, 1 by default
- The class to report if ``average='binary'``. Until version 0.18 it is
- necessary to set ``pos_label=None`` if seeking to use another averaging
- method over binary targets.
-
- average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
- 'weighted']
- This parameter is required for multiclass/multilabel targets.
- If ``None``, the scores for each class are returned. Otherwise, this
- determines the type of averaging performed on the data:
-
- ``'binary'``:
- Only report results for the class specified by ``pos_label``.
- This is applicable only if targets (``y_{true,pred}``) are binary.
- ``'micro'``:
- Calculate metrics globally by counting the total true positives,
- false negatives and false positives.
- ``'macro'``:
- Calculate metrics for each label, and find their unweighted
- mean. This does not take label imbalance into account.
- ``'weighted'``:
- Calculate metrics for each label, and find their average, weighted
- by support (the number of true instances for each label). This
- alters 'macro' to account for label imbalance; it can result in an
- F-score that is not between precision and recall.
- ``'samples'``:
- Calculate metrics for each instance, and find their average (only
- meaningful for multilabel classification where this differs from
- :func:`accuracy_score`).
-
- Note that if ``pos_label`` is given in binary classification with
- `average != 'binary'`, only that positive class is reported. This
- behavior is deprecated and will change in version 0.18.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- f1_score : float or array of float, shape = [n_unique_labels]
- F1 score of the positive class in binary classification or weighted
- average of the F1 scores of each class for the multiclass task.
-
- References
- ----------
- .. [1] `Wikipedia entry for the F1-score `_
-
- Examples
- --------
- >>> from sklearn.metrics import f1_score
- >>> y_true = [0, 1, 2, 0, 1, 2]
- >>> y_pred = [0, 2, 1, 0, 0, 1]
- >>> f1_score(y_true, y_pred, average='macro') # doctest: +ELLIPSIS
- 0.26...
- >>> f1_score(y_true, y_pred, average='micro') # doctest: +ELLIPSIS
- 0.33...
- >>> f1_score(y_true, y_pred, average='weighted') # doctest: +ELLIPSIS
- 0.26...
- >>> f1_score(y_true, y_pred, average=None)
- array([ 0.8, 0. , 0. ])
-
-
- """
- return fbeta_score(y_true, y_pred, 1, labels=labels,
- pos_label=pos_label, average=average,
- sample_weight=sample_weight)
-
-
-def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1,
- average='binary', sample_weight=None):
- """Compute the F-beta score
-
- The F-beta score is the weighted harmonic mean of precision and recall,
- reaching its optimal value at 1 and its worst value at 0.
-
- The `beta` parameter determines the weight of precision in the combined
- score. ``beta < 1`` lends more weight to precision, while ``beta > 1``
- favors recall (``beta -> 0`` considers only precision, ``beta -> inf``
- only recall).
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) target values.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Estimated targets as returned by a classifier.
-
- beta: float
- Weight of precision in harmonic mean.
-
- labels : list, optional
- The set of labels to include when ``average != 'binary'``, and their
- order if ``average is None``. Labels present in the data can be
- excluded, for example to calculate a multiclass average ignoring a
- majority negative class, while labels not present in the data will
- result in 0 components in a macro average. For multilabel targets,
- labels are column indices. By default, all labels in ``y_true`` and
- ``y_pred`` are used in sorted order.
-
- .. versionchanged:: 0.17
- parameter *labels* improved for multiclass problem.
-
- pos_label : str or int, 1 by default
- The class to report if ``average='binary'``. Until version 0.18 it is
- necessary to set ``pos_label=None`` if seeking to use another averaging
- method over binary targets.
-
- average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
- 'weighted']
- This parameter is required for multiclass/multilabel targets.
- If ``None``, the scores for each class are returned. Otherwise, this
- determines the type of averaging performed on the data:
-
- ``'binary'``:
- Only report results for the class specified by ``pos_label``.
- This is applicable only if targets (``y_{true,pred}``) are binary.
- ``'micro'``:
- Calculate metrics globally by counting the total true positives,
- false negatives and false positives.
- ``'macro'``:
- Calculate metrics for each label, and find their unweighted
- mean. This does not take label imbalance into account.
- ``'weighted'``:
- Calculate metrics for each label, and find their average, weighted
- by support (the number of true instances for each label). This
- alters 'macro' to account for label imbalance; it can result in an
- F-score that is not between precision and recall.
- ``'samples'``:
- Calculate metrics for each instance, and find their average (only
- meaningful for multilabel classification where this differs from
- :func:`accuracy_score`).
-
- Note that if ``pos_label`` is given in binary classification with
- `average != 'binary'`, only that positive class is reported. This
- behavior is deprecated and will change in version 0.18.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- fbeta_score : float (if average is not None) or array of float, shape =\
- [n_unique_labels]
- F-beta score of the positive class in binary classification or weighted
- average of the F-beta score of each class for the multiclass task.
-
- References
- ----------
- .. [1] R. Baeza-Yates and B. Ribeiro-Neto (2011).
- Modern Information Retrieval. Addison Wesley, pp. 327-328.
-
- .. [2] `Wikipedia entry for the F1-score
- `_
-
- Examples
- --------
- >>> from sklearn.metrics import fbeta_score
- >>> y_true = [0, 1, 2, 0, 1, 2]
- >>> y_pred = [0, 2, 1, 0, 0, 1]
- >>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)
- ... # doctest: +ELLIPSIS
- 0.23...
- >>> fbeta_score(y_true, y_pred, average='micro', beta=0.5)
- ... # doctest: +ELLIPSIS
- 0.33...
- >>> fbeta_score(y_true, y_pred, average='weighted', beta=0.5)
- ... # doctest: +ELLIPSIS
- 0.23...
- >>> fbeta_score(y_true, y_pred, average=None, beta=0.5)
- ... # doctest: +ELLIPSIS
- array([ 0.71..., 0. , 0. ])
-
- """
- _, _, f, _ = precision_recall_fscore_support(y_true, y_pred,
- beta=beta,
- labels=labels,
- pos_label=pos_label,
- average=average,
- warn_for=('f-score',),
- sample_weight=sample_weight)
- return f
-
-
-def _prf_divide(numerator, denominator, metric, modifier, average, warn_for):
- """Performs division and handles divide-by-zero.
-
- On zero-division, sets the corresponding result elements to zero
- and raises a warning.
-
- The metric, modifier and average arguments are used only for determining
- an appropriate warning.
- """
- result = numerator / denominator
- mask = denominator == 0.0
- if not np.any(mask):
- return result
-
- # remove infs
- result[mask] = 0.0
-
- # build appropriate warning
- # E.g. "Precision and F-score are ill-defined and being set to 0.0 in
- # labels with no predicted samples"
- axis0 = 'sample'
- axis1 = 'label'
- if average == 'samples':
- axis0, axis1 = axis1, axis0
-
- if metric in warn_for and 'f-score' in warn_for:
- msg_start = '{0} and F-score are'.format(metric.title())
- elif metric in warn_for:
- msg_start = '{0} is'.format(metric.title())
- elif 'f-score' in warn_for:
- msg_start = 'F-score is'
- else:
- return result
-
- msg = ('{0} ill-defined and being set to 0.0 {{0}} '
- 'no {1} {2}s.'.format(msg_start, modifier, axis0))
- if len(mask) == 1:
- msg = msg.format('due to')
- else:
- msg = msg.format('in {0}s with'.format(axis1))
- warnings.warn(msg, UndefinedMetricWarning, stacklevel=2)
- return result
-
-
-def precision_recall_fscore_support(y_true, y_pred, beta=1.0, labels=None,
- pos_label=1, average=None,
- warn_for=('precision', 'recall',
- 'f-score'),
- sample_weight=None):
- """Compute precision, recall, F-measure and support for each class
-
- The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
- true positives and ``fp`` the number of false positives. The precision is
- intuitively the ability of the classifier not to label as positive a sample
- that is negative.
-
- The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
- true positives and ``fn`` the number of false negatives. The recall is
- intuitively the ability of the classifier to find all the positive samples.
-
- The F-beta score can be interpreted as a weighted harmonic mean of
- the precision and recall, where an F-beta score reaches its best
- value at 1 and worst score at 0.
-
- The F-beta score weights recall more than precision by a factor of
- ``beta``. ``beta == 1.0`` means recall and precision are equally important.
-
- The support is the number of occurrences of each class in ``y_true``.
-
- If ``pos_label is None`` and in binary classification, this function
- returns the average precision, recall and F-measure if ``average``
- is one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) target values.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Estimated targets as returned by a classifier.
-
- beta : float, 1.0 by default
- The strength of recall versus precision in the F-score.
-
- labels : list, optional
- The set of labels to include when ``average != 'binary'``, and their
- order if ``average is None``. Labels present in the data can be
- excluded, for example to calculate a multiclass average ignoring a
- majority negative class, while labels not present in the data will
- result in 0 components in a macro average. For multilabel targets,
- labels are column indices. By default, all labels in ``y_true`` and
- ``y_pred`` are used in sorted order.
-
- pos_label : str or int, 1 by default
- The class to report if ``average='binary'``. Until version 0.18 it is
- necessary to set ``pos_label=None`` if seeking to use another averaging
- method over binary targets.
-
- average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \
- 'weighted']
- If ``None``, the scores for each class are returned. Otherwise, this
- determines the type of averaging performed on the data:
-
- ``'binary'``:
- Only report results for the class specified by ``pos_label``.
- This is applicable only if targets (``y_{true,pred}``) are binary.
- ``'micro'``:
- Calculate metrics globally by counting the total true positives,
- false negatives and false positives.
- ``'macro'``:
- Calculate metrics for each label, and find their unweighted
- mean. This does not take label imbalance into account.
- ``'weighted'``:
- Calculate metrics for each label, and find their average, weighted
- by support (the number of true instances for each label). This
- alters 'macro' to account for label imbalance; it can result in an
- F-score that is not between precision and recall.
- ``'samples'``:
- Calculate metrics for each instance, and find their average (only
- meaningful for multilabel classification where this differs from
- :func:`accuracy_score`).
-
- Note that if ``pos_label`` is given in binary classification with
- `average != 'binary'`, only that positive class is reported. This
- behavior is deprecated and will change in version 0.18.
-
- warn_for : tuple or set, for internal use
- This determines which warnings will be made in the case that this
- function is being used to return only one of its metrics.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- precision: float (if average is not None) or array of float, shape =\
- [n_unique_labels]
-
- recall: float (if average is not None) or array of float, , shape =\
- [n_unique_labels]
-
- fbeta_score: float (if average is not None) or array of float, shape =\
- [n_unique_labels]
-
- support: int (if average is not None) or array of int, shape =\
- [n_unique_labels]
- The number of occurrences of each label in ``y_true``.
-
- References
- ----------
- .. [1] `Wikipedia entry for the Precision and recall
- `_
-
- .. [2] `Wikipedia entry for the F1-score
- `_
-
- .. [3] `Discriminative Methods for Multi-labeled Classification Advances
- in Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu
- Godbole, Sunita Sarawagi
- `
-
- Examples
- --------
- >>> from sklearn.metrics import precision_recall_fscore_support
- >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
- >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
- >>> precision_recall_fscore_support(y_true, y_pred, average='macro')
- ... # doctest: +ELLIPSIS
- (0.22..., 0.33..., 0.26..., None)
- >>> precision_recall_fscore_support(y_true, y_pred, average='micro')
- ... # doctest: +ELLIPSIS
- (0.33..., 0.33..., 0.33..., None)
- >>> precision_recall_fscore_support(y_true, y_pred, average='weighted')
- ... # doctest: +ELLIPSIS
- (0.22..., 0.33..., 0.26..., None)
-
- It is possible to compute per-label precisions, recalls, F1-scores and
- supports instead of averaging:
- >>> precision_recall_fscore_support(y_true, y_pred, average=None,
- ... labels=['pig', 'dog', 'cat'])
- ... # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE
- (array([ 0. , 0. , 0.66...]),
- array([ 0., 0., 1.]),
- array([ 0. , 0. , 0.8]),
- array([2, 2, 2]))
-
- """
- average_options = (None, 'micro', 'macro', 'weighted', 'samples')
- if average not in average_options and average != 'binary':
- raise ValueError('average has to be one of ' +
- str(average_options))
- if beta <= 0:
- raise ValueError("beta should be >0 in the F-beta score")
-
- y_type, y_true, y_pred = _check_targets(y_true, y_pred)
- present_labels = unique_labels(y_true, y_pred)
-
- if average == 'binary' and (y_type != 'binary' or pos_label is None):
- warnings.warn('The default `weighted` averaging is deprecated, '
- 'and from version 0.18, use of precision, recall or '
- 'F-score with multiclass or multilabel data or '
- 'pos_label=None will result in an exception. '
- 'Please set an explicit value for `average`, one of '
- '%s. In cross validation use, for instance, '
- 'scoring="f1_weighted" instead of scoring="f1".'
- % str(average_options), DeprecationWarning, stacklevel=2)
- average = 'weighted'
-
- if y_type == 'binary' and pos_label is not None and average is not None:
- if average != 'binary':
- warnings.warn('From version 0.18, binary input will not be '
- 'handled specially when using averaged '
- 'precision/recall/F-score. '
- 'Please use average=\'binary\' to report only the '
- 'positive class performance.', DeprecationWarning)
- if labels is None or len(labels) <= 2:
- if pos_label not in present_labels:
- if len(present_labels) < 2:
- # Only negative labels
- return (0., 0., 0., 0)
- else:
- raise ValueError("pos_label=%r is not a valid label: %r" %
- (pos_label, present_labels))
- labels = [pos_label]
- if labels is None:
- labels = present_labels
- n_labels = None
- else:
- n_labels = len(labels)
- labels = np.hstack([labels, np.setdiff1d(present_labels, labels,
- assume_unique=True)])
-
- # Calculate tp_sum, pred_sum, true_sum ###
-
- if y_type.startswith('multilabel'):
- sum_axis = 1 if average == 'samples' else 0
-
- # All labels are index integers for multilabel.
- # Select labels:
- if not np.all(labels == present_labels):
- if np.max(labels) > np.max(present_labels):
- raise ValueError('All labels must be in [0, n labels). '
- 'Got %d > %d' %
- (np.max(labels), np.max(present_labels)))
- if np.min(labels) < 0:
- raise ValueError('All labels must be in [0, n labels). '
- 'Got %d < 0' % np.min(labels))
-
- y_true = y_true[:, labels[:n_labels]]
- y_pred = y_pred[:, labels[:n_labels]]
-
- # calculate weighted counts
- true_and_pred = y_true.multiply(y_pred)
- tp_sum = count_nonzero(true_and_pred, axis=sum_axis,
- sample_weight=sample_weight)
- pred_sum = count_nonzero(y_pred, axis=sum_axis,
- sample_weight=sample_weight)
- true_sum = count_nonzero(y_true, axis=sum_axis,
- sample_weight=sample_weight)
-
- elif average == 'samples':
- raise ValueError("Sample-based precision, recall, fscore is "
- "not meaningful outside multilabel "
- "classification. See the accuracy_score instead.")
- else:
- le = LabelEncoder()
- le.fit(labels)
- y_true = le.transform(y_true)
- y_pred = le.transform(y_pred)
- sorted_labels = le.classes_
-
- # labels are now from 0 to len(labels) - 1 -> use bincount
- tp = y_true == y_pred
- tp_bins = y_true[tp]
- if sample_weight is not None:
- tp_bins_weights = np.asarray(sample_weight)[tp]
- else:
- tp_bins_weights = None
-
- if len(tp_bins):
- tp_sum = bincount(tp_bins, weights=tp_bins_weights,
- minlength=len(labels))
- else:
- # Pathological case
- true_sum = pred_sum = tp_sum = np.zeros(len(labels))
- if len(y_pred):
- pred_sum = bincount(y_pred, weights=sample_weight,
- minlength=len(labels))
- if len(y_true):
- true_sum = bincount(y_true, weights=sample_weight,
- minlength=len(labels))
-
- # Retain only selected labels
- indices = np.searchsorted(sorted_labels, labels[:n_labels])
- tp_sum = tp_sum[indices]
- true_sum = true_sum[indices]
- pred_sum = pred_sum[indices]
-
- if average == 'micro':
- tp_sum = np.array([tp_sum.sum()])
- pred_sum = np.array([pred_sum.sum()])
- true_sum = np.array([true_sum.sum()])
-
- # Finally, we have all our sufficient statistics. Divide! #
-
- beta2 = beta ** 2
- with np.errstate(divide='ignore', invalid='ignore'):
- # Divide, and on zero-division, set scores to 0 and warn:
-
- # Oddly, we may get an "invalid" rather than a "divide" error
- # here.
- precision = _prf_divide(tp_sum, pred_sum,
- 'precision', 'predicted', average, warn_for)
- recall = _prf_divide(tp_sum, true_sum,
- 'recall', 'true', average, warn_for)
- # Don't need to warn for F: either P or R warned, or tp == 0 where pos
- # and true are nonzero, in which case, F is well-defined and zero
- f_score = ((1 + beta2) * precision * recall /
- (beta2 * precision + recall))
- f_score[tp_sum == 0] = 0.0
-
- # Average the results
-
- if average == 'weighted':
- weights = true_sum
- if weights.sum() == 0:
- return 0, 0, 0, None
- elif average == 'samples':
- weights = sample_weight
- else:
- weights = None
-
- if average is not None:
- assert average != 'binary' or len(precision) == 1
- precision = np.average(precision, weights=weights)
- recall = np.average(recall, weights=weights)
- f_score = np.average(f_score, weights=weights)
- true_sum = None # return no support
-
- return precision, recall, f_score, true_sum
-
-
-def precision_score(y_true, y_pred, labels=None, pos_label=1,
- average='binary', sample_weight=None):
- """Compute the precision
-
- The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
- true positives and ``fp`` the number of false positives. The precision is
- intuitively the ability of the classifier not to label as positive a sample
- that is negative.
-
- The best value is 1 and the worst value is 0.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) target values.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Estimated targets as returned by a classifier.
-
- labels : list, optional
- The set of labels to include when ``average != 'binary'``, and their
- order if ``average is None``. Labels present in the data can be
- excluded, for example to calculate a multiclass average ignoring a
- majority negative class, while labels not present in the data will
- result in 0 components in a macro average. For multilabel targets,
- labels are column indices. By default, all labels in ``y_true`` and
- ``y_pred`` are used in sorted order.
-
- .. versionchanged:: 0.17
- parameter *labels* improved for multiclass problem.
-
- pos_label : str or int, 1 by default
- The class to report if ``average='binary'``. Until version 0.18 it is
- necessary to set ``pos_label=None`` if seeking to use another averaging
- method over binary targets.
-
- average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
- 'weighted']
- This parameter is required for multiclass/multilabel targets.
- If ``None``, the scores for each class are returned. Otherwise, this
- determines the type of averaging performed on the data:
-
- ``'binary'``:
- Only report results for the class specified by ``pos_label``.
- This is applicable only if targets (``y_{true,pred}``) are binary.
- ``'micro'``:
- Calculate metrics globally by counting the total true positives,
- false negatives and false positives.
- ``'macro'``:
- Calculate metrics for each label, and find their unweighted
- mean. This does not take label imbalance into account.
- ``'weighted'``:
- Calculate metrics for each label, and find their average, weighted
- by support (the number of true instances for each label). This
- alters 'macro' to account for label imbalance; it can result in an
- F-score that is not between precision and recall.
- ``'samples'``:
- Calculate metrics for each instance, and find their average (only
- meaningful for multilabel classification where this differs from
- :func:`accuracy_score`).
-
- Note that if ``pos_label`` is given in binary classification with
- `average != 'binary'`, only that positive class is reported. This
- behavior is deprecated and will change in version 0.18.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- precision : float (if average is not None) or array of float, shape =\
- [n_unique_labels]
- Precision of the positive class in binary classification or weighted
- average of the precision of each class for the multiclass task.
-
- Examples
- --------
-
- >>> from sklearn.metrics import precision_score
- >>> y_true = [0, 1, 2, 0, 1, 2]
- >>> y_pred = [0, 2, 1, 0, 0, 1]
- >>> precision_score(y_true, y_pred, average='macro') # doctest: +ELLIPSIS
- 0.22...
- >>> precision_score(y_true, y_pred, average='micro') # doctest: +ELLIPSIS
- 0.33...
- >>> precision_score(y_true, y_pred, average='weighted')
- ... # doctest: +ELLIPSIS
- 0.22...
- >>> precision_score(y_true, y_pred, average=None) # doctest: +ELLIPSIS
- array([ 0.66..., 0. , 0. ])
-
- """
- p, _, _, _ = precision_recall_fscore_support(y_true, y_pred,
- labels=labels,
- pos_label=pos_label,
- average=average,
- warn_for=('precision',),
- sample_weight=sample_weight)
- return p
-
-
-def recall_score(y_true, y_pred, labels=None, pos_label=1, average='binary',
- sample_weight=None):
- """Compute the recall
-
- The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
- true positives and ``fn`` the number of false negatives. The recall is
- intuitively the ability of the classifier to find all the positive samples.
-
- The best value is 1 and the worst value is 0.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) target values.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Estimated targets as returned by a classifier.
-
- labels : list, optional
- The set of labels to include when ``average != 'binary'``, and their
- order if ``average is None``. Labels present in the data can be
- excluded, for example to calculate a multiclass average ignoring a
- majority negative class, while labels not present in the data will
- result in 0 components in a macro average. For multilabel targets,
- labels are column indices. By default, all labels in ``y_true`` and
- ``y_pred`` are used in sorted order.
-
- .. versionchanged:: 0.17
- parameter *labels* improved for multiclass problem.
-
- pos_label : str or int, 1 by default
- The class to report if ``average='binary'``. Until version 0.18 it is
- necessary to set ``pos_label=None`` if seeking to use another averaging
- method over binary targets.
-
- average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
- 'weighted']
- This parameter is required for multiclass/multilabel targets.
- If ``None``, the scores for each class are returned. Otherwise, this
- determines the type of averaging performed on the data:
-
- ``'binary'``:
- Only report results for the class specified by ``pos_label``.
- This is applicable only if targets (``y_{true,pred}``) are binary.
- ``'micro'``:
- Calculate metrics globally by counting the total true positives,
- false negatives and false positives.
- ``'macro'``:
- Calculate metrics for each label, and find their unweighted
- mean. This does not take label imbalance into account.
- ``'weighted'``:
- Calculate metrics for each label, and find their average, weighted
- by support (the number of true instances for each label). This
- alters 'macro' to account for label imbalance; it can result in an
- F-score that is not between precision and recall.
- ``'samples'``:
- Calculate metrics for each instance, and find their average (only
- meaningful for multilabel classification where this differs from
- :func:`accuracy_score`).
-
- Note that if ``pos_label`` is given in binary classification with
- `average != 'binary'`, only that positive class is reported. This
- behavior is deprecated and will change in version 0.18.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- recall : float (if average is not None) or array of float, shape =\
- [n_unique_labels]
- Recall of the positive class in binary classification or weighted
- average of the recall of each class for the multiclass task.
-
- Examples
- --------
- >>> from sklearn.metrics import recall_score
- >>> y_true = [0, 1, 2, 0, 1, 2]
- >>> y_pred = [0, 2, 1, 0, 0, 1]
- >>> recall_score(y_true, y_pred, average='macro') # doctest: +ELLIPSIS
- 0.33...
- >>> recall_score(y_true, y_pred, average='micro') # doctest: +ELLIPSIS
- 0.33...
- >>> recall_score(y_true, y_pred, average='weighted') # doctest: +ELLIPSIS
- 0.33...
- >>> recall_score(y_true, y_pred, average=None)
- array([ 1., 0., 0.])
-
-
- """
- _, r, _, _ = precision_recall_fscore_support(y_true, y_pred,
- labels=labels,
- pos_label=pos_label,
- average=average,
- warn_for=('recall',),
- sample_weight=sample_weight)
- return r
-
-
-def classification_report(y_true, y_pred, labels=None, target_names=None,
- sample_weight=None, digits=2):
- """Build a text report showing the main classification metrics
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) target values.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Estimated targets as returned by a classifier.
-
- labels : array, shape = [n_labels]
- Optional list of label indices to include in the report.
-
- target_names : list of strings
- Optional display names matching the labels (same order).
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- digits : int
- Number of digits for formatting output floating point values
-
- Returns
- -------
- report : string
- Text summary of the precision, recall, F1 score for each class.
-
- Examples
- --------
- >>> from sklearn.metrics import classification_report
- >>> y_true = [0, 1, 2, 2, 2]
- >>> y_pred = [0, 0, 2, 2, 1]
- >>> target_names = ['class 0', 'class 1', 'class 2']
- >>> print(classification_report(y_true, y_pred, target_names=target_names))
- precision recall f1-score support
-
- class 0 0.50 1.00 0.67 1
- class 1 0.00 0.00 0.00 1
- class 2 1.00 0.67 0.80 3
-
- avg / total 0.70 0.60 0.61 5
-
-
- """
-
- if labels is None:
- labels = unique_labels(y_true, y_pred)
- else:
- labels = np.asarray(labels)
-
- last_line_heading = 'avg / total'
-
- if target_names is None:
- target_names = ['%s' % l for l in labels]
- name_width = max(len(cn) for cn in target_names)
- width = max(name_width, len(last_line_heading), digits)
-
- headers = ["precision", "recall", "f1-score", "support"]
- fmt = '%% %ds' % width # first column: class name
- fmt += ' '
- fmt += ' '.join(['% 9s' for _ in headers])
- fmt += '\n'
-
- headers = [""] + headers
- report = fmt % tuple(headers)
- report += '\n'
-
- p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,
- labels=labels,
- average=None,
- sample_weight=sample_weight)
-
- for i, label in enumerate(labels):
- values = [target_names[i]]
- for v in (p[i], r[i], f1[i]):
- values += ["{0:0.{1}f}".format(v, digits)]
- values += ["{0}".format(s[i])]
- report += fmt % tuple(values)
-
- report += '\n'
-
- # compute averages
- values = [last_line_heading]
- for v in (np.average(p, weights=s),
- np.average(r, weights=s),
- np.average(f1, weights=s)):
- values += ["{0:0.{1}f}".format(v, digits)]
- values += ['{0}'.format(np.sum(s))]
- report += fmt % tuple(values)
- return report
-
-
-def hamming_loss(y_true, y_pred, classes=None, sample_weight=None):
- """Compute the average Hamming loss.
-
- The Hamming loss is the fraction of labels that are incorrectly predicted.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : 1d array-like, or label indicator array / sparse matrix
- Ground truth (correct) labels.
-
- y_pred : 1d array-like, or label indicator array / sparse matrix
- Predicted labels, as returned by a classifier.
-
- classes : array, shape = [n_labels], optional
- Integer array of labels.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- loss : float or int,
- Return the average Hamming loss between element of ``y_true`` and
- ``y_pred``.
-
- See Also
- --------
- accuracy_score, jaccard_similarity_score, zero_one_loss
-
- Notes
- -----
- In multiclass classification, the Hamming loss correspond to the Hamming
- distance between ``y_true`` and ``y_pred`` which is equivalent to the
- subset ``zero_one_loss`` function.
-
- In multilabel classification, the Hamming loss is different from the
- subset zero-one loss. The zero-one loss considers the entire set of labels
- for a given sample incorrect if it does entirely match the true set of
- labels. Hamming loss is more forgiving in that it penalizes the individual
- labels.
-
- The Hamming loss is upperbounded by the subset zero-one loss. When
- normalized over samples, the Hamming loss is always between 0 and 1.
-
- References
- ----------
- .. [1] Grigorios Tsoumakas, Ioannis Katakis. Multi-Label Classification:
- An Overview. International Journal of Data Warehousing & Mining,
- 3(3), 1-13, July-September 2007.
-
- .. [2] `Wikipedia entry on the Hamming distance
- `_
-
- Examples
- --------
- >>> from sklearn.metrics import hamming_loss
- >>> y_pred = [1, 2, 3, 4]
- >>> y_true = [2, 2, 3, 4]
- >>> hamming_loss(y_true, y_pred)
- 0.25
-
- In the multilabel case with binary label indicators:
-
- >>> hamming_loss(np.array([[0, 1], [1, 1]]), np.zeros((2, 2)))
- 0.75
- """
- y_type, y_true, y_pred = _check_targets(y_true, y_pred)
-
- if classes is None:
- classes = unique_labels(y_true, y_pred)
- else:
- classes = np.asarray(classes)
-
- if sample_weight is None:
- weight_average = 1.
- else:
- weight_average = np.mean(sample_weight)
-
- if y_type.startswith('multilabel'):
- n_differences = count_nonzero(y_true - y_pred,
- sample_weight=sample_weight)
- return (n_differences /
- (y_true.shape[0] * len(classes) * weight_average))
-
- elif y_type in ["binary", "multiclass"]:
- return _weighted_sum(y_true != y_pred, sample_weight, normalize=True)
- else:
- raise ValueError("{0} is not supported".format(y_type))
-
-
-def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
- """Log loss, aka logistic loss or cross-entropy loss.
-
- This is the loss function used in (multinomial) logistic regression
- and extensions of it such as neural networks, defined as the negative
- log-likelihood of the true labels given a probabilistic classifier's
- predictions. For a single sample with true label yt in {0,1} and
- estimated probability yp that yt = 1, the log loss is
-
- -log P(yt|yp) = -(yt log(yp) + (1 - yt) log(1 - yp))
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : array-like or label indicator matrix
- Ground truth (correct) labels for n_samples samples.
-
- y_pred : array-like of float, shape = (n_samples, n_classes)
- Predicted probabilities, as returned by a classifier's
- predict_proba method.
-
- eps : float
- Log loss is undefined for p=0 or p=1, so probabilities are
- clipped to max(eps, min(1 - eps, p)).
-
- normalize : bool, optional (default=True)
- If true, return the mean loss per sample.
- Otherwise, return the sum of the per-sample losses.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- loss : float
-
- Examples
- --------
- >>> log_loss(["spam", "ham", "ham", "spam"], # doctest: +ELLIPSIS
- ... [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])
- 0.21616...
-
- References
- ----------
- C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
- p. 209.
-
- Notes
- -----
- The logarithm used is the natural logarithm (base-e).
- """
- lb = LabelBinarizer()
- T = lb.fit_transform(y_true)
- if T.shape[1] == 1:
- T = np.append(1 - T, T, axis=1)
-
- y_pred = check_array(y_pred, ensure_2d=False)
- # Clipping
- Y = np.clip(y_pred, eps, 1 - eps)
-
- # This happens in cases when elements in y_pred have type "str".
- if not isinstance(Y, np.ndarray):
- raise ValueError("y_pred should be an array of floats.")
-
- # If y_pred is of single dimension, assume y_true to be binary
- # and then check.
- if Y.ndim == 1:
- Y = Y[:, np.newaxis]
- if Y.shape[1] == 1:
- Y = np.append(1 - Y, Y, axis=1)
-
- # Check if dimensions are consistent.
- check_consistent_length(T, Y)
- T = check_array(T)
- Y = check_array(Y)
- if T.shape[1] != Y.shape[1]:
- raise ValueError("y_true and y_pred have different number of classes "
- "%d, %d" % (T.shape[1], Y.shape[1]))
-
- # Renormalize
- Y /= Y.sum(axis=1)[:, np.newaxis]
- loss = -(T * np.log(Y)).sum(axis=1)
-
- return _weighted_sum(loss, sample_weight, normalize)
-
-
-def hinge_loss(y_true, pred_decision, labels=None, sample_weight=None):
- """Average hinge loss (non-regularized)
-
- In binary class case, assuming labels in y_true are encoded with +1 and -1,
- when a prediction mistake is made, ``margin = y_true * pred_decision`` is
- always negative (since the signs disagree), implying ``1 - margin`` is
- always greater than 1. The cumulated hinge loss is therefore an upper
- bound of the number of mistakes made by the classifier.
-
- In multiclass case, the function expects that either all the labels are
- included in y_true or an optional labels argument is provided which
- contains all the labels. The multilabel margin is calculated according
- to Crammer-Singer's method. As in the binary case, the cumulated hinge loss
- is an upper bound of the number of mistakes made by the classifier.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : array, shape = [n_samples]
- True target, consisting of integers of two values. The positive label
- must be greater than the negative label.
-
- pred_decision : array, shape = [n_samples] or [n_samples, n_classes]
- Predicted decisions, as output by decision_function (floats).
-
- labels : array, optional, default None
- Contains all the labels for the problem. Used in multiclass hinge loss.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- Returns
- -------
- loss : float
-
- References
- ----------
- .. [1] `Wikipedia entry on the Hinge loss
- `_
-
- .. [2] Koby Crammer, Yoram Singer. On the Algorithmic
- Implementation of Multiclass Kernel-based Vector
- Machines. Journal of Machine Learning Research 2,
- (2001), 265-292
-
- .. [3] `L1 AND L2 Regularization for Multiclass Hinge Loss Models
- by Robert C. Moore, John DeNero.
- `_
-
- Examples
- --------
- >>> from sklearn import svm
- >>> from sklearn.metrics import hinge_loss
- >>> X = [[0], [1]]
- >>> y = [-1, 1]
- >>> est = svm.LinearSVC(random_state=0)
- >>> est.fit(X, y)
- LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
- intercept_scaling=1, loss='squared_hinge', max_iter=1000,
- multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
- verbose=0)
- >>> pred_decision = est.decision_function([[-2], [3], [0.5]])
- >>> pred_decision # doctest: +ELLIPSIS
- array([-2.18..., 2.36..., 0.09...])
- >>> hinge_loss([-1, 1, 1], pred_decision) # doctest: +ELLIPSIS
- 0.30...
-
- In the multiclass case:
-
- >>> X = np.array([[0], [1], [2], [3]])
- >>> Y = np.array([0, 1, 2, 3])
- >>> labels = np.array([0, 1, 2, 3])
- >>> est = svm.LinearSVC()
- >>> est.fit(X, Y)
- LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
- intercept_scaling=1, loss='squared_hinge', max_iter=1000,
- multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
- verbose=0)
- >>> pred_decision = est.decision_function([[-1], [2], [3]])
- >>> y_true = [0, 2, 3]
- >>> hinge_loss(y_true, pred_decision, labels) #doctest: +ELLIPSIS
- 0.56...
- """
- check_consistent_length(y_true, pred_decision, sample_weight)
- pred_decision = check_array(pred_decision, ensure_2d=False)
- y_true = column_or_1d(y_true)
- y_true_unique = np.unique(y_true)
- if y_true_unique.size > 2:
- if (labels is None and pred_decision.ndim > 1 and
- (np.size(y_true_unique) != pred_decision.shape[1])):
- raise ValueError("Please include all labels in y_true "
- "or pass labels as third argument")
- if labels is None:
- labels = y_true_unique
- le = LabelEncoder()
- le.fit(labels)
- y_true = le.transform(y_true)
- mask = np.ones_like(pred_decision, dtype=bool)
- mask[np.arange(y_true.shape[0]), y_true] = False
- margin = pred_decision[~mask]
- margin -= np.max(pred_decision[mask].reshape(y_true.shape[0], -1),
- axis=1)
-
- else:
- # Handles binary class case
- # this code assumes that positive and negative labels
- # are encoded as +1 and -1 respectively
- pred_decision = column_or_1d(pred_decision)
- pred_decision = np.ravel(pred_decision)
-
- lbin = LabelBinarizer(neg_label=-1)
- y_true = lbin.fit_transform(y_true)[:, 0]
-
- try:
- margin = y_true * pred_decision
- except TypeError:
- raise TypeError("pred_decision should be an array of floats.")
-
- losses = 1 - margin
- # The hinge_loss doesn't penalize good enough predictions.
- losses[losses <= 0] = 0
- return np.average(losses, weights=sample_weight)
-
-
-def _check_binary_probabilistic_predictions(y_true, y_prob):
- """Check that y_true is binary and y_prob contains valid probabilities"""
- check_consistent_length(y_true, y_prob)
-
- labels = np.unique(y_true)
-
- if len(labels) > 2:
- raise ValueError("Only binary classification is supported. "
- "Provided labels %s." % labels)
-
- if y_prob.max() > 1:
- raise ValueError("y_prob contains values greater than 1.")
-
- if y_prob.min() < 0:
- raise ValueError("y_prob contains values less than 0.")
-
- return label_binarize(y_true, labels)[:, 0]
-
-
-def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
- """Compute the Brier score.
-
- The smaller the Brier score, the better, hence the naming with "loss".
-
- Across all items in a set N predictions, the Brier score measures the
- mean squared difference between (1) the predicted probability assigned
- to the possible outcomes for item i, and (2) the actual outcome.
- Therefore, the lower the Brier score is for a set of predictions, the
- better the predictions are calibrated. Note that the Brier score always
- takes on a value between zero and one, since this is the largest
- possible difference between a predicted probability (which must be
- between zero and one) and the actual outcome (which can take on values
- of only 0 and 1).
-
- The Brier score is appropriate for binary and categorical outcomes that
- can be structured as true or false, but is inappropriate for ordinal
- variables which can take on three or more values (this is because the
- Brier score assumes that all possible outcomes are equivalently
- "distant" from one another). Which label is considered to be the positive
- label is controlled via the parameter pos_label, which defaults to 1.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- y_true : array, shape (n_samples,)
- True targets.
-
- y_prob : array, shape (n_samples,)
- Probabilities of the positive class.
-
- sample_weight : array-like of shape = [n_samples], optional
- Sample weights.
-
- pos_label : int (default: None)
- Label of the positive class. If None, the maximum label is used as
- positive class
-
- Returns
- -------
- score : float
- Brier score
-
- Examples
- --------
- >>> import numpy as np
- >>> from sklearn.metrics import brier_score_loss
- >>> y_true = np.array([0, 1, 1, 0])
- >>> y_true_categorical = np.array(["spam", "ham", "ham", "spam"])
- >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])
- >>> brier_score_loss(y_true, y_prob) # doctest: +ELLIPSIS
- 0.037...
- >>> brier_score_loss(y_true, 1-y_prob, pos_label=0) # doctest: +ELLIPSIS
- 0.037...
- >>> brier_score_loss(y_true_categorical, y_prob, \
- pos_label="ham") # doctest: +ELLIPSIS
- 0.037...
- >>> brier_score_loss(y_true, np.array(y_prob) > 0.5)
- 0.0
-
- References
- ----------
- .. [1] `Wikipedia entry for the Brier score.
- `_
- """
- y_true = column_or_1d(y_true)
- y_prob = column_or_1d(y_prob)
- if pos_label is None:
- pos_label = y_true.max()
- y_true = np.array(y_true == pos_label, int)
- y_true = _check_binary_probabilistic_predictions(y_true, y_prob)
- return np.average((y_true - y_prob) ** 2, weights=sample_weight)
diff --git a/mloop/localsklearn/metrics/cluster/__init__.py b/mloop/localsklearn/metrics/cluster/__init__.py
deleted file mode 100644
index 911578d..0000000
--- a/mloop/localsklearn/metrics/cluster/__init__.py
+++ /dev/null
@@ -1,30 +0,0 @@
-"""
-The :mod:`sklearn.metrics.cluster` submodule contains evaluation metrics for
-cluster analysis results. There are two forms of evaluation:
-
-- supervised, which uses a ground truth class values for each sample.
-- unsupervised, which does not and measures the 'quality' of the model itself.
-"""
-from .supervised import adjusted_mutual_info_score
-from .supervised import normalized_mutual_info_score
-from .supervised import adjusted_rand_score
-from .supervised import completeness_score
-from .supervised import contingency_matrix
-#from .supervised import expected_mutual_information
-from .supervised import homogeneity_completeness_v_measure
-from .supervised import homogeneity_score
-from .supervised import mutual_info_score
-from .supervised import v_measure_score
-from .supervised import fowlkes_mallows_score
-from .supervised import entropy
-from .unsupervised import silhouette_samples
-from .unsupervised import silhouette_score
-from .unsupervised import calinski_harabaz_score
-from .bicluster import consensus_score
-
-__all__ = ["adjusted_mutual_info_score", "normalized_mutual_info_score",
- "adjusted_rand_score", "completeness_score", "contingency_matrix",
- "expected_mutual_information", "homogeneity_completeness_v_measure",
- "homogeneity_score", "mutual_info_score", "v_measure_score",
- "fowlkes_mallows_score", "entropy", "silhouette_samples",
- "silhouette_score", "calinski_harabaz_score", "consensus_score"]
diff --git a/mloop/localsklearn/metrics/cluster/bicluster.py b/mloop/localsklearn/metrics/cluster/bicluster.py
deleted file mode 100644
index 6a91127..0000000
--- a/mloop/localsklearn/metrics/cluster/bicluster.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from __future__ import division
-
-import numpy as np
-
-from sklearn.utils.linear_assignment_ import linear_assignment
-from sklearn.utils.validation import check_consistent_length, check_array
-
-__all__ = ["consensus_score"]
-
-
-def _check_rows_and_columns(a, b):
- """Unpacks the row and column arrays and checks their shape."""
- check_consistent_length(*a)
- check_consistent_length(*b)
- checks = lambda x: check_array(x, ensure_2d=False)
- a_rows, a_cols = map(checks, a)
- b_rows, b_cols = map(checks, b)
- return a_rows, a_cols, b_rows, b_cols
-
-
-def _jaccard(a_rows, a_cols, b_rows, b_cols):
- """Jaccard coefficient on the elements of the two biclusters."""
- intersection = ((a_rows * b_rows).sum() *
- (a_cols * b_cols).sum())
-
- a_size = a_rows.sum() * a_cols.sum()
- b_size = b_rows.sum() * b_cols.sum()
-
- return intersection / (a_size + b_size - intersection)
-
-
-def _pairwise_similarity(a, b, similarity):
- """Computes pairwise similarity matrix.
-
- result[i, j] is the Jaccard coefficient of a's bicluster i and b's
- bicluster j.
-
- """
- a_rows, a_cols, b_rows, b_cols = _check_rows_and_columns(a, b)
- n_a = a_rows.shape[0]
- n_b = b_rows.shape[0]
- result = np.array(list(list(similarity(a_rows[i], a_cols[i],
- b_rows[j], b_cols[j])
- for j in range(n_b))
- for i in range(n_a)))
- return result
-
-
-def consensus_score(a, b, similarity="jaccard"):
- """The similarity of two sets of biclusters.
-
- Similarity between individual biclusters is computed. Then the
- best matching between sets is found using the Hungarian algorithm.
- The final score is the sum of similarities divided by the size of
- the larger set.
-
- Read more in the :ref:`User Guide `.
-
- Parameters
- ----------
- a : (rows, columns)
- Tuple of row and column indicators for a set of biclusters.
-
- b : (rows, columns)
- Another set of biclusters like ``a``.
-
- similarity : string or function, optional, default: "jaccard"
- May be the string "jaccard" to use the Jaccard coefficient, or
- any function that takes four arguments, each of which is a 1d
- indicator vector: (a_rows, a_columns, b_rows, b_columns).
-
- References
- ----------
-
- * Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis
- for bicluster acquisition
- `__.
-
- """
- if similarity == "jaccard":
- similarity = _jaccard
- matrix = _pairwise_similarity(a, b, similarity)
- indices = linear_assignment(1. - matrix)
- n_a = len(a[0])
- n_b = len(b[0])
- return matrix[indices[:, 0], indices[:, 1]].sum() / max(n_a, n_b)
diff --git a/mloop/localsklearn/metrics/cluster/expected_mutual_info_fast.c b/mloop/localsklearn/metrics/cluster/expected_mutual_info_fast.c
deleted file mode 100644
index 9a4ece1..0000000
--- a/mloop/localsklearn/metrics/cluster/expected_mutual_info_fast.c
+++ /dev/null
@@ -1,8145 +0,0 @@
-/* Generated by Cython 0.24 */
-
-#define PY_SSIZE_T_CLEAN
-#include "Python.h"
-#ifndef Py_PYTHON_H
- #error Python headers needed to compile C extensions, please install development version of Python.
-#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000)
- #error Cython requires Python 2.6+ or Python 3.2+.
-#else
-#define CYTHON_ABI "0_24"
-#include
-#ifndef offsetof
- #define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
-#endif
-#if !defined(WIN32) && !defined(MS_WINDOWS)
- #ifndef __stdcall
- #define __stdcall
- #endif
- #ifndef __cdecl
- #define __cdecl
- #endif
- #ifndef __fastcall
- #define __fastcall
- #endif
-#endif
-#ifndef DL_IMPORT
- #define DL_IMPORT(t) t
-#endif
-#ifndef DL_EXPORT
- #define DL_EXPORT(t) t
-#endif
-#ifndef PY_LONG_LONG
- #define PY_LONG_LONG LONG_LONG
-#endif
-#ifndef Py_HUGE_VAL
- #define Py_HUGE_VAL HUGE_VAL
-#endif
-#ifdef PYPY_VERSION
- #define CYTHON_COMPILING_IN_PYPY 1
- #define CYTHON_COMPILING_IN_CPYTHON 0
-#else
- #define CYTHON_COMPILING_IN_PYPY 0
- #define CYTHON_COMPILING_IN_CPYTHON 1
-#endif
-#if !defined(CYTHON_USE_PYLONG_INTERNALS) && CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x02070000
- #define CYTHON_USE_PYLONG_INTERNALS 1
-#endif
-#if CYTHON_USE_PYLONG_INTERNALS
- #include "longintrepr.h"
- #undef SHIFT
- #undef BASE
- #undef MASK
-#endif
-#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
- #define Py_OptimizeFlag 0
-#endif
-#define __PYX_BUILD_PY_SSIZE_T "n"
-#define CYTHON_FORMAT_SSIZE_T "z"
-#if PY_MAJOR_VERSION < 3
- #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
- #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
- PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
- #define __Pyx_DefaultClassType PyClass_Type
-#else
- #define __Pyx_BUILTIN_MODULE_NAME "builtins"
- #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
- PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
- #define __Pyx_DefaultClassType PyType_Type
-#endif
-#ifndef Py_TPFLAGS_CHECKTYPES
- #define Py_TPFLAGS_CHECKTYPES 0
-#endif
-#ifndef Py_TPFLAGS_HAVE_INDEX
- #define Py_TPFLAGS_HAVE_INDEX 0
-#endif
-#ifndef Py_TPFLAGS_HAVE_NEWBUFFER
- #define Py_TPFLAGS_HAVE_NEWBUFFER 0
-#endif
-#ifndef Py_TPFLAGS_HAVE_FINALIZE
- #define Py_TPFLAGS_HAVE_FINALIZE 0
-#endif
-#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
- #define CYTHON_PEP393_ENABLED 1
- #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\
- 0 : _PyUnicode_Ready((PyObject *)(op)))
- #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
- #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
- #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
- #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
- #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
- #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
-#else
- #define CYTHON_PEP393_ENABLED 0
- #define __Pyx_PyUnicode_READY(op) (0)
- #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
- #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
- #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
- #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
- #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
- #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u))
-#endif
-#if CYTHON_COMPILING_IN_PYPY
- #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
- #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
-#else
- #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
- #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\
- PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
-#endif
-#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains)
- #define PyUnicode_Contains(u, s) PySequence_Contains(u, s)
-#endif
-#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format)
- #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt)
-#endif
-#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc)
- #define PyObject_Malloc(s) PyMem_Malloc(s)
- #define PyObject_Free(p) PyMem_Free(p)
- #define PyObject_Realloc(p) PyMem_Realloc(p)
-#endif
-#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
-#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
-#if PY_MAJOR_VERSION >= 3
- #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
-#else
- #define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
-#endif
-#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
- #define PyObject_ASCII(o) PyObject_Repr(o)
-#endif
-#if PY_MAJOR_VERSION >= 3
- #define PyBaseString_Type PyUnicode_Type
- #define PyStringObject PyUnicodeObject
- #define PyString_Type PyUnicode_Type
- #define PyString_Check PyUnicode_Check
- #define PyString_CheckExact PyUnicode_CheckExact
-#endif
-#if PY_MAJOR_VERSION >= 3
- #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
- #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
-#else
- #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
- #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
-#endif
-#ifndef PySet_CheckExact
- #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
-#endif
-#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
-#if PY_MAJOR_VERSION >= 3
- #define PyIntObject PyLongObject
- #define PyInt_Type PyLong_Type
- #define PyInt_Check(op) PyLong_Check(op)
- #define PyInt_CheckExact(op) PyLong_CheckExact(op)
- #define PyInt_FromString PyLong_FromString
- #define PyInt_FromUnicode PyLong_FromUnicode
- #define PyInt_FromLong PyLong_FromLong
- #define PyInt_FromSize_t PyLong_FromSize_t
- #define PyInt_FromSsize_t PyLong_FromSsize_t
- #define PyInt_AsLong PyLong_AsLong
- #define PyInt_AS_LONG PyLong_AS_LONG
- #define PyInt_AsSsize_t PyLong_AsSsize_t
- #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
- #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
- #define PyNumber_Int PyNumber_Long
-#endif
-#if PY_MAJOR_VERSION >= 3
- #define PyBoolObject PyLongObject
-#endif
-#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
- #ifndef PyUnicode_InternFromString
- #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
- #endif
-#endif
-#if PY_VERSION_HEX < 0x030200A4
- typedef long Py_hash_t;
- #define __Pyx_PyInt_FromHash_t PyInt_FromLong
- #define __Pyx_PyInt_AsHash_t PyInt_AsLong
-#else
- #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
- #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t
-#endif
-#if PY_MAJOR_VERSION >= 3
- #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
-#else
- #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
-#endif
-#if PY_VERSION_HEX >= 0x030500B1
-#define __Pyx_PyAsyncMethodsStruct PyAsyncMethods
-#define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async)
-#elif CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
-typedef struct {
- unaryfunc am_await;
- unaryfunc am_aiter;
- unaryfunc am_anext;
-} __Pyx_PyAsyncMethodsStruct;
-#define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved))
-#else
-#define __Pyx_PyType_AsAsync(obj) NULL
-#endif
-#ifndef CYTHON_RESTRICT
- #if defined(__GNUC__)
- #define CYTHON_RESTRICT __restrict__
- #elif defined(_MSC_VER) && _MSC_VER >= 1400
- #define CYTHON_RESTRICT __restrict
- #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
- #define CYTHON_RESTRICT restrict
- #else
- #define CYTHON_RESTRICT
- #endif
-#endif
-#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None)
-
-#ifndef CYTHON_INLINE
- #if defined(__GNUC__)
- #define CYTHON_INLINE __inline__
- #elif defined(_MSC_VER)
- #define CYTHON_INLINE __inline
- #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
- #define CYTHON_INLINE inline
- #else
- #define CYTHON_INLINE
- #endif
-#endif
-
-#if defined(WIN32) || defined(MS_WINDOWS)
- #define _USE_MATH_DEFINES
-#endif
-#include
-#ifdef NAN
-#define __PYX_NAN() ((float) NAN)
-#else
-static CYTHON_INLINE float __PYX_NAN() {
- float value;
- memset(&value, 0xFF, sizeof(value));
- return value;
-}
-#endif
-
-
-#define __PYX_ERR(f_index, lineno, Ln_error) \
-{ \
- __pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \
-}
-
-#if PY_MAJOR_VERSION >= 3
- #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
- #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
-#else
- #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
- #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
-#endif
-
-#ifndef __PYX_EXTERN_C
- #ifdef __cplusplus
- #define __PYX_EXTERN_C extern "C"
- #else
- #define __PYX_EXTERN_C extern
- #endif
-#endif
-
-#define __PYX_HAVE__sklearn__metrics__cluster__expected_mutual_info_fast
-#define __PYX_HAVE_API__sklearn__metrics__cluster__expected_mutual_info_fast
-#include "math.h"
-#include "string.h"
-#include "stdio.h"
-#include "stdlib.h"
-#include "numpy/arrayobject.h"
-#include "numpy/ufuncobject.h"
-#ifdef _OPENMP
-#include
-#endif /* _OPENMP */
-
-#ifdef PYREX_WITHOUT_ASSERTIONS
-#define CYTHON_WITHOUT_ASSERTIONS
-#endif
-
-#ifndef CYTHON_UNUSED
-# if defined(__GNUC__)
-# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-# define CYTHON_UNUSED __attribute__ ((__unused__))
-# else
-# define CYTHON_UNUSED
-# endif
-# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
-# define CYTHON_UNUSED __attribute__ ((__unused__))
-# else
-# define CYTHON_UNUSED
-# endif
-#endif
-#ifndef CYTHON_NCP_UNUSED
-# if CYTHON_COMPILING_IN_CPYTHON
-# define CYTHON_NCP_UNUSED
-# else
-# define CYTHON_NCP_UNUSED CYTHON_UNUSED
-# endif
-#endif
-typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
- const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry;
-
-#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
-#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0
-#define __PYX_DEFAULT_STRING_ENCODING ""
-#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
-#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
-#define __Pyx_uchar_cast(c) ((unsigned char)c)
-#define __Pyx_long_cast(x) ((long)x)
-#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\
- (sizeof(type) < sizeof(Py_ssize_t)) ||\
- (sizeof(type) > sizeof(Py_ssize_t) &&\
- likely(v < (type)PY_SSIZE_T_MAX ||\
- v == (type)PY_SSIZE_T_MAX) &&\
- (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\
- v == (type)PY_SSIZE_T_MIN))) ||\
- (sizeof(type) == sizeof(Py_ssize_t) &&\
- (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\
- v == (type)PY_SSIZE_T_MAX))) )
-#if defined (__cplusplus) && __cplusplus >= 201103L
- #include
- #define __Pyx_sst_abs(value) std::abs(value)
-#elif SIZEOF_INT >= SIZEOF_SIZE_T
- #define __Pyx_sst_abs(value) abs(value)
-#elif SIZEOF_LONG >= SIZEOF_SIZE_T
- #define __Pyx_sst_abs(value) labs(value)
-#elif defined (_MSC_VER) && defined (_M_X64)
- #define __Pyx_sst_abs(value) _abs64(value)
-#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
- #define __Pyx_sst_abs(value) llabs(value)
-#elif defined (__GNUC__)
- #define __Pyx_sst_abs(value) __builtin_llabs(value)
-#else
- #define __Pyx_sst_abs(value) ((value<0) ? -value : value)
-#endif
-static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*);
-static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
-#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
-#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
-#define __Pyx_PyBytes_FromString PyBytes_FromString
-#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
-static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
-#if PY_MAJOR_VERSION < 3
- #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
- #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
-#else
- #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
- #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
-#endif
-#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s))
-#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
-#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s)
-#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s)
-#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s)
-#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s)
-#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
-#if PY_MAJOR_VERSION < 3
-static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
-{
- const Py_UNICODE *u_end = u;
- while (*u_end++) ;
- return (size_t)(u_end - u - 1);
-}
-#else
-#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
-#endif
-#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
-#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
-#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
-#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj)
-#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None)
-#define __Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False))
-static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
-static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x);
-static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
-static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
-#if CYTHON_COMPILING_IN_CPYTHON
-#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
-#else
-#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
-#endif
-#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
-#if PY_MAJOR_VERSION >= 3
-#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x))
-#else
-#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x))
-#endif
-#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x))
-#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
-static int __Pyx_sys_getdefaultencoding_not_ascii;
-static int __Pyx_init_sys_getdefaultencoding_params(void) {
- PyObject* sys;
- PyObject* default_encoding = NULL;
- PyObject* ascii_chars_u = NULL;
- PyObject* ascii_chars_b = NULL;
- const char* default_encoding_c;
- sys = PyImport_ImportModule("sys");
- if (!sys) goto bad;
- default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
- Py_DECREF(sys);
- if (!default_encoding) goto bad;
- default_encoding_c = PyBytes_AsString(default_encoding);
- if (!default_encoding_c) goto bad;
- if (strcmp(default_encoding_c, "ascii") == 0) {
- __Pyx_sys_getdefaultencoding_not_ascii = 0;
- } else {
- char ascii_chars[128];
- int c;
- for (c = 0; c < 128; c++) {
- ascii_chars[c] = c;
- }
- __Pyx_sys_getdefaultencoding_not_ascii = 1;
- ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
- if (!ascii_chars_u) goto bad;
- ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
- if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
- PyErr_Format(
- PyExc_ValueError,
- "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
- default_encoding_c);
- goto bad;
- }
- Py_DECREF(ascii_chars_u);
- Py_DECREF(ascii_chars_b);
- }
- Py_DECREF(default_encoding);
- return 0;
-bad:
- Py_XDECREF(default_encoding);
- Py_XDECREF(ascii_chars_u);
- Py_XDECREF(ascii_chars_b);
- return -1;
-}
-#endif
-#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
-#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
-#else
-#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
-#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
-static char* __PYX_DEFAULT_STRING_ENCODING;
-static int __Pyx_init_sys_getdefaultencoding_params(void) {
- PyObject* sys;
- PyObject* default_encoding = NULL;
- char* default_encoding_c;
- sys = PyImport_ImportModule("sys");
- if (!sys) goto bad;
- default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
- Py_DECREF(sys);
- if (!default_encoding) goto bad;
- default_encoding_c = PyBytes_AsString(default_encoding);
- if (!default_encoding_c) goto bad;
- __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c));
- if (!__PYX_DEFAULT_STRING_ENCODING) goto bad;
- strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
- Py_DECREF(default_encoding);
- return 0;
-bad:
- Py_XDECREF(default_encoding);
- return -1;
-}
-#endif
-#endif
-
-
-/* Test for GCC > 2.95 */
-#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
- #define likely(x) __builtin_expect(!!(x), 1)
- #define unlikely(x) __builtin_expect(!!(x), 0)
-#else /* !__GNUC__ or GCC < 2.95 */
- #define likely(x) (x)
- #define unlikely(x) (x)
-#endif /* __GNUC__ */
-
-static PyObject *__pyx_m;
-static PyObject *__pyx_d;
-static PyObject *__pyx_b;
-static PyObject *__pyx_empty_tuple;
-static PyObject *__pyx_empty_bytes;
-static PyObject *__pyx_empty_unicode;
-static int __pyx_lineno;
-static int __pyx_clineno = 0;
-static const char * __pyx_cfilenm= __FILE__;
-static const char *__pyx_filename;
-
-/* None.proto */
-#if !defined(CYTHON_CCOMPLEX)
- #if defined(__cplusplus)
- #define CYTHON_CCOMPLEX 1
- #elif defined(_Complex_I)
- #define CYTHON_CCOMPLEX 1
- #else
- #define CYTHON_CCOMPLEX 0
- #endif
-#endif
-#if CYTHON_CCOMPLEX
- #ifdef __cplusplus
- #include
- #else
- #include
- #endif
-#endif
-#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__)
- #undef _Complex_I
- #define _Complex_I 1.0fj
-#endif
-
-
-static const char *__pyx_f[] = {
- "sklearn/metrics/cluster/expected_mutual_info_fast.pyx",
- "__init__.pxd",
- "type.pxd",
-};
-/* BufferFormatStructs.proto */
-#define IS_UNSIGNED(type) (((type) -1) > 0)
-struct __Pyx_StructField_;
-#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
-typedef struct {
- const char* name;
- struct __Pyx_StructField_* fields;
- size_t size;
- size_t arraysize[8];
- int ndim;
- char typegroup;
- char is_unsigned;
- int flags;
-} __Pyx_TypeInfo;
-typedef struct __Pyx_StructField_ {
- __Pyx_TypeInfo* type;
- const char* name;
- size_t offset;
-} __Pyx_StructField;
-typedef struct {
- __Pyx_StructField* field;
- size_t parent_offset;
-} __Pyx_BufFmt_StackElem;
-typedef struct {
- __Pyx_StructField root;
- __Pyx_BufFmt_StackElem* head;
- size_t fmt_offset;
- size_t new_count, enc_count;
- size_t struct_alignment;
- int is_complex;
- char enc_type;
- char new_packmode;
- char enc_packmode;
- char is_valid_array;
-} __Pyx_BufFmt_Context;
-
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":725
- * # in Cython to enable them only on the right systems.
- *
- * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<<
- * ctypedef npy_int16 int16_t
- * ctypedef npy_int32 int32_t
- */
-typedef npy_int8 __pyx_t_5numpy_int8_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":726
- *
- * ctypedef npy_int8 int8_t
- * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<<
- * ctypedef npy_int32 int32_t
- * ctypedef npy_int64 int64_t
- */
-typedef npy_int16 __pyx_t_5numpy_int16_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":727
- * ctypedef npy_int8 int8_t
- * ctypedef npy_int16 int16_t
- * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<<
- * ctypedef npy_int64 int64_t
- * #ctypedef npy_int96 int96_t
- */
-typedef npy_int32 __pyx_t_5numpy_int32_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":728
- * ctypedef npy_int16 int16_t
- * ctypedef npy_int32 int32_t
- * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<<
- * #ctypedef npy_int96 int96_t
- * #ctypedef npy_int128 int128_t
- */
-typedef npy_int64 __pyx_t_5numpy_int64_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":732
- * #ctypedef npy_int128 int128_t
- *
- * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<<
- * ctypedef npy_uint16 uint16_t
- * ctypedef npy_uint32 uint32_t
- */
-typedef npy_uint8 __pyx_t_5numpy_uint8_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":733
- *
- * ctypedef npy_uint8 uint8_t
- * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<<
- * ctypedef npy_uint32 uint32_t
- * ctypedef npy_uint64 uint64_t
- */
-typedef npy_uint16 __pyx_t_5numpy_uint16_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":734
- * ctypedef npy_uint8 uint8_t
- * ctypedef npy_uint16 uint16_t
- * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<<
- * ctypedef npy_uint64 uint64_t
- * #ctypedef npy_uint96 uint96_t
- */
-typedef npy_uint32 __pyx_t_5numpy_uint32_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":735
- * ctypedef npy_uint16 uint16_t
- * ctypedef npy_uint32 uint32_t
- * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<<
- * #ctypedef npy_uint96 uint96_t
- * #ctypedef npy_uint128 uint128_t
- */
-typedef npy_uint64 __pyx_t_5numpy_uint64_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":739
- * #ctypedef npy_uint128 uint128_t
- *
- * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<<
- * ctypedef npy_float64 float64_t
- * #ctypedef npy_float80 float80_t
- */
-typedef npy_float32 __pyx_t_5numpy_float32_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":740
- *
- * ctypedef npy_float32 float32_t
- * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<<
- * #ctypedef npy_float80 float80_t
- * #ctypedef npy_float128 float128_t
- */
-typedef npy_float64 __pyx_t_5numpy_float64_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":749
- * # The int types are mapped a bit surprising --
- * # numpy.int corresponds to 'l' and numpy.long to 'q'
- * ctypedef npy_long int_t # <<<<<<<<<<<<<<
- * ctypedef npy_longlong long_t
- * ctypedef npy_longlong longlong_t
- */
-typedef npy_long __pyx_t_5numpy_int_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":750
- * # numpy.int corresponds to 'l' and numpy.long to 'q'
- * ctypedef npy_long int_t
- * ctypedef npy_longlong long_t # <<<<<<<<<<<<<<
- * ctypedef npy_longlong longlong_t
- *
- */
-typedef npy_longlong __pyx_t_5numpy_long_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":751
- * ctypedef npy_long int_t
- * ctypedef npy_longlong long_t
- * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_ulong uint_t
- */
-typedef npy_longlong __pyx_t_5numpy_longlong_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":753
- * ctypedef npy_longlong longlong_t
- *
- * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<<
- * ctypedef npy_ulonglong ulong_t
- * ctypedef npy_ulonglong ulonglong_t
- */
-typedef npy_ulong __pyx_t_5numpy_uint_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":754
- *
- * ctypedef npy_ulong uint_t
- * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<<
- * ctypedef npy_ulonglong ulonglong_t
- *
- */
-typedef npy_ulonglong __pyx_t_5numpy_ulong_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":755
- * ctypedef npy_ulong uint_t
- * ctypedef npy_ulonglong ulong_t
- * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_intp intp_t
- */
-typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":757
- * ctypedef npy_ulonglong ulonglong_t
- *
- * ctypedef npy_intp intp_t # <<<<<<<<<<<<<<
- * ctypedef npy_uintp uintp_t
- *
- */
-typedef npy_intp __pyx_t_5numpy_intp_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":758
- *
- * ctypedef npy_intp intp_t
- * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_double float_t
- */
-typedef npy_uintp __pyx_t_5numpy_uintp_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":760
- * ctypedef npy_uintp uintp_t
- *
- * ctypedef npy_double float_t # <<<<<<<<<<<<<<
- * ctypedef npy_double double_t
- * ctypedef npy_longdouble longdouble_t
- */
-typedef npy_double __pyx_t_5numpy_float_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":761
- *
- * ctypedef npy_double float_t
- * ctypedef npy_double double_t # <<<<<<<<<<<<<<
- * ctypedef npy_longdouble longdouble_t
- *
- */
-typedef npy_double __pyx_t_5numpy_double_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":762
- * ctypedef npy_double float_t
- * ctypedef npy_double double_t
- * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_cfloat cfloat_t
- */
-typedef npy_longdouble __pyx_t_5numpy_longdouble_t;
-/* None.proto */
-#if CYTHON_CCOMPLEX
- #ifdef __cplusplus
- typedef ::std::complex< float > __pyx_t_float_complex;
- #else
- typedef float _Complex __pyx_t_float_complex;
- #endif
-#else
- typedef struct { float real, imag; } __pyx_t_float_complex;
-#endif
-
-/* None.proto */
-#if CYTHON_CCOMPLEX
- #ifdef __cplusplus
- typedef ::std::complex< double > __pyx_t_double_complex;
- #else
- typedef double _Complex __pyx_t_double_complex;
- #endif
-#else
- typedef struct { double real, imag; } __pyx_t_double_complex;
-#endif
-
-
-/*--- Type declarations ---*/
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":764
- * ctypedef npy_longdouble longdouble_t
- *
- * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<<
- * ctypedef npy_cdouble cdouble_t
- * ctypedef npy_clongdouble clongdouble_t
- */
-typedef npy_cfloat __pyx_t_5numpy_cfloat_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":765
- *
- * ctypedef npy_cfloat cfloat_t
- * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<<
- * ctypedef npy_clongdouble clongdouble_t
- *
- */
-typedef npy_cdouble __pyx_t_5numpy_cdouble_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":766
- * ctypedef npy_cfloat cfloat_t
- * ctypedef npy_cdouble cdouble_t
- * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_cdouble complex_t
- */
-typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t;
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":768
- * ctypedef npy_clongdouble clongdouble_t
- *
- * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew1(a):
- */
-typedef npy_cdouble __pyx_t_5numpy_complex_t;
-
-/* --- Runtime support code (head) --- */
-/* Refnanny.proto */
-#ifndef CYTHON_REFNANNY
- #define CYTHON_REFNANNY 0
-#endif
-#if CYTHON_REFNANNY
- typedef struct {
- void (*INCREF)(void*, PyObject*, int);
- void (*DECREF)(void*, PyObject*, int);
- void (*GOTREF)(void*, PyObject*, int);
- void (*GIVEREF)(void*, PyObject*, int);
- void* (*SetupContext)(const char*, int, const char*);
- void (*FinishContext)(void**);
- } __Pyx_RefNannyAPIStruct;
- static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
- static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname);
- #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
-#ifdef WITH_THREAD
- #define __Pyx_RefNannySetupContext(name, acquire_gil)\
- if (acquire_gil) {\
- PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\
- __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
- PyGILState_Release(__pyx_gilstate_save);\
- } else {\
- __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
- }
-#else
- #define __Pyx_RefNannySetupContext(name, acquire_gil)\
- __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
-#endif
- #define __Pyx_RefNannyFinishContext()\
- __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
- #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
- #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
- #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
- #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
-#else
- #define __Pyx_RefNannyDeclarations
- #define __Pyx_RefNannySetupContext(name, acquire_gil)
- #define __Pyx_RefNannyFinishContext()
- #define __Pyx_INCREF(r) Py_INCREF(r)
- #define __Pyx_DECREF(r) Py_DECREF(r)
- #define __Pyx_GOTREF(r)
- #define __Pyx_GIVEREF(r)
- #define __Pyx_XINCREF(r) Py_XINCREF(r)
- #define __Pyx_XDECREF(r) Py_XDECREF(r)
- #define __Pyx_XGOTREF(r)
- #define __Pyx_XGIVEREF(r)
-#endif
-#define __Pyx_XDECREF_SET(r, v) do {\
- PyObject *tmp = (PyObject *) r;\
- r = v; __Pyx_XDECREF(tmp);\
- } while (0)
-#define __Pyx_DECREF_SET(r, v) do {\
- PyObject *tmp = (PyObject *) r;\
- r = v; __Pyx_DECREF(tmp);\
- } while (0)
-#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
-#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
-
-/* PyObjectGetAttrStr.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) {
- PyTypeObject* tp = Py_TYPE(obj);
- if (likely(tp->tp_getattro))
- return tp->tp_getattro(obj, attr_name);
-#if PY_MAJOR_VERSION < 3
- if (likely(tp->tp_getattr))
- return tp->tp_getattr(obj, PyString_AS_STRING(attr_name));
-#endif
- return PyObject_GetAttr(obj, attr_name);
-}
-#else
-#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
-#endif
-
-/* GetBuiltinName.proto */
-static PyObject *__Pyx_GetBuiltinName(PyObject *name);
-
-/* RaiseArgTupleInvalid.proto */
-static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
- Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found);
-
-/* RaiseDoubleKeywords.proto */
-static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name);
-
-/* ParseKeywords.proto */
-static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\
- PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\
- const char* function_name);
-
-/* RaiseTooManyValuesToUnpack.proto */
-static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
-
-/* RaiseNeedMoreValuesToUnpack.proto */
-static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
-
-/* IterFinish.proto */
-static CYTHON_INLINE int __Pyx_IterFinish(void);
-
-/* UnpackItemEndCheck.proto */
-static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected);
-
-/* GetModuleGlobalName.proto */
-static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name);
-
-/* PyObjectCall.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw);
-#else
-#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
-#endif
-
-/* PyObjectCallMethO.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg);
-#endif
-
-/* PyObjectCallOneArg.proto */
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg);
-
-/* ExtTypeTest.proto */
-static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type);
-
-/* BufferFormatCheck.proto */
-static CYTHON_INLINE int __Pyx_GetBufferAndValidate(Py_buffer* buf, PyObject* obj,
- __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack);
-static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info);
-static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts);
-static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
- __Pyx_BufFmt_StackElem* stack,
- __Pyx_TypeInfo* type); // PROTO
-
-/* BufferFallbackError.proto */
-static void __Pyx_RaiseBufferFallbackError(void);
-
-/* PyIntBinop.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace);
-#else
-#define __Pyx_PyInt_AddObjC(op1, op2, intval, inplace)\
- (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2))
-#endif
-
-#define __Pyx_BufPtrStrided1d(type, buf, i0, s0) (type)((char*)buf + i0 * s0)
-/* ListCompAppend.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) {
- PyListObject* L = (PyListObject*) list;
- Py_ssize_t len = Py_SIZE(list);
- if (likely(L->allocated > len)) {
- Py_INCREF(x);
- PyList_SET_ITEM(list, len, x);
- Py_SIZE(list) = len+1;
- return 0;
- }
- return PyList_Append(list, x);
-}
-#else
-#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x)
-#endif
-
-#define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1)
-/* PyThreadStateGet.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate;
-#define __Pyx_PyThreadState_assign __pyx_tstate = PyThreadState_GET();
-#else
-#define __Pyx_PyThreadState_declare
-#define __Pyx_PyThreadState_assign
-#endif
-
-/* PyErrFetchRestore.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb)
-#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb)
-#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb)
-#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb)
-static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
-static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
-#else
-#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb)
-#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb)
-#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb)
-#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb)
-#endif
-
-/* RaiseException.proto */
-static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause);
-
-/* DictGetItem.proto */
-#if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY
-static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) {
- PyObject *value;
- value = PyDict_GetItemWithError(d, key);
- if (unlikely(!value)) {
- if (!PyErr_Occurred()) {
- PyObject* args = PyTuple_Pack(1, key);
- if (likely(args))
- PyErr_SetObject(PyExc_KeyError, args);
- Py_XDECREF(args);
- }
- return NULL;
- }
- Py_INCREF(value);
- return value;
-}
-#else
- #define __Pyx_PyDict_GetItem(d, key) PyObject_GetItem(d, key)
-#endif
-
-/* RaiseNoneIterError.proto */
-static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void);
-
-/* Import.proto */
-static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level);
-
-/* ImportFrom.proto */
-static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name);
-
-/* CodeObjectCache.proto */
-typedef struct {
- PyCodeObject* code_object;
- int code_line;
-} __Pyx_CodeObjectCacheEntry;
-struct __Pyx_CodeObjectCache {
- int count;
- int max_count;
- __Pyx_CodeObjectCacheEntry* entries;
-};
-static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
-static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
-static PyCodeObject *__pyx_find_code_object(int code_line);
-static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
-
-/* AddTraceback.proto */
-static void __Pyx_AddTraceback(const char *funcname, int c_line,
- int py_line, const char *filename);
-
-/* BufferStructDeclare.proto */
-typedef struct {
- Py_ssize_t shape, strides, suboffsets;
-} __Pyx_Buf_DimInfo;
-typedef struct {
- size_t refcount;
- Py_buffer pybuffer;
-} __Pyx_Buffer;
-typedef struct {
- __Pyx_Buffer *rcbuffer;
- char *data;
- __Pyx_Buf_DimInfo diminfo[8];
-} __Pyx_LocalBuf_ND;
-
-#if PY_MAJOR_VERSION < 3
- static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
- static void __Pyx_ReleaseBuffer(Py_buffer *view);
-#else
- #define __Pyx_GetBuffer PyObject_GetBuffer
- #define __Pyx_ReleaseBuffer PyBuffer_Release
-#endif
-
-
-/* None.proto */
-static Py_ssize_t __Pyx_zeros[] = {0, 0, 0, 0, 0, 0, 0, 0};
-static Py_ssize_t __Pyx_minusones[] = {-1, -1, -1, -1, -1, -1, -1, -1};
-
-/* CIntToPy.proto */
-static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value);
-
-/* None.proto */
-#if CYTHON_CCOMPLEX
- #ifdef __cplusplus
- #define __Pyx_CREAL(z) ((z).real())
- #define __Pyx_CIMAG(z) ((z).imag())
- #else
- #define __Pyx_CREAL(z) (__real__(z))
- #define __Pyx_CIMAG(z) (__imag__(z))
- #endif
-#else
- #define __Pyx_CREAL(z) ((z).real)
- #define __Pyx_CIMAG(z) ((z).imag)
-#endif
-#if defined(__cplusplus) && CYTHON_CCOMPLEX && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103)
- #define __Pyx_SET_CREAL(z,x) ((z).real(x))
- #define __Pyx_SET_CIMAG(z,y) ((z).imag(y))
-#else
- #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x)
- #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y)
-#endif
-
-/* None.proto */
-static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float);
-
-/* None.proto */
-#if CYTHON_CCOMPLEX
- #define __Pyx_c_eqf(a, b) ((a)==(b))
- #define __Pyx_c_sumf(a, b) ((a)+(b))
- #define __Pyx_c_difff(a, b) ((a)-(b))
- #define __Pyx_c_prodf(a, b) ((a)*(b))
- #define __Pyx_c_quotf(a, b) ((a)/(b))
- #define __Pyx_c_negf(a) (-(a))
- #ifdef __cplusplus
- #define __Pyx_c_is_zerof(z) ((z)==(float)0)
- #define __Pyx_c_conjf(z) (::std::conj(z))
- #if 1
- #define __Pyx_c_absf(z) (::std::abs(z))
- #define __Pyx_c_powf(a, b) (::std::pow(a, b))
- #endif
- #else
- #define __Pyx_c_is_zerof(z) ((z)==0)
- #define __Pyx_c_conjf(z) (conjf(z))
- #if 1
- #define __Pyx_c_absf(z) (cabsf(z))
- #define __Pyx_c_powf(a, b) (cpowf(a, b))
- #endif
- #endif
-#else
- static CYTHON_INLINE int __Pyx_c_eqf(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sumf(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_difff(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prodf(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quotf(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_negf(__pyx_t_float_complex);
- static CYTHON_INLINE int __Pyx_c_is_zerof(__pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conjf(__pyx_t_float_complex);
- #if 1
- static CYTHON_INLINE float __Pyx_c_absf(__pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_powf(__pyx_t_float_complex, __pyx_t_float_complex);
- #endif
-#endif
-
-/* None.proto */
-static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double);
-
-/* None.proto */
-#if CYTHON_CCOMPLEX
- #define __Pyx_c_eq(a, b) ((a)==(b))
- #define __Pyx_c_sum(a, b) ((a)+(b))
- #define __Pyx_c_diff(a, b) ((a)-(b))
- #define __Pyx_c_prod(a, b) ((a)*(b))
- #define __Pyx_c_quot(a, b) ((a)/(b))
- #define __Pyx_c_neg(a) (-(a))
- #ifdef __cplusplus
- #define __Pyx_c_is_zero(z) ((z)==(double)0)
- #define __Pyx_c_conj(z) (::std::conj(z))
- #if 1
- #define __Pyx_c_abs(z) (::std::abs(z))
- #define __Pyx_c_pow(a, b) (::std::pow(a, b))
- #endif
- #else
- #define __Pyx_c_is_zero(z) ((z)==0)
- #define __Pyx_c_conj(z) (conj(z))
- #if 1
- #define __Pyx_c_abs(z) (cabs(z))
- #define __Pyx_c_pow(a, b) (cpow(a, b))
- #endif
- #endif
-#else
- static CYTHON_INLINE int __Pyx_c_eq(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg(__pyx_t_double_complex);
- static CYTHON_INLINE int __Pyx_c_is_zero(__pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj(__pyx_t_double_complex);
- #if 1
- static CYTHON_INLINE double __Pyx_c_abs(__pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow(__pyx_t_double_complex, __pyx_t_double_complex);
- #endif
-#endif
-
-/* CIntToPy.proto */
-static CYTHON_INLINE PyObject* __Pyx_PyInt_From_enum__NPY_TYPES(enum NPY_TYPES value);
-
-/* CIntFromPy.proto */
-static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
-
-/* CIntFromPy.proto */
-static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
-
-/* CIntToPy.proto */
-static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
-
-/* CheckBinaryVersion.proto */
-static int __Pyx_check_binary_version(void);
-
-/* PyIdentifierFromString.proto */
-#if !defined(__Pyx_PyIdentifier_FromString)
-#if PY_MAJOR_VERSION < 3
- #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
-#else
- #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
-#endif
-#endif
-
-/* ModuleImport.proto */
-static PyObject *__Pyx_ImportModule(const char *name);
-
-/* TypeImport.proto */
-static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name, size_t size, int strict);
-
-/* FunctionImport.proto */
-static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig);
-
-/* InitStrings.proto */
-static int __Pyx_InitStrings(__Pyx_StringTabEntry *t);
-
-
-/* Module declarations from 'libc.math' */
-
-/* Module declarations from 'cpython.buffer' */
-
-/* Module declarations from 'libc.string' */
-
-/* Module declarations from 'libc.stdio' */
-
-/* Module declarations from '__builtin__' */
-
-/* Module declarations from 'cpython.type' */
-static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0;
-
-/* Module declarations from 'cpython' */
-
-/* Module declarations from 'cpython.object' */
-
-/* Module declarations from 'cpython.ref' */
-
-/* Module declarations from 'libc.stdlib' */
-
-/* Module declarations from 'numpy' */
-
-/* Module declarations from 'numpy' */
-static PyTypeObject *__pyx_ptype_5numpy_dtype = 0;
-static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0;
-static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0;
-static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0;
-static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0;
-static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *, char *, char *, int *); /*proto*/
-
-/* Module declarations from 'cython' */
-
-/* Module declarations from 'sklearn.utils.lgamma' */
-static double (*__pyx_f_7sklearn_5utils_6lgamma_lgamma)(double); /*proto*/
-
-/* Module declarations from 'sklearn.metrics.cluster.expected_mutual_info_fast' */
-static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t = { "int32_t", NULL, sizeof(__pyx_t_5numpy_int32_t), { 0 }, 0, IS_UNSIGNED(__pyx_t_5numpy_int32_t) ? 'U' : 'I', IS_UNSIGNED(__pyx_t_5numpy_int32_t), 0 };
-static __Pyx_TypeInfo __Pyx_TypeInfo_double = { "double", NULL, sizeof(double), { 0 }, 0, 'R', 0, 0 };
-#define __Pyx_MODULE_NAME "sklearn.metrics.cluster.expected_mutual_info_fast"
-int __pyx_module_is_main_sklearn__metrics__cluster__expected_mutual_info_fast = 0;
-
-/* Implementation of 'sklearn.metrics.cluster.expected_mutual_info_fast' */
-static PyObject *__pyx_builtin_range;
-static PyObject *__pyx_builtin_ValueError;
-static PyObject *__pyx_builtin_RuntimeError;
-static const char __pyx_k_C[] = "C";
-static const char __pyx_k_N[] = "N";
-static const char __pyx_k_R[] = "R";
-static const char __pyx_k_T[] = "T";
-static const char __pyx_k_a[] = "a";
-static const char __pyx_k_b[] = "b";
-static const char __pyx_k_i[] = "i";
-static const char __pyx_k_j[] = "j";
-static const char __pyx_k_v[] = "v";
-static const char __pyx_k_w[] = "w";
-static const char __pyx_k_np[] = "np";
-static const char __pyx_k_emi[] = "emi";
-static const char __pyx_k_end[] = "end";
-static const char __pyx_k_gln[] = "gln";
-static const char __pyx_k_int[] = "int";
-static const char __pyx_k_log[] = "log";
-static const char __pyx_k_max[] = "max";
-static const char __pyx_k_nij[] = "nij";
-static const char __pyx_k_sum[] = "sum";
-static const char __pyx_k_axis[] = "axis";
-static const char __pyx_k_main[] = "__main__";
-static const char __pyx_k_nijs[] = "nijs";
-static const char __pyx_k_test[] = "__test__";
-static const char __pyx_k_array[] = "array";
-static const char __pyx_k_dtype[] = "dtype";
-static const char __pyx_k_float[] = "float";
-static const char __pyx_k_gln_N[] = "gln_N";
-static const char __pyx_k_gln_a[] = "gln_a";
-static const char __pyx_k_gln_b[] = "gln_b";
-static const char __pyx_k_int32[] = "int32";
-static const char __pyx_k_numpy[] = "numpy";
-static const char __pyx_k_range[] = "range";
-static const char __pyx_k_shape[] = "shape";
-static const char __pyx_k_start[] = "start";
-static const char __pyx_k_term1[] = "term1";
-static const char __pyx_k_term2[] = "term2";
-static const char __pyx_k_term3[] = "term3";
-static const char __pyx_k_arange[] = "arange";
-static const char __pyx_k_astype[] = "astype";
-static const char __pyx_k_gln_Na[] = "gln_Na";
-static const char __pyx_k_gln_Nb[] = "gln_Nb";
-static const char __pyx_k_import[] = "__import__";
-static const char __pyx_k_resize[] = "resize";
-static const char __pyx_k_gammaln[] = "gammaln";
-static const char __pyx_k_gln_nij[] = "gln_nij";
-static const char __pyx_k_maximum[] = "maximum";
-static const char __pyx_k_minimum[] = "minimum";
-static const char __pyx_k_newaxis[] = "newaxis";
-static const char __pyx_k_log_Nnij[] = "log_Nnij";
-static const char __pyx_k_n_samples[] = "n_samples";
-static const char __pyx_k_ValueError[] = "ValueError";
-static const char __pyx_k_contingency[] = "contingency";
-static const char __pyx_k_RuntimeError[] = "RuntimeError";
-static const char __pyx_k_log_ab_outer[] = "log_ab_outer";
-static const char __pyx_k_scipy_special[] = "scipy.special";
-static const char __pyx_k_expected_mutual_information[] = "expected_mutual_information";
-static const char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous";
-static const char __pyx_k_Users_michaelhush_Dropbox_Sourc[] = "/Users/michaelhush/Dropbox/SourceCode/scikit-learn/sklearn/metrics/cluster/expected_mutual_info_fast.pyx";
-static const char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)";
-static const char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd";
-static const char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported";
-static const char __pyx_k_ndarray_is_not_Fortran_contiguou[] = "ndarray is not Fortran contiguous";
-static const char __pyx_k_sklearn_metrics_cluster_expected[] = "sklearn.metrics.cluster.expected_mutual_info_fast";
-static const char __pyx_k_Format_string_allocated_too_shor_2[] = "Format string allocated too short.";
-static PyObject *__pyx_n_s_C;
-static PyObject *__pyx_kp_u_Format_string_allocated_too_shor;
-static PyObject *__pyx_kp_u_Format_string_allocated_too_shor_2;
-static PyObject *__pyx_n_s_N;
-static PyObject *__pyx_kp_u_Non_native_byte_order_not_suppor;
-static PyObject *__pyx_n_s_R;
-static PyObject *__pyx_n_s_RuntimeError;
-static PyObject *__pyx_n_s_T;
-static PyObject *__pyx_kp_s_Users_michaelhush_Dropbox_Sourc;
-static PyObject *__pyx_n_s_ValueError;
-static PyObject *__pyx_n_s_a;
-static PyObject *__pyx_n_s_arange;
-static PyObject *__pyx_n_s_array;
-static PyObject *__pyx_n_s_astype;
-static PyObject *__pyx_n_s_axis;
-static PyObject *__pyx_n_s_b;
-static PyObject *__pyx_n_s_contingency;
-static PyObject *__pyx_n_s_dtype;
-static PyObject *__pyx_n_s_emi;
-static PyObject *__pyx_n_s_end;
-static PyObject *__pyx_n_s_expected_mutual_information;
-static PyObject *__pyx_n_s_float;
-static PyObject *__pyx_n_s_gammaln;
-static PyObject *__pyx_n_s_gln;
-static PyObject *__pyx_n_s_gln_N;
-static PyObject *__pyx_n_s_gln_Na;
-static PyObject *__pyx_n_s_gln_Nb;
-static PyObject *__pyx_n_s_gln_a;
-static PyObject *__pyx_n_s_gln_b;
-static PyObject *__pyx_n_s_gln_nij;
-static PyObject *__pyx_n_s_i;
-static PyObject *__pyx_n_s_import;
-static PyObject *__pyx_n_s_int;
-static PyObject *__pyx_n_s_int32;
-static PyObject *__pyx_n_s_j;
-static PyObject *__pyx_n_s_log;
-static PyObject *__pyx_n_s_log_Nnij;
-static PyObject *__pyx_n_s_log_ab_outer;
-static PyObject *__pyx_n_s_main;
-static PyObject *__pyx_n_s_max;
-static PyObject *__pyx_n_s_maximum;
-static PyObject *__pyx_n_s_minimum;
-static PyObject *__pyx_n_s_n_samples;
-static PyObject *__pyx_kp_u_ndarray_is_not_C_contiguous;
-static PyObject *__pyx_kp_u_ndarray_is_not_Fortran_contiguou;
-static PyObject *__pyx_n_s_newaxis;
-static PyObject *__pyx_n_s_nij;
-static PyObject *__pyx_n_s_nijs;
-static PyObject *__pyx_n_s_np;
-static PyObject *__pyx_n_s_numpy;
-static PyObject *__pyx_n_s_range;
-static PyObject *__pyx_n_s_resize;
-static PyObject *__pyx_n_s_scipy_special;
-static PyObject *__pyx_n_s_shape;
-static PyObject *__pyx_n_s_sklearn_metrics_cluster_expected;
-static PyObject *__pyx_n_s_start;
-static PyObject *__pyx_n_s_sum;
-static PyObject *__pyx_n_s_term1;
-static PyObject *__pyx_n_s_term2;
-static PyObject *__pyx_n_s_term3;
-static PyObject *__pyx_n_s_test;
-static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd;
-static PyObject *__pyx_n_s_v;
-static PyObject *__pyx_n_s_w;
-static PyObject *__pyx_pf_7sklearn_7metrics_7cluster_25expected_mutual_info_fast_expected_mutual_information(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_contingency, int __pyx_v_n_samples); /* proto */
-static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */
-static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */
-static PyObject *__pyx_int_0;
-static PyObject *__pyx_int_1;
-static PyObject *__pyx_slice_;
-static PyObject *__pyx_tuple__2;
-static PyObject *__pyx_tuple__3;
-static PyObject *__pyx_tuple__4;
-static PyObject *__pyx_tuple__5;
-static PyObject *__pyx_tuple__6;
-static PyObject *__pyx_tuple__7;
-static PyObject *__pyx_tuple__8;
-static PyObject *__pyx_codeobj__9;
-
-/* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":20
- * @cython.boundscheck(False)
- * @cython.wraparound(False)
- * def expected_mutual_information(contingency, int n_samples): # <<<<<<<<<<<<<<
- * """Calculate the expected mutual information for two labelings."""
- * cdef int R, C
- */
-
-/* Python wrapper */
-static PyObject *__pyx_pw_7sklearn_7metrics_7cluster_25expected_mutual_info_fast_1expected_mutual_information(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
-static char __pyx_doc_7sklearn_7metrics_7cluster_25expected_mutual_info_fast_expected_mutual_information[] = "Calculate the expected mutual information for two labelings.";
-static PyMethodDef __pyx_mdef_7sklearn_7metrics_7cluster_25expected_mutual_info_fast_1expected_mutual_information = {"expected_mutual_information", (PyCFunction)__pyx_pw_7sklearn_7metrics_7cluster_25expected_mutual_info_fast_1expected_mutual_information, METH_VARARGS|METH_KEYWORDS, __pyx_doc_7sklearn_7metrics_7cluster_25expected_mutual_info_fast_expected_mutual_information};
-static PyObject *__pyx_pw_7sklearn_7metrics_7cluster_25expected_mutual_info_fast_1expected_mutual_information(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
- PyObject *__pyx_v_contingency = 0;
- int __pyx_v_n_samples;
- PyObject *__pyx_r = 0;
- __Pyx_RefNannyDeclarations
- __Pyx_RefNannySetupContext("expected_mutual_information (wrapper)", 0);
- {
- static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_contingency,&__pyx_n_s_n_samples,0};
- PyObject* values[2] = {0,0};
- if (unlikely(__pyx_kwds)) {
- Py_ssize_t kw_args;
- const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
- switch (pos_args) {
- case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
- case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
- case 0: break;
- default: goto __pyx_L5_argtuple_error;
- }
- kw_args = PyDict_Size(__pyx_kwds);
- switch (pos_args) {
- case 0:
- if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_contingency)) != 0)) kw_args--;
- else goto __pyx_L5_argtuple_error;
- case 1:
- if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_n_samples)) != 0)) kw_args--;
- else {
- __Pyx_RaiseArgtupleInvalid("expected_mutual_information", 1, 2, 2, 1); __PYX_ERR(0, 20, __pyx_L3_error)
- }
- }
- if (unlikely(kw_args > 0)) {
- if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "expected_mutual_information") < 0)) __PYX_ERR(0, 20, __pyx_L3_error)
- }
- } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
- goto __pyx_L5_argtuple_error;
- } else {
- values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
- values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
- }
- __pyx_v_contingency = values[0];
- __pyx_v_n_samples = __Pyx_PyInt_As_int(values[1]); if (unlikely((__pyx_v_n_samples == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 20, __pyx_L3_error)
- }
- goto __pyx_L4_argument_unpacking_done;
- __pyx_L5_argtuple_error:;
- __Pyx_RaiseArgtupleInvalid("expected_mutual_information", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 20, __pyx_L3_error)
- __pyx_L3_error:;
- __Pyx_AddTraceback("sklearn.metrics.cluster.expected_mutual_info_fast.expected_mutual_information", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __Pyx_RefNannyFinishContext();
- return NULL;
- __pyx_L4_argument_unpacking_done:;
- __pyx_r = __pyx_pf_7sklearn_7metrics_7cluster_25expected_mutual_info_fast_expected_mutual_information(__pyx_self, __pyx_v_contingency, __pyx_v_n_samples);
-
- /* function exit code */
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-static PyObject *__pyx_pf_7sklearn_7metrics_7cluster_25expected_mutual_info_fast_expected_mutual_information(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_contingency, int __pyx_v_n_samples) {
- int __pyx_v_R;
- int __pyx_v_C;
- float __pyx_v_N;
- float __pyx_v_gln_N;
- float __pyx_v_emi;
- float __pyx_v_term2;
- float __pyx_v_term3;
- float __pyx_v_gln;
- PyArrayObject *__pyx_v_gln_a = 0;
- PyArrayObject *__pyx_v_gln_b = 0;
- PyArrayObject *__pyx_v_gln_Na = 0;
- PyArrayObject *__pyx_v_gln_Nb = 0;
- PyArrayObject *__pyx_v_gln_nij = 0;
- PyArrayObject *__pyx_v_log_Nnij = 0;
- PyArrayObject *__pyx_v_nijs = 0;
- PyArrayObject *__pyx_v_term1 = 0;
- PyArrayObject *__pyx_v_log_ab_outer = 0;
- PyArrayObject *__pyx_v_a = 0;
- PyArrayObject *__pyx_v_b = 0;
- PyObject *__pyx_v_start = NULL;
- PyObject *__pyx_v_end = NULL;
- Py_ssize_t __pyx_v_i;
- Py_ssize_t __pyx_v_j;
- Py_ssize_t __pyx_v_nij;
- PyObject *__pyx_v_v = NULL;
- PyObject *__pyx_v_w = NULL;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_a;
- __Pyx_Buffer __pyx_pybuffer_a;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_b;
- __Pyx_Buffer __pyx_pybuffer_b;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_gln_Na;
- __Pyx_Buffer __pyx_pybuffer_gln_Na;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_gln_Nb;
- __Pyx_Buffer __pyx_pybuffer_gln_Nb;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_gln_a;
- __Pyx_Buffer __pyx_pybuffer_gln_a;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_gln_b;
- __Pyx_Buffer __pyx_pybuffer_gln_b;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_gln_nij;
- __Pyx_Buffer __pyx_pybuffer_gln_nij;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_log_Nnij;
- __Pyx_Buffer __pyx_pybuffer_log_Nnij;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_log_ab_outer;
- __Pyx_Buffer __pyx_pybuffer_log_ab_outer;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_nijs;
- __Pyx_Buffer __pyx_pybuffer_nijs;
- __Pyx_LocalBuf_ND __pyx_pybuffernd_term1;
- __Pyx_Buffer __pyx_pybuffer_term1;
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- PyObject *__pyx_t_2 = NULL;
- PyObject *__pyx_t_3 = NULL;
- PyObject *__pyx_t_4 = NULL;
- PyObject *(*__pyx_t_5)(PyObject *);
- int __pyx_t_6;
- int __pyx_t_7;
- PyObject *__pyx_t_8 = NULL;
- PyArrayObject *__pyx_t_9 = NULL;
- PyObject *__pyx_t_10 = NULL;
- PyObject *__pyx_t_11 = NULL;
- PyObject *__pyx_t_12 = NULL;
- PyObject *__pyx_t_13 = NULL;
- int __pyx_t_14;
- PyArrayObject *__pyx_t_15 = NULL;
- Py_ssize_t __pyx_t_16;
- PyArrayObject *__pyx_t_17 = NULL;
- PyArrayObject *__pyx_t_18 = NULL;
- float __pyx_t_19;
- Py_ssize_t __pyx_t_20;
- PyObject *(*__pyx_t_21)(PyObject *);
- Py_ssize_t __pyx_t_22;
- PyObject *(*__pyx_t_23)(PyObject *);
- PyObject *__pyx_t_24 = NULL;
- PyObject *__pyx_t_25 = NULL;
- long __pyx_t_26;
- long __pyx_t_27;
- Py_ssize_t __pyx_t_28;
- Py_ssize_t __pyx_t_29;
- Py_ssize_t __pyx_t_30;
- Py_ssize_t __pyx_t_31;
- Py_ssize_t __pyx_t_32;
- Py_ssize_t __pyx_t_33;
- Py_ssize_t __pyx_t_34;
- Py_ssize_t __pyx_t_35;
- Py_ssize_t __pyx_t_36;
- Py_ssize_t __pyx_t_37;
- Py_ssize_t __pyx_t_38;
- Py_ssize_t __pyx_t_39;
- Py_ssize_t __pyx_t_40;
- Py_ssize_t __pyx_t_41;
- __Pyx_RefNannySetupContext("expected_mutual_information", 0);
- __pyx_pybuffer_gln_a.pybuffer.buf = NULL;
- __pyx_pybuffer_gln_a.refcount = 0;
- __pyx_pybuffernd_gln_a.data = NULL;
- __pyx_pybuffernd_gln_a.rcbuffer = &__pyx_pybuffer_gln_a;
- __pyx_pybuffer_gln_b.pybuffer.buf = NULL;
- __pyx_pybuffer_gln_b.refcount = 0;
- __pyx_pybuffernd_gln_b.data = NULL;
- __pyx_pybuffernd_gln_b.rcbuffer = &__pyx_pybuffer_gln_b;
- __pyx_pybuffer_gln_Na.pybuffer.buf = NULL;
- __pyx_pybuffer_gln_Na.refcount = 0;
- __pyx_pybuffernd_gln_Na.data = NULL;
- __pyx_pybuffernd_gln_Na.rcbuffer = &__pyx_pybuffer_gln_Na;
- __pyx_pybuffer_gln_Nb.pybuffer.buf = NULL;
- __pyx_pybuffer_gln_Nb.refcount = 0;
- __pyx_pybuffernd_gln_Nb.data = NULL;
- __pyx_pybuffernd_gln_Nb.rcbuffer = &__pyx_pybuffer_gln_Nb;
- __pyx_pybuffer_gln_nij.pybuffer.buf = NULL;
- __pyx_pybuffer_gln_nij.refcount = 0;
- __pyx_pybuffernd_gln_nij.data = NULL;
- __pyx_pybuffernd_gln_nij.rcbuffer = &__pyx_pybuffer_gln_nij;
- __pyx_pybuffer_log_Nnij.pybuffer.buf = NULL;
- __pyx_pybuffer_log_Nnij.refcount = 0;
- __pyx_pybuffernd_log_Nnij.data = NULL;
- __pyx_pybuffernd_log_Nnij.rcbuffer = &__pyx_pybuffer_log_Nnij;
- __pyx_pybuffer_nijs.pybuffer.buf = NULL;
- __pyx_pybuffer_nijs.refcount = 0;
- __pyx_pybuffernd_nijs.data = NULL;
- __pyx_pybuffernd_nijs.rcbuffer = &__pyx_pybuffer_nijs;
- __pyx_pybuffer_term1.pybuffer.buf = NULL;
- __pyx_pybuffer_term1.refcount = 0;
- __pyx_pybuffernd_term1.data = NULL;
- __pyx_pybuffernd_term1.rcbuffer = &__pyx_pybuffer_term1;
- __pyx_pybuffer_log_ab_outer.pybuffer.buf = NULL;
- __pyx_pybuffer_log_ab_outer.refcount = 0;
- __pyx_pybuffernd_log_ab_outer.data = NULL;
- __pyx_pybuffernd_log_ab_outer.rcbuffer = &__pyx_pybuffer_log_ab_outer;
- __pyx_pybuffer_a.pybuffer.buf = NULL;
- __pyx_pybuffer_a.refcount = 0;
- __pyx_pybuffernd_a.data = NULL;
- __pyx_pybuffernd_a.rcbuffer = &__pyx_pybuffer_a;
- __pyx_pybuffer_b.pybuffer.buf = NULL;
- __pyx_pybuffer_b.refcount = 0;
- __pyx_pybuffernd_b.data = NULL;
- __pyx_pybuffernd_b.rcbuffer = &__pyx_pybuffer_b;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":29
- * cdef np.ndarray[np.int32_t] a, b
- * #cdef np.ndarray[int, ndim=2] start, end
- * R, C = contingency.shape # <<<<<<<<<<<<<<
- * N = float(n_samples)
- * a = np.sum(contingency, axis=1).astype(np.int32)
- */
- __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_contingency, __pyx_n_s_shape); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 29, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- if ((likely(PyTuple_CheckExact(__pyx_t_1))) || (PyList_CheckExact(__pyx_t_1))) {
- PyObject* sequence = __pyx_t_1;
- #if CYTHON_COMPILING_IN_CPYTHON
- Py_ssize_t size = Py_SIZE(sequence);
- #else
- Py_ssize_t size = PySequence_Size(sequence);
- #endif
- if (unlikely(size != 2)) {
- if (size > 2) __Pyx_RaiseTooManyValuesError(2);
- else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);
- __PYX_ERR(0, 29, __pyx_L1_error)
- }
- #if CYTHON_COMPILING_IN_CPYTHON
- if (likely(PyTuple_CheckExact(sequence))) {
- __pyx_t_2 = PyTuple_GET_ITEM(sequence, 0);
- __pyx_t_3 = PyTuple_GET_ITEM(sequence, 1);
- } else {
- __pyx_t_2 = PyList_GET_ITEM(sequence, 0);
- __pyx_t_3 = PyList_GET_ITEM(sequence, 1);
- }
- __Pyx_INCREF(__pyx_t_2);
- __Pyx_INCREF(__pyx_t_3);
- #else
- __pyx_t_2 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 29, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __pyx_t_3 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 29, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- #endif
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- } else {
- Py_ssize_t index = -1;
- __pyx_t_4 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 29, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __pyx_t_5 = Py_TYPE(__pyx_t_4)->tp_iternext;
- index = 0; __pyx_t_2 = __pyx_t_5(__pyx_t_4); if (unlikely(!__pyx_t_2)) goto __pyx_L3_unpacking_failed;
- __Pyx_GOTREF(__pyx_t_2);
- index = 1; __pyx_t_3 = __pyx_t_5(__pyx_t_4); if (unlikely(!__pyx_t_3)) goto __pyx_L3_unpacking_failed;
- __Pyx_GOTREF(__pyx_t_3);
- if (__Pyx_IternextUnpackEndCheck(__pyx_t_5(__pyx_t_4), 2) < 0) __PYX_ERR(0, 29, __pyx_L1_error)
- __pyx_t_5 = NULL;
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- goto __pyx_L4_unpacking_done;
- __pyx_L3_unpacking_failed:;
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_5 = NULL;
- if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index);
- __PYX_ERR(0, 29, __pyx_L1_error)
- __pyx_L4_unpacking_done:;
- }
- __pyx_t_6 = __Pyx_PyInt_As_int(__pyx_t_2); if (unlikely((__pyx_t_6 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 29, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_7 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_7 == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 29, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_v_R = __pyx_t_6;
- __pyx_v_C = __pyx_t_7;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":30
- * #cdef np.ndarray[int, ndim=2] start, end
- * R, C = contingency.shape
- * N = float(n_samples) # <<<<<<<<<<<<<<
- * a = np.sum(contingency, axis=1).astype(np.int32)
- * b = np.sum(contingency, axis=0).astype(np.int32)
- */
- __pyx_v_N = ((double)__pyx_v_n_samples);
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":31
- * R, C = contingency.shape
- * N = float(n_samples)
- * a = np.sum(contingency, axis=1).astype(np.int32) # <<<<<<<<<<<<<<
- * b = np.sum(contingency, axis=0).astype(np.int32)
- * # There are three major terms to the EMI equation, which are multiplied to
- */
- __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_sum); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_INCREF(__pyx_v_contingency);
- __Pyx_GIVEREF(__pyx_v_contingency);
- PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_v_contingency);
- __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_axis, __pyx_int_1) < 0) __PYX_ERR(0, 31, __pyx_L1_error)
- __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, __pyx_t_4); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_astype); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
- __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_int32); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
- __pyx_t_8 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_4))) {
- __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_4);
- if (likely(__pyx_t_8)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
- __Pyx_INCREF(__pyx_t_8);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_4, function);
- }
- }
- if (!__pyx_t_8) {
- __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __Pyx_GOTREF(__pyx_t_1);
- } else {
- __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_GIVEREF(__pyx_t_8); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_8); __pyx_t_8 = NULL;
- __Pyx_GIVEREF(__pyx_t_3);
- PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_3);
- __pyx_t_3 = 0;
- __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- }
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 31, __pyx_L1_error)
- __pyx_t_9 = ((PyArrayObject *)__pyx_t_1);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_a.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_a.rcbuffer->pybuffer, (PyObject*)__pyx_t_9, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_10, &__pyx_t_11, &__pyx_t_12);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_a.rcbuffer->pybuffer, (PyObject*)__pyx_v_a, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_12);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_10, __pyx_t_11, __pyx_t_12);
- }
- }
- __pyx_pybuffernd_a.diminfo[0].strides = __pyx_pybuffernd_a.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_a.diminfo[0].shape = __pyx_pybuffernd_a.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 31, __pyx_L1_error)
- }
- __pyx_t_9 = 0;
- __pyx_v_a = ((PyArrayObject *)__pyx_t_1);
- __pyx_t_1 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":32
- * N = float(n_samples)
- * a = np.sum(contingency, axis=1).astype(np.int32)
- * b = np.sum(contingency, axis=0).astype(np.int32) # <<<<<<<<<<<<<<
- * # There are three major terms to the EMI equation, which are multiplied to
- * # and then summed over varying nij values.
- */
- __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_sum); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_INCREF(__pyx_v_contingency);
- __Pyx_GIVEREF(__pyx_v_contingency);
- PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_v_contingency);
- __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_axis, __pyx_int_0) < 0) __PYX_ERR(0, 32, __pyx_L1_error)
- __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_astype); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
- __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_int32); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
- __pyx_t_8 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_3))) {
- __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_3);
- if (likely(__pyx_t_8)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_8);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_3, function);
- }
- }
- if (!__pyx_t_8) {
- __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __Pyx_GOTREF(__pyx_t_1);
- } else {
- __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_GIVEREF(__pyx_t_8); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_8); __pyx_t_8 = NULL;
- __Pyx_GIVEREF(__pyx_t_4);
- PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_4);
- __pyx_t_4 = 0;
- __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 32, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- }
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 32, __pyx_L1_error)
- __pyx_t_9 = ((PyArrayObject *)__pyx_t_1);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_b.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_b.rcbuffer->pybuffer, (PyObject*)__pyx_t_9, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_12, &__pyx_t_11, &__pyx_t_10);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_b.rcbuffer->pybuffer, (PyObject*)__pyx_v_b, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_12); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_10);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_12, __pyx_t_11, __pyx_t_10);
- }
- }
- __pyx_pybuffernd_b.diminfo[0].strides = __pyx_pybuffernd_b.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_b.diminfo[0].shape = __pyx_pybuffernd_b.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 32, __pyx_L1_error)
- }
- __pyx_t_9 = 0;
- __pyx_v_b = ((PyArrayObject *)__pyx_t_1);
- __pyx_t_1 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":36
- * # and then summed over varying nij values.
- * # While nijs[0] will never be used, having it simplifies the indexing.
- * nijs = np.arange(0, max(np.max(a), np.max(b)) + 1, dtype='float') # <<<<<<<<<<<<<<
- * nijs[0] = 1 # Stops divide by zero warnings. As its not used, no issue.
- * # term1 is nij / N
- */
- __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_arange); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_max); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_2 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_4))) {
- __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_4);
- if (likely(__pyx_t_2)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
- __Pyx_INCREF(__pyx_t_2);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_4, function);
- }
- }
- if (!__pyx_t_2) {
- __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_4, ((PyObject *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- } else {
- __pyx_t_8 = PyTuple_New(1+1); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_2); __pyx_t_2 = NULL;
- __Pyx_INCREF(((PyObject *)__pyx_v_b));
- __Pyx_GIVEREF(((PyObject *)__pyx_v_b));
- PyTuple_SET_ITEM(__pyx_t_8, 0+1, ((PyObject *)__pyx_v_b));
- __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_8, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
- }
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_max); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
- __pyx_t_8 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) {
- __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_2);
- if (likely(__pyx_t_8)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
- __Pyx_INCREF(__pyx_t_8);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_2, function);
- }
- }
- if (!__pyx_t_8) {
- __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_2, ((PyObject *)__pyx_v_a)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- } else {
- __pyx_t_13 = PyTuple_New(1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_GIVEREF(__pyx_t_8); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_8); __pyx_t_8 = NULL;
- __Pyx_INCREF(((PyObject *)__pyx_v_a));
- __Pyx_GIVEREF(((PyObject *)__pyx_v_a));
- PyTuple_SET_ITEM(__pyx_t_13, 0+1, ((PyObject *)__pyx_v_a));
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_13, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- }
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_13 = PyObject_RichCompare(__pyx_t_1, __pyx_t_4, Py_GT); __Pyx_XGOTREF(__pyx_t_13); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 36, __pyx_L1_error)
- __pyx_t_14 = __Pyx_PyObject_IsTrue(__pyx_t_13); if (unlikely(__pyx_t_14 < 0)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- if (__pyx_t_14) {
- __Pyx_INCREF(__pyx_t_1);
- __pyx_t_2 = __pyx_t_1;
- } else {
- __Pyx_INCREF(__pyx_t_4);
- __pyx_t_2 = __pyx_t_4;
- }
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __pyx_t_1 = __Pyx_PyInt_AddObjC(__pyx_t_2, __pyx_int_1, 1, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_INCREF(__pyx_int_0);
- __Pyx_GIVEREF(__pyx_int_0);
- PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_int_0);
- __Pyx_GIVEREF(__pyx_t_1);
- PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_1);
- __pyx_t_1 = 0;
- __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_n_s_float) < 0) __PYX_ERR(0, 36, __pyx_L1_error)
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, __pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 36, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 36, __pyx_L1_error)
- __pyx_t_15 = ((PyArrayObject *)__pyx_t_4);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_nijs.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_nijs.rcbuffer->pybuffer, (PyObject*)__pyx_t_15, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES| PyBUF_WRITABLE, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_10, &__pyx_t_11, &__pyx_t_12);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_nijs.rcbuffer->pybuffer, (PyObject*)__pyx_v_nijs, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES| PyBUF_WRITABLE, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_12);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_10, __pyx_t_11, __pyx_t_12);
- }
- }
- __pyx_pybuffernd_nijs.diminfo[0].strides = __pyx_pybuffernd_nijs.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_nijs.diminfo[0].shape = __pyx_pybuffernd_nijs.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 36, __pyx_L1_error)
- }
- __pyx_t_15 = 0;
- __pyx_v_nijs = ((PyArrayObject *)__pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":37
- * # While nijs[0] will never be used, having it simplifies the indexing.
- * nijs = np.arange(0, max(np.max(a), np.max(b)) + 1, dtype='float')
- * nijs[0] = 1 # Stops divide by zero warnings. As its not used, no issue. # <<<<<<<<<<<<<<
- * # term1 is nij / N
- * term1 = nijs / N
- */
- __pyx_t_16 = 0;
- *__Pyx_BufPtrStrided1d(double *, __pyx_pybuffernd_nijs.rcbuffer->pybuffer.buf, __pyx_t_16, __pyx_pybuffernd_nijs.diminfo[0].strides) = 1.0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":39
- * nijs[0] = 1 # Stops divide by zero warnings. As its not used, no issue.
- * # term1 is nij / N
- * term1 = nijs / N # <<<<<<<<<<<<<<
- * # term2 is log((N*nij) / (a * b)) == log(N * nij) - log(a * b)
- * # term2 uses the outer product
- */
- __pyx_t_4 = PyFloat_FromDouble(__pyx_v_N); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 39, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_1 = __Pyx_PyNumber_Divide(((PyObject *)__pyx_v_nijs), __pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 39, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 39, __pyx_L1_error)
- __pyx_t_15 = ((PyArrayObject *)__pyx_t_1);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_term1.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_term1.rcbuffer->pybuffer, (PyObject*)__pyx_t_15, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_12, &__pyx_t_11, &__pyx_t_10);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_term1.rcbuffer->pybuffer, (PyObject*)__pyx_v_term1, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_12); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_10);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_12, __pyx_t_11, __pyx_t_10);
- }
- }
- __pyx_pybuffernd_term1.diminfo[0].strides = __pyx_pybuffernd_term1.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_term1.diminfo[0].shape = __pyx_pybuffernd_term1.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 39, __pyx_L1_error)
- }
- __pyx_t_15 = 0;
- __pyx_v_term1 = ((PyArrayObject *)__pyx_t_1);
- __pyx_t_1 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":42
- * # term2 is log((N*nij) / (a * b)) == log(N * nij) - log(a * b)
- * # term2 uses the outer product
- * log_ab_outer = np.log(a)[:, np.newaxis] + np.log(b) # <<<<<<<<<<<<<<
- * # term2 uses N * nij
- * log_Nnij = np.log(N * nijs)
- */
- __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_log); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_4 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) {
- __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
- if (likely(__pyx_t_4)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
- __Pyx_INCREF(__pyx_t_4);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_2, function);
- }
- }
- if (!__pyx_t_4) {
- __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, ((PyObject *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- } else {
- __pyx_t_3 = PyTuple_New(1+1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4); __pyx_t_4 = NULL;
- __Pyx_INCREF(((PyObject *)__pyx_v_a));
- __Pyx_GIVEREF(((PyObject *)__pyx_v_a));
- PyTuple_SET_ITEM(__pyx_t_3, 0+1, ((PyObject *)__pyx_v_a));
- __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- }
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_newaxis); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_INCREF(__pyx_slice_);
- __Pyx_GIVEREF(__pyx_slice_);
- PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_slice_);
- __Pyx_GIVEREF(__pyx_t_3);
- PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_t_3);
- __pyx_t_3 = 0;
- __pyx_t_3 = PyObject_GetItem(__pyx_t_1, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_log); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __pyx_t_1 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_4))) {
- __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_4);
- if (likely(__pyx_t_1)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
- __Pyx_INCREF(__pyx_t_1);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_4, function);
- }
- }
- if (!__pyx_t_1) {
- __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_t_4, ((PyObject *)__pyx_v_b)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- } else {
- __pyx_t_13 = PyTuple_New(1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1); __pyx_t_1 = NULL;
- __Pyx_INCREF(((PyObject *)__pyx_v_b));
- __Pyx_GIVEREF(((PyObject *)__pyx_v_b));
- PyTuple_SET_ITEM(__pyx_t_13, 0+1, ((PyObject *)__pyx_v_b));
- __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_13, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- }
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_4 = PyNumber_Add(__pyx_t_3, __pyx_t_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 42, __pyx_L1_error)
- __pyx_t_17 = ((PyArrayObject *)__pyx_t_4);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer, (PyObject*)__pyx_t_17, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_10, &__pyx_t_11, &__pyx_t_12);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer, (PyObject*)__pyx_v_log_ab_outer, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_12);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_10, __pyx_t_11, __pyx_t_12);
- }
- }
- __pyx_pybuffernd_log_ab_outer.diminfo[0].strides = __pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_log_ab_outer.diminfo[0].shape = __pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_log_ab_outer.diminfo[1].strides = __pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_log_ab_outer.diminfo[1].shape = __pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer.shape[1];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 42, __pyx_L1_error)
- }
- __pyx_t_17 = 0;
- __pyx_v_log_ab_outer = ((PyArrayObject *)__pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":44
- * log_ab_outer = np.log(a)[:, np.newaxis] + np.log(b)
- * # term2 uses N * nij
- * log_Nnij = np.log(N * nijs) # <<<<<<<<<<<<<<
- * # term3 is large, and involved many factorials. Calculate these in log
- * # space to stop overflows.
- */
- __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 44, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_log); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 44, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_2 = PyFloat_FromDouble(__pyx_v_N); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 44, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __pyx_t_13 = PyNumber_Multiply(__pyx_t_2, ((PyObject *)__pyx_v_nijs)); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 44, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_2 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
- __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
- if (likely(__pyx_t_2)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_2);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_3, function);
- }
- }
- if (!__pyx_t_2) {
- __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_13); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 44, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- __Pyx_GOTREF(__pyx_t_4);
- } else {
- __pyx_t_1 = PyTuple_New(1+1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 44, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2); __pyx_t_2 = NULL;
- __Pyx_GIVEREF(__pyx_t_13);
- PyTuple_SET_ITEM(__pyx_t_1, 0+1, __pyx_t_13);
- __pyx_t_13 = 0;
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_1, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 44, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- }
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 44, __pyx_L1_error)
- __pyx_t_18 = ((PyArrayObject *)__pyx_t_4);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_log_Nnij.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_log_Nnij.rcbuffer->pybuffer, (PyObject*)__pyx_t_18, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_12, &__pyx_t_11, &__pyx_t_10);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_log_Nnij.rcbuffer->pybuffer, (PyObject*)__pyx_v_log_Nnij, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_12); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_10);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_12, __pyx_t_11, __pyx_t_10);
- }
- }
- __pyx_pybuffernd_log_Nnij.diminfo[0].strides = __pyx_pybuffernd_log_Nnij.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_log_Nnij.diminfo[0].shape = __pyx_pybuffernd_log_Nnij.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 44, __pyx_L1_error)
- }
- __pyx_t_18 = 0;
- __pyx_v_log_Nnij = ((PyArrayObject *)__pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":47
- * # term3 is large, and involved many factorials. Calculate these in log
- * # space to stop overflows.
- * gln_a = gammaln(a + 1) # <<<<<<<<<<<<<<
- * gln_b = gammaln(b + 1)
- * gln_Na = gammaln(N - a + 1)
- */
- __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_gammaln); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 47, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_1 = PyNumber_Add(((PyObject *)__pyx_v_a), __pyx_int_1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 47, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_t_13 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
- __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_3);
- if (likely(__pyx_t_13)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_13);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_3, function);
- }
- }
- if (!__pyx_t_13) {
- __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 47, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __Pyx_GOTREF(__pyx_t_4);
- } else {
- __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 47, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_GIVEREF(__pyx_t_13); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_13); __pyx_t_13 = NULL;
- __Pyx_GIVEREF(__pyx_t_1);
- PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_1);
- __pyx_t_1 = 0;
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 47, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- }
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 47, __pyx_L1_error)
- __pyx_t_18 = ((PyArrayObject *)__pyx_t_4);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_a.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_a.rcbuffer->pybuffer, (PyObject*)__pyx_t_18, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_10, &__pyx_t_11, &__pyx_t_12);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_a.rcbuffer->pybuffer, (PyObject*)__pyx_v_gln_a, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_12);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_10, __pyx_t_11, __pyx_t_12);
- }
- }
- __pyx_pybuffernd_gln_a.diminfo[0].strides = __pyx_pybuffernd_gln_a.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_gln_a.diminfo[0].shape = __pyx_pybuffernd_gln_a.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 47, __pyx_L1_error)
- }
- __pyx_t_18 = 0;
- __pyx_v_gln_a = ((PyArrayObject *)__pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":48
- * # space to stop overflows.
- * gln_a = gammaln(a + 1)
- * gln_b = gammaln(b + 1) # <<<<<<<<<<<<<<
- * gln_Na = gammaln(N - a + 1)
- * gln_Nb = gammaln(N - b + 1)
- */
- __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_gammaln); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 48, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_2 = PyNumber_Add(((PyObject *)__pyx_v_b), __pyx_int_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 48, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __pyx_t_1 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
- __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_3);
- if (likely(__pyx_t_1)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_1);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_3, function);
- }
- }
- if (!__pyx_t_1) {
- __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 48, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __Pyx_GOTREF(__pyx_t_4);
- } else {
- __pyx_t_13 = PyTuple_New(1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 48, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1); __pyx_t_1 = NULL;
- __Pyx_GIVEREF(__pyx_t_2);
- PyTuple_SET_ITEM(__pyx_t_13, 0+1, __pyx_t_2);
- __pyx_t_2 = 0;
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_13, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 48, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- }
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 48, __pyx_L1_error)
- __pyx_t_18 = ((PyArrayObject *)__pyx_t_4);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_b.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_b.rcbuffer->pybuffer, (PyObject*)__pyx_t_18, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_12, &__pyx_t_11, &__pyx_t_10);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_b.rcbuffer->pybuffer, (PyObject*)__pyx_v_gln_b, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_12); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_10);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_12, __pyx_t_11, __pyx_t_10);
- }
- }
- __pyx_pybuffernd_gln_b.diminfo[0].strides = __pyx_pybuffernd_gln_b.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_gln_b.diminfo[0].shape = __pyx_pybuffernd_gln_b.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 48, __pyx_L1_error)
- }
- __pyx_t_18 = 0;
- __pyx_v_gln_b = ((PyArrayObject *)__pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":49
- * gln_a = gammaln(a + 1)
- * gln_b = gammaln(b + 1)
- * gln_Na = gammaln(N - a + 1) # <<<<<<<<<<<<<<
- * gln_Nb = gammaln(N - b + 1)
- * gln_N = gammaln(N + 1)
- */
- __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_gammaln); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 49, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_13 = PyFloat_FromDouble(__pyx_v_N); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 49, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __pyx_t_2 = PyNumber_Subtract(__pyx_t_13, ((PyObject *)__pyx_v_a)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 49, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- __pyx_t_13 = __Pyx_PyInt_AddObjC(__pyx_t_2, __pyx_int_1, 1, 0); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 49, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_2 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
- __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
- if (likely(__pyx_t_2)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_2);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_3, function);
- }
- }
- if (!__pyx_t_2) {
- __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_13); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 49, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- __Pyx_GOTREF(__pyx_t_4);
- } else {
- __pyx_t_1 = PyTuple_New(1+1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 49, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2); __pyx_t_2 = NULL;
- __Pyx_GIVEREF(__pyx_t_13);
- PyTuple_SET_ITEM(__pyx_t_1, 0+1, __pyx_t_13);
- __pyx_t_13 = 0;
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_1, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 49, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- }
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 49, __pyx_L1_error)
- __pyx_t_18 = ((PyArrayObject *)__pyx_t_4);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_Na.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_Na.rcbuffer->pybuffer, (PyObject*)__pyx_t_18, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_10, &__pyx_t_11, &__pyx_t_12);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_Na.rcbuffer->pybuffer, (PyObject*)__pyx_v_gln_Na, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_12);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_10, __pyx_t_11, __pyx_t_12);
- }
- }
- __pyx_pybuffernd_gln_Na.diminfo[0].strides = __pyx_pybuffernd_gln_Na.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_gln_Na.diminfo[0].shape = __pyx_pybuffernd_gln_Na.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 49, __pyx_L1_error)
- }
- __pyx_t_18 = 0;
- __pyx_v_gln_Na = ((PyArrayObject *)__pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":50
- * gln_b = gammaln(b + 1)
- * gln_Na = gammaln(N - a + 1)
- * gln_Nb = gammaln(N - b + 1) # <<<<<<<<<<<<<<
- * gln_N = gammaln(N + 1)
- * gln_nij = gammaln(nijs + 1)
- */
- __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_gammaln); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 50, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_1 = PyFloat_FromDouble(__pyx_v_N); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 50, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_t_13 = PyNumber_Subtract(__pyx_t_1, ((PyObject *)__pyx_v_b)); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 50, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __pyx_t_1 = __Pyx_PyInt_AddObjC(__pyx_t_13, __pyx_int_1, 1, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 50, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- __pyx_t_13 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
- __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_3);
- if (likely(__pyx_t_13)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_13);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_3, function);
- }
- }
- if (!__pyx_t_13) {
- __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 50, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __Pyx_GOTREF(__pyx_t_4);
- } else {
- __pyx_t_2 = PyTuple_New(1+1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 50, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_GIVEREF(__pyx_t_13); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_13); __pyx_t_13 = NULL;
- __Pyx_GIVEREF(__pyx_t_1);
- PyTuple_SET_ITEM(__pyx_t_2, 0+1, __pyx_t_1);
- __pyx_t_1 = 0;
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_2, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 50, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- }
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 50, __pyx_L1_error)
- __pyx_t_18 = ((PyArrayObject *)__pyx_t_4);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_Nb.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_Nb.rcbuffer->pybuffer, (PyObject*)__pyx_t_18, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_12, &__pyx_t_11, &__pyx_t_10);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_Nb.rcbuffer->pybuffer, (PyObject*)__pyx_v_gln_Nb, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_12); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_10);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_12, __pyx_t_11, __pyx_t_10);
- }
- }
- __pyx_pybuffernd_gln_Nb.diminfo[0].strides = __pyx_pybuffernd_gln_Nb.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_gln_Nb.diminfo[0].shape = __pyx_pybuffernd_gln_Nb.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 50, __pyx_L1_error)
- }
- __pyx_t_18 = 0;
- __pyx_v_gln_Nb = ((PyArrayObject *)__pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":51
- * gln_Na = gammaln(N - a + 1)
- * gln_Nb = gammaln(N - b + 1)
- * gln_N = gammaln(N + 1) # <<<<<<<<<<<<<<
- * gln_nij = gammaln(nijs + 1)
- * # start and end values for nij terms for each summation.
- */
- __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_gammaln); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 51, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_2 = PyFloat_FromDouble((__pyx_v_N + 1.0)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 51, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __pyx_t_1 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
- __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_3);
- if (likely(__pyx_t_1)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_1);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_3, function);
- }
- }
- if (!__pyx_t_1) {
- __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 51, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __Pyx_GOTREF(__pyx_t_4);
- } else {
- __pyx_t_13 = PyTuple_New(1+1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 51, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_1); __pyx_t_1 = NULL;
- __Pyx_GIVEREF(__pyx_t_2);
- PyTuple_SET_ITEM(__pyx_t_13, 0+1, __pyx_t_2);
- __pyx_t_2 = 0;
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_13, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 51, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- }
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_19 = __pyx_PyFloat_AsFloat(__pyx_t_4); if (unlikely((__pyx_t_19 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 51, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_v_gln_N = __pyx_t_19;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":52
- * gln_Nb = gammaln(N - b + 1)
- * gln_N = gammaln(N + 1)
- * gln_nij = gammaln(nijs + 1) # <<<<<<<<<<<<<<
- * # start and end values for nij terms for each summation.
- * start = np.array([[v - N + w for w in b] for v in a], dtype='int')
- */
- __pyx_t_3 = __Pyx_GetModuleGlobalName(__pyx_n_s_gammaln); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 52, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_13 = PyNumber_Add(((PyObject *)__pyx_v_nijs), __pyx_int_1); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 52, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __pyx_t_2 = NULL;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
- __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
- if (likely(__pyx_t_2)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_2);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_3, function);
- }
- }
- if (!__pyx_t_2) {
- __pyx_t_4 = __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_13); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 52, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- __Pyx_GOTREF(__pyx_t_4);
- } else {
- __pyx_t_1 = PyTuple_New(1+1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 52, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2); __pyx_t_2 = NULL;
- __Pyx_GIVEREF(__pyx_t_13);
- PyTuple_SET_ITEM(__pyx_t_1, 0+1, __pyx_t_13);
- __pyx_t_13 = 0;
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_1, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 52, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- }
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 52, __pyx_L1_error)
- __pyx_t_18 = ((PyArrayObject *)__pyx_t_4);
- {
- __Pyx_BufFmt_StackElem __pyx_stack[1];
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_nij.rcbuffer->pybuffer);
- __pyx_t_7 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_nij.rcbuffer->pybuffer, (PyObject*)__pyx_t_18, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack);
- if (unlikely(__pyx_t_7 < 0)) {
- PyErr_Fetch(&__pyx_t_10, &__pyx_t_11, &__pyx_t_12);
- if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_gln_nij.rcbuffer->pybuffer, (PyObject*)__pyx_v_gln_nij, &__Pyx_TypeInfo_double, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) {
- Py_XDECREF(__pyx_t_10); Py_XDECREF(__pyx_t_11); Py_XDECREF(__pyx_t_12);
- __Pyx_RaiseBufferFallbackError();
- } else {
- PyErr_Restore(__pyx_t_10, __pyx_t_11, __pyx_t_12);
- }
- }
- __pyx_pybuffernd_gln_nij.diminfo[0].strides = __pyx_pybuffernd_gln_nij.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_gln_nij.diminfo[0].shape = __pyx_pybuffernd_gln_nij.rcbuffer->pybuffer.shape[0];
- if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 52, __pyx_L1_error)
- }
- __pyx_t_18 = 0;
- __pyx_v_gln_nij = ((PyArrayObject *)__pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":54
- * gln_nij = gammaln(nijs + 1)
- * # start and end values for nij terms for each summation.
- * start = np.array([[v - N + w for w in b] for v in a], dtype='int') # <<<<<<<<<<<<<<
- * start = np.maximum(start, 1)
- * end = np.minimum(np.resize(a, (C, R)).T, np.resize(b, (R, C))) + 1
- */
- __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_array); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_4 = PyList_New(0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- if (likely(PyList_CheckExact(((PyObject *)__pyx_v_a))) || PyTuple_CheckExact(((PyObject *)__pyx_v_a))) {
- __pyx_t_1 = ((PyObject *)__pyx_v_a); __Pyx_INCREF(__pyx_t_1); __pyx_t_20 = 0;
- __pyx_t_21 = NULL;
- } else {
- __pyx_t_20 = -1; __pyx_t_1 = PyObject_GetIter(((PyObject *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_t_21 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_21)) __PYX_ERR(0, 54, __pyx_L1_error)
- }
- for (;;) {
- if (likely(!__pyx_t_21)) {
- if (likely(PyList_CheckExact(__pyx_t_1))) {
- if (__pyx_t_20 >= PyList_GET_SIZE(__pyx_t_1)) break;
- #if CYTHON_COMPILING_IN_CPYTHON
- __pyx_t_13 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 54, __pyx_L1_error)
- #else
- __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- #endif
- } else {
- if (__pyx_t_20 >= PyTuple_GET_SIZE(__pyx_t_1)) break;
- #if CYTHON_COMPILING_IN_CPYTHON
- __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_20); __Pyx_INCREF(__pyx_t_13); __pyx_t_20++; if (unlikely(0 < 0)) __PYX_ERR(0, 54, __pyx_L1_error)
- #else
- __pyx_t_13 = PySequence_ITEM(__pyx_t_1, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- #endif
- }
- } else {
- __pyx_t_13 = __pyx_t_21(__pyx_t_1);
- if (unlikely(!__pyx_t_13)) {
- PyObject* exc_type = PyErr_Occurred();
- if (exc_type) {
- if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
- else __PYX_ERR(0, 54, __pyx_L1_error)
- }
- break;
- }
- __Pyx_GOTREF(__pyx_t_13);
- }
- __Pyx_XDECREF_SET(__pyx_v_v, __pyx_t_13);
- __pyx_t_13 = 0;
- __pyx_t_13 = PyList_New(0); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- if (likely(PyList_CheckExact(((PyObject *)__pyx_v_b))) || PyTuple_CheckExact(((PyObject *)__pyx_v_b))) {
- __pyx_t_2 = ((PyObject *)__pyx_v_b); __Pyx_INCREF(__pyx_t_2); __pyx_t_22 = 0;
- __pyx_t_23 = NULL;
- } else {
- __pyx_t_22 = -1; __pyx_t_2 = PyObject_GetIter(((PyObject *)__pyx_v_b)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __pyx_t_23 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_23)) __PYX_ERR(0, 54, __pyx_L1_error)
- }
- for (;;) {
- if (likely(!__pyx_t_23)) {
- if (likely(PyList_CheckExact(__pyx_t_2))) {
- if (__pyx_t_22 >= PyList_GET_SIZE(__pyx_t_2)) break;
- #if CYTHON_COMPILING_IN_CPYTHON
- __pyx_t_8 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_22); __Pyx_INCREF(__pyx_t_8); __pyx_t_22++; if (unlikely(0 < 0)) __PYX_ERR(0, 54, __pyx_L1_error)
- #else
- __pyx_t_8 = PySequence_ITEM(__pyx_t_2, __pyx_t_22); __pyx_t_22++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- #endif
- } else {
- if (__pyx_t_22 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
- #if CYTHON_COMPILING_IN_CPYTHON
- __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_22); __Pyx_INCREF(__pyx_t_8); __pyx_t_22++; if (unlikely(0 < 0)) __PYX_ERR(0, 54, __pyx_L1_error)
- #else
- __pyx_t_8 = PySequence_ITEM(__pyx_t_2, __pyx_t_22); __pyx_t_22++; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- #endif
- }
- } else {
- __pyx_t_8 = __pyx_t_23(__pyx_t_2);
- if (unlikely(!__pyx_t_8)) {
- PyObject* exc_type = PyErr_Occurred();
- if (exc_type) {
- if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
- else __PYX_ERR(0, 54, __pyx_L1_error)
- }
- break;
- }
- __Pyx_GOTREF(__pyx_t_8);
- }
- __Pyx_XDECREF_SET(__pyx_v_w, __pyx_t_8);
- __pyx_t_8 = 0;
- __pyx_t_8 = PyFloat_FromDouble(__pyx_v_N); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __pyx_t_24 = PyNumber_Subtract(__pyx_v_v, __pyx_t_8); if (unlikely(!__pyx_t_24)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_24);
- __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
- __pyx_t_8 = PyNumber_Add(__pyx_t_24, __pyx_v_w); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __Pyx_DECREF(__pyx_t_24); __pyx_t_24 = 0;
- if (unlikely(__Pyx_ListComp_Append(__pyx_t_13, (PyObject*)__pyx_t_8))) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
- }
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- if (unlikely(__Pyx_ListComp_Append(__pyx_t_4, (PyObject*)__pyx_t_13))) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- }
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_GIVEREF(__pyx_t_4);
- PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_4);
- __pyx_t_4 = 0;
- __pyx_t_4 = PyDict_New(); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- if (PyDict_SetItem(__pyx_t_4, __pyx_n_s_dtype, __pyx_n_s_int) < 0) __PYX_ERR(0, 54, __pyx_L1_error)
- __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_1, __pyx_t_4); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 54, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_v_start = __pyx_t_13;
- __pyx_t_13 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":55
- * # start and end values for nij terms for each summation.
- * start = np.array([[v - N + w for w in b] for v in a], dtype='int')
- * start = np.maximum(start, 1) # <<<<<<<<<<<<<<
- * end = np.minimum(np.resize(a, (C, R)).T, np.resize(b, (R, C))) + 1
- * # emi itself is a summation over the various values.
- */
- __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 55, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_maximum); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 55, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_4 = NULL;
- __pyx_t_20 = 0;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_1))) {
- __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_1);
- if (likely(__pyx_t_4)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1);
- __Pyx_INCREF(__pyx_t_4);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_1, function);
- __pyx_t_20 = 1;
- }
- }
- __pyx_t_3 = PyTuple_New(2+__pyx_t_20); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 55, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- if (__pyx_t_4) {
- __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4); __pyx_t_4 = NULL;
- }
- __Pyx_INCREF(__pyx_v_start);
- __Pyx_GIVEREF(__pyx_v_start);
- PyTuple_SET_ITEM(__pyx_t_3, 0+__pyx_t_20, __pyx_v_start);
- __Pyx_INCREF(__pyx_int_1);
- __Pyx_GIVEREF(__pyx_int_1);
- PyTuple_SET_ITEM(__pyx_t_3, 1+__pyx_t_20, __pyx_int_1);
- __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_3, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 55, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __Pyx_DECREF_SET(__pyx_v_start, __pyx_t_13);
- __pyx_t_13 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":56
- * start = np.array([[v - N + w for w in b] for v in a], dtype='int')
- * start = np.maximum(start, 1)
- * end = np.minimum(np.resize(a, (C, R)).T, np.resize(b, (R, C))) + 1 # <<<<<<<<<<<<<<
- * # emi itself is a summation over the various values.
- * emi = 0
- */
- __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_minimum); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_resize); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_C); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_R); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __pyx_t_24 = PyTuple_New(2); if (unlikely(!__pyx_t_24)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_24);
- __Pyx_GIVEREF(__pyx_t_4);
- PyTuple_SET_ITEM(__pyx_t_24, 0, __pyx_t_4);
- __Pyx_GIVEREF(__pyx_t_8);
- PyTuple_SET_ITEM(__pyx_t_24, 1, __pyx_t_8);
- __pyx_t_4 = 0;
- __pyx_t_8 = 0;
- __pyx_t_8 = NULL;
- __pyx_t_20 = 0;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) {
- __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_2);
- if (likely(__pyx_t_8)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
- __Pyx_INCREF(__pyx_t_8);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_2, function);
- __pyx_t_20 = 1;
- }
- }
- __pyx_t_4 = PyTuple_New(2+__pyx_t_20); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- if (__pyx_t_8) {
- __Pyx_GIVEREF(__pyx_t_8); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_8); __pyx_t_8 = NULL;
- }
- __Pyx_INCREF(((PyObject *)__pyx_v_a));
- __Pyx_GIVEREF(((PyObject *)__pyx_v_a));
- PyTuple_SET_ITEM(__pyx_t_4, 0+__pyx_t_20, ((PyObject *)__pyx_v_a));
- __Pyx_GIVEREF(__pyx_t_24);
- PyTuple_SET_ITEM(__pyx_t_4, 1+__pyx_t_20, __pyx_t_24);
- __pyx_t_24 = 0;
- __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
- __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_T); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
- __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_24 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_resize); if (unlikely(!__pyx_t_24)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_24);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_R); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_C); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_8);
- __pyx_t_25 = PyTuple_New(2); if (unlikely(!__pyx_t_25)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_25);
- __Pyx_GIVEREF(__pyx_t_4);
- PyTuple_SET_ITEM(__pyx_t_25, 0, __pyx_t_4);
- __Pyx_GIVEREF(__pyx_t_8);
- PyTuple_SET_ITEM(__pyx_t_25, 1, __pyx_t_8);
- __pyx_t_4 = 0;
- __pyx_t_8 = 0;
- __pyx_t_8 = NULL;
- __pyx_t_20 = 0;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_24))) {
- __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_24);
- if (likely(__pyx_t_8)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_24);
- __Pyx_INCREF(__pyx_t_8);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_24, function);
- __pyx_t_20 = 1;
- }
- }
- __pyx_t_4 = PyTuple_New(2+__pyx_t_20); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- if (__pyx_t_8) {
- __Pyx_GIVEREF(__pyx_t_8); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_8); __pyx_t_8 = NULL;
- }
- __Pyx_INCREF(((PyObject *)__pyx_v_b));
- __Pyx_GIVEREF(((PyObject *)__pyx_v_b));
- PyTuple_SET_ITEM(__pyx_t_4, 0+__pyx_t_20, ((PyObject *)__pyx_v_b));
- __Pyx_GIVEREF(__pyx_t_25);
- PyTuple_SET_ITEM(__pyx_t_4, 1+__pyx_t_20, __pyx_t_25);
- __pyx_t_25 = 0;
- __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_24, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __Pyx_DECREF(__pyx_t_24); __pyx_t_24 = 0;
- __pyx_t_24 = NULL;
- __pyx_t_20 = 0;
- if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_3))) {
- __pyx_t_24 = PyMethod_GET_SELF(__pyx_t_3);
- if (likely(__pyx_t_24)) {
- PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_24);
- __Pyx_INCREF(function);
- __Pyx_DECREF_SET(__pyx_t_3, function);
- __pyx_t_20 = 1;
- }
- }
- __pyx_t_4 = PyTuple_New(2+__pyx_t_20); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- if (__pyx_t_24) {
- __Pyx_GIVEREF(__pyx_t_24); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_24); __pyx_t_24 = NULL;
- }
- __Pyx_GIVEREF(__pyx_t_2);
- PyTuple_SET_ITEM(__pyx_t_4, 0+__pyx_t_20, __pyx_t_2);
- __Pyx_GIVEREF(__pyx_t_1);
- PyTuple_SET_ITEM(__pyx_t_4, 1+__pyx_t_20, __pyx_t_1);
- __pyx_t_2 = 0;
- __pyx_t_1 = 0;
- __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_4, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_3 = __Pyx_PyInt_AddObjC(__pyx_t_13, __pyx_int_1, 1, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 56, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- __pyx_v_end = __pyx_t_3;
- __pyx_t_3 = 0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":58
- * end = np.minimum(np.resize(a, (C, R)).T, np.resize(b, (R, C))) + 1
- * # emi itself is a summation over the various values.
- * emi = 0 # <<<<<<<<<<<<<<
- * cdef Py_ssize_t i, j, nij
- * for i in range(R):
- */
- __pyx_v_emi = 0.0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":60
- * emi = 0
- * cdef Py_ssize_t i, j, nij
- * for i in range(R): # <<<<<<<<<<<<<<
- * for j in range(C):
- * for nij in range(start[i,j], end[i,j]):
- */
- __pyx_t_7 = __pyx_v_R;
- for (__pyx_t_20 = 0; __pyx_t_20 < __pyx_t_7; __pyx_t_20+=1) {
- __pyx_v_i = __pyx_t_20;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":61
- * cdef Py_ssize_t i, j, nij
- * for i in range(R):
- * for j in range(C): # <<<<<<<<<<<<<<
- * for nij in range(start[i,j], end[i,j]):
- * term2 = log_Nnij[nij] - log_ab_outer[i,j]
- */
- __pyx_t_6 = __pyx_v_C;
- for (__pyx_t_22 = 0; __pyx_t_22 < __pyx_t_6; __pyx_t_22+=1) {
- __pyx_v_j = __pyx_t_22;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":62
- * for i in range(R):
- * for j in range(C):
- * for nij in range(start[i,j], end[i,j]): # <<<<<<<<<<<<<<
- * term2 = log_Nnij[nij] - log_ab_outer[i,j]
- * # Numerators are positive, denominators are negative.
- */
- __pyx_t_3 = PyInt_FromSsize_t(__pyx_v_i); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_13 = PyInt_FromSsize_t(__pyx_v_j); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_GIVEREF(__pyx_t_3);
- PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3);
- __Pyx_GIVEREF(__pyx_t_13);
- PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_13);
- __pyx_t_3 = 0;
- __pyx_t_13 = 0;
- __pyx_t_13 = PyObject_GetItem(__pyx_v_end, __pyx_t_4); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_26 = __Pyx_PyInt_As_long(__pyx_t_13); if (unlikely((__pyx_t_26 == (long)-1) && PyErr_Occurred())) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0;
- __pyx_t_13 = PyInt_FromSsize_t(__pyx_v_i); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_13);
- __pyx_t_4 = PyInt_FromSsize_t(__pyx_v_j); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_GIVEREF(__pyx_t_13);
- PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_13);
- __Pyx_GIVEREF(__pyx_t_4);
- PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_4);
- __pyx_t_13 = 0;
- __pyx_t_4 = 0;
- __pyx_t_4 = PyObject_GetItem(__pyx_v_start, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_27 = __Pyx_PyInt_As_long(__pyx_t_4); if (unlikely((__pyx_t_27 == (long)-1) && PyErr_Occurred())) __PYX_ERR(0, 62, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- for (__pyx_t_28 = __pyx_t_27; __pyx_t_28 < __pyx_t_26; __pyx_t_28+=1) {
- __pyx_v_nij = __pyx_t_28;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":63
- * for j in range(C):
- * for nij in range(start[i,j], end[i,j]):
- * term2 = log_Nnij[nij] - log_ab_outer[i,j] # <<<<<<<<<<<<<<
- * # Numerators are positive, denominators are negative.
- * gln = (gln_a[i] + gln_b[j] + gln_Na[i] + gln_Nb[j]
- */
- __pyx_t_29 = __pyx_v_nij;
- __pyx_t_30 = __pyx_v_i;
- __pyx_t_31 = __pyx_v_j;
- __pyx_v_term2 = ((*__Pyx_BufPtrStrided1d(double *, __pyx_pybuffernd_log_Nnij.rcbuffer->pybuffer.buf, __pyx_t_29, __pyx_pybuffernd_log_Nnij.diminfo[0].strides)) - (*__Pyx_BufPtrStrided2d(double *, __pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer.buf, __pyx_t_30, __pyx_pybuffernd_log_ab_outer.diminfo[0].strides, __pyx_t_31, __pyx_pybuffernd_log_ab_outer.diminfo[1].strides)));
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":65
- * term2 = log_Nnij[nij] - log_ab_outer[i,j]
- * # Numerators are positive, denominators are negative.
- * gln = (gln_a[i] + gln_b[j] + gln_Na[i] + gln_Nb[j] # <<<<<<<<<<<<<<
- * - gln_N - gln_nij[nij] - lgamma(a[i] - nij + 1)
- * - lgamma(b[j] - nij + 1)
- */
- __pyx_t_32 = __pyx_v_i;
- __pyx_t_33 = __pyx_v_j;
- __pyx_t_34 = __pyx_v_i;
- __pyx_t_35 = __pyx_v_j;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":66
- * # Numerators are positive, denominators are negative.
- * gln = (gln_a[i] + gln_b[j] + gln_Na[i] + gln_Nb[j]
- * - gln_N - gln_nij[nij] - lgamma(a[i] - nij + 1) # <<<<<<<<<<<<<<
- * - lgamma(b[j] - nij + 1)
- * - lgamma(N - a[i] - b[j] + nij + 1))
- */
- __pyx_t_36 = __pyx_v_nij;
- __pyx_t_37 = __pyx_v_i;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":67
- * gln = (gln_a[i] + gln_b[j] + gln_Na[i] + gln_Nb[j]
- * - gln_N - gln_nij[nij] - lgamma(a[i] - nij + 1)
- * - lgamma(b[j] - nij + 1) # <<<<<<<<<<<<<<
- * - lgamma(N - a[i] - b[j] + nij + 1))
- * term3 = exp(gln)
- */
- __pyx_t_38 = __pyx_v_j;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":68
- * - gln_N - gln_nij[nij] - lgamma(a[i] - nij + 1)
- * - lgamma(b[j] - nij + 1)
- * - lgamma(N - a[i] - b[j] + nij + 1)) # <<<<<<<<<<<<<<
- * term3 = exp(gln)
- * emi += (term1[nij] * term2 * term3)
- */
- __pyx_t_39 = __pyx_v_i;
- __pyx_t_40 = __pyx_v_j;
- __pyx_v_gln = (((((((((*__Pyx_BufPtrStrided1d(double *, __pyx_pybuffernd_gln_a.rcbuffer->pybuffer.buf, __pyx_t_32, __pyx_pybuffernd_gln_a.diminfo[0].strides)) + (*__Pyx_BufPtrStrided1d(double *, __pyx_pybuffernd_gln_b.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_gln_b.diminfo[0].strides))) + (*__Pyx_BufPtrStrided1d(double *, __pyx_pybuffernd_gln_Na.rcbuffer->pybuffer.buf, __pyx_t_34, __pyx_pybuffernd_gln_Na.diminfo[0].strides))) + (*__Pyx_BufPtrStrided1d(double *, __pyx_pybuffernd_gln_Nb.rcbuffer->pybuffer.buf, __pyx_t_35, __pyx_pybuffernd_gln_Nb.diminfo[0].strides))) - __pyx_v_gln_N) - (*__Pyx_BufPtrStrided1d(double *, __pyx_pybuffernd_gln_nij.rcbuffer->pybuffer.buf, __pyx_t_36, __pyx_pybuffernd_gln_nij.diminfo[0].strides))) - __pyx_f_7sklearn_5utils_6lgamma_lgamma((((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_a.rcbuffer->pybuffer.buf, __pyx_t_37, __pyx_pybuffernd_a.diminfo[0].strides)) - __pyx_v_nij) + 1))) - __pyx_f_7sklearn_5utils_6lgamma_lgamma((((*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_b.rcbuffer->pybuffer.buf, __pyx_t_38, __pyx_pybuffernd_b.diminfo[0].strides)) - __pyx_v_nij) + 1))) - __pyx_f_7sklearn_5utils_6lgamma_lgamma(((((__pyx_v_N - (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_a.rcbuffer->pybuffer.buf, __pyx_t_39, __pyx_pybuffernd_a.diminfo[0].strides))) - (*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_b.rcbuffer->pybuffer.buf, __pyx_t_40, __pyx_pybuffernd_b.diminfo[0].strides))) + __pyx_v_nij) + 1.0)));
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":69
- * - lgamma(b[j] - nij + 1)
- * - lgamma(N - a[i] - b[j] + nij + 1))
- * term3 = exp(gln) # <<<<<<<<<<<<<<
- * emi += (term1[nij] * term2 * term3)
- * return emi
- */
- __pyx_v_term3 = exp(__pyx_v_gln);
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":70
- * - lgamma(N - a[i] - b[j] + nij + 1))
- * term3 = exp(gln)
- * emi += (term1[nij] * term2 * term3) # <<<<<<<<<<<<<<
- * return emi
- */
- __pyx_t_41 = __pyx_v_nij;
- __pyx_v_emi = (__pyx_v_emi + (((*__Pyx_BufPtrStrided1d(double *, __pyx_pybuffernd_term1.rcbuffer->pybuffer.buf, __pyx_t_41, __pyx_pybuffernd_term1.diminfo[0].strides)) * __pyx_v_term2) * __pyx_v_term3));
- }
- }
- }
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":71
- * term3 = exp(gln)
- * emi += (term1[nij] * term2 * term3)
- * return emi # <<<<<<<<<<<<<<
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_4 = PyFloat_FromDouble(__pyx_v_emi); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 71, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_r = __pyx_t_4;
- __pyx_t_4 = 0;
- goto __pyx_L0;
-
- /* "sklearn/metrics/cluster/expected_mutual_info_fast.pyx":20
- * @cython.boundscheck(False)
- * @cython.wraparound(False)
- * def expected_mutual_information(contingency, int n_samples): # <<<<<<<<<<<<<<
- * """Calculate the expected mutual information for two labelings."""
- * cdef int R, C
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_XDECREF(__pyx_t_2);
- __Pyx_XDECREF(__pyx_t_3);
- __Pyx_XDECREF(__pyx_t_4);
- __Pyx_XDECREF(__pyx_t_8);
- __Pyx_XDECREF(__pyx_t_13);
- __Pyx_XDECREF(__pyx_t_24);
- __Pyx_XDECREF(__pyx_t_25);
- { PyObject *__pyx_type, *__pyx_value, *__pyx_tb;
- __Pyx_PyThreadState_declare
- __Pyx_PyThreadState_assign
- __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_a.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_b.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_Na.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_Nb.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_a.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_b.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_nij.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_log_Nnij.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_nijs.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_term1.rcbuffer->pybuffer);
- __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);}
- __Pyx_AddTraceback("sklearn.metrics.cluster.expected_mutual_info_fast.expected_mutual_information", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = NULL;
- goto __pyx_L2;
- __pyx_L0:;
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_a.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_b.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_Na.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_Nb.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_a.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_b.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_gln_nij.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_log_Nnij.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_log_ab_outer.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_nijs.rcbuffer->pybuffer);
- __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_term1.rcbuffer->pybuffer);
- __pyx_L2:;
- __Pyx_XDECREF((PyObject *)__pyx_v_gln_a);
- __Pyx_XDECREF((PyObject *)__pyx_v_gln_b);
- __Pyx_XDECREF((PyObject *)__pyx_v_gln_Na);
- __Pyx_XDECREF((PyObject *)__pyx_v_gln_Nb);
- __Pyx_XDECREF((PyObject *)__pyx_v_gln_nij);
- __Pyx_XDECREF((PyObject *)__pyx_v_log_Nnij);
- __Pyx_XDECREF((PyObject *)__pyx_v_nijs);
- __Pyx_XDECREF((PyObject *)__pyx_v_term1);
- __Pyx_XDECREF((PyObject *)__pyx_v_log_ab_outer);
- __Pyx_XDECREF((PyObject *)__pyx_v_a);
- __Pyx_XDECREF((PyObject *)__pyx_v_b);
- __Pyx_XDECREF(__pyx_v_start);
- __Pyx_XDECREF(__pyx_v_end);
- __Pyx_XDECREF(__pyx_v_v);
- __Pyx_XDECREF(__pyx_v_w);
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":197
- * # experimental exception made for __getbuffer__ and __releasebuffer__
- * # -- the details of this may change.
- * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<<
- * # This implementation of getbuffer is geared towards Cython
- * # requirements, and does not yet fullfill the PEP.
- */
-
-/* Python wrapper */
-static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
-static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
- int __pyx_r;
- __Pyx_RefNannyDeclarations
- __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0);
- __pyx_r = __pyx_pf_5numpy_7ndarray___getbuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags));
-
- /* function exit code */
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) {
- int __pyx_v_copy_shape;
- int __pyx_v_i;
- int __pyx_v_ndim;
- int __pyx_v_endian_detector;
- int __pyx_v_little_endian;
- int __pyx_v_t;
- char *__pyx_v_f;
- PyArray_Descr *__pyx_v_descr = 0;
- int __pyx_v_offset;
- int __pyx_v_hasfields;
- int __pyx_r;
- __Pyx_RefNannyDeclarations
- int __pyx_t_1;
- int __pyx_t_2;
- PyObject *__pyx_t_3 = NULL;
- int __pyx_t_4;
- int __pyx_t_5;
- PyObject *__pyx_t_6 = NULL;
- char *__pyx_t_7;
- __Pyx_RefNannySetupContext("__getbuffer__", 0);
- if (__pyx_v_info != NULL) {
- __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None);
- __Pyx_GIVEREF(__pyx_v_info->obj);
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":203
- * # of flags
- *
- * if info == NULL: return # <<<<<<<<<<<<<<
- *
- * cdef int copy_shape, i, ndim
- */
- __pyx_t_1 = ((__pyx_v_info == NULL) != 0);
- if (__pyx_t_1) {
- __pyx_r = 0;
- goto __pyx_L0;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":206
- *
- * cdef int copy_shape, i, ndim
- * cdef int endian_detector = 1 # <<<<<<<<<<<<<<
- * cdef bint little_endian = ((&endian_detector)[0] != 0)
- *
- */
- __pyx_v_endian_detector = 1;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":207
- * cdef int copy_shape, i, ndim
- * cdef int endian_detector = 1
- * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<<
- *
- * ndim = PyArray_NDIM(self)
- */
- __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":209
- * cdef bint little_endian = ((&endian_detector)[0] != 0)
- *
- * ndim = PyArray_NDIM(self) # <<<<<<<<<<<<<<
- *
- * if sizeof(npy_intp) != sizeof(Py_ssize_t):
- */
- __pyx_v_ndim = PyArray_NDIM(__pyx_v_self);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":211
- * ndim = PyArray_NDIM(self)
- *
- * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<<
- * copy_shape = 1
- * else:
- */
- __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0);
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":212
- *
- * if sizeof(npy_intp) != sizeof(Py_ssize_t):
- * copy_shape = 1 # <<<<<<<<<<<<<<
- * else:
- * copy_shape = 0
- */
- __pyx_v_copy_shape = 1;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":211
- * ndim = PyArray_NDIM(self)
- *
- * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<<
- * copy_shape = 1
- * else:
- */
- goto __pyx_L4;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":214
- * copy_shape = 1
- * else:
- * copy_shape = 0 # <<<<<<<<<<<<<<
- *
- * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
- */
- /*else*/ {
- __pyx_v_copy_shape = 0;
- }
- __pyx_L4:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":216
- * copy_shape = 0
- *
- * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<<
- * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):
- * raise ValueError(u"ndarray is not C contiguous")
- */
- __pyx_t_2 = (((__pyx_v_flags & PyBUF_C_CONTIGUOUS) == PyBUF_C_CONTIGUOUS) != 0);
- if (__pyx_t_2) {
- } else {
- __pyx_t_1 = __pyx_t_2;
- goto __pyx_L6_bool_binop_done;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":217
- *
- * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
- * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): # <<<<<<<<<<<<<<
- * raise ValueError(u"ndarray is not C contiguous")
- *
- */
- __pyx_t_2 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_C_CONTIGUOUS) != 0)) != 0);
- __pyx_t_1 = __pyx_t_2;
- __pyx_L6_bool_binop_done:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":216
- * copy_shape = 0
- *
- * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<<
- * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):
- * raise ValueError(u"ndarray is not C contiguous")
- */
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":218
- * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS)
- * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):
- * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<<
- *
- * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
- */
- __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 218, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_Raise(__pyx_t_3, 0, 0, 0);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __PYX_ERR(1, 218, __pyx_L1_error)
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":216
- * copy_shape = 0
- *
- * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<<
- * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)):
- * raise ValueError(u"ndarray is not C contiguous")
- */
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":220
- * raise ValueError(u"ndarray is not C contiguous")
- *
- * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<<
- * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):
- * raise ValueError(u"ndarray is not Fortran contiguous")
- */
- __pyx_t_2 = (((__pyx_v_flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS) != 0);
- if (__pyx_t_2) {
- } else {
- __pyx_t_1 = __pyx_t_2;
- goto __pyx_L9_bool_binop_done;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":221
- *
- * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
- * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): # <<<<<<<<<<<<<<
- * raise ValueError(u"ndarray is not Fortran contiguous")
- *
- */
- __pyx_t_2 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_F_CONTIGUOUS) != 0)) != 0);
- __pyx_t_1 = __pyx_t_2;
- __pyx_L9_bool_binop_done:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":220
- * raise ValueError(u"ndarray is not C contiguous")
- *
- * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<<
- * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):
- * raise ValueError(u"ndarray is not Fortran contiguous")
- */
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":222
- * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS)
- * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):
- * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<<
- *
- * info.buf = PyArray_DATA(self)
- */
- __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 222, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_Raise(__pyx_t_3, 0, 0, 0);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __PYX_ERR(1, 222, __pyx_L1_error)
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":220
- * raise ValueError(u"ndarray is not C contiguous")
- *
- * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<<
- * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)):
- * raise ValueError(u"ndarray is not Fortran contiguous")
- */
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":224
- * raise ValueError(u"ndarray is not Fortran contiguous")
- *
- * info.buf = PyArray_DATA(self) # <<<<<<<<<<<<<<
- * info.ndim = ndim
- * if copy_shape:
- */
- __pyx_v_info->buf = PyArray_DATA(__pyx_v_self);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":225
- *
- * info.buf = PyArray_DATA(self)
- * info.ndim = ndim # <<<<<<<<<<<<<<
- * if copy_shape:
- * # Allocate new buffer for strides and shape info.
- */
- __pyx_v_info->ndim = __pyx_v_ndim;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":226
- * info.buf = PyArray_DATA(self)
- * info.ndim = ndim
- * if copy_shape: # <<<<<<<<<<<<<<
- * # Allocate new buffer for strides and shape info.
- * # This is allocated as one block, strides first.
- */
- __pyx_t_1 = (__pyx_v_copy_shape != 0);
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":229
- * # Allocate new buffer for strides and shape info.
- * # This is allocated as one block, strides first.
- * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) # <<<<<<<<<<<<<<
- * info.shape = info.strides + ndim
- * for i in range(ndim):
- */
- __pyx_v_info->strides = ((Py_ssize_t *)malloc((((sizeof(Py_ssize_t)) * ((size_t)__pyx_v_ndim)) * 2)));
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":230
- * # This is allocated as one block, strides first.
- * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2)
- * info.shape = info.strides + ndim # <<<<<<<<<<<<<<
- * for i in range(ndim):
- * info.strides[i] = PyArray_STRIDES(self)[i]
- */
- __pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":231
- * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2)
- * info.shape = info.strides + ndim
- * for i in range(ndim): # <<<<<<<<<<<<<<
- * info.strides[i] = PyArray_STRIDES(self)[i]
- * info.shape[i] = PyArray_DIMS(self)[i]
- */
- __pyx_t_4 = __pyx_v_ndim;
- for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) {
- __pyx_v_i = __pyx_t_5;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":232
- * info.shape = info.strides + ndim
- * for i in range(ndim):
- * info.strides[i] = PyArray_STRIDES(self)[i] # <<<<<<<<<<<<<<
- * info.shape[i] = PyArray_DIMS(self)[i]
- * else:
- */
- (__pyx_v_info->strides[__pyx_v_i]) = (PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":233
- * for i in range(ndim):
- * info.strides[i] = PyArray_STRIDES(self)[i]
- * info.shape[i] = PyArray_DIMS(self)[i] # <<<<<<<<<<<<<<
- * else:
- * info.strides = PyArray_STRIDES(self)
- */
- (__pyx_v_info->shape[__pyx_v_i]) = (PyArray_DIMS(__pyx_v_self)[__pyx_v_i]);
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":226
- * info.buf = PyArray_DATA(self)
- * info.ndim = ndim
- * if copy_shape: # <<<<<<<<<<<<<<
- * # Allocate new buffer for strides and shape info.
- * # This is allocated as one block, strides first.
- */
- goto __pyx_L11;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":235
- * info.shape[i] = PyArray_DIMS(self)[i]
- * else:
- * info.strides = PyArray_STRIDES(self) # <<<<<<<<<<<<<<
- * info.shape = PyArray_DIMS(self)
- * info.suboffsets = NULL
- */
- /*else*/ {
- __pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self));
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":236
- * else:
- * info.strides = PyArray_STRIDES(self)
- * info.shape = PyArray_DIMS(self) # <<<<<<<<<<<<<<
- * info.suboffsets = NULL
- * info.itemsize = PyArray_ITEMSIZE(self)
- */
- __pyx_v_info->shape = ((Py_ssize_t *)PyArray_DIMS(__pyx_v_self));
- }
- __pyx_L11:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":237
- * info.strides = PyArray_STRIDES(self)
- * info.shape = PyArray_DIMS(self)
- * info.suboffsets = NULL # <<<<<<<<<<<<<<
- * info.itemsize = PyArray_ITEMSIZE(self)
- * info.readonly = not PyArray_ISWRITEABLE(self)
- */
- __pyx_v_info->suboffsets = NULL;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":238
- * info.shape = PyArray_DIMS(self)
- * info.suboffsets = NULL
- * info.itemsize = PyArray_ITEMSIZE(self) # <<<<<<<<<<<<<<
- * info.readonly = not PyArray_ISWRITEABLE(self)
- *
- */
- __pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":239
- * info.suboffsets = NULL
- * info.itemsize = PyArray_ITEMSIZE(self)
- * info.readonly = not PyArray_ISWRITEABLE(self) # <<<<<<<<<<<<<<
- *
- * cdef int t
- */
- __pyx_v_info->readonly = (!(PyArray_ISWRITEABLE(__pyx_v_self) != 0));
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":242
- *
- * cdef int t
- * cdef char* f = NULL # <<<<<<<<<<<<<<
- * cdef dtype descr = self.descr
- * cdef int offset
- */
- __pyx_v_f = NULL;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":243
- * cdef int t
- * cdef char* f = NULL
- * cdef dtype descr = self.descr # <<<<<<<<<<<<<<
- * cdef int offset
- *
- */
- __pyx_t_3 = ((PyObject *)__pyx_v_self->descr);
- __Pyx_INCREF(__pyx_t_3);
- __pyx_v_descr = ((PyArray_Descr *)__pyx_t_3);
- __pyx_t_3 = 0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":246
- * cdef int offset
- *
- * cdef bint hasfields = PyDataType_HASFIELDS(descr) # <<<<<<<<<<<<<<
- *
- * if not hasfields and not copy_shape:
- */
- __pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":248
- * cdef bint hasfields = PyDataType_HASFIELDS(descr)
- *
- * if not hasfields and not copy_shape: # <<<<<<<<<<<<<<
- * # do not call releasebuffer
- * info.obj = None
- */
- __pyx_t_2 = ((!(__pyx_v_hasfields != 0)) != 0);
- if (__pyx_t_2) {
- } else {
- __pyx_t_1 = __pyx_t_2;
- goto __pyx_L15_bool_binop_done;
- }
- __pyx_t_2 = ((!(__pyx_v_copy_shape != 0)) != 0);
- __pyx_t_1 = __pyx_t_2;
- __pyx_L15_bool_binop_done:;
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":250
- * if not hasfields and not copy_shape:
- * # do not call releasebuffer
- * info.obj = None # <<<<<<<<<<<<<<
- * else:
- * # need to call releasebuffer
- */
- __Pyx_INCREF(Py_None);
- __Pyx_GIVEREF(Py_None);
- __Pyx_GOTREF(__pyx_v_info->obj);
- __Pyx_DECREF(__pyx_v_info->obj);
- __pyx_v_info->obj = Py_None;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":248
- * cdef bint hasfields = PyDataType_HASFIELDS(descr)
- *
- * if not hasfields and not copy_shape: # <<<<<<<<<<<<<<
- * # do not call releasebuffer
- * info.obj = None
- */
- goto __pyx_L14;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":253
- * else:
- * # need to call releasebuffer
- * info.obj = self # <<<<<<<<<<<<<<
- *
- * if not hasfields:
- */
- /*else*/ {
- __Pyx_INCREF(((PyObject *)__pyx_v_self));
- __Pyx_GIVEREF(((PyObject *)__pyx_v_self));
- __Pyx_GOTREF(__pyx_v_info->obj);
- __Pyx_DECREF(__pyx_v_info->obj);
- __pyx_v_info->obj = ((PyObject *)__pyx_v_self);
- }
- __pyx_L14:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":255
- * info.obj = self
- *
- * if not hasfields: # <<<<<<<<<<<<<<
- * t = descr.type_num
- * if ((descr.byteorder == c'>' and little_endian) or
- */
- __pyx_t_1 = ((!(__pyx_v_hasfields != 0)) != 0);
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":256
- *
- * if not hasfields:
- * t = descr.type_num # <<<<<<<<<<<<<<
- * if ((descr.byteorder == c'>' and little_endian) or
- * (descr.byteorder == c'<' and not little_endian)):
- */
- __pyx_t_4 = __pyx_v_descr->type_num;
- __pyx_v_t = __pyx_t_4;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":257
- * if not hasfields:
- * t = descr.type_num
- * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<<
- * (descr.byteorder == c'<' and not little_endian)):
- * raise ValueError(u"Non-native byte order not supported")
- */
- __pyx_t_2 = ((__pyx_v_descr->byteorder == '>') != 0);
- if (!__pyx_t_2) {
- goto __pyx_L20_next_or;
- } else {
- }
- __pyx_t_2 = (__pyx_v_little_endian != 0);
- if (!__pyx_t_2) {
- } else {
- __pyx_t_1 = __pyx_t_2;
- goto __pyx_L19_bool_binop_done;
- }
- __pyx_L20_next_or:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":258
- * t = descr.type_num
- * if ((descr.byteorder == c'>' and little_endian) or
- * (descr.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<<
- * raise ValueError(u"Non-native byte order not supported")
- * if t == NPY_BYTE: f = "b"
- */
- __pyx_t_2 = ((__pyx_v_descr->byteorder == '<') != 0);
- if (__pyx_t_2) {
- } else {
- __pyx_t_1 = __pyx_t_2;
- goto __pyx_L19_bool_binop_done;
- }
- __pyx_t_2 = ((!(__pyx_v_little_endian != 0)) != 0);
- __pyx_t_1 = __pyx_t_2;
- __pyx_L19_bool_binop_done:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":257
- * if not hasfields:
- * t = descr.type_num
- * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<<
- * (descr.byteorder == c'<' and not little_endian)):
- * raise ValueError(u"Non-native byte order not supported")
- */
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":259
- * if ((descr.byteorder == c'>' and little_endian) or
- * (descr.byteorder == c'<' and not little_endian)):
- * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<<
- * if t == NPY_BYTE: f = "b"
- * elif t == NPY_UBYTE: f = "B"
- */
- __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 259, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_Raise(__pyx_t_3, 0, 0, 0);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __PYX_ERR(1, 259, __pyx_L1_error)
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":257
- * if not hasfields:
- * t = descr.type_num
- * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<<
- * (descr.byteorder == c'<' and not little_endian)):
- * raise ValueError(u"Non-native byte order not supported")
- */
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":260
- * (descr.byteorder == c'<' and not little_endian)):
- * raise ValueError(u"Non-native byte order not supported")
- * if t == NPY_BYTE: f = "b" # <<<<<<<<<<<<<<
- * elif t == NPY_UBYTE: f = "B"
- * elif t == NPY_SHORT: f = "h"
- */
- switch (__pyx_v_t) {
- case NPY_BYTE:
- __pyx_v_f = ((char *)"b");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":261
- * raise ValueError(u"Non-native byte order not supported")
- * if t == NPY_BYTE: f = "b"
- * elif t == NPY_UBYTE: f = "B" # <<<<<<<<<<<<<<
- * elif t == NPY_SHORT: f = "h"
- * elif t == NPY_USHORT: f = "H"
- */
- case NPY_UBYTE:
- __pyx_v_f = ((char *)"B");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":262
- * if t == NPY_BYTE: f = "b"
- * elif t == NPY_UBYTE: f = "B"
- * elif t == NPY_SHORT: f = "h" # <<<<<<<<<<<<<<
- * elif t == NPY_USHORT: f = "H"
- * elif t == NPY_INT: f = "i"
- */
- case NPY_SHORT:
- __pyx_v_f = ((char *)"h");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":263
- * elif t == NPY_UBYTE: f = "B"
- * elif t == NPY_SHORT: f = "h"
- * elif t == NPY_USHORT: f = "H" # <<<<<<<<<<<<<<
- * elif t == NPY_INT: f = "i"
- * elif t == NPY_UINT: f = "I"
- */
- case NPY_USHORT:
- __pyx_v_f = ((char *)"H");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":264
- * elif t == NPY_SHORT: f = "h"
- * elif t == NPY_USHORT: f = "H"
- * elif t == NPY_INT: f = "i" # <<<<<<<<<<<<<<
- * elif t == NPY_UINT: f = "I"
- * elif t == NPY_LONG: f = "l"
- */
- case NPY_INT:
- __pyx_v_f = ((char *)"i");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":265
- * elif t == NPY_USHORT: f = "H"
- * elif t == NPY_INT: f = "i"
- * elif t == NPY_UINT: f = "I" # <<<<<<<<<<<<<<
- * elif t == NPY_LONG: f = "l"
- * elif t == NPY_ULONG: f = "L"
- */
- case NPY_UINT:
- __pyx_v_f = ((char *)"I");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":266
- * elif t == NPY_INT: f = "i"
- * elif t == NPY_UINT: f = "I"
- * elif t == NPY_LONG: f = "l" # <<<<<<<<<<<<<<
- * elif t == NPY_ULONG: f = "L"
- * elif t == NPY_LONGLONG: f = "q"
- */
- case NPY_LONG:
- __pyx_v_f = ((char *)"l");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":267
- * elif t == NPY_UINT: f = "I"
- * elif t == NPY_LONG: f = "l"
- * elif t == NPY_ULONG: f = "L" # <<<<<<<<<<<<<<
- * elif t == NPY_LONGLONG: f = "q"
- * elif t == NPY_ULONGLONG: f = "Q"
- */
- case NPY_ULONG:
- __pyx_v_f = ((char *)"L");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":268
- * elif t == NPY_LONG: f = "l"
- * elif t == NPY_ULONG: f = "L"
- * elif t == NPY_LONGLONG: f = "q" # <<<<<<<<<<<<<<
- * elif t == NPY_ULONGLONG: f = "Q"
- * elif t == NPY_FLOAT: f = "f"
- */
- case NPY_LONGLONG:
- __pyx_v_f = ((char *)"q");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":269
- * elif t == NPY_ULONG: f = "L"
- * elif t == NPY_LONGLONG: f = "q"
- * elif t == NPY_ULONGLONG: f = "Q" # <<<<<<<<<<<<<<
- * elif t == NPY_FLOAT: f = "f"
- * elif t == NPY_DOUBLE: f = "d"
- */
- case NPY_ULONGLONG:
- __pyx_v_f = ((char *)"Q");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":270
- * elif t == NPY_LONGLONG: f = "q"
- * elif t == NPY_ULONGLONG: f = "Q"
- * elif t == NPY_FLOAT: f = "f" # <<<<<<<<<<<<<<
- * elif t == NPY_DOUBLE: f = "d"
- * elif t == NPY_LONGDOUBLE: f = "g"
- */
- case NPY_FLOAT:
- __pyx_v_f = ((char *)"f");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":271
- * elif t == NPY_ULONGLONG: f = "Q"
- * elif t == NPY_FLOAT: f = "f"
- * elif t == NPY_DOUBLE: f = "d" # <<<<<<<<<<<<<<
- * elif t == NPY_LONGDOUBLE: f = "g"
- * elif t == NPY_CFLOAT: f = "Zf"
- */
- case NPY_DOUBLE:
- __pyx_v_f = ((char *)"d");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":272
- * elif t == NPY_FLOAT: f = "f"
- * elif t == NPY_DOUBLE: f = "d"
- * elif t == NPY_LONGDOUBLE: f = "g" # <<<<<<<<<<<<<<
- * elif t == NPY_CFLOAT: f = "Zf"
- * elif t == NPY_CDOUBLE: f = "Zd"
- */
- case NPY_LONGDOUBLE:
- __pyx_v_f = ((char *)"g");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":273
- * elif t == NPY_DOUBLE: f = "d"
- * elif t == NPY_LONGDOUBLE: f = "g"
- * elif t == NPY_CFLOAT: f = "Zf" # <<<<<<<<<<<<<<
- * elif t == NPY_CDOUBLE: f = "Zd"
- * elif t == NPY_CLONGDOUBLE: f = "Zg"
- */
- case NPY_CFLOAT:
- __pyx_v_f = ((char *)"Zf");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":274
- * elif t == NPY_LONGDOUBLE: f = "g"
- * elif t == NPY_CFLOAT: f = "Zf"
- * elif t == NPY_CDOUBLE: f = "Zd" # <<<<<<<<<<<<<<
- * elif t == NPY_CLONGDOUBLE: f = "Zg"
- * elif t == NPY_OBJECT: f = "O"
- */
- case NPY_CDOUBLE:
- __pyx_v_f = ((char *)"Zd");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":275
- * elif t == NPY_CFLOAT: f = "Zf"
- * elif t == NPY_CDOUBLE: f = "Zd"
- * elif t == NPY_CLONGDOUBLE: f = "Zg" # <<<<<<<<<<<<<<
- * elif t == NPY_OBJECT: f = "O"
- * else:
- */
- case NPY_CLONGDOUBLE:
- __pyx_v_f = ((char *)"Zg");
- break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":276
- * elif t == NPY_CDOUBLE: f = "Zd"
- * elif t == NPY_CLONGDOUBLE: f = "Zg"
- * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<<
- * else:
- * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
- */
- case NPY_OBJECT:
- __pyx_v_f = ((char *)"O");
- break;
- default:
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":278
- * elif t == NPY_OBJECT: f = "O"
- * else:
- * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<<
- * info.format = f
- * return
- */
- __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_t); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 278, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_6 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_t_3); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 278, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_6);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 278, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_GIVEREF(__pyx_t_6);
- PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_6);
- __pyx_t_6 = 0;
- __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 278, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_6);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __Pyx_Raise(__pyx_t_6, 0, 0, 0);
- __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
- __PYX_ERR(1, 278, __pyx_L1_error)
- break;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":279
- * else:
- * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
- * info.format = f # <<<<<<<<<<<<<<
- * return
- * else:
- */
- __pyx_v_info->format = __pyx_v_f;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":280
- * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
- * info.format = f
- * return # <<<<<<<<<<<<<<
- * else:
- * info.format = stdlib.malloc(_buffer_format_string_len)
- */
- __pyx_r = 0;
- goto __pyx_L0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":255
- * info.obj = self
- *
- * if not hasfields: # <<<<<<<<<<<<<<
- * t = descr.type_num
- * if ((descr.byteorder == c'>' and little_endian) or
- */
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":282
- * return
- * else:
- * info.format = stdlib.malloc(_buffer_format_string_len) # <<<<<<<<<<<<<<
- * info.format[0] = c'^' # Native data types, manual alignment
- * offset = 0
- */
- /*else*/ {
- __pyx_v_info->format = ((char *)malloc(0xFF));
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":283
- * else:
- * info.format = stdlib.malloc(_buffer_format_string_len)
- * info.format[0] = c'^' # Native data types, manual alignment # <<<<<<<<<<<<<<
- * offset = 0
- * f = _util_dtypestring(descr, info.format + 1,
- */
- (__pyx_v_info->format[0]) = '^';
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":284
- * info.format = stdlib.malloc(_buffer_format_string_len)
- * info.format[0] = c'^' # Native data types, manual alignment
- * offset = 0 # <<<<<<<<<<<<<<
- * f = _util_dtypestring(descr, info.format + 1,
- * info.format + _buffer_format_string_len,
- */
- __pyx_v_offset = 0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":285
- * info.format[0] = c'^' # Native data types, manual alignment
- * offset = 0
- * f = _util_dtypestring(descr, info.format + 1, # <<<<<<<<<<<<<<
- * info.format + _buffer_format_string_len,
- * &offset)
- */
- __pyx_t_7 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 0xFF), (&__pyx_v_offset)); if (unlikely(__pyx_t_7 == NULL)) __PYX_ERR(1, 285, __pyx_L1_error)
- __pyx_v_f = __pyx_t_7;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":288
- * info.format + _buffer_format_string_len,
- * &offset)
- * f[0] = c'\0' # Terminate format string # <<<<<<<<<<<<<<
- *
- * def __releasebuffer__(ndarray self, Py_buffer* info):
- */
- (__pyx_v_f[0]) = '\x00';
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":197
- * # experimental exception made for __getbuffer__ and __releasebuffer__
- * # -- the details of this may change.
- * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<<
- * # This implementation of getbuffer is geared towards Cython
- * # requirements, and does not yet fullfill the PEP.
- */
-
- /* function exit code */
- __pyx_r = 0;
- goto __pyx_L0;
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_3);
- __Pyx_XDECREF(__pyx_t_6);
- __Pyx_AddTraceback("numpy.ndarray.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = -1;
- if (__pyx_v_info != NULL && __pyx_v_info->obj != NULL) {
- __Pyx_GOTREF(__pyx_v_info->obj);
- __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = NULL;
- }
- goto __pyx_L2;
- __pyx_L0:;
- if (__pyx_v_info != NULL && __pyx_v_info->obj == Py_None) {
- __Pyx_GOTREF(Py_None);
- __Pyx_DECREF(Py_None); __pyx_v_info->obj = NULL;
- }
- __pyx_L2:;
- __Pyx_XDECREF((PyObject *)__pyx_v_descr);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":290
- * f[0] = c'\0' # Terminate format string
- *
- * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<<
- * if PyArray_HASFIELDS(self):
- * stdlib.free(info.format)
- */
-
-/* Python wrapper */
-static CYTHON_UNUSED void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info); /*proto*/
-static CYTHON_UNUSED void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info) {
- __Pyx_RefNannyDeclarations
- __Pyx_RefNannySetupContext("__releasebuffer__ (wrapper)", 0);
- __pyx_pf_5numpy_7ndarray_2__releasebuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info));
-
- /* function exit code */
- __Pyx_RefNannyFinishContext();
-}
-
-static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info) {
- __Pyx_RefNannyDeclarations
- int __pyx_t_1;
- __Pyx_RefNannySetupContext("__releasebuffer__", 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":291
- *
- * def __releasebuffer__(ndarray self, Py_buffer* info):
- * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<<
- * stdlib.free(info.format)
- * if sizeof(npy_intp) != sizeof(Py_ssize_t):
- */
- __pyx_t_1 = (PyArray_HASFIELDS(__pyx_v_self) != 0);
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":292
- * def __releasebuffer__(ndarray self, Py_buffer* info):
- * if PyArray_HASFIELDS(self):
- * stdlib.free(info.format) # <<<<<<<<<<<<<<
- * if sizeof(npy_intp) != sizeof(Py_ssize_t):
- * stdlib.free(info.strides)
- */
- free(__pyx_v_info->format);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":291
- *
- * def __releasebuffer__(ndarray self, Py_buffer* info):
- * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<<
- * stdlib.free(info.format)
- * if sizeof(npy_intp) != sizeof(Py_ssize_t):
- */
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":293
- * if PyArray_HASFIELDS(self):
- * stdlib.free(info.format)
- * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<<
- * stdlib.free(info.strides)
- * # info.shape was stored after info.strides in the same block
- */
- __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0);
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":294
- * stdlib.free(info.format)
- * if sizeof(npy_intp) != sizeof(Py_ssize_t):
- * stdlib.free(info.strides) # <<<<<<<<<<<<<<
- * # info.shape was stored after info.strides in the same block
- *
- */
- free(__pyx_v_info->strides);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":293
- * if PyArray_HASFIELDS(self):
- * stdlib.free(info.format)
- * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<<
- * stdlib.free(info.strides)
- * # info.shape was stored after info.strides in the same block
- */
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":290
- * f[0] = c'\0' # Terminate format string
- *
- * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<<
- * if PyArray_HASFIELDS(self):
- * stdlib.free(info.format)
- */
-
- /* function exit code */
- __Pyx_RefNannyFinishContext();
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":770
- * ctypedef npy_cdouble complex_t
- *
- * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(1, a)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":771
- *
- * cdef inline object PyArray_MultiIterNew1(a):
- * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew2(a, b):
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 771, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":770
- * ctypedef npy_cdouble complex_t
- *
- * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(1, a)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":773
- * return PyArray_MultiIterNew(1, a)
- *
- * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(2, a, b)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":774
- *
- * cdef inline object PyArray_MultiIterNew2(a, b):
- * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew3(a, b, c):
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 774, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":773
- * return PyArray_MultiIterNew(1, a)
- *
- * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(2, a, b)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":776
- * return PyArray_MultiIterNew(2, a, b)
- *
- * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(3, a, b, c)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":777
- *
- * cdef inline object PyArray_MultiIterNew3(a, b, c):
- * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 777, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":776
- * return PyArray_MultiIterNew(2, a, b)
- *
- * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(3, a, b, c)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":779
- * return PyArray_MultiIterNew(3, a, b, c)
- *
- * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(4, a, b, c, d)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":780
- *
- * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
- * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 780, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":779
- * return PyArray_MultiIterNew(3, a, b, c)
- *
- * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(4, a, b, c, d)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":782
- * return PyArray_MultiIterNew(4, a, b, c, d)
- *
- * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(5, a, b, c, d, e)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":783
- *
- * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
- * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<<
- *
- * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL:
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 783, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":782
- * return PyArray_MultiIterNew(4, a, b, c, d)
- *
- * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(5, a, b, c, d, e)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":785
- * return PyArray_MultiIterNew(5, a, b, c, d, e)
- *
- * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<<
- * # Recursive utility function used in __getbuffer__ to get format
- * # string. The new location in the format string is returned.
- */
-
-static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx_v_descr, char *__pyx_v_f, char *__pyx_v_end, int *__pyx_v_offset) {
- PyArray_Descr *__pyx_v_child = 0;
- int __pyx_v_endian_detector;
- int __pyx_v_little_endian;
- PyObject *__pyx_v_fields = 0;
- PyObject *__pyx_v_childname = NULL;
- PyObject *__pyx_v_new_offset = NULL;
- PyObject *__pyx_v_t = NULL;
- char *__pyx_r;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- Py_ssize_t __pyx_t_2;
- PyObject *__pyx_t_3 = NULL;
- PyObject *__pyx_t_4 = NULL;
- int __pyx_t_5;
- int __pyx_t_6;
- int __pyx_t_7;
- long __pyx_t_8;
- char *__pyx_t_9;
- __Pyx_RefNannySetupContext("_util_dtypestring", 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":790
- *
- * cdef dtype child
- * cdef int endian_detector = 1 # <<<<<<<<<<<<<<
- * cdef bint little_endian = ((&endian_detector)[0] != 0)
- * cdef tuple fields
- */
- __pyx_v_endian_detector = 1;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":791
- * cdef dtype child
- * cdef int endian_detector = 1
- * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<<
- * cdef tuple fields
- *
- */
- __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":794
- * cdef tuple fields
- *
- * for childname in descr.names: # <<<<<<<<<<<<<<
- * fields = descr.fields[childname]
- * child, new_offset = fields
- */
- if (unlikely(__pyx_v_descr->names == Py_None)) {
- PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
- __PYX_ERR(1, 794, __pyx_L1_error)
- }
- __pyx_t_1 = __pyx_v_descr->names; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0;
- for (;;) {
- if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break;
- #if CYTHON_COMPILING_IN_CPYTHON
- __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely(0 < 0)) __PYX_ERR(1, 794, __pyx_L1_error)
- #else
- __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 794, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- #endif
- __Pyx_XDECREF_SET(__pyx_v_childname, __pyx_t_3);
- __pyx_t_3 = 0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":795
- *
- * for childname in descr.names:
- * fields = descr.fields[childname] # <<<<<<<<<<<<<<
- * child, new_offset = fields
- *
- */
- if (unlikely(__pyx_v_descr->fields == Py_None)) {
- PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable");
- __PYX_ERR(1, 795, __pyx_L1_error)
- }
- __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_descr->fields, __pyx_v_childname); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 795, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- if (!(likely(PyTuple_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_t_3)->tp_name), 0))) __PYX_ERR(1, 795, __pyx_L1_error)
- __Pyx_XDECREF_SET(__pyx_v_fields, ((PyObject*)__pyx_t_3));
- __pyx_t_3 = 0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":796
- * for childname in descr.names:
- * fields = descr.fields[childname]
- * child, new_offset = fields # <<<<<<<<<<<<<<
- *
- * if (end - f) - (new_offset - offset[0]) < 15:
- */
- if (likely(__pyx_v_fields != Py_None)) {
- PyObject* sequence = __pyx_v_fields;
- #if CYTHON_COMPILING_IN_CPYTHON
- Py_ssize_t size = Py_SIZE(sequence);
- #else
- Py_ssize_t size = PySequence_Size(sequence);
- #endif
- if (unlikely(size != 2)) {
- if (size > 2) __Pyx_RaiseTooManyValuesError(2);
- else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);
- __PYX_ERR(1, 796, __pyx_L1_error)
- }
- #if CYTHON_COMPILING_IN_CPYTHON
- __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0);
- __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1);
- __Pyx_INCREF(__pyx_t_3);
- __Pyx_INCREF(__pyx_t_4);
- #else
- __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 796, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 796, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- #endif
- } else {
- __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(1, 796, __pyx_L1_error)
- }
- if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_dtype))))) __PYX_ERR(1, 796, __pyx_L1_error)
- __Pyx_XDECREF_SET(__pyx_v_child, ((PyArray_Descr *)__pyx_t_3));
- __pyx_t_3 = 0;
- __Pyx_XDECREF_SET(__pyx_v_new_offset, __pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":798
- * child, new_offset = fields
- *
- * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<<
- * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
- *
- */
- __pyx_t_4 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 798, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyNumber_Subtract(__pyx_v_new_offset, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 798, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 798, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = ((((__pyx_v_end - __pyx_v_f) - ((int)__pyx_t_5)) < 15) != 0);
- if (__pyx_t_6) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":799
- *
- * if (end - f) - (new_offset - offset[0]) < 15:
- * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<<
- *
- * if ((child.byteorder == c'>' and little_endian) or
- */
- __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 799, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_Raise(__pyx_t_3, 0, 0, 0);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __PYX_ERR(1, 799, __pyx_L1_error)
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":798
- * child, new_offset = fields
- *
- * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<<
- * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
- *
- */
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":801
- * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
- *
- * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<<
- * (child.byteorder == c'<' and not little_endian)):
- * raise ValueError(u"Non-native byte order not supported")
- */
- __pyx_t_7 = ((__pyx_v_child->byteorder == '>') != 0);
- if (!__pyx_t_7) {
- goto __pyx_L8_next_or;
- } else {
- }
- __pyx_t_7 = (__pyx_v_little_endian != 0);
- if (!__pyx_t_7) {
- } else {
- __pyx_t_6 = __pyx_t_7;
- goto __pyx_L7_bool_binop_done;
- }
- __pyx_L8_next_or:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":802
- *
- * if ((child.byteorder == c'>' and little_endian) or
- * (child.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<<
- * raise ValueError(u"Non-native byte order not supported")
- * # One could encode it in the format string and have Cython
- */
- __pyx_t_7 = ((__pyx_v_child->byteorder == '<') != 0);
- if (__pyx_t_7) {
- } else {
- __pyx_t_6 = __pyx_t_7;
- goto __pyx_L7_bool_binop_done;
- }
- __pyx_t_7 = ((!(__pyx_v_little_endian != 0)) != 0);
- __pyx_t_6 = __pyx_t_7;
- __pyx_L7_bool_binop_done:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":801
- * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
- *
- * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<<
- * (child.byteorder == c'<' and not little_endian)):
- * raise ValueError(u"Non-native byte order not supported")
- */
- if (__pyx_t_6) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":803
- * if ((child.byteorder == c'>' and little_endian) or
- * (child.byteorder == c'<' and not little_endian)):
- * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<<
- * # One could encode it in the format string and have Cython
- * # complain instead, BUT: < and > in format strings also imply
- */
- __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 803, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_Raise(__pyx_t_3, 0, 0, 0);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __PYX_ERR(1, 803, __pyx_L1_error)
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":801
- * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd")
- *
- * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<<
- * (child.byteorder == c'<' and not little_endian)):
- * raise ValueError(u"Non-native byte order not supported")
- */
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":813
- *
- * # Output padding bytes
- * while offset[0] < new_offset: # <<<<<<<<<<<<<<
- * f[0] = 120 # "x"; pad byte
- * f += 1
- */
- while (1) {
- __pyx_t_3 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 813, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyObject_RichCompare(__pyx_t_3, __pyx_v_new_offset, Py_LT); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 813, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 813, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (!__pyx_t_6) break;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":814
- * # Output padding bytes
- * while offset[0] < new_offset:
- * f[0] = 120 # "x"; pad byte # <<<<<<<<<<<<<<
- * f += 1
- * offset[0] += 1
- */
- (__pyx_v_f[0]) = 0x78;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":815
- * while offset[0] < new_offset:
- * f[0] = 120 # "x"; pad byte
- * f += 1 # <<<<<<<<<<<<<<
- * offset[0] += 1
- *
- */
- __pyx_v_f = (__pyx_v_f + 1);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":816
- * f[0] = 120 # "x"; pad byte
- * f += 1
- * offset[0] += 1 # <<<<<<<<<<<<<<
- *
- * offset[0] += child.itemsize
- */
- __pyx_t_8 = 0;
- (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + 1);
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":818
- * offset[0] += 1
- *
- * offset[0] += child.itemsize # <<<<<<<<<<<<<<
- *
- * if not PyDataType_HASFIELDS(child):
- */
- __pyx_t_8 = 0;
- (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + __pyx_v_child->elsize);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":820
- * offset[0] += child.itemsize
- *
- * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<<
- * t = child.type_num
- * if end - f < 5:
- */
- __pyx_t_6 = ((!(PyDataType_HASFIELDS(__pyx_v_child) != 0)) != 0);
- if (__pyx_t_6) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":821
- *
- * if not PyDataType_HASFIELDS(child):
- * t = child.type_num # <<<<<<<<<<<<<<
- * if end - f < 5:
- * raise RuntimeError(u"Format string allocated too short.")
- */
- __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_child->type_num); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 821, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_XDECREF_SET(__pyx_v_t, __pyx_t_4);
- __pyx_t_4 = 0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":822
- * if not PyDataType_HASFIELDS(child):
- * t = child.type_num
- * if end - f < 5: # <<<<<<<<<<<<<<
- * raise RuntimeError(u"Format string allocated too short.")
- *
- */
- __pyx_t_6 = (((__pyx_v_end - __pyx_v_f) < 5) != 0);
- if (__pyx_t_6) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":823
- * t = child.type_num
- * if end - f < 5:
- * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<<
- *
- * # Until ticket #99 is fixed, use integers to avoid warnings
- */
- __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 823, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_Raise(__pyx_t_4, 0, 0, 0);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __PYX_ERR(1, 823, __pyx_L1_error)
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":822
- * if not PyDataType_HASFIELDS(child):
- * t = child.type_num
- * if end - f < 5: # <<<<<<<<<<<<<<
- * raise RuntimeError(u"Format string allocated too short.")
- *
- */
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":826
- *
- * # Until ticket #99 is fixed, use integers to avoid warnings
- * if t == NPY_BYTE: f[0] = 98 #"b" # <<<<<<<<<<<<<<
- * elif t == NPY_UBYTE: f[0] = 66 #"B"
- * elif t == NPY_SHORT: f[0] = 104 #"h"
- */
- __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_BYTE); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 826, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 826, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 826, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 98;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":827
- * # Until ticket #99 is fixed, use integers to avoid warnings
- * if t == NPY_BYTE: f[0] = 98 #"b"
- * elif t == NPY_UBYTE: f[0] = 66 #"B" # <<<<<<<<<<<<<<
- * elif t == NPY_SHORT: f[0] = 104 #"h"
- * elif t == NPY_USHORT: f[0] = 72 #"H"
- */
- __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_UBYTE); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 827, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 827, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 827, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 66;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":828
- * if t == NPY_BYTE: f[0] = 98 #"b"
- * elif t == NPY_UBYTE: f[0] = 66 #"B"
- * elif t == NPY_SHORT: f[0] = 104 #"h" # <<<<<<<<<<<<<<
- * elif t == NPY_USHORT: f[0] = 72 #"H"
- * elif t == NPY_INT: f[0] = 105 #"i"
- */
- __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_SHORT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 828, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 828, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 828, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 0x68;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":829
- * elif t == NPY_UBYTE: f[0] = 66 #"B"
- * elif t == NPY_SHORT: f[0] = 104 #"h"
- * elif t == NPY_USHORT: f[0] = 72 #"H" # <<<<<<<<<<<<<<
- * elif t == NPY_INT: f[0] = 105 #"i"
- * elif t == NPY_UINT: f[0] = 73 #"I"
- */
- __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_USHORT); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 829, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 829, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 829, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 72;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":830
- * elif t == NPY_SHORT: f[0] = 104 #"h"
- * elif t == NPY_USHORT: f[0] = 72 #"H"
- * elif t == NPY_INT: f[0] = 105 #"i" # <<<<<<<<<<<<<<
- * elif t == NPY_UINT: f[0] = 73 #"I"
- * elif t == NPY_LONG: f[0] = 108 #"l"
- */
- __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_INT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 830, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 830, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 830, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 0x69;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":831
- * elif t == NPY_USHORT: f[0] = 72 #"H"
- * elif t == NPY_INT: f[0] = 105 #"i"
- * elif t == NPY_UINT: f[0] = 73 #"I" # <<<<<<<<<<<<<<
- * elif t == NPY_LONG: f[0] = 108 #"l"
- * elif t == NPY_ULONG: f[0] = 76 #"L"
- */
- __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_UINT); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 831, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 831, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 831, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 73;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":832
- * elif t == NPY_INT: f[0] = 105 #"i"
- * elif t == NPY_UINT: f[0] = 73 #"I"
- * elif t == NPY_LONG: f[0] = 108 #"l" # <<<<<<<<<<<<<<
- * elif t == NPY_ULONG: f[0] = 76 #"L"
- * elif t == NPY_LONGLONG: f[0] = 113 #"q"
- */
- __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONG); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 832, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 832, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 832, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 0x6C;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":833
- * elif t == NPY_UINT: f[0] = 73 #"I"
- * elif t == NPY_LONG: f[0] = 108 #"l"
- * elif t == NPY_ULONG: f[0] = 76 #"L" # <<<<<<<<<<<<<<
- * elif t == NPY_LONGLONG: f[0] = 113 #"q"
- * elif t == NPY_ULONGLONG: f[0] = 81 #"Q"
- */
- __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_ULONG); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 833, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 833, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 833, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 76;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":834
- * elif t == NPY_LONG: f[0] = 108 #"l"
- * elif t == NPY_ULONG: f[0] = 76 #"L"
- * elif t == NPY_LONGLONG: f[0] = 113 #"q" # <<<<<<<<<<<<<<
- * elif t == NPY_ULONGLONG: f[0] = 81 #"Q"
- * elif t == NPY_FLOAT: f[0] = 102 #"f"
- */
- __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONGLONG); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 834, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 834, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 834, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 0x71;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":835
- * elif t == NPY_ULONG: f[0] = 76 #"L"
- * elif t == NPY_LONGLONG: f[0] = 113 #"q"
- * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" # <<<<<<<<<<<<<<
- * elif t == NPY_FLOAT: f[0] = 102 #"f"
- * elif t == NPY_DOUBLE: f[0] = 100 #"d"
- */
- __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_ULONGLONG); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 835, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 835, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 835, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 81;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":836
- * elif t == NPY_LONGLONG: f[0] = 113 #"q"
- * elif t == NPY_ULONGLONG: f[0] = 81 #"Q"
- * elif t == NPY_FLOAT: f[0] = 102 #"f" # <<<<<<<<<<<<<<
- * elif t == NPY_DOUBLE: f[0] = 100 #"d"
- * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g"
- */
- __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_FLOAT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 836, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 836, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 836, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 0x66;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":837
- * elif t == NPY_ULONGLONG: f[0] = 81 #"Q"
- * elif t == NPY_FLOAT: f[0] = 102 #"f"
- * elif t == NPY_DOUBLE: f[0] = 100 #"d" # <<<<<<<<<<<<<<
- * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g"
- * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf
- */
- __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_DOUBLE); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 837, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 837, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 837, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 0x64;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":838
- * elif t == NPY_FLOAT: f[0] = 102 #"f"
- * elif t == NPY_DOUBLE: f[0] = 100 #"d"
- * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" # <<<<<<<<<<<<<<
- * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf
- * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd
- */
- __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONGDOUBLE); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 838, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 838, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 838, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 0x67;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":839
- * elif t == NPY_DOUBLE: f[0] = 100 #"d"
- * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g"
- * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf # <<<<<<<<<<<<<<
- * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd
- * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
- */
- __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CFLOAT); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 839, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 839, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 839, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 90;
- (__pyx_v_f[1]) = 0x66;
- __pyx_v_f = (__pyx_v_f + 1);
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":840
- * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g"
- * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf
- * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd # <<<<<<<<<<<<<<
- * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
- * elif t == NPY_OBJECT: f[0] = 79 #"O"
- */
- __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CDOUBLE); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 840, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 840, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 840, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 90;
- (__pyx_v_f[1]) = 0x64;
- __pyx_v_f = (__pyx_v_f + 1);
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":841
- * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf
- * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd
- * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg # <<<<<<<<<<<<<<
- * elif t == NPY_OBJECT: f[0] = 79 #"O"
- * else:
- */
- __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CLONGDOUBLE); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 841, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 841, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 841, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 90;
- (__pyx_v_f[1]) = 0x67;
- __pyx_v_f = (__pyx_v_f + 1);
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":842
- * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd
- * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg
- * elif t == NPY_OBJECT: f[0] = 79 #"O" # <<<<<<<<<<<<<<
- * else:
- * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
- */
- __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_OBJECT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 842, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 842, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 842, __pyx_L1_error)
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- if (__pyx_t_6) {
- (__pyx_v_f[0]) = 79;
- goto __pyx_L15;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":844
- * elif t == NPY_OBJECT: f[0] = 79 #"O"
- * else:
- * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<<
- * f += 1
- * else:
- */
- /*else*/ {
- __pyx_t_3 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_v_t); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 844, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 844, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_4);
- __Pyx_GIVEREF(__pyx_t_3);
- PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3);
- __pyx_t_3 = 0;
- __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 844, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_3);
- __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
- __Pyx_Raise(__pyx_t_3, 0, 0, 0);
- __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
- __PYX_ERR(1, 844, __pyx_L1_error)
- }
- __pyx_L15:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":845
- * else:
- * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t)
- * f += 1 # <<<<<<<<<<<<<<
- * else:
- * # Cython ignores struct boundary information ("T{...}"),
- */
- __pyx_v_f = (__pyx_v_f + 1);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":820
- * offset[0] += child.itemsize
- *
- * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<<
- * t = child.type_num
- * if end - f < 5:
- */
- goto __pyx_L13;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":849
- * # Cython ignores struct boundary information ("T{...}"),
- * # so don't output it
- * f = _util_dtypestring(child, f, end, offset) # <<<<<<<<<<<<<<
- * return f
- *
- */
- /*else*/ {
- __pyx_t_9 = __pyx_f_5numpy__util_dtypestring(__pyx_v_child, __pyx_v_f, __pyx_v_end, __pyx_v_offset); if (unlikely(__pyx_t_9 == NULL)) __PYX_ERR(1, 849, __pyx_L1_error)
- __pyx_v_f = __pyx_t_9;
- }
- __pyx_L13:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":794
- * cdef tuple fields
- *
- * for childname in descr.names: # <<<<<<<<<<<<<<
- * fields = descr.fields[childname]
- * child, new_offset = fields
- */
- }
- __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":850
- * # so don't output it
- * f = _util_dtypestring(child, f, end, offset)
- * return f # <<<<<<<<<<<<<<
- *
- *
- */
- __pyx_r = __pyx_v_f;
- goto __pyx_L0;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":785
- * return PyArray_MultiIterNew(5, a, b, c, d, e)
- *
- * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<<
- * # Recursive utility function used in __getbuffer__ to get format
- * # string. The new location in the format string is returned.
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_XDECREF(__pyx_t_3);
- __Pyx_XDECREF(__pyx_t_4);
- __Pyx_AddTraceback("numpy._util_dtypestring", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = NULL;
- __pyx_L0:;
- __Pyx_XDECREF((PyObject *)__pyx_v_child);
- __Pyx_XDECREF(__pyx_v_fields);
- __Pyx_XDECREF(__pyx_v_childname);
- __Pyx_XDECREF(__pyx_v_new_offset);
- __Pyx_XDECREF(__pyx_v_t);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":966
- *
- *
- * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<<
- * cdef PyObject* baseptr
- * if base is None:
- */
-
-static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) {
- PyObject *__pyx_v_baseptr;
- __Pyx_RefNannyDeclarations
- int __pyx_t_1;
- int __pyx_t_2;
- __Pyx_RefNannySetupContext("set_array_base", 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":968
- * cdef inline void set_array_base(ndarray arr, object base):
- * cdef PyObject* baseptr
- * if base is None: # <<<<<<<<<<<<<<
- * baseptr = NULL
- * else:
- */
- __pyx_t_1 = (__pyx_v_base == Py_None);
- __pyx_t_2 = (__pyx_t_1 != 0);
- if (__pyx_t_2) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":969
- * cdef PyObject* baseptr
- * if base is None:
- * baseptr = NULL # <<<<<<<<<<<<<<
- * else:
- * Py_INCREF(base) # important to do this before decref below!
- */
- __pyx_v_baseptr = NULL;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":968
- * cdef inline void set_array_base(ndarray arr, object base):
- * cdef PyObject* baseptr
- * if base is None: # <<<<<<<<<<<<<<
- * baseptr = NULL
- * else:
- */
- goto __pyx_L3;
- }
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":971
- * baseptr = NULL
- * else:
- * Py_INCREF(base) # important to do this before decref below! # <<<<<<<<<<<<<<
- * baseptr = base
- * Py_XDECREF(arr.base)
- */
- /*else*/ {
- Py_INCREF(__pyx_v_base);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":972
- * else:
- * Py_INCREF(base) # important to do this before decref below!
- * baseptr = base # <<<<<<<<<<<<<<
- * Py_XDECREF(arr.base)
- * arr.base = baseptr
- */
- __pyx_v_baseptr = ((PyObject *)__pyx_v_base);
- }
- __pyx_L3:;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":973
- * Py_INCREF(base) # important to do this before decref below!
- * baseptr = base
- * Py_XDECREF(arr.base) # <<<<<<<<<<<<<<
- * arr.base = baseptr
- *
- */
- Py_XDECREF(__pyx_v_arr->base);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":974
- * baseptr = base
- * Py_XDECREF(arr.base)
- * arr.base = baseptr # <<<<<<<<<<<<<<
- *
- * cdef inline object get_array_base(ndarray arr):
- */
- __pyx_v_arr->base = __pyx_v_baseptr;
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":966
- *
- *
- * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<<
- * cdef PyObject* baseptr
- * if base is None:
- */
-
- /* function exit code */
- __Pyx_RefNannyFinishContext();
-}
-
-/* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":976
- * arr.base = baseptr
- *
- * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<<
- * if arr.base is NULL:
- * return None
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- int __pyx_t_1;
- __Pyx_RefNannySetupContext("get_array_base", 0);
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":977
- *
- * cdef inline object get_array_base(ndarray arr):
- * if arr.base is NULL: # <<<<<<<<<<<<<<
- * return None
- * else:
- */
- __pyx_t_1 = ((__pyx_v_arr->base == NULL) != 0);
- if (__pyx_t_1) {
-
- /* "../../../anaconda/lib/python3.5/site-packages/Cython/Includes/numpy/__init__.pxd":978
- * cdef inline object get_array_base(ndarray arr):
- * if arr.base is NULL:
- * return None # <<<<<<<<<<<<<<
- * else:
- * return