[2/N] Modernize FFX: pytest (#39)

* [2/N] Modernize FFX: pytest
* Update Travis
* Travis
natekupp · Dec 19, 2019 · 0b04ef7 · 0b04ef7
1 parent f263f01
commit 0b04ef7
Show file tree

Hide file tree

Showing 16 changed files with 244 additions and 257 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,41 +1,19 @@
 language: python
-
-matrix:
-    fast_finish: true
-    include:
-        - os: linux
-          dist: trusty
-          sudo: required
-          python: 3.5
-          env: PYVERSION="3"
-        - os: linux
-          dist: trusty
-          sudo: required
-          python: 2.7
-          env: PYVERSION=""
-        - os: linux
-          dist: precise
-          sudo: required
-          python: 3.5
-          env: PYVERSION="3"
-        - os: linux
-          dist: precise
-          sudo: required
-          python: 2.7
-          env: PYVERSION=""
+python:
+  - "2.7"
+  - "3.5"
+  - "3.6"
+  - "3.7"
 
 install:
-    - pip${PYVERSION} install --upgrade pip setuptools wheel
-    - pip${PYVERSION} install coveralls codecov
-    - pip${PYVERSION} install --only-binary=numpy,scipy scipy
-    - pip${PYVERSION} install scikit-learn
-    - pip${PYVERSION} install git+https://github.com/natekupp/ffx
+  - pip install -r dev-requirements.txt
+  - pip install -e .
 
 before_script:
-    - export PYTHONPATH=$(pwd):$PYTHONPATH;
+  - export PYTHONPATH=$(pwd):$PYTHONPATH;
 
 script:
-  - coverage${PYVERSION} run --source ffx tests/x_square_test.py
+  # ffx_tests/tests.py is passed explicitly: its name matches neither of pytest's
+  # default test_*.py / *_test.py discovery patterns, so discovery alone skips it.
+  - pytest --cov=ffx ffx_tests/ ffx_tests/tests.py
 
 after_success:
   - coveralls
diff --git a/Makefile b/Makefile
@@ -14,3 +14,6 @@ pypi:
 	python setup.py sdist bdist_egg bdist_wheel
 	twine upload dist/*
 	#twine upload --repository-url https://test.pypi.org/legacy/ dist/* # testpypi
+
+# ffx_tests/tests.py is named explicitly because it matches neither of pytest's
+# default test_*.py / *_test.py patterns; a bare directory argument skips it.
+.PHONY: test
+test:
+	pytest ffx_tests/ ffx_tests/tests.py
diff --git a/dev-requirements.txt b/dev-requirements.txt
@@ -1,6 +1,7 @@
 black==19.10b0; python_version >= '3.6'
-coveralls
+coveralls==1.9.2
 codecov
 isort>=4.3.21
 pylint>=2.4.1; python_version >= '3.6'
 pytest==4.6.7
+pytest-cov==2.7.1
diff --git a/ffx/core.py b/ffx/core.py
@@ -1,59 +1,3 @@
-"""FFX.py v1.3 (Sept 16, 2011)
-This module implements the Fast Function Extraction (FFX) algorithm.
-
-Reference: Trent McConaghy, FFX: Fast, Scalable, Deterministic Symbolic
-Regression Technology, Genetic Programming Theory and Practice IX, Edited by R.
-Riolo, E. Vladislavleva, and J. Moore, Springer, 2011.  http://www.trent.st/ffx
-
-
-HOW TO USE THIS MODULE:
-
-Easiest to use by calling runffx.py.  Its code has example usage patterns.
-
-The main routines are:
-  models = MultiFFXModelFactory().build(train_X, train_y, test_X, test_y, varnames)
-  yhat = model.simulate(X)
-  print model
-
-Can expand / restrict the set of functions via the user-changeable constants (right below licence).
-
-
-FFX Software Licence Agreement (like BSD, but adapted for non-commercial gain only)
-
-Copyright (c) 2011, Solido Design Automation Inc.  Authored by Trent McConaghy.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-    * Usage does not involve commercial gain.
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the documentation
-      and/or other materials provided with the distribution.
-    * Neither the name of the associated institutions nor the names of its
-      contributors may be used to endorse or promote products derived from this
-      software without specific prior written permission.
-
-For permissions beyond the scope of this license, please contact Trent
-McConaghy (trentmc@solidodesign.com).
-
-THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ''AS IS'' AND ANY EXPRESS OR
-IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-EVENT SHALL THE DEVELOPERS OR THEIR INSTITUTIONS BE LIABLE FOR ANY DIRECT,
-INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-Patent pending.
-
-"""
-from __future__ import print_function
-
 import math
 import signal
 import sys
@@ -524,8 +468,8 @@ def build(self, train_X, train_y, test_X, test_y, varnames=None, verbose=False):
 
         if pandas is not None and isinstance(train_X, pandas.DataFrame):
             varnames = train_X.columns
-            train_X = train_X.as_matrix()
-            test_X = test_X.as_matrix()
+            train_X = train_X.to_numpy()
+            test_X = test_X.to_numpy()
         if isinstance(train_X, numpy.ndarray) and varnames is None:
             raise Exception('varnames required for numpy.ndarray')
 
@@ -902,7 +846,7 @@ def _pathwiseLearn(
         Returns list of model (or None if failure)."""
         if verbose:
             print('    Pathwise learn: begin. max_num_bases=%d' % max_num_bases)
-        max_iter = 1000  # default 1000. magic number.
+        max_iter = 5000  # default 5000. magic number.
 
         # Condition X and y:
         # -"unbias" = rescale so that (mean=0, stddev=1) -- subtract each row's

diff --git a/ffx_tests/conftest.py b/ffx_tests/conftest.py
@@ -0,0 +1,10 @@
+import os
+
+import pandas as pd
+import pytest
+
+
+@pytest.fixture(scope='session')  # session scope: built once and shared by every test in the run
+def iris():  # Fixture returning data/iris.csv (next to this conftest.py) as a pandas DataFrame
+    path = os.path.dirname(__file__)  # directory containing this conftest.py
+    return pd.read_csv(os.path.join(path, 'data/iris.csv'))  # resolved relative to this file, not the CWD
diff --git a/tests/data/iris.csv b/ffx_tests/data/iris.csv
rename from tests/data/iris.csv
rename to ffx_tests/data/iris.csv
diff --git a/ffx_tests/test_readme.py b/ffx_tests/test_readme.py
@@ -0,0 +1,45 @@
+# this is the tiny example in the README, and now it
+# also prints out num_bases and complexity for each model
+
+import ffx
+import numpy as np
+
+EXPECTED = [
+    (0, 1, '2.00'),
+    (1, 5, '1.85 + 0.0302*b'),
+    (2, 10, '1.76 + 0.0720*log10(b) + 0.0389*b'),
+    (3, 19, '1.72 - 0.0796*max(0,2.60-a) + 0.0705*b - 0.0701*max(0,2.40-a)'),
+    (
+        4,
+        26,
+        '1.71 - 0.0875*max(0,2.60-a) - 0.0796*max(0,2.40-a) + 0.0748*b - 0.000532*max(0,5.73-b)',
+    ),
+    (
+        5,
+        32,
+        '(1.72 - 0.0802*max(0,2.60-a) - 0.0751*max(0,2.40-a) + 0.0714*b) / (1.0 + 0.0142*max(0,2.20-a) - 0.000721*b)',
+    ),
+    (
+        6,
+        39,
+        '(1.71 - 0.0883*max(0,2.60-a) - 0.0862*max(0,2.40-a) + 0.0763*b - 0.00449*max(0,5.73-b)) / (1.0 + 0.0260*max(0,2.20-a) - 0.00130*b)',
+    ),
+    (
+        8,
+        63,
+        '(1.75 + 0.0319*b + 0.0318*b - 0.0106*max(0,2.60-a) - 0.00616*b * max(0,5.73-b) + 2.53e-5*b^2 + 2.22e-5*b^2) / (1.0 + 0.0586*max(0,2.60-a) * max(0,2.40-a) + 0.0138*max(0,5.73-b) * max(0,2.60-a))',
+    ),
+]
+
+
+def test_readme_example():  # Regression test: ffx.run on the tiny README dataset must reproduce EXPECTED exactly
+    train_X = np.array([(1.5, 2, 3), (4, 5, 6)]).T  # transposed to shape (3, 2), matching the 3 entries of train_y
+    train_y = np.array([1, 2, 3])
+
+    test_X = np.array([(5.241, 1.23, 3.125), (1.1, 0.124, 0.391)]).T  # same (3, 2) layout as train_X
+    test_y = np.array([3.03, 0.9113, 1.823])
+
+    np.random.seed(0)  # fix NumPy's global RNG seed before fitting
+
+    models = ffx.run(train_X, train_y, test_X, test_y, ["a", "b"])  # presumably the variable names; "a" and "b" appear in EXPECTED's strings
+    assert [(model.numBases(), model.complexity(), str(model)) for model in models] == EXPECTED  # compared as (numBases, complexity, str) triples, in order
diff --git a/ffx_tests/test_sklearn_api.py b/ffx_tests/test_sklearn_api.py
@@ -0,0 +1,42 @@
+import ffx
+import numpy as np
+
+EXPECTED_MODELS = [
+    (0, 1, '0.298'),
+    (1, 5, '0.102 + 0.395*X1'),
+    (2, 9, '0.0141 + 0.485*X1 + 0.0861*X0'),
+    (
+        7,
+        42,
+        '0.0924 + 0.372*X1 - 0.0743*max(0,0.867-X1) + 0.0658*X0 + 0.0359*X0 * X1 + 0.0201*max(0,X1-0.200) + 0.00932*X1^2 - 0.00504*max(0,0.867-X0)',
+    ),
+]
+
+
+def test_sklearn_api():
+    """Fit FFXRegressor on synthetic data and pin its best model, score and model list.
+
+    The target is an exact linear function of two uniform random predictors.
+    The first half of the samples is used for fitting, the second half for
+    scoring.
+    """
+    np.random.seed(0)
+
+    n_samples = 10000
+
+    # This creates a dataset of 2 predictors
+    X = np.random.random((n_samples, 2))  # pylint: disable=no-member
+    y = 0.1 * X[:, 0] + 0.5 * X[:, 1]
+
+    train_X, test_X = X[: int(n_samples / 2)], X[int(n_samples / 2) :]
+    train_y, test_y = y[: int(n_samples / 2)], y[int(n_samples / 2) :]
+
+    FFX = ffx.FFXRegressor()
+    FFX.fit(train_X, train_y)
+
+    # Best model
+    assert (
+        str(FFX.model_)
+        == '0.0924 + 0.372*X1 - 0.0743*max(0,0.867-X1) + 0.0658*X0 + 0.0359*X0 * X1 + 0.0201*max(0,X1-0.200) + 0.00932*X1^2 - 0.00504*max(0,0.867-X0)'
+    )
+    assert FFX.model_.numBases() == 7
+    # The score comes out of a numerical fit, so compare with a tolerance: exact
+    # float equality can fail on last-bit differences between interpreters or
+    # library versions even when the model strings above still match.
+    assert abs(FFX.score(test_X, test_y) - 0.9984036148094735) < 1e-6
+    assert FFX.complexity() == 42
+
+    # Every model is compared as a (numBases, complexity, str) triple, in order.
+    assert [
+        (model.numBases(), model.complexity(), str(model)) for model in FFX.models_
+    ] == EXPECTED_MODELS
diff --git a/ffx_tests/test_x_square.py b/ffx_tests/test_x_square.py
@@ -0,0 +1,27 @@
+import ffx
+import numpy as np
+
+EXPECTED = [
+    (0, 1, '3.50'),
+    (1, 7, '0.640 + 0.817*x^2'),
+    (2, 11, '0.0846 + 0.972*x^2 + 0.00984*x'),
+    (
+        6,
+        31,
+        '(0.0955 + 0.488*x^2 + 0.468*x^2 + 0.00638*x + 0.00124*x) / (1.0 - 0.00336*x - 0.00213*x)',
+    ),
+]
+
+
+def test_x_square():  # Regression test: ffx.run on four points of y = x^2 must reproduce EXPECTED exactly
+    np.random.seed(0)  # fix NumPy's global RNG seed before fitting
+
+    # This creates a dataset of 1 predictor
+    train_X = np.array([[0, 1, 2, 3]]).T  # transposed into a single column, shape (4, 1)
+    train_y = np.array([0, 1, 4, 9])  # squares of the train_X values
+
+    test_X = np.array([[4, 5, 6, 7]]).T  # same single-column layout as train_X
+    test_y = np.array([16, 25, 36, 49])  # squares of the test_X values
+
+    models = ffx.run(train_X, train_y, test_X, test_y, ["x"])  # presumably the variable name; "x" appears in EXPECTED's strings
+    assert [(model.numBases(), model.complexity(), str(model)) for model in models] == EXPECTED  # compared as (numBases, complexity, str) triples, in order
diff --git a/ffx_tests/tests.py b/ffx_tests/tests.py
@@ -0,0 +1,86 @@
+import ffx
+import numpy as np
+from ffx.core import (
+    ConstantModel,
+    OperatorBase,
+    ProductBase,
+    SimpleBase,
+    INF,
+    OP_ABS,
+    OP_GTH,
+    OP_LOG10,
+    OP_LTH,
+    OP_MAX0,
+    OP_MIN0,
+)
+
+EPS = 0.001
+
+
+def similar(a, b, eps):  # True when the summed absolute difference between a and b is below eps
+    return sum(abs(a - b)) < eps  # one total over all elements, not a per-element tolerance
+
+
+def check_base(xtrain, model, fn):  # True when model.simulate(xtrain) matches fn applied to column 0 of xtrain, within EPS
+    return similar(model.simulate(xtrain), fn(xtrain[:, 0]), EPS)  # fn only ever sees the first column
+
+
+def test_simple_base(iris):  # SimpleBase must reproduce column 0 and its square; `iris` is a pytest fixture argument
+    xtrain = iris.iloc[:50, 0:2].values  # first 50 rows, first two columns, as a NumPy array
+    assert check_base(xtrain, SimpleBase(0, 1), lambda x: x)  # expected to equal column 0 unchanged
+    assert check_base(xtrain, SimpleBase(0, 2), lambda x: x ** 2)  # expected to equal column 0 squared
+
+
+def test_operator_base(iris):  # Each OperatorBase wrapping SimpleBase(0, 1) must match its NumPy reference on column 0
+    base_simple = SimpleBase(0, 1)  # the base every operator below wraps
+    base_abs = OperatorBase(base_simple, OP_ABS)
+    base_max = OperatorBase(base_simple, OP_MAX0)
+    base_min = OperatorBase(base_simple, OP_MIN0)
+    base_log10 = OperatorBase(base_simple, OP_LOG10)
+    base_gth = OperatorBase(base_simple, OP_GTH, 0.5)  # 0.5 is the same value used in the reference lambda below
+    base_lth = OperatorBase(base_simple, OP_LTH, 0.5)  # 0.5 is the same value used in the reference lambda below
+
+    xtrain = iris.iloc[:50, 0:2].values  # first 50 rows, first two columns, as a NumPy array
+    assert check_base(xtrain, base_abs, np.abs)
+    assert check_base(xtrain, base_max, lambda x: np.clip(x, 0.0, INF))  # reference: max(0, x), assuming INF is a large positive bound
+    assert check_base(xtrain, base_min, lambda x: np.clip(x, -INF, 0.0))  # reference: min(0, x), assuming INF is a large positive bound
+    assert check_base(xtrain, base_log10, np.log10)
+    assert check_base(xtrain, base_gth, lambda x: np.clip(0.5 - x, 0.0, INF))  # reference: max(0, 0.5 - x)
+    assert check_base(xtrain, base_lth, lambda x: np.clip(x - 0.5, 0.0, INF))  # reference: max(0, x - 0.5)
+
+
+def test_product_base(iris):  # ProductBase of two SimpleBase objects must match the product of their expected outputs on column 0
+    a = SimpleBase(0, 1)
+    b = SimpleBase(0, 1)
+    c = SimpleBase(0, 2)
+
+    xtrain = iris.iloc[:50, 0:2].values  # first 50 rows, first two columns, as a NumPy array
+    assert check_base(xtrain, ProductBase(a, b), lambda x: x ** 2)  # reference: x * x
+    assert check_base(xtrain, ProductBase(a, c), lambda x: x ** 3)  # reference: x * x^2
+
+
+def test_constant_model(iris):  # ConstantModel(mu, 0) must simulate to mu for every row of xtrain
+    xtrain = iris.iloc[:50, 0:2].values  # first 50 rows, first two columns, as a NumPy array
+
+    mu = xtrain[:, 0].mean()  # mean of column 0, used as the constant
+    data = np.repeat(mu, xtrain.shape[0])  # expected output: mu repeated once per row
+    assert similar(ConstantModel(mu, 0).simulate(xtrain), data, EPS)
+
+
+def test_multi_ffx_model_factory(iris):  # ffx.run must yield the same mean test_nmse for ndarray and DataFrame inputs
+    np.random.seed(0)  # seeded once; the second ffx.run below does not reseed
+
+    xtrain_pandas = iris.iloc[:50, 0:2]  # rows 0-49, first two columns
+    xtest_pandas = iris.iloc[51:100, 0:2]  # NOTE(review): starts at 51, so row 50 is in neither split - confirm intended
+    xtrain = xtrain_pandas.values
+    ytrain = iris.iloc[:50, 2]  # third column is the target
+    xtest = xtest_pandas.values
+    ytest = iris.iloc[51:100, 2]  # same row range as xtest_pandas
+
+    # Use numpy.ndarray
+    models = ffx.run(xtrain, ytrain, xtest, ytest, iris.columns)  # NOTE(review): passes every iris column name although X has only two columns - confirm intended
+    assert abs(np.mean([model.test_nmse for model in models]) - 0.5821326214099275) < EPS
+
+    # Use pandas.DataFrame
+    models = ffx.run(xtrain_pandas, ytrain, xtest_pandas, ytest)  # no names passed on the DataFrame path
+    assert abs(np.mean([model.test_nmse for model in models]) - 0.5821326214099275) < EPS