From 57dacddb91347246ac56a956e66bd342b76a4249 Mon Sep 17 00:00:00 2001
From: Peter Kroon <peterkroon06+github@gmail.com>
Date: Tue, 2 Apr 2019 17:13:17 +0200
Subject: [PATCH 01/11] Create first hypothesis test and fix first bug

---
 symfit/core/fit.py          |   1 +
 symfit/core/models.py       |  10 +-
 tests/hypothesis_helpers.py | 295 ++++++++++++++++++++++++++++++++++++
 tests/test_hypothesis.py    |  33 ++++
 4 files changed, 337 insertions(+), 2 deletions(-)
 create mode 100644 tests/hypothesis_helpers.py
 create mode 100644 tests/test_hypothesis.py

diff --git a/symfit/core/fit.py b/symfit/core/fit.py
index 3975d9a9..1a2ef7ab 100644
--- a/symfit/core/fit.py
+++ b/symfit/core/fit.py
@@ -26,6 +26,7 @@
 else:
     import funcsigs as inspect_sig
 
+
 class TakesData(object):
     """
     An base class for everything that takes data. Most importantly, it takes care
diff --git a/symfit/core/models.py b/symfit/core/models.py
index 01a8109f..da1e72f3 100644
--- a/symfit/core/models.py
+++ b/symfit/core/models.py
@@ -328,8 +328,14 @@ def _init_from_dict(self, model_dict):
         # Everything at the bottom of the toposort is independent, at the top
         # dependent, and the rest interdependent.
         ordered = list(toposort(self.connectivity_mapping))
-        independent = sorted(ordered.pop(0), key=sort_func)
-        self.dependent_vars = sorted(ordered.pop(-1), key=sort_func)
+        if ordered:
+            independent = sorted(ordered.pop(0), key=sort_func)
+        else:
+            independent = []
+        if ordered:
+            self.dependent_vars = sorted(ordered.pop(-1), key=sort_func)
+        else:
+            self.dependent_vars = []
         self.interdependent_vars = sorted(
             [item for items in ordered for item in items],
             key=sort_func
diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py
new file mode 100644
index 00000000..ec732012
--- /dev/null
+++ b/tests/hypothesis_helpers.py
@@ -0,0 +1,295 @@
+from functools import reduce
+import operator
+
+import numpy as np
+
+from symfit import Model, parameters, variables, Variable
+from symfit.core.support import key2str
+import sympy
+
+from hypothesis import strategies as st
+from hypothesis import assume, note
+
+
+NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e6, max_value=1e6)
+
+
+@st.composite
+def exprs(draw, symbols, operations, n_steps, allow_number=False):
+    """
+    A strategy to generate arbitrary :mod:`Sympy` expressions, based on
+    strategies for generating `symbols` and `operations`.
+
+    Parameters
+    ----------
+    symbols: hypothesis.LazySearchStrategy[sympy.Symbol]
+        A strategy that generates sympy symbols. Should probably include
+        constants such as 1 and -1.
+    operations: hypothesis.LazySearchStrategy[tuple[int, sympy.Operation]]
+        A strategy that generates operations with the associated number of
+        arguments.
+    n_steps: int
+        The number of steps to take. Larger numbers result in more complex
+        expressions.
+    allow_number: bool
+        Whether the produced expression can be just a number.
+
+    Returns
+    -------
+    sympy.Expression
+    """
+    expr = draw(symbols)
+    for _ in range(n_steps):
+        nargs, op = draw(operations)
+        args, expr_pos = draw(st.tuples(st.lists(symbols,
+                                                 min_size=nargs-1,
+                                                 max_size=nargs-1),
+                                        st.integers(0, nargs-1)))
+        args.insert(expr_pos, expr)
+        expr = op(*args)
+    assume(allow_number or not expr.is_number)
+    return expr
+
+
+# TODO: ODEModels
+@st.composite
+def models(draw, dependent_vars, symbols, steps=5):
+    """
+    A strategy for generating :class:`symfit.Model` s. 
+
+    Parameters
+    ----------
+    dependent_vars: hypothesis.LazySearchStrategy[list[symfit.Variable]]
+        A strategy that generates lists of Variables. These variables will be
+        used as dependent variables.
+    symbols: hypothesis.LazySearchStrategy[symfit.Parameter or symfit.Variable]
+        A strategy that generates either a parameter or independent variable.
+    steps: int
+        The number of operations per expression. Larger numbers result is more
+        complex models.
+
+    Returns
+    -------
+    symfit.Model
+    """
+    constants = st.sampled_from([0, 1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E])
+    symbols = st.one_of(constants, symbols)
+    ops = st.sampled_from([(2, sympy.Add),
+                           (2, sympy.Mul),
+                           # Can cause imaginary numbers (e.g. (-4)**1.2)
+                           #(2, sympy.Pow),
+                           (1, sympy.sin),
+                           # (2, sympy.log),
+                          ])
+    expressions = exprs(symbols, ops, steps, False)
+    model_dict = {}
+    for dependent_var in draw(dependent_vars):
+        expr = draw(expressions)
+        model_dict[dependent_var] = expr
+    model = Model(model_dict)
+    return model
+
+
+def _same_shape_vars(model):
+    """
+    Helper function that returns a set of frozensets of all independent
+    variables in model that should have broadcast compatible shapes.
+    """
+    indep_vars = model.independent_vars
+    con_map = model.connectivity_mapping
+    shape_groups = {var: {var} for var in indep_vars}
+    for group in con_map.values():
+        comb = {var for var in group if var in indep_vars}
+        shape_group = set()
+        for var in comb:
+            shape_group.update(shape_groups[var])
+        for var in shape_group:
+            shape_groups[var] = shape_group
+    shape_groups = set(frozenset(g) for g in shape_groups.values())
+    return shape_groups
+
+
+@st.composite
+def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, max_dim=3):
+    """
+    Strategy that generates shapes for the independent variables of `model` such
+    that variables that need to be broadcast compatible will be the same shape.
+
+    Parameters
+    ----------
+    model: symfit.Model
+        Shapes will be generated for this model's independent variables.
+    min_value: int
+        The minimum size a dimension can be.
+    max_value: int
+        The maximum size a dimension can be.
+    min_dim: int
+        The minimum number of dimensions the shape should have.
+    max_dim: int
+        The maximum number of dimensions the shape should have.
+
+    Returns
+    -------
+    dict[str]: tuple[int, ...]
+    """
+    indep_vars = model.independent_vars
+    shape_groups = _same_shape_vars(model)
+    shapes = st.lists(st.integers(min_value, max_value),
+                      min_size=min_dim, max_size=max_dim).map(tuple)
+    grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups}))
+    indep_shape = {}
+    for vars, shape in grp_shape.items():
+        for var in vars:
+            indep_shape[var] = shape
+    return indep_shape
+
+
+def numpy_arrays(shape, elements=NUMBERS, **kwargs):
+    """
+    Basic strategy for generating numpy arrays of the specified `shape`, with
+    the specified `elements`.
+
+    Parameters
+    ----------
+    shape: tuple[int, ...]
+        The shape the array should get.
+    elements: hypothesis.LazySearchStrategy
+        The strategy which should be used to generate elements of the array.
+    kwargs: dict[str]
+        Additional keyword arguments to be passed to
+        :func:`hypothesis.strategies.lists`
+
+    Returns
+    -------
+    hypothesis.LazySearchStrategy[np.array]
+    """
+    number_of_values = reduce(operator.mul, shape, 1)
+    return st.lists(elements,
+                    min_size=number_of_values,
+                    max_size=number_of_values,
+                    **kwargs).map(lambda l: np.reshape(l, shape))
+
+
+@st.composite
+def indep_values(draw, model):
+    """
+    Strategy for generating independent data for a given `model`.
+
+    Parameters
+    ----------
+    model: symfit.Model
+
+    Returns
+    -------
+    dict[str, np.array]
+        Dict of numpy arrays with data for every independent variable in
+        `model`.
+    """
+    indep_vars = model.independent_vars
+    dep_vars = model.dependent_vars
+    params = model.params
+
+    indep_shape = draw(independent_data_shapes(model))
+
+    indep_vals = {}
+    for var, shape in indep_shape.items():
+        data = numpy_arrays(shape, unique=True).map(np.sort)
+        indep_vals[str(var)] = data
+
+    data = draw(st.fixed_dictionaries(indep_vals))
+
+    return data
+
+
+@st.composite
+def param_values(draw, model):
+    """
+    Strategy for generating values for the parameters of `model`. Will also set
+    the values of the parameters.
+
+    Parameters
+    ----------
+    model: symfit.Model
+
+    Returns
+    -------
+    dict[str, float]
+    """
+    param_vals = st.fixed_dictionaries({str(param): NUMBERS for param in model.params})
+    data = draw(param_vals)
+    for param in model.params:
+        param.value = data[str(param)]
+    return data
+
+
+@st.composite
+def dep_values(draw, model, indep_data, param_vals):
+    """
+    Strategy for generating dependent data for the given model, independent
+    data, and parameter values.
+
+    Parameters
+    ----------
+    model: symfit.Model
+        The model to be used. Will be called with the given data and parameter
+        values.
+    indep_data: dict[str, np.array]
+        Data for all the independent variables in `model`.
+    param_vals: dict[str, float]
+        Values for all the parameters in `model`.
+
+    Returns
+    -------
+    dict[str, np.array]
+        Data for the dependent variables of `model`, as well as associated
+        sigmas.
+    """
+    try:
+        dep_data = model(**indep_data, **param_vals)._asdict()
+    except (OverflowError, ZeroDivisionError):
+        assume(False)  # Some model + data that causes numerical issues
+    shapes = {var: data.shape for var, data in dep_data.items()}
+    sigmas = {
+        'sigma_{}'.format(str(var)): st.one_of(
+            st.none(),
+            st.floats(allow_nan=False, allow_infinity=False, min_value=0, max_value=5),
+            numpy_arrays(vals.shape)
+        ) for var, vals in dep_data.items()
+    }
+    sigmas = draw(st.fixed_dictionaries(dict(**sigmas)))
+    return dict(**sigmas, **key2str(dep_data))
+
+
+@st.composite
+def model_with_data(draw, dependent_vars, symbols, steps=5):
+    """
+    Strategy that generates a model with associated data using the given
+    dependent variables.
+
+    Parameters
+    ----------
+    dependent_vars: hypothesis.LazySearchStrategy[list[symfit.Variable]]
+        Strategy to use to generate dependent variables for the model.
+    symbols: hypothesis.LazySearchStrategy[symfit.Variable or symfit.Parameter]
+        Strategy to use to generate independent variables and parameters for the
+        model
+    steps: int
+        The number of operations to perform when generating the expressions for
+        every dependent component of the model. More steps means complexer
+        expressions.
+
+    Returns
+    -------
+    symfit.Model
+        The generated model.
+    dict[str, float]
+        Values for the parameters.
+    dict[str, np.array]
+        Data for the independent variables.
+    dict[str, np.array]
+        Data for the dependent variables, as well as sigmas.
+    """
+    model = draw(models(dependent_vars, symbols, steps=steps))
+    indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters')
+    dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data')
+    return model, param_data, indep_data, dep_data
diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py
new file mode 100644
index 00000000..cb2fe1d5
--- /dev/null
+++ b/tests/test_hypothesis.py
@@ -0,0 +1,33 @@
+from .hypothesis_helpers import model_with_data
+from hypothesis import strategies as st
+from hypothesis import given, note, assume, settings
+
+import numpy as np
+
+from symfit import Fit, variables, parameters
+
+
+DEPENDENT_VARS = st.lists(st.sampled_from(variables('Y1, Y2')), unique=True, min_size=1)
+SYMBOLS = st.sampled_from(parameters('a, b, c') + variables('x, y, z'))
+
+def _cmp_result(reference, found):
+    assert reference.keys() == found.keys()
+    ref_vals = []
+    found_vals = []
+    for k in reference:
+        ref_vals.append(reference[k])
+        found_vals.append(found[k])
+    assert np.allclose(ref_vals, found_vals)
+
+
+@settings(max_examples=500)
+@given(model_with_data(DEPENDENT_VARS, SYMBOLS))
+def test_simple(model_with_data):
+    model, param_vals, indep_vals, dep_vals = model_with_data
+    note(str(model))
+    assume(model.params)  # Stacking error in model.eval_jacobian
+
+    fit = Fit(model, **indep_vals, **dep_vals)
+    result = fit.execute()
+    note(str(result))
+    _cmp_result(param_vals, result.params)

From 9930acbc3c7a6718a54595d1f51703c8700d0888 Mon Sep 17 00:00:00 2001
From: Peter Kroon <peterkroon06+github@gmail.com>
Date: Wed, 3 Apr 2019 11:05:19 +0200
Subject: [PATCH 02/11] Change exception/assume

---
 tests/hypothesis_helpers.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py
index ec732012..b31f105f 100644
--- a/tests/hypothesis_helpers.py
+++ b/tests/hypothesis_helpers.py
@@ -244,10 +244,7 @@ def dep_values(draw, model, indep_data, param_vals):
         Data for the dependent variables of `model`, as well as associated
         sigmas.
     """
-    try:
-        dep_data = model(**indep_data, **param_vals)._asdict()
-    except (OverflowError, ZeroDivisionError):
-        assume(False)  # Some model + data that causes numerical issues
+    dep_data = model(**indep_data, **param_vals)._asdict()
     shapes = {var: data.shape for var, data in dep_data.items()}
     sigmas = {
         'sigma_{}'.format(str(var)): st.one_of(
@@ -291,5 +288,8 @@ def model_with_data(draw, dependent_vars, symbols, steps=5):
     """
     model = draw(models(dependent_vars, symbols, steps=steps))
     indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters')
-    dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data')
+    try:
+        dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data')
+    except ArithmeticError:
+        assume(False)  # Some model + data that causes numerical issues
     return model, param_data, indep_data, dep_data

From b46a407177be17d36d2ce815e5b5617c90270c30 Mon Sep 17 00:00:00 2001
From: Peter Kroon <peterkroon06+github@gmail.com>
Date: Wed, 3 Apr 2019 11:19:02 +0200
Subject: [PATCH 03/11] Add hypothesis to Github Actions

---
 .github/workflows/test.yaml              | 2 +-
 .github/workflows/test_and_coveralls.yml | 4 +---
 requirements_test.txt                    | 4 ++++
 3 files changed, 6 insertions(+), 4 deletions(-)
 create mode 100644 requirements_test.txt

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 69a0098c..be462b0c 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -19,8 +19,8 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install wheel
-        pip install pytest
         pip install -r requirements.txt
+        pip install -r requirements_test.txt
         pip install matplotlib
         pip install -e .
     - name: Test with coverage
diff --git a/.github/workflows/test_and_coveralls.yml b/.github/workflows/test_and_coveralls.yml
index ec2f4690..c3d503ce 100644
--- a/.github/workflows/test_and_coveralls.yml
+++ b/.github/workflows/test_and_coveralls.yml
@@ -16,9 +16,8 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install wheel
-        pip install coveralls
-        pip install pytest
         pip install -r requirements.txt
+        pip install -r requirements_test.txt
         pip install matplotlib
         pip install -e .
     - name: Test with coverage
@@ -28,5 +27,4 @@ jobs:
       env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       run: |
-          pip install --upgrade coveralls
           coveralls --service=github
\ No newline at end of file
diff --git a/requirements_test.txt b/requirements_test.txt
new file mode 100644
index 00000000..103175ea
--- /dev/null
+++ b/requirements_test.txt
@@ -0,0 +1,4 @@
+pytest
+coveralls
+hypothesis
+

From 87be0f268d63d4c5aae3a7a77101bd0900df82ae Mon Sep 17 00:00:00 2001
From: Peter Kroon <p.c.kroon@rug.nl>
Date: Tue, 16 Apr 2019 11:46:21 +0200
Subject: [PATCH 04/11] Better expression generation

---
 tests/hypothesis_helpers.py | 79 ++++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 31 deletions(-)

diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py
index b31f105f..26b7d0a1 100644
--- a/tests/hypothesis_helpers.py
+++ b/tests/hypothesis_helpers.py
@@ -8,14 +8,19 @@
 import sympy
 
 from hypothesis import strategies as st
+from hypothesis.extra import numpy as npst
 from hypothesis import assume, note
 
 
 NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e6, max_value=1e6)
 
 
+def _is_sorted(lst, cmp=lambda x, y: x <= y):
+    return all(cmp(x, y) for x, y in zip(lst[:-1], lst[1:]))
+
+
 @st.composite
-def exprs(draw, symbols, operations, n_steps, allow_number=False):
+def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False):
     """
     A strategy to generate arbitrary :mod:`Sympy` expressions, based on
     strategies for generating `symbols` and `operations`.
@@ -25,12 +30,14 @@ def exprs(draw, symbols, operations, n_steps, allow_number=False):
     symbols: hypothesis.LazySearchStrategy[sympy.Symbol]
         A strategy that generates sympy symbols. Should probably include
         constants such as 1 and -1.
-    operations: hypothesis.LazySearchStrategy[tuple[int, sympy.Operation]]
+    unary_ops: hypothesis.LazySearchStrategy[sympy.Operation]
+        A strategy that generates sympy operations that take a single argument.
+    operations: hypothesis.LazySearchStrategy[tuple[int, sympy.Operation, bool]]
         A strategy that generates operations with the associated number of
-        arguments.
-    n_steps: int
-        The number of steps to take. Larger numbers result in more complex
-        expressions.
+        arguments and whether they commute.
+    n_leaves: int
+        The number of symbols to initially generate. Larger numbers result in
+        more complex expressions.
     allow_number: bool
         Whether the produced expression can be just a number.
 
@@ -38,22 +45,34 @@ def exprs(draw, symbols, operations, n_steps, allow_number=False):
     -------
     sympy.Expression
     """
-    expr = draw(symbols)
-    for _ in range(n_steps):
+    leaves = draw(st.lists(symbols, min_size=n_leaves, max_size=n_leaves))
+
+    unary_ops = st.one_of(st.none(), unary_ops)
+    first_ops = draw(st.lists(unary_ops, min_size=len(leaves), max_size=len(leaves)))
+    leaves = [op(leave) if op else leave for op, leave in zip(first_ops, leaves)]
+ 
+    while len(leaves) != 1:
         nargs, op = draw(operations)
-        args, expr_pos = draw(st.tuples(st.lists(symbols,
-                                                 min_size=nargs-1,
-                                                 max_size=nargs-1),
-                                        st.integers(0, nargs-1)))
-        args.insert(expr_pos, expr)
-        expr = op(*args)
+        idxs = draw(st.lists(st.integers(min_value=0, max_value=len(leaves) - 1),
+                             min_size=nargs,
+                             max_size=nargs,
+                             unique=True))
+        args = [leaves[idx] for idx in idxs]
+        for idx in sorted(idxs, reverse=True):
+            del leaves[idx]
+        new_node = op(*args)
+        op = draw(unary_ops)
+        if op:
+            new_node = op(new_node)
+        leaves.append(new_node)
+    expr = leaves[0]
     assume(allow_number or not expr.is_number)
     return expr
 
 
 # TODO: ODEModels
 @st.composite
-def models(draw, dependent_vars, symbols, steps=5):
+def models(draw, dependent_vars, symbols, leaves=5):
     """
     A strategy for generating :class:`symfit.Model` s. 
 
@@ -72,16 +91,19 @@ def models(draw, dependent_vars, symbols, steps=5):
     -------
     symfit.Model
     """
-    constants = st.sampled_from([0, 1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E])
+    constants = st.sampled_from([1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E])
     symbols = st.one_of(constants, symbols)
     ops = st.sampled_from([(2, sympy.Add),
                            (2, sympy.Mul),
                            # Can cause imaginary numbers (e.g. (-4)**1.2)
-                           #(2, sympy.Pow),
-                           (1, sympy.sin),
+                           # (2, sympy.Pow),
                            # (2, sympy.log),
                           ])
-    expressions = exprs(symbols, ops, steps, False)
+    unary_ops = st.sampled_from([sympy.sin,
+                                 sympy.exp,
+                                 lambda e: -e,
+                                 lambda e: 1/e])
+    expressions = exprs(symbols, unary_ops, ops, leaves, False)
     model_dict = {}
     for dependent_var in draw(dependent_vars):
         expr = draw(expressions)
@@ -134,8 +156,7 @@ def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, m
     """
     indep_vars = model.independent_vars
     shape_groups = _same_shape_vars(model)
-    shapes = st.lists(st.integers(min_value, max_value),
-                      min_size=min_dim, max_size=max_dim).map(tuple)
+    shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value)
     grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups}))
     indep_shape = {}
     for vars, shape in grp_shape.items():
@@ -157,18 +178,14 @@ def numpy_arrays(shape, elements=NUMBERS, **kwargs):
         The strategy which should be used to generate elements of the array.
     kwargs: dict[str]
         Additional keyword arguments to be passed to
-        :func:`hypothesis.strategies.lists`
+        :func:`hypothesis.extra.numpy.arrays`
 
     Returns
     -------
     hypothesis.LazySearchStrategy[np.array]
     """
-    number_of_values = reduce(operator.mul, shape, 1)
-    return st.lists(elements,
-                    min_size=number_of_values,
-                    max_size=number_of_values,
-                    **kwargs).map(lambda l: np.reshape(l, shape))
-
+    return npst.arrays(float, shape, elements=NUMBERS, **kwargs)
+    
 
 @st.composite
 def indep_values(draw, model):
@@ -258,7 +275,7 @@ def dep_values(draw, model, indep_data, param_vals):
 
 
 @st.composite
-def model_with_data(draw, dependent_vars, symbols, steps=5):
+def model_with_data(draw, dependent_vars, symbols, leaves=5):
     """
     Strategy that generates a model with associated data using the given
     dependent variables.
@@ -270,7 +287,7 @@ def model_with_data(draw, dependent_vars, symbols, steps=5):
     symbols: hypothesis.LazySearchStrategy[symfit.Variable or symfit.Parameter]
         Strategy to use to generate independent variables and parameters for the
         model
-    steps: int
+    leaves: int
         The number of operations to perform when generating the expressions for
         every dependent component of the model. More steps means complexer
         expressions.
@@ -286,7 +303,7 @@ def model_with_data(draw, dependent_vars, symbols, steps=5):
     dict[str, np.array]
         Data for the dependent variables, as well as sigmas.
     """
-    model = draw(models(dependent_vars, symbols, steps=steps))
+    model = draw(models(dependent_vars, symbols, leaves=leaves))
     indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters')
     try:
         dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data')

From b96bc3881b118ddcb1e0e35b39ca91cadd5009cd Mon Sep 17 00:00:00 2001
From: Peter Kroon <peterkroon06+github@gmail.com>
Date: Thu, 12 Sep 2019 13:56:35 +0200
Subject: [PATCH 05/11] Add comments, add edgecases

---
 .coveragerc                 |   4 ++
 tests/hypothesis_helpers.py | 105 +++++++++++++++++++++++++++---------
 tests/test_hypothesis.py    |   9 ++--
 3 files changed, 90 insertions(+), 28 deletions(-)
 create mode 100644 .coveragerc

diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 00000000..40c40f0a
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,4 @@
+[run]
+branch = True
+omit = 
+    tests/*
diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py
index 26b7d0a1..cf34039a 100644
--- a/tests/hypothesis_helpers.py
+++ b/tests/hypothesis_helpers.py
@@ -12,13 +12,18 @@
 from hypothesis import assume, note
 
 
-NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e6, max_value=1e6)
+NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e3, max_value=1e3)
 
 
 def _is_sorted(lst, cmp=lambda x, y: x <= y):
     return all(cmp(x, y) for x, y in zip(lst[:-1], lst[1:]))
 
 
+def _is_number(obj):
+    obj = sympy.sympify(obj)
+    return obj.is_number
+
+
 @st.composite
 def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False):
     """
@@ -45,21 +50,29 @@ def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False):
     -------
     sympy.Expression
     """
-    leaves = draw(st.lists(symbols, min_size=n_leaves, max_size=n_leaves))
+    symbol_st = st.lists(symbols, min_size=n_leaves, max_size=n_leaves)
+    # Assert we draw at least one not numerical symbol, unless we allow numbers
+    leaves = draw(symbol_st.filter(
+        lambda l: allow_number or any(not _is_number(val) for val in l))
+    )
 
+    # TODO: allow chaining of unary ops
     unary_ops = st.one_of(st.none(), unary_ops)
     first_ops = draw(st.lists(unary_ops, min_size=len(leaves), max_size=len(leaves)))
-    leaves = [op(leave) if op else leave for op, leave in zip(first_ops, leaves)]
+    leaves = [op(leaf) if op else leaf for op, leaf in zip(first_ops, leaves)]
  
     while len(leaves) != 1:
-        nargs, op = draw(operations)
-        idxs = draw(st.lists(st.integers(min_value=0, max_value=len(leaves) - 1),
-                             min_size=nargs,
-                             max_size=nargs,
-                             unique=True))
+        nargs, op, comm = draw(operations)
+        # Draw indices for the leaves that get combined by the operation.
+        idxs_st = st.lists(st.integers(min_value=0, max_value=len(leaves) - 1),
+                           min_size=nargs,
+                           max_size=nargs,
+                           unique=True)
+        # ... and if the operation commutes, make sure the indices are sorted.
+        idxs = draw(idxs_st.filter(lambda l: not comm or _is_sorted(l)))
         args = [leaves[idx] for idx in idxs]
-        for idx in sorted(idxs, reverse=True):
-            del leaves[idx]
+        # Remove the drawn symbols from `leaves`
+        leaves = [leaf for idx, leaf in enumerate(leaves) if idx not in idxs]
         new_node = op(*args)
         op = draw(unary_ops)
         if op:
@@ -93,19 +106,29 @@ def models(draw, dependent_vars, symbols, leaves=5):
     """
     constants = st.sampled_from([1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E])
     symbols = st.one_of(constants, symbols)
-    ops = st.sampled_from([(2, sympy.Add),
-                           (2, sympy.Mul),
+    ops = st.sampled_from([(2, sympy.Add, True),
+                           (2, sympy.Mul, True),
                            # Can cause imaginary numbers (e.g. (-4)**1.2)
-                           # (2, sympy.Pow),
-                           # (2, sympy.log),
+                           # (2, sympy.Pow, False),
+                           # (2, sympy.log, False),
                           ])
     unary_ops = st.sampled_from([sympy.sin,
                                  sympy.exp,
                                  lambda e: -e,
                                  lambda e: 1/e])
-    expressions = exprs(symbols, unary_ops, ops, leaves, False)
     model_dict = {}
-    for dependent_var in draw(dependent_vars):
+    dep_vars = draw(dependent_vars)
+    for idx, dependent_var in enumerate(dep_vars):
+        # Dependent vars can depend on other dependent vars: {y1: k, y2: y1+1}.
+        # But this can cause cyclical dependency ({y1: y2, y2: y1}), so limit to
+        # idx < jdx. Note that this also prevents {y1: y1}.
+        other_vars = [dep_var for jdx, dep_var in enumerate(dep_vars) if idx < jdx]
+        if other_vars:
+            # Can't sample_from and empty list
+            symbs = st.one_of(symbols, st.sampled_from(other_vars))
+        else:
+            symbs = symbols
+        expressions = exprs(symbs, unary_ops, ops, leaves, False)
         expr = draw(expressions)
         model_dict[dependent_var] = expr
     model = Model(model_dict)
@@ -114,12 +137,20 @@ def models(draw, dependent_vars, symbols, leaves=5):
 
 def _same_shape_vars(model):
     """
-    Helper function that returns a set of frozensets of all independent
-    variables in model that should have broadcast compatible shapes.
+    Helper function that returns a set of frozensets of all variables in model
+    that should have broadcast compatible shapes.
     """
     indep_vars = model.independent_vars
-    con_map = model.connectivity_mapping
+    con_map = model.connectivity_mapping.copy()
     shape_groups = {var: {var} for var in indep_vars}
+
+    # Deal with interdependent vars
+    for k, group in con_map.items():
+        for var in list(group):
+            if var in con_map:
+                con_map[k] = group | con_map[var]
+                con_map[k] -= {var}
+
     for group in con_map.values():
         comb = {var for var in group if var in indep_vars}
         shape_group = set()
@@ -155,13 +186,18 @@ def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, m
     dict[str]: tuple[int, ...]
     """
     indep_vars = model.independent_vars
-    shape_groups = _same_shape_vars(model)
+    shape_groups = map(tuple, _same_shape_vars(model))
     shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value)
     grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups}))
     indep_shape = {}
     for vars, shape in grp_shape.items():
-        for var in vars:
+        # Not all shapes have to be the same size, but they should be
+        # broadcastable. Since it's a pain to find a shape A that is compatible
+        # with both B, C and D we'll just say that the last shape can be
+        # different.
+        for var in vars[:-1]:
             indep_shape[var] = shape
+        indep_shape[vars[-1]] = npst.broadcastable_shapes(shape, min_dim, max_dim, min_value, max_value)
     return indep_shape
 
 
@@ -262,13 +298,19 @@ def dep_values(draw, model, indep_data, param_vals):
         sigmas.
     """
     dep_data = model(**indep_data, **param_vals)._asdict()
-    shapes = {var: data.shape for var, data in dep_data.items()}
+    # You can't provide data for interdependent variables.
+    dep_data = {
+        var: data
+        for var, data in dep_data.items()
+        if var in model.dependent_vars
+    }
+    shapes = {var: data.shape for var, data in dep_data.items() if data is not None}
     sigmas = {
         'sigma_{}'.format(str(var)): st.one_of(
             st.none(),
             st.floats(allow_nan=False, allow_infinity=False, min_value=0, max_value=5),
-            numpy_arrays(vals.shape)
-        ) for var, vals in dep_data.items()
+            numpy_arrays(shape)
+        ) for var, shape in shapes.items()
     }
     sigmas = draw(st.fixed_dictionaries(dict(**sigmas)))
     return dict(**sigmas, **key2str(dep_data))
@@ -304,9 +346,22 @@ def model_with_data(draw, dependent_vars, symbols, leaves=5):
         Data for the dependent variables, as well as sigmas.
     """
     model = draw(models(dependent_vars, symbols, leaves=leaves))
-    indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters')
+    indep_data, param_data = draw(st.tuples(indep_values(model), 
+                                            param_values(model)),
+                                  label='independent data, parameters')
     try:
         dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data')
     except ArithmeticError:
         assume(False)  # Some model + data that causes numerical issues
+    for data in dep_data.values():
+        # In addition, let's make sure all data is finite and real.
+        assume(data is None or (np.all(np.isfinite(data)) and np.all(np.isreal(data))))
     return model, param_data, indep_data, dep_data
+
+
+if __name__ == '__main__':
+    DEPENDENT_VARS = st.lists(st.sampled_from(variables('Y1, Y2')), unique=True, min_size=1)
+    SYMBOLS = st.sampled_from(parameters('a, b, c') + variables('x, y, z'))
+
+    for _ in range(5):
+        print(list(map(str, model_with_data(DEPENDENT_VARS, SYMBOLS).example())))
diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py
index cb2fe1d5..baefb5bc 100644
--- a/tests/test_hypothesis.py
+++ b/tests/test_hypothesis.py
@@ -1,8 +1,9 @@
 from .hypothesis_helpers import model_with_data
 from hypothesis import strategies as st
-from hypothesis import given, note, assume, settings
+from hypothesis import given, note, assume, settings, HealthCheck
 
 import numpy as np
+from pytest import approx
 
 from symfit import Fit, variables, parameters
 
@@ -17,10 +18,10 @@ def _cmp_result(reference, found):
     for k in reference:
         ref_vals.append(reference[k])
         found_vals.append(found[k])
-    assert np.allclose(ref_vals, found_vals)
+    assert found_vals == approx(ref_vals)
 
 
-@settings(max_examples=500)
+@settings(max_examples=100, deadline=None, suppress_health_check=[HealthCheck.too_slow])
 @given(model_with_data(DEPENDENT_VARS, SYMBOLS))
 def test_simple(model_with_data):
     model, param_vals, indep_vals, dep_vals = model_with_data
@@ -30,4 +31,6 @@ def test_simple(model_with_data):
     fit = Fit(model, **indep_vals, **dep_vals)
     result = fit.execute()
     note(str(result))
+    note(str(model(**indep_vals, **result.params)))
+    note(str(fit.minimizer))
     _cmp_result(param_vals, result.params)

From 860e89e60a870fe2228de3662a0060f81a8fec50 Mon Sep 17 00:00:00 2001
From: Peter Kroon <peterkroon06+github@gmail.com>
Date: Fri, 20 Sep 2019 16:28:38 +0200
Subject: [PATCH 06/11] Add polynomial models, finish simple answer=guess test

---
 tests/hypothesis_helpers.py | 290 +++++++++++++++++++++++++-----------
 tests/test_hypothesis.py    |  97 ++++++++++--
 2 files changed, 289 insertions(+), 98 deletions(-)

diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py
index cf34039a..e982ec75 100644
--- a/tests/hypothesis_helpers.py
+++ b/tests/hypothesis_helpers.py
@@ -1,9 +1,6 @@
-from functools import reduce
-import operator
-
 import numpy as np
 
-from symfit import Model, parameters, variables, Variable
+from symfit import Model, variables
 from symfit.core.support import key2str
 import sympy
 
@@ -12,7 +9,13 @@
 from hypothesis import assume, note
 
 
-NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e3, max_value=1e3)
+MAX_VAL = 1e1
+MIN_VAL = 1e-1
+FLOATS = st.floats(allow_nan=False, allow_infinity=False,
+                   min_value=MIN_VAL, max_value=MAX_VAL)
+# Some hoops to jump through to limit the floats to MAX_VAL > abs(f) > MIN_VAL,
+# but still allow f=0
+NUMBERS = st.one_of(FLOATS.map(lambda f: -f), st.just(0), FLOATS)
 
 
 def _is_sorted(lst, cmp=lambda x, y: x <= y):
@@ -25,16 +28,22 @@ def _is_number(obj):
 
 
 @st.composite
-def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False):
+def expressions(draw, variables, parameters, constants, unary_ops, operations,
+                n_leaves, allow_number=False):
     """
     A strategy to generate arbitrary :mod:`Sympy` expressions, based on
     strategies for generating `symbols` and `operations`.
 
     Parameters
     ----------
-    symbols: hypothesis.LazySearchStrategy[sympy.Symbol]
-        A strategy that generates sympy symbols. Should probably include
-        constants such as 1 and -1.
+    variables: hypothesis.LazySearchStrategy[symfit.Variable]
+        A strategy that generates Variables. These variables will be
+        used as independent variables.
+    parameters: hypothesis.LazySearchStrategy[symfit.Parameter]
+        A strategy that generates Parameters.
+    constants: hypothesis.LazySearchStrategy[number]
+        A strategy that generates numbers. Should probably contain at least 1
+        and -1.
     unary_ops: hypothesis.LazySearchStrategy[sympy.Operation]
         A strategy that generates sympy operations that take a single argument.
     operations: hypothesis.LazySearchStrategy[tuple[int, sympy.Operation, bool]]
@@ -50,17 +59,24 @@ def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False):
     -------
     sympy.Expression
     """
-    symbol_st = st.lists(symbols, min_size=n_leaves, max_size=n_leaves)
+    if not allow_number:
+        first = st.one_of(variables, parameters)
+        rest = st.lists(st.one_of(constants, variables, parameters),
+                        min_size=n_leaves - 1, max_size=n_leaves - 1)
+        symbol_st = st.tuples(first, rest).map(lambda t: [t[0], *t[1]])
+    else:
+        symbol_st = st.lists(st.one_of(constants, variables, parameters),
+                             min_size=n_leaves, max_size=n_leaves)
     # Assert we draw at least one not numerical symbol, unless we allow numbers
-    leaves = draw(symbol_st.filter(
-        lambda l: allow_number or any(not _is_number(val) for val in l))
-    )
+    leaves = draw(symbol_st)
 
     # TODO: allow chaining of unary ops
     unary_ops = st.one_of(st.none(), unary_ops)
-    first_ops = draw(st.lists(unary_ops, min_size=len(leaves), max_size=len(leaves)))
+    first_ops = draw(st.lists(unary_ops,
+                              min_size=len(leaves),
+                              max_size=len(leaves)))
     leaves = [op(leaf) if op else leaf for op, leaf in zip(first_ops, leaves)]
- 
+
     while len(leaves) != 1:
         nargs, op, comm = draw(operations)
         # Draw indices for the leaves that get combined by the operation.
@@ -83,53 +99,147 @@ def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False):
     return expr
 
 
-# TODO: ODEModels
-@st.composite
-def models(draw, dependent_vars, symbols, leaves=5):
+def expression_strategy(variables, parameters, n_leaves=5):
     """
-    A strategy for generating :class:`symfit.Model` s. 
+    Helper function that calls :func:`expressions` with sane
+    defaults.
 
     Parameters
     ----------
-    dependent_vars: hypothesis.LazySearchStrategy[list[symfit.Variable]]
-        A strategy that generates lists of Variables. These variables will be
-        used as dependent variables.
-    symbols: hypothesis.LazySearchStrategy[symfit.Parameter or symfit.Variable]
-        A strategy that generates either a parameter or independent variable.
-    steps: int
-        The number of operations per expression. Larger numbers result is more
-        complex models.
+    variables: hypothesis.LazySearchStrategy[symfit.Variable]
+        A strategy that generates Variables. These variables will be
+        used as independent variables.
+    parameters: hypothesis.LazySearchStrategy[symfit.Parameter]
+        A strategy that generates Parameters.
+    n_leaves: int
+        The number of symbols to initially generate. Larger numbers result in
+        more complex expressions.
 
     Returns
     -------
-    symfit.Model
+    hypothesis.LazySearchStrategy[sympy.Expr]
     """
-    constants = st.sampled_from([1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E])
-    symbols = st.one_of(constants, symbols)
+    constants = st.sampled_from([1, -1, 2, 3,
+                                 sympy.Rational(1, 2), sympy.pi, sympy.E])
     ops = st.sampled_from([(2, sympy.Add, True),
                            (2, sympy.Mul, True),
                            # Can cause imaginary numbers (e.g. (-4)**1.2)
                            # (2, sympy.Pow, False),
                            # (2, sympy.log, False),
-                          ])
+                           ])
     unary_ops = st.sampled_from([sympy.sin,
                                  sympy.exp,
                                  lambda e: -e,
                                  lambda e: 1/e])
+    return expressions(variables=variables, parameters=parameters,
+                       constants=constants, unary_ops=unary_ops,
+                       operations=ops, n_leaves=n_leaves)
+
+
+@st.composite
+def polynomial_expressions(draw, variables, parameters, constants,
+                           max_degree=10, max_terms=10):
+    """
+    Generates polynomial expressions that are linear in `parameters` and
+    `constants`. The produced polynomials will have at most `max_terms` terms,
+    and the degree will be between 1 and `max_degree`.
+
+    Parameters
+    ----------
+    variables: hypothesis.LazySearchStrategy[symfit.Variable]
+        A strategy that generates Variables. These variables will be
+        used as independent variables.
+    parameters: hypothesis.LazySearchStrategy[symfit.Parameter]
+        A strategy that generates Parameters.
+    constants: hypothesis.LazySearchStrategy[number]
+        A strategy that generates constants.
+    max_degree: int
+        Maximum degree of the produced polynomial
+    max_terms: int
+        Maximum number of terms of the produced polynomial
+
+    Returns
+    -------
+    hypothesis.LazySearchStrategy[sympy.Expr]
+    """
+    indep_vars = draw(st.lists(variables, unique=True, min_size=1))
+    orders = st.dictionaries(
+        keys=st.tuples(*[st.integers(min_value=0, max_value=max_degree)]*len(indep_vars)).filter(lambda t: 1 <= sum(t) <= max_degree),
+        values=st.one_of(parameters, constants),
+        min_size=1,
+        max_size=10
+    )
+    orders = draw(orders)
+    expr = sympy.Poly(orders, *indep_vars).as_expr()
+    return expr
+
+
+def polynomial_strategy(variables, parameters, max_degree=5, max_terms=10):
+    """
+    Helper function that calls :func:`polynomial_expressions` with sane
+    defaults.
+
+    Parameters
+    ----------
+    variables: hypothesis.LazySearchStrategy[symfit.Variable]
+        A strategy that generates Variables. These variables will be
+        used as independent variables.
+    parameters: hypothesis.LazySearchStrategy[symfit.Parameter]
+        A strategy that generates Parameters.
+    max_degree: int
+        Maximum degree of the produced polynomial
+    max_terms: int
+        Maximum number of terms of the produced polynomial
+
+    Returns
+    -------
+    hypothesis.LazySearchStrategy[sympy.Expr]
+    """
+    return polynomial_expressions(variables, parameters, st.just(1),
+                                  max_degree=max_degree, max_terms=max_terms)
+
+
+# TODO: ODEModels
+@st.composite
+def models(draw, dependent_vars, independent_vars, parameters, expressions_st,
+           allow_interdependent=True):
+    """
+    A strategy for generating :class:`symfit.Model` s. 
+
+    Parameters
+    ----------
+    dependent_vars: hypothesis.LazySearchStrategy[symfit.Variable]
+        A strategy that generates Variables. These variables will be
+        used as dependent variables.
+    independent_vars: hypothesis.LazySearchStrategy[symfit.Variable]
+        A strategy that generates Variables. These variables will be
+        used as independent variables.
+    parameters: hypothesis.LazySearchStrategy[symfit.Parameter]
+        A strategy that generates Parameters.
+    expressions_st: callable[(hypothesis.LazySearchStrategy[symfit.Variable],
+                              hypothesis.LazySearchStrategy[symfit.Parameter]),
+                             hypothesis.LazySearchStrategy[sympy.Expr]]
+        A function that when given variables and parameters, produces a strategy
+        that produces expressions.
+
+    Returns
+    -------
+    symfit.Model
+    """
     model_dict = {}
-    dep_vars = draw(dependent_vars)
+    dep_vars = draw(st.lists(dependent_vars, unique=True, min_size=1))
     for idx, dependent_var in enumerate(dep_vars):
         # Dependent vars can depend on other dependent vars: {y1: k, y2: y1+1}.
-        # But this can cause cyclical dependency ({y1: y2, y2: y1}), so limit to
-        # idx < jdx. Note that this also prevents {y1: y1}.
+        # But this can cause cyclical dependency ({y1: y2, y2: y1}), so limit
+        # to idx < jdx. Note that this also prevents {y1: y1}.
         other_vars = [dep_var for jdx, dep_var in enumerate(dep_vars) if idx < jdx]
-        if other_vars:
+        if other_vars and allow_interdependent:
             # Can't sample_from and empty list
-            symbs = st.one_of(symbols, st.sampled_from(other_vars))
+            indep_variables = st.one_of(independent_vars, st.sampled_from(other_vars))
         else:
-            symbs = symbols
-        expressions = exprs(symbs, unary_ops, ops, leaves, False)
-        expr = draw(expressions)
+            indep_variables = independent_vars
+        expression_st = expressions_st(indep_variables, parameters)
+        expr = draw(expression_st)
         model_dict[dependent_var] = expr
     model = Model(model_dict)
     return model
@@ -163,7 +273,8 @@ def _same_shape_vars(model):
 
 
 @st.composite
-def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, max_dim=3):
+def independent_data_shapes(draw, model, min_num_values=0, min_value=1,
+                            max_value=10, min_dim=1, max_dim=3):
     """
     Strategy that generates shapes for the independent variables of `model` such
     that variables that need to be broadcast compatible will be the same shape.
@@ -187,7 +298,11 @@ def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, m
     """
     indep_vars = model.independent_vars
     shape_groups = map(tuple, _same_shape_vars(model))
-    shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value)
+    # Insist on a minimal number of data points to prevent underspecified models
+    # This is probably overly strict, since it enforces the same minimal shape
+    # on *all* components/variables. Instead it would probably be better to
+    # do this per element of model.connectivity_mapping
+    shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value).filter(lambda shape: np.prod(shape) >= min_num_values)
     grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups}))
     indep_shape = {}
     for vars, shape in grp_shape.items():
@@ -197,32 +312,13 @@ def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, m
         # different.
         for var in vars[:-1]:
             indep_shape[var] = shape
-        indep_shape[vars[-1]] = npst.broadcastable_shapes(shape, min_dim, max_dim, min_value, max_value)
+        # Although technically possible, it can cause a bunch of numerical
+        # issues, and it's meaning is debatable at best.
+        # indep_shape[vars[-1]] = npst.broadcastable_shapes(shape, min_dim, max_dim, min_value, max_value)
+        indep_shape[vars[-1]] = shape
     return indep_shape
 
 
-def numpy_arrays(shape, elements=NUMBERS, **kwargs):
-    """
-    Basic strategy for generating numpy arrays of the specified `shape`, with
-    the specified `elements`.
-
-    Parameters
-    ----------
-    shape: tuple[int, ...]
-        The shape the array should get.
-    elements: hypothesis.LazySearchStrategy
-        The strategy which should be used to generate elements of the array.
-    kwargs: dict[str]
-        Additional keyword arguments to be passed to
-        :func:`hypothesis.extra.numpy.arrays`
-
-    Returns
-    -------
-    hypothesis.LazySearchStrategy[np.array]
-    """
-    return npst.arrays(float, shape, elements=NUMBERS, **kwargs)
-    
-
 @st.composite
 def indep_values(draw, model):
     """
@@ -242,11 +338,12 @@ def indep_values(draw, model):
     dep_vars = model.dependent_vars
     params = model.params
 
-    indep_shape = draw(independent_data_shapes(model))
+    min_num_values = len(params) + 1
+    indep_shape = draw(independent_data_shapes(model, min_num_values))
 
     indep_vals = {}
     for var, shape in indep_shape.items():
-        data = numpy_arrays(shape, unique=True).map(np.sort)
+        data = npst.arrays(float, shape, elements=NUMBERS, unique=True).map(np.sort)
         indep_vals[str(var)] = data
 
     data = draw(st.fixed_dictionaries(indep_vals))
@@ -272,6 +369,24 @@ def param_values(draw, model):
     data = draw(param_vals)
     for param in model.params:
         param.value = data[str(param)]
+        lower, upper = draw(
+            st.lists(
+                st.one_of(
+                    st.none(),
+                    st.floats(0, 2*abs(param.value), allow_infinity=False, allow_nan=False)
+                ),
+                min_size=2,
+                max_size=2
+            )
+        )
+        if lower is not None:
+            param.min = param.value - lower
+        else:
+            param.min = None
+        if upper is not None:
+            param.max = param.value + upper
+        else:
+            param.max = None
     return data
 
 
@@ -298,7 +413,12 @@ def dep_values(draw, model, indep_data, param_vals):
         sigmas.
     """
     dep_data = model(**indep_data, **param_vals)._asdict()
-    # You can't provide data for interdependent variables.
+    # for var, data in dep_data.items():
+    #     if var in model.dependent_vars:
+    #         assume(np.all(((np.abs(data) >= MIN_VAL) | (data == 0)) &
+    #                       (np.abs(data) <= MAX_VAL)
+    #                       ))
+    # You can't provide data for interdependent variables. #270
     dep_data = {
         var: data
         for var, data in dep_data.items()
@@ -308,8 +428,8 @@ def dep_values(draw, model, indep_data, param_vals):
     sigmas = {
         'sigma_{}'.format(str(var)): st.one_of(
             st.none(),
-            st.floats(allow_nan=False, allow_infinity=False, min_value=0, max_value=5),
-            numpy_arrays(shape)
+            st.floats(allow_nan=False, allow_infinity=False, min_value=1e-6, max_value=5),
+            npst.arrays(float, shape, elements=st.floats(allow_nan=False, allow_infinity=False, min_value=1e-6, max_value=5))
         ) for var, shape in shapes.items()
     }
     sigmas = draw(st.fixed_dictionaries(dict(**sigmas)))
@@ -317,22 +437,21 @@ def dep_values(draw, model, indep_data, param_vals):
 
 
 @st.composite
-def model_with_data(draw, dependent_vars, symbols, leaves=5):
+def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_strategy, **kwargs):
     """
     Strategy that generates a model with associated data using the given
     dependent variables.
 
     Parameters
     ----------
-    dependent_vars: hypothesis.LazySearchStrategy[list[symfit.Variable]]
+    dependent_vars: hypothesis.LazySearchStrategy[symfit.Variable]
         Strategy to use to generate dependent variables for the model.
-    symbols: hypothesis.LazySearchStrategy[symfit.Variable or symfit.Parameter]
-        Strategy to use to generate independent variables and parameters for the
-        model
-    leaves: int
-        The number of operations to perform when generating the expressions for
-        every dependent component of the model. More steps means complexer
-        expressions.
+    independent_vars: hypothesis.LazySearchStrategy[symfit.Variable]
+        Strategy to use to generate independent variables for the model.
+    parameters: hypothesis.LazySearchStrategy[symfit.Parameter]
+        Strategy to use to generate independent variables for the model.
+    expr_strategy: hypothesis.LazySearchStrategy[sympy.Expr]
+        Strategy to use to generate expressions for the model.
 
     Returns
     -------
@@ -345,7 +464,7 @@ def model_with_data(draw, dependent_vars, symbols, leaves=5):
     dict[str, np.array]
         Data for the dependent variables, as well as sigmas.
     """
-    model = draw(models(dependent_vars, symbols, leaves=leaves))
+    model = draw(models(dependent_vars, independent_vars, parameters, expr_strategy, **kwargs))
     indep_data, param_data = draw(st.tuples(indep_values(model), 
                                             param_values(model)),
                                   label='independent data, parameters')
@@ -355,13 +474,16 @@ def model_with_data(draw, dependent_vars, symbols, leaves=5):
         assume(False)  # Some model + data that causes numerical issues
     for data in dep_data.values():
         # In addition, let's make sure all data is finite and real.
-        assume(data is None or (np.all(np.isfinite(data)) and np.all(np.isreal(data))))
+        assume(data is None or (np.all(np.isfinite(data) & np.isreal(data))))
     return model, param_data, indep_data, dep_data
 
 
 if __name__ == '__main__':
-    DEPENDENT_VARS = st.lists(st.sampled_from(variables('Y1, Y2')), unique=True, min_size=1)
-    SYMBOLS = st.sampled_from(parameters('a, b, c') + variables('x, y, z'))
-
+    DEPENDENT_VARS = st.sampled_from(variables('Y1, Y2'))
+    INDEPENDENT_VARS = st.sampled_from(variables('x, y, z'))
+    PARAMETERS = st.sampled_from(variables('a, b, c'))
     for _ in range(5):
-        print(list(map(str, model_with_data(DEPENDENT_VARS, SYMBOLS).example())))
+        print(polynomial_expressions(INDEPENDENT_VARS, PARAMETERS, st.just(1)).example())
+
+    # for _ in range(5):
+    #     print(list(map(str, model_with_data(DEPENDENT_VARS, INDEPENDENT_VARS, PARAMETERS).example())))
diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py
index baefb5bc..9d6434b1 100644
--- a/tests/test_hypothesis.py
+++ b/tests/test_hypothesis.py
@@ -1,28 +1,43 @@
-from .hypothesis_helpers import model_with_data
+from .hypothesis_helpers import model_with_data, expression_strategy, polynomial_strategy
 from hypothesis import strategies as st
-from hypothesis import given, note, assume, settings, HealthCheck
+from hypothesis import given, note, assume, settings, HealthCheck, event
 
 import numpy as np
 from pytest import approx
 
 from symfit import Fit, variables, parameters
+from symfit.core.minimizers import TrustConstr, SLSQP, LBFGSB
 
 
-DEPENDENT_VARS = st.lists(st.sampled_from(variables('Y1, Y2')), unique=True, min_size=1)
-SYMBOLS = st.sampled_from(parameters('a, b, c') + variables('x, y, z'))
+DEPENDENT_VARS = st.sampled_from(variables('Y1, Y2'))
+INDEPENDENT_VARS = st.sampled_from(variables('x, y, z'))
+PARAMETERS = st.sampled_from(parameters('a, b, c'))
 
-def _cmp_result(reference, found):
-    assert reference.keys() == found.keys()
-    ref_vals = []
-    found_vals = []
-    for k in reference:
-        ref_vals.append(reference[k])
-        found_vals.append(found[k])
-    assert found_vals == approx(ref_vals)
+def _cmp_result(reference, found, abs=None, rel=None):
+    reference = reference.copy()
+    for k, v in reference.items():
+        reference[k] = approx(v, abs=abs, rel=rel)
+    assert found == reference
 
+    # assert reference.keys() == found.keys()
+    # ref_vals = []
+    # found_vals = []
+    # for k in reference:
+    #     ref_vals.append(reference[k])
+    #     found_vals.append(found[k])
+    # return found_vals == approx(ref_vals, abs=abs, rel=rel)
 
-@settings(max_examples=100, deadline=None, suppress_health_check=[HealthCheck.too_slow])
-@given(model_with_data(DEPENDENT_VARS, SYMBOLS))
+
+def expressions(vars, pars):
+    return st.one_of(polynomial_strategy(vars, pars), expression_strategy(vars, pars))
+
+
+@settings(max_examples=50, deadline=None,
+          suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much])
+@given(model_with_data(DEPENDENT_VARS,
+                       INDEPENDENT_VARS,
+                       PARAMETERS,
+                       expression_strategy))
 def test_simple(model_with_data):
     model, param_vals, indep_vals, dep_vals = model_with_data
     note(str(model))
@@ -34,3 +49,57 @@ def test_simple(model_with_data):
     note(str(model(**indep_vals, **result.params)))
     note(str(fit.minimizer))
     _cmp_result(param_vals, result.params)
+
+
+@settings(max_examples=50, deadline=None,
+          suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much])
+@given(st.data())
+def test_linear_models(data):
+    # minimizer = data.draw(st.sampled_from([None, LBFGSB, TrustConstr, SLSQP]), label='minimizer')
+    minimizer = None
+    model, param_vals, indep_vals, dep_vals = data.draw(
+        model_with_data(DEPENDENT_VARS,
+                        INDEPENDENT_VARS,
+                        PARAMETERS,
+                        polynomial_strategy,
+                        allow_interdependent=False)
+    )
+    note(str(model))
+    note(param_vals)
+    note(indep_vals)
+    note(dep_vals)
+
+    assume(model.params)  # Stacking error in model.eval_jacobian
+    init_vals = param_vals.copy()
+    for param in model.params:
+        # param.value = data.draw(st.floats(param.min, param.max, allow_nan=False, allow_infinity=False))
+        param.value = data.draw(st.floats(-1e6, 1e6, allow_nan=False, allow_infinity=False))
+        init_vals[str(param)] = param.value
+        note((param.name, param.min, param.value, param.max))
+        # assert param.min is None or param.min <= param_vals[str(param)]
+        # assert param.max is None or param_vals[str(param)] <= param.max
+
+    fit = Fit(model, **indep_vals, **dep_vals, minimizer=minimizer)
+    obj_jac = fit.objective.eval_jacobian(**init_vals)
+    note(str(obj_jac))
+    note(str(fit.objective.eval_hessian(**init_vals)))
+    # Exclude some edge cases that won't do anything due to numerical precision
+    # This also catches cases like y=a*x with y=0 and x=0
+    assume(np.any(np.abs(obj_jac) > 1e-3))
+
+    result = fit.execute()
+    # "Minimization stopped due to precision loss" and variants
+    # assume(result.infodict['success'])
+    note(str(result.covariance_matrix))
+    note(str(result))
+    note(str(model(**indep_vals, **result.params)))
+    note(str(fit.objective(**result.params)))
+    note(str(fit.objective.eval_jacobian(**result.params)))
+    note(str(fit.objective.eval_hessian(**result.params)))
+    note(str(fit.minimizer))
+    # If the R2 isn't one, check whether the parameters are ~equal.
+    if result.r_squared != approx(1, abs=5e-3):
+        event('R squared != 1')
+        _cmp_result(param_vals, result.params, rel=1e-3, abs=1e-2)
+    else:
+        event('R squared == 1')
\ No newline at end of file

From 701aa67aa6f705815b93d7aa4e274335ec52e2ab Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 21 Oct 2022 15:13:41 +0200
Subject: [PATCH 07/11] Streamline hypothesis tests

---
 tests/hypothesis_helpers.py | 173 ++++++++++++++++++++++--------------
 tests/test_hypothesis.py    |  71 ++++++++-------
 2 files changed, 146 insertions(+), 98 deletions(-)

diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py
index e982ec75..cb78be6b 100644
--- a/tests/hypothesis_helpers.py
+++ b/tests/hypothesis_helpers.py
@@ -6,20 +6,17 @@
 
 from hypothesis import strategies as st
 from hypothesis.extra import numpy as npst
-from hypothesis import assume, note
+from hypothesis import assume
 
 
-MAX_VAL = 1e1
-MIN_VAL = 1e-1
+MAX_VAL = 2**10
+MIN_VAL = 2**-10
 FLOATS = st.floats(allow_nan=False, allow_infinity=False,
-                   min_value=MIN_VAL, max_value=MAX_VAL)
+                   min_value=MIN_VAL, max_value=MAX_VAL,
+                   allow_subnormal=False, width=32)
 # Some hoops to jump through to limit the floats to MAX_VAL > abs(f) > MIN_VAL,
 # but still allow f=0
-NUMBERS = st.one_of(FLOATS.map(lambda f: -f), st.just(0), FLOATS)
-
-
-def _is_sorted(lst, cmp=lambda x, y: x <= y):
-    return all(cmp(x, y) for x, y in zip(lst[:-1], lst[1:]))
+NUMBERS = st.one_of(st.just(0), FLOATS, FLOATS.map(lambda f: -f))
 
 
 def _is_number(obj):
@@ -59,6 +56,13 @@ def expressions(draw, variables, parameters, constants, unary_ops, operations,
     -------
     sympy.Expression
     """
+    # This will work by generating an expression tree, except that we
+    # immediately collapse it: Generate the leaves, and combine those by
+    # applying operations to them. For extra flavour, apply some unary
+    # operations along the way.
+
+    # Make sure we draw at least one not numerical symbol, unless we allow
+    # an expression consisting of just a number
     if not allow_number:
         first = st.one_of(variables, parameters)
         rest = st.lists(st.one_of(constants, variables, parameters),
@@ -67,15 +71,16 @@ def expressions(draw, variables, parameters, constants, unary_ops, operations,
     else:
         symbol_st = st.lists(st.one_of(constants, variables, parameters),
                              min_size=n_leaves, max_size=n_leaves)
-    # Assert we draw at least one not numerical symbol, unless we allow numbers
     leaves = draw(symbol_st)
 
-    # TODO: allow chaining of unary ops
-    unary_ops = st.one_of(st.none(), unary_ops)
+    unary_ops = st.lists(unary_ops, min_size=0, max_size=3)
     first_ops = draw(st.lists(unary_ops,
                               min_size=len(leaves),
                               max_size=len(leaves)))
-    leaves = [op(leaf) if op else leaf for op, leaf in zip(first_ops, leaves)]
+
+    for idx, ops in enumerate(first_ops):
+        for op in ops:
+            leaves[idx] = op(leaves[idx])
 
     while len(leaves) != 1:
         nargs, op, comm = draw(operations)
@@ -85,13 +90,14 @@ def expressions(draw, variables, parameters, constants, unary_ops, operations,
                            max_size=nargs,
                            unique=True)
         # ... and if the operation commutes, make sure the indices are sorted.
-        idxs = draw(idxs_st.filter(lambda l: not comm or _is_sorted(l)))
+        idxs = draw(idxs_st.map(lambda x: sorted(x) if comm else x))
+        # idxs = draw(idxs_st)
         args = [leaves[idx] for idx in idxs]
         # Remove the drawn symbols from `leaves`
         leaves = [leaf for idx, leaf in enumerate(leaves) if idx not in idxs]
         new_node = op(*args)
-        op = draw(unary_ops)
-        if op:
+        ops = draw(unary_ops)
+        for op in ops:
             new_node = op(new_node)
         leaves.append(new_node)
     expr = leaves[0]
@@ -127,18 +133,19 @@ def expression_strategy(variables, parameters, n_leaves=5):
                            # (2, sympy.Pow, False),
                            # (2, sympy.log, False),
                            ])
-    unary_ops = st.sampled_from([sympy.sin,
-                                 sympy.exp,
-                                 lambda e: -e,
-                                 lambda e: 1/e])
-    return expressions(variables=variables, parameters=parameters,
+    unary_ops = st.sampled_from([lambda e: -e,
+                                 lambda e: 1/e,
+                                 sympy.sin,
+                                 sympy.exp])
+    expr = expressions(variables=variables, parameters=parameters,
                        constants=constants, unary_ops=unary_ops,
                        operations=ops, n_leaves=n_leaves)
+    return expr
 
 
 @st.composite
 def polynomial_expressions(draw, variables, parameters, constants,
-                           max_degree=10, max_terms=10):
+                           max_degree=10, max_terms=5):
     """
     Generates polynomial expressions that are linear in `parameters` and
     `constants`. The produced polynomials will have at most `max_terms` terms,
@@ -164,17 +171,19 @@ def polynomial_expressions(draw, variables, parameters, constants,
     """
     indep_vars = draw(st.lists(variables, unique=True, min_size=1))
     orders = st.dictionaries(
+        # Dict keys are the exponents for the variables, values the coefficient
+        # https://docs.sympy.org/latest/modules/polys/domainsintro.html#sparse-polynomial-representation
         keys=st.tuples(*[st.integers(min_value=0, max_value=max_degree)]*len(indep_vars)).filter(lambda t: 1 <= sum(t) <= max_degree),
-        values=st.one_of(parameters, constants),
+        values=st.one_of(constants, parameters),
         min_size=1,
-        max_size=10
+        max_size=max_terms
     )
     orders = draw(orders)
     expr = sympy.Poly(orders, *indep_vars).as_expr()
     return expr
 
 
-def polynomial_strategy(variables, parameters, max_degree=5, max_terms=10):
+def polynomial_strategy(variables, parameters, max_degree=5, max_terms=5):
     """
     Helper function that calls :func:`polynomial_expressions` with sane
     defaults.
@@ -227,19 +236,22 @@ def models(draw, dependent_vars, independent_vars, parameters, expressions_st,
     symfit.Model
     """
     model_dict = {}
-    dep_vars = draw(st.lists(dependent_vars, unique=True, min_size=1))
+    dep_vars = draw(st.lists(dependent_vars, unique=True, min_size=1, max_size=3))
     for idx, dependent_var in enumerate(dep_vars):
         # Dependent vars can depend on other dependent vars: {y1: k, y2: y1+1}.
         # But this can cause cyclical dependency ({y1: y2, y2: y1}), so limit
         # to idx < jdx. Note that this also prevents {y1: y1}.
-        other_vars = [dep_var for jdx, dep_var in enumerate(dep_vars) if idx < jdx]
+        other_vars = dep_vars[:idx]
         if other_vars and allow_interdependent:
             # Can't sample_from and empty list
-            indep_variables = st.one_of(independent_vars, st.sampled_from(other_vars))
+            valid_variables = st.one_of(independent_vars, st.sampled_from(other_vars))
         else:
-            indep_variables = independent_vars
-        expression_st = expressions_st(indep_variables, parameters)
+            valid_variables = independent_vars
+        expression_st = expressions_st(valid_variables, parameters)
         expr = draw(expression_st)
+        # Ban expressions that have complex infinity, which for some reason
+        # can't be printed to numpy. Maybe add more?
+        assume(not expr.has(sympy.zoo))
         model_dict[dependent_var] = expr
     model = Model(model_dict)
     return model
@@ -256,7 +268,7 @@ def _same_shape_vars(model):
 
     # Deal with interdependent vars
     for k, group in con_map.items():
-        for var in list(group):
+        for var in group:
             if var in con_map:
                 con_map[k] = group | con_map[var]
                 con_map[k] -= {var}
@@ -274,7 +286,7 @@ def _same_shape_vars(model):
 
 @st.composite
 def independent_data_shapes(draw, model, min_num_values=0, min_value=1,
-                            max_value=10, min_dim=1, max_dim=3):
+                            max_value=None, min_dim=1, max_dim=None):
     """
     Strategy that generates shapes for the independent variables of `model` such
     that variables that need to be broadcast compatible will be the same shape.
@@ -283,6 +295,8 @@ def independent_data_shapes(draw, model, min_num_values=0, min_value=1,
     ----------
     model: symfit.Model
         Shapes will be generated for this model's independent variables.
+    min_num_values: int
+        The minimum number of values the final arrays should contain.
     min_value: int
         The minimum size a dimension can be.
     max_value: int
@@ -296,13 +310,17 @@ def independent_data_shapes(draw, model, min_num_values=0, min_value=1,
     -------
     dict[str]: tuple[int, ...]
     """
-    indep_vars = model.independent_vars
     shape_groups = map(tuple, _same_shape_vars(model))
     # Insist on a minimal number of data points to prevent underspecified models
     # This is probably overly strict, since it enforces the same minimal shape
-    # on *all* components/variables. Instead it would probably be better to
+    # on *all* components/variables. Instead, it would probably be better to
     # do this per element of model.connectivity_mapping
-    shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value).filter(lambda shape: np.prod(shape) >= min_num_values)
+    shapes = npst.array_shapes(
+        min_dims=min_dim,
+        max_dims=max_dim,
+        min_side=min_value,
+        max_side=max_value
+    ).filter(lambda shape: np.prod(shape) >= min_num_values)
     grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups}))
     indep_shape = {}
     for vars, shape in grp_shape.items():
@@ -314,11 +332,22 @@ def independent_data_shapes(draw, model, min_num_values=0, min_value=1,
             indep_shape[var] = shape
         # Although technically possible, it can cause a bunch of numerical
         # issues, and it's meaning is debatable at best.
-        # indep_shape[vars[-1]] = npst.broadcastable_shapes(shape, min_dim, max_dim, min_value, max_value)
+        # indep_shape[vars[-1]] = npst.broadcastable_shapes(shape,
+        #     min_dims=min_dim,
+        #     max_dims=max_dim,
+        #     min_side=min_value,
+        #     max_side=max_value
+        # )
         indep_shape[vars[-1]] = shape
     return indep_shape
 
 
+def _count_unique_values(indep_vals):
+    return sum(
+        np.sum(np.unique(ar.ravel(), return_counts=True)[1]) for ar in indep_vals.values()
+    )
+
+
 @st.composite
 def indep_values(draw, model):
     """
@@ -334,20 +363,20 @@ def indep_values(draw, model):
         Dict of numpy arrays with data for every independent variable in
         `model`.
     """
-    indep_vars = model.independent_vars
-    dep_vars = model.dependent_vars
     params = model.params
 
-    min_num_values = len(params) + 1
+    # We need just a bit more data than parameters so that there's a best answer
+    min_num_values = len(params) + 3
     indep_shape = draw(independent_data_shapes(model, min_num_values))
 
     indep_vals = {}
     for var, shape in indep_shape.items():
-        data = npst.arrays(float, shape, elements=NUMBERS, unique=True).map(np.sort)
+        data = npst.arrays(float, shape, elements=NUMBERS, unique=False).map(np.sort)
         indep_vals[str(var)] = data
 
-    data = draw(st.fixed_dictionaries(indep_vals))
-
+    # Data points don't have to be unique, but there should be enough
+    # independent ones to overspecify the fit.
+    data = draw(st.fixed_dictionaries(indep_vals).filter(lambda d: _count_unique_values(d) >= min_num_values))
     return data
 
 
@@ -355,7 +384,7 @@ def indep_values(draw, model):
 def param_values(draw, model):
     """
     Strategy for generating values for the parameters of `model`. Will also set
-    the values of the parameters.
+    the values and bounds of the parameters.
 
     Parameters
     ----------
@@ -373,20 +402,15 @@ def param_values(draw, model):
             st.lists(
                 st.one_of(
                     st.none(),
-                    st.floats(0, 2*abs(param.value), allow_infinity=False, allow_nan=False)
+                    st.floats(1, 1+2*abs(param.value), allow_infinity=False, allow_nan=False)
                 ),
                 min_size=2,
                 max_size=2
             )
         )
-        if lower is not None:
-            param.min = param.value - lower
-        else:
-            param.min = None
-        if upper is not None:
-            param.max = param.value + upper
-        else:
-            param.max = None
+        param.min = param.value - lower if lower is not None else None
+        param.max = param.value + upper if upper is not None else None
+
     return data
 
 
@@ -413,11 +437,7 @@ def dep_values(draw, model, indep_data, param_vals):
         sigmas.
     """
     dep_data = model(**indep_data, **param_vals)._asdict()
-    # for var, data in dep_data.items():
-    #     if var in model.dependent_vars:
-    #         assume(np.all(((np.abs(data) >= MIN_VAL) | (data == 0)) &
-    #                       (np.abs(data) <= MAX_VAL)
-    #                       ))
+
     # You can't provide data for interdependent variables. #270
     dep_data = {
         var: data
@@ -437,7 +457,7 @@ def dep_values(draw, model, indep_data, param_vals):
 
 
 @st.composite
-def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_strategy, **kwargs):
+def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_strategy, max_jacobian=1e3, **kwargs):
     """
     Strategy that generates a model with associated data using the given
     dependent variables.
@@ -449,9 +469,12 @@ def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_str
     independent_vars: hypothesis.LazySearchStrategy[symfit.Variable]
         Strategy to use to generate independent variables for the model.
     parameters: hypothesis.LazySearchStrategy[symfit.Parameter]
-        Strategy to use to generate independent variables for the model.
+        Strategy to use to generate parameters for the model.
     expr_strategy: hypothesis.LazySearchStrategy[sympy.Expr]
         Strategy to use to generate expressions for the model.
+    max_jacobian: float or None
+        If not None, the maximum values allowed for the Jacobian to avoid
+        numerical instabilities
 
     Returns
     -------
@@ -464,17 +487,31 @@ def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_str
     dict[str, np.array]
         Data for the dependent variables, as well as sigmas.
     """
+    def _is_valid_data(data):
+        indep_data, param_data = data
+        try:
+            dep_data = model(**indep_data, **param_data)
+            jac = model.eval_jacobian(**indep_data, **param_data)
+        except ZeroDivisionError:
+            # Can happen with, for example: a**(-1.0) with a=0
+            return False
+
+        if max_jacobian is not None:
+            low_jac = all(np.all(np.abs(component) <= max_jacobian) for component in jac)
+        else:
+            low_jac = True
+
+        # Let's make sure all dependant data is finite and real.
+        finite_data = all(data is None or (np.all(np.isfinite(data) & np.isreal(data))) for data in dep_data)
+
+        return low_jac and finite_data
+
     model = draw(models(dependent_vars, independent_vars, parameters, expr_strategy, **kwargs))
-    indep_data, param_data = draw(st.tuples(indep_values(model), 
-                                            param_values(model)),
+    indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)),
                                   label='independent data, parameters')
-    try:
-        dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data')
-    except ArithmeticError:
-        assume(False)  # Some model + data that causes numerical issues
-    for data in dep_data.values():
-        # In addition, let's make sure all data is finite and real.
-        assume(data is None or (np.all(np.isfinite(data) & np.isreal(data))))
+    assume(_is_valid_data([indep_data, param_data]))
+
+    dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data')
     return model, param_data, indep_data, dep_data
 
 
diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py
index 9d6434b1..8ab8cb44 100644
--- a/tests/test_hypothesis.py
+++ b/tests/test_hypothesis.py
@@ -1,6 +1,6 @@
 from .hypothesis_helpers import model_with_data, expression_strategy, polynomial_strategy
 from hypothesis import strategies as st
-from hypothesis import given, note, assume, settings, HealthCheck, event
+from hypothesis import given, note, assume, settings, HealthCheck
 
 import numpy as np
 from pytest import approx
@@ -13,19 +13,19 @@
 INDEPENDENT_VARS = st.sampled_from(variables('x, y, z'))
 PARAMETERS = st.sampled_from(parameters('a, b, c'))
 
-def _cmp_result(reference, found, abs=None, rel=None):
+def _cmp_params(reference, found, abs=None, rel=None):
     reference = reference.copy()
     for k, v in reference.items():
         reference[k] = approx(v, abs=abs, rel=rel)
     assert found == reference
 
-    # assert reference.keys() == found.keys()
-    # ref_vals = []
-    # found_vals = []
-    # for k in reference:
-    #     ref_vals.append(reference[k])
-    #     found_vals.append(found[k])
-    # return found_vals == approx(ref_vals, abs=abs, rel=rel)
+
+def _cmp_model_vals(reference, found, abs=None, rel=None):
+    actual = {}
+    for k, v in found._asdict().items():
+        if str(k) in reference:
+            actual[str(k)] = approx(v, abs=abs, rel=rel)
+    assert reference == actual
 
 
 def expressions(vars, pars):
@@ -39,53 +39,67 @@ def expressions(vars, pars):
                        PARAMETERS,
                        expression_strategy))
 def test_simple(model_with_data):
+    """
+    Generate a model with data and parameters, and fit the exact data (without
+    any noise), with initial parameter values at the correct answer. Check that
+    the minimizer doesn't drift away from this correct answer.
+    Note. It is not possible to give approximate parameter values as starting
+    point, since there may be multiple minima (and hypothesis *will* find those
+    cases)
+    """
     model, param_vals, indep_vals, dep_vals = model_with_data
     note(str(model))
-    assume(model.params)  # Stacking error in model.eval_jacobian
+    note(str(model(**indep_vals, **param_vals)))
 
     fit = Fit(model, **indep_vals, **dep_vals)
     result = fit.execute()
     note(str(result))
-    note(str(model(**indep_vals, **result.params)))
     note(str(fit.minimizer))
-    _cmp_result(param_vals, result.params)
+    _cmp_params(param_vals, result.params)
 
 
 @settings(max_examples=50, deadline=None,
           suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much])
 @given(st.data())
 def test_linear_models(data):
-    # minimizer = data.draw(st.sampled_from([None, LBFGSB, TrustConstr, SLSQP]), label='minimizer')
-    minimizer = None
+    """
+    Generate a linear model (specifically, a polynomial), and pick parameter
+    values between their min and max, and perform the fit. Note that there
+    is still no noise on the data.
+    """
+    minimizer = data.draw(st.sampled_from([None, LBFGSB, SLSQP]), label='minimizer')
+    # minimizer = None
     model, param_vals, indep_vals, dep_vals = data.draw(
         model_with_data(DEPENDENT_VARS,
                         INDEPENDENT_VARS,
                         PARAMETERS,
                         polynomial_strategy,
-                        allow_interdependent=False)
+                        allow_interdependent=False),  # If true, no longer linear
+        label="Model"
     )
+    note(str(minimizer))
     note(str(model))
-    note(param_vals)
+    for param in model.params:
+        note("{}: {} {} {}".format(param, param.min, param.value, param.max))
     note(indep_vals)
     note(dep_vals)
 
-    assume(model.params)  # Stacking error in model.eval_jacobian
     init_vals = param_vals.copy()
     for param in model.params:
-        # param.value = data.draw(st.floats(param.min, param.max, allow_nan=False, allow_infinity=False))
-        param.value = data.draw(st.floats(-1e6, 1e6, allow_nan=False, allow_infinity=False))
+        param.value = data.draw(st.floats(
+            -1e3, 1e3,
+            allow_nan=False,
+            allow_infinity=False), label="Parameter value {}".format(param))
         init_vals[str(param)] = param.value
         note((param.name, param.min, param.value, param.max))
-        # assert param.min is None or param.min <= param_vals[str(param)]
-        # assert param.max is None or param_vals[str(param)] <= param.max
 
     fit = Fit(model, **indep_vals, **dep_vals, minimizer=minimizer)
     obj_jac = fit.objective.eval_jacobian(**init_vals)
+    # If the jacobian is too large you get numerical issues (in particular
+    # SLSQP struggles?).
+    assume(np.all(np.abs(obj_jac) < 1e5))
     note(str(obj_jac))
     note(str(fit.objective.eval_hessian(**init_vals)))
-    # Exclude some edge cases that won't do anything due to numerical precision
-    # This also catches cases like y=a*x with y=0 and x=0
-    assume(np.any(np.abs(obj_jac) > 1e-3))
 
     result = fit.execute()
     # "Minimization stopped due to precision loss" and variants
@@ -97,9 +111,6 @@ def test_linear_models(data):
     note(str(fit.objective.eval_jacobian(**result.params)))
     note(str(fit.objective.eval_hessian(**result.params)))
     note(str(fit.minimizer))
-    # If the R2 isn't one, check whether the parameters are ~equal.
-    if result.r_squared != approx(1, abs=5e-3):
-        event('R squared != 1')
-        _cmp_result(param_vals, result.params, rel=1e-3, abs=1e-2)
-    else:
-        event('R squared == 1')
\ No newline at end of file
+
+    tmp = {v: dep_vals[str(v)] for v in dep_vals if not v.startswith('sigma_')}
+    _cmp_model_vals(tmp, model(**result.params, **indep_vals), rel=1e-3, abs=1e-2)

From 8dc913095e978395222045f5e52091baad2c0498 Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 21 Oct 2022 15:14:18 +0200
Subject: [PATCH 08/11] Update plumbing

---
 .coveragerc                 | 2 ++
 .github/workflows/test.yaml | 2 +-
 .gitignore                  | 8 ++++++++
 requirements_test.txt       | 4 +++-
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/.coveragerc b/.coveragerc
index 40c40f0a..3ca69e1b 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,4 +1,6 @@
 [run]
+source = symfit
 branch = True
+concurrency = multiprocessing
 omit = 
     tests/*
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index be462b0c..deb50a28 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -25,4 +25,4 @@ jobs:
         pip install -e .
     - name: Test with coverage
       run:
-        pytest
\ No newline at end of file
+        pytest --cov=symfit --hypothesis-show-statistics
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 62365349..fb97a378 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,11 @@ build
 docs/_build
 docs/_doctrees
 dist
+
+.hypothesis/
+
+.eggs/
+
+symfit.egg-info/
+
+.coverage
diff --git a/requirements_test.txt b/requirements_test.txt
index 103175ea..2cfea79f 100644
--- a/requirements_test.txt
+++ b/requirements_test.txt
@@ -1,4 +1,6 @@
 pytest
+pytest-cov
+coverage
 coveralls
-hypothesis
+hypothesis[numpy]
 

From f7558604004a51ec87c91056f54ed853971fe86d Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 21 Oct 2022 15:15:00 +0200
Subject: [PATCH 09/11] Allow ScipyMinimizer to deal with models without free
 parameters

---
 symfit/core/minimizers.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/symfit/core/minimizers.py b/symfit/core/minimizers.py
index 7a57edd0..e6b15ee7 100644
--- a/symfit/core/minimizers.py
+++ b/symfit/core/minimizers.py
@@ -347,6 +347,17 @@ def execute(self, bounds=None, jacobian=None, hessian=None, constraints=None, **
             :func:`scipy.optimize.minimize`. Note that your `method` will
             usually be filled by a specific subclass.
         """
+        if not self.params:
+            return FitResults(
+                model=DummyModel(params=[]),
+                popt=[],
+                covariance_matrix=np.array([]),
+                objective=self.objective,
+                minimizer=self,
+                message="No free parameters",
+                fun=self.objective(),
+                niter=0,
+            )
         ans = minimize(
             self.objective,
             self.initial_guesses,

From 26f3f8cab2af486801ccd627e5c2816e76cf1165 Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 21 Oct 2022 15:15:32 +0200
Subject: [PATCH 10/11] Fix stacking of Jac and Hess with empty models

---
 symfit/core/models.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/symfit/core/models.py b/symfit/core/models.py
index da1e72f3..5009444e 100644
--- a/symfit/core/models.py
+++ b/symfit/core/models.py
@@ -913,7 +913,10 @@ def eval_jacobian(self, *args, **kwargs):
         # the parameter dimension. We do not include the component direction in
         # this, because the components can have independent shapes.
         for idx, comp in enumerate(jac):
-            jac[idx] = np.stack(np.broadcast_arrays(*comp))
+            if comp:
+                jac[idx] = np.stack(np.broadcast_arrays(*comp))
+            else:
+                jac[idx] = np.array([])
 
         return ModelOutput(self.keys(), jac)
 
@@ -957,7 +960,10 @@ def eval_hessian(self, *args, **kwargs):
         # the parameter dimension. We do not include the component direction in
         # this, because the components can have independent shapes.
         for idx, comp in enumerate(hess):
-            hess[idx] = np.stack(np.broadcast_arrays(*comp))
+            if comp:
+                hess[idx] = np.stack(np.broadcast_arrays(*comp))
+            else:
+                hess[idx] = np.array([])
 
         return ModelOutput(self.keys(), hess)
 

From 3dc219e5a810cc54614cb3f8a54f3aea9fe5e20e Mon Sep 17 00:00:00 2001
From: Peter Kroon <pckroon@users.noreply.github.com>
Date: Fri, 21 Oct 2022 15:25:35 +0200
Subject: [PATCH 11/11] Re-add TrustConstr

---
 tests/test_hypothesis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py
index 8ab8cb44..9d81b7fc 100644
--- a/tests/test_hypothesis.py
+++ b/tests/test_hypothesis.py
@@ -67,7 +67,7 @@ def test_linear_models(data):
     values between their min and max, and perform the fit. Note that there
     is still no noise on the data.
     """
-    minimizer = data.draw(st.sampled_from([None, LBFGSB, SLSQP]), label='minimizer')
+    minimizer = data.draw(st.sampled_from([None, LBFGSB, SLSQP, TrustConstr]), label='minimizer')
     # minimizer = None
     model, param_vals, indep_vals, dep_vals = data.draw(
         model_with_data(DEPENDENT_VARS,