From 57dacddb91347246ac56a956e66bd342b76a4249 Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Tue, 2 Apr 2019 17:13:17 +0200 Subject: [PATCH 01/11] Create first hypothesis test and fix first bug --- symfit/core/fit.py | 1 + symfit/core/models.py | 10 +- tests/hypothesis_helpers.py | 295 ++++++++++++++++++++++++++++++++++++ tests/test_hypothesis.py | 33 ++++ 4 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 tests/hypothesis_helpers.py create mode 100644 tests/test_hypothesis.py diff --git a/symfit/core/fit.py b/symfit/core/fit.py index 3975d9a9..1a2ef7ab 100644 --- a/symfit/core/fit.py +++ b/symfit/core/fit.py @@ -26,6 +26,7 @@ else: import funcsigs as inspect_sig + class TakesData(object): """ An base class for everything that takes data. Most importantly, it takes care diff --git a/symfit/core/models.py b/symfit/core/models.py index 01a8109f..da1e72f3 100644 --- a/symfit/core/models.py +++ b/symfit/core/models.py @@ -328,8 +328,14 @@ def _init_from_dict(self, model_dict): # Everything at the bottom of the toposort is independent, at the top # dependent, and the rest interdependent. ordered = list(toposort(self.connectivity_mapping)) - independent = sorted(ordered.pop(0), key=sort_func) - self.dependent_vars = sorted(ordered.pop(-1), key=sort_func) + if ordered: + independent = sorted(ordered.pop(0), key=sort_func) + else: + independent = [] + if ordered: + self.dependent_vars = sorted(ordered.pop(-1), key=sort_func) + else: + self.dependent_vars = [] self.interdependent_vars = sorted( [item for items in ordered for item in items], key=sort_func diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py new file mode 100644 index 00000000..ec732012 --- /dev/null +++ b/tests/hypothesis_helpers.py @@ -0,0 +1,295 @@ +from functools import reduce +import operator + +import numpy as np + +from symfit import Model, parameters, variables, Variable +from symfit.core.support import key2str +import sympy + +from hypothesis import strategies as st +from hypothesis import assume, note + + +NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e6, max_value=1e6) + + +@st.composite +def exprs(draw, symbols, operations, n_steps, allow_number=False): + """ + A strategy to generate arbitrary :mod:`Sympy` expressions, based on + strategies for generating `symbols` and `operations`. + + Parameters + ---------- + symbols: hypothesis.LazySearchStrategy[sympy.Symbol] + A strategy that generates sympy symbols. Should probably include + constants such as 1 and -1. + operations: hypothesis.LazySearchStrategy[tuple[int, sympy.Operation]] + A strategy that generates operations with the associated number of + arguments. + n_steps: int + The number of steps to take. Larger numbers result in more complex + expressions. + allow_number: bool + Whether the produced expression can be just a number. + + Returns + ------- + sympy.Expression + """ + expr = draw(symbols) + for _ in range(n_steps): + nargs, op = draw(operations) + args, expr_pos = draw(st.tuples(st.lists(symbols, + min_size=nargs-1, + max_size=nargs-1), + st.integers(0, nargs-1))) + args.insert(expr_pos, expr) + expr = op(*args) + assume(allow_number or not expr.is_number) + return expr + + +# TODO: ODEModels +@st.composite +def models(draw, dependent_vars, symbols, steps=5): + """ + A strategy for generating :class:`symfit.Model` s. + + Parameters + ---------- + dependent_vars: hypothesis.LazySearchStrategy[list[symfit.Variable]] + A strategy that generates lists of Variables. These variables will be + used as dependent variables. + symbols: hypothesis.LazySearchStrategy[symfit.Parameter or symfit.Variable] + A strategy that generates either a parameter or independent variable. + steps: int + The number of operations per expression. Larger numbers result is more + complex models. + + Returns + ------- + symfit.Model + """ + constants = st.sampled_from([0, 1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E]) + symbols = st.one_of(constants, symbols) + ops = st.sampled_from([(2, sympy.Add), + (2, sympy.Mul), + # Can cause imaginary numbers (e.g. (-4)**1.2) + #(2, sympy.Pow), + (1, sympy.sin), + # (2, sympy.log), + ]) + expressions = exprs(symbols, ops, steps, False) + model_dict = {} + for dependent_var in draw(dependent_vars): + expr = draw(expressions) + model_dict[dependent_var] = expr + model = Model(model_dict) + return model + + +def _same_shape_vars(model): + """ + Helper function that returns a set of frozensets of all independent + variables in model that should have broadcast compatible shapes. + """ + indep_vars = model.independent_vars + con_map = model.connectivity_mapping + shape_groups = {var: {var} for var in indep_vars} + for group in con_map.values(): + comb = {var for var in group if var in indep_vars} + shape_group = set() + for var in comb: + shape_group.update(shape_groups[var]) + for var in shape_group: + shape_groups[var] = shape_group + shape_groups = set(frozenset(g) for g in shape_groups.values()) + return shape_groups + + +@st.composite +def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, max_dim=3): + """ + Strategy that generates shapes for the independent variables of `model` such + that variables that need to be broadcast compatible will be the same shape. + + Parameters + ---------- + model: symfit.Model + Shapes will be generated for this model's independent variables. + min_value: int + The minimum size a dimension can be. + max_value: int + The maximum size a dimension can be. + min_dim: int + The minimum number of dimensions the shape should have. + max_dim: int + The maximum number of dimensions the shape should have. + + Returns + ------- + dict[str]: tuple[int, ...] + """ + indep_vars = model.independent_vars + shape_groups = _same_shape_vars(model) + shapes = st.lists(st.integers(min_value, max_value), + min_size=min_dim, max_size=max_dim).map(tuple) + grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups})) + indep_shape = {} + for vars, shape in grp_shape.items(): + for var in vars: + indep_shape[var] = shape + return indep_shape + + +def numpy_arrays(shape, elements=NUMBERS, **kwargs): + """ + Basic strategy for generating numpy arrays of the specified `shape`, with + the specified `elements`. + + Parameters + ---------- + shape: tuple[int, ...] + The shape the array should get. + elements: hypothesis.LazySearchStrategy + The strategy which should be used to generate elements of the array. + kwargs: dict[str] + Additional keyword arguments to be passed to + :func:`hypothesis.strategies.lists` + + Returns + ------- + hypothesis.LazySearchStrategy[np.array] + """ + number_of_values = reduce(operator.mul, shape, 1) + return st.lists(elements, + min_size=number_of_values, + max_size=number_of_values, + **kwargs).map(lambda l: np.reshape(l, shape)) + + +@st.composite +def indep_values(draw, model): + """ + Strategy for generating independent data for a given `model`. + + Parameters + ---------- + model: symfit.Model + + Returns + ------- + dict[str, np.array] + Dict of numpy arrays with data for every independent variable in + `model`. + """ + indep_vars = model.independent_vars + dep_vars = model.dependent_vars + params = model.params + + indep_shape = draw(independent_data_shapes(model)) + + indep_vals = {} + for var, shape in indep_shape.items(): + data = numpy_arrays(shape, unique=True).map(np.sort) + indep_vals[str(var)] = data + + data = draw(st.fixed_dictionaries(indep_vals)) + + return data + + +@st.composite +def param_values(draw, model): + """ + Strategy for generating values for the parameters of `model`. Will also set + the values of the parameters. + + Parameters + ---------- + model: symfit.Model + + Returns + ------- + dict[str, float] + """ + param_vals = st.fixed_dictionaries({str(param): NUMBERS for param in model.params}) + data = draw(param_vals) + for param in model.params: + param.value = data[str(param)] + return data + + +@st.composite +def dep_values(draw, model, indep_data, param_vals): + """ + Strategy for generating dependent data for the given model, independent + data, and parameter values. + + Parameters + ---------- + model: symfit.Model + The model to be used. Will be called with the given data and parameter + values. + indep_data: dict[str, np.array] + Data for all the independent variables in `model`. + param_vals: dict[str, float] + Values for all the parameters in `model`. + + Returns + ------- + dict[str, np.array] + Data for the dependent variables of `model`, as well as associated + sigmas. + """ + try: + dep_data = model(**indep_data, **param_vals)._asdict() + except (OverflowError, ZeroDivisionError): + assume(False) # Some model + data that causes numerical issues + shapes = {var: data.shape for var, data in dep_data.items()} + sigmas = { + 'sigma_{}'.format(str(var)): st.one_of( + st.none(), + st.floats(allow_nan=False, allow_infinity=False, min_value=0, max_value=5), + numpy_arrays(vals.shape) + ) for var, vals in dep_data.items() + } + sigmas = draw(st.fixed_dictionaries(dict(**sigmas))) + return dict(**sigmas, **key2str(dep_data)) + + +@st.composite +def model_with_data(draw, dependent_vars, symbols, steps=5): + """ + Strategy that generates a model with associated data using the given + dependent variables. + + Parameters + ---------- + dependent_vars: hypothesis.LazySearchStrategy[list[symfit.Variable]] + Strategy to use to generate dependent variables for the model. + symbols: hypothesis.LazySearchStrategy[symfit.Variable or symfit.Parameter] + Strategy to use to generate independent variables and parameters for the + model + steps: int + The number of operations to perform when generating the expressions for + every dependent component of the model. More steps means complexer + expressions. + + Returns + ------- + symfit.Model + The generated model. + dict[str, float] + Values for the parameters. + dict[str, np.array] + Data for the independent variables. + dict[str, np.array] + Data for the dependent variables, as well as sigmas. + """ + model = draw(models(dependent_vars, symbols, steps=steps)) + indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters') + dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data') + return model, param_data, indep_data, dep_data diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py new file mode 100644 index 00000000..cb2fe1d5 --- /dev/null +++ b/tests/test_hypothesis.py @@ -0,0 +1,33 @@ +from .hypothesis_helpers import model_with_data +from hypothesis import strategies as st +from hypothesis import given, note, assume, settings + +import numpy as np + +from symfit import Fit, variables, parameters + + +DEPENDENT_VARS = st.lists(st.sampled_from(variables('Y1, Y2')), unique=True, min_size=1) +SYMBOLS = st.sampled_from(parameters('a, b, c') + variables('x, y, z')) + +def _cmp_result(reference, found): + assert reference.keys() == found.keys() + ref_vals = [] + found_vals = [] + for k in reference: + ref_vals.append(reference[k]) + found_vals.append(found[k]) + assert np.allclose(ref_vals, found_vals) + + +@settings(max_examples=500) +@given(model_with_data(DEPENDENT_VARS, SYMBOLS)) +def test_simple(model_with_data): + model, param_vals, indep_vals, dep_vals = model_with_data + note(str(model)) + assume(model.params) # Stacking error in model.eval_jacobian + + fit = Fit(model, **indep_vals, **dep_vals) + result = fit.execute() + note(str(result)) + _cmp_result(param_vals, result.params) From 9930acbc3c7a6718a54595d1f51703c8700d0888 Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Wed, 3 Apr 2019 11:05:19 +0200 Subject: [PATCH 02/11] Change exception/assume --- tests/hypothesis_helpers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py index ec732012..b31f105f 100644 --- a/tests/hypothesis_helpers.py +++ b/tests/hypothesis_helpers.py @@ -244,10 +244,7 @@ def dep_values(draw, model, indep_data, param_vals): Data for the dependent variables of `model`, as well as associated sigmas. """ - try: - dep_data = model(**indep_data, **param_vals)._asdict() - except (OverflowError, ZeroDivisionError): - assume(False) # Some model + data that causes numerical issues + dep_data = model(**indep_data, **param_vals)._asdict() shapes = {var: data.shape for var, data in dep_data.items()} sigmas = { 'sigma_{}'.format(str(var)): st.one_of( @@ -291,5 +288,8 @@ def model_with_data(draw, dependent_vars, symbols, steps=5): """ model = draw(models(dependent_vars, symbols, steps=steps)) indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters') - dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data') + try: + dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data') + except ArithmeticError: + assume(False) # Some model + data that causes numerical issues return model, param_data, indep_data, dep_data From b46a407177be17d36d2ce815e5b5617c90270c30 Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Wed, 3 Apr 2019 11:19:02 +0200 Subject: [PATCH 03/11] Add hypothesis to Github Actions --- .github/workflows/test.yaml | 2 +- .github/workflows/test_and_coveralls.yml | 4 +--- requirements_test.txt | 4 ++++ 3 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 requirements_test.txt diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 69a0098c..be462b0c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -19,8 +19,8 @@ jobs: run: | python -m pip install --upgrade pip pip install wheel - pip install pytest pip install -r requirements.txt + pip install -r requirements_test.txt pip install matplotlib pip install -e . - name: Test with coverage diff --git a/.github/workflows/test_and_coveralls.yml b/.github/workflows/test_and_coveralls.yml index ec2f4690..c3d503ce 100644 --- a/.github/workflows/test_and_coveralls.yml +++ b/.github/workflows/test_and_coveralls.yml @@ -16,9 +16,8 @@ jobs: run: | python -m pip install --upgrade pip pip install wheel - pip install coveralls - pip install pytest pip install -r requirements.txt + pip install -r requirements_test.txt pip install matplotlib pip install -e . - name: Test with coverage @@ -28,5 +27,4 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - pip install --upgrade coveralls coveralls --service=github \ No newline at end of file diff --git a/requirements_test.txt b/requirements_test.txt new file mode 100644 index 00000000..103175ea --- /dev/null +++ b/requirements_test.txt @@ -0,0 +1,4 @@ +pytest +coveralls +hypothesis + From 87be0f268d63d4c5aae3a7a77101bd0900df82ae Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Tue, 16 Apr 2019 11:46:21 +0200 Subject: [PATCH 04/11] Better expression generation --- tests/hypothesis_helpers.py | 79 ++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 31 deletions(-) diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py index b31f105f..26b7d0a1 100644 --- a/tests/hypothesis_helpers.py +++ b/tests/hypothesis_helpers.py @@ -8,14 +8,19 @@ import sympy from hypothesis import strategies as st +from hypothesis.extra import numpy as npst from hypothesis import assume, note NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e6, max_value=1e6) +def _is_sorted(lst, cmp=lambda x, y: x <= y): + return all(cmp(x, y) for x, y in zip(lst[:-1], lst[1:])) + + @st.composite -def exprs(draw, symbols, operations, n_steps, allow_number=False): +def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False): """ A strategy to generate arbitrary :mod:`Sympy` expressions, based on strategies for generating `symbols` and `operations`. @@ -25,12 +30,14 @@ def exprs(draw, symbols, operations, n_steps, allow_number=False): symbols: hypothesis.LazySearchStrategy[sympy.Symbol] A strategy that generates sympy symbols. Should probably include constants such as 1 and -1. - operations: hypothesis.LazySearchStrategy[tuple[int, sympy.Operation]] + unary_ops: hypothesis.LazySearchStrategy[sympy.Operation] + A strategy that generates sympy operations that take a single argument. + operations: hypothesis.LazySearchStrategy[tuple[int, sympy.Operation, bool]] A strategy that generates operations with the associated number of - arguments. - n_steps: int - The number of steps to take. Larger numbers result in more complex - expressions. + arguments and whether they commute. + n_leaves: int + The number of symbols to initially generate. Larger numbers result in + more complex expressions. allow_number: bool Whether the produced expression can be just a number. @@ -38,22 +45,34 @@ def exprs(draw, symbols, operations, n_steps, allow_number=False): ------- sympy.Expression """ - expr = draw(symbols) - for _ in range(n_steps): + leaves = draw(st.lists(symbols, min_size=n_leaves, max_size=n_leaves)) + + unary_ops = st.one_of(st.none(), unary_ops) + first_ops = draw(st.lists(unary_ops, min_size=len(leaves), max_size=len(leaves))) + leaves = [op(leave) if op else leave for op, leave in zip(first_ops, leaves)] + + while len(leaves) != 1: nargs, op = draw(operations) - args, expr_pos = draw(st.tuples(st.lists(symbols, - min_size=nargs-1, - max_size=nargs-1), - st.integers(0, nargs-1))) - args.insert(expr_pos, expr) - expr = op(*args) + idxs = draw(st.lists(st.integers(min_value=0, max_value=len(leaves) - 1), + min_size=nargs, + max_size=nargs, + unique=True)) + args = [leaves[idx] for idx in idxs] + for idx in sorted(idxs, reverse=True): + del leaves[idx] + new_node = op(*args) + op = draw(unary_ops) + if op: + new_node = op(new_node) + leaves.append(new_node) + expr = leaves[0] assume(allow_number or not expr.is_number) return expr # TODO: ODEModels @st.composite -def models(draw, dependent_vars, symbols, steps=5): +def models(draw, dependent_vars, symbols, leaves=5): """ A strategy for generating :class:`symfit.Model` s. @@ -72,16 +91,19 @@ def models(draw, dependent_vars, symbols, steps=5): ------- symfit.Model """ - constants = st.sampled_from([0, 1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E]) + constants = st.sampled_from([1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E]) symbols = st.one_of(constants, symbols) ops = st.sampled_from([(2, sympy.Add), (2, sympy.Mul), # Can cause imaginary numbers (e.g. (-4)**1.2) - #(2, sympy.Pow), - (1, sympy.sin), + # (2, sympy.Pow), # (2, sympy.log), ]) - expressions = exprs(symbols, ops, steps, False) + unary_ops = st.sampled_from([sympy.sin, + sympy.exp, + lambda e: -e, + lambda e: 1/e]) + expressions = exprs(symbols, unary_ops, ops, leaves, False) model_dict = {} for dependent_var in draw(dependent_vars): expr = draw(expressions) @@ -134,8 +156,7 @@ def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, m """ indep_vars = model.independent_vars shape_groups = _same_shape_vars(model) - shapes = st.lists(st.integers(min_value, max_value), - min_size=min_dim, max_size=max_dim).map(tuple) + shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value) grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups})) indep_shape = {} for vars, shape in grp_shape.items(): @@ -157,18 +178,14 @@ def numpy_arrays(shape, elements=NUMBERS, **kwargs): The strategy which should be used to generate elements of the array. kwargs: dict[str] Additional keyword arguments to be passed to - :func:`hypothesis.strategies.lists` + :func:`hypothesis.extra.numpy.arrays` Returns ------- hypothesis.LazySearchStrategy[np.array] """ - number_of_values = reduce(operator.mul, shape, 1) - return st.lists(elements, - min_size=number_of_values, - max_size=number_of_values, - **kwargs).map(lambda l: np.reshape(l, shape)) - + return npst.arrays(float, shape, elements=NUMBERS, **kwargs) + @st.composite def indep_values(draw, model): @@ -258,7 +275,7 @@ def dep_values(draw, model, indep_data, param_vals): @st.composite -def model_with_data(draw, dependent_vars, symbols, steps=5): +def model_with_data(draw, dependent_vars, symbols, leaves=5): """ Strategy that generates a model with associated data using the given dependent variables. @@ -270,7 +287,7 @@ def model_with_data(draw, dependent_vars, symbols, steps=5): symbols: hypothesis.LazySearchStrategy[symfit.Variable or symfit.Parameter] Strategy to use to generate independent variables and parameters for the model - steps: int + leaves: int The number of operations to perform when generating the expressions for every dependent component of the model. More steps means complexer expressions. @@ -286,7 +303,7 @@ def model_with_data(draw, dependent_vars, symbols, steps=5): dict[str, np.array] Data for the dependent variables, as well as sigmas. """ - model = draw(models(dependent_vars, symbols, steps=steps)) + model = draw(models(dependent_vars, symbols, leaves=leaves)) indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters') try: dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data') From b96bc3881b118ddcb1e0e35b39ca91cadd5009cd Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Thu, 12 Sep 2019 13:56:35 +0200 Subject: [PATCH 05/11] Add comments, add edgecases --- .coveragerc | 4 ++ tests/hypothesis_helpers.py | 105 +++++++++++++++++++++++++++--------- tests/test_hypothesis.py | 9 ++-- 3 files changed, 90 insertions(+), 28 deletions(-) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..40c40f0a --- /dev/null +++ b/.coveragerc @@ -0,0 +1,4 @@ +[run] +branch = True +omit = + tests/* diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py index 26b7d0a1..cf34039a 100644 --- a/tests/hypothesis_helpers.py +++ b/tests/hypothesis_helpers.py @@ -12,13 +12,18 @@ from hypothesis import assume, note -NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e6, max_value=1e6) +NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e3, max_value=1e3) def _is_sorted(lst, cmp=lambda x, y: x <= y): return all(cmp(x, y) for x, y in zip(lst[:-1], lst[1:])) +def _is_number(obj): + obj = sympy.sympify(obj) + return obj.is_number + + @st.composite def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False): """ @@ -45,21 +50,29 @@ def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False): ------- sympy.Expression """ - leaves = draw(st.lists(symbols, min_size=n_leaves, max_size=n_leaves)) + symbol_st = st.lists(symbols, min_size=n_leaves, max_size=n_leaves) + # Assert we draw at least one not numerical symbol, unless we allow numbers + leaves = draw(symbol_st.filter( + lambda l: allow_number or any(not _is_number(val) for val in l)) + ) + # TODO: allow chaining of unary ops unary_ops = st.one_of(st.none(), unary_ops) first_ops = draw(st.lists(unary_ops, min_size=len(leaves), max_size=len(leaves))) - leaves = [op(leave) if op else leave for op, leave in zip(first_ops, leaves)] + leaves = [op(leaf) if op else leaf for op, leaf in zip(first_ops, leaves)] while len(leaves) != 1: - nargs, op = draw(operations) - idxs = draw(st.lists(st.integers(min_value=0, max_value=len(leaves) - 1), - min_size=nargs, - max_size=nargs, - unique=True)) + nargs, op, comm = draw(operations) + # Draw indices for the leaves that get combined by the operation. + idxs_st = st.lists(st.integers(min_value=0, max_value=len(leaves) - 1), + min_size=nargs, + max_size=nargs, + unique=True) + # ... and if the operation commutes, make sure the indices are sorted. + idxs = draw(idxs_st.filter(lambda l: not comm or _is_sorted(l))) args = [leaves[idx] for idx in idxs] - for idx in sorted(idxs, reverse=True): - del leaves[idx] + # Remove the drawn symbols from `leaves` + leaves = [leaf for idx, leaf in enumerate(leaves) if idx not in idxs] new_node = op(*args) op = draw(unary_ops) if op: @@ -93,19 +106,29 @@ def models(draw, dependent_vars, symbols, leaves=5): """ constants = st.sampled_from([1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E]) symbols = st.one_of(constants, symbols) - ops = st.sampled_from([(2, sympy.Add), - (2, sympy.Mul), + ops = st.sampled_from([(2, sympy.Add, True), + (2, sympy.Mul, True), # Can cause imaginary numbers (e.g. (-4)**1.2) - # (2, sympy.Pow), - # (2, sympy.log), + # (2, sympy.Pow, False), + # (2, sympy.log, False), ]) unary_ops = st.sampled_from([sympy.sin, sympy.exp, lambda e: -e, lambda e: 1/e]) - expressions = exprs(symbols, unary_ops, ops, leaves, False) model_dict = {} - for dependent_var in draw(dependent_vars): + dep_vars = draw(dependent_vars) + for idx, dependent_var in enumerate(dep_vars): + # Dependent vars can depend on other dependent vars: {y1: k, y2: y1+1}. + # But this can cause cyclical dependency ({y1: y2, y2: y1}), so limit to + # idx < jdx. Note that this also prevents {y1: y1}. + other_vars = [dep_var for jdx, dep_var in enumerate(dep_vars) if idx < jdx] + if other_vars: + # Can't sample_from and empty list + symbs = st.one_of(symbols, st.sampled_from(other_vars)) + else: + symbs = symbols + expressions = exprs(symbs, unary_ops, ops, leaves, False) expr = draw(expressions) model_dict[dependent_var] = expr model = Model(model_dict) @@ -114,12 +137,20 @@ def models(draw, dependent_vars, symbols, leaves=5): def _same_shape_vars(model): """ - Helper function that returns a set of frozensets of all independent - variables in model that should have broadcast compatible shapes. + Helper function that returns a set of frozensets of all variables in model + that should have broadcast compatible shapes. """ indep_vars = model.independent_vars - con_map = model.connectivity_mapping + con_map = model.connectivity_mapping.copy() shape_groups = {var: {var} for var in indep_vars} + + # Deal with interdependent vars + for k, group in con_map.items(): + for var in list(group): + if var in con_map: + con_map[k] = group | con_map[var] + con_map[k] -= {var} + for group in con_map.values(): comb = {var for var in group if var in indep_vars} shape_group = set() @@ -155,13 +186,18 @@ def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, m dict[str]: tuple[int, ...] """ indep_vars = model.independent_vars - shape_groups = _same_shape_vars(model) + shape_groups = map(tuple, _same_shape_vars(model)) shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value) grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups})) indep_shape = {} for vars, shape in grp_shape.items(): - for var in vars: + # Not all shapes have to be the same size, but they should be + # broadcastable. Since it's a pain to find a shape A that is compatible + # with both B, C and D we'll just say that the last shape can be + # different. + for var in vars[:-1]: indep_shape[var] = shape + indep_shape[vars[-1]] = npst.broadcastable_shapes(shape, min_dim, max_dim, min_value, max_value) return indep_shape @@ -262,13 +298,19 @@ def dep_values(draw, model, indep_data, param_vals): sigmas. """ dep_data = model(**indep_data, **param_vals)._asdict() - shapes = {var: data.shape for var, data in dep_data.items()} + # You can't provide data for interdependent variables. + dep_data = { + var: data + for var, data in dep_data.items() + if var in model.dependent_vars + } + shapes = {var: data.shape for var, data in dep_data.items() if data is not None} sigmas = { 'sigma_{}'.format(str(var)): st.one_of( st.none(), st.floats(allow_nan=False, allow_infinity=False, min_value=0, max_value=5), - numpy_arrays(vals.shape) - ) for var, vals in dep_data.items() + numpy_arrays(shape) + ) for var, shape in shapes.items() } sigmas = draw(st.fixed_dictionaries(dict(**sigmas))) return dict(**sigmas, **key2str(dep_data)) @@ -304,9 +346,22 @@ def model_with_data(draw, dependent_vars, symbols, leaves=5): Data for the dependent variables, as well as sigmas. """ model = draw(models(dependent_vars, symbols, leaves=leaves)) - indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters') + indep_data, param_data = draw(st.tuples(indep_values(model), + param_values(model)), + label='independent data, parameters') try: dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data') except ArithmeticError: assume(False) # Some model + data that causes numerical issues + for data in dep_data.values(): + # In addition, let's make sure all data is finite and real. + assume(data is None or (np.all(np.isfinite(data)) and np.all(np.isreal(data)))) return model, param_data, indep_data, dep_data + + +if __name__ == '__main__': + DEPENDENT_VARS = st.lists(st.sampled_from(variables('Y1, Y2')), unique=True, min_size=1) + SYMBOLS = st.sampled_from(parameters('a, b, c') + variables('x, y, z')) + + for _ in range(5): + print(list(map(str, model_with_data(DEPENDENT_VARS, SYMBOLS).example()))) diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py index cb2fe1d5..baefb5bc 100644 --- a/tests/test_hypothesis.py +++ b/tests/test_hypothesis.py @@ -1,8 +1,9 @@ from .hypothesis_helpers import model_with_data from hypothesis import strategies as st -from hypothesis import given, note, assume, settings +from hypothesis import given, note, assume, settings, HealthCheck import numpy as np +from pytest import approx from symfit import Fit, variables, parameters @@ -17,10 +18,10 @@ def _cmp_result(reference, found): for k in reference: ref_vals.append(reference[k]) found_vals.append(found[k]) - assert np.allclose(ref_vals, found_vals) + assert found_vals == approx(ref_vals) -@settings(max_examples=500) +@settings(max_examples=100, deadline=None, suppress_health_check=[HealthCheck.too_slow]) @given(model_with_data(DEPENDENT_VARS, SYMBOLS)) def test_simple(model_with_data): model, param_vals, indep_vals, dep_vals = model_with_data @@ -30,4 +31,6 @@ def test_simple(model_with_data): fit = Fit(model, **indep_vals, **dep_vals) result = fit.execute() note(str(result)) + note(str(model(**indep_vals, **result.params))) + note(str(fit.minimizer)) _cmp_result(param_vals, result.params) From 860e89e60a870fe2228de3662a0060f81a8fec50 Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Fri, 20 Sep 2019 16:28:38 +0200 Subject: [PATCH 06/11] Add polynomial models, finish simple answer=guess test --- tests/hypothesis_helpers.py | 290 +++++++++++++++++++++++++----------- tests/test_hypothesis.py | 97 ++++++++++-- 2 files changed, 289 insertions(+), 98 deletions(-) diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py index cf34039a..e982ec75 100644 --- a/tests/hypothesis_helpers.py +++ b/tests/hypothesis_helpers.py @@ -1,9 +1,6 @@ -from functools import reduce -import operator - import numpy as np -from symfit import Model, parameters, variables, Variable +from symfit import Model, variables from symfit.core.support import key2str import sympy @@ -12,7 +9,13 @@ from hypothesis import assume, note -NUMBERS = st.floats(allow_nan=False, allow_infinity=False, min_value=-1e3, max_value=1e3) +MAX_VAL = 1e1 +MIN_VAL = 1e-1 +FLOATS = st.floats(allow_nan=False, allow_infinity=False, + min_value=MIN_VAL, max_value=MAX_VAL) +# Some hoops to jump through to limit the floats to MAX_VAL > abs(f) > MIN_VAL, +# but still allow f=0 +NUMBERS = st.one_of(FLOATS.map(lambda f: -f), st.just(0), FLOATS) def _is_sorted(lst, cmp=lambda x, y: x <= y): @@ -25,16 +28,22 @@ def _is_number(obj): @st.composite -def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False): +def expressions(draw, variables, parameters, constants, unary_ops, operations, + n_leaves, allow_number=False): """ A strategy to generate arbitrary :mod:`Sympy` expressions, based on strategies for generating `symbols` and `operations`. Parameters ---------- - symbols: hypothesis.LazySearchStrategy[sympy.Symbol] - A strategy that generates sympy symbols. Should probably include - constants such as 1 and -1. + variables: hypothesis.LazySearchStrategy[symfit.Variable] + A strategy that generates Variables. These variables will be + used as independent variables. + parameters: hypothesis.LazySearchStrategy[symfit.Parameter] + A strategy that generates Parameters. + constants: hypothesis.LazySearchStrategy[number] + A strategy that generates numbers. Should probably contain at least 1 + and -1. unary_ops: hypothesis.LazySearchStrategy[sympy.Operation] A strategy that generates sympy operations that take a single argument. operations: hypothesis.LazySearchStrategy[tuple[int, sympy.Operation, bool]] @@ -50,17 +59,24 @@ def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False): ------- sympy.Expression """ - symbol_st = st.lists(symbols, min_size=n_leaves, max_size=n_leaves) + if not allow_number: + first = st.one_of(variables, parameters) + rest = st.lists(st.one_of(constants, variables, parameters), + min_size=n_leaves - 1, max_size=n_leaves - 1) + symbol_st = st.tuples(first, rest).map(lambda t: [t[0], *t[1]]) + else: + symbol_st = st.lists(st.one_of(constants, variables, parameters), + min_size=n_leaves, max_size=n_leaves) # Assert we draw at least one not numerical symbol, unless we allow numbers - leaves = draw(symbol_st.filter( - lambda l: allow_number or any(not _is_number(val) for val in l)) - ) + leaves = draw(symbol_st) # TODO: allow chaining of unary ops unary_ops = st.one_of(st.none(), unary_ops) - first_ops = draw(st.lists(unary_ops, min_size=len(leaves), max_size=len(leaves))) + first_ops = draw(st.lists(unary_ops, + min_size=len(leaves), + max_size=len(leaves))) leaves = [op(leaf) if op else leaf for op, leaf in zip(first_ops, leaves)] - + while len(leaves) != 1: nargs, op, comm = draw(operations) # Draw indices for the leaves that get combined by the operation. @@ -83,53 +99,147 @@ def exprs(draw, symbols, unary_ops, operations, n_leaves, allow_number=False): return expr -# TODO: ODEModels -@st.composite -def models(draw, dependent_vars, symbols, leaves=5): +def expression_strategy(variables, parameters, n_leaves=5): """ - A strategy for generating :class:`symfit.Model` s. + Helper function that calls :func:`expressions` with sane + defaults. Parameters ---------- - dependent_vars: hypothesis.LazySearchStrategy[list[symfit.Variable]] - A strategy that generates lists of Variables. These variables will be - used as dependent variables. - symbols: hypothesis.LazySearchStrategy[symfit.Parameter or symfit.Variable] - A strategy that generates either a parameter or independent variable. - steps: int - The number of operations per expression. Larger numbers result is more - complex models. + variables: hypothesis.LazySearchStrategy[symfit.Variable] + A strategy that generates Variables. These variables will be + used as independent variables. + parameters: hypothesis.LazySearchStrategy[symfit.Parameter] + A strategy that generates Parameters. + n_leaves: int + The number of symbols to initially generate. Larger numbers result in + more complex expressions. Returns ------- - symfit.Model + hypothesis.LazySearchStrategy[sympy.Expr] """ - constants = st.sampled_from([1, -1, 2, 3, sympy.Rational(1, 2), sympy.pi, sympy.E]) - symbols = st.one_of(constants, symbols) + constants = st.sampled_from([1, -1, 2, 3, + sympy.Rational(1, 2), sympy.pi, sympy.E]) ops = st.sampled_from([(2, sympy.Add, True), (2, sympy.Mul, True), # Can cause imaginary numbers (e.g. (-4)**1.2) # (2, sympy.Pow, False), # (2, sympy.log, False), - ]) + ]) unary_ops = st.sampled_from([sympy.sin, sympy.exp, lambda e: -e, lambda e: 1/e]) + return expressions(variables=variables, parameters=parameters, + constants=constants, unary_ops=unary_ops, + operations=ops, n_leaves=n_leaves) + + +@st.composite +def polynomial_expressions(draw, variables, parameters, constants, + max_degree=10, max_terms=10): + """ + Generates polynomial expressions that are linear in `parameters` and + `constants`. The produced polynomials will have at most `max_terms` terms, + and the degree will be between 1 and `max_degree`. + + Parameters + ---------- + variables: hypothesis.LazySearchStrategy[symfit.Variable] + A strategy that generates Variables. These variables will be + used as independent variables. + parameters: hypothesis.LazySearchStrategy[symfit.Parameter] + A strategy that generates Parameters. + constants: hypothesis.LazySearchStrategy[number] + A strategy that generates constants. + max_degree: int + Maximum degree of the produced polynomial + max_terms: int + Maximum number of terms of the produced polynomial + + Returns + ------- + hypothesis.LazySearchStrategy[sympy.Expr] + """ + indep_vars = draw(st.lists(variables, unique=True, min_size=1)) + orders = st.dictionaries( + keys=st.tuples(*[st.integers(min_value=0, max_value=max_degree)]*len(indep_vars)).filter(lambda t: 1 <= sum(t) <= max_degree), + values=st.one_of(parameters, constants), + min_size=1, + max_size=10 + ) + orders = draw(orders) + expr = sympy.Poly(orders, *indep_vars).as_expr() + return expr + + +def polynomial_strategy(variables, parameters, max_degree=5, max_terms=10): + """ + Helper function that calls :func:`polynomial_expressions` with sane + defaults. + + Parameters + ---------- + variables: hypothesis.LazySearchStrategy[symfit.Variable] + A strategy that generates Variables. These variables will be + used as independent variables. + parameters: hypothesis.LazySearchStrategy[symfit.Parameter] + A strategy that generates Parameters. + max_degree: int + Maximum degree of the produced polynomial + max_terms: int + Maximum number of terms of the produced polynomial + + Returns + ------- + hypothesis.LazySearchStrategy[sympy.Expr] + """ + return polynomial_expressions(variables, parameters, st.just(1), + max_degree=max_degree, max_terms=max_terms) + + +# TODO: ODEModels +@st.composite +def models(draw, dependent_vars, independent_vars, parameters, expressions_st, + allow_interdependent=True): + """ + A strategy for generating :class:`symfit.Model` s. + + Parameters + ---------- + dependent_vars: hypothesis.LazySearchStrategy[symfit.Variable] + A strategy that generates Variables. These variables will be + used as dependent variables. + independent_vars: hypothesis.LazySearchStrategy[symfit.Variable] + A strategy that generates Variables. These variables will be + used as independent variables. + parameters: hypothesis.LazySearchStrategy[symfit.Parameter] + A strategy that generates Parameters. + expressions_st: callable[(hypothesis.LazySearchStrategy[symfit.Variable], + hypothesis.LazySearchStrategy[symfit.Parameter]), + hypothesis.LazySearchStrategy[sympy.Expr]] + A function that when given variables and parameters, produces a strategy + that produces expressions. + + Returns + ------- + symfit.Model + """ model_dict = {} - dep_vars = draw(dependent_vars) + dep_vars = draw(st.lists(dependent_vars, unique=True, min_size=1)) for idx, dependent_var in enumerate(dep_vars): # Dependent vars can depend on other dependent vars: {y1: k, y2: y1+1}. - # But this can cause cyclical dependency ({y1: y2, y2: y1}), so limit to - # idx < jdx. Note that this also prevents {y1: y1}. + # But this can cause cyclical dependency ({y1: y2, y2: y1}), so limit + # to idx < jdx. Note that this also prevents {y1: y1}. other_vars = [dep_var for jdx, dep_var in enumerate(dep_vars) if idx < jdx] - if other_vars: + if other_vars and allow_interdependent: # Can't sample_from and empty list - symbs = st.one_of(symbols, st.sampled_from(other_vars)) + indep_variables = st.one_of(independent_vars, st.sampled_from(other_vars)) else: - symbs = symbols - expressions = exprs(symbs, unary_ops, ops, leaves, False) - expr = draw(expressions) + indep_variables = independent_vars + expression_st = expressions_st(indep_variables, parameters) + expr = draw(expression_st) model_dict[dependent_var] = expr model = Model(model_dict) return model @@ -163,7 +273,8 @@ def _same_shape_vars(model): @st.composite -def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, max_dim=3): +def independent_data_shapes(draw, model, min_num_values=0, min_value=1, + max_value=10, min_dim=1, max_dim=3): """ Strategy that generates shapes for the independent variables of `model` such that variables that need to be broadcast compatible will be the same shape. @@ -187,7 +298,11 @@ def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, m """ indep_vars = model.independent_vars shape_groups = map(tuple, _same_shape_vars(model)) - shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value) + # Insist on a minimal number of data points to prevent underspecified models + # This is probably overly strict, since it enforces the same minimal shape + # on *all* components/variables. Instead it would probably be better to + # do this per element of model.connectivity_mapping + shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value).filter(lambda shape: np.prod(shape) >= min_num_values) grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups})) indep_shape = {} for vars, shape in grp_shape.items(): @@ -197,32 +312,13 @@ def independent_data_shapes(draw, model, min_value=1, max_value=10, min_dim=1, m # different. for var in vars[:-1]: indep_shape[var] = shape - indep_shape[vars[-1]] = npst.broadcastable_shapes(shape, min_dim, max_dim, min_value, max_value) + # Although technically possible, it can cause a bunch of numerical + # issues, and it's meaning is debatable at best. + # indep_shape[vars[-1]] = npst.broadcastable_shapes(shape, min_dim, max_dim, min_value, max_value) + indep_shape[vars[-1]] = shape return indep_shape -def numpy_arrays(shape, elements=NUMBERS, **kwargs): - """ - Basic strategy for generating numpy arrays of the specified `shape`, with - the specified `elements`. - - Parameters - ---------- - shape: tuple[int, ...] - The shape the array should get. - elements: hypothesis.LazySearchStrategy - The strategy which should be used to generate elements of the array. - kwargs: dict[str] - Additional keyword arguments to be passed to - :func:`hypothesis.extra.numpy.arrays` - - Returns - ------- - hypothesis.LazySearchStrategy[np.array] - """ - return npst.arrays(float, shape, elements=NUMBERS, **kwargs) - - @st.composite def indep_values(draw, model): """ @@ -242,11 +338,12 @@ def indep_values(draw, model): dep_vars = model.dependent_vars params = model.params - indep_shape = draw(independent_data_shapes(model)) + min_num_values = len(params) + 1 + indep_shape = draw(independent_data_shapes(model, min_num_values)) indep_vals = {} for var, shape in indep_shape.items(): - data = numpy_arrays(shape, unique=True).map(np.sort) + data = npst.arrays(float, shape, elements=NUMBERS, unique=True).map(np.sort) indep_vals[str(var)] = data data = draw(st.fixed_dictionaries(indep_vals)) @@ -272,6 +369,24 @@ def param_values(draw, model): data = draw(param_vals) for param in model.params: param.value = data[str(param)] + lower, upper = draw( + st.lists( + st.one_of( + st.none(), + st.floats(0, 2*abs(param.value), allow_infinity=False, allow_nan=False) + ), + min_size=2, + max_size=2 + ) + ) + if lower is not None: + param.min = param.value - lower + else: + param.min = None + if upper is not None: + param.max = param.value + upper + else: + param.max = None return data @@ -298,7 +413,12 @@ def dep_values(draw, model, indep_data, param_vals): sigmas. """ dep_data = model(**indep_data, **param_vals)._asdict() - # You can't provide data for interdependent variables. + # for var, data in dep_data.items(): + # if var in model.dependent_vars: + # assume(np.all(((np.abs(data) >= MIN_VAL) | (data == 0)) & + # (np.abs(data) <= MAX_VAL) + # )) + # You can't provide data for interdependent variables. #270 dep_data = { var: data for var, data in dep_data.items() @@ -308,8 +428,8 @@ def dep_values(draw, model, indep_data, param_vals): sigmas = { 'sigma_{}'.format(str(var)): st.one_of( st.none(), - st.floats(allow_nan=False, allow_infinity=False, min_value=0, max_value=5), - numpy_arrays(shape) + st.floats(allow_nan=False, allow_infinity=False, min_value=1e-6, max_value=5), + npst.arrays(float, shape, elements=st.floats(allow_nan=False, allow_infinity=False, min_value=1e-6, max_value=5)) ) for var, shape in shapes.items() } sigmas = draw(st.fixed_dictionaries(dict(**sigmas))) @@ -317,22 +437,21 @@ def dep_values(draw, model, indep_data, param_vals): @st.composite -def model_with_data(draw, dependent_vars, symbols, leaves=5): +def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_strategy, **kwargs): """ Strategy that generates a model with associated data using the given dependent variables. Parameters ---------- - dependent_vars: hypothesis.LazySearchStrategy[list[symfit.Variable]] + dependent_vars: hypothesis.LazySearchStrategy[symfit.Variable] Strategy to use to generate dependent variables for the model. - symbols: hypothesis.LazySearchStrategy[symfit.Variable or symfit.Parameter] - Strategy to use to generate independent variables and parameters for the - model - leaves: int - The number of operations to perform when generating the expressions for - every dependent component of the model. More steps means complexer - expressions. + independent_vars: hypothesis.LazySearchStrategy[symfit.Variable] + Strategy to use to generate independent variables for the model. + parameters: hypothesis.LazySearchStrategy[symfit.Parameter] + Strategy to use to generate independent variables for the model. + expr_strategy: hypothesis.LazySearchStrategy[sympy.Expr] + Strategy to use to generate expressions for the model. Returns ------- @@ -345,7 +464,7 @@ def model_with_data(draw, dependent_vars, symbols, leaves=5): dict[str, np.array] Data for the dependent variables, as well as sigmas. """ - model = draw(models(dependent_vars, symbols, leaves=leaves)) + model = draw(models(dependent_vars, independent_vars, parameters, expr_strategy, **kwargs)) indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters') @@ -355,13 +474,16 @@ def model_with_data(draw, dependent_vars, symbols, leaves=5): assume(False) # Some model + data that causes numerical issues for data in dep_data.values(): # In addition, let's make sure all data is finite and real. - assume(data is None or (np.all(np.isfinite(data)) and np.all(np.isreal(data)))) + assume(data is None or (np.all(np.isfinite(data) & np.isreal(data)))) return model, param_data, indep_data, dep_data if __name__ == '__main__': - DEPENDENT_VARS = st.lists(st.sampled_from(variables('Y1, Y2')), unique=True, min_size=1) - SYMBOLS = st.sampled_from(parameters('a, b, c') + variables('x, y, z')) - + DEPENDENT_VARS = st.sampled_from(variables('Y1, Y2')) + INDEPENDENT_VARS = st.sampled_from(variables('x, y, z')) + PARAMETERS = st.sampled_from(variables('a, b, c')) for _ in range(5): - print(list(map(str, model_with_data(DEPENDENT_VARS, SYMBOLS).example()))) + print(polynomial_expressions(INDEPENDENT_VARS, PARAMETERS, st.just(1)).example()) + + # for _ in range(5): + # print(list(map(str, model_with_data(DEPENDENT_VARS, INDEPENDENT_VARS, PARAMETERS).example()))) diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py index baefb5bc..9d6434b1 100644 --- a/tests/test_hypothesis.py +++ b/tests/test_hypothesis.py @@ -1,28 +1,43 @@ -from .hypothesis_helpers import model_with_data +from .hypothesis_helpers import model_with_data, expression_strategy, polynomial_strategy from hypothesis import strategies as st -from hypothesis import given, note, assume, settings, HealthCheck +from hypothesis import given, note, assume, settings, HealthCheck, event import numpy as np from pytest import approx from symfit import Fit, variables, parameters +from symfit.core.minimizers import TrustConstr, SLSQP, LBFGSB -DEPENDENT_VARS = st.lists(st.sampled_from(variables('Y1, Y2')), unique=True, min_size=1) -SYMBOLS = st.sampled_from(parameters('a, b, c') + variables('x, y, z')) +DEPENDENT_VARS = st.sampled_from(variables('Y1, Y2')) +INDEPENDENT_VARS = st.sampled_from(variables('x, y, z')) +PARAMETERS = st.sampled_from(parameters('a, b, c')) -def _cmp_result(reference, found): - assert reference.keys() == found.keys() - ref_vals = [] - found_vals = [] - for k in reference: - ref_vals.append(reference[k]) - found_vals.append(found[k]) - assert found_vals == approx(ref_vals) +def _cmp_result(reference, found, abs=None, rel=None): + reference = reference.copy() + for k, v in reference.items(): + reference[k] = approx(v, abs=abs, rel=rel) + assert found == reference + # assert reference.keys() == found.keys() + # ref_vals = [] + # found_vals = [] + # for k in reference: + # ref_vals.append(reference[k]) + # found_vals.append(found[k]) + # return found_vals == approx(ref_vals, abs=abs, rel=rel) -@settings(max_examples=100, deadline=None, suppress_health_check=[HealthCheck.too_slow]) -@given(model_with_data(DEPENDENT_VARS, SYMBOLS)) + +def expressions(vars, pars): + return st.one_of(polynomial_strategy(vars, pars), expression_strategy(vars, pars)) + + +@settings(max_examples=50, deadline=None, + suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much]) +@given(model_with_data(DEPENDENT_VARS, + INDEPENDENT_VARS, + PARAMETERS, + expression_strategy)) def test_simple(model_with_data): model, param_vals, indep_vals, dep_vals = model_with_data note(str(model)) @@ -34,3 +49,57 @@ def test_simple(model_with_data): note(str(model(**indep_vals, **result.params))) note(str(fit.minimizer)) _cmp_result(param_vals, result.params) + + +@settings(max_examples=50, deadline=None, + suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much]) +@given(st.data()) +def test_linear_models(data): + # minimizer = data.draw(st.sampled_from([None, LBFGSB, TrustConstr, SLSQP]), label='minimizer') + minimizer = None + model, param_vals, indep_vals, dep_vals = data.draw( + model_with_data(DEPENDENT_VARS, + INDEPENDENT_VARS, + PARAMETERS, + polynomial_strategy, + allow_interdependent=False) + ) + note(str(model)) + note(param_vals) + note(indep_vals) + note(dep_vals) + + assume(model.params) # Stacking error in model.eval_jacobian + init_vals = param_vals.copy() + for param in model.params: + # param.value = data.draw(st.floats(param.min, param.max, allow_nan=False, allow_infinity=False)) + param.value = data.draw(st.floats(-1e6, 1e6, allow_nan=False, allow_infinity=False)) + init_vals[str(param)] = param.value + note((param.name, param.min, param.value, param.max)) + # assert param.min is None or param.min <= param_vals[str(param)] + # assert param.max is None or param_vals[str(param)] <= param.max + + fit = Fit(model, **indep_vals, **dep_vals, minimizer=minimizer) + obj_jac = fit.objective.eval_jacobian(**init_vals) + note(str(obj_jac)) + note(str(fit.objective.eval_hessian(**init_vals))) + # Exclude some edge cases that won't do anything due to numerical precision + # This also catches cases like y=a*x with y=0 and x=0 + assume(np.any(np.abs(obj_jac) > 1e-3)) + + result = fit.execute() + # "Minimization stopped due to precision loss" and variants + # assume(result.infodict['success']) + note(str(result.covariance_matrix)) + note(str(result)) + note(str(model(**indep_vals, **result.params))) + note(str(fit.objective(**result.params))) + note(str(fit.objective.eval_jacobian(**result.params))) + note(str(fit.objective.eval_hessian(**result.params))) + note(str(fit.minimizer)) + # If the R2 isn't one, check whether the parameters are ~equal. + if result.r_squared != approx(1, abs=5e-3): + event('R squared != 1') + _cmp_result(param_vals, result.params, rel=1e-3, abs=1e-2) + else: + event('R squared == 1') \ No newline at end of file From 701aa67aa6f705815b93d7aa4e274335ec52e2ab Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Fri, 21 Oct 2022 15:13:41 +0200 Subject: [PATCH 07/11] Streamline hypothesis tests --- tests/hypothesis_helpers.py | 173 ++++++++++++++++++++++-------------- tests/test_hypothesis.py | 71 ++++++++------- 2 files changed, 146 insertions(+), 98 deletions(-) diff --git a/tests/hypothesis_helpers.py b/tests/hypothesis_helpers.py index e982ec75..cb78be6b 100644 --- a/tests/hypothesis_helpers.py +++ b/tests/hypothesis_helpers.py @@ -6,20 +6,17 @@ from hypothesis import strategies as st from hypothesis.extra import numpy as npst -from hypothesis import assume, note +from hypothesis import assume -MAX_VAL = 1e1 -MIN_VAL = 1e-1 +MAX_VAL = 2**10 +MIN_VAL = 2**-10 FLOATS = st.floats(allow_nan=False, allow_infinity=False, - min_value=MIN_VAL, max_value=MAX_VAL) + min_value=MIN_VAL, max_value=MAX_VAL, + allow_subnormal=False, width=32) # Some hoops to jump through to limit the floats to MAX_VAL > abs(f) > MIN_VAL, # but still allow f=0 -NUMBERS = st.one_of(FLOATS.map(lambda f: -f), st.just(0), FLOATS) - - -def _is_sorted(lst, cmp=lambda x, y: x <= y): - return all(cmp(x, y) for x, y in zip(lst[:-1], lst[1:])) +NUMBERS = st.one_of(st.just(0), FLOATS, FLOATS.map(lambda f: -f)) def _is_number(obj): @@ -59,6 +56,13 @@ def expressions(draw, variables, parameters, constants, unary_ops, operations, ------- sympy.Expression """ + # This will work by generating an expression tree, except that we + # immediately collapse it: Generate the leaves, and combine those by + # applying operations to them. For extra flavour, apply some unary + # operations along the way. + + # Make sure we draw at least one not numerical symbol, unless we allow + # an expression consisting of just a number if not allow_number: first = st.one_of(variables, parameters) rest = st.lists(st.one_of(constants, variables, parameters), @@ -67,15 +71,16 @@ def expressions(draw, variables, parameters, constants, unary_ops, operations, else: symbol_st = st.lists(st.one_of(constants, variables, parameters), min_size=n_leaves, max_size=n_leaves) - # Assert we draw at least one not numerical symbol, unless we allow numbers leaves = draw(symbol_st) - # TODO: allow chaining of unary ops - unary_ops = st.one_of(st.none(), unary_ops) + unary_ops = st.lists(unary_ops, min_size=0, max_size=3) first_ops = draw(st.lists(unary_ops, min_size=len(leaves), max_size=len(leaves))) - leaves = [op(leaf) if op else leaf for op, leaf in zip(first_ops, leaves)] + + for idx, ops in enumerate(first_ops): + for op in ops: + leaves[idx] = op(leaves[idx]) while len(leaves) != 1: nargs, op, comm = draw(operations) @@ -85,13 +90,14 @@ def expressions(draw, variables, parameters, constants, unary_ops, operations, max_size=nargs, unique=True) # ... and if the operation commutes, make sure the indices are sorted. - idxs = draw(idxs_st.filter(lambda l: not comm or _is_sorted(l))) + idxs = draw(idxs_st.map(lambda x: sorted(x) if comm else x)) + # idxs = draw(idxs_st) args = [leaves[idx] for idx in idxs] # Remove the drawn symbols from `leaves` leaves = [leaf for idx, leaf in enumerate(leaves) if idx not in idxs] new_node = op(*args) - op = draw(unary_ops) - if op: + ops = draw(unary_ops) + for op in ops: new_node = op(new_node) leaves.append(new_node) expr = leaves[0] @@ -127,18 +133,19 @@ def expression_strategy(variables, parameters, n_leaves=5): # (2, sympy.Pow, False), # (2, sympy.log, False), ]) - unary_ops = st.sampled_from([sympy.sin, - sympy.exp, - lambda e: -e, - lambda e: 1/e]) - return expressions(variables=variables, parameters=parameters, + unary_ops = st.sampled_from([lambda e: -e, + lambda e: 1/e, + sympy.sin, + sympy.exp]) + expr = expressions(variables=variables, parameters=parameters, constants=constants, unary_ops=unary_ops, operations=ops, n_leaves=n_leaves) + return expr @st.composite def polynomial_expressions(draw, variables, parameters, constants, - max_degree=10, max_terms=10): + max_degree=10, max_terms=5): """ Generates polynomial expressions that are linear in `parameters` and `constants`. The produced polynomials will have at most `max_terms` terms, @@ -164,17 +171,19 @@ def polynomial_expressions(draw, variables, parameters, constants, """ indep_vars = draw(st.lists(variables, unique=True, min_size=1)) orders = st.dictionaries( + # Dict keys are the exponents for the variables, values the coefficient + # https://docs.sympy.org/latest/modules/polys/domainsintro.html#sparse-polynomial-representation keys=st.tuples(*[st.integers(min_value=0, max_value=max_degree)]*len(indep_vars)).filter(lambda t: 1 <= sum(t) <= max_degree), - values=st.one_of(parameters, constants), + values=st.one_of(constants, parameters), min_size=1, - max_size=10 + max_size=max_terms ) orders = draw(orders) expr = sympy.Poly(orders, *indep_vars).as_expr() return expr -def polynomial_strategy(variables, parameters, max_degree=5, max_terms=10): +def polynomial_strategy(variables, parameters, max_degree=5, max_terms=5): """ Helper function that calls :func:`polynomial_expressions` with sane defaults. @@ -227,19 +236,22 @@ def models(draw, dependent_vars, independent_vars, parameters, expressions_st, symfit.Model """ model_dict = {} - dep_vars = draw(st.lists(dependent_vars, unique=True, min_size=1)) + dep_vars = draw(st.lists(dependent_vars, unique=True, min_size=1, max_size=3)) for idx, dependent_var in enumerate(dep_vars): # Dependent vars can depend on other dependent vars: {y1: k, y2: y1+1}. # But this can cause cyclical dependency ({y1: y2, y2: y1}), so limit # to idx < jdx. Note that this also prevents {y1: y1}. - other_vars = [dep_var for jdx, dep_var in enumerate(dep_vars) if idx < jdx] + other_vars = dep_vars[:idx] if other_vars and allow_interdependent: # Can't sample_from and empty list - indep_variables = st.one_of(independent_vars, st.sampled_from(other_vars)) + valid_variables = st.one_of(independent_vars, st.sampled_from(other_vars)) else: - indep_variables = independent_vars - expression_st = expressions_st(indep_variables, parameters) + valid_variables = independent_vars + expression_st = expressions_st(valid_variables, parameters) expr = draw(expression_st) + # Ban expressions that have complex infinity, which for some reason + # can't be printed to numpy. Maybe add more? + assume(not expr.has(sympy.zoo)) model_dict[dependent_var] = expr model = Model(model_dict) return model @@ -256,7 +268,7 @@ def _same_shape_vars(model): # Deal with interdependent vars for k, group in con_map.items(): - for var in list(group): + for var in group: if var in con_map: con_map[k] = group | con_map[var] con_map[k] -= {var} @@ -274,7 +286,7 @@ def _same_shape_vars(model): @st.composite def independent_data_shapes(draw, model, min_num_values=0, min_value=1, - max_value=10, min_dim=1, max_dim=3): + max_value=None, min_dim=1, max_dim=None): """ Strategy that generates shapes for the independent variables of `model` such that variables that need to be broadcast compatible will be the same shape. @@ -283,6 +295,8 @@ def independent_data_shapes(draw, model, min_num_values=0, min_value=1, ---------- model: symfit.Model Shapes will be generated for this model's independent variables. + min_num_values: int + The minimum number of values the final arrays should contain. min_value: int The minimum size a dimension can be. max_value: int @@ -296,13 +310,17 @@ def independent_data_shapes(draw, model, min_num_values=0, min_value=1, ------- dict[str]: tuple[int, ...] """ - indep_vars = model.independent_vars shape_groups = map(tuple, _same_shape_vars(model)) # Insist on a minimal number of data points to prevent underspecified models # This is probably overly strict, since it enforces the same minimal shape - # on *all* components/variables. Instead it would probably be better to + # on *all* components/variables. Instead, it would probably be better to # do this per element of model.connectivity_mapping - shapes = npst.array_shapes(min_dim, max_dim, min_value, max_value).filter(lambda shape: np.prod(shape) >= min_num_values) + shapes = npst.array_shapes( + min_dims=min_dim, + max_dims=max_dim, + min_side=min_value, + max_side=max_value + ).filter(lambda shape: np.prod(shape) >= min_num_values) grp_shape = draw(st.fixed_dictionaries({var_grp: shapes for var_grp in shape_groups})) indep_shape = {} for vars, shape in grp_shape.items(): @@ -314,11 +332,22 @@ def independent_data_shapes(draw, model, min_num_values=0, min_value=1, indep_shape[var] = shape # Although technically possible, it can cause a bunch of numerical # issues, and it's meaning is debatable at best. - # indep_shape[vars[-1]] = npst.broadcastable_shapes(shape, min_dim, max_dim, min_value, max_value) + # indep_shape[vars[-1]] = npst.broadcastable_shapes(shape, + # min_dims=min_dim, + # max_dims=max_dim, + # min_side=min_value, + # max_side=max_value + # ) indep_shape[vars[-1]] = shape return indep_shape +def _count_unique_values(indep_vals): + return sum( + np.sum(np.unique(ar.ravel(), return_counts=True)[1]) for ar in indep_vals.values() + ) + + @st.composite def indep_values(draw, model): """ @@ -334,20 +363,20 @@ def indep_values(draw, model): Dict of numpy arrays with data for every independent variable in `model`. """ - indep_vars = model.independent_vars - dep_vars = model.dependent_vars params = model.params - min_num_values = len(params) + 1 + # We need just a bit more data than parameters so that there's a best answer + min_num_values = len(params) + 3 indep_shape = draw(independent_data_shapes(model, min_num_values)) indep_vals = {} for var, shape in indep_shape.items(): - data = npst.arrays(float, shape, elements=NUMBERS, unique=True).map(np.sort) + data = npst.arrays(float, shape, elements=NUMBERS, unique=False).map(np.sort) indep_vals[str(var)] = data - data = draw(st.fixed_dictionaries(indep_vals)) - + # Data points don't have to be unique, but there should be enough + # independent ones to overspecify the fit. + data = draw(st.fixed_dictionaries(indep_vals).filter(lambda d: _count_unique_values(d) >= min_num_values)) return data @@ -355,7 +384,7 @@ def indep_values(draw, model): def param_values(draw, model): """ Strategy for generating values for the parameters of `model`. Will also set - the values of the parameters. + the values and bounds of the parameters. Parameters ---------- @@ -373,20 +402,15 @@ def param_values(draw, model): st.lists( st.one_of( st.none(), - st.floats(0, 2*abs(param.value), allow_infinity=False, allow_nan=False) + st.floats(1, 1+2*abs(param.value), allow_infinity=False, allow_nan=False) ), min_size=2, max_size=2 ) ) - if lower is not None: - param.min = param.value - lower - else: - param.min = None - if upper is not None: - param.max = param.value + upper - else: - param.max = None + param.min = param.value - lower if lower is not None else None + param.max = param.value + upper if upper is not None else None + return data @@ -413,11 +437,7 @@ def dep_values(draw, model, indep_data, param_vals): sigmas. """ dep_data = model(**indep_data, **param_vals)._asdict() - # for var, data in dep_data.items(): - # if var in model.dependent_vars: - # assume(np.all(((np.abs(data) >= MIN_VAL) | (data == 0)) & - # (np.abs(data) <= MAX_VAL) - # )) + # You can't provide data for interdependent variables. #270 dep_data = { var: data @@ -437,7 +457,7 @@ def dep_values(draw, model, indep_data, param_vals): @st.composite -def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_strategy, **kwargs): +def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_strategy, max_jacobian=1e3, **kwargs): """ Strategy that generates a model with associated data using the given dependent variables. @@ -449,9 +469,12 @@ def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_str independent_vars: hypothesis.LazySearchStrategy[symfit.Variable] Strategy to use to generate independent variables for the model. parameters: hypothesis.LazySearchStrategy[symfit.Parameter] - Strategy to use to generate independent variables for the model. + Strategy to use to generate parameters for the model. expr_strategy: hypothesis.LazySearchStrategy[sympy.Expr] Strategy to use to generate expressions for the model. + max_jacobian: float or None + If not None, the maximum values allowed for the Jacobian to avoid + numerical instabilities Returns ------- @@ -464,17 +487,31 @@ def model_with_data(draw, dependent_vars, independent_vars, parameters, expr_str dict[str, np.array] Data for the dependent variables, as well as sigmas. """ + def _is_valid_data(data): + indep_data, param_data = data + try: + dep_data = model(**indep_data, **param_data) + jac = model.eval_jacobian(**indep_data, **param_data) + except ZeroDivisionError: + # Can happen with, for example: a**(-1.0) with a=0 + return False + + if max_jacobian is not None: + low_jac = all(np.all(np.abs(component) <= max_jacobian) for component in jac) + else: + low_jac = True + + # Let's make sure all dependant data is finite and real. + finite_data = all(data is None or (np.all(np.isfinite(data) & np.isreal(data))) for data in dep_data) + + return low_jac and finite_data + model = draw(models(dependent_vars, independent_vars, parameters, expr_strategy, **kwargs)) - indep_data, param_data = draw(st.tuples(indep_values(model), - param_values(model)), + indep_data, param_data = draw(st.tuples(indep_values(model), param_values(model)), label='independent data, parameters') - try: - dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data') - except ArithmeticError: - assume(False) # Some model + data that causes numerical issues - for data in dep_data.values(): - # In addition, let's make sure all data is finite and real. - assume(data is None or (np.all(np.isfinite(data) & np.isreal(data)))) + assume(_is_valid_data([indep_data, param_data])) + + dep_data = draw(dep_values(model, indep_data, param_data), label='dependent data') return model, param_data, indep_data, dep_data diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py index 9d6434b1..8ab8cb44 100644 --- a/tests/test_hypothesis.py +++ b/tests/test_hypothesis.py @@ -1,6 +1,6 @@ from .hypothesis_helpers import model_with_data, expression_strategy, polynomial_strategy from hypothesis import strategies as st -from hypothesis import given, note, assume, settings, HealthCheck, event +from hypothesis import given, note, assume, settings, HealthCheck import numpy as np from pytest import approx @@ -13,19 +13,19 @@ INDEPENDENT_VARS = st.sampled_from(variables('x, y, z')) PARAMETERS = st.sampled_from(parameters('a, b, c')) -def _cmp_result(reference, found, abs=None, rel=None): +def _cmp_params(reference, found, abs=None, rel=None): reference = reference.copy() for k, v in reference.items(): reference[k] = approx(v, abs=abs, rel=rel) assert found == reference - # assert reference.keys() == found.keys() - # ref_vals = [] - # found_vals = [] - # for k in reference: - # ref_vals.append(reference[k]) - # found_vals.append(found[k]) - # return found_vals == approx(ref_vals, abs=abs, rel=rel) + +def _cmp_model_vals(reference, found, abs=None, rel=None): + actual = {} + for k, v in found._asdict().items(): + if str(k) in reference: + actual[str(k)] = approx(v, abs=abs, rel=rel) + assert reference == actual def expressions(vars, pars): @@ -39,53 +39,67 @@ def expressions(vars, pars): PARAMETERS, expression_strategy)) def test_simple(model_with_data): + """ + Generate a model with data and parameters, and fit the exact data (without + any noise), with initial parameter values at the correct answer. Check that + the minimizer doesn't drift away from this correct answer. + Note. It is not possible to give approximate parameter values as starting + point, since there may be multiple minima (and hypothesis *will* find those + cases) + """ model, param_vals, indep_vals, dep_vals = model_with_data note(str(model)) - assume(model.params) # Stacking error in model.eval_jacobian + note(str(model(**indep_vals, **param_vals))) fit = Fit(model, **indep_vals, **dep_vals) result = fit.execute() note(str(result)) - note(str(model(**indep_vals, **result.params))) note(str(fit.minimizer)) - _cmp_result(param_vals, result.params) + _cmp_params(param_vals, result.params) @settings(max_examples=50, deadline=None, suppress_health_check=[HealthCheck.too_slow, HealthCheck.filter_too_much]) @given(st.data()) def test_linear_models(data): - # minimizer = data.draw(st.sampled_from([None, LBFGSB, TrustConstr, SLSQP]), label='minimizer') - minimizer = None + """ + Generate a linear model (specifically, a polynomial), and pick parameter + values between their min and max, and perform the fit. Note that there + is still no noise on the data. + """ + minimizer = data.draw(st.sampled_from([None, LBFGSB, SLSQP]), label='minimizer') + # minimizer = None model, param_vals, indep_vals, dep_vals = data.draw( model_with_data(DEPENDENT_VARS, INDEPENDENT_VARS, PARAMETERS, polynomial_strategy, - allow_interdependent=False) + allow_interdependent=False), # If true, no longer linear + label="Model" ) + note(str(minimizer)) note(str(model)) - note(param_vals) + for param in model.params: + note("{}: {} {} {}".format(param, param.min, param.value, param.max)) note(indep_vals) note(dep_vals) - assume(model.params) # Stacking error in model.eval_jacobian init_vals = param_vals.copy() for param in model.params: - # param.value = data.draw(st.floats(param.min, param.max, allow_nan=False, allow_infinity=False)) - param.value = data.draw(st.floats(-1e6, 1e6, allow_nan=False, allow_infinity=False)) + param.value = data.draw(st.floats( + -1e3, 1e3, + allow_nan=False, + allow_infinity=False), label="Parameter value {}".format(param)) init_vals[str(param)] = param.value note((param.name, param.min, param.value, param.max)) - # assert param.min is None or param.min <= param_vals[str(param)] - # assert param.max is None or param_vals[str(param)] <= param.max fit = Fit(model, **indep_vals, **dep_vals, minimizer=minimizer) obj_jac = fit.objective.eval_jacobian(**init_vals) + # If the jacobian is too large you get numerical issues (in particular + # SLSQP struggles?). + assume(np.all(np.abs(obj_jac) < 1e5)) note(str(obj_jac)) note(str(fit.objective.eval_hessian(**init_vals))) - # Exclude some edge cases that won't do anything due to numerical precision - # This also catches cases like y=a*x with y=0 and x=0 - assume(np.any(np.abs(obj_jac) > 1e-3)) result = fit.execute() # "Minimization stopped due to precision loss" and variants @@ -97,9 +111,6 @@ def test_linear_models(data): note(str(fit.objective.eval_jacobian(**result.params))) note(str(fit.objective.eval_hessian(**result.params))) note(str(fit.minimizer)) - # If the R2 isn't one, check whether the parameters are ~equal. - if result.r_squared != approx(1, abs=5e-3): - event('R squared != 1') - _cmp_result(param_vals, result.params, rel=1e-3, abs=1e-2) - else: - event('R squared == 1') \ No newline at end of file + + tmp = {v: dep_vals[str(v)] for v in dep_vals if not v.startswith('sigma_')} + _cmp_model_vals(tmp, model(**result.params, **indep_vals), rel=1e-3, abs=1e-2) From 8dc913095e978395222045f5e52091baad2c0498 Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Fri, 21 Oct 2022 15:14:18 +0200 Subject: [PATCH 08/11] Update plumbing --- .coveragerc | 2 ++ .github/workflows/test.yaml | 2 +- .gitignore | 8 ++++++++ requirements_test.txt | 4 +++- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index 40c40f0a..3ca69e1b 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,4 +1,6 @@ [run] +source = symfit branch = True +concurrency = multiprocessing omit = tests/* diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index be462b0c..deb50a28 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -25,4 +25,4 @@ jobs: pip install -e . - name: Test with coverage run: - pytest \ No newline at end of file + pytest --cov=symfit --hypothesis-show-statistics \ No newline at end of file diff --git a/.gitignore b/.gitignore index 62365349..fb97a378 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,11 @@ build docs/_build docs/_doctrees dist + +.hypothesis/ + +.eggs/ + +symfit.egg-info/ + +.coverage diff --git a/requirements_test.txt b/requirements_test.txt index 103175ea..2cfea79f 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,4 +1,6 @@ pytest +pytest-cov +coverage coveralls -hypothesis +hypothesis[numpy] From f7558604004a51ec87c91056f54ed853971fe86d Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Fri, 21 Oct 2022 15:15:00 +0200 Subject: [PATCH 09/11] Allow ScipyMinimizer to deal with models without free parameters --- symfit/core/minimizers.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/symfit/core/minimizers.py b/symfit/core/minimizers.py index 7a57edd0..e6b15ee7 100644 --- a/symfit/core/minimizers.py +++ b/symfit/core/minimizers.py @@ -347,6 +347,17 @@ def execute(self, bounds=None, jacobian=None, hessian=None, constraints=None, ** :func:`scipy.optimize.minimize`. Note that your `method` will usually be filled by a specific subclass. """ + if not self.params: + return FitResults( + model=DummyModel(params=[]), + popt=[], + covariance_matrix=np.array([]), + objective=self.objective, + minimizer=self, + message="No free parameters", + fun=self.objective(), + niter=0, + ) ans = minimize( self.objective, self.initial_guesses, From 26f3f8cab2af486801ccd627e5c2816e76cf1165 Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Fri, 21 Oct 2022 15:15:32 +0200 Subject: [PATCH 10/11] Fix stacking of Jac and Hess with empty models --- symfit/core/models.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/symfit/core/models.py b/symfit/core/models.py index da1e72f3..5009444e 100644 --- a/symfit/core/models.py +++ b/symfit/core/models.py @@ -913,7 +913,10 @@ def eval_jacobian(self, *args, **kwargs): # the parameter dimension. We do not include the component direction in # this, because the components can have independent shapes. for idx, comp in enumerate(jac): - jac[idx] = np.stack(np.broadcast_arrays(*comp)) + if comp: + jac[idx] = np.stack(np.broadcast_arrays(*comp)) + else: + jac[idx] = np.array([]) return ModelOutput(self.keys(), jac) @@ -957,7 +960,10 @@ def eval_hessian(self, *args, **kwargs): # the parameter dimension. We do not include the component direction in # this, because the components can have independent shapes. for idx, comp in enumerate(hess): - hess[idx] = np.stack(np.broadcast_arrays(*comp)) + if comp: + hess[idx] = np.stack(np.broadcast_arrays(*comp)) + else: + hess[idx] = np.array([]) return ModelOutput(self.keys(), hess) From 3dc219e5a810cc54614cb3f8a54f3aea9fe5e20e Mon Sep 17 00:00:00 2001 From: Peter Kroon Date: Fri, 21 Oct 2022 15:25:35 +0200 Subject: [PATCH 11/11] Re-add TrustConstr --- tests/test_hypothesis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_hypothesis.py b/tests/test_hypothesis.py index 8ab8cb44..9d81b7fc 100644 --- a/tests/test_hypothesis.py +++ b/tests/test_hypothesis.py @@ -67,7 +67,7 @@ def test_linear_models(data): values between their min and max, and perform the fit. Note that there is still no noise on the data. """ - minimizer = data.draw(st.sampled_from([None, LBFGSB, SLSQP]), label='minimizer') + minimizer = data.draw(st.sampled_from([None, LBFGSB, SLSQP, TrustConstr]), label='minimizer') # minimizer = None model, param_vals, indep_vals, dep_vals = data.draw( model_with_data(DEPENDENT_VARS,