diff --git a/tests/test_argument.py b/tests/test_argument.py
index 2dde2756..707ed8dd 100644
--- a/tests/test_argument.py
+++ b/tests/test_argument.py
@@ -1,21 +1,10 @@
 from __future__ import division, print_function
 import pickle
-import unittest
+import pytest
 import sys
 import sympy
-import warnings
-import numpy as np
-import scipy.stats
-from scipy.optimize import curve_fit, minimize
-
-from symfit import (
-    Variable, Parameter, Fit, FitResults, log, variables,
-    parameters, Model, Eq, Ge
-)
-from symfit.core.minimizers import BFGS, MINPACK, SLSQP, LBFGSB
-from symfit.core.objectives import LogLikelihood
-from symfit.distributions import Gaussian, Exp
+from symfit import Variable, Parameter
 
 if sys.version_info >= (3, 0):
     import inspect as inspect_sig
@@ -23,101 +12,68 @@
     import funcsigs as inspect_sig
 
 
-class TestArgument(unittest.TestCase):
-    def test_parameter_add(self):
-        """
-        Makes sure the __add__ method of Parameters behaves as expected.
-        """
-        a = Parameter(value=1.0, min=0.5, max=1.5)
-        b = Parameter(value=1.0, min=0.0)
-        new = a + b
-        self.assertIsInstance(new, sympy.Add)
-
-    def test_argument_unnamed(self):
-        """
-        Make sure the generated parameter names follow the pattern
-        """
-        a = Parameter()
-        b = Parameter('b', 10)
-        c = Parameter(value=10)
-        x = Variable()
-        y = Variable('y')
-
-        self.assertEqual(str(a), '{}_{}'.format(a._argument_name, a._argument_index))
-        self.assertEqual(str(a), 'par_{}'.format(a._argument_index))
-        self.assertNotEqual(str(b), '{}_{}'.format(b._argument_name, b._argument_index))
-        self.assertEqual(str(c), '{}_{}'.format(c._argument_name, c._argument_index))
-        self.assertEqual(c.value, 10)
-        self.assertEqual(b.value, 10)
-        self.assertEqual(str(x), 'var_{}'.format(x._argument_index))
-        self.assertEqual(str(y), 'y')
-
-        with self.assertRaises(TypeError):
-            d = Parameter(10)
-
-
-    def test_argument_name(self):
-        """
-        Make sure that Parameters have a name attribute with the expected
-        value.
-        """
-        a = Parameter()
-        b = Parameter(name='b')
-        c = Parameter(name='d')
-        self.assertNotEqual(a.name, 'a')
-        self.assertEqual(b.name, 'b')
-        self.assertEqual(c.name, 'd')
-
-    def test_symbol_add(self):
-        """
-        Makes sure the __add__ method of symbols behaves as expected.
-        """
-        x, y = sympy.symbols('x y')
-        new = x + y
-        self.assertIsInstance(new, sympy.Add)
-
-    def test_pickle(self):
-        """
-        Make sure attributes are preserved when pickling
-        """
-        A = Parameter('A', min=0., max=1e3, fixed=True)
-        new_A = pickle.loads(pickle.dumps(A))
-        self.assertEqual((A.min, A.value, A.max, A.fixed, A.name),
-                         (new_A.min, new_A.value, new_A.max, new_A.fixed, new_A.name))
-
-        A = Parameter(min=0., max=1e3, fixed=True)
-        new_A = pickle.loads(pickle.dumps(A))
-        self.assertEqual((A.min, A.value, A.max, A.fixed, A.name),
-                         (new_A.min, new_A.value, new_A.max, new_A.fixed, new_A.name))
-
-    def test_slots(self):
-        """
-        Make sure Parameters and Variables don't have a __dict__
-        """
-        P = Parameter('P')
-
-        # If you only have __slots__ you can't set arbitrary attributes, but
-        # you *should* be able to set those that are in your __slots__
-        try:
-            P.min = 0
-        except AttributeError:
-            self.fail()
-
-        with self.assertRaises(AttributeError):
-            P.foo = None
-
-        V = Variable('V')
-        with self.assertRaises(AttributeError):
-            V.bar = None
-
-
-if __name__ == '__main__':
+def test_parameter_add():
+    """
+    Makes sure the __add__ method of Parameters behaves as expected.
+ """ + a = Parameter(value=1.0, min=0.5, max=1.5) + b = Parameter(value=1.0, min=0.0) + new = a + b + assert isinstance(new, sympy.Add) + + +def test_argument_unnamed(): + """ + Make sure the generated parameter names follow the pattern + """ + a = Parameter() + b = Parameter('b', 10) + c = Parameter(value=10) + x = Variable() + y = Variable('y') + + assert str(a) == '{}_{}'.format(a._argument_name, a._argument_index) + assert str(a) == 'par_{}'.format(a._argument_index) + assert str(b) != '{}_{}'.format(b._argument_name, b._argument_index) + assert str(c) == '{}_{}'.format(c._argument_name, c._argument_index) + assert c.value == 10 + assert b.value == 10 + assert str(x) == 'var_{}'.format(x._argument_index) + assert str(y) == 'y' + + with pytest.raises(TypeError): + d = Parameter(10) + + +def test_pickle(): + """ + Make sure attributes are preserved when pickling + """ + A = Parameter('A', min=0., max=1e3, fixed=True) + new_A = pickle.loads(pickle.dumps(A)) + assert (A.min, A.value, A.max, A.fixed, A.name) == (new_A.min, new_A.value, new_A.max, new_A.fixed, new_A.name) + + A = Parameter(min=0., max=1e3, fixed=True) + new_A = pickle.loads(pickle.dumps(A)) + assert (A.min, A.value, A.max, A.fixed, A.name) == (new_A.min, new_A.value, new_A.max, new_A.fixed, new_A.name) + + +def test_slots(): + """ + Make sure Parameters and Variables don't have a __dict__ + """ + P = Parameter('P') + + # If you only have __slots__ you can't set arbitrary attributes, but + # you *should* be able to set those that are in your __slots__ try: - unittest.main(warnings='ignore') - # Note that unittest will catch and handle exceptions raised by tests. - # So this line will *only* deal with exceptions raised by the line - # above. - except TypeError: - # In Py2, unittest.main doesn't take a warnings argument - warnings.simplefilter('ignore') - unittest.main() + P.min = 0 + except AttributeError: + assert False + + with pytest.raises(AttributeError): + P.foo = None + + V = Variable('V') + with pytest.raises(AttributeError): + V.bar = None diff --git a/tests/test_auto_fit.py b/tests/test_auto_fit.py index 4f88207c..5ef977a7 100644 --- a/tests/test_auto_fit.py +++ b/tests/test_auto_fit.py @@ -1,8 +1,6 @@ from __future__ import division, print_function -import unittest - +import pytest import numpy as np - from symfit import ( variables, parameters, Fit, Parameter, Variable, Equality, Model, GradientModel @@ -10,299 +8,305 @@ from symfit.core.minimizers import BFGS, MINPACK, SLSQP, LBFGSB from symfit.distributions import Gaussian -class TestAutoFit(unittest.TestCase): - @classmethod - def setUpClass(cls): - np.random.seed(0) - - def test_vector_fitting(self): - """ - Test the behavior in the presence of bounds or constraints: `Fit` should - select `ConstrainedNumericalLeastSquares` when bounds or constraints are - provided, or for vector models in general. For scalar models, use - `NumericalLeastSquares`. - """ - a, b = parameters('a, b') - a_i, = variables('a_i') - - xdata = np.array([ - [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.], - [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8], - [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1], - ]) - - # Make a new scalar model. 
-        scalar_model = {a_i: a + b}
-        simple_fit = Fit(
-            model=scalar_model,
-            a_i=xdata[0],
-            minimizer=MINPACK
-        )
-        self.assertIsInstance(simple_fit.minimizer, MINPACK)
-
-        constrained_fit = Fit(
-            model=scalar_model,
-            a_i=xdata[0],
-            constraints=[Equality(a + b, 110)]
-        )
-        self.assertIsInstance(constrained_fit.minimizer, SLSQP)
-
-        a.min = 0
-        a.max = 25
-        a.value = 10
-        b.min = 80
-        b.max = 120
-        b.value = 100
-        bound_fit = Fit(
-            model=scalar_model,
-            a_i=xdata[0],
-        )
-        self.assertIsInstance(bound_fit.minimizer, LBFGSB)
-
-        # Repeat all of the above for the Vector model
-        a, b, c = parameters('a, b, c')
-        a_i, b_i, c_i = variables('a_i, b_i, c_i')
-
-        model = {a_i: a, b_i: b, c_i: c}
-
-        simple_fit = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-        )
-        self.assertIsInstance(simple_fit.minimizer, BFGS)
-
-        constrained_fit = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-            constraints=[Equality(a + b + c, 180)]
-        )
-        self.assertIsInstance(constrained_fit.minimizer, SLSQP)
-
-        a.min = 0
-        a.max = 25
-        a.value = 10
-        b.min = 80
-        b.max = 120
-        b.value = 100
-        bound_fit = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-        )
-        self.assertIsInstance(bound_fit.minimizer, LBFGSB)
-
-        fit_result = bound_fit.execute()
-        self.assertAlmostEqual(fit_result.value(a), np.mean(xdata[0]), 6)
-        self.assertAlmostEqual(fit_result.value(b), np.mean(xdata[1]), 6)
-        self.assertAlmostEqual(fit_result.value(c), np.mean(xdata[2]), 6)
-
-    def test_vector_fitting_bounds(self):
-        """
-        Tests fitting to a 3 component vector valued function, with bounds.
-        """
-        a, b, c = parameters('a, b, c')
-        a.min = 0
-        a.max = 25
-        b.min = 0
-        b.max = 500
-        a_i, b_i, c_i = variables('a_i, b_i, c_i')
-
-        model = {a_i: a, b_i: b, c_i: c}
-
-        xdata = np.array([
-            [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
-            [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
-            [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
-        ])
-
-        fit = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-        )
-        fit_result = fit.execute()
-
-        self.assertAlmostEqual(fit_result.value(a), np.mean(xdata[0]), 4)
-        self.assertAlmostEqual(fit_result.value(b), np.mean(xdata[1]), 4)
-        self.assertAlmostEqual(fit_result.value(c), np.mean(xdata[2]), 4)
-
-    def test_vector_fitting_guess(self):
-        """
-        Tests fitting to a 3 component vector valued function, with guesses.
-        """
-        a, b, c = parameters('a, b, c')
-        a.value = 10
-        b.value = 100
-        a_i, b_i, c_i = variables('a_i, b_i, c_i')
-
-        model = {a_i: a, b_i: b, c_i: c}
-
-        xdata = np.array([
-            [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
-            [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
-            [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
-        ])
-
-        fit = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-        )
-        fit_result = fit.execute()
-
-        self.assertAlmostEqual(fit_result.value(a), np.mean(xdata[0]), 4)
-        self.assertAlmostEqual(fit_result.value(b), np.mean(xdata[1]), 4)
-        self.assertAlmostEqual(fit_result.value(c), np.mean(xdata[2]), 4)
-
-    def test_global_fitting(self):
-        """
-        In case of shared parameters between the components of the model, `Fit`
-        should automatically use `ConstrainedLeastSquares`.
-        :return:
-        """
-        x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
-        y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')
-
-        # The following vector valued function links all the equations together
-        # as stated in the intro.
-        model = Model({
-            y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
-            y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
-        })
-        self.assertTrue(model.shared_parameters)
-
-        # Generate data from this model
-        xdata1 = np.linspace(0, 10)
-        xdata2 = xdata1[::2]  # Only every other point.
-
-        ydata1, ydata2 = model(x_1=xdata1, x_2=xdata2, a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
-        # Add some noise to make it appear like real data
-        np.random.seed(1)
-        ydata1 += np.random.normal(0, 2, size=ydata1.shape)
-        ydata2 += np.random.normal(0, 2, size=ydata2.shape)
-
-        xdata = [xdata1, xdata2]
-        ydata = [ydata1, ydata2]
-
-        # Guesses
-        a_1.value = 100
-        a_2.value = 50
-        b_1.value = 1
-        b_2.value = 1
-        y0.value = 10
-
-        fit = Fit(
-            model, x_1=xdata[0], x_2=xdata[1], y_1=ydata[0], y_2=ydata[1]
-        )
-        self.assertIsInstance(fit.minimizer, BFGS)
-
-        # The next model does not share parameters, but is still a vector
-        model = Model({
-            y_1: a_1 * x_1**2 + b_1 * x_1,
-            y_2: a_2 * x_2**2 + b_2 * x_2,
-        })
-        fit = Fit(
-            model, x_1=xdata[0], x_2=xdata[1], y_1=ydata[0], y_2=ydata[1]
-        )
-        self.assertFalse(model.shared_parameters)
-        self.assertIsInstance(fit.minimizer, BFGS)
-
-        # Scalar model, still use bfgs.
-        model = Model({
-            y_1: a_1 * x_1**2 + b_1 * x_1,
-        })
-        fit = Fit(model, x_1=xdata[0], y_1=ydata[0])
-        self.assertFalse(model.shared_parameters)
-        self.assertIsInstance(fit.minimizer, BFGS)
-
-    def test_gaussian_2d_fitting(self):
-        """
-        Tests fitting to a scalar gaussian function with 2 independent
-        variables.
-        """
-        mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
-        cov = [[0.2**2, 0], [0, 0.1**2]]
-
-        data = np.random.multivariate_normal(mean, cov, 1000000)
-
-        # Insert them as y,x here as np fucks up cartesian conventions.
-        ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
-                                               range=[[0.0, 1.0], [0.0, 1.0]])
-        xcentres = (xedges[:-1] + xedges[1:]) / 2
-        ycentres = (yedges[:-1] + yedges[1:]) / 2
-
-        # Make a valid grid to match ydata
-        xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')
-
-        x0 = Parameter(value=mean[0], min=0.0, max=1.0)
-        sig_x = Parameter(value=0.2, min=0.0, max=0.3)
-        y0 = Parameter(value=mean[1], min=0.0, max=1.0)
-        sig_y = Parameter(value=0.1, min=0.0, max=0.3)
-        A = Parameter(value=np.mean(ydata), min=0.0)
-        x = Variable('x')
-        y = Variable('y')
-        g = Variable('g')
-
-        model = GradientModel({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
-        fit = Fit(model, x=xx, y=yy, g=ydata)
-        fit_result = fit.execute()
-
-        self.assertAlmostEqual(fit_result.value(x0), np.mean(data[:, 0]), 3)
-        self.assertAlmostEqual(fit_result.value(y0), np.mean(data[:, 1]), 3)
-        self.assertAlmostEqual(np.abs(fit_result.value(sig_x)), np.std(data[:, 0]), 2)
-        self.assertAlmostEqual(np.abs(fit_result.value(sig_y)), np.std(data[:, 1]), 2)
-        self.assertGreaterEqual(fit_result.r_squared, 0.96)
-
-    def test_gaussian_2d_fitting_background(self):
-        """
-        Tests fitting to a scalar gaussian function with 2 independent
-        variables to data with a background. Added after #149.
-        """
-        mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
-        cov = [[0.2**2, 0], [0, 0.1**2]]
-        background = 3.0
-
-        data = np.random.multivariate_normal(mean, cov, 500000)
-        # print(data.shape)
-        # Insert them as y,x here as np fucks up cartesian conventions.
-        ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
-                                               range=[[0.0, 1.0], [0.0, 1.0]])
-        xcentres = (xedges[:-1] + xedges[1:]) / 2
-        ycentres = (yedges[:-1] + yedges[1:]) / 2
-        ydata += background  # Background
-
-        # Make a valid grid to match ydata
-        xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')
-
-        x0 = Parameter(value=1.1 * mean[0], min=0.0, max=1.0)
-        sig_x = Parameter(value=1.1 * 0.2, min=0.0, max=0.3)
-        y0 = Parameter(value=1.1 * mean[1], min=0.0, max=1.0)
-        sig_y = Parameter(value=1.1 * 0.1, min=0.0, max=0.3)
-        A = Parameter(value=1.1 * np.mean(ydata), min=0.0)
-        b = Parameter(value=1.2 * background, min=0.0)
-        x = Variable('x')
-        y = Variable('y')
-        g = Variable('g')
-
-        model = GradientModel({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y) + b})
-
-        # ydata, = model(x=xx, y=yy, x0=mean[0], y0=mean[1], sig_x=np.sqrt(cov[0][0]), sig_y=np.sqrt(cov[1][1]), A=1, b=3.0)
-        fit = Fit(model, x=xx, y=yy, g=ydata)
-        fit_result = fit.execute()
-
-        self.assertAlmostEqual(fit_result.value(x0) / np.mean(data[:, 0]), 1.0, 2)
-        self.assertAlmostEqual(fit_result.value(y0) / np.mean(data[:, 1]), 1.0, 2)
-        self.assertAlmostEqual(np.abs(fit_result.value(sig_x)) / np.std(data[:, 0]), 1.0, 2)
-        self.assertAlmostEqual(np.abs(fit_result.value(sig_y)) / np.std(data[:, 1]), 1.0, 2)
-        self.assertAlmostEqual(background / fit_result.value(b), 1.0, 1)
-        self.assertGreaterEqual(fit_result.r_squared / 0.96, 1.0)
-
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+
+def test_vector_fitting():
+    """
+    Test the minimizer selection behavior in the presence of bounds or
+    constraints: `Fit` should select `SLSQP` when constraints are provided,
+    `LBFGSB` when bounds are set, and plain `BFGS` otherwise, for scalar and
+    vector models alike.
+    """
+    a, b = parameters('a, b')
+    a_i, = variables('a_i')
+
+    xdata = np.array([
+        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
+        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
+        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
+    ])
+
+    # Make a new scalar model.
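+    # Note: the first fit below uses MINPACK only because we request it
+    # explicitly; without bounds or constraints `Fit` would default to BFGS.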
+    scalar_model = {a_i: a + b}
+    simple_fit = Fit(
+        model=scalar_model,
+        a_i=xdata[0],
+        minimizer=MINPACK
+    )
+    assert isinstance(simple_fit.minimizer, MINPACK)
+
+    constrained_fit = Fit(
+        model=scalar_model,
+        a_i=xdata[0],
+        constraints=[Equality(a + b, 110)]
+    )
+    assert isinstance(constrained_fit.minimizer, SLSQP)
+
+    a.min = 0
+    a.max = 25
+    a.value = 10
+    b.min = 80
+    b.max = 120
+    b.value = 100
+    bound_fit = Fit(
+        model=scalar_model,
+        a_i=xdata[0],
+    )
+    assert isinstance(bound_fit.minimizer, LBFGSB)
+
+    # Repeat all of the above for the Vector model
+    a, b, c = parameters('a, b, c')
+    a_i, b_i, c_i = variables('a_i, b_i, c_i')
+
+    model = {a_i: a, b_i: b, c_i: c}
+
+    simple_fit = Fit(
+        model=model,
+        a_i=xdata[0],
+        b_i=xdata[1],
+        c_i=xdata[2],
+    )
+    assert isinstance(simple_fit.minimizer, BFGS)
+
+    constrained_fit = Fit(
+        model=model,
+        a_i=xdata[0],
+        b_i=xdata[1],
+        c_i=xdata[2],
+        constraints=[Equality(a + b + c, 180)]
+    )
+    assert isinstance(constrained_fit.minimizer, SLSQP)
+
+    a.min = 0
+    a.max = 25
+    a.value = 10
+    b.min = 80
+    b.max = 120
+    b.value = 100
+    bound_fit = Fit(
+        model=model,
+        a_i=xdata[0],
+        b_i=xdata[1],
+        c_i=xdata[2],
+    )
+    assert isinstance(bound_fit.minimizer, LBFGSB)
+
+    fit_result = bound_fit.execute()
+    assert fit_result.value(a) == pytest.approx(np.mean(xdata[0]), rel=1e-6)
+    assert fit_result.value(b) == pytest.approx(np.mean(xdata[1]), rel=1e-6)
+    assert fit_result.value(c) == pytest.approx(np.mean(xdata[2]), rel=1e-6)
+
+
+def test_vector_fitting_bounds():
+    """
+    Tests fitting to a 3 component vector valued function, with bounds.
+    """
+    a, b, c = parameters('a, b, c')
+    a.min = 0
+    a.max = 25
+    b.min = 0
+    b.max = 500
+    a_i, b_i, c_i = variables('a_i, b_i, c_i')
+
+    model = {a_i: a, b_i: b, c_i: c}
+
+    xdata = np.array([
+        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
+        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
+        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
+    ])
+
+    fit = Fit(
+        model=model,
+        a_i=xdata[0],
+        b_i=xdata[1],
+        c_i=xdata[2],
+    )
+    fit_result = fit.execute()
+
+    assert fit_result.value(a) == pytest.approx(np.mean(xdata[0]), rel=1e-4)
+    assert fit_result.value(b) == pytest.approx(np.mean(xdata[1]), rel=1e-4)
+    assert fit_result.value(c) == pytest.approx(np.mean(xdata[2]), rel=1e-4)
+
+
+def test_vector_fitting_guess():
+    """
+    Tests fitting to a 3 component vector valued function, with guesses.
+    """
+    a, b, c = parameters('a, b, c')
+    a.value = 10
+    b.value = 100
+    a_i, b_i, c_i = variables('a_i, b_i, c_i')
+
+    model = {a_i: a, b_i: b, c_i: c}
+
+    xdata = np.array([
+        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
+        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
+        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
+    ])
+
+    fit = Fit(
+        model=model,
+        a_i=xdata[0],
+        b_i=xdata[1],
+        c_i=xdata[2],
+    )
+    fit_result = fit.execute()
+
+    assert fit_result.value(a) == pytest.approx(np.mean(xdata[0]), rel=1e-4)
+    assert fit_result.value(b) == pytest.approx(np.mean(xdata[1]), rel=1e-4)
+    assert fit_result.value(c) == pytest.approx(np.mean(xdata[2]), rel=1e-4)
+
+
+def test_global_fitting():
+    """
+    In case of shared parameters between the components of the model, `Fit`
+    should still select the default BFGS minimizer when no bounds or
+    constraints are given.
+    """
+    x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
+    y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')
+
+    # The following vector valued function links all the equations together
+    # as stated in the intro.
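+    # Both components contain y0, so `model.shared_parameters` should be True.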
+    model = Model({
+        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
+        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
+    })
+    assert model.shared_parameters
+
+    # Generate data from this model
+    xdata1 = np.linspace(0, 10)
+    xdata2 = xdata1[::2]  # Only every other point.
+
+    ydata1, ydata2 = model(x_1=xdata1, x_2=xdata2, a_1=101.3,
+                           b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
+    # Add some noise to make it appear like real data
+    np.random.seed(1)
+    ydata1 += np.random.normal(0, 2, size=ydata1.shape)
+    ydata2 += np.random.normal(0, 2, size=ydata2.shape)
+
+    xdata = [xdata1, xdata2]
+    ydata = [ydata1, ydata2]
+
+    # Guesses
+    a_1.value = 100
+    a_2.value = 50
+    b_1.value = 1
+    b_2.value = 1
+    y0.value = 10
+
+    fit = Fit(
+        model, x_1=xdata[0], x_2=xdata[1], y_1=ydata[0], y_2=ydata[1]
+    )
+    assert isinstance(fit.minimizer, BFGS)
+
+    # The next model does not share parameters, but is still a vector
+    model = Model({
+        y_1: a_1 * x_1**2 + b_1 * x_1,
+        y_2: a_2 * x_2**2 + b_2 * x_2,
+    })
+    fit = Fit(
+        model, x_1=xdata[0], x_2=xdata[1], y_1=ydata[0], y_2=ydata[1]
+    )
+    assert not model.shared_parameters
+    assert isinstance(fit.minimizer, BFGS)
+
+    # Scalar model, still use bfgs.
+    model = Model({
+        y_1: a_1 * x_1**2 + b_1 * x_1,
+    })
+    fit = Fit(model, x_1=xdata[0], y_1=ydata[0])
+    assert not model.shared_parameters
+    assert isinstance(fit.minimizer, BFGS)
+
+
+def test_gaussian_2d_fitting():
+    """
+    Tests fitting to a scalar gaussian function with 2 independent
+    variables.
+    """
+    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
+    cov = [[0.2**2, 0], [0, 0.1**2]]
+
+    # TODO: Since we bin this data later on in 100 bins, just evaluate 100
+    # points on a Gaussian, and add an appropriate amount of noise. This
+    # burns CPU cycles without good reason.
+    data = np.random.multivariate_normal(mean, cov, 1000000)
+
+    # Insert them as y,x here as np fucks up cartesian conventions.
+    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
+                                           range=[[0.0, 1.0], [0.0, 1.0]])
+    xcentres = (xedges[:-1] + xedges[1:]) / 2
+    ycentres = (yedges[:-1] + yedges[1:]) / 2
+
+    # Make a valid grid to match ydata
+    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')
+
+    x0 = Parameter(value=mean[0], min=0.0, max=1.0)
+    sig_x = Parameter(value=0.2, min=0.0, max=0.3)
+    y0 = Parameter(value=mean[1], min=0.0, max=1.0)
+    sig_y = Parameter(value=0.1, min=0.0, max=0.3)
+    A = Parameter(value=np.mean(ydata), min=0.0)
+    x = Variable('x')
+    y = Variable('y')
+    g = Variable('g')
+
+    model = GradientModel(
+        {g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
+    fit = Fit(model, x=xx, y=yy, g=ydata)
+    fit_result = fit.execute()
+
+    assert fit_result.value(x0) == pytest.approx(np.mean(data[:, 0]), 1e-3)
+    assert fit_result.value(y0) == pytest.approx(np.mean(data[:, 1]), 1e-3)
+    assert np.abs(fit_result.value(sig_x)) == pytest.approx(np.std(data[:, 0]), 1e-2)
+    assert np.abs(fit_result.value(sig_y)) == pytest.approx(np.std(data[:, 1]), 1e-2)
+    assert fit_result.r_squared >= 0.96
+
+
+def test_gaussian_2d_fitting_background():
+    """
+    Tests fitting to a scalar gaussian function with 2 independent
+    variables to data with a background. Added after #149.
+    """
+    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
+    cov = [[0.2**2, 0], [0, 0.1**2]]
+    background = 3.0
+
+    # TODO: Since we bin this data later on in 100 bins, just evaluate 100
+    # points on a Gaussian, and add an appropriate amount of noise. This
+    # burns CPU cycles without good reason.
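+    # Draw 500000 samples from N(mean, cov); histogramming them below yields
+    # the 2D intensity data (plus a constant background) that we fit.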
+    data = np.random.multivariate_normal(mean, cov, 500000)
+    # print(data.shape)
+    # Insert them as y,x here as np fucks up cartesian conventions.
+    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
+                                           range=[[0.0, 1.0], [0.0, 1.0]])
+    xcentres = (xedges[:-1] + xedges[1:]) / 2
+    ycentres = (yedges[:-1] + yedges[1:]) / 2
+    ydata += background  # Background
+
+    # Make a valid grid to match ydata
+    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')
+
+    x0 = Parameter(value=1.1 * mean[0], min=0.0, max=1.0)
+    sig_x = Parameter(value=1.1 * 0.2, min=0.0, max=0.3)
+    y0 = Parameter(value=1.1 * mean[1], min=0.0, max=1.0)
+    sig_y = Parameter(value=1.1 * 0.1, min=0.0, max=0.3)
+    A = Parameter(value=1.1 * np.mean(ydata), min=0.0)
+    b = Parameter(value=1.2 * background, min=0.0)
+    x = Variable('x')
+    y = Variable('y')
+    g = Variable('g')
+
+    model = GradientModel({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y) + b})
+
+    # ydata, = model(x=xx, y=yy, x0=mean[0], y0=mean[1], sig_x=np.sqrt(cov[0][0]), sig_y=np.sqrt(cov[1][1]), A=1, b=3.0)
+    fit = Fit(model, x=xx, y=yy, g=ydata)
+    fit_result = fit.execute()
+
+    assert fit_result.value(x0) / np.mean(data[:, 0]) == pytest.approx(1.0, 1e-2)
+    assert fit_result.value(y0) / np.mean(data[:, 1]) == pytest.approx(1.0, 1e-2)
+    assert np.abs(fit_result.value(sig_x)) / np.std(data[:, 0]) == pytest.approx(1.0, 1e-2)
+    assert np.abs(fit_result.value(sig_y)) / np.std(data[:, 1]) == pytest.approx(1.0, 1e-2)
+    assert background / fit_result.value(b) == pytest.approx(1.0, 1e-1)
+    assert fit_result.r_squared >= 0.96
diff --git a/tests/test_constrained.py b/tests/test_constrained.py
index a3b7aa74..8150804d 100644
--- a/tests/test_constrained.py
+++ b/tests/test_constrained.py
@@ -1,6 +1,5 @@
 from __future__ import division, print_function
-import unittest
-import sys
+import pytest
 import numpy as np
 import sympy
 
@@ -25,1081 +24,1056 @@
 from tests.test_minimizers import subclasses
 
 
-class TestConstrained(unittest.TestCase):
+"""
+Tests for the `Fit` object. This object does
+everything the normal `NumericalLeastSquares` does and more. Tests should
+therefore cover the full range of scenarios `symfit` currently handles.
+"""
+
+
+def test_simple_kinetics():
     """
-    Tests for the `Fit` object. This object does
-    everything the normal `NumericalLeastSquares` does and more. Tests should
-    therefore cover the full range of scenarios `symfit` currently handles.
+    Simple kinetics data to test fitting
     """
-    def test_simple_kinetics(self):
-        """
-        Simple kinetics data to test fitting
-        """
-        tdata = np.array([10, 26, 44, 70, 120])
-        adata = 10e-4 * np.array([44, 34, 27, 20, 14])
-        a, b, t = variables('a, b, t')
-        k, a0 = parameters('k, a0')
-        k.value = 0.01
-        # a0.value, a0.min, a0.max = 54 * 10e-4, 40e-4, 60e-4
-        a0 = 54 * 10e-4
-
-        model_dict = {
-            D(a, t): - k * a**2,
-            D(b, t): k * a**2,
-        }
-
-        ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, b: 0.0})
-
-        fit = Fit(ode_model, t=tdata, a=adata, b=None)
-        fit_result = fit.execute(tol=1e-9)
-
-        self.assertAlmostEqual(fit_result.value(k) / 4.302875e-01, 1.0, 5)
-        self.assertAlmostEqual(fit_result.stdev(k) / 6.447068e-03, 1.0, 5)
-
-    def test_global_fitting(self):
-        """
-        Test a global fitting scenario with datasets of unequal length. In this
-        scenario, a quartic equation is fitted where the constant term is shared
-        between the datasets. (e.g. identical background noise)
-        """
-        x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
-        y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')
-
-        # The following vector valued function links all the equations together
-        # as stated in the intro.
-        model = Model({
-            y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
-            y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
-        })
-
-        # Generate data from this model
-        # xdata = np.linspace(0, 10)
-        xdata1 = np.linspace(0, 10)
-        xdata2 = xdata1[::2]  # Make the sets of unequal size
-
-        ydata1, ydata2 = model(x_1=xdata1, x_2=xdata2, a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
-        # Add some noise to make it appear like real data
-        np.random.seed(1)
-        ydata1 += np.random.normal(0, 2, size=ydata1.shape)
-        ydata2 += np.random.normal(0, 2, size=ydata2.shape)
-
-        xdata = [xdata1, xdata2]
-        ydata = [ydata1, ydata2]
-
-        # Guesses
-        a_1.value = 100
-        a_2.value = 50
-        b_1.value = 1
-        b_2.value = 1
-        y0.value = 10
-
-        eval_jac = model.eval_jacobian(x_1=xdata1, x_2=xdata2, a_1=101.3,
-                                       b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
-        self.assertEqual(len(eval_jac), 2)
-        for comp in eval_jac:
-            self.assertEqual(len(comp), len(model.params))
-
-        sigma_y = np.concatenate((np.ones(20), [2., 4., 5, 7, 3]))
-
-        fit = Fit(model, x_1=xdata[0], x_2=xdata[1],
-                  y_1=ydata[0], y_2=ydata[1], sigma_y_2=sigma_y)
-        fit_result = fit.execute()
-
-        # fit_curves = model(x_1=xdata[0], x_2=xdata[1], **fit_result.params)
-        self.assertAlmostEqual(fit_result.value(y0), 1.061892e+01, 3)
-        self.assertAlmostEqual(fit_result.value(a_1), 1.013269e+02, 3)
-        self.assertAlmostEqual(fit_result.value(a_2), 5.625694e+01, 3)
-        self.assertAlmostEqual(fit_result.value(b_1), 3.362240e-01, 3)
-        self.assertAlmostEqual(fit_result.value(b_2), 1.565253e+00, 3)
-
-    def test_named_fitting(self):
-        xdata = np.linspace(1, 10, 10)
-        ydata = 3*xdata**2
-
-        a = Parameter('a', 1.0)
-        b = Parameter('b', 2.5)
-        x, y = variables('x, y')
-        model = {y: a*x**b}
-
-        fit = Fit(model, x=xdata, y=ydata)
-        fit_result = fit.execute()
-        self.assertIsInstance(fit_result, FitResults)
-        self.assertAlmostEqual(fit_result.value(a), 3.0, 3)
-        self.assertAlmostEqual(fit_result.value(b), 2.0, 4)
-
-    def test_param_error_analytical(self):
-        """
-        Take an example in which the parameter errors are known and see if
-        `Fit` reproduces them.
-
-        It also needs to support the absolute_sigma argument.
-        """
-        N = 10000
-        sigma = 25.0
-        xn = np.arange(N, dtype=np.float)
-        np.random.seed(110)
-        yn = np.random.normal(size=xn.shape, scale=sigma)
-
-        a = Parameter()
-        y = Variable('y')
-        model = {y: a}
-
-        constr_fit = Fit(model, y=yn, sigma_y=sigma)
-        constr_result = constr_fit.execute()
-
-        fit = Fit(model, y=yn, sigma_y=sigma, minimizer=MINPACK)
-        fit_result = fit.execute()
-
-        self.assertAlmostEqual(fit_result.value(a), constr_result.value(a), 5)
-        self.assertAlmostEqual(fit_result.stdev(a), constr_result.stdev(a), 5)
-
-        # Analytical answer for mean of N(0,sigma):
-        sigma_mu = sigma/N**0.5
-
-        self.assertAlmostEqual(fit_result.value(a), np.mean(yn), 5)
-        self.assertAlmostEqual(fit_result.stdev(a), sigma_mu, 5)
-
-        # Compare for absolute_sigma = False.
-        constr_fit = Fit(model, y=yn, sigma_y=sigma, absolute_sigma=False)
-        constr_result = constr_fit.execute()
-
-        fit = Fit(model, y=yn, sigma_y=sigma, minimizer=MINPACK, absolute_sigma=False)
-        fit_result = fit.execute()
-
-        self.assertAlmostEqual(fit_result.value(a), constr_result.value(a), 5)
-        self.assertAlmostEqual(fit_result.stdev(a), constr_result.stdev(a), 5)
-
-    def test_grid_fitting(self):
-        xdata = np.arange(-5, 5, 1)
-        ydata = np.arange(5, 15, 1)
-        xx, yy = np.meshgrid(xdata, ydata, sparse=False)
-
-        zdata = (2.5*xx**2 + 3.0*yy**2)
-
-        a = Parameter(value=2.4, max=2.75)
-        b = Parameter(value=3.1, min=2.75)
-        x = Variable('x')
-        y = Variable('y')
-        z = Variable('z')
-        new = {z: a*x**2 + b*y**2}
-
-        fit = Fit(new, x=xx, y=yy, z=zdata)
-        # results = fit.execute(options={'maxiter': 10})
-        results = fit.execute()
-
-        self.assertAlmostEqual(results.value(a), 2.5, 4)
-        self.assertAlmostEqual(results.value(b), 3.0, 4)
-
-    @unittest.skip('Fit fails to compute the '
-                   'covariance matrix for a sparse grid.')
-    def test_grid_fitting_sparse(self):
-        xdata = np.arange(-5, 5, 1)
-        ydata = np.arange(5, 15, 1)
-        xx, yy = np.meshgrid(xdata, ydata, sparse=True)
-
-        zdata = (2.5*xx**2 + 3.0*yy**2)
-
-        a = Parameter(value=2.4, max=2.75)
-        b = Parameter(value=3.1, min=2.75)
-        x = Variable('x')
-        y = Variable('y')
-        z = Variable('z')
-        new = {z: a*x**2 + b*y**2}
-
-        fit = Fit(new, x=xx, y=yy, z=zdata)
-        results = fit.execute()
-
-        self.assertAlmostEqual(results.value(a), 2.5, 4)
-        self.assertAlmostEqual(results.value(b), 3.0, 4)
-
-    def test_vector_constrained_fitting(self):
-        """
-        Tests `Fit` with vector models. The
-        classical example of fitting measurements of the angles of a triangle is
-        taken. In this case we know they should add up to 180 degrees, so this
-        can be added as a constraint. Additionally, not even all three angles
-        have to be provided with measurement data since the constrained means
-        the angles are not independent.
-        """
-        a, b, c = parameters('a, b, c')
-        a_i, b_i, c_i = variables('a_i, b_i, c_i')
-
-        model = {a_i: a, b_i: b, c_i: c}
-
-        xdata = np.array([
-            [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
-            [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
-            [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
-        ])
-
-        fit_none = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=None,
-        )
-        fit = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-        )
-        fit_std = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-            minimizer = MINPACK
-        )
-        fit_constrained = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-            constraints=[Equality(a + b + c, 180)]
-        )
-        fit_none_result = fit_none.execute()
-        fit_new_result = fit.execute()
-        std_result = fit_std.execute()
-        constr_result = fit_constrained.execute()
-
-        # The total of averages should equal the total of the params by definition
-        mean_total = np.mean(np.sum(xdata, axis=0))
-        params_tot = std_result.value(a) + std_result.value(b) + std_result.value(c)
-        self.assertAlmostEqual(mean_total / params_tot, 1.0, 4)
-
-        # The total after constraining to 180 should be exactly 180.
-        params_tot = constr_result.value(a) + constr_result.value(b) + constr_result.value(c)
-        self.assertIsInstance(fit_constrained.minimizer, SLSQP)
-        self.assertAlmostEqual(180.0, params_tot, 4)
-
-        # The standard method and the Constrained object called without constraints
-        # should behave roughly the same.
-        self.assertAlmostEqual(fit_new_result.value(a), std_result.value(a), 4)
-        self.assertAlmostEqual(fit_new_result.value(b), std_result.value(b), 4)
-        self.assertAlmostEqual(fit_new_result.value(c), std_result.value(c), 4)
-
-        # When fitting with a dataset set to None, for this example the value of c
-        # should be unaffected.
-        self.assertAlmostEqual(fit_none_result.value(a), std_result.value(a), 4)
-        self.assertAlmostEqual(fit_none_result.value(b), std_result.value(b), 4)
-        self.assertAlmostEqual(fit_none_result.value(c), c.value)
-
-        fit_none_constr = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=None,
-            constraints=[Equality(a + b + c, 180)]
-        )
-        none_constr_result = fit_none_constr.execute()
-        params_tot = none_constr_result.value(a) + none_constr_result.value(b) + none_constr_result.value(c)
-        self.assertAlmostEqual(180.0, params_tot, 4)
-
-    def test_vector_parameter_error(self):
-        """
-        Tests `Fit` parameter error estimation with
-        vector models. This is done by using the typical angles of a triangle
-        example. For completeness, we throw in covariance between the angles.
-
-        As per 0.5.0 this test has been updated in an important way. Previously
-        the covariance matrix was estimated on a per component basis for global
-        fitting problems. This was incorrect, but no solution was possible at
-        the time. Now, we calculate the covariance matrix from the Hessian of
-        the function being optimized, and so now the covariance is calculated
-        correctly in those scenarios.
-
-        As a result for this particular test however, it means we lose
-        sensitivity to the error of each parameter separately. This makes sense,
-        since the uncertainty is now being spread out over the components. To
-        regain this, the user should just fit the components separately.
-        """
-        N = 10000
-        a, b, c = parameters('a, b, c')
-        a_i, b_i, c_i = variables('a_i, b_i, c_i')
-
-        model = {a_i: a, b_i: b, c_i: c}
-
-        np.random.seed(1)
-        # Sample from a multivariate normal with correlation.
-        pcov = np.array([[0.4, 0.3, 0.5], [0.3, 0.8, 0.4], [0.5, 0.4, 1.2]])
-        xdata = np.random.multivariate_normal([10, 100, 70], pcov, N).T
-
-        fit = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-        )
-        fit_std = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-            minimizer = MINPACK
-        )
-        fit_new_result = fit.execute()
-        std_result = fit_std.execute()
-
-        # When no errors are given, we default to `absolute_sigma=False`, since
-        # that is the best we can do.
-        self.assertFalse(fit.absolute_sigma)
-        self.assertFalse(fit_std.absolute_sigma)
-
-        # The standard method and the Constrained object called without constraints
-        # should give roughly the same parameter values.
-        self.assertAlmostEqual(fit_new_result.value(a), std_result.value(a), 3)
-        self.assertAlmostEqual(fit_new_result.value(b), std_result.value(b), 3)
-        self.assertAlmostEqual(fit_new_result.value(c), std_result.value(c), 3)
-
-        # in this toy model, fitting is identical to simply taking the average
-        self.assertAlmostEqual(fit_new_result.value(a), np.mean(xdata[0]), 4)
-        self.assertAlmostEqual(fit_new_result.value(b), np.mean(xdata[1]), 4)
-        self.assertAlmostEqual(fit_new_result.value(c), np.mean(xdata[2]), 4)
-
-        # All stdev's must be equal
-        self.assertAlmostEqual(fit_new_result.stdev(a)/fit_new_result.stdev(b), 1.0, 3)
-        self.assertAlmostEqual(fit_new_result.stdev(a)/fit_new_result.stdev(c), 1.0, 3)
-        # Test for a miss on the exact value
-        self.assertNotAlmostEqual(fit_new_result.stdev(a)/np.sqrt(pcov[0, 0]/N), 1.0, 3)
-        self.assertNotAlmostEqual(fit_new_result.stdev(b)/np.sqrt(pcov[1, 1]/N), 1.0, 3)
-        self.assertNotAlmostEqual(fit_new_result.stdev(c)/np.sqrt(pcov[2, 2]/N), 1.0, 3)
-
-        # The standard object actually does not predict the right values for
-        # stdev, because its method for computing them apparently does not allow
-        # for vector valued functions.
-        # So actually, for vector valued functions its better to use
-        # Fit, though this does not give covariances.
-
-        # With the correct values of sigma, absolute_sigma=True should be in
-        # agreement with analytical.
-        sigmadata = np.array([
-            np.sqrt(pcov[0, 0]),
-            np.sqrt(pcov[1, 1]),
-            np.sqrt(pcov[2, 2])
-        ])
-        fit = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-            sigma_a_i=sigmadata[0],
-            sigma_b_i=sigmadata[1],
-            sigma_c_i=sigmadata[2],
-        )
-        self.assertTrue(fit.absolute_sigma)
-        fit_result = fit.execute()
-        # The standard deviation in the mean is stdev/sqrt(N),
-        # see test_param_error_analytical
-        self.assertAlmostEqual(fit_result.stdev(a)/np.sqrt(pcov[0, 0]/N), 1.0, 4)
-        self.assertAlmostEqual(fit_result.stdev(b)/np.sqrt(pcov[1, 1]/N), 1.0, 4)
-        self.assertAlmostEqual(fit_result.stdev(c)/np.sqrt(pcov[2, 2]/N), 1.0, 4)
-
-
-        # Finally, we should confirm that with unrealistic sigma and
-        # absolute_sigma=True, we are no longer in agreement with the analytical result
-        # Let's take everything to be 1 to point out the dangers of doing so.
-        sigmadata = np.array([1, 1, 1])
-        fit2 = Fit(
-            model=model,
-            a_i=xdata[0],
-            b_i=xdata[1],
-            c_i=xdata[2],
-            sigma_a_i=sigmadata[0],
-            sigma_b_i=sigmadata[1],
-            sigma_c_i=sigmadata[2],
-            absolute_sigma=True
-        )
-        fit_result = fit2.execute()
-        # Should be off bigly
-        self.assertNotAlmostEqual(fit_result.stdev(a)/np.sqrt(pcov[0, 0]/N), 1.0, 1)
-        self.assertNotAlmostEqual(fit_result.stdev(b)/np.sqrt(pcov[1, 1]/N), 1.0, 1)
-        self.assertNotAlmostEqual(fit_result.stdev(c)/np.sqrt(pcov[2, 2]/N), 1.0, 1)
-
-    def test_error_advanced(self):
-        """
-        Compare the error propagation of Fit against
-        NumericalLeastSquares.
-        Models an example from the mathematica docs and try's to replicate it:
-        http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html
-        """
-        data = [
-            [0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6],
-            [1., 2.2, 5.9], [1.8, 2.4, 7.2], [9., 1.7, 7.],
-            [7.9, 8., 10.4], [4.9, 3.9, 9.], [2.3, 2.6, 7.4],
-            [4.7, 8.4, 10.]
-        ]
-        xdata, ydata, zdata = [np.array(data) for data in zip(*data)]
-        # errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2])
-
-        a = Parameter('a', 3.0)
-        b = Parameter('b', 0.9)
-        c = Parameter('c', 5.0)
-        x = Variable('x')
-        y = Variable('y')
-        z = Variable('z')
-        model = {z: a * log(b * x + c * y)}
-
-        const_fit = Fit(model, xdata, ydata, zdata, absolute_sigma=False)
-        self.assertEqual(len(const_fit.model(x=xdata, y=ydata, a=2, b=2, c=5)), 1)
-        self.assertEqual(
-            const_fit.model(x=xdata, y=ydata, a=2, b=2, c=5)[0].shape,
-            (10,)
-        )
-        self.assertEqual(len(const_fit.model.eval_jacobian(x=xdata, y=ydata, a=2, b=2, c=5)), 1)
-        self.assertEqual(
-            const_fit.model.eval_jacobian(x=xdata, y=ydata, a=2, b=2, c=5)[0].shape,
-            (3, 10)
-        )
-        self.assertEqual(len(const_fit.model.eval_hessian(x=xdata, y=ydata, a=2, b=2, c=5)), 1)
-        self.assertEqual(
-            const_fit.model.eval_hessian(x=xdata, y=ydata, a=2, b=2, c=5)[0].shape,
-            (3, 3, 10)
-        )
-
-        self.assertEqual(const_fit.objective(a=2, b=2, c=5).shape,
-                         tuple())
-        self.assertEqual(
-            const_fit.objective.eval_jacobian(a=2, b=2, c=5).shape,
-            (3,)
-        )
-        self.assertEqual(
-            const_fit.objective.eval_hessian(a=2, b=2, c=5).shape,
-            (3, 3)
-        )
-        self.assertNotEqual(
-            const_fit.objective.eval_hessian(a=2, b=2, c=5).dtype,
-            object
-        )
-
-        const_result = const_fit.execute()
-        fit = Fit(model, xdata, ydata, zdata, absolute_sigma=False, minimizer=MINPACK)
-        std_result = fit.execute()
-
-        self.assertEqual(const_fit.absolute_sigma, fit.absolute_sigma)
-
-        self.assertAlmostEqual(const_result.value(a), std_result.value(a), 4)
-        self.assertAlmostEqual(const_result.value(b), std_result.value(b), 4)
-        self.assertAlmostEqual(const_result.value(c), std_result.value(c), 4)
-
-        # This used to be a tighter equality test, but since we now use the
-        # Hessian we actually get a more accurate value from the standard fit
-        # then for MINPACK. Hence we check if it is roughly equal, and if our
-        # stdev is greater than that of minpack.
-        self.assertAlmostEqual(const_result.stdev(a) / std_result.stdev(a), 1, 2)
-        self.assertAlmostEqual(const_result.stdev(b) / std_result.stdev(b), 1, 1)
-        self.assertAlmostEqual(const_result.stdev(c) / std_result.stdev(c), 1, 2)
-
-        self.assertGreaterEqual(const_result.stdev(a), std_result.stdev(a))
-        self.assertGreaterEqual(const_result.stdev(b), std_result.stdev(b))
-        self.assertGreaterEqual(const_result.stdev(c), std_result.stdev(c))
-
-    def test_gaussian_2d_fitting(self):
-        """
-        Tests fitting to a scalar gaussian function with 2 independent
-        variables. Very sensitive to initial guesses, and if they are chosen too
-        restrictive Fit actually throws a tantrum.
-        It therefore appears to be more sensitive than NumericalLeastSquares.
-        """
-        mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
-        cov = [[0.2**2, 0], [0, 0.1**2]]
-
-        np.random.seed(0)
-        data = np.random.multivariate_normal(mean, cov, 100000)
-
-        # Insert them as y,x here as np fucks up cartesian conventions.
-        ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
-                                               range=[[0.0, 1.0], [0.0, 1.0]])
-        xcentres = (xedges[:-1] + xedges[1:]) / 2
-        ycentres = (yedges[:-1] + yedges[1:]) / 2
-
-        # Make a valid grid to match ydata
-        xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')
-
-        x0 = Parameter(value=mean[0], min=0.0, max=1.0)
-        sig_x = Parameter(value=0.2, min=0.0, max=0.3)
-        y0 = Parameter(value=mean[1], min=0.0, max=1.0)
-        sig_y = Parameter(value=0.1, min=0.0, max=0.3)
-        A = Parameter(value=np.mean(ydata), min=0.0)
-        x = Variable('x')
-        y = Variable('y')
-        g = Variable('g')
-        model = GradientModel({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
-        fit = Fit(model, x=xx, y=yy, g=ydata)
-        fit_result = fit.execute()
-
-        self.assertAlmostEqual(fit_result.value(x0), np.mean(data[:, 0]), 3)
-        self.assertAlmostEqual(fit_result.value(y0), np.mean(data[:, 1]), 3)
-        self.assertAlmostEqual(np.abs(fit_result.value(sig_x)), np.std(data[:, 0]), 2)
-        self.assertAlmostEqual(np.abs(fit_result.value(sig_y)), np.std(data[:, 1]), 2)
-        self.assertGreaterEqual(fit_result.r_squared, 0.96)
-
-        # Compare with industry standard MINPACK
-        fit_std = Fit(model, x=xx, y=yy, g=ydata, minimizer=MINPACK)
-        fit_std_result = fit_std.execute()
-
-        self.assertAlmostEqual(fit_std_result.value(x0), fit_result.value(x0), 4)
-        self.assertAlmostEqual(fit_std_result.value(y0), fit_result.value(y0), 4)
-        self.assertAlmostEqual(fit_std_result.value(sig_x), fit_result.value(sig_x), 4)
-        self.assertAlmostEqual(fit_std_result.value(sig_y), fit_result.value(sig_y), 4)
-        self.assertAlmostEqual(fit_std_result.r_squared, fit_result.r_squared, 4)
-
-    def test_fixed_and_constrained(self):
-        """
-        Taken from #165. Fixing parameters and constraining others caused a
-        TypeError: missing a required argument: 'theta1', which was caused by a
-        mismatch in the shape of the initial guesses given and the number of
-        parameters constraints expected. The initial_guesses no longer contained
-        those corresponding to fixed parameters.
-        """
-        phi1, phi2, theta1, theta2 = parameters('phi1, phi2, theta1, theta2')
-        x, y = variables('x, y')
-
-        model_dict = {y: (1 + x * theta1 + theta2 * x ** 2) / (
-            1 + phi1 * x * theta1 + phi2 * theta2 * x ** 2)}
-        constraints = [GreaterThan(theta1, theta2)]
-
-        xdata = np.array(
-            [0., 0.000376, 0.000752, 0.0015, 0.00301, 0.00601, 0.00902])
-        ydata = np.array(
-            [1., 1.07968041, 1.08990638, 1.12151629, 1.13068452, 1.15484109,
-             1.19883952])
-
-        phi1.value = 0.845251484373516
-        phi1.fixed = True
-
-        phi2.value = 0.7105427053026403
-        phi2.fixed = True
-
-        fit = Fit(model_dict, x=xdata, y=ydata,
-                  constraints=constraints, minimizer=SLSQP)
-        fit_result_slsqp = fit.execute()
-        # The data and fixed parameters should be partialed away.
-        objective_kwargs = {
-            phi2.name: phi2.value,
-            phi1.name: phi1.value,
-            x.name: xdata,
-        }
-        constraint_kwargs = {
-            phi2.name: phi2.value,
-            phi1.name: phi1.value,
-        }
-        for index, constraint in enumerate(fit.minimizer.constraints):
-            self.assertIsInstance(constraint, MinimizeModel)
-            self.assertEqual(constraint.model, fit.constraints[index])
-            self.assertEqual(constraint.data, fit.data)
-            self.assertEqual(constraint.data, fit.objective.data)
-
-            # Data should be the same memory location so they can share state.
-            self.assertEqual(id(fit.objective.data),
-                             id(constraint.data))
-
-            # Test if the fixed params have been partialed away
-            self.assertEqual(key2str(constraint._invariant_kwargs).keys(),
-                             constraint_kwargs.keys())
-            self.assertEqual(key2str(fit.objective._invariant_kwargs).keys(),
-                             objective_kwargs.keys())
-
-        # Compare the shapes. The constraint shape should now be the same as
-        # that of the objective
-        obj_val = fit.minimizer.objective(fit.minimizer.initial_guesses)
-        obj_jac = fit.minimizer.wrapped_jacobian(fit.minimizer.initial_guesses)
-        with self.assertRaises(TypeError):
-            len(obj_val)  # scalars don't have lengths
-        self.assertEqual(len(obj_jac), 2)
-
-        for index, constraint in enumerate(fit.minimizer.wrapped_constraints):
-            self.assertEqual(constraint['type'], 'ineq')
-            self.assertTrue('args' not in constraint)
-            self.assertTrue(callable(constraint['fun']))
-            self.assertTrue(callable(constraint['jac']))
-
-            # The argument should be the partialed Constraint object
-            self.assertEqual(constraint['fun'], fit.minimizer.constraints[index])
-            self.assertIsInstance(constraint['fun'], MinimizeModel)
-            self.assertTrue('jac' in constraint)
-
-            # Test the shapes
-            cons_val = constraint['fun'](fit.minimizer.initial_guesses)
-            cons_jac = constraint['jac'](fit.minimizer.initial_guesses)
-            self.assertEqual(cons_val.shape, (1,))
-            self.assertIsInstance(cons_val[0], float)
-            self.assertEqual(obj_jac.shape, cons_jac.shape)
-            self.assertEqual(obj_jac.shape, (2,))
-
-    def test_interdependency_constrained(self):
-        """
-        Test a model with interdependent components, and with constraints which
-        depend on the Model's output.
-        This is done in the MatrixSymbol formalism, using a Tikhonov
-        regularization as an example. In this, a matrix inverse has to be
-        calculated and is used multiple times. Therefore we split that term of
-        into a seperate component, so the inverse only has to be computed once
-        per model call.
-
-        See https://arxiv.org/abs/1901.05348 for a more detailed background.
-        """
-        N = Symbol('N', integer=True)
-        M = MatrixSymbol('M', N, N)
-        W = MatrixSymbol('W', N, N)
-        I = MatrixSymbol('I', N, N)
-        y = MatrixSymbol('y', N, 1)
-        c = MatrixSymbol('c', N, 1)
-        a, = parameters('a')
-        z, = variables('z')
-        i = Idx('i')
-
-        model_dict = {
-            W: Inverse(I + M / a ** 2),
-            c: - W * y,
-            z: sqrt(c.T * c)
-        }
-        # Sympy currently does not support derivatives of matrix expressions,
-        # so we use CallableModel instead of Model.
-        model = CallableModel(model_dict)
-
-        # Generate data
-        iden = np.eye(2)
-        M_mat = np.array([[2, 1], [3, 4]])
-        y_vec = np.array([[3], [5]])
-        eval_model = model(I=iden, M=M_mat, y=y_vec, a=0.1)
-        # Calculate the answers 'manually' so I know it was done properly
-        W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2)
-        c_manual = - np.atleast_2d(W_manual.dot(y_vec))
-        z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual)))
-
-        self.assertEqual(y_vec.shape, (2, 1))
-        self.assertEqual(M_mat.shape, (2, 2))
-        self.assertEqual(iden.shape, (2, 2))
-        self.assertEqual(W_manual.shape, (2, 2))
-        self.assertEqual(c_manual.shape, (2, 1))
-        self.assertEqual(z_manual.shape, (1, 1))
-        np.testing.assert_almost_equal(W_manual, eval_model.W)
-        np.testing.assert_almost_equal(c_manual, eval_model.c)
-        np.testing.assert_almost_equal(z_manual, eval_model.z)
-        fit = Fit(model, z=z_manual, I=iden, M=M_mat, y=y_vec)
-        fit_result = fit.execute()
-
-        # See if a == 0.1 was reconstructed properly. Since only a**2 features
-        # in the equations, we check for the absolute value. Setting a.min = 0.0
-        # is not appreciated by the Minimizer, it seems.
-        self.assertAlmostEqual(np.abs(fit_result.value(a)), 0.1)
-
-    def test_data_for_constraint(self):
-        """
-        Test the signature handling when constraints are at play. Constraints
-        should take seperate data, but still kwargs that are not found in either
-        the model nor the constraints should raise an error.
-        """
-        A, mu, sig = parameters('A, mu, sig')
-        x, y, Y = variables('x, y, Y')
-
-        model = Model({y: A * Gaussian(x, mu=mu, sig=sig)})
-        constraint = Model.as_constraint(Y, model, constraint_type=Eq)
-
-        np.random.seed(2)
-        xdata = np.random.normal(1.2, 2, 10)
-        ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))),
-                                     density=True)
-
-        # Allowed
-        fit = Fit(model, x=xdata, y=ydata, Y=2, constraints=[constraint])
-        self.assertIsInstance(fit.objective, LeastSquares)
-        self.assertIsInstance(fit.minimizer.constraints[0], MinimizeModel)
-        fit = Fit(model, x=xdata, y=ydata)
-        self.assertIsInstance(fit.objective, LeastSquares)
-        fit = Fit(model, x=xdata, objective=LogLikelihood)
-        self.assertIsInstance(fit.objective, LogLikelihood)
-
-        # Not allowed
-        with self.assertRaises(TypeError):
-            fit = Fit(model, x=xdata, y=ydata, Y=2)
-        with self.assertRaises(TypeError):
-            fit = Fit(model, x=xdata, y=ydata, Y=2, Z=3, constraints=[constraint])
-        # Since #214 has been fixed, these properly raise an error.
-        with self.assertRaises(TypeError):
-            fit = Fit(model, x=xdata)
-        with self.assertRaises(TypeError):
-            fit = Fit(model, x=xdata, y=ydata, objective=LogLikelihood)
-
-
-    def test_constrained_dependent_on_model(self):
-        """
-        For a simple Gaussian distribution, we test if Models of various types
-        can be used as constraints. Of particular interest are NumericalModels,
-        which can be used to fix the integral of the model during the fit to 1,
-        as it should be for a probability distribution.
-        :return:
-        """
-        A, mu, sig = parameters('A, mu, sig')
-        x, y, Y = variables('x, y, Y')
-        i = Idx('i', (0, 1000))
-        sig.min = 0.0
-
-        model = GradientModel({y: A * Gaussian(x, mu=mu, sig=sig)})
-
-        # Generate data, 100 samples from a N(1.2, 2) distribution
-        np.random.seed(2)
-        xdata = np.random.normal(1.2, 2, 1000)
-        ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))), density=True)
-        xcentres = (xedges[1:] + xedges[:-1]) / 2
+    tdata = np.array([10, 26, 44, 70, 120])
+    adata = 10e-4 * np.array([44, 34, 27, 20, 14])
+    a, b, t = variables('a, b, t')
+    k, a0 = parameters('k, a0')
+    k.value = 0.01
+    # a0.value, a0.min, a0.max = 54 * 10e-4, 40e-4, 60e-4
+    a0 = 54 * 10e-4
+
+    model_dict = {
+        D(a, t): - k * a**2,
+        D(b, t): k * a**2,
+    }
+
+    ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, b: 0.0})
+
+    fit = Fit(ode_model, t=tdata, a=adata, b=None)
+    fit_result = fit.execute(tol=1e-9)
+
+    assert fit_result.value(k) == pytest.approx(4.302875e-01, 1e-5)
+    assert fit_result.stdev(k) == pytest.approx(6.447068e-03, 1e-5)
+
+
+def test_global_fitting():
+    """
+    Test a global fitting scenario with datasets of unequal length. In this
+    scenario, a quadratic equation is fitted where the constant term is shared
+    between the datasets. (e.g. identical background noise)
+    """
+    x_1, x_2, y_1, y_2 = variables('x_1, x_2, y_1, y_2')
+    y0, a_1, a_2, b_1, b_2 = parameters('y0, a_1, a_2, b_1, b_2')
+
+    # The following vector valued function links all the equations together
+    # as stated in the intro.
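+    # The shared constant term y0 is what makes this a global (simultaneous) fit.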
+    model = Model({
+        y_1: a_1 * x_1**2 + b_1 * x_1 + y0,
+        y_2: a_2 * x_2**2 + b_2 * x_2 + y0,
+    })
+
+    # Generate data from this model
+    # xdata = np.linspace(0, 10)
+    xdata1 = np.linspace(0, 10)
+    xdata2 = xdata1[::2]  # Make the sets of unequal size
+
+    ydata1, ydata2 = model(x_1=xdata1, x_2=xdata2, a_1=101.3,
+                           b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
+    # Add some noise to make it appear like real data
+    np.random.seed(1)
+    ydata1 += np.random.normal(0, 2, size=ydata1.shape)
+    ydata2 += np.random.normal(0, 2, size=ydata2.shape)
+
+    xdata = [xdata1, xdata2]
+    ydata = [ydata1, ydata2]
+
+    # Guesses
+    a_1.value = 100
+    a_2.value = 50
+    b_1.value = 1
+    b_2.value = 1
+    y0.value = 10
+
+    eval_jac = model.eval_jacobian(x_1=xdata1, x_2=xdata2, a_1=101.3,
+                                   b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8)
+    assert len(eval_jac) == 2
+    for comp in eval_jac:
+        assert len(comp) == len(model.params)
+
+    sigma_y = np.concatenate((np.ones(20), [2., 4., 5, 7, 3]))
+
+    fit = Fit(model, x_1=xdata[0], x_2=xdata[1],
+              y_1=ydata[0], y_2=ydata[1], sigma_y_2=sigma_y)
+    fit_result = fit.execute()
+
+    # fit_curves = model(x_1=xdata[0], x_2=xdata[1], **fit_result.params)
+    assert fit_result.value(y0) == pytest.approx(1.061892e+01, 1e-03)
+    assert fit_result.value(a_1) == pytest.approx(1.013269e+02, 1e-03)
+    assert fit_result.value(a_2) == pytest.approx(5.625694e+01, 1e-03)
+    assert fit_result.value(b_1) == pytest.approx(3.362240e-01, 1e-03)
+    assert fit_result.value(b_2) == pytest.approx(1.565253e+00, 1e-03)
+
+
+def test_named_fitting():
+    xdata = np.linspace(1, 10, 10)
+    ydata = 3*xdata**2
+
+    a = Parameter('a', 1.0)
+    b = Parameter('b', 2.5)
+    x, y = variables('x, y')
+    model = {y: a*x**b}
+
+    fit = Fit(model, x=xdata, y=ydata)
+    fit_result = fit.execute()
+    assert isinstance(fit_result, FitResults)
+    assert fit_result.value(a) == pytest.approx(3.0, 1e-3)
+    assert fit_result.value(b) == pytest.approx(2.0, 1e-4)
+
+
+def test_param_error_analytical():
+    """
+    Take an example in which the parameter errors are known and see if
+    `Fit` reproduces them.
+
+    It also needs to support the absolute_sigma argument.
+    """
+    N = 10000
+    sigma = 25.0
+    xn = np.arange(N, dtype=np.float)
+    np.random.seed(110)
+    yn = np.random.normal(size=xn.shape, scale=sigma)
+
+    a = Parameter()
+    y = Variable('y')
+    model = {y: a}
+
+    constr_fit = Fit(model, y=yn, sigma_y=sigma)
+    constr_result = constr_fit.execute()
+
+    fit = Fit(model, y=yn, sigma_y=sigma, minimizer=MINPACK)
+    fit_result = fit.execute()
+
+    assert fit_result.value(a) == pytest.approx(constr_result.value(a), 1e-5)
+    assert fit_result.stdev(a) == pytest.approx(constr_result.stdev(a), 1e-5)
+
+    # Analytical answer for mean of N(0,sigma):
+    sigma_mu = sigma/N**0.5
+
+    assert fit_result.value(a) == pytest.approx(np.mean(yn), 1e-5)
+    assert fit_result.stdev(a) == pytest.approx(sigma_mu, 1e-5)
+
+    # Compare for absolute_sigma = False.
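+    # With absolute_sigma=False the sigmas only set relative weights, and the
+    # parameter uncertainties are rescaled by the reduced chi-squared.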
+    constr_fit = Fit(model, y=yn, sigma_y=sigma, absolute_sigma=False)
+    constr_result = constr_fit.execute()
+
+    fit = Fit(model, y=yn, sigma_y=sigma,
+              minimizer=MINPACK, absolute_sigma=False)
+    fit_result = fit.execute()
+
+    assert fit_result.value(a) == pytest.approx(constr_result.value(a), 1e-5)
+    assert fit_result.stdev(a) == pytest.approx(constr_result.stdev(a), 1e-5)
+
+
+def test_grid_fitting():
+    xdata = np.arange(-5, 5, 1)
+    ydata = np.arange(5, 15, 1)
+    xx, yy = np.meshgrid(xdata, ydata, sparse=False)
+
+    zdata = (2.5*xx**2 + 3.0*yy**2)
+
+    a = Parameter(value=2.4, max=2.75)
+    b = Parameter(value=3.1, min=2.75)
+    x = Variable('x')
+    y = Variable('y')
+    z = Variable('z')
+    new = {z: a*x**2 + b*y**2}
+
+    fit = Fit(new, x=xx, y=yy, z=zdata)
+    # results = fit.execute(options={'maxiter': 10})
+    results = fit.execute()
+
+    assert results.value(a) == pytest.approx(2.5, 1e-4)
+    assert results.value(b) == pytest.approx(3.0, 1e-4)
+
+
+@pytest.mark.skip(reason='Fit fails to compute the covariance matrix for a sparse grid.')
+def test_grid_fitting_sparse():
+    xdata = np.arange(-5, 5, 1)
+    ydata = np.arange(5, 15, 1)
+    xx, yy = np.meshgrid(xdata, ydata, sparse=True)
+
+    zdata = (2.5*xx**2 + 3.0*yy**2)
+
+    a = Parameter(value=2.4, max=2.75)
+    b = Parameter(value=3.1, min=2.75)
+    x = Variable('x')
+    y = Variable('y')
+    z = Variable('z')
+    new = {z: a*x**2 + b*y**2}
+
+    fit = Fit(new, x=xx, y=yy, z=zdata)
+    results = fit.execute()
+
+    assert results.value(a) == pytest.approx(2.5, 1e-4)
+    assert results.value(b) == pytest.approx(3.0, 1e-4)
+
+
-        # Unconstrained fit
-        fit = Fit(model, x=xcentres, y=ydata)
-        unconstr_result = fit.execute()
-
-        # Constraints must be scalar models.
-        with self.assertRaises(ModelError):
-            Model.as_constraint([A - 1, sig - 1], model, constraint_type=Eq)
-        constraint_exact = Model.as_constraint(
-            A * sqrt(2 * sympy.pi) * sig - 1, model, constraint_type=Eq
-        )
-        # Only when explicitly asked, do models behave as constraints.
-        self.assertTrue(hasattr(constraint_exact, 'constraint_type'))
-        self.assertEqual(constraint_exact.constraint_type, Eq)
-        self.assertFalse(hasattr(model, 'constraint_type'))
-
-        # Now lets make some valid constraints and see if they are respected!
-        # TODO: These first two should be symbolical integrals over `y` instead,
-        # but currently this is not converted into a numpy/scipy function. So instead the first two are not valid constraints.
-        constraint_model = Model.as_constraint(A - 1, model, constraint_type=Eq)
-        constraint_exact = Eq(A, 1)
-        constraint_num = CallableNumericalModel.as_constraint(
-            {Y: lambda x, y: simps(y, x) - 1},  # Integrate using simps
-            model=model,
-            connectivity_mapping={Y: {x, y}},
-            constraint_type=Eq
-        )
-
-        # Test for all these different types of constraint.
- for constraint in [constraint_model, constraint_exact, constraint_num]: - if not isinstance(constraint, Eq): - self.assertEqual(constraint.constraint_type, Eq) - - xcentres = (xedges[1:] + xedges[:-1]) / 2 - fit = Fit(model, x=xcentres, y=ydata, constraints=[constraint]) - # Test if conversion into a constraint was done properly - fit_constraint = fit.constraints[0] - self.assertEqual(fit.model.params, fit_constraint.params) - self.assertEqual(fit_constraint.constraint_type, Eq) - - con_map = fit_constraint.connectivity_mapping - if isinstance(constraint, CallableNumericalModel): - self.assertEqual(con_map, {Y: {x, y}, y: {x, mu, sig, A}}) - self.assertEqual(fit_constraint.independent_vars, [x]) - self.assertEqual(fit_constraint.dependent_vars, [Y]) - self.assertEqual(fit_constraint.interdependent_vars, [y]) - self.assertEqual(fit_constraint.params, [A, mu, sig]) - else: - # ToDo: if these constraints can somehow be written as integrals - # depending on y and x this if/else should be removed. - self.assertEqual(con_map, - {fit_constraint.dependent_vars[0]: {A}}) - self.assertEqual(fit_constraint.independent_vars, []) - self.assertEqual(len(fit_constraint.dependent_vars), 1) - self.assertEqual(fit_constraint.interdependent_vars, []) - self.assertEqual(fit_constraint.params, [A, mu, sig]) - - # Finally, test if the constraint worked - fit_result = fit.execute(options={'eps': 1e-15, 'ftol': 1e-10}) - unconstr_value = fit.minimizer.wrapped_constraints[0]['fun'](**unconstr_result.params) - constr_value = fit.minimizer.wrapped_constraints[0]['fun'](**fit_result.params) - self.assertAlmostEqual(constr_value[0], 0.0, 10) - # And if it was very poorly met before - self.assertNotAlmostEqual(unconstr_value[0], 0.0, 2) - - def test_constrained_dependent_on_matrixmodel(self): - """ - Similar to test_constrained_dependent_on_model, but now using - MatrixSymbols. This is much more powerful, since now the constraint can - really be written down as a symbolical one as well. - """ - A, mu, sig = parameters('A, mu, sig') - M = symbols('M', integer=True) # Number of measurements - - # Create vectors for all the quantities - x = MatrixSymbol('x', M, 1) - dx = MatrixSymbol('dx', M, 1) - y = MatrixSymbol('y', M, 1) - I = MatrixSymbol('I', M, 1) # 'identity' vector - Y = MatrixSymbol('Y', 1, 1) - B = MatrixSymbol('B', M, 1) - i = Idx('i', M) - - # Looks overly complicated, but it's just a simple Gaussian - model = CallableModel( - {y: A * sympy.exp(- HadamardProduct(B, B) / (2 * sig**2)) - /sympy.sqrt(2*sympy.pi*sig**2), - B: (x - mu * I)} - ) - self.assertEqual(model.independent_vars, [I, x]) - self.assertEqual(model.dependent_vars, [y]) - self.assertEqual(model.interdependent_vars, [B]) - self.assertEqual(model.params, [A, mu, sig]) - - # Generate data, sample from a N(1.2, 2) distribution. Has to be 2D. 
-        np.random.seed(2)
-        xdata = np.random.normal(1.2, 2, size=10000)
-        ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))), density=True)
-        xcentres = np.atleast_2d((xedges[1:] + xedges[:-1]) / 2).T
-        xdiff = np.atleast_2d((xedges[1:] - xedges[:-1])).T
-        ydata = np.atleast_2d(ydata).T
-        Idata = np.ones_like(xcentres)
-
-        self.assertEqual(xcentres.shape, (int(np.sqrt(len(xdata))), 1))
-        self.assertEqual(xdiff.shape, (int(np.sqrt(len(xdata))), 1))
-        self.assertEqual(ydata.shape, (int(np.sqrt(len(xdata))), 1))
-
-        fit = Fit(model, x=xcentres, y=ydata, I=Idata)
-        unconstr_result = fit.execute()
-
-        constraint = CallableModel({Y: Sum(y[i, 0] * dx[i, 0], i) - 1})
-        with self.assertRaises(ModelError):
-            fit = Fit(model, x=xcentres, y=ydata, dx=xdiff, M=len(xcentres),
-                      I=Idata, constraints=[constraint])
-
-        constraint = CallableModel.as_constraint(
-            {Y: Sum(y[i, 0] * dx[i, 0], i) - 1},
-            model=model,
-            constraint_type=Eq
-        )
-        self.assertEqual(constraint.independent_vars, [I, M, dx, x])
-        self.assertEqual(constraint.dependent_vars, [Y])
-        self.assertEqual(constraint.interdependent_vars, [B, y])
-        self.assertEqual(constraint.params, [A, mu, sig])
-        self.assertEqual(constraint.constraint_type, Eq)
-
-        # Provide the extra data needed for the constraints as well
+def test_vector_constrained_fitting():
+    """
+    Tests `Fit` with vector models. The classical example of fitting
+    measurements of the angles of a triangle is taken. In this case we know
+    they should add up to 180 degrees, so this can be added as a constraint.
+    Additionally, not even all three angles have to be provided with
+    measurement data, since the constraint means the angles are not
+    independent.
+    """
+    a, b, c = parameters('a, b, c')
+    a_i, b_i, c_i = variables('a_i, b_i, c_i')
+
+    model = {a_i: a, b_i: b, c_i: c}
+
+    xdata = np.array([
+        [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
+        [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
+        [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
+    ])
+
+    fit_none = Fit(
+        model=model,
+        a_i=xdata[0],
+        b_i=xdata[1],
+        c_i=None,
+    )
+    fit = Fit(
+        model=model,
+        a_i=xdata[0],
+        b_i=xdata[1],
+        c_i=xdata[2],
+    )
+    fit_std = Fit(
+        model=model,
+        a_i=xdata[0],
+        b_i=xdata[1],
+        c_i=xdata[2],
+        minimizer=MINPACK
+    )
+    fit_constrained = Fit(
+        model=model,
+        a_i=xdata[0],
+        b_i=xdata[1],
+        c_i=xdata[2],
+        constraints=[Equality(a + b + c, 180)]
+    )
+    fit_none_result = fit_none.execute()
+    fit_new_result = fit.execute()
+    std_result = fit_std.execute()
+    constr_result = fit_constrained.execute()
+
+    # The total of averages should equal the total of the params by definition
+    mean_total = np.mean(np.sum(xdata, axis=0))
+    params_tot = std_result.value(a) + std_result.value(b) + std_result.value(c)
+    assert mean_total / params_tot == pytest.approx(1.0, 1e-4)
+
+    # The total after constraining to 180 should be exactly 180.
+    params_tot = constr_result.value(a) + constr_result.value(b) + constr_result.value(c)
+    assert isinstance(fit_constrained.minimizer, SLSQP)
+    assert 180.0 == pytest.approx(params_tot, 1e-4)
+
+    # The standard method and the Constrained object called without constraints
+    # should behave roughly the same.
+    assert fit_new_result.value(b) == pytest.approx(std_result.value(b), 1e-4)
+    assert fit_new_result.value(a) == pytest.approx(std_result.value(a), 1e-4)
+    assert fit_new_result.value(c) == pytest.approx(std_result.value(c), 1e-4)
+
+    # When fitting with a dataset set to None, for this example the value of c
+    # should be unaffected.
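As an aside, for equal-sized, unweighted datasets the constrained optimum used above has a closed form: least squares spreads the angle deficit evenly over the three components, which is why params_tot lands on exactly 180. A hand check of that arithmetic:

import numpy as np

angles = np.array([
    [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.],
    [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8],
    [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1],
])
means = angles.mean(axis=1)
# With equal weights and equal N, minimising the summed squares subject
# to a + b + c == 180 shifts every mean by the same amount:
constrained = means + (180.0 - means.sum()) / 3.0
print(constrained, constrained.sum())  # sums to exactly 180.0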
+ assert fit_none_result.value(a) == pytest.approx(std_result.value(a), 1e-4) + assert fit_none_result.value(b) == pytest.approx(std_result.value(b), 1e-4) + assert fit_none_result.value(c) == pytest.approx(c.value) + + fit_none_constr = Fit( + model=model, + a_i=xdata[0], + b_i=xdata[1], + c_i=None, + constraints=[Equality(a + b + c, 180)] + ) + none_constr_result = fit_none_constr.execute() + params_tot = none_constr_result.value(a) + none_constr_result.value(b) + none_constr_result.value(c) + assert 180.0 == pytest.approx(params_tot, 1e-4) + + +def test_vector_parameter_error(): + """ + Tests `Fit` parameter error estimation with + vector models. This is done by using the typical angles of a triangle + example. For completeness, we throw in covariance between the angles. + + As per 0.5.0 this test has been updated in an important way. Previously + the covariance matrix was estimated on a per component basis for global + fitting problems. This was incorrect, but no solution was possible at + the time. Now, we calculate the covariance matrix from the Hessian of + the function being optimized, and so now the covariance is calculated + correctly in those scenarios. + + As a result for this particular test however, it means we lose + sensitivity to the error of each parameter separately. This makes sense, + since the uncertainty is now being spread out over the components. To + regain this, the user should just fit the components separately. + """ + N = 10000 + a, b, c = parameters('a, b, c') + a_i, b_i, c_i = variables('a_i, b_i, c_i') + + model = {a_i: a, b_i: b, c_i: c} + + np.random.seed(1) + # Sample from a multivariate normal with correlation. + pcov = np.array([[0.4, 0.3, 0.5], [0.3, 0.8, 0.4], [0.5, 0.4, 1.2]]) + xdata = np.random.multivariate_normal([10, 100, 70], pcov, N).T + + fit = Fit( + model=model, + a_i=xdata[0], + b_i=xdata[1], + c_i=xdata[2], + ) + fit_std = Fit( + model=model, + a_i=xdata[0], + b_i=xdata[1], + c_i=xdata[2], + minimizer=MINPACK + ) + fit_new_result = fit.execute() + std_result = fit_std.execute() + + # When no errors are given, we default to `absolute_sigma=False`, since + # that is the best we can do. + assert not fit.absolute_sigma + assert not fit_std.absolute_sigma + + # The standard method and the Constrained object called without constraints + # should give roughly the same parameter values. 
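A standalone check of the analytical expectation invoked further down: the standard error of each component's sample mean is sqrt(Sigma_ii / N), with the off-diagonal covariance playing no role in the per-component stdev:

import numpy as np

np.random.seed(1)
pcov = np.array([[0.4, 0.3, 0.5], [0.3, 0.8, 0.4], [0.5, 0.4, 1.2]])
sample = np.random.multivariate_normal([10, 100, 70], pcov, 10000).T

# Analytical standard error of the mean per component: sqrt(Sigma_ii / N).
analytical = np.sqrt(np.diag(pcov) / sample.shape[1])
empirical = sample.std(axis=1, ddof=1) / np.sqrt(sample.shape[1])
print(analytical)  # [0.00632, 0.00894, 0.01095]
print(empirical)   # agrees to within about a percent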
+ assert fit_new_result.value(a) == pytest.approx(std_result.value(a), 1e-3) + assert fit_new_result.value(b) == pytest.approx(std_result.value(b), 1e-3) + assert fit_new_result.value(c) == pytest.approx(std_result.value(c), 1e-3) + + # in this toy model, fitting is identical to simply taking the average + assert fit_new_result.value(a) == pytest.approx(np.mean(xdata[0]), 1e-4) + assert fit_new_result.value(b) == pytest.approx(np.mean(xdata[1]), 1e-4) + assert fit_new_result.value(c) == pytest.approx(np.mean(xdata[2]), 1e-4) + + # All stdev's must be equal + assert fit_new_result.stdev(a) == pytest.approx(fit_new_result.stdev(b), 1e-3) + assert fit_new_result.stdev(a) == pytest.approx(fit_new_result.stdev(c), 1e-3) + # Test for a miss on the exact value + assert not fit_new_result.stdev(a) == pytest.approx(np.sqrt(pcov[0, 0]/N), 1e-3) + assert not fit_new_result.stdev(b) == pytest.approx(np.sqrt(pcov[1, 1]/N), 1e-3) + assert not fit_new_result.stdev(c) == pytest.approx(np.sqrt(pcov[2, 2]/N), 1e-3) + + # The standard object actually does not predict the right values for + # stdev, because its method for computing them apparently does not allow + # for vector valued functions. + # So actually, for vector valued functions its better to use + # Fit, though this does not give covariances. + + # With the correct values of sigma, absolute_sigma=True should be in + # agreement with analytical. + sigmadata = np.array([ + np.sqrt(pcov[0, 0]), + np.sqrt(pcov[1, 1]), + np.sqrt(pcov[2, 2]) + ]) + fit = Fit( + model=model, + a_i=xdata[0], + b_i=xdata[1], + c_i=xdata[2], + sigma_a_i=sigmadata[0], + sigma_b_i=sigmadata[1], + sigma_c_i=sigmadata[2], + ) + assert fit.absolute_sigma + fit_result = fit.execute() + # The standard deviation in the mean is stdev/sqrt(N), + # see test_param_error_analytical + assert fit_result.stdev(a)/np.sqrt(pcov[0, 0]/N) == pytest.approx(1.0, 1e-4) + assert fit_result.stdev(b)/np.sqrt(pcov[1, 1]/N) == pytest.approx(1.0, 1e-4) + assert fit_result.stdev(c)/np.sqrt(pcov[2, 2]/N) == pytest.approx(1.0, 1e-4) + + # Finally, we should confirm that with unrealistic sigma and + # absolute_sigma=True, we are no longer in agreement with the analytical result + # Let's take everything to be 1 to point out the dangers of doing so. + sigmadata = np.array([1, 1, 1]) + fit2 = Fit( + model=model, + a_i=xdata[0], + b_i=xdata[1], + c_i=xdata[2], + sigma_a_i=sigmadata[0], + sigma_b_i=sigmadata[1], + sigma_c_i=sigmadata[2], + absolute_sigma=True + ) + fit_result = fit2.execute() + # Should be off bigly + assert not fit_result.stdev(a)/np.sqrt(pcov[0, 0]/N) == pytest.approx(1.0, 1e-1) + assert not fit_result.stdev(b)/np.sqrt(pcov[1, 1]/N) == pytest.approx(1.0, 1e-1) + assert not fit_result.stdev(c)/np.sqrt(pcov[2, 2]/N) == pytest.approx(1.0, 1e-5) + + +def test_error_advanced(): + """ + Compare the error propagation of Fit against + NumericalLeastSquares. + Models an example from the mathematica docs and tries to replicate it: + http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html + """ + data = [ + [0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6], + [1., 2.2, 5.9], [1.8, 2.4, 7.2], [9., 1.7, 7.], + [7.9, 8., 10.4], [4.9, 3.9, 9.], [2.3, 2.6, 7.4], + [4.7, 8.4, 10.] 
+    ]
+    xdata, ydata, zdata = [np.array(data) for data in zip(*data)]
+    # errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2])
+
+    a = Parameter('a', 3.0)
+    b = Parameter('b', 0.9)
+    c = Parameter('c', 5.0)
+    x = Variable('x')
+    y = Variable('y')
+    z = Variable('z')
+    model = {z: a * log(b * x + c * y)}
+
+    const_fit = Fit(model, xdata, ydata, zdata, absolute_sigma=False)
+    assert len(const_fit.model(x=xdata, y=ydata, a=2, b=2, c=5)) == 1
+    assert const_fit.model(x=xdata, y=ydata, a=2, b=2, c=5)[0].shape == (10,)
+
+    assert len(const_fit.model.eval_jacobian(x=xdata, y=ydata, a=2, b=2, c=5)) == 1
+    assert const_fit.model.eval_jacobian(x=xdata, y=ydata, a=2, b=2, c=5)[0].shape == (3, 10)
+
+    assert len(const_fit.model.eval_hessian(x=xdata, y=ydata, a=2, b=2, c=5)) == 1
+    assert const_fit.model.eval_hessian(x=xdata, y=ydata, a=2, b=2, c=5)[0].shape == (3, 3, 10)
+
+    assert const_fit.objective(a=2, b=2, c=5).shape == tuple()
+    assert const_fit.objective.eval_jacobian(a=2, b=2, c=5).shape == (3,)
+
+    assert const_fit.objective.eval_hessian(a=2, b=2, c=5).shape == (3, 3)
+    assert const_fit.objective.eval_hessian(a=2, b=2, c=5).dtype != object
+
+    const_result = const_fit.execute()
+    fit = Fit(model, xdata, ydata, zdata,
+              absolute_sigma=False, minimizer=MINPACK)
+    std_result = fit.execute()
+
+    assert const_fit.absolute_sigma == fit.absolute_sigma
+
+    assert const_result.value(a) == pytest.approx(std_result.value(a), 1e-4)
+    assert const_result.value(b) == pytest.approx(std_result.value(b), 1e-4)
+    assert const_result.value(c) == pytest.approx(std_result.value(c), 1e-4)
+
+    # This used to be a tighter equality test, but since we now use the
+    # Hessian we actually get a more accurate value from the standard fit
+    # than from MINPACK. Hence we check if it is roughly equal, and if our
+    # stdev is greater than that of minpack.
+    assert const_result.stdev(a) / std_result.stdev(a) == pytest.approx(1, 1e-2)
+    assert const_result.stdev(b) / std_result.stdev(b) == pytest.approx(1, 1e-1)
+    assert const_result.stdev(c) / std_result.stdev(c) == pytest.approx(1, 1e-2)
+
+    assert const_result.stdev(a) >= std_result.stdev(a)
+    assert const_result.stdev(b) >= std_result.stdev(b)
+    assert const_result.stdev(c) >= std_result.stdev(c)
+
+
+def test_gaussian_2d_fitting():
+    """
+    Tests fitting to a scalar Gaussian function with 2 independent
+    variables. Very sensitive to initial guesses, and if they are chosen too
+    restrictively, Fit fails to converge.
+    It therefore appears to be more sensitive than NumericalLeastSquares.
+    """
+    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
+    cov = [[0.2**2, 0], [0, 0.1**2]]
+
+    np.random.seed(0)
+    data = np.random.multivariate_normal(mean, cov, 100000)
+
+    # Insert them as y,x here since np reverses the Cartesian convention.
+    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
+                                           range=[[0.0, 1.0], [0.0, 1.0]])
+    xcentres = (xedges[:-1] + xedges[1:]) / 2
+    ycentres = (yedges[:-1] + yedges[1:]) / 2
+
+    # Make a valid grid to match ydata
+    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')
+
+    x0 = Parameter(value=mean[0], min=0.0, max=1.0)
+    sig_x = Parameter(value=0.2, min=0.0, max=0.3)
+    y0 = Parameter(value=mean[1], min=0.0, max=1.0)
+    sig_y = Parameter(value=0.1, min=0.0, max=0.3)
+    A = Parameter(value=np.mean(ydata), min=0.0)
+    x = Variable('x')
+    y = Variable('y')
+    g = Variable('g')
+    model = GradientModel({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
+    fit = Fit(model, x=xx, y=yy, g=ydata)
+    fit_result = fit.execute()
+
+    assert fit_result.value(x0) == pytest.approx(np.mean(data[:, 0]), 1e-3)
+    assert fit_result.value(y0) == pytest.approx(np.mean(data[:, 1]), 1e-3)
+    assert np.abs(fit_result.value(sig_x)) == pytest.approx(np.std(data[:, 0]), 1e-2)
+    assert np.abs(fit_result.value(sig_y)) == pytest.approx(np.std(data[:, 1]), 1e-2)
+    assert fit_result.r_squared > 0.96
+
+    # Compare with industry standard MINPACK
+    fit_std = Fit(model, x=xx, y=yy, g=ydata, minimizer=MINPACK)
+    fit_std_result = fit_std.execute()
+
+    assert fit_std_result.value(x0) == pytest.approx(fit_result.value(x0), 1e-4)
+    assert fit_std_result.value(y0) == pytest.approx(fit_result.value(y0), 1e-4)
+    assert fit_std_result.value(sig_x) == pytest.approx(fit_result.value(sig_x), 1e-4)
+    assert fit_std_result.value(sig_y) == pytest.approx(fit_result.value(sig_y), 1e-4)
+    assert fit_std_result.r_squared == pytest.approx(fit_result.r_squared, 1e-4)
+
+
+def test_fixed_and_constrained():
+    """
+    Taken from #165. Fixing parameters and constraining others caused a
+    TypeError: missing a required argument: 'theta1', which was caused by a
+    mismatch between the shape of the initial guesses given and the number
+    of parameters the constraints expected. The initial_guesses no longer
+    contained those corresponding to fixed parameters.
+    """
+    phi1, phi2, theta1, theta2 = parameters('phi1, phi2, theta1, theta2')
+    x, y = variables('x, y')
+
+    model_dict = {y: (1 + x * theta1 + theta2 * x ** 2) /
+                     (1 + phi1 * x * theta1 + phi2 * theta2 * x ** 2)}
+    constraints = [GreaterThan(theta1, theta2)]
+
+    xdata = np.array([0., 0.000376, 0.000752, 0.0015, 0.00301, 0.00601, 0.00902])
+    ydata = np.array([1., 1.07968041, 1.08990638, 1.12151629, 1.13068452, 1.15484109, 1.19883952])
+
+    phi1.value = 0.845251484373516
+    phi1.fixed = True
+
+    phi2.value = 0.7105427053026403
+    phi2.fixed = True
+
+    fit = Fit(model_dict, x=xdata, y=ydata,
+              constraints=constraints, minimizer=SLSQP)
+    fit_result_slsqp = fit.execute()
+    # The data and fixed parameters should be partialed away.
+    objective_kwargs = {
+        phi2.name: phi2.value,
+        phi1.name: phi1.value,
+        x.name: xdata,
+    }
+    constraint_kwargs = {
+        phi2.name: phi2.value,
+        phi1.name: phi1.value,
+    }
+    for index, constraint in enumerate(fit.minimizer.constraints):
+        assert isinstance(constraint, MinimizeModel)
+        assert constraint.model == fit.constraints[index]
+        assert constraint.data == fit.data
+        assert constraint.data == fit.objective.data
+
+        # Data should be the same memory location so they can share state.
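To make the "partialed away" mechanics above concrete, here is a sketch using functools.partial (illustrative only; residuals and free are hypothetical names, and symfit's internals differ): binding the data and the fixed phi1/phi2 leaves a callable in just the free parameters, which is the shape the minimizer's initial guesses have to match.

import numpy as np
from functools import partial

def residuals(theta1, theta2, phi1, phi2, x, y):
    # Same rational model as in the test above.
    y_hat = (1 + x * theta1 + theta2 * x**2) / \
            (1 + phi1 * x * theta1 + phi2 * theta2 * x**2)
    return y - y_hat

free = partial(residuals,
               phi1=0.845251484373516, phi2=0.7105427053026403,
               x=np.array([0., 0.000376]), y=np.array([1., 1.07968041]))
print(free(theta1=1.0, theta2=0.5))  # only theta1 and theta2 remain free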
+    assert id(fit.objective.data) == id(constraint.data)
+
+    # Test if the fixed params have been partialed away
+    assert key2str(constraint._invariant_kwargs).keys() == constraint_kwargs.keys()
+    assert key2str(fit.objective._invariant_kwargs).keys() == objective_kwargs.keys()
+
+    # Compare the shapes. The constraint shape should now be the same as
+    # that of the objective
+    obj_val = fit.minimizer.objective(fit.minimizer.initial_guesses)
+    obj_jac = fit.minimizer.wrapped_jacobian(fit.minimizer.initial_guesses)
+
+    # scalars don't have lengths
+    with pytest.raises(TypeError):
+        len(obj_val)
+    assert len(obj_jac) == 2
+
+    for index, constraint in enumerate(fit.minimizer.wrapped_constraints):
+        assert constraint['type'] == 'ineq'
+        assert 'args' not in constraint
+        assert callable(constraint['fun'])
+        assert callable(constraint['jac'])
+
+        # The argument should be the partialed Constraint object
+        assert constraint['fun'] == fit.minimizer.constraints[index]
+        assert isinstance(constraint['fun'], MinimizeModel)
+        assert 'jac' in constraint
+
+        # Test the shapes
+        cons_val = constraint['fun'](fit.minimizer.initial_guesses)
+        cons_jac = constraint['jac'](fit.minimizer.initial_guesses)
+        assert cons_val.shape == (1,)
+        assert isinstance(cons_val[0], float)
+        assert obj_jac.shape == cons_jac.shape
+        assert obj_jac.shape == (2,)
+
+
+def test_interdependency_constrained():
+    """
+    Test a model with interdependent components, and with constraints which
+    depend on the Model's output.
+    This is done in the MatrixSymbol formalism, using a Tikhonov
+    regularization as an example. In this, a matrix inverse has to be
+    calculated and is used multiple times. Therefore we split that term off
+    into a separate component, so the inverse only has to be computed once
+    per model call.
+
+    See https://arxiv.org/abs/1901.05348 for a more detailed background.
+    """
+    N = Symbol('N', integer=True)
+    M = MatrixSymbol('M', N, N)
+    W = MatrixSymbol('W', N, N)
+    I = MatrixSymbol('I', N, N)
+    y = MatrixSymbol('y', N, 1)
+    c = MatrixSymbol('c', N, 1)
+    a, = parameters('a')
+    z, = variables('z')
+    i = Idx('i')
+
+    model_dict = {
+        W: Inverse(I + M / a ** 2),
+        c: - W * y,
+        z: sqrt(c.T * c)
+    }
+    # Sympy currently does not support derivatives of matrix expressions,
+    # so we use CallableModel instead of Model.
+    model = CallableModel(model_dict)
+
+    # Generate data
+    iden = np.eye(2)
+    M_mat = np.array([[2, 1], [3, 4]])
+    y_vec = np.array([[3], [5]])
+    eval_model = model(I=iden, M=M_mat, y=y_vec, a=0.1)
+    # Calculate the answers 'manually' so we know it was done properly
+    W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2)
+    c_manual = - np.atleast_2d(W_manual.dot(y_vec))
+    z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual)))
+
+    assert y_vec.shape == (2, 1)
+    assert M_mat.shape == (2, 2)
+    assert iden.shape == (2, 2)
+    assert W_manual.shape == (2, 2)
+    assert c_manual.shape == (2, 1)
+    assert z_manual.shape == (1, 1)
+    assert W_manual == pytest.approx(eval_model.W)
+    assert c_manual == pytest.approx(eval_model.c)
+    assert z_manual == pytest.approx(eval_model.z)
+    fit = Fit(model, z=z_manual, I=iden, M=M_mat, y=y_vec)
+    fit_result = fit.execute()
+
+    # See if a == 0.1 was reconstructed properly. Since only a**2 features
+    # in the equations, we check for the absolute value. Setting a.min = 0.0
+    # is not appreciated by the Minimizer, it seems.
+    assert np.abs(fit_result.value(a)) == pytest.approx(0.1)
+
+
+def test_data_for_constraint():
+    """
+    Test the signature handling when constraints are at play. Constraints
+    should take separate data, but kwargs that are found in neither the
+    model nor the constraints should still raise an error.
+    """
+    A, mu, sig = parameters('A, mu, sig')
+    x, y, Y = variables('x, y, Y')
+
+    model = Model({y: A * Gaussian(x, mu=mu, sig=sig)})
+    constraint = Model.as_constraint(Y, model, constraint_type=Eq)
+
+    np.random.seed(2)
+    xdata = np.random.normal(1.2, 2, 10)
+    ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))), density=True)
+
+    # Allowed
+    fit = Fit(model, x=xdata, y=ydata, Y=2, constraints=[constraint])
+    assert isinstance(fit.objective, LeastSquares)
+    assert isinstance(fit.minimizer.constraints[0], MinimizeModel)
+    fit = Fit(model, x=xdata, y=ydata)
+    assert isinstance(fit.objective, LeastSquares)
+    fit = Fit(model, x=xdata, objective=LogLikelihood)
+    assert isinstance(fit.objective, LogLikelihood)
+
+    # Not allowed
+    with pytest.raises(TypeError):
+        fit = Fit(model, x=xdata, y=ydata, Y=2)
+
+    with pytest.raises(TypeError):
+        fit = Fit(model, x=xdata, y=ydata, Y=2, Z=3, constraints=[constraint])
+
+    with pytest.raises(TypeError):
+        fit = Fit(model, x=xdata, y=ydata, objective=LogLikelihood)
+
+
+def test_constrained_dependent_on_model():
+    """
+    For a simple Gaussian distribution, we test if Models of various types
+    can be used as constraints. Of particular interest are NumericalModels,
+    which can be used to fix the integral of the model during the fit to 1,
+    as it should be for a probability distribution.
+    """
+    A, mu, sig = parameters('A, mu, sig')
+    x, y, Y = variables('x, y, Y')
+    i = Idx('i', (0, 1000))
+    sig.min = 0.0
+
+    model = GradientModel({y: A * Gaussian(x, mu=mu, sig=sig)})
+
+    # Generate data, 1000 samples from a N(1.2, 2) distribution
+    np.random.seed(2)
+    xdata = np.random.normal(1.2, 2, 1000)
+    ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))), density=True)
+    xcentres = (xedges[1:] + xedges[:-1]) / 2
+
+    # Unconstrained fit
+    fit = Fit(model, x=xcentres, y=ydata)
+    unconstr_result = fit.execute()
+
+    # Constraints must be scalar models.
+    with pytest.raises(ModelError):
+        Model.as_constraint([A - 1, sig - 1], model, constraint_type=Eq)
+
+    constraint_exact = Model.as_constraint(A * sqrt(2 * sympy.pi) * sig - 1,
+                                           model, constraint_type=Eq)
+    # Only when explicitly asked do models behave as constraints.
+    assert hasattr(constraint_exact, 'constraint_type')
+    assert constraint_exact.constraint_type == Eq
+    assert not hasattr(model, 'constraint_type')
+
+    # Now let's make some valid constraints and see if they are respected!
+    # FIXME These first two should be symbolic integrals over `y` instead,
+    # but currently this is not converted into a numpy/scipy function. So
+    # instead the first two are not valid constraints.
+    constraint_model = Model.as_constraint(A - 1, model, constraint_type=Eq)
+    constraint_exact = Eq(A, 1)
+    constraint_num = CallableNumericalModel.as_constraint(
+        {Y: lambda x, y: simps(y, x) - 1},  # Integrate using simps
+        model=model,
+        connectivity_mapping={Y: {x, y}},
+        constraint_type=Eq
+    )
+
+    # Test for all these different types of constraint.
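Background for the numerical constraint just built: for a density histogram, a Simpson integral of the bar heights over the bin centres comes out close to 1, and the CallableNumericalModel pins exactly that quantity to 1 during the fit. A standalone check, assuming scipy's simps as used above:

import numpy as np
from scipy.integrate import simps

np.random.seed(2)
xdata = np.random.normal(1.2, 2, 1000)
ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))), density=True)
xcentres = (xedges[1:] + xedges[:-1]) / 2

# The quantity the constraint drives to exactly 1 during the fit:
print(simps(ydata, xcentres))  # close to 1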
+    for constraint in [constraint_model, constraint_exact, constraint_num]:
+        if not isinstance(constraint, Eq):
+            assert constraint.constraint_type == Eq
+
+        xcentres = (xedges[1:] + xedges[:-1]) / 2
+        fit = Fit(model, x=xcentres, y=ydata, constraints=[constraint])
+        # Test if conversion into a constraint was done properly
+        fit_constraint = fit.constraints[0]
+        assert fit.model.params == fit_constraint.params
+        assert fit_constraint.constraint_type == Eq
+
+        con_map = fit_constraint.connectivity_mapping
+        if isinstance(constraint, CallableNumericalModel):
+            assert con_map == {Y: {x, y}, y: {x, mu, sig, A}}
+            assert fit_constraint.independent_vars == [x]
+            assert fit_constraint.dependent_vars == [Y]
+            assert fit_constraint.interdependent_vars == [y]
+            assert fit_constraint.params == [A, mu, sig]
+        else:
+            # TODO if these constraints can somehow be written as integrals
+            # depending on y and x this if/else should be removed.
+            assert con_map == {fit_constraint.dependent_vars[0]: {A}}
+            assert fit_constraint.independent_vars == []
+            assert len(fit_constraint.dependent_vars) == 1
+            assert fit_constraint.interdependent_vars == []
+            assert fit_constraint.params == [A, mu, sig]
+
+        # Finally, test if the constraint worked
+        fit_result = fit.execute(options={'eps': 1e-15, 'ftol': 1e-10})
+        unconstr_value = fit.minimizer.wrapped_constraints[0]['fun'](**unconstr_result.params)
+        constr_value = fit.minimizer.wrapped_constraints[0]['fun'](**fit_result.params)
+
+        # TODO: because of a pytest bug we have to solve it like this
+        assert constr_value[0] == pytest.approx(0, abs=1e-10)
+        # And if it was very poorly met before
+        assert not unconstr_value[0] == pytest.approx(0.0, 1e-1)
+
+
+def test_constrained_dependent_on_matrixmodel():
+    """
+    Similar to test_constrained_dependent_on_model, but now using
+    MatrixSymbols. This is much more powerful, since now the constraint can
+    really be written down as a symbolic one as well.
+    """
+    A, mu, sig = parameters('A, mu, sig')
+    M = symbols('M', integer=True)  # Number of measurements
+
+    # Create vectors for all the quantities
+    x = MatrixSymbol('x', M, 1)
+    dx = MatrixSymbol('dx', M, 1)
+    y = MatrixSymbol('y', M, 1)
+    I = MatrixSymbol('I', M, 1)  # 'identity' vector
+    Y = MatrixSymbol('Y', 1, 1)
+    B = MatrixSymbol('B', M, 1)
+    i = Idx('i', M)
+
+    # Looks overly complicated, but it's just a simple Gaussian
+    model = CallableModel(
+        {y: A * sympy.exp(- HadamardProduct(B, B) / (2 * sig**2)) / sympy.sqrt(2*sympy.pi*sig**2),
+         B: (x - mu * I)}
+    )
+    assert model.independent_vars == [I, x]
+    assert model.dependent_vars == [y]
+    assert model.interdependent_vars == [B]
+    assert model.params == [A, mu, sig]
+
+    # Generate data, sample from a N(1.2, 2) distribution. Has to be 2D.
+    np.random.seed(2)
+    # TODO: sample points on a Gaussian and add appropriate noise.
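The MatrixSymbol constraint constructed below, Sum(y[i, 0] * dx[i, 0], i) - 1, is the same normalisation written as a Riemann sum; for a density histogram that sum equals 1 by construction:

import numpy as np

np.random.seed(2)
xdata = np.random.normal(1.2, 2, size=10000)
ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))), density=True)
dx = np.diff(xedges)

# density=True normalises by bin width, so the Riemann sum is exact.
print(np.sum(ydata * dx))  # 1.0 up to floating point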
+ xdata = np.random.normal(1.2, 2, size=10000) + ydata, xedges = np.histogram(xdata, bins=int(np.sqrt(len(xdata))), density=True) + xcentres = np.atleast_2d((xedges[1:] + xedges[:-1]) / 2).T + xdiff = np.atleast_2d((xedges[1:] - xedges[:-1])).T + ydata = np.atleast_2d(ydata).T + Idata = np.ones_like(xcentres) + + assert xcentres.shape == (int(np.sqrt(len(xdata))), 1) + assert xdiff.shape == (int(np.sqrt(len(xdata))), 1) + assert ydata.shape == (int(np.sqrt(len(xdata))), 1) + + fit = Fit(model, x=xcentres, y=ydata, I=Idata) + unconstr_result = fit.execute() + + constraint = CallableModel({Y: Sum(y[i, 0] * dx[i, 0], i) - 1}) + + with pytest.raises(ModelError): fit = Fit(model, x=xcentres, y=ydata, dx=xdiff, M=len(xcentres), I=Idata, constraints=[constraint]) - # After treatment, our constraint should have `y` & `b` dependencies - self.assertEqual(fit.constraints[0].independent_vars, [I, M, dx, x]) - self.assertEqual(fit.constraints[0].dependent_vars, [Y]) - self.assertEqual(fit.constraints[0].interdependent_vars, [B, y]) - self.assertEqual(fit.constraints[0].params, [A, mu, sig]) - self.assertEqual(fit.constraints[0].constraint_type, Eq) - self.assertIsInstance(fit.objective, LeastSquares) - self.assertIsInstance(fit.minimizer.constraints[0], MinimizeModel) - - self.assertEqual({k for k, v in fit.data.items() if v is not None}, - {x, y, dx, M, I, fit.model.sigmas[y]}) - # These belong to internal variables - self.assertEqual({k for k, v in fit.data.items() if v is None}, - {constraint.sigmas[Y], Y}) - - constr_result = fit.execute() - # The constraint should not be met for the unconstrained fit - self.assertNotAlmostEqual( - fit.minimizer.wrapped_constraints[0]['fun']( - **unconstr_result.params - )[0], 0, 3 - ) - # And at high precision with constraint - self.assertAlmostEqual( - fit.minimizer.wrapped_constraints[0]['fun']( - **constr_result.params - )[0], 0, 8 - ) - - # Constraining will negatively effect the R^2 value, but... - self.assertLess(constr_result.r_squared, unconstr_result.r_squared) - # both should be pretty good - self.assertGreater(constr_result.r_squared, 0.99) - - def test_fixed_and_constrained_tc(self): - """ - Taken from #165. Make sure the TrustConstr minimizer can deal with - constraints and fixed parameters. - """ - phi1, phi2, theta1, theta2 = parameters('phi1, phi2, theta1, theta2') - x, y = variables('x, y') - - model_dict = {y: (1 + x * theta1 + theta2 * x ** 2) / ( - 1 + phi1 * x * theta1 + phi2 * theta2 * x ** 2)} - constraints = [GreaterThan(theta1, theta2)] - - xdata = np.array( - [0., 0.000376, 0.000752, 0.0015, 0.00301, 0.00601, 0.00902]) - ydata = np.array( - [1., 1.07968041, 1.08990638, 1.12151629, 1.13068452, 1.15484109, - 1.19883952]) - - phi1.value = 0.845251484373516 - phi1.fixed = True - - phi2.value = 0.7105427053026403 - phi2.fixed = True - - fit = Fit(model_dict, x=xdata, y=ydata, - constraints=constraints, minimizer=TrustConstr) - fit_result_tc = fit.execute() - # The data and fixed parameters should be partialed away. - objective_kwargs = { - phi2.name: phi2.value, - phi1.name: phi1.value, - x.name: xdata, - } - constraint_kwargs = { - phi2.name: phi2.value, - phi1.name: phi1.value, - } - for index, constraint in enumerate(fit.minimizer.constraints): - self.assertIsInstance(constraint, MinimizeModel) - self.assertEqual(constraint.model, fit.constraints[index]) - self.assertEqual(constraint.data, fit.data) - self.assertEqual(constraint.data, fit.objective.data) - - # Data should be the same memory location so they can share state. 
- self.assertEqual(id(fit.objective.data), - id(constraint.data)) - - # Test if the data and fixed params have been partialed away - self.assertEqual(key2str(constraint._invariant_kwargs).keys(), - constraint_kwargs.keys()) - self.assertEqual(key2str(fit.objective._invariant_kwargs).keys(), - objective_kwargs.keys()) - - # Compare the shapes. The constraint shape should now be the same as - # that of the objective - obj_val = fit.minimizer.objective(fit.minimizer.initial_guesses) - obj_jac = fit.minimizer.wrapped_jacobian(fit.minimizer.initial_guesses) - with self.assertRaises(TypeError): - len(obj_val) # scalars don't have lengths - self.assertEqual(len(obj_jac), 2) - - for index, constraint in enumerate(fit.minimizer.wrapped_constraints): - self.assertTrue(callable(constraint.fun)) - self.assertTrue(callable(constraint.jac)) - - # The argument should be the partialed Constraint object - self.assertEqual(constraint.fun, fit.minimizer.constraints[index]) - self.assertIsInstance(constraint.fun, MinimizeModel) - - # Test the shapes - cons_val = constraint.fun(fit.minimizer.initial_guesses) - cons_jac = constraint.jac(fit.minimizer.initial_guesses) - self.assertEqual(cons_val.shape, (1,)) - self.assertIsInstance(cons_val[0], float) - self.assertEqual(obj_jac.shape, cons_jac.shape) - self.assertEqual(obj_jac.shape, (2,)) - - def test_constrainedminimizers(self): - """ - Compare the different constrained minimizers, to make sure all support - constraints, and converge to the same answer. - """ - minimizers = list(subclasses(ScipyConstrainedMinimize)) - x = Parameter('x', value=-1.0) - y = Parameter('y', value=1.0) - z = Variable('z') - model = Model({z: 2 * x * y + 2 * x - x ** 2 - 2 * y ** 2}) - - # First we try an unconstrained fit - results = [] - for minimizer in minimizers: - fit = Fit(- model, minimizer=minimizer) - self.assertIsInstance(fit.objective, MinimizeModel) - fit_result = fit.execute(tol=1e-15) - results.append(fit_result) - - # Compare the parameter values. - for r1, r2 in zip(results[:-1], results[1:]): - self.assertAlmostEqual(r1.value(x), r2.value(x), 6) - self.assertAlmostEqual(r1.value(y), r2.value(y), 6) - np.testing.assert_almost_equal(r1.covariance_matrix, - r2.covariance_matrix) - - constraints = [ - Ge(y - 1, 0), # y - 1 >= 0, - Eq(x ** 3 - y, 0), # x**3 - y == 0, - ] - - # Constrained fit. - results = [] - for minimizer in minimizers: - if minimizer is COBYLA: - # COBYLA only supports inequility. - continue - fit = Fit(- model, constraints=constraints, minimizer=minimizer) - fit_result = fit.execute(tol=1e-15) - results.append(fit_result) - - for r1, r2 in zip(results[:-1], results[1:]): - self.assertAlmostEqual(r1.value(x), r2.value(x), 6) - self.assertAlmostEqual(r1.value(y), r2.value(y), 6) - np.testing.assert_almost_equal(r1.covariance_matrix, - r2.covariance_matrix) - - def test_trustconstr(self): - """ - Solve the standard constrained example from - https://docs.scipy.org/doc/scipy-0.18.1/reference/tutorial/optimize.html#constrained-minimization-of-multivariate-scalar-functions-minimize - using the trust-constr method. 
- """ - def func(x, sign=1.0): - """ Objective function """ - return sign*(2*x[0]*x[1] + 2*x[0] - x[0]**2 - 2*x[1]**2) - - def func_jac(x, sign=1.0): - """ Derivative of objective function """ - dfdx0 = sign*(-2*x[0] + 2*x[1] + 2) - dfdx1 = sign*(2*x[0] - 4*x[1]) - return np.array([ dfdx0, dfdx1 ]) - - def func_hess(x, sign=1.0): - """ Derivative of objective function """ - dfdx2 = sign*(-2) - dfdxdy = sign * 2 - dfdy2 = sign * (-4) - return np.array([[ dfdx2, dfdxdy ], [ dfdxdy, dfdy2 ]]) - - def cons_f(x): - return [x[1] - 1, x[0]**3 - x[1]] - - def cons_J(x): - return [[0, 1], [3 * x[0] ** 2, -1]] - - def cons_H(x, v): - return v[0] * np.array([[0, 0], [0, 0]]) + \ - v[1] * np.array([[6 * x[0], 0], [0, 0]]) - - # Unconstrained fit - res = minimize(func, [-1.0, 1.0], args=(-1.0,), - jac=func_jac, hess=func_hess, method='trust-constr') - np.testing.assert_almost_equal(res.x, [2, 1]) - - # Constrained fit - nonlinear_constraint = NonlinearConstraint(cons_f, 0, [np.inf, 0], - jac=cons_J, hess=cons_H) - res_constr = minimize(func, [-1.0, 1.0], args=(-1.0,), tol=1e-15, - jac=func_jac, hess=func_hess, method='trust-constr', - constraints=[nonlinear_constraint]) - np.testing.assert_almost_equal(res_constr.x, [1, 1]) - - # Symfit equivalent code - x = Parameter('x', value=-1.0) - y = Parameter('y', value=1.0) - z = Variable('z') - model = Model({z: 2 * x * y + 2 * x - x ** 2 - 2 * y ** 2}) - - # Unconstrained fit first, see if we get the known result. - fit = Fit(-model, minimizer=TrustConstr) - fit_result = fit.execute() - np.testing.assert_almost_equal(list(fit_result.params.values()), [2, 1]) - - # Now we are ready for the constrained fit. - constraints = [ - Le(- y + 1, 0), # y - 1 >= 0, - Eq(x ** 3 - y, 0), # x**3 - y == 0, - ] - fit = Fit(-model, constraints=constraints, minimizer=TrustConstr) + constraint = CallableModel.as_constraint( + {Y: Sum(y[i, 0] * dx[i, 0], i) - 1}, + model=model, + constraint_type=Eq + ) + assert constraint.independent_vars == [I, M, dx, x] + assert constraint.dependent_vars == [Y] + assert constraint.interdependent_vars == [B, y] + assert constraint.params == [A, mu, sig] + assert constraint.constraint_type == Eq + + # Provide the extra data needed for the constraints as well + fit = Fit(model, x=xcentres, y=ydata, dx=xdiff, M=len(xcentres), + I=Idata, constraints=[constraint]) + + # After treatment, our constraint should have `y` & `b` dependencies + assert fit.constraints[0].independent_vars == [I, M, dx, x] + assert fit.constraints[0].dependent_vars == [Y] + assert fit.constraints[0].interdependent_vars == [B, y] + assert fit.constraints[0].params == [A, mu, sig] + assert fit.constraints[0].constraint_type == Eq + assert isinstance(fit.objective, LeastSquares) + assert isinstance(fit.minimizer.constraints[0], MinimizeModel) + + assert {k for k, v in fit.data.items() if v is not None} == {x, y, dx, M, I, fit.model.sigmas[y]} + # These belong to internal variables + assert {k for k, v in fit.data.items() if v is None} == {constraint.sigmas[Y], Y} + + constr_result = fit.execute() + # The constraint should not be met for the unconstrained fit + assert not fit.minimizer.wrapped_constraints[0]['fun'](**unconstr_result.params)[0] == pytest.approx(0, 1e-3) + # And at high precision with constraint + # TODO Change after resolve bug at pytest + assert fit.minimizer.wrapped_constraints[0]['fun'](**constr_result.params)[0] == pytest.approx(0, abs=1e-8) + + # Constraining will negatively effect the R^2 value, but... 
+ assert constr_result.r_squared < unconstr_result.r_squared + # both should be pretty good + assert constr_result.r_squared > 0.99 + + +def test_fixed_and_constrained_tc(): + """ + Taken from #165. Make sure the TrustConstr minimizer can deal with + constraints and fixed parameters. + """ + phi1, phi2, theta1, theta2 = parameters('phi1, phi2, theta1, theta2') + x, y = variables('x, y') + + model_dict = {y: (1 + x * theta1 + theta2 * x ** 2) / (1 + phi1 * x * theta1 + phi2 * theta2 * x ** 2)} + constraints = [GreaterThan(theta1, theta2)] + + xdata = np.array([0., 0.000376, 0.000752, 0.0015, 0.00301, 0.00601, 0.00902]) + ydata = np.array([1., 1.07968041, 1.08990638, 1.12151629, 1.13068452, 1.15484109, 1.19883952]) + + phi1.value = 0.845251484373516 + phi1.fixed = True + + phi2.value = 0.7105427053026403 + phi2.fixed = True + + fit = Fit(model_dict, x=xdata, y=ydata, + constraints=constraints, minimizer=TrustConstr) + fit_result_tc = fit.execute() + # The data and fixed parameters should be partialed away. + objective_kwargs = { + phi2.name: phi2.value, + phi1.name: phi1.value, + x.name: xdata, + } + constraint_kwargs = { + phi2.name: phi2.value, + phi1.name: phi1.value, + } + for index, constraint in enumerate(fit.minimizer.constraints): + assert isinstance(constraint, MinimizeModel) + assert constraint.model == fit.constraints[index] + assert constraint.data == fit.data + assert constraint.data == fit.objective.data + + # Data should be the same memory location so they can share state. + assert id(fit.objective.data) == id(constraint.data) + + # Test if the data and fixed params have been partialed away + assert key2str(constraint._invariant_kwargs).keys() == constraint_kwargs.keys() + assert key2str(fit.objective._invariant_kwargs).keys() == objective_kwargs.keys() + + # Compare the shapes. The constraint shape should now be the same as + # that of the objective + obj_val = fit.minimizer.objective(fit.minimizer.initial_guesses) + obj_jac = fit.minimizer.wrapped_jacobian(fit.minimizer.initial_guesses) + with pytest.raises(TypeError): + len(obj_val) # scalars don't have lengths + assert len(obj_jac) == 2 + + for index, constraint in enumerate(fit.minimizer.wrapped_constraints): + assert callable(constraint.fun) + assert callable(constraint.jac) + + # The argument should be the partialed Constraint object + assert constraint.fun == fit.minimizer.constraints[index] + assert isinstance(constraint.fun, MinimizeModel) + + # Test the shapes + cons_val = constraint.fun(fit.minimizer.initial_guesses) + cons_jac = constraint.jac(fit.minimizer.initial_guesses) + assert cons_val.shape == (1,) + assert isinstance(cons_val[0], float) + assert obj_jac.shape == cons_jac.shape + assert obj_jac.shape == (2,) + + +def test_constrainedminimizers(): + """ + Compare the different constrained minimizers, to make sure all support + constraints, and converge to the same answer. + """ + minimizers = list(subclasses(ScipyConstrainedMinimize)) + x = Parameter('x', value=-1.0) + y = Parameter('y', value=1.0) + z = Variable('z') + model = Model({z: 2 * x * y + 2 * x - x ** 2 - 2 * y ** 2}) + + # First we try an unconstrained fit + results = [] + for minimizer in minimizers: + fit = Fit(- model, minimizer=minimizer) + assert isinstance(fit.objective, MinimizeModel) + fit_result = fit.execute(tol=1e-15) + results.append(fit_result) + + # Compare the parameter values. 
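The unconstrained optimum all minimizers should agree on can be verified symbolically: the objective above is concave, and its only stationary point is (x, y) = (2, 1):

import sympy as sp

x, y = sp.symbols('x y')
f = 2*x*y + 2*x - x**2 - 2*y**2
# Solve df/dx = 0 and df/dy = 0 simultaneously.
print(sp.solve([sp.diff(f, x), sp.diff(f, y)], [x, y]))  # {x: 2, y: 1}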
+ for r1, r2 in zip(results[:-1], results[1:]): + assert r1.value(x) == pytest.approx(r2.value(x), 1e-6) + assert r1.value(y) == pytest.approx(r2.value(y), 1e-6) + assert r1.covariance_matrix == pytest.approx(r2.covariance_matrix) + + constraints = [ + Ge(y - 1, 0), # y - 1 >= 0, + Eq(x ** 3 - y, 0), # x**3 - y == 0, + ] + + # Constrained fit. + results = [] + for minimizer in minimizers: + if minimizer is COBYLA: + # COBYLA only supports inequality. + continue + fit = Fit(- model, constraints=constraints, minimizer=minimizer) fit_result = fit.execute(tol=1e-15) + results.append(fit_result) - # Test if the constrained results are equal - np.testing.assert_almost_equal(list(fit_result.params.values()), - res_constr.x) + for r1, r2 in zip(results[:-1], results[1:]): + assert r1.value(x) == pytest.approx(r2.value(x), 1e-6) + assert r1.value(y) == pytest.approx(r2.value(y), 1e-6) + assert r1.covariance_matrix == pytest.approx(r2.covariance_matrix) -if __name__ == '__main__': - unittest.main() +def test_trustconstr(): + """ + Solve the standard constrained example from + https://docs.scipy.org/doc/scipy-0.18.1/reference/tutorial/optimize.html#constrained-minimization-of-multivariate-scalar-functions-minimize + using the trust-constr method. + """ + def func(x, sign=1.0): + """ Objective function """ + return sign*(2*x[0]*x[1] + 2*x[0] - x[0]**2 - 2*x[1]**2) + + def func_jac(x, sign=1.0): + """ Derivative of objective function """ + dfdx0 = sign*(-2*x[0] + 2*x[1] + 2) + dfdx1 = sign*(2*x[0] - 4*x[1]) + return np.array([dfdx0, dfdx1]) + + def func_hess(x, sign=1.0): + """ Hessian of objective function """ + dfdx2 = sign*(-2) + dfdxdy = sign * 2 + dfdy2 = sign * (-4) + return np.array([[dfdx2, dfdxdy], [dfdxdy, dfdy2]]) + + def cons_f(x): + return [x[1] - 1, x[0]**3 - x[1]] + + def cons_J(x): + return [[0, 1], [3 * x[0] ** 2, -1]] + + def cons_H(x, v): + return v[0] * np.zeros((2, 2)) + v[1] * np.array([[6 * x[0], 0], [0, 0]]) + + # Unconstrained fit + res = minimize(func, [-1.0, 1.0], args=(-1.0,), + jac=func_jac, hess=func_hess, method='trust-constr') + assert res.x == pytest.approx([2, 1]) + + # Constrained fit + nonlinear_constraint = NonlinearConstraint(cons_f, 0, [np.inf, 0], + jac=cons_J, hess=cons_H) + res_constr = minimize(func, [-1.0, 1.0], args=(-1.0,), tol=1e-15, + jac=func_jac, hess=func_hess, method='trust-constr', + constraints=[nonlinear_constraint]) + assert res_constr.x == pytest.approx([1, 1]) + + # Symfit equivalent code + x = Parameter('x', value=-1.0) + y = Parameter('y', value=1.0) + z = Variable('z') + model = Model({z: 2 * x * y + 2 * x - x ** 2 - 2 * y ** 2}) + + # Unconstrained fit first, see if we get the known result. + fit = Fit(-model, minimizer=TrustConstr) + fit_result = fit.execute() + assert list(fit_result.params.values()) == pytest.approx([2, 1]) + + # Now we are ready for the constrained fit. 
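A quick sanity check on the constrained optimum sought below: substituting the equality constraint y = x**3 and the bound y >= 1 (hence x >= 1) reduces the problem to one dimension, and the maximum sits on the boundary at x = 1, giving (x, y) = (1, 1):

import numpy as np

x = np.linspace(1, 2, 1001)
f = 2*x*x**3 + 2*x - x**2 - 2*x**6  # objective with y = x**3 substituted
print(x[np.argmax(f)])  # 1.0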
+ constraints = [ + Le(- y + 1, 0), # y - 1 >= 0, + Eq(x ** 3 - y, 0), # x**3 - y == 0, + ] + fit = Fit(-model, constraints=constraints, minimizer=TrustConstr) + fit_result = fit.execute(tol=1e-15) + + # Test if the constrained results are equal + assert list(fit_result.params.values()) == pytest.approx(res_constr.x) diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 4ef09b8b..272eedca 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -1,6 +1,4 @@ from __future__ import division -import unittest -import warnings import sympy @@ -8,49 +6,38 @@ from symfit.distributions import Gaussian, Exp -class TestDistributions(unittest.TestCase): - def test_gaussian(self): - """ - Make sure that symfit.distributions.Gaussians produces the expected - sympy expression. - """ - x0 = Parameter() - sig = Parameter(positive=True) - x = Variable() - - new = sympy.exp(-(x - x0)**2/(2*sig**2))/sympy.sqrt((2*sympy.pi*sig**2)) - self.assertIsInstance(new, sympy.Expr) - g = Gaussian(x, x0, sig) - self.assertTrue(issubclass(g.__class__, sympy.Expr)) - self.assertEqual(new, g) - - # A pdf should always integrate to 1 on its domain - self.assertEqual(sympy.integrate(g, (x, -sympy.oo, sympy.oo)), 1) - - def test_exp(self): - """ - Make sure that symfit.distributions.Exp produces the expected - sympy expression. - """ - l = Parameter(positive=True) - x = Variable() - - new = l * sympy.exp(- l * x) - self.assertIsInstance(new, sympy.Expr) - e = Exp(x, l) - self.assertTrue(issubclass(e.__class__, sympy.Expr)) - self.assertEqual(new, e) - - # A pdf should always integrate to 1 on its domain - self.assertEqual(sympy.integrate(e, (x, 0, sympy.oo)), 1) - -if __name__ == '__main__': - try: - unittest.main(warnings='ignore') - # Note that unittest will catch and handle exceptions raised by tests. - # So this line will *only* deal with exceptions raised by the line - # above. - except TypeError: - # In Py2, unittest.main doesn't take a warnings argument - warnings.simplefilter('ignore') - unittest.main() \ No newline at end of file +def test_gaussian(): + """ + Make sure that symfit.distributions.Gaussians produces the expected + sympy expression. + """ + x0 = Parameter() + sig = Parameter(positive=True) + x = Variable() + + new = sympy.exp(-(x - x0)**2/(2*sig**2))/sympy.sqrt((2*sympy.pi*sig**2)) + assert isinstance(new, sympy.Expr) + g = Gaussian(x, x0, sig) + assert issubclass(g.__class__, sympy.Expr) + assert new == g + + # A pdf should always integrate to 1 on its domain + assert sympy.integrate(g, (x, -sympy.oo, sympy.oo)) == 1 + + +def test_exp(): + """ + Make sure that symfit.distributions.Exp produces the expected + sympy expression. 
+ """ + l = Parameter(positive=True) + x = Variable() + + new = l * sympy.exp(- l * x) + assert isinstance(new, sympy.Expr) + e = Exp(x, l) + assert issubclass(e.__class__, sympy.Expr) + assert new == e + + # A pdf should always integrate to 1 on its domain + assert sympy.integrate(e, (x, 0, sympy.oo)) == 1 diff --git a/tests/test_finite_difference.py b/tests/test_finite_difference.py index 560a19c0..5aa6a680 100644 --- a/tests/test_finite_difference.py +++ b/tests/test_finite_difference.py @@ -1,228 +1,217 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Tue Jul 11 11:34:23 2017 - -@author: peterkroon -""" -import unittest import symfit as sf import numpy as np -import warnings - - -class FiniteDifferenceTests(unittest.TestCase): - @classmethod - def setUpClass(cls): - np.random.seed(0) - - def test_1_1_model(self): - '''Tests the case with 1 component and 1 parameter''' - x, y = sf.variables('x, y') - a = sf.Parameter(name='a') - model = sf.Model({y: 3 * a * x**2}) - x_data = np.arange(10) - - exact = model.eval_jacobian(x=x_data, a=3.5) - approx = model.finite_difference(x=x_data, a=3.5) - np.testing.assert_allclose(exact, approx) - - exact = model.eval_jacobian(x=3, a=3.5) - approx = model.finite_difference(x=3, a=3.5) - np.testing.assert_allclose(exact, approx) - - def test_1_multi_model(self): - '''Tests the case with 1 component and multiple parameters''' - x, y = sf.variables('x, y') - a, b = sf.parameters('a, b') - model = sf.Model({y: 3 * a * x**2 - sf.exp(b) * x}) - x_data = np.arange(10) - - exact = model.eval_jacobian(x=x_data, a=3.5, b=2) - approx = model.finite_difference(x=x_data, a=3.5, b=2) - np.testing.assert_allclose(exact, approx) - - exact = model.eval_jacobian(x=3, a=3.5, b=2) - approx = model.finite_difference(x=3, a=3.5, b=2) - np.testing.assert_allclose(exact, approx) - - def test_multi_1_model(self): - '''Tests the case with multiple components and one parameter''' - x, y, z = sf.variables('x, y, z') - a, = sf.parameters('a') - model = sf.Model({y: 3 * a * x**2, - z: sf.exp(a*x)}) - x_data = np.arange(10) - - exact = model.eval_jacobian(x=x_data, a=3.5) - approx = model.finite_difference(x=x_data, a=3.5) - np.testing.assert_allclose(exact, approx) - - exact = model.eval_jacobian(x=3, a=3.5) - approx = model.finite_difference(x=3, a=3.5) - np.testing.assert_allclose(exact, approx) - - def test_multi_multi_model(self): - '''Tests the case with multiple components and multiple parameters''' - x, y, z = sf.variables('x, y, z') - a, b, c = sf.parameters('a, b, c') - model = sf.Model({y: 3 * a * x**2 + b * x - c, - z: sf.exp(a*x - b) * c}) - x_data = np.arange(10) - - exact = model.eval_jacobian(x=x_data, a=3.5, b=2, c=5) - approx = model.finite_difference(x=x_data, a=3.5, b=2, c=5) - np.testing.assert_allclose(exact, approx, rtol=1e-5) - - exact = model.eval_jacobian(x=3, a=3.5, b=2, c=5) - approx = model.finite_difference(x=3, a=3.5, b=2, c=5) - np.testing.assert_allclose(exact, approx, rtol=1e-5) - - def test_multi_indep(self): - ''' - Tests the case with multiple components, multiple parameters and - multiple independent variables - ''' - w, x, y, z = sf.variables('w, x, y, z') - a, b, c = sf.parameters('a, b, c') - model = sf.Model({y: 3 * a * x**2 + b * x * w - c, - z: sf.exp(a*x - b) + c*w}) - x_data = np.arange(10)/10 - w_data = np.arange(10) - - exact = model.eval_jacobian(x=x_data, w=w_data, a=3.5, b=2, c=5) - approx = model.finite_difference(x=x_data, w=w_data, a=3.5, b=2, c=5) - np.testing.assert_allclose(exact, approx, rtol=1e-5) - - 
exact = model.eval_jacobian(x=0.3, w=w_data, a=3.5, b=2, c=5) - approx = model.finite_difference(x=0.3, w=w_data, a=3.5, b=2, c=5) - np.testing.assert_allclose(exact, approx, rtol=1e-5) - - exact = model.eval_jacobian(x=0.3, w=5, a=3.5, b=2, c=5) - approx = model.finite_difference(x=0.3, w=5, a=3.5, b=2, c=5) - np.testing.assert_allclose(exact, approx, rtol=1e-5) - - def test_ODE_stdev(self): - """ - Make sure that parameters from ODEModels get standard deviations. - """ - x, v, t = sf.variables('x, v, t') - k = sf.Parameter(name='k') - - k.min = 0 - k.value = 10 - a = -k * x - - model = sf.ODEModel({ - sf.D(v, t): a, - sf.D(x, t): v, - }, - initial={v: 0, x: 1, t: 0}) - t_data = np.linspace(0, 10, 150) - noise = np.random.normal(1, 0.05, t_data.shape) - x_data = model(t=t_data, k=11).x * noise - v_data = model(t=t_data, k=11).v * noise - fit = sf.Fit(model, t=t_data, x=x_data, v=v_data) - result = fit.execute() - self.assertTrue(result.stdev(k) is not None) - self.assertTrue(np.isfinite(result.stdev(k))) - - def test_unequal_data(self): - """ - Test to make sure finite differences work with data of unequal length. - """ - x_1, x_2, y_1, y_2 = sf.variables('x_1, x_2, y_1, y_2') - y0, a_1, a_2, b_1, b_2 = sf.parameters('y0, a_1, a_2, b_1, b_2') - - model = sf.Model({ - y_1: a_1 * x_1**2 + b_1 * x_1 + y0, - y_2: a_2 * x_2**2 + b_2 * x_2 + y0, - }) - - # Generate data from this model - xdata1 = np.linspace(0, 10) - xdata2 = xdata1[::2] # Only every other point. - - exact = model.eval_jacobian(x_1=xdata1, x_2=xdata2, - a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8) - approx = model.finite_difference(x_1=xdata1, x_2=xdata2, - a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8) - # First axis is the number of components - self.assertEqual(len(exact), 2) - self.assertEqual(len(approx), 2) - - # Second axis is the number of parameters, same for all components - for exact_comp, approx_comp, xdata in zip(exact, approx, [xdata1, xdata2]): - self.assertEqual(len(exact_comp), len(model.params)) - self.assertEqual(len(approx_comp), len(model.params)) - for exact_elem, approx_elem in zip(exact_comp, approx_comp): - self.assertEqual(exact_elem.shape, xdata.shape) - self.assertEqual(approx_elem.shape, xdata.shape) - - self._assert_equal(exact, approx, rtol=1e-4) - - model = sf.Model({ - y_1: a_1 * x_1**2 + b_1 * x_1, - y_2: a_2 * x_2**2 + b_2 * x_2, - }) - - exact = model.eval_jacobian(x_1=xdata1, x_2=xdata2, - a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111) - approx = model.finite_difference(x_1=xdata1, x_2=xdata2, - a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111) - self._assert_equal(exact, approx, rtol=1e-4) - - model = sf.Model({ - y_1: a_1 * x_1**2 + b_1 * x_1, - }) - exact = model.eval_jacobian(x_1=xdata1, a_1=101.3, b_1=0.5) - approx = model.finite_difference(x_1=xdata1, a_1=101.3, b_1=0.5) - self._assert_equal(exact, approx, rtol=1e-4) - - def test_harmonic_oscillator_errors(self): - """ - Make sure the errors produced by fitting ODE's are the same as when - fitting an exact solution. 
- """ - x, v, t = sf.variables('x, v, t') - k = sf.Parameter(name='k', value=100) - m = 1 - a = -k/m * x - ode_model = sf.ODEModel({sf.D(v, t): a, - sf.D(x, t): v}, - initial={t: 0, v: 0, x: 1}) - - t_data = np.linspace(0, 10, 250) - np.random.seed(2) - noise = np.random.normal(1, 0.05, size=t_data.shape) - x_data = ode_model(t=t_data, k=100).x * noise - - ode_fit = sf.Fit(ode_model, t=t_data, x=x_data, v=None) - ode_result = ode_fit.execute() - - phi = 0 - A = 1 - model = sf.Model({x: A * sf.cos(sf.sqrt(k/m) * t + phi)}) - fit = sf.Fit(model, t=t_data, x=x_data) - result = fit.execute() - - self.assertAlmostEqual(result.value(k), ode_result.value(k), places=4) - self.assertAlmostEqual(result.stdev(k) / ode_result.stdev(k), 1, 2) - self.assertGreaterEqual(result.stdev(k), ode_result.stdev(k)) - - def _assert_equal(self, exact, approx, **kwargs): - self.assertEqual(len(exact), len(approx)) - for exact_comp, approx_comp in zip(exact, approx): - np.testing.assert_allclose(exact_comp, approx_comp, **kwargs) - - -if __name__ == '__main__': - try: - unittest.main(warnings='ignore') - # Note that unittest will catch and handle exceptions raised by tests. - # So this line will *only* deal with exceptions raised by the line - # above. - except TypeError: - # In Py2, unittest.main doesn't take a warnings argument - warnings.simplefilter('ignore') - unittest.main() +import pytest + + +def setup_method(): + np.random.seed(0) + + +def test_1_1_model(): + '''Tests the case with 1 component and 1 parameter''' + x, y = sf.variables('x, y') + a = sf.Parameter(name='a') + model = sf.Model({y: 3 * a * x**2}) + x_data = np.arange(10) + + exact = model.eval_jacobian(x=x_data, a=3.5) + approx = model.finite_difference(x=x_data, a=3.5) + _assert_equal(exact, approx) + + exact = model.eval_jacobian(x=3, a=3.5) + approx = model.finite_difference(x=3, a=3.5) + _assert_equal(exact, approx) + + +def test_1_multi_model(): + '''Tests the case with 1 component and multiple parameters''' + x, y = sf.variables('x, y') + a, b = sf.parameters('a, b') + model = sf.Model({y: 3 * a * x**2 - sf.exp(b) * x}) + x_data = np.arange(10) + + exact = model.eval_jacobian(x=x_data, a=3.5, b=2) + approx = model.finite_difference(x=x_data, a=3.5, b=2) + _assert_equal(exact, approx) + + exact = model.eval_jacobian(x=3, a=3.5, b=2) + approx = model.finite_difference(x=3, a=3.5, b=2) + _assert_equal(exact, approx) + + +def test_multi_1_model(): + '''Tests the case with multiple components and one parameter''' + x, y, z = sf.variables('x, y, z') + a, = sf.parameters('a') + model = sf.Model({y: 3 * a * x**2, + z: sf.exp(a*x)}) + x_data = np.arange(10) + + exact = model.eval_jacobian(x=x_data, a=3.5) + approx = model.finite_difference(x=x_data, a=3.5) + _assert_equal(exact, approx) + + exact = model.eval_jacobian(x=3, a=3.5) + approx = model.finite_difference(x=3, a=3.5) + _assert_equal(exact, approx) + + +def test_multi_multi_model(): + '''Tests the case with multiple components and multiple parameters''' + x, y, z = sf.variables('x, y, z') + a, b, c = sf.parameters('a, b, c') + model = sf.Model({y: 3 * a * x**2 + b * x - c, + z: sf.exp(a*x - b) * c}) + x_data = np.arange(10) + + exact = model.eval_jacobian(x=x_data, a=3.5, b=2, c=5) + approx = model.finite_difference(x=x_data, a=3.5, b=2, c=5) + _assert_equal(exact, approx, rel=1e-3) + + exact = model.eval_jacobian(x=3, a=3.5, b=2, c=5) + approx = model.finite_difference(x=3, a=3.5, b=2, c=5) + _assert_equal(exact, approx, rel=1e-3) + + +def test_multi_indep(): + ''' + Tests the case with 
multiple components, multiple parameters and + multiple independent variables + ''' + w, x, y, z = sf.variables('w, x, y, z') + a, b, c = sf.parameters('a, b, c') + model = sf.Model({y: 3 * a * x**2 + b * x * w - c, + z: sf.exp(a*x - b) + c*w}) + x_data = np.arange(10)/10 + w_data = np.arange(10) + + exact = model.eval_jacobian(x=x_data, w=w_data, a=3.5, b=2, c=5) + approx = model.finite_difference(x=x_data, w=w_data, a=3.5, b=2, c=5) + _assert_equal(exact, approx) + + exact = model.eval_jacobian(x=0.3, w=w_data, a=3.5, b=2, c=5) + approx = model.finite_difference(x=0.3, w=w_data, a=3.5, b=2, c=5) + _assert_equal(exact, approx) + + exact = model.eval_jacobian(x=0.3, w=5, a=3.5, b=2, c=5) + approx = model.finite_difference(x=0.3, w=5, a=3.5, b=2, c=5) + _assert_equal(exact, approx) + + +def test_ODE_stdev(): + """ + Make sure that parameters from ODEModels get standard deviations. + """ + x, v, t = sf.variables('x, v, t') + k = sf.Parameter(name='k') + + k.min = 0 + k.value = 10 + a = -k * x + + model = sf.ODEModel( + { + sf.D(v, t): a, + sf.D(x, t): v, + }, + initial={v: 0, x: 1, t: 0} + ) + t_data = np.linspace(0, 10, 150) + noise = np.random.normal(1, 0.05, t_data.shape) + x_data = model(t=t_data, k=11).x * noise + v_data = model(t=t_data, k=11).v * noise + fit = sf.Fit(model, t=t_data, x=x_data, v=v_data) + result = fit.execute() + assert result.stdev(k) is not None + assert np.isfinite(result.stdev(k)) + + +def test_unequal_data(): + """ + Test to make sure finite differences work with data of unequal length. + """ + x_1, x_2, y_1, y_2 = sf.variables('x_1, x_2, y_1, y_2') + y0, a_1, a_2, b_1, b_2 = sf.parameters('y0, a_1, a_2, b_1, b_2') + + model = sf.Model({ + y_1: a_1 * x_1**2 + b_1 * x_1 + y0, + y_2: a_2 * x_2**2 + b_2 * x_2 + y0, + }) + + # Generate data from this model + xdata1 = np.linspace(0, 10) + xdata2 = xdata1[::2] # Only every other point. + + exact = model.eval_jacobian(x_1=xdata1, x_2=xdata2, + a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8) + approx = model.finite_difference(x_1=xdata1, x_2=xdata2, + a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111, y0=10.8) + # First axis is the number of components + assert len(exact) == 2 + assert len(approx) == 2 + + # Second axis is the number of parameters, same for all components + for exact_comp, approx_comp, xdata in zip(exact, approx, [xdata1, xdata2]): + assert len(exact_comp) == len(model.params) + assert len(approx_comp) == len(model.params) + for exact_elem, approx_elem in zip(exact_comp, approx_comp): + assert exact_elem.shape == xdata.shape + assert approx_elem.shape == xdata.shape + + _assert_equal(exact, approx, rel=1e-4) + + model = sf.Model({ + y_1: a_1 * x_1**2 + b_1 * x_1, + y_2: a_2 * x_2**2 + b_2 * x_2, + }) + + exact = model.eval_jacobian(x_1=xdata1, x_2=xdata2, + a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111) + approx = model.finite_difference(x_1=xdata1, x_2=xdata2, + a_1=101.3, b_1=0.5, a_2=56.3, b_2=1.1111) + _assert_equal(exact, approx, rel=1e-4) + + model = sf.Model({ + y_1: a_1 * x_1**2 + b_1 * x_1, + }) + exact = model.eval_jacobian(x_1=xdata1, a_1=101.3, b_1=0.5) + approx = model.finite_difference(x_1=xdata1, a_1=101.3, b_1=0.5) + _assert_equal(exact, approx, rel=1e-4) + + +def test_harmonic_oscillator_errors(): + """ + Make sure the errors produced by fitting ODE's are the same as when + fitting an exact solution. 
+ """ + x, v, t = sf.variables('x, v, t') + k = sf.Parameter(name='k', value=100) + m = 1 + a = -k/m * x + ode_model = sf.ODEModel({sf.D(v, t): a, + sf.D(x, t): v}, + initial={t: 0, v: 0, x: 1}) + + t_data = np.linspace(0, 10, 250) + np.random.seed(2) + noise = np.random.normal(1, 0.05, size=t_data.shape) + x_data = ode_model(t=t_data, k=100).x * noise + + ode_fit = sf.Fit(ode_model, t=t_data, x=x_data, v=None) + ode_result = ode_fit.execute() + + phi = 0 + A = 1 + model = sf.Model({x: A * sf.cos(sf.sqrt(k/m) * t + phi)}) + fit = sf.Fit(model, t=t_data, x=x_data) + result = fit.execute() + + assert result.value(k) == pytest.approx(ode_result.value(k), 1e-4) + assert result.stdev(k) == pytest.approx(ode_result.stdev(k), 1e-2) + assert result.stdev(k) >= ode_result.stdev(k) + + +def _assert_equal(exact, approx, **kwargs): + assert len(exact) == len(approx) + for exact_comp, approx_comp in zip(exact, approx): + assert approx_comp == pytest.approx(exact_comp, **kwargs) diff --git a/tests/test_fit_result.py b/tests/test_fit_result.py index 70b7ba41..07fc4356 100644 --- a/tests/test_fit_result.py +++ b/tests/test_fit_result.py @@ -1,10 +1,9 @@ from __future__ import division, print_function -import unittest +import pytest import pickle from collections import OrderedDict import numpy as np -from scipy.optimize import OptimizeResult from symfit import ( Variable, Parameter, Fit, FitResults, Eq, Ge, CallableNumericalModel, Model ) @@ -16,71 +15,70 @@ LogLikelihood, LeastSquares, VectorLeastSquares, MinimizeModel ) + def ge_constraint(a): # Has to be in the global namespace for pickle. return a - 1 -class TestFitResults(unittest.TestCase): - """ - Tests for the FitResults object. - """ - def setUp(self): + +class TestTestResult(): + + @classmethod + def setup_class(cls): xdata = np.linspace(1, 10, 10) ydata = 3 * xdata ** 2 - a = Parameter('a') - b = Parameter('b') + cls.a = Parameter('a') + cls.b = Parameter('b') + x = Variable('x') y = Variable('y') - model = Model({y: a * x ** b}) - self.params = [a, b] + model = Model({y: cls.a * x ** cls.b}) fit = Fit(model, x=xdata, y=ydata) - self.fit_result = fit.execute() + cls.fit_result = fit.execute() fit = Fit(model, x=xdata, y=ydata, minimizer=MINPACK) - self.minpack_result = fit.execute() + cls.minpack_result = fit.execute() fit = Fit(model, x=xdata, objective=LogLikelihood) - self.likelihood_result = fit.execute() + cls.likelihood_result = fit.execute() fit = Fit(model, x=xdata, y=ydata, minimizer=[BFGS, NelderMead]) - self.chained_result = fit.execute() + cls.chained_result = fit.execute() z = Variable('z') constraints = [ - Eq(a, b), + Eq(cls.a, cls.b), CallableNumericalModel.as_constraint( - {z: ge_constraint}, connectivity_mapping={z: {a}}, + {z: ge_constraint}, connectivity_mapping={z: {cls.a}}, constraint_type=Ge, model=model ) ] fit = Fit(model, x=xdata, y=ydata, constraints=constraints) - self.constrained_result = fit.execute() + cls.constrained_result = fit.execute() fit = Fit(model, x=xdata, y=ydata, constraints=constraints, minimizer=BasinHopping) - self.constrained_basinhopping_result = fit.execute() + cls.constrained_basinhopping_result = fit.execute() def test_params_type(self): - self.assertIsInstance(self.fit_result.params, OrderedDict) + assert isinstance(self.fit_result.params, OrderedDict) def test_minimizer_output_type(self): - self.assertIsInstance(self.fit_result.minimizer_output, dict) - self.assertIsInstance(self.minpack_result.minimizer_output, dict) - self.assertIsInstance(self.likelihood_result.minimizer_output, dict) + 
assert isinstance(self.fit_result.minimizer_output, dict) + assert isinstance(self.minpack_result.minimizer_output, dict) + assert isinstance(self.likelihood_result.minimizer_output, dict) def test_fitting(self): """ Test if the fitting worked in the first place. """ - a, b = self.params - fit_result = self.fit_result - self.assertIsInstance(fit_result, FitResults) - self.assertAlmostEqual(fit_result.value(a), 3.0) - self.assertAlmostEqual(fit_result.value(b), 2.0) + assert isinstance(self.fit_result, FitResults) + assert self.fit_result.value(self.a) == pytest.approx(3.0) + assert self.fit_result.value(self.b) == pytest.approx(2.0) - self.assertIsInstance(fit_result.stdev(a), float) - self.assertIsInstance(fit_result.stdev(b), float) + assert isinstance(self.fit_result.stdev(self.a), float) + assert isinstance(self.fit_result.stdev(self.b), float) - self.assertIsInstance(fit_result.r_squared, float) + assert isinstance(self.fit_result.r_squared, float) # by definition since there's no fuzzyness - self.assertEqual(fit_result.r_squared, 1.0) + assert self.fit_result.r_squared == 1.0 def test_fitting_2(self): np.random.seed(43) @@ -133,38 +131,36 @@ def test_fitting_2(self): fit = Fit(model, xx, yy, ydata) fit_result = fit.execute() - self.assertGreater(fit_result.r_squared, 0.95) + assert fit_result.r_squared > 0.95 for param in fit.model.params: try: - self.assertAlmostEqual(fit_result.stdev(param)**2 / fit_result.variance(param), 1.0) + assert fit_result.stdev(param)**2 == pytest.approx(fit_result.variance(param)) except AssertionError: - self.assertLessEqual(fit_result.variance(param), 0.0) - self.assertTrue(np.isnan(fit_result.stdev(param))) + assert fit_result.variance(param) <= 0.0 + assert np.isnan(fit_result.stdev(param)) # Covariance matrix should be symmetric for param_1 in fit.model.params: for param_2 in fit.model.params: - self.assertAlmostEqual(fit_result.covariance(param_1, param_2) / fit_result.covariance(param_2, param_1), 1.0, 3) + assert fit_result.covariance(param_1, param_2) == pytest.approx(fit_result.covariance(param_2, param_1), rel=1e-3) def test_minimizer_included(self): """"The minimizer used should be included in the results.""" - self.assertIsInstance(self.constrained_result.minimizer, BaseMinimizer) - self.assertIsInstance(self.constrained_basinhopping_result.minimizer, - BaseMinimizer) - self.assertIsInstance(self.likelihood_result.minimizer, BaseMinimizer) - self.assertIsInstance(self.fit_result.minimizer, BaseMinimizer) - self.assertIsInstance(self.chained_result.minimizer, ChainedMinimizer) - for minimizer, cls in zip(self.chained_result.minimizer.minimizers, - [BFGS, NelderMead]): - self.assertIsInstance(minimizer, cls) + assert isinstance(self.constrained_result.minimizer, BaseMinimizer) + assert isinstance(self.constrained_basinhopping_result.minimizer, BaseMinimizer) + assert isinstance(self.likelihood_result.minimizer, BaseMinimizer) + assert isinstance(self.fit_result.minimizer, BaseMinimizer) + assert isinstance(self.chained_result.minimizer, ChainedMinimizer) + for minimizer, cls in zip(self.chained_result.minimizer.minimizers, [BFGS, NelderMead]): + assert isinstance(minimizer, cls) def test_objective_included(self): """"The objective used should be included in the results.""" - self.assertIsInstance(self.fit_result.objective, LeastSquares) - self.assertIsInstance(self.minpack_result.objective, VectorLeastSquares) - self.assertIsInstance(self.likelihood_result.objective, LogLikelihood) - self.assertIsInstance(self.constrained_result.objective, 
LeastSquares)
-        self.assertIsInstance(self.constrained_basinhopping_result.objective, LeastSquares)
+        assert isinstance(self.fit_result.objective, LeastSquares)
+        assert isinstance(self.minpack_result.objective, VectorLeastSquares)
+        assert isinstance(self.likelihood_result.objective, LogLikelihood)
+        assert isinstance(self.constrained_result.objective, LeastSquares)
+        assert isinstance(self.constrained_basinhopping_result.objective, LeastSquares)
 
     def test_constraints_included(self):
         """
@@ -172,21 +168,18 @@ def test_fitting_2(self):
         we can easily print their compliance.
         """
         # For a constrained fit we expect a list of MinimizeModel objectives.
-        for constrained_result in [self.constrained_result,
-                                   self.constrained_basinhopping_result]:
-            self.assertIsInstance(constrained_result.constraints, list)
-            for constraint in constrained_result.constraints:
-                self.assertIsInstance(constraint, MinimizeModel)
+        for constrained_result in [self.constrained_result, self.constrained_basinhopping_result]:
+            assert isinstance(constrained_result.constraints, list)
+            for constraint in constrained_result.constraints:
+                assert isinstance(constraint, MinimizeModel)
 
     def test_message_included(self):
         """Status message should be included."""
-        self.assertIsInstance(self.fit_result.status_message, str)
-        self.assertIsInstance(self.minpack_result.status_message, str)
-        self.assertIsInstance(self.likelihood_result.status_message, str)
-        self.assertIsInstance(self.constrained_result.status_message, str)
-        self.assertIsInstance(
-            self.constrained_basinhopping_result.status_message, str
-        )
+        assert isinstance(self.fit_result.status_message, str)
+        assert isinstance(self.minpack_result.status_message, str)
+        assert isinstance(self.likelihood_result.status_message, str)
+        assert isinstance(self.constrained_result.status_message, str)
+        assert isinstance(self.constrained_basinhopping_result.status_message, str)
 
     def test_pickle(self):
         for fit_result in [self.fit_result, self.chained_result,
@@ -194,39 +187,35 @@ def test_fitting_2(self):
                            self.constrained_result, self.likelihood_result]:
             dumped = pickle.dumps(fit_result)
             new_result = pickle.loads(dumped)
-            self.assertEqual(sorted(fit_result.__dict__.keys()),
-                             sorted(new_result.__dict__.keys()))
+            assert sorted(fit_result.__dict__.keys()) == sorted(new_result.__dict__.keys())
            for k, v1 in fit_result.__dict__.items():
                 v2 = new_result.__dict__[k]
                 if k == 'minimizer':
-                    self.assertEqual(type(v1), type(v2))
+                    assert type(v1) == type(v2)
                 elif k != 'minimizer_output':  # Ignore minimizer_output
                     if isinstance(v1, np.ndarray):
-                        np.testing.assert_almost_equal(v1, v2)
-                    else:
-                        self.assertEqual(v1, v2)
+                        assert v1 == pytest.approx(v2, nan_ok=True)
+                    else:
+                        assert v1 == v2
 
     def test_gof_presence(self):
         """
         Test if the expected goodness of fit estimators are present. 
""" - self.assertTrue(hasattr(self.fit_result, 'objective_value')) - self.assertTrue(hasattr(self.fit_result, 'r_squared')) - self.assertTrue(hasattr(self.fit_result, 'chi_squared')) - self.assertFalse(hasattr(self.fit_result, 'log_likelihood')) - self.assertFalse(hasattr(self.fit_result, 'likelihood')) - - self.assertTrue(hasattr(self.minpack_result, 'objective_value')) - self.assertTrue(hasattr(self.minpack_result, 'r_squared')) - self.assertTrue(hasattr(self.minpack_result, 'chi_squared')) - self.assertFalse(hasattr(self.minpack_result, 'log_likelihood')) - self.assertFalse(hasattr(self.minpack_result, 'likelihood')) - - self.assertTrue(hasattr(self.likelihood_result, 'objective_value')) - self.assertFalse(hasattr(self.likelihood_result, 'r_squared')) - self.assertFalse(hasattr(self.likelihood_result, 'chi_squared')) - self.assertTrue(hasattr(self.likelihood_result, 'log_likelihood')) - self.assertTrue(hasattr(self.likelihood_result, 'likelihood')) - -if __name__ == '__main__': - unittest.main() + assert hasattr(self.fit_result, 'objective_value') + assert hasattr(self.fit_result, 'r_squared') + assert hasattr(self.fit_result, 'chi_squared') + assert not hasattr(self.fit_result, 'log_likelihood') + assert not hasattr(self.fit_result, 'likelihood') + + assert hasattr(self.minpack_result, 'objective_value') + assert hasattr(self.minpack_result, 'r_squared') + assert hasattr(self.minpack_result, 'chi_squared') + assert not hasattr(self.minpack_result, 'log_likelihood') + assert not hasattr(self.minpack_result, 'likelihood') + + assert hasattr(self.likelihood_result, 'objective_value') + assert not hasattr(self.likelihood_result, 'r_squared') + assert not hasattr(self.likelihood_result, 'chi_squared') + assert hasattr(self.likelihood_result, 'log_likelihood') + assert hasattr(self.likelihood_result, 'likelihood') diff --git a/tests/test_general.py b/tests/test_general.py index 96d02d10..18a13be5 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -1,17 +1,14 @@ from __future__ import division, print_function import sys -import sympy -import warnings -import unittest import numpy as np import scipy.stats -from scipy.optimize import curve_fit, minimize +from scipy.optimize import curve_fit import pytest from symfit import ( Variable, Parameter, Fit, FitResults, log, variables, - parameters, Model, Eq, Ge, exp, integrate, oo, GradientModel + parameters, Model, exp, integrate, oo, GradientModel ) from symfit.core.minimizers import ( MINPACK, LBFGSB, BoundedMinimizer, DifferentialEvolution, BaseMinimizer, @@ -27,899 +24,932 @@ import funcsigs as inspect_sig -class Tests(unittest.TestCase): - @classmethod - def setUpClass(cls): - np.random.seed(0) - - def test_callable(self): - """ - Make sure that symfit expressions are callable (with scalars and - arrays), and produce the expected results. - """ - a, b = parameters('a, b') - x, y = variables('x, y') - func = a*x**2 + b*y**2 - result = func(x=2, y=3, a=3, b=9) - self.assertEqual(result, 3*2**2 + 9*3**2) - result = func(2, 3, a=3, b=9) - self.assertEqual(result, 3*2**2 + 9*3**2) - - xdata = np.arange(1, 10) - ydata = np.arange(1, 10) - result = func(x=ydata, y=ydata, a=3, b=9) - self.assertTrue(np.array_equal(result, 3*xdata**2 + 9*ydata**2)) - - def test_named_fitting(self): - """ - Make sure that fitting with NumericalLeastSquares works using a dict - as model and that the resulting fit_result is of the right type. 
- """ - xdata = np.linspace(1, 10, 10) - ydata = 3*xdata**2 - - a = Parameter(value=1.0) - b = Parameter(value=2.5) - x, y = variables('x, y') - - model = {y: a*x**b} - - fit = Fit(model, x=xdata, y=ydata, minimizer=MINPACK) - fit_result = fit.execute() - self.assertIsInstance(fit_result, FitResults) - self.assertAlmostEqual(fit_result.value(a), 3.0) - self.assertAlmostEqual(fit_result.value(b), 2.0) - - def test_backwards_compatible_fitting(self): - """ - In 0.4.2 we replaced the usage of inspect by automatically generated - names. This can cause problems for users using named variables to call - fit. - """ - xdata = np.linspace(1, 10, 10) - ydata = 3*xdata**2 - - a = Parameter(value=1.0) - b = Parameter(value=2.5) - - y = Variable('y') - - with pytest.warns(DeprecationWarning): - x = Variable() - - model = {y: a*x**b} - - with self.assertRaises(TypeError): - # The name of x is not x. - fit = Fit(model, x=xdata, y=ydata) - - def test_vector_fitting(self): - """ - Tests fitting to a 3 component vector valued function, without bounds - or guesses. - """ - a, b, c = parameters('a, b, c') - a_i, b_i, c_i = variables('a_i, b_i, c_i') - - model = {a_i: a, b_i: b, c_i: c} - - xdata = np.array([ - [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.], - [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8], - [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1], - ]) - - fit = Fit( - model=model, - a_i=xdata[0], - b_i=xdata[1], - c_i=xdata[2], - minimizer = MINPACK - ) - fit_result = fit.execute() +def setup_method(): + np.random.seed(0) - self.assertAlmostEqual(fit_result.value(a) / 9.985691, 1.0, 5) - self.assertAlmostEqual(fit_result.value(b) / 1.006143e+02, 1.0, 4) - self.assertAlmostEqual(fit_result.value(c) / 7.085713e+01, 1.0, 5) - - def test_vector_none_fitting(self): - """ - Fit to a 3 component vector valued function with one variables data set - to None, without bounds or guesses. - """ - a, b, c = parameters('a, b, c') - a_i, b_i, c_i = variables('a_i, b_i, c_i') - - model = {a_i: a, b_i: b, c_i: c} - - xdata = np.array([ - [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.], - [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8], - [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1], - ]) - - fit_none = Fit( - model=model, - a_i=xdata[0], - b_i=xdata[1], - c_i=None, - minimizer=MINPACK - ) - fit = Fit( - model=model, - a_i=xdata[0], - b_i=xdata[1], - c_i=xdata[2], - minimizer=MINPACK - ) - fit_none_result = fit_none.execute() - fit_result = fit.execute() - self.assertAlmostEqual(fit_none_result.value(a), fit_result.value(a), 4) - self.assertAlmostEqual(fit_none_result.value(b), fit_result.value(b), 4) - # the parameter without data should be unchanged. - self.assertAlmostEqual(fit_none_result.value(c), 1.0) - - def test_vector_fitting_guess(self): - """ - Tests fitting to a 3 component vector valued function, with guesses. - """ - a, b, c = parameters('a, b, c') - a.value = 10 - b.value = 100 - a_i, b_i, c_i = variables('a_i, b_i, c_i') - - model = {a_i: a, b_i: b, c_i: c} - - xdata = np.array([ - [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.], - [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8], - [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1], - ]) - - fit = Fit( - model=model, - a_i=xdata[0], - b_i=xdata[1], - c_i=xdata[2], - minimizer = MINPACK - ) - fit_result = fit.execute() +def test_callable(): + """ + Make sure that symfit expressions are callable (with scalars and + arrays), and produce the expected results. 
+ """ + a, b = parameters('a, b') + x, y = variables('x, y') + func = a*x**2 + b*y**2 + result = func(x=2, y=3, a=3, b=9) + assert result == 3*2**2 + 9*3**2 + result = func(2, 3, a=3, b=9) + assert result == 3*2**2 + 9*3**2 - self.assertAlmostEqual(fit_result.value(a), np.mean(xdata[0]), 4) - self.assertAlmostEqual(fit_result.value(b), np.mean(xdata[1]), 4) - self.assertAlmostEqual(fit_result.value(c), np.mean(xdata[2]), 4) - - def test_fitting(self): - """ - Tests fitting with NumericalLeastSquares. Makes sure that the resulting - objects and values are of the right type, and that the fit_result does - not have unexpected members. - """ - xdata = np.linspace(1, 10, 10) - ydata = 3*xdata**2 - - a = Parameter() # 3.1, min=2.5, max=3.5 - b = Parameter() - x = Variable() - new = a*x**b + xdata = np.arange(1, 10) + ydata = np.arange(1, 10) + result = func(x=ydata, y=ydata, a=3, b=9) + assert np.array_equal(result, 3*xdata**2 + 9*ydata**2) - fit = Fit(new, xdata, ydata, minimizer=MINPACK) - fit_result = fit.execute() - self.assertIsInstance(fit_result, FitResults) - self.assertAlmostEqual(fit_result.value(a), 3.0) - self.assertAlmostEqual(fit_result.value(b), 2.0) - - self.assertIsInstance(fit_result.stdev(a), float) - self.assertIsInstance(fit_result.stdev(b), float) - - self.assertIsInstance(fit_result.r_squared, float) - self.assertEqual(fit_result.r_squared, 1.0) # by definition since there's no fuzzyness - - def test_grid_fitting(self): - """ - Tests fitting a scalar function with 2 independent variables. - """ - xdata = np.arange(-5, 5, 1) - ydata = np.arange(5, 15, 1) - xx, yy = np.meshgrid(xdata, ydata, sparse=False) - - zdata = (2.5*xx**2 + 3.0*yy**2) - - a = Parameter(value=2.5, max=2.75) - b = Parameter(value=3.0, min=2.75) - x = Variable('x') - y = Variable('y') - z = Variable('z') - new = {z: a*x**2 + b*y**2} - - fit = Fit(new, x=xx, y=yy, z=zdata) - results = fit.execute() - - self.assertIsInstance(fit.minimizer, LBFGSB) - - self.assertAlmostEqual(results.value(a), 2.5) - self.assertAlmostEqual(results.value(b), 3.) - - # TODO: Should be 3 tests? - def test_model_callable(self): - """ - Tests if Model objects are callable in the way expected. Calling a - model should evaluate it's expression(s) with the given values. The - return value is a namedtuple. - - The signature should also work so inspection is saved. - """ - a, b = parameters('a, b') - x, y = variables('x, y') - new = a*x**2 + b*y**2 - model = Model(new) - ans = model(3, 3, 2, 2) - self.assertIsInstance(ans, tuple) - z, = ans - - self.assertEqual(z, 36) - for arg_name, name in zip(('x', 'y', 'a', 'b'), inspect_sig.signature(model).parameters): - self.assertEqual(arg_name, name) - - # From Model __init__ directly - model = Model([ - a*x**2, - 4*b*y**2, - a*x**2 + b*y**2 - ]) - z_1, z_2, z_3 = model(3, 3, 2, 2) - - self.assertEqual(z_1, 18) - self.assertEqual(z_2, 72) - self.assertEqual(z_3, 36) - for arg_name, name in zip(('x', 'y', 'a', 'b'), inspect_sig.signature(model).parameters): - self.assertEqual(arg_name, name) - - # From dict - z_1, z_2, z_3 = variables('z_1, z_2, z_3') - model = Model({ - z_1: a*x**2, - z_2: 4*b*y**2, - z_3: a*x**2 + b*y**2 - }) - z_1, z_2, z_3 = model(3, 3, 2, 2) +def test_named_fitting(): + """ + Make sure that fitting with NumericalLeastSquares works using a dict + as model and that the resulting fit_result is of the right type. 
+ """ + xdata = np.linspace(1, 10, 10) + ydata = 3*xdata**2 - self.assertEqual(z_1, 18) - self.assertEqual(z_2, 72) - self.assertEqual(z_3, 36) - for arg_name, name in zip(('x', 'y', 'a', 'b'), inspect_sig.signature(model).parameters): - self.assertEqual(arg_name, name) + a = Parameter(value=1.0) + b = Parameter(value=2.5) + x, y = variables('x, y') - def test_2D_fitting(self): - """ - Makes sure that a scalar model with 2 independent variables has the - proper signature, and that the fit result is of the correct type. - """ - xdata = np.random.randint(-10, 11, size=(2, 400)) - zdata = 2.5*xdata[0]**2 + 7.0*xdata[1]**2 + model = {y: a*x**b} - a = Parameter('a') - b = Parameter('b') - x = Variable('x') - y = Variable('y') - new = a*x**2 + b*y**2 + fit = Fit(model, x=xdata, y=ydata, minimizer=MINPACK) + fit_result = fit.execute() + assert isinstance(fit_result, FitResults) + assert fit_result.value(a) == pytest.approx(3.0) + assert fit_result.value(b) == pytest.approx(2.0) - fit = Fit(new, xdata[0], xdata[1], zdata) - result = fit.model(xdata[0], xdata[1], 2, 3) - self.assertIsInstance(result, tuple) +def test_backwards_compatible_fitting(): + """ + In 0.4.2 we replaced the usage of inspect by automatically generated + names. This can cause problems for users using named variables to call + fit. + """ + xdata = np.linspace(1, 10, 10) + ydata = 3*xdata**2 - for arg_name, name in zip(('x', 'y', 'a', 'b'), inspect_sig.signature(fit.model).parameters): - self.assertEqual(arg_name, name) + a = Parameter(value=1.0) + b = Parameter(value=2.5) - fit_result = fit.execute() - self.assertIsInstance(fit_result, FitResults) - - def test_gaussian_fitting(self): - """ - Tests fitting to a gaussian function and fit_result.params unpacking. - """ - xdata = 2*np.random.rand(10000) - 1 # random betwen [-1, 1] - ydata = 5.0 * scipy.stats.norm.pdf(xdata, loc=0.0, scale=1.0) - - x0 = Parameter('x0') - sig = Parameter('sig') - A = Parameter('A') - x = Variable('x') - g = GradientModel(A * Gaussian(x, x0, sig)) - - fit = Fit(g, xdata, ydata) - self.assertIsInstance(fit.objective, LeastSquares) - fit_result = fit.execute() + y = Variable('y') - self.assertAlmostEqual(fit_result.value(A), 5.0) - self.assertAlmostEqual(np.abs(fit_result.value(sig)), 1.0) - self.assertAlmostEqual(fit_result.value(x0), 0.0) - # raise Exception([i for i in fit_result.params]) - sexy = g(x=2.0, **fit_result.params) - ugly = g( - x=2.0, - x0=fit_result.value(x0), - A=fit_result.value(A), - sig=fit_result.value(sig), - ) - self.assertEqual(sexy, ugly) - - def test_2_gaussian_2d_fitting(self): - """ - Tests fitting to a scalar gaussian with 2 independent variables with - tight bounds. - """ - mean = (0.3, 0.4) # x, y mean 0.6, 0.4 - cov = [[0.01**2, 0], [0, 0.01**2]] - data = np.random.multivariate_normal(mean, cov, 3000000) - mean = (0.7, 0.8) # x, y mean 0.6, 0.4 - cov = [[0.01**2, 0], [0, 0.01**2]] - data_2 = np.random.multivariate_normal(mean, cov, 3000000) - data = np.vstack((data, data_2)) - - # Insert them as y,x here as np fucks up cartesian conventions. 
- ydata, xedges, yedges = np.histogram2d(data[:, 1], data[:, 0], bins=100, - range=[[0.0, 1.0], [0.0, 1.0]]) - xcentres = (xedges[:-1] + xedges[1:]) / 2 - ycentres = (yedges[:-1] + yedges[1:]) / 2 - - # Make a valid grid to match ydata - xx, yy = np.meshgrid(xcentres, ycentres, sparse=False) - # xdata = np.dstack((xx, yy)).T - - x = Variable('x') - y = Variable('y') - - x0_1 = Parameter(value=0.7, min=0.6, max=0.9) - sig_x_1 = Parameter(value=0.1, min=0.0, max=0.2) - y0_1 = Parameter(value=0.8, min=0.6, max=0.9) - sig_y_1 = Parameter(value=0.1, min=0.0, max=0.2) - A_1 = Parameter() - g_1 = A_1 * Gaussian(x, x0_1, sig_x_1) * Gaussian(y, y0_1, sig_y_1) - - x0_2 = Parameter(value=0.3, min=0.2, max=0.5) - sig_x_2 = Parameter(value=0.1, min=0.0, max=0.2) - y0_2 = Parameter(value=0.4, min=0.2, max=0.5) - sig_y_2 = Parameter(value=0.1, min=0.0, max=0.2) - A_2 = Parameter() - g_2 = A_2 * Gaussian(x, x0_2, sig_x_2) * Gaussian(y, y0_2, sig_y_2) - - model = GradientModel(g_1 + g_2) - fit = Fit(model, xx, yy, ydata) - fit_result = fit.execute() - - self.assertIsInstance(fit.minimizer, LBFGSB) - - img = model(x=xx, y=yy, **fit_result.params)[0] - img_g_1 = g_1(x=xx, y=yy, **fit_result.params) - img_g_2 = g_2(x=xx, y=yy, **fit_result.params) - np.testing.assert_array_equal(img, img_g_1 + img_g_2) - - # Equal up to some precision. Not much obviously. - self.assertAlmostEqual(fit_result.value(x0_1), 0.7, 3) - self.assertAlmostEqual(fit_result.value(y0_1), 0.8, 3) - self.assertAlmostEqual(fit_result.value(x0_2), 0.3, 3) - self.assertAlmostEqual(fit_result.value(y0_2), 0.4, 3) - - def test_gaussian_2d_fitting(self): - """ - Tests fitting to a scalar gaussian function with 2 independent - variables. - """ - mean = (0.6, 0.4) # x, y mean 0.6, 0.4 - cov = [[0.2**2, 0], [0, 0.1**2]] - - data = np.random.multivariate_normal(mean, cov, 1000000) - - # Insert them as y,x here as np fucks up cartesian conventions. - ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100, - range=[[0.0, 1.0], [0.0, 1.0]]) - xcentres = (xedges[:-1] + xedges[1:]) / 2 - ycentres = (yedges[:-1] + yedges[1:]) / 2 - - # Make a valid grid to match ydata - xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij') - - x0 = Parameter(value=mean[0]) - sig_x = Parameter(min=0.0) - x = Variable('x') - y0 = Parameter(value=mean[1]) - sig_y = Parameter(min=0.0) - A = Parameter(min=1, value=100) - y = Variable('y') - g = Variable('g') -# g = A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y) - model = Model({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)}) - fit = Fit(model, x=xx, y=yy, g=ydata, minimizer=MINPACK) - fit_result = fit.execute() - - self.assertAlmostEqual(fit_result.value(x0), np.mean(data[:, 0]), 1) - self.assertAlmostEqual(fit_result.value(y0), np.mean(data[:, 1]), 1) - self.assertAlmostEqual(np.abs(fit_result.value(sig_x)), np.std(data[:, 0]), 1) - self.assertAlmostEqual(np.abs(fit_result.value(sig_y)), np.std(data[:, 1]), 1) - self.assertGreaterEqual(fit_result.r_squared, 0.99) - - def test_jacobian_matrix(self): - """ - The jacobian matrix of a model should be a 2D list (matrix) containing - all the partial derivatives. - """ - a, b, c = parameters('a, b, c') - a_i, b_i, c_i = variables('a_i, b_i, c_i') - - model = Model({a_i: 2 * a + 3 * b, b_i: 5 * b, c_i: 7 * c}) - self.assertEqual([[2, 3, 0], [0, 5, 0], [0, 0, 7]], model.jacobian) - - def test_hessian_matrix(self): - """ - The Hessian matrix of a model should be a 3D list (matrix) containing - all the 2nd partial derivatives. 
- """ - a, b, c = parameters('a, b, c') - a_i, b_i, c_i = variables('a_i, b_i, c_i') - - model = Model({a_i: 2 * a**2 + 3 * b, b_i: 5 * b**2, c_i: 7 * c*b}) - self.assertEqual([[[4, 0, 0], [0, 0, 0], [0, 0, 0]], - [[0, 0, 0], [0, 10, 0], [0, 0, 0]], - [[0, 0, 0], [0, 0, 7], [0, 7, 0]]], model.hessian) - - def test_likelihood_fitting_exponential(self): - """ - Fit using the likelihood method. - """ - b = Parameter('b', value=4, min=3.0) - x, y = variables('x, y') - pdf = {y: Exp(x, 1/b)} - - # Draw points from an Exp(5) exponential distribution. - np.random.seed(100) - xdata = np.random.exponential(5, 1000000) - - # Expected parameter values - mean = np.mean(xdata) - stdev = np.std(xdata) - mean_stdev = stdev / np.sqrt(len(xdata)) - - with self.assertRaises(TypeError): - fit = Fit(pdf, x=xdata, sigma_y=2.0, objective=LogLikelihood) - fit = Fit(pdf, xdata, objective=LogLikelihood) - fit_result = fit.execute() - pdf_i = fit.model(x=xdata, **fit_result.params).y # probabilities - likelihood = np.product(pdf_i) - loglikelihood = np.sum(np.log(pdf_i)) - - self.assertAlmostEqual(fit_result.value(b) / mean, 1, 3) - self.assertAlmostEqual(fit_result.value(b) / stdev, 1, 3) - self.assertAlmostEqual(fit_result.stdev(b) / mean_stdev, 1, 3) - - self.assertAlmostEqual(likelihood, fit_result.likelihood) - self.assertAlmostEqual(loglikelihood, fit_result.log_likelihood) - - def test_likelihood_fitting_gaussian(self): - """ - Fit using the likelihood method. - """ - mu, sig = parameters('mu, sig') - sig.min = 0.01 - sig.value = 3.0 - mu.value = 50. + with pytest.warns(DeprecationWarning): x = Variable() - pdf = GradientModel(Gaussian(x, mu, sig)) - - np.random.seed(10) - xdata = np.random.normal(51., 3.5, 10000) - - # Expected parameter values - mean = np.mean(xdata) - stdev = np.std(xdata) - mean_stdev = stdev/np.sqrt(len(xdata)) - - fit = Fit(pdf, xdata, objective=LogLikelihood) - fit_result = fit.execute() - - self.assertAlmostEqual(fit_result.value(mu) / mean, 1, 6) - self.assertAlmostEqual(fit_result.stdev(mu) / mean_stdev, 1, 3) - self.assertAlmostEqual(fit_result.value(sig) / np.std(xdata), 1, 6) - - def test_likelihood_fitting_bivariate_gaussian(self): - """ - Fit using the likelihood method. 
- """ - # Make variables and parameters - x = Variable('x') - y = Variable('y') - x0 = Parameter('x0', value=0.6, min=0.5, max=0.7) - sig_x = Parameter('sig_x', value=0.1, max=1.0) - y0 = Parameter('y0', value=0.7, min=0.6, max=0.9) - sig_y = Parameter('sig_y', value=0.05, max=1.0) - rho = Parameter('rho', value=0.001, min=-1, max=1) - - pdf = BivariateGaussian(x=x, mu_x=x0, sig_x=sig_x, y=y, mu_y=y0, - sig_y=sig_y, rho=rho) - - # Draw 100000 samples from a bivariate distribution - mean = [0.59, 0.8] - r = 0.6 - cov = np.array([[0.11 ** 2, 0.11 * 0.23 * r], - [0.11 * 0.23 * r, 0.23 ** 2]]) - np.random.seed(42) - xdata, ydata = np.random.multivariate_normal(mean, cov, 100000).T - - fit = Fit(pdf, x=xdata, y=ydata, objective=LogLikelihood) - fit_result = fit.execute() - - self.assertAlmostEqual(fit_result.value(x0) / mean[0], 1, 2) - self.assertAlmostEqual(fit_result.value(y0) / mean[1], 1, 2) - self.assertAlmostEqual(fit_result.value(sig_x) / np.sqrt(cov[0, 0]), 1, 2) - self.assertAlmostEqual(fit_result.value(sig_y) / np.sqrt(cov[1, 1]), 1, 2) - self.assertAlmostEqual(fit_result.value(rho) / r, 1, 2) - - marginal = integrate(pdf, (y, -oo, oo), conds='none') - fit = Fit(marginal, x=xdata, objective=LogLikelihood) - with self.assertRaises(NameError): - # Should raise a NameError, not a TypeError, see #219 - fit.execute() - - def test_evaluate_model(self): - """ - Makes sure that models are callable and give the expected answer. - """ - A = Parameter('A') - x = Variable('x') - new = A * x ** 2 - - self.assertEqual(new(x=2, A=2), 8) - self.assertNotEqual(new(x=2, A=3), 8) - - def test_simple_sigma(self): - """ - Make sure we produce the same results as scipy's curve_fit, with and - without sigmas, and compare the results of both to a known value. - """ - t_data = np.array([1.4, 2.1, 2.6, 3.0, 3.3]) - y_data = np.array([10, 20, 30, 40, 50]) - - sigma = 0.2 - n = np.array([5, 3, 8, 15, 30]) - sigma_t = sigma / np.sqrt(n) - - # We now define our model - y = Variable('x') - g = Parameter('g') - t_model = (2 * y / g)**0.5 - - fit = Fit(t_model, y_data, t_data) # , sigma=sigma_t) - fit_result = fit.execute() - - # h_smooth = np.linspace(0,60,100) - # t_smooth = t_model(y=h_smooth, **fit_result.params) - - # Lets with the results from curve_fit, no weights - popt_noweights, pcov_noweights = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data) - - self.assertAlmostEqual(fit_result.value(g), popt_noweights[0]) - self.assertAlmostEqual(fit_result.stdev(g), np.sqrt(pcov_noweights[0, 0]), 6) - - # Same sigma everywere - fit = Fit(t_model, y_data, t_data, 0.0031, absolute_sigma=False) - fit_result = fit.execute() - popt_sameweights, pcov_sameweights = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data, sigma=0.0031*np.ones(len(y_data)), absolute_sigma=False) - self.assertAlmostEqual(fit_result.value(g), popt_sameweights[0], 4) - self.assertAlmostEqual(fit_result.stdev(g), np.sqrt(pcov_sameweights[0, 0]), 4) - # Same weight everywere should be the same as no weight when absolute_sigma=False - self.assertAlmostEqual(fit_result.value(g), popt_noweights[0], 4) - self.assertAlmostEqual(fit_result.stdev(g), np.sqrt(pcov_noweights[0, 0]), 4) - - # Different sigma for every point - fit = Fit(t_model, y_data, t_data, 0.1*sigma_t, absolute_sigma=False) - fit_result = fit.execute() - popt, pcov = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data, sigma=.1*sigma_t) - - self.assertAlmostEqual(fit_result.value(g), popt[0]) - self.assertAlmostEqual(fit_result.stdev(g), np.sqrt(pcov[0, 0]), 6) - - # 
according to Mathematica - self.assertAlmostEqual(fit_result.value(g), 9.095, 3) - self.assertAlmostEqual(fit_result.stdev(g), 0.102, 3) - - def test_error_advanced(self): - """ - Models an example from the mathematica docs and try's to replicate it - using both symfit and scipy's curve_fit. - http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html - """ - data = [ - [0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6], - [1., 2.2, 5.9], [1.8, 2.4, 7.2], [9., 1.7, 7.], - [7.9, 8., 10.4], [4.9, 3.9, 9.], [2.3, 2.6, 7.4], - [4.7, 8.4, 10.] - ] - xdata, ydata, zdata = [np.array(data) for data in zip(*data)] - xy = np.vstack((xdata, ydata)) - errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2]) - - # raise Exception(xy, z) - a = Parameter(value=3.0) - b = Parameter(value=0.9) - c = Parameter(value=5) - x = Variable('x') - y = Variable('y') - z = Variable('z') - model = {z: a * log(b * x + c * y)} - - # Use a gradient model because Mathematica uses the Hessian - # approximation instead of the exact Hessian. - model = GradientModel(model) - fit = Fit(model, x=xdata, y=ydata, z=zdata, absolute_sigma=False) - fit_result = fit.execute() - # Same as Mathematica default behavior. - self.assertAlmostEqual(fit_result.value(a) / 2.9956, 1, 4) - self.assertAlmostEqual(fit_result.value(b) / 0.563212, 1, 4) - self.assertAlmostEqual(fit_result.value(c) / 3.59732, 1, 4) - self.assertAlmostEqual(fit_result.stdev(a) / 0.278304, 1, 4) - self.assertAlmostEqual(fit_result.stdev(b) / 0.224107, 1, 4) - self.assertAlmostEqual(fit_result.stdev(c) / 0.980352, 1, 4) + model = {y: a*x**b} + + with pytest.raises(TypeError): + fit = Fit(model, x=xdata, y=ydata) + + +def test_vector_fitting(): + """ + Tests fitting to a 3 component vector valued function, without bounds + or guesses. + """ + a, b, c = parameters('a, b, c') + a_i, b_i, c_i = variables('a_i, b_i, c_i') + + model = {a_i: a, b_i: b, c_i: c} + + xdata = np.array([ + [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.], + [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8], + [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1], + ]) + + fit = Fit( + model=model, + a_i=xdata[0], + b_i=xdata[1], + c_i=xdata[2], + minimizer=MINPACK + ) + fit_result = fit.execute() + + assert fit_result.value(a) == pytest.approx(np.mean(xdata[0]), 1e-5) + assert fit_result.value(b) == pytest.approx(np.mean(xdata[1]), 1e-4) + assert fit_result.value(c) == pytest.approx(np.mean(xdata[2]), 1e-5) + + +def test_vector_none_fitting(): + """ + Fit to a 3 component vector valued function with one variables data set + to None, without bounds or guesses. + """ + a, b, c = parameters('a, b, c') + a_i, b_i, c_i = variables('a_i, b_i, c_i') + + model = {a_i: a, b_i: b, c_i: c} + + xdata = np.array([ + [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.], + [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8], + [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1], + ]) + + fit_none = Fit( + model=model, + a_i=xdata[0], + b_i=xdata[1], + c_i=None, + minimizer=MINPACK + ) + fit = Fit( + model=model, + a_i=xdata[0], + b_i=xdata[1], + c_i=xdata[2], + minimizer=MINPACK + ) + fit_none_result = fit_none.execute() + fit_result = fit.execute() + + assert fit_none_result.value(b) == pytest.approx(fit_result.value(b), 1e-4) + assert fit_none_result.value(a) == pytest.approx(fit_result.value(a), 1e-4) + # the parameter without data should be unchanged. + assert fit_none_result.value(c) == pytest.approx(1.0) + + +def test_vector_fitting_guess(): + """ + Tests fitting to a 3 component vector valued function, with guesses. 
+ """ + a, b, c = parameters('a, b, c') + a.value = 10 + b.value = 100 + a_i, b_i, c_i = variables('a_i, b_i, c_i') + + model = {a_i: a, b_i: b, c_i: c} + + xdata = np.array([ + [10.1, 9., 10.5, 11.2, 9.5, 9.6, 10.], + [102.1, 101., 100.4, 100.8, 99.2, 100., 100.8], + [71.6, 73.2, 69.5, 70.2, 70.8, 70.6, 70.1], + ]) + + fit = Fit( + model=model, + a_i=xdata[0], + b_i=xdata[1], + c_i=xdata[2], + minimizer=MINPACK + ) + fit_result = fit.execute() + + assert fit_result.value(a) == pytest.approx(np.mean(xdata[0]), 1e-4) + assert fit_result.value(b) == pytest.approx(np.mean(xdata[1]), 1e-4) + assert fit_result.value(c) == pytest.approx(np.mean(xdata[2]), 1e-4) + + +def test_fitting(): + """ + Tests fitting with NumericalLeastSquares. Makes sure that the resulting + objects and values are of the right type, and that the fit_result does + not have unexpected members. + """ + xdata = np.linspace(1, 10, 10) + ydata = 3*xdata**2 + + a = Parameter() # 3.1, min=2.5, max=3.5 + b = Parameter() + x = Variable() + new = a*x**b + + fit = Fit(new, xdata, ydata, minimizer=MINPACK) + + fit_result = fit.execute() + assert isinstance(fit_result, FitResults) + assert fit_result.value(a) == pytest.approx(3.0) + assert fit_result.value(b) == pytest.approx(2.0) + + assert isinstance(fit_result.stdev(a), float) + assert isinstance(fit_result.stdev(b), float) + + assert isinstance(fit_result.r_squared, float) + assert fit_result.r_squared == 1.0 # by definition since there's no fuzzyness + + +def test_grid_fitting(): + """ + Tests fitting a scalar function with 2 independent variables. + """ + xdata = np.arange(-5, 5, 1) + ydata = np.arange(5, 15, 1) + xx, yy = np.meshgrid(xdata, ydata, sparse=False) + + zdata = (2.5*xx**2 + 3.0*yy**2) + + a = Parameter(value=2.5, max=2.75) + b = Parameter(value=3.0, min=2.75) + x = Variable('x') + y = Variable('y') + z = Variable('z') + new = {z: a*x**2 + b*y**2} + + fit = Fit(new, x=xx, y=yy, z=zdata) + results = fit.execute() + + assert isinstance(fit.minimizer, LBFGSB) + + assert results.value(a) == pytest.approx(2.5) + assert results.value(b) == pytest.approx(3.) + +# TODO: Should be 3 tests? + + +def test_model_callable(): + """ + Tests if Model objects are callable in the way expected. Calling a + model should evaluate it's expression(s) with the given values. The + return value is a namedtuple. + + The signature should also work so inspection is saved. 
+ """ + a, b = parameters('a, b') + x, y = variables('x, y') + new = a*x**2 + b*y**2 + model = Model(new) + ans = model(3, 3, 2, 2) + assert isinstance(ans, tuple) + z, = ans + + assert z == 36 + for arg_name, name in zip(('x', 'y', 'a', 'b'), inspect_sig.signature(model).parameters): + assert arg_name == name + + # From Model __init__ directly + model = Model([ + a*x**2, + 4*b*y**2, + a*x**2 + b*y**2 + ]) + z_1, z_2, z_3 = model(3, 3, 2, 2) + + assert z_1 == 18 + assert z_2 == 72 + assert z_3 == 36 + for arg_name, name in zip(('x', 'y', 'a', 'b'), inspect_sig.signature(model).parameters): + assert arg_name == name + + # From dict + z_1, z_2, z_3 = variables('z_1, z_2, z_3') + model = Model({ + z_1: a*x**2, + z_2: 4*b*y**2, + z_3: a*x**2 + b*y**2 + }) + z_1, z_2, z_3 = model(3, 3, 2, 2) + + assert z_1 == 18 + assert z_2 == 72 + assert z_3 == 36 + for arg_name, name in zip(('x', 'y', 'a', 'b'), inspect_sig.signature(model).parameters): + assert arg_name == name + + +def test_2D_fitting(): + """ + Makes sure that a scalar model with 2 independent variables has the + proper signature, and that the fit result is of the correct type. + """ + xdata = np.random.randint(-10, 11, size=(2, 400)) + zdata = 2.5*xdata[0]**2 + 7.0*xdata[1]**2 + + a = Parameter('a') + b = Parameter('b') + x = Variable('x') + y = Variable('y') + new = a*x**2 + b*y**2 + + fit = Fit(new, xdata[0], xdata[1], zdata) + + result = fit.model(xdata[0], xdata[1], 2, 3) + assert isinstance(result, tuple) + + for arg_name, name in zip(('x', 'y', 'a', 'b'), inspect_sig.signature(fit.model).parameters): + assert arg_name == name + + fit_result = fit.execute() + assert isinstance(fit_result, FitResults) + + +def test_gaussian_fitting(): + """ + Tests fitting to a gaussian function and fit_result.params unpacking. + """ + xdata = 2*np.random.rand(10000) - 1 # random betwen [-1, 1] + ydata = 5.0 * scipy.stats.norm.pdf(xdata, loc=0.0, scale=1.0) + + x0 = Parameter('x0') + sig = Parameter('sig') + A = Parameter('A') + x = Variable('x') + g = GradientModel(A * Gaussian(x, x0, sig)) + + fit = Fit(g, xdata, ydata) + assert isinstance(fit.objective, LeastSquares) + fit_result = fit.execute() + + assert fit_result.value(A) == pytest.approx(5.0) + assert np.abs(fit_result.value(sig)) == pytest.approx(1.0) + assert fit_result.value(x0) == pytest.approx(0.0) + # raise Exception([i for i in fit_result.params]) + sexy = g(x=2.0, **fit_result.params) + ugly = g( + x=2.0, + x0=fit_result.value(x0), + A=fit_result.value(A), + sig=fit_result.value(sig), + ) + assert sexy == ugly + + +def test_2_gaussian_2d_fitting(): + """ + Tests fitting to a scalar gaussian with 2 independent variables with + tight bounds. + """ + mean = (0.3, 0.4) # x, y mean 0.6, 0.4 + cov = [[0.01**2, 0], [0, 0.01**2]] + # TODO: evaluate gaussian at 100x100 points and add appropriate noise + data = np.random.multivariate_normal(mean, cov, 3000000) + mean = (0.7, 0.8) # x, y mean 0.6, 0.4 + cov = [[0.01**2, 0], [0, 0.01**2]] + data_2 = np.random.multivariate_normal(mean, cov, 3000000) + data = np.vstack((data, data_2)) + + # Insert them as y,x here as np fucks up cartesian conventions. 
+    ydata, xedges, yedges = np.histogram2d(data[:, 1], data[:, 0], bins=100,
+                                           range=[[0.0, 1.0], [0.0, 1.0]])
+    xcentres = (xedges[:-1] + xedges[1:]) / 2
+    ycentres = (yedges[:-1] + yedges[1:]) / 2
+
+    # Make a valid grid to match ydata
+    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False)
+    # xdata = np.dstack((xx, yy)).T
+
+    x = Variable('x')
+    y = Variable('y')
+
+    x0_1 = Parameter(value=0.7, min=0.6, max=0.9)
+    sig_x_1 = Parameter(value=0.1, min=0.0, max=0.2)
+    y0_1 = Parameter(value=0.8, min=0.6, max=0.9)
+    sig_y_1 = Parameter(value=0.1, min=0.0, max=0.2)
+    A_1 = Parameter()
+    g_1 = A_1 * Gaussian(x, x0_1, sig_x_1) * Gaussian(y, y0_1, sig_y_1)
+
+    x0_2 = Parameter(value=0.3, min=0.2, max=0.5)
+    sig_x_2 = Parameter(value=0.1, min=0.0, max=0.2)
+    y0_2 = Parameter(value=0.4, min=0.2, max=0.5)
+    sig_y_2 = Parameter(value=0.1, min=0.0, max=0.2)
+    A_2 = Parameter()
+    g_2 = A_2 * Gaussian(x, x0_2, sig_x_2) * Gaussian(y, y0_2, sig_y_2)
+
+    model = GradientModel(g_1 + g_2)
+    fit = Fit(model, xx, yy, ydata)
+    fit_result = fit.execute()
+
+    assert isinstance(fit.minimizer, LBFGSB)
+
+    img = model(x=xx, y=yy, **fit_result.params)[0]
+    img_g_1 = g_1(x=xx, y=yy, **fit_result.params)
+    img_g_2 = g_2(x=xx, y=yy, **fit_result.params)
+    assert img == pytest.approx(img_g_1 + img_g_2)
+
+    # Equal up to some precision. Not much obviously.
+    assert fit_result.value(x0_1) == pytest.approx(0.7, 1e-3)
+    assert fit_result.value(y0_1) == pytest.approx(0.8, 1e-3)
+    assert fit_result.value(x0_2) == pytest.approx(0.3, 1e-3)
+    assert fit_result.value(y0_2) == pytest.approx(0.4, 1e-3)
+
+
+def test_gaussian_2d_fitting():
+    """
+    Tests fitting to a scalar gaussian function with 2 independent
+    variables.
+    """
+    mean = (0.6, 0.4)  # x, y mean 0.6, 0.4
+    cov = [[0.2**2, 0], [0, 0.1**2]]
+
+    # TODO: evaluate gaussian at 100x100 points and add appropriate noise
+    data = np.random.multivariate_normal(mean, cov, 1000000)
+
+    # Insert them as y,x here, since np reverses the cartesian convention.
+    ydata, xedges, yedges = np.histogram2d(data[:, 0], data[:, 1], bins=100,
+                                           range=[[0.0, 1.0], [0.0, 1.0]])
+    xcentres = (xedges[:-1] + xedges[1:]) / 2
+    ycentres = (yedges[:-1] + yedges[1:]) / 2
+
+    # Make a valid grid to match ydata
+    xx, yy = np.meshgrid(xcentres, ycentres, sparse=False, indexing='ij')
+
+    x0 = Parameter(value=mean[0])
+    sig_x = Parameter(min=0.0)
+    x = Variable('x')
+    y0 = Parameter(value=mean[1])
+    sig_y = Parameter(min=0.0)
+    A = Parameter(min=1, value=100)
+    y = Variable('y')
+    g = Variable('g')
+    # g = A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)
+    model = Model({g: A * Gaussian(x, x0, sig_x) * Gaussian(y, y0, sig_y)})
+    fit = Fit(model, x=xx, y=yy, g=ydata, minimizer=MINPACK)
+    fit_result = fit.execute()
+
+    assert fit_result.value(x0) == pytest.approx(np.mean(data[:, 0]), 1e-1)
+    assert fit_result.value(y0) == pytest.approx(np.mean(data[:, 1]), 1e-1)
+    assert np.abs(fit_result.value(sig_x)) == pytest.approx(np.std(data[:, 0]), 1e-1)
+    assert np.abs(fit_result.value(sig_y)) == pytest.approx(np.std(data[:, 1]), 1e-1)
+    assert fit_result.r_squared >= 0.99
+
+
+def test_jacobian_matrix():
+    """
+    The jacobian matrix of a model should be a 2D list (matrix) containing
+    all the partial derivatives. 
+ """ + a, b, c = parameters('a, b, c') + a_i, b_i, c_i = variables('a_i, b_i, c_i') + + model = Model({a_i: 2 * a + 3 * b, b_i: 5 * b, c_i: 7 * c}) + assert [[2, 3, 0], [0, 5, 0], [0, 0, 7]] == model.jacobian + + +def test_hessian_matrix(): + """ + The Hessian matrix of a model should be a 3D list (matrix) containing + all the 2nd partial derivatives. + """ + a, b, c = parameters('a, b, c') + a_i, b_i, c_i = variables('a_i, b_i, c_i') + + model = Model({a_i: 2 * a**2 + 3 * b, b_i: 5 * b**2, c_i: 7 * c*b}) + assert [[[4, 0, 0], [0, 0, 0], [0, 0, 0]], + [[0, 0, 0], [0, 10, 0], [0, 0, 0]], + [[0, 0, 0], [0, 0, 7], [0, 7, 0]]] == model.hessian + + +def test_likelihood_fitting_exponential(): + """ + Fit using the likelihood method. + """ + b = Parameter('b', value=4, min=3.0) + x, y = variables('x, y') + pdf = {y: Exp(x, 1/b)} + + # Draw points from an Exp(5) exponential distribution. + np.random.seed(100) + # TODO: Do we *really* need 1m points? + xdata = np.random.exponential(5, 1000000) + + # Expected parameter values + mean = np.mean(xdata) + stdev = np.std(xdata) + mean_stdev = stdev / np.sqrt(len(xdata)) + + with pytest.raises(TypeError): + fit = Fit(pdf, x=xdata, sigma_y=2.0, objective=LogLikelihood) + + fit = Fit(pdf, xdata, objective=LogLikelihood) + fit_result = fit.execute() + pdf_i = fit.model(x=xdata, **fit_result.params).y # probabilities + likelihood = np.product(pdf_i) + loglikelihood = np.sum(np.log(pdf_i)) + + assert fit_result.value(b) == pytest.approx(mean, 1e-3) + assert fit_result.value(b) == pytest.approx(mean, 1e-3) + assert fit_result.value(b) == pytest.approx(mean, 1e-3) + assert fit_result.value(b) == pytest.approx(mean, 1e-3) + assert fit_result.value(b) == pytest.approx(mean, 1e-3) + assert fit_result.value(b) == pytest.approx(mean, 1e-3) + assert fit_result.value(b) == pytest.approx(stdev, 1e-3) + assert fit_result.stdev(b) == pytest.approx(mean_stdev, 1e-3) + + assert likelihood == pytest.approx(fit_result.likelihood) + assert loglikelihood == pytest.approx(fit_result.log_likelihood) + + +def test_likelihood_fitting_gaussian(): + """ + Fit using the likelihood method. + """ + mu, sig = parameters('mu, sig') + sig.min = 0.01 + sig.value = 3.0 + mu.value = 50. + x = Variable() + pdf = GradientModel(Gaussian(x, mu, sig)) + + np.random.seed(10) + # TODO: Do we really need 1k points? + xdata = np.random.normal(51., 3.5, 10000) + + # Expected parameter values + mean = np.mean(xdata) + stdev = np.std(xdata) + mean_stdev = stdev/np.sqrt(len(xdata)) + + fit = Fit(pdf, xdata, objective=LogLikelihood) + fit_result = fit.execute() + + assert fit_result.value(mu) == pytest.approx(mean, 1e-6) + assert fit_result.stdev(mu) == pytest.approx(mean_stdev, 1e-3) + assert fit_result.value(sig) == pytest.approx(np.std(xdata), 1e-6) + + +def test_likelihood_fitting_bivariate_gaussian(): + """ + Fit using the likelihood method. + """ + # Make variables and parameters + x = Variable('x') + y = Variable('y') + x0 = Parameter('x0', value=0.6, min=0.5, max=0.7) + sig_x = Parameter('sig_x', value=0.1, max=1.0) + y0 = Parameter('y0', value=0.7, min=0.6, max=0.9) + sig_y = Parameter('sig_y', value=0.05, max=1.0) + rho = Parameter('rho', value=0.001, min=-1, max=1) + + pdf = BivariateGaussian(x=x, mu_x=x0, sig_x=sig_x, y=y, mu_y=y0, + sig_y=sig_y, rho=rho) + + # Draw 100000 samples from a bivariate distribution + mean = [0.59, 0.8] + r = 0.6 + cov = np.array([[0.11 ** 2, 0.11 * 0.23 * r], + [0.11 * 0.23 * r, 0.23 ** 2]]) + np.random.seed(42) + # TODO: Do we really need 100k points? 
+    xdata, ydata = np.random.multivariate_normal(mean, cov, 100000).T
+
+    fit = Fit(pdf, x=xdata, y=ydata, objective=LogLikelihood)
+    fit_result = fit.execute()
+
+    assert fit_result.value(x0) == pytest.approx(mean[0], 1e-2)
+    assert fit_result.value(y0) == pytest.approx(mean[1], 1e-2)
+    assert fit_result.value(sig_x) == pytest.approx(np.sqrt(cov[0, 0]), 1e-2)
+    assert fit_result.value(sig_y) == pytest.approx(np.sqrt(cov[1, 1]), 1e-2)
+    assert fit_result.value(rho) == pytest.approx(r, 1e-2)
+
+    marginal = integrate(pdf, (y, -oo, oo), conds='none')
+    fit = Fit(marginal, x=xdata, objective=LogLikelihood)
+
+    with pytest.raises(NameError):
+        # Should raise a NameError, not a TypeError, see #219
+        fit.execute()
+
+
+def test_evaluate_model():
+    """
+    Makes sure that models are callable and give the expected answer.
+    """
+    A = Parameter('A')
+    x = Variable('x')
+    new = A * x ** 2
+
+    assert new(x=2, A=2) == 8
+    assert new(x=2, A=3) != 8
+
+
+def test_simple_sigma():
+    """
+    Make sure we produce the same results as scipy's curve_fit, with and
+    without sigmas, and compare the results of both to a known value.
+    """
+    t_data = np.array([1.4, 2.1, 2.6, 3.0, 3.3])
+    y_data = np.array([10, 20, 30, 40, 50])
+
+    sigma = 0.2
+    n = np.array([5, 3, 8, 15, 30])
+    sigma_t = sigma / np.sqrt(n)
+
+    # We now define our model
+    y = Variable('x')
+    g = Parameter('g')
+    t_model = (2 * y / g)**0.5
+
+    fit = Fit(t_model, y_data, t_data)  # , sigma=sigma_t)
+    fit_result = fit.execute()
+
+    # h_smooth = np.linspace(0,60,100)
+    # t_smooth = t_model(y=h_smooth, **fit_result.params)
+
+    # Compare with the results from curve_fit, no weights
+    popt_noweights, pcov_noweights = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data)
+
+    assert fit_result.value(g) == pytest.approx(popt_noweights[0])
+    assert fit_result.stdev(g) == pytest.approx(np.sqrt(pcov_noweights[0, 0]), 1e-6)
+
+    # Same sigma everywhere
+    fit = Fit(t_model, y_data, t_data, 0.0031, absolute_sigma=False)
+    fit_result = fit.execute()
+    popt_sameweights, pcov_sameweights = curve_fit(
+        lambda y, p: (2 * y / p)**0.5,
+        y_data,
+        t_data,
+        sigma=0.0031*np.ones(len(y_data)),
+        absolute_sigma=False
+    )
+    assert fit_result.value(g) == pytest.approx(popt_sameweights[0], 1e-4)
+    assert fit_result.stdev(g) == pytest.approx(np.sqrt(pcov_sameweights[0, 0]), 1e-4)
+    # The same weight everywhere should be the same as no weight when absolute_sigma=False
+    assert fit_result.value(g) == pytest.approx(popt_noweights[0], 1e-4)
+    assert fit_result.stdev(g) == pytest.approx(np.sqrt(pcov_noweights[0, 0]), 1e-4)
+
+    # Different sigma for every point
+    fit = Fit(t_model, y_data, t_data, 0.1*sigma_t, absolute_sigma=False)
+    fit_result = fit.execute()
+    popt, pcov = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data, sigma=.1*sigma_t)
+
+    assert fit_result.value(g) == pytest.approx(popt[0])
+    assert fit_result.stdev(g) == pytest.approx(np.sqrt(pcov[0, 0]), 1e-6)
+
+    # According to Mathematica
+    assert fit_result.value(g) == pytest.approx(9.095, 1e-3)
+    assert fit_result.stdev(g) == pytest.approx(0.102, 1e-2)
+
+
+def test_error_advanced():
+    """
+    Models an example from the Mathematica docs and tries to replicate it
+    using both symfit and scipy's curve_fit.
+    http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html
+    """
+    data = [
+        [0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6],
+        [1., 2.2, 5.9], [1.8, 2.4, 7.2], [9., 1.7, 7.],
+        [7.9, 8., 10.4], [4.9, 3.9, 9.], [2.3, 2.6, 7.4],
+        [4.7, 8.4, 10.] 
+    ]
+    xdata, ydata, zdata = [np.array(data) for data in zip(*data)]
+    xy = np.vstack((xdata, ydata))
+    errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2])
+
+    # raise Exception(xy, z)
+    a = Parameter(value=3.0)
+    b = Parameter(value=0.9)
+    c = Parameter(value=5)
+    x = Variable('x')
+    y = Variable('y')
+    z = Variable('z')
+    model = {z: a * log(b * x + c * y)}
+
+    # Use a gradient model because Mathematica uses the Hessian
+    # approximation instead of the exact Hessian.
+    model = GradientModel(model)
+    fit = Fit(model, x=xdata, y=ydata, z=zdata, absolute_sigma=False)
+    fit_result = fit.execute()
+
+    # Same as Mathematica default behavior.
+    assert fit_result.value(a) == pytest.approx(2.9956, 1e-4)
+    assert fit_result.value(b) == pytest.approx(0.563212, 1e-4)
+    assert fit_result.value(c) == pytest.approx(3.59732, 1e-4)
+    assert fit_result.stdev(a) == pytest.approx(0.278304, 1e-4)
+    assert fit_result.stdev(b) == pytest.approx(0.224107, 1e-4)
+    assert fit_result.stdev(c) == pytest.approx(0.980352, 1e-4)
+
+    fit = Fit(model, xdata, ydata, zdata, absolute_sigma=True)
+    fit_result = fit.execute()
+    # Same as Mathematica in Measurement error mode, but without supplying
+    # any errors.
+    assert fit_result.value(a) == pytest.approx(2.9956, 1e-4)
+    assert fit_result.value(b) == pytest.approx(0.563212, 1e-4)
+    assert fit_result.value(c) == pytest.approx(3.59732, 1e-4)
+    assert fit_result.stdev(a) == pytest.approx(0.643259, 1e-4)
+    assert fit_result.stdev(b) == pytest.approx(0.517992, 1e-4)
+    assert fit_result.stdev(c) == pytest.approx(2.26594, 1e-4)
+
+    fit = Fit(model, xdata, ydata, zdata, sigma_z=errors)
+    fit_result = fit.execute()
+
+    popt, pcov, infodict, errmsg, ier = curve_fit(
+        lambda x_vec, a, b, c: a * np.log(b * x_vec[0] + c * x_vec[1]),
+        xy, zdata, sigma=errors, absolute_sigma=True, full_output=True
+    )
+
+    # Same as curve_fit?
+    assert fit_result.value(a) == pytest.approx(popt[0], 1e-4)
+    assert fit_result.value(b) == pytest.approx(popt[1], 1e-4)
+    assert fit_result.value(c) == pytest.approx(popt[2], 1e-4)
+    assert fit_result.stdev(a) == pytest.approx(np.sqrt(pcov[0, 0]), 1e-4)
+    assert fit_result.stdev(b) == pytest.approx(np.sqrt(pcov[1, 1]), 1e-4)
+    assert fit_result.stdev(c) == pytest.approx(np.sqrt(pcov[2, 2]), 1e-4)
+
+    # Same as Mathematica with MEASUREMENT ERROR
+    assert fit_result.value(a) == pytest.approx(2.68807, 1e-4)
+    assert fit_result.value(b) == pytest.approx(0.941344, 1e-4)
+    assert fit_result.value(c) == pytest.approx(5.01541, 1e-4)
+    assert fit_result.stdev(a) == pytest.approx(0.0974628, 1e-4)
+    assert fit_result.stdev(b) == pytest.approx(0.247018, 1e-4)
+    assert fit_result.stdev(c) == pytest.approx(0.597661, 1e-4)
+
+
+def test_error_analytical():
+    """
+    Test using a case where the analytical answer is known. Uses both
+    symfit and scipy's curve_fit. 
+    Modeled after:
+    http://nbviewer.ipython.org/urls/gist.github.com/taldcroft/5014170/raw/31e29e235407e4913dc0ec403af7ed524372b612/curve_fit.ipynb
+    """
+    N = 10000
+    sigma = 10.0 * np.ones(N)
+    xn = np.arange(N, dtype=np.float64)
+    # yn = np.zeros_like(xn)
+    np.random.seed(10)
+    yn = np.random.normal(size=len(xn), scale=sigma)
+
+    a = Parameter()
+    y = Variable('y')
+    model = {y: a}
+
+    fit = Fit(model, y=yn, sigma_y=sigma)
+    fit_result = fit.execute()
+
+    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x),
+                           xn, yn, sigma=sigma, absolute_sigma=True)
+    assert fit_result.value(a) == pytest.approx(popt[0], 1e-5)
+    assert fit_result.stdev(a) == pytest.approx(np.sqrt(np.diag(pcov))[0], 1e-2)
+
+    fit_no_sigma = Fit(model, yn)
+    fit_result_no_sigma = fit_no_sigma.execute()
+
+    popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn)
+    # With or without sigma, the best-fit params should be in agreement in case of equal weights
+    assert fit_result.value(a) == pytest.approx(fit_result_no_sigma.value(a), 1e-5)
+    # Since symfit is all about absolute errors, the sigma will not be in agreement
+    assert fit_result.stdev(a) != fit_result_no_sigma.stdev(a)
+    assert fit_result_no_sigma.stdev(a) == pytest.approx(pcov[0][0]**0.5, 1e-5)
+    assert fit_result_no_sigma.value(a) == pytest.approx(popt[0], 1e-5)
+
+    # Analytical answer for mean of N(0,1):
+    mu = 0.0
+    sigma_mu = sigma[0]/N**0.5
+
+    assert fit_result.stdev(a) == pytest.approx(sigma_mu, 1e-5)
+
+# TODO: redundant with test_error_analytical?
+@pytest.mark.skip()
+def test_straight_line_analytical():
+    """
+    Test symfit against a straight line, for which the parameters and their
+    uncertainties are known analytically. Assuming equal weights.
+    """
+    data = [[0, 1], [1, 0], [3, 2], [5, 4]]
+    x, y = (np.array(i, dtype='float64') for i in zip(*data))
+    # x = np.arange(0, 100, 0.1)
+    # np.random.seed(10)
+    # y = 3.0*x + 105.0 + np.random.normal(size=x.shape)
+
+    dx = x - x.mean()
+    dy = y - y.mean()
+    mean_squared_x = np.mean(x**2) - np.mean(x)**2
+    mean_xy = np.mean(x * y) - np.mean(x)*np.mean(y)
+    a = mean_xy/mean_squared_x
+    b = y.mean() - a * x.mean()
+    assert a == pytest.approx(0.694915, 1e-6)  # values from Mathematica
+    assert b == pytest.approx(0.186441, 1e-6)
+
+    S = np.sum((y - (a*x + b))**2)
+    var_a_exact = S/(len(x) * (len(x) - 2) * mean_squared_x)
+    var_b_exact = var_a_exact*np.mean(x ** 2)
+    a_exact = a
+    b_exact = b
+
+    # We will now compare these exact results with values from symfit
+    a, b, x_var = Parameter(name='a', value=3.0), Parameter(name='b'), Variable(name='x')
+    model = a*x_var + b
+    fit = Fit(model, x, y, absolute_sigma=False)
+    fit_result = fit.execute()
+
+    popt, pcov = curve_fit(lambda z, c, d: c * z + d, x, y,
+                           Dfun=lambda p, x, y, func: np.transpose([x, np.ones_like(x)]))
+    # Dfun=lambda p, x, y, func: print(p, func, x, y))
+
+    # curve_fit
+    assert a_exact == pytest.approx(popt[0], 1e-4)
+    assert b_exact == pytest.approx(popt[1], 1e-4)
+    assert var_a_exact == pytest.approx(pcov[0][0], 1e-6)
+    assert var_b_exact == pytest.approx(pcov[1][1], 1e-6)
+
+    assert a_exact == pytest.approx(fit_result.params.a, 1e-4)
+    assert b_exact == pytest.approx(fit_result.params.b, 1e-4)
+    assert var_a_exact**0.5 == pytest.approx(fit_result.params.a_stdev, 1e-6)
+    assert var_b_exact**0.5 == pytest.approx(fit_result.params.b_stdev, 1e-6)
+
+
+def test_fixed_parameters():
+    """
+    Make sure fixed parameters don't change on fitting
+    """
+    a, b, c, d = parameters('a, b, c, d')
+    x, y = variables('x, y')
+
+    c.value = 4.0
+    a.min, 
a.max = 1.0, 5.0 # Bounds are needed for DifferentialEvolution + b.min, b.max = 1.0, 5.0 + c.min, c.max = 1.0, 5.0 + d.min, d.max = 1.0, 5.0 + c.fixed = True + + model = Model({y: a * exp(-(x - b)**2 / (2 * c**2)) + d}) + # Generate data + xdata = np.linspace(0, 100) + ydata = model(xdata, a=2, b=3, c=2, d=2).y + + for minimizer in subclasses(BaseMinimizer): + if minimizer is ChainedMinimizer: + continue + else: + fit = Fit(model, x=xdata, y=ydata, minimizer=minimizer) + fit_result = fit.execute() + # Should still be 4.0, not 2.0! + assert 4.0 == fit_result.params['c'] - fit = Fit(model, xdata, ydata, zdata, absolute_sigma=True) - fit_result = fit.execute() - # Same as Mathematica in Measurement error mode, but without suplying - # any errors. - self.assertAlmostEqual(fit_result.value(a) / 2.9956, 1, 4) - self.assertAlmostEqual(fit_result.value(b) / 0.563212, 1, 4) - self.assertAlmostEqual(fit_result.value(c) / 3.59732, 1, 4) - self.assertAlmostEqual(fit_result.stdev(a) / 0.643259, 1, 4) - self.assertAlmostEqual(fit_result.stdev(b) / 0.517992, 1, 4) - self.assertAlmostEqual(fit_result.stdev(c) / 2.26594, 1, 4) - - fit = Fit(model, xdata, ydata, zdata, sigma_z=errors) - fit_result = fit.execute() - popt, pcov, infodict, errmsg, ier = curve_fit( - lambda x_vec, a, b, c: a * np.log(b * x_vec[0] + c * x_vec[1]), - xy, zdata, sigma=errors, absolute_sigma=True, full_output=True - ) - - # Same as curve_fit? - self.assertAlmostEqual(fit_result.value(a), popt[0], 4) - self.assertAlmostEqual(fit_result.value(b), popt[1], 4) - self.assertAlmostEqual(fit_result.value(c), popt[2], 4) - self.assertAlmostEqual(fit_result.stdev(a), np.sqrt(pcov[0,0]), 4) - self.assertAlmostEqual(fit_result.stdev(b), np.sqrt(pcov[1,1]), 4) - self.assertAlmostEqual(fit_result.stdev(c), np.sqrt(pcov[2,2]), 4) - - # Same as Mathematica with MEASUREMENT ERROR - self.assertAlmostEqual(fit_result.value(a), 2.68807, 4) - self.assertAlmostEqual(fit_result.value(b), 0.941344, 4) - self.assertAlmostEqual(fit_result.value(c), 5.01541, 4) - self.assertAlmostEqual(fit_result.stdev(a), 0.0974628, 4) - self.assertAlmostEqual(fit_result.stdev(b), 0.247018, 4) - self.assertAlmostEqual(fit_result.stdev(c), 0.597661, 4) - - def test_error_analytical(self): - """ - Test using a case where the analytical answer is known. Uses both - symfit and scipy's curve_fit. 
- Modeled after: - http://nbviewer.ipython.org/urls/gist.github.com/taldcroft/5014170/raw/31e29e235407e4913dc0ec403af7ed524372b612/curve_fit.ipynb - """ - N = 10000 - sigma = 10.0 * np.ones(N) - xn = np.arange(N, dtype=np.float) - # yn = np.zeros_like(xn) - np.random.seed(10) - yn = np.random.normal(size=len(xn), scale=sigma) - - a = Parameter() - y = Variable('y') - model = {y: a} - - fit = Fit(model, y=yn, sigma_y=sigma) - fit_result = fit.execute() +def test_boundaries(): + """ + Make sure parameter boundaries are respected + """ + x = Parameter('x', min=1) + y = Variable('y') + model = Model({y: x**2}) - popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn, sigma=sigma, absolute_sigma=True) - self.assertAlmostEqual(fit_result.value(a), popt[0], 5) - self.assertAlmostEqual(fit_result.stdev(a), np.sqrt(np.diag(pcov))[0], 2) - - fit_no_sigma = Fit(model, yn) - fit_result_no_sigma = fit_no_sigma.execute() - - popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn,) - # With or without sigma, the bestfit params should be in agreement in case of equal weights - self.assertAlmostEqual(fit_result.value(a), fit_result_no_sigma.value(a), 5) - # Since symfit is all about absolute errors, the sigma will not be in agreement - self.assertNotEqual(fit_result.stdev(a), fit_result_no_sigma.stdev(a), 5) - self.assertAlmostEqual(fit_result_no_sigma.value(a), popt[0], 5) - self.assertAlmostEqual(fit_result_no_sigma.stdev(a), pcov[0][0]**0.5, 5) - - # Analytical answer for mean of N(0,1): - mu = 0.0 - sigma_mu = sigma[0]/N**0.5 - - self.assertAlmostEqual(fit_result.stdev(a), sigma_mu, 5) - - # TODO: redudant with test_error_analytical? - # def test_straight_line_analytical(self): - # """ - # Test symfit against a straight line, for which the parameters and their - # uncertainties are known analytically. Assuming equal weights. 
- # """ - # data = [[0, 1], [1, 0], [3, 2], [5, 4]] - # x, y = (np.array(i, dtype='float64') for i in zip(*data)) - # # x = np.arange(0, 100, 0.1) - # # np.random.seed(10) - # # y = 3.0*x + 105.0 + np.random.normal(size=x.shape) - # - # dx = x - x.mean() - # dy = y - y.mean() - # mean_squared_x = np.mean(x**2) - np.mean(x)**2 - # mean_xy = np.mean(x * y) - np.mean(x)*np.mean(y) - # a = mean_xy/mean_squared_x - # b = y.mean() - a * x.mean() - # self.assertAlmostEqual(a, 0.694915, 6) # values from Mathematica - # self.assertAlmostEqual(b, 0.186441, 6) - # print(a, b) - # - # S = np.sum((y - (a*x + b))**2) - # var_a_exact = S/(len(x) * (len(x) - 2) * mean_squared_x) - # var_b_exact = var_a_exact*np.mean(x ** 2) - # a_exact = a - # b_exact = b - # - # # We will now compare these exact results with values from symfit - # a, b, x_var = Parameter(name='a', value=3.0), Parameter(name='b'), Variable(name='x') - # model = a*x_var + b - # fit = Fit(model, x, y, absolute_sigma=False) - # fit_result = fit.execute() - # - # popt, pcov = curve_fit(lambda z, c, d: c * z + d, x, y, - # Dfun=lambda p, x, y, func: np.transpose([x, np.ones_like(x)])) - # # Dfun=lambda p, x, y, func: print(p, func, x, y)) - # - # # curve_fit - # self.assertAlmostEqual(a_exact, popt[0], 4) - # self.assertAlmostEqual(b_exact, popt[1], 4) - # self.assertAlmostEqual(var_a_exact, pcov[0][0], 6) - # self.assertAlmostEqual(var_b_exact, pcov[1][1], 6) - # - # self.assertAlmostEqual(a_exact, fit_result.params.a, 4) - # self.assertAlmostEqual(b_exact, fit_result.params.b, 4) - # self.assertAlmostEqual(var_a_exact**0.5, fit_result.params.a_stdev, 6) - # self.assertAlmostEqual(var_b_exact**0.5, fit_result.params.b_stdev, 6) - - def test_fixed_parameters(self): - """ - Make sure fixed parameters don't change on fitting - """ - a, b, c, d = parameters('a, b, c, d') - x, y = variables('x, y') - - c.value = 4.0 - a.min, a.max = 1.0, 5.0 # Bounds are needed for DifferentialEvolution - b.min, b.max = 1.0, 5.0 - c.min, c.max = 1.0, 5.0 - d.min, d.max = 1.0, 5.0 - c.fixed = True - - model = Model({y: a * exp(-(x - b)**2 / (2 * c**2)) + d}) - # Generate data - xdata = np.linspace(0, 100) - ydata = model(xdata, a=2, b=3, c=2, d=2).y - - for minimizer in subclasses(BaseMinimizer): - if minimizer is ChainedMinimizer: - continue - else: - fit = Fit(model, x=xdata, y=ydata, minimizer=minimizer) - fit_result = fit.execute() - # Should still be 4.0, not 2.0! 
- self.assertEqual(4.0, fit_result.params['c']) - - def test_boundaries(self): - """ - Make sure parameter boundaries are respected - """ - x = Parameter('x', min=1) - y = Variable('y') - model = Model({y: x**2}) - - bounded_minimizers = list(subclasses(BoundedMinimizer)) - for minimizer in bounded_minimizers: - if minimizer is MINPACK: - # Not a MINPACKable problem because it only has a param - continue - fit = Fit(model, minimizer=minimizer) - self.assertIsInstance(fit.objective, MinimizeModel) - if minimizer is DifferentialEvolution: - # Also needs a max - x.max = 10 - fit_result = fit.execute() - x.max = None - else: - fit_result = fit.execute() - self.assertGreaterEqual(fit_result.value(x), 1.0) - self.assertLessEqual(fit_result.value(x), 2.0) - self.assertEqual(fit.minimizer.bounds, [(1, None)]) - - def test_non_boundaries(self): - """ - Make sure parameter boundaries are not invented - """ - x = Parameter('x') - y = Variable('y') - model = Model({y: x**2}) - - bounded_minimizers = list(subclasses(BoundedMinimizer)) - bounded_minimizers = [minimizer for minimizer in bounded_minimizers - if minimizer is not DifferentialEvolution] - for minimizer in bounded_minimizers: + bounded_minimizers = list(subclasses(BoundedMinimizer)) + for minimizer in bounded_minimizers: + if minimizer is MINPACK: # Not a MINPACKable problem because it only has a param - if minimizer is MINPACK: - continue - fit = Fit(model, minimizer=minimizer) + continue + fit = Fit(model, minimizer=minimizer) + assert isinstance(fit.objective, MinimizeModel) + if minimizer is DifferentialEvolution: + # Also needs a max + x.max = 10 + fit_result = fit.execute() + x.max = None + else: fit_result = fit.execute() - self.assertAlmostEqual(fit_result.value(x), 0.0) - self.assertEqual(fit.minimizer.bounds, [(None, None)]) - - def test_single_param_model(self): - """ - Added after #161, this tests if models with a single additive parameter - are fitted properly. The problem with these models is that their - jacobian is in principle just int 1, which is not the correct shape. - - No news is good news. - :return: - """ - T = Variable('T') - l = Variable('l') - s = Parameter('s', value=300) - a = Parameter('a', value=300) - model = {l: s + a + 1 / (1 + exp(- T))} - - temp_data = [270, 280, 285, 290, 295, 300, 310, 320] - length_data = [8.33, 8.41, 8.45, 8.5, 8.54, 9.13, 9.27, 9.4] - fit = Fit(model, l=length_data, T=temp_data) + assert fit_result.value(x) >= 1.0 + assert fit_result.value(x) <= 2.0 + assert fit.minimizer.bounds == [(1, None)] + + +def test_non_boundaries(): + """ + Make sure parameter boundaries are not invented + """ + x = Parameter('x') + y = Variable('y') + model = Model({y: x**2}) + + bounded_minimizers = list(subclasses(BoundedMinimizer)) + bounded_minimizers = [minimizer for minimizer in bounded_minimizers + if minimizer is not DifferentialEvolution] + for minimizer in bounded_minimizers: + # Not a MINPACKable problem because it only has a param + if minimizer is MINPACK: + continue + fit = Fit(model, minimizer=minimizer) fit_result = fit.execute() + assert fit_result.value(x) == pytest.approx(0.0) + assert fit.minimizer.bounds == [(None, None)] + + +def test_single_param_model(): + """ + Added after #161, this tests if models with a single additive parameter + are fitted properly. The problem with these models is that their + jacobian is in principle just int 1, which is not the correct shape. + + No news is good news. 
+    :return:
+    """
+    T = Variable('T')
+    l = Variable('l')
+    s = Parameter('s', value=300)
+    a = Parameter('a', value=300)
+    model = {l: s + a + 1 / (1 + exp(- T))}
+
+    temp_data = [270, 280, 285, 290, 295, 300, 310, 320]
+    length_data = [8.33, 8.41, 8.45, 8.5, 8.54, 9.13, 9.27, 9.4]
+    fit = Fit(model, l=length_data, T=temp_data)
+    fit_result = fit.execute()
+
+    # Raise the stakes by increasing the dimensionality of the data
+    TT, LL = np.meshgrid(temp_data, length_data)
+    fit = Fit(model, l=LL, T=TT)
+    fit_result = fit.execute()
+
+
+def test_model_from_dict():
+    """
+    Tries to create a model from a dictionary.
+    """
+    x, y_1, y_2 = variables('x, y_1, y_2')
+    a, b = parameters('a, b')
+    # This way the test fails rather than errors.
+    try:
+        Model({
+            y_1: 2 * a * x,
+            y_2: b * x**2
+        })
+    except Exception as error:
+        pytest.fail('test_model_from_dict raised {}'.format(error))

-        # Raise the stakes by increasing the dimensionality of the data
-        TT, LL = np.meshgrid(temp_data, length_data)
-        fit = Fit(model, l=LL, T=TT)
-        fit_result = fit.execute()

-    def test_model_from_dict(self):
-        """
-        Tries to create a model from a dictionary.
-        """
-        x, y_1, y_2 = variables('x, y_1, y_2')
-        a, b = parameters('a, b')
-        # This way the test fails rather than errors.
-        try:
-            Model({
-                y_1: 2 * a * x,
-                y_2: b * x**2
-            })
-        except Exception as error:
-            self.fail('test_model_from_dict raised {}'.format(error))
-
-    def test_version(self):
-        """
-        Test if __version__ is availabe
-        :return:
-        """
-        import symfit
-        symfit.__version__
-
-
-if __name__ == '__main__':
-    try:
-        unittest.main(warnings='ignore')
-        # Note that unittest will catch and handle exceptions raised by tests.
-        # So this line will *only* deal with exceptions raised by the line
-        # above.
-    except TypeError:
-        # In Py2, unittest.main doesn't take a warnings argument
-        warnings.simplefilter('ignore')
-        unittest.main()
+def test_version():
+    """
+    Test if __version__ is available
+    :return:
+    """
+    import symfit
+    symfit.__version__
diff --git a/tests/test_global_opt.py b/tests/test_global_opt.py
index d6ce039f..522508d3 100644
--- a/tests/test_global_opt.py
+++ b/tests/test_global_opt.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 from __future__ import division, print_function
-import unittest
-import warnings
+import pytest
 import sys
 
 import numpy as np
@@ -12,22 +11,25 @@ from symfit.core.minimizers import BFGS, DifferentialEvolution
 from symfit.distributions import Gaussian
 
-if sys.version_info >= (3,0):
+if sys.version_info >= (3, 0):
     import inspect as inspect_sig
 else:
     import funcsigs as inspect_sig
 
-class TestGlobalOptGaussian(unittest.TestCase):
+
+class TestGlobalOptGaussian:
     @classmethod
-    def setUpClass(cls):
+    def setup_class(cls):
         np.random.seed(0)
         mean = (0.4, 0.4)  # x, y mean 0.6, 0.4
         cov = [[0.01**2, 0], [0, 0.01**2]]
+
+        # TODO: evaluate gaussian at 200x200 points (?!) and add appropriate noise
         data = np.random.multivariate_normal(mean, cov, 2500000)
 
         # Insert them as y,x here as np fucks up cartesian conventions.
cls.ydata, xedges, yedges = np.histogram2d(data[:, 1], data[:, 0], bins=200, - range=[[0.0, 1.0], [0.0, 1.0]]) + range=[[0.0, 1.0], [0.0, 1.0]]) xcentres = (xedges[:-1] + xedges[1:]) / 2 ycentres = (yedges[:-1] + yedges[1:]) / 2 @@ -35,7 +37,7 @@ def setUpClass(cls): cls.xx, cls.yy = np.meshgrid(xcentres, ycentres, sparse=False) # xdata = np.dstack((xx, yy)).T - def setUp(self): + def setup_method(self): x = Variable('x') y = Variable('y') xmin, xmax = -5, 5 @@ -44,25 +46,24 @@ def setUp(self): self.y0_1 = Parameter('y01', value=0, min=xmin, max=xmax) self.sig_y_1 = Parameter('sigy1', value=0, min=0.0, max=1) self.A_1 = Parameter('A1', min=0, max=1000) - g_1 = self.A_1 * Gaussian(x, self.x0_1, self.sig_x_1) *\ - Gaussian(y, self.y0_1, self.sig_y_1) + g_1 = self.A_1 * Gaussian(x, self.x0_1, self.sig_x_1) * Gaussian(y, self.y0_1, self.sig_y_1) self.model = GradientModel(g_1) - + def test_diff_evo(self): """ Tests fitting to a scalar gaussian with 2 independent variables with wide bounds. """ - + fit = Fit(self.model, self.xx, self.yy, self.ydata, minimizer=BFGS) fit_result = fit.execute() - self.assertIsInstance(fit.minimizer, BFGS) + assert isinstance(fit.minimizer, BFGS) # Make sure a local optimizer doesn't find the answer. - self.assertNotAlmostEqual(fit_result.value(self.x0_1), 0.4, 1) - self.assertNotAlmostEqual(fit_result.value(self.y0_1), 0.4, 1) + assert not fit_result.value(self.x0_1) == pytest.approx(0.4, 1e-1) + assert not fit_result.value(self.y0_1) == pytest.approx(0.4, 1e-1) # On to the main event fit = Fit(self.model, self.xx, self.yy, self.ydata, @@ -70,8 +71,8 @@ def test_diff_evo(self): fit_result = fit.execute(polish=True, seed=0, tol=1e-4, maxiter=100) # Global minimizers are really bad at finding local minima though, so # roughly equal is good enough. - self.assertAlmostEqual(fit_result.value(self.x0_1), 0.4, 1) - self.assertAlmostEqual(fit_result.value(self.y0_1), 0.4, 1) + assert fit_result.value(self.x0_1) == pytest.approx(0.4, 1e-1) + assert fit_result.value(self.y0_1) == pytest.approx(0.4, 1e-1) def test_chained_min(self): """Test fitting with a chained minimizer""" @@ -81,9 +82,9 @@ def test_chained_min(self): fit_result = fit.execute( DifferentialEvolution={'seed': 0, 'tol': 1e-4, 'maxiter': 10} ) - self.assertAlmostEqual(fit_result.value(self.x0_1), 0.4, 4) - self.assertAlmostEqual(fit_result.value(self.y0_1), 0.4, 4) - self.assertEqual(curvals, [p.value for p in self.model.params]) + assert fit_result.value(self.x0_1) == pytest.approx(0.4, 1e-4) + assert fit_result.value(self.y0_1) == pytest.approx(0.4, 1e-4) + assert curvals == [p.value for p in self.model.params] def test_chained_min_signature(self): """ @@ -101,48 +102,35 @@ def test_chained_min_signature(self): 'DifferentialEvolution_2', 'BFGS_3' ] for name, param_name in zip(names, fit.minimizer.__signature__.parameters): - self.assertEqual(name, param_name) + assert name == param_name # Check for equal lengths because zip is slippery that way - self.assertEqual(len(names), len(fit.minimizer.__signature__.parameters)) + assert len(names) == len(fit.minimizer.__signature__.parameters) for param in fit.minimizer.__signature__.parameters.values(): - self.assertEqual(param.kind, inspect_sig.Parameter.KEYWORD_ONLY) + assert param.kind == inspect_sig.Parameter.KEYWORD_ONLY # Make sure keywords end up at the right minimizer. - with self.assertRaises(TypeError): + with pytest.raises(TypeError): # This is not a valid kwarg to DiffEvo, but it is to BFGS. 
Check if # we really go by name of the Minimizer, not by order. fit.execute(DifferentialEvolution={'return_all': False}) -class TestGlobalOptMexican(unittest.TestCase): - def test_mexican_hat(self): - """ - Test that global minimisation finds the global minima, and doesn't - affect the value of parameters. - """ - x = Parameter('x') - x.min, x.max = -100, 100 - x.value = -2.5 - y = Variable('y') - - model = Model({y: x**4 - 10 * x**2 - x}) # Skewed Mexican hat - fit = Fit(model, minimizer=[DifferentialEvolution, BFGS]) - fit_result1 = fit.execute(DifferentialEvolution={'seed': 0}) - - fit = Fit(model) - fit_result2 = fit.execute() +def test_mexican_hat(): + """ + Test that global minimisation finds the global minima, and doesn't + affect the value of parameters. + """ + x = Parameter('x') + x.min, x.max = -100, 100 + x.value = -2.5 + y = Variable('y') - self.assertGreater(fit_result1.value(x), 0) - self.assertLess(fit_result2.value(x), 0) + model = Model({y: x**4 - 10 * x**2 - x}) # Skewed Mexican hat + fit = Fit(model, minimizer=[DifferentialEvolution, BFGS]) + fit_result1 = fit.execute(DifferentialEvolution={'seed': 0}) + fit = Fit(model) + fit_result2 = fit.execute() -if __name__ == '__main__': - try: - unittest.main(warnings='ignore') - # Note that unittest will catch and handle exceptions raised by tests. - # So this line will *only* deal with exceptions raised by the line - # above. - except TypeError: - # In Py2, unittest.main doesn't take a warnings argument - warnings.simplefilter('ignore') - unittest.main() \ No newline at end of file + assert fit_result1.value(x) > 0 + assert fit_result2.value(x) < 0 diff --git a/tests/test_minimize.py b/tests/test_minimize.py index 2802013e..20fb73e7 100644 --- a/tests/test_minimize.py +++ b/tests/test_minimize.py @@ -1,7 +1,5 @@ from __future__ import division, print_function -import unittest -import sys -import warnings +import pytest import numpy as np from scipy.optimize import minimize, basinhopping @@ -15,252 +13,247 @@ from symfit.core.support import partial -class TestMinimize(unittest.TestCase): - @classmethod - def setUpClass(cls): - np.random.seed(0) - - # TODO: Should be 2 tests? - def test_minimize(self): - """ - Tests maximizing a function with and without constraints, taken from the - scipy `minimize` tutorial. Compare the symfit result with the scipy - result. - https://docs.scipy.org/doc/scipy-0.18.1/reference/tutorial/optimize.html#constrained-minimization-of-multivariate-scalar-functions-minimize - """ - x = Parameter(value=-1.0) - y = Parameter(value=1.0) - # Use an unnamed Variable on purpose to test the auto-generation of names. 
-        model = Model(2 * x * y + 2 * x - x ** 2 - 2 * y ** 2)
-
-        constraints = [
-            Ge(y - 1, 0),  # y - 1 >= 0,
-            Eq(x**3 - y, 0),  # x**3 - y == 0,
-        ]
-
-        def func(x, sign=1.0):
-            """ Objective function """
-            return sign*(2*x[0]*x[1] + 2*x[0] - x[0]**2 - 2*x[1]**2)
-
-        def func_deriv(x, sign=1.0):
-            """ Derivative of objective function """
-            dfdx0 = sign*(-2*x[0] + 2*x[1] + 2)
-            dfdx1 = sign*(2*x[0] - 4*x[1])
-            return np.array([ dfdx0, dfdx1 ])
-
-        cons = (
-            {'type': 'eq',
-             'fun' : lambda x: np.array([x[0]**3 - x[1]]),
-             'jac' : lambda x: np.array([3.0*(x[0]**2.0), -1.0])},
-            {'type': 'ineq',
-             'fun' : lambda x: np.array([x[1] - 1]),
-             'jac' : lambda x: np.array([0.0, 1.0])})
-
-        # Unconstrained fit
-        res = minimize(func, [-1.0,1.0], args=(-1.0,), jac=func_deriv,
-               method='BFGS', options={'disp': False})
-        fit = Fit(model=- model)
-        self.assertIsInstance(fit.objective, MinimizeModel)
-        self.assertIsInstance(fit.minimizer, BFGS)
-
-        fit_result = fit.execute()
-
-        self.assertAlmostEqual(fit_result.value(x) / res.x[0], 1.0, 6)
-        self.assertAlmostEqual(fit_result.value(y) / res.x[1], 1.0, 6)
-
-        # Same test, but with constraints in place.
-        res = minimize(func, [-1.0,1.0], args=(-1.0,), jac=func_deriv,
-               constraints=cons, method='SLSQP', options={'disp': False})
-
-        from symfit.core.minimizers import SLSQP
-        fit = Fit(- model, constraints=constraints)
-        self.assertEqual(fit.constraints[0].constraint_type, Ge)
-        self.assertEqual(fit.constraints[1].constraint_type, Eq)
-        fit_result = fit.execute()
-        self.assertAlmostEqual(fit_result.value(x), res.x[0], 6)
-        self.assertAlmostEqual(fit_result.value(y), res.x[1], 6)
-
-    def test_constraint_types(self):
-        x = Parameter(value=-1.0)
-        y = Parameter(value=1.0)
-        z = Variable()
-        model = Model({z: 2*x*y + 2*x - x**2 - 2*y**2})
-
-        # These types are not allowed constraints.
-        for relation in [Lt, Gt, Ne]:
-            with self.assertRaises(ModelError):
-                Fit(model, constraints=[relation(x, y)])
-
-        # Should execute without problems.
-        for relation in [Eq, Ge, Le]:
+def setup_function():
+    np.random.seed(0)
+
+# TODO: Should be 2 tests?
+
+
+def test_minimize():
+    """
+    Tests maximizing a function with and without constraints, taken from the
+    scipy `minimize` tutorial. Compare the symfit result with the scipy
+    result.
+    https://docs.scipy.org/doc/scipy-0.18.1/reference/tutorial/optimize.html#constrained-minimization-of-multivariate-scalar-functions-minimize
+    """
+    x = Parameter(value=-1.0)
+    y = Parameter(value=1.0)
+    # Use an unnamed Variable on purpose to test the auto-generation of names.
+ model = Model(2 * x * y + 2 * x - x ** 2 - 2 * y ** 2) + + constraints = [ + Ge(y - 1, 0), # y - 1 >= 0, + Eq(x**3 - y, 0), # x**3 - y == 0, + ] + + def func(x, sign=1.0): + """ Objective function """ + return sign*(2*x[0]*x[1] + 2*x[0] - x[0]**2 - 2*x[1]**2) + + def func_deriv(x, sign=1.0): + """ Derivative of objective function """ + dfdx0 = sign*(-2*x[0] + 2*x[1] + 2) + dfdx1 = sign*(2*x[0] - 4*x[1]) + return np.array([dfdx0, dfdx1]) + + cons = ( + {'type': 'eq', + 'fun': lambda x: np.array([x[0]**3 - x[1]]), + 'jac': lambda x: np.array([3.0*(x[0]**2.0), -1.0])}, + {'type': 'ineq', + 'fun': lambda x: np.array([x[1] - 1]), + 'jac': lambda x: np.array([0.0, 1.0])} + ) + + # Unconstrained fit + res = minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv, + method='BFGS', options={'disp': False}) + fit = Fit(model=-model) + assert isinstance(fit.objective, MinimizeModel) + assert isinstance(fit.minimizer, BFGS) + + fit_result = fit.execute() + + assert fit_result.value(x) == pytest.approx(res.x[0], 1e-6) + assert fit_result.value(y) == pytest.approx(res.x[1], 1e-6) + + # Same test, but with constraints in place. + res = minimize(func, [-1.0, 1.0], args=(-1.0,), jac=func_deriv, + constraints=cons, method='SLSQP', options={'disp': False}) + + fit = Fit(-model, constraints=constraints) + assert fit.constraints[0].constraint_type == Ge + assert fit.constraints[1].constraint_type == Eq + fit_result = fit.execute() + assert fit_result.value(x) == pytest.approx(res.x[0], 1e-6) + assert fit_result.value(y) == pytest.approx(res.x[1], 1e-6) + + +def test_constraint_types(): + x = Parameter(value=-1.0) + y = Parameter(value=1.0) + z = Variable() + model = Model({z: 2*x*y + 2*x - x**2 - 2*y**2}) + + # These types are not allowed constraints. + for relation in [Lt, Gt, Ne]: + with pytest.raises(ModelError): Fit(model, constraints=[relation(x, y)]) - fit = Fit(model, constraints=[Le(x, y)]) - # Le should be transformed to Ge - self.assertIs(fit.constraints[0].constraint_type, Ge) - - # Redo the standard test as a Le - constraints = [ - Le(- y + 1, 0), # y - 1 >= 0, - Eq(x**3 - y, 0), # x**3 - y == 0, - ] - std_constraints = [ - Ge(y - 1, 0), # y - 1 >= 0, - Eq(x**3 - y, 0), # x**3 - y == 0, - ] - - fit = Fit(- model, constraints=constraints) - std_fit = Fit(- model, constraints=std_constraints) - self.assertEqual(fit.constraints[0].constraint_type, Ge) - self.assertEqual(fit.constraints[1].constraint_type, Eq) - self.assertEqual(fit.constraints[0].params, [x, y]) - self.assertEqual(fit.constraints[1].params, [x, y]) - self.assertEqual(fit.constraints[0].jacobian_model.params, [x, y]) - self.assertEqual(fit.constraints[1].jacobian_model.params, [x, y]) - self.assertEqual(fit.constraints[0].hessian_model.params, [x, y]) - self.assertEqual(fit.constraints[1].hessian_model.params, [x, y]) - self.assertEqual(fit.constraints[0].__signature__, - fit.constraints[1].__signature__) - fit_result = fit.execute() - std_result = std_fit.execute() - self.assertAlmostEqual(fit_result.value(x), std_result.value(x)) - self.assertAlmostEqual(fit_result.value(y), std_result.value(y)) - - def test_basinhopping_large(self): - """ - Test the basinhopping method of scipy.minimize. 
This is based of scipy's docs - as found here: https://docs.scipy.org/doc/scipy-0.13.0/reference/generated/scipy.optimize.anneal.html - """ - def f1(z, *params): - x, y = z - a, b, c, d, e, f, g, h, i, j, k, l, scale = params - return (a * x ** 2 + b * x * y + c * y ** 2 + d * x + e * y + f) - - def f2(z, *params): - x, y = z - a, b, c, d, e, f, g, h, i, j, k, l, scale = params - return (-g * np.exp(-((x - h) ** 2 + (y - i) ** 2) / scale)) - - def f3(z, *params): - x, y = z - a, b, c, d, e, f, g, h, i, j, k, l, scale = params - return (-j * np.exp(-((x - k) ** 2 + (y - l) ** 2) / scale)) - - def func(z, *params): - x, y = z - a, b, c, d, e, f, g, h, i, j, k, l, scale = params - return f1(z, *params) + f2(z, *params) + f3(z, *params) - - def f_symfit(x1, x2, params): - z = [x1, x2] - return func(z, *params) - - params = (2, 3, 7, 8, 9, 10, 44, -1, 2, 26, 1, -2, 0.5) - - x0 = np.array([2., 2.]) - np.random.seed(555) - res = basinhopping(func, x0, minimizer_kwargs={'args': params}) - - np.random.seed(555) - x1, x2 = parameters('x1, x2', value=x0) - fit = BasinHopping(partial(f_symfit, params=params), [x1, x2]) - fit_result = fit.execute() - - self.assertEqual(res.x[0], fit_result.value(x1)) - self.assertEqual(res.x[1], fit_result.value(x2)) - self.assertEqual(res.fun, fit_result.objective_value) - - def test_basinhopping(self): - func = lambda x: np.cos(14.5 * x - 0.3) + (x + 0.2) * x - x0 = [1.] - np.random.seed(555) - res = basinhopping(func, x0, minimizer_kwargs={"method": "BFGS"}, niter=200) - np.random.seed(555) - x, = parameters('x') - fit = BasinHopping(func, [x], local_minimizer=BFGS) - fit_result = fit.execute(niter=200) - # fit_result = fit.execute(minimizer_kwargs={"method": "BFGS"}, niter=200) - - self.assertEqual(res.x, fit_result.value(x)) - self.assertEqual(res.fun, fit_result.objective_value) - - def test_basinhopping_2d(self): - def func2d(x): - f = np.cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] + 0.2) * x[0] - df = np.zeros(2) - df[0] = -14.5 * np.sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2 - df[1] = 2. * x[1] + 0.2 - return f, df - - def func2d_symfit(x1, x2): - f = np.cos(14.5 * x1 - 0.3) + (x2 + 0.2) * x2 + (x1 + 0.2) * x1 - return f - - def jac2d_symfit(x1, x2): - df = np.zeros(2) - df[0] = -14.5 * np.sin(14.5 * x1 - 0.3) + 2. * x1 + 0.2 - df[1] = 2. * x2 + 0.2 - return df - - np.random.seed(555) - minimizer_kwargs = {'method': 'BFGS', 'jac': True} - x0 = [1.0, 1.0] - res = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs, niter=200) - - np.random.seed(555) - x1, x2 = parameters('x1, x2', value=x0) - with self.assertRaises(TypeError): - fit = BasinHopping( - func2d_symfit, [x1, x2], - local_minimizer=NelderMead(func2d_symfit, [x1, x2], - jacobian=jac2d_symfit) - ) + + # Should execute without problems. 
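+    # Eq, Ge and Le are the relation types symfit accepts as constraints;
+    # as asserted further down, a Le constraint is normalised to an
+    # equivalent Ge internally.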
+    for relation in [Eq, Ge, Le]:
+        Fit(model, constraints=[relation(x, y)])
+
+    fit = Fit(model, constraints=[Le(x, y)])
+    # Le should be transformed to Ge
+    assert fit.constraints[0].constraint_type is Ge
+
+    # Redo the standard test as a Le
+    constraints = [
+        Le(- y + 1, 0),  # y - 1 >= 0,
+        Eq(x**3 - y, 0),  # x**3 - y == 0,
+    ]
+    std_constraints = [
+        Ge(y - 1, 0),  # y - 1 >= 0,
+        Eq(x**3 - y, 0),  # x**3 - y == 0,
+    ]
+
+    fit = Fit(-model, constraints=constraints)
+    std_fit = Fit(-model, constraints=std_constraints)
+    assert fit.constraints[0].constraint_type == Ge
+    assert fit.constraints[1].constraint_type == Eq
+    assert fit.constraints[0].params == [x, y]
+    assert fit.constraints[1].params == [x, y]
+    assert fit.constraints[0].jacobian_model.params == [x, y]
+    assert fit.constraints[1].jacobian_model.params == [x, y]
+    assert fit.constraints[0].hessian_model.params == [x, y]
+    assert fit.constraints[1].hessian_model.params == [x, y]
+    assert fit.constraints[0].__signature__ == fit.constraints[1].__signature__
+    fit_result = fit.execute()
+    std_result = std_fit.execute()
+    assert fit_result.value(x) == pytest.approx(std_result.value(x))
+    assert fit_result.value(y) == pytest.approx(std_result.value(y))
+
+
+def test_basinhopping_large():
+    """
+    Test the basinhopping method of scipy.minimize. This is based on scipy's docs
+    as found here: https://docs.scipy.org/doc/scipy-0.13.0/reference/generated/scipy.optimize.anneal.html
+    """
+    def f1(z, *params):
+        x, y = z
+        a, b, c, d, e, f, g, h, i, j, k, l, scale = params
+        return (a * x ** 2 + b * x * y + c * y ** 2 + d * x + e * y + f)
+
+    def f2(z, *params):
+        x, y = z
+        a, b, c, d, e, f, g, h, i, j, k, l, scale = params
+        return (-g * np.exp(-((x - h) ** 2 + (y - i) ** 2) / scale))
+
+    def f3(z, *params):
+        x, y = z
+        a, b, c, d, e, f, g, h, i, j, k, l, scale = params
+        return (-j * np.exp(-((x - k) ** 2 + (y - l) ** 2) / scale))
+
+    def func(z, *params):
+        x, y = z
+        a, b, c, d, e, f, g, h, i, j, k, l, scale = params
+        return f1(z, *params) + f2(z, *params) + f3(z, *params)
+
+    def f_symfit(x1, x2, params):
+        z = [x1, x2]
+        return func(z, *params)
+
+    params = (2, 3, 7, 8, 9, 10, 44, -1, 2, 26, 1, -2, 0.5)
+
+    x0 = np.array([2., 2.])
+    np.random.seed(555)
+    res = basinhopping(func, x0, minimizer_kwargs={'args': params})
+
+    np.random.seed(555)
+    x1, x2 = parameters('x1, x2', value=x0)
+    fit = BasinHopping(partial(f_symfit, params=params), [x1, x2])
+    fit_result = fit.execute()
+
+    assert res.x[0] == fit_result.value(x1)
+    assert res.x[1] == fit_result.value(x2)
+    assert res.fun == fit_result.objective_value
+
+
+def test_basinhopping():
+    def func(x):
+        return np.cos(14.5 * x - 0.3) + (x + 0.2) * x
+    x0 = [1.]
+    np.random.seed(555)
+    res = basinhopping(func, x0, minimizer_kwargs={"method": "BFGS"}, niter=200)
+    np.random.seed(555)
+    x, = parameters('x')
+    fit = BasinHopping(func, [x], local_minimizer=BFGS)
+    fit_result = fit.execute(niter=200)
+    # fit_result = fit.execute(minimizer_kwargs={"method": "BFGS"}, niter=200)
+
+    assert res.x == fit_result.value(x)
+    assert res.fun == fit_result.objective_value
+
+
+def test_basinhopping_2d():
+    def func2d(x):
+        f = np.cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] + 0.2) * x[0]
+        df = np.zeros(2)
+        df[0] = -14.5 * np.sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2
+        df[1] = 2.
* x[1] + 0.2 + return f, df + + def func2d_symfit(x1, x2): + f = np.cos(14.5 * x1 - 0.3) + (x2 + 0.2) * x2 + (x1 + 0.2) * x1 + return f + + def jac2d_symfit(x1, x2): + df = np.zeros(2) + df[0] = -14.5 * np.sin(14.5 * x1 - 0.3) + 2. * x1 + 0.2 + df[1] = 2. * x2 + 0.2 + return df + + np.random.seed(555) + minimizer_kwargs = {'method': 'BFGS', 'jac': True} + x0 = [1.0, 1.0] + res = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs, niter=200) + + np.random.seed(555) + x1, x2 = parameters('x1, x2', value=x0) + + with pytest.raises(TypeError): fit = BasinHopping( func2d_symfit, [x1, x2], - local_minimizer=BFGS(func2d_symfit, [x1, x2], jacobian=jac2d_symfit) + local_minimizer=NelderMead(func2d_symfit, [x1, x2], + jacobian=jac2d_symfit) ) - fit_result = fit.execute(niter=200) - self.assertIsInstance(fit.local_minimizer.jacobian, MinimizeModel) - self.assertIsInstance(fit.local_minimizer.jacobian.model, CallableNumericalModel) - self.assertEqual(res.x[0] / fit_result.value(x1), 1.0) - self.assertEqual(res.x[1] / fit_result.value(x2), 1.0) - self.assertEqual(res.fun, fit_result.objective_value) - - # Now compare with the symbolic equivalent - np.random.seed(555) - model = cos(14.5 * x1 - 0.3) + (x2 + 0.2) * x2 + (x1 + 0.2) * x1 - fit = Fit(model, minimizer=BasinHopping) - fit_result = fit.execute() - self.assertEqual(res.x[0], fit_result.value(x1)) - self.assertEqual(res.x[1], fit_result.value(x2)) - self.assertEqual(res.fun, fit_result.objective_value) - self.assertIsInstance(fit.minimizer.local_minimizer, BFGS) - - # Impose constrains - np.random.seed(555) - model = cos(14.5 * x1 - 0.3) + (x2 + 0.2) * x2 + (x1 + 0.2) * x1 - fit = Fit(model, minimizer=BasinHopping, constraints=[Eq(x1, x2)]) - fit_result = fit.execute() - self.assertEqual(fit_result.value(x1), fit_result.value(x2)) - self.assertIsInstance(fit.minimizer.local_minimizer, SLSQP) - - # Impose bounds - np.random.seed(555) - x1.min = 0.0 - model = cos(14.5 * x1 - 0.3) + (x2 + 0.2) * x2 + (x1 + 0.2) * x1 - fit = Fit(model, minimizer=BasinHopping) - fit_result = fit.execute() - self.assertGreaterEqual(fit_result.value(x1), x1.min) - self.assertIsInstance(fit.minimizer.local_minimizer, LBFGSB) - - -if __name__ == '__main__': - try: - unittest.main(warnings='ignore') - # Note that unittest will catch and handle exceptions raised by tests. - # So this line will *only* deal with exceptions raised by the line - # above. 
-    except TypeError:
-        # In Py2, unittest.main doesn't take a warnings argument
-        warnings.simplefilter('ignore')
-        unittest.main()
+
+    fit = BasinHopping(
+        func2d_symfit, [x1, x2],
+        local_minimizer=BFGS(func2d_symfit, [x1, x2], jacobian=jac2d_symfit)
+    )
+    fit_result = fit.execute(niter=200)
+    assert isinstance(fit.local_minimizer.jacobian, MinimizeModel)
+    assert isinstance(fit.local_minimizer.jacobian.model, CallableNumericalModel)
+    assert res.x[0] == fit_result.value(x1)
+    assert res.x[1] == fit_result.value(x2)
+    assert res.fun == fit_result.objective_value
+
+    # Now compare with the symbolic equivalent
+    np.random.seed(555)
+    model = cos(14.5 * x1 - 0.3) + (x2 + 0.2) * x2 + (x1 + 0.2) * x1
+    fit = Fit(model, minimizer=BasinHopping)
+    fit_result = fit.execute()
+    assert res.x[0] == fit_result.value(x1)
+    assert res.x[1] == fit_result.value(x2)
+    assert res.fun == fit_result.objective_value
+    assert isinstance(fit.minimizer.local_minimizer, BFGS)
+
+    # Impose constraints
+    np.random.seed(555)
+    model = cos(14.5 * x1 - 0.3) + (x2 + 0.2) * x2 + (x1 + 0.2) * x1
+    fit = Fit(model, minimizer=BasinHopping, constraints=[Eq(x1, x2)])
+    fit_result = fit.execute()
+    assert fit_result.value(x1) == fit_result.value(x2)
+    assert isinstance(fit.minimizer.local_minimizer, SLSQP)
+
+    # Impose bounds
+    np.random.seed(555)
+    x1.min = 0.0
+    model = cos(14.5 * x1 - 0.3) + (x2 + 0.2) * x2 + (x1 + 0.2) * x1
+    fit = Fit(model, minimizer=BasinHopping)
+    fit_result = fit.execute()
+    assert fit_result.value(x1) >= x1.min
+    assert isinstance(fit.minimizer.local_minimizer, LBFGSB)
diff --git a/tests/test_minimizers.py b/tests/test_minimizers.py
index 70409628..b90bd837 100644
--- a/tests/test_minimizers.py
+++ b/tests/test_minimizers.py
@@ -1,6 +1,5 @@
 from __future__ import division, print_function
-import unittest
-import sys
+import pytest
 import warnings
 
 import numpy as np
@@ -8,25 +7,30 @@
 import multiprocessing as mp
 
 from symfit import (
-    Variable, Parameter, Eq, Ge, Le, Lt, Gt, Ne, parameters, ModelError, Fit,
+    Variable, Parameter, Eq, Ge, parameters, Fit,
     Model, FitResults, variables, CallableNumericalModel
 )
 from symfit.core.minimizers import *
 from symfit.core.objectives import LeastSquares, MinimizeModel, VectorLeastSquares
 
 # Defined at the global level because local functions can't be pickled.
+
+
 def f(x, a, b):
     return a * x + b
 
+
 def chi_squared(x, y, a, b, sum=True):
     if sum:
         return np.sum((y - f(x, a, b)) ** 2)
     else:
         return (y - f(x, a, b)) ** 2
 
+
 def worker(fit_obj):
     return fit_obj.execute()
 
+
 class SqrtLeastSquares(LeastSquares):
     """
     Minimizes the square root of LeastSquares. This seems to help SLSQP in
@@ -37,6 +41,7 @@ class SqrtLeastSquares(LeastSquares):
     # objective. This lightweight version is given without proper testing
     # because only the call is relevant, and this makes our multiprocessing test
    # work.
+
    def __call__(self, *args, **kwargs):
        chi2 = super(SqrtLeastSquares, self).__call__(*args, **kwargs)
        return np.sqrt(chi2)
@@ -52,8 +57,8 @@ def eval_hessian(self, *args, **kwargs):
        chi2 = super(SqrtLeastSquares, self).__call__(*args, **kwargs)
        chi2_jac = super(SqrtLeastSquares, self).eval_jacobian(*args, **kwargs)
        chi2_hess = super(SqrtLeastSquares, self).eval_hessian(*args, **kwargs)
-        return - 0.5 * (1 / chi2) * np.outer(sqrt_chi2_jac, chi2_jac) \
-            + 0.5 * (1 / sqrt_chi2) * chi2_hess
+        return - 0.5 * (1 / chi2) * np.outer(sqrt_chi2_jac, chi2_jac) + 0.5 * (1 / sqrt_chi2) * chi2_hess
+
 
 def subclasses(base, leaves_only=True):
     """
@@ -73,346 +78,314 @@ def subclasses(base, leaves_only=True):
         all_subs.update(sub_subs)
     return all_subs
 
-class TestMinimize(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        np.random.seed(0)
 
-    def test_custom_objective(self):
-        """
-        Compare the result of a custom objective with the symbolic result.
-        :return:
-        """
-        # Create test data
-        xdata = np.linspace(0, 100, 25)  # From 0 to 100 in 100 steps
-        a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape)
-        b_vec = np.random.normal(100, scale=2.0, size=xdata.shape)
-        ydata = a_vec * xdata + b_vec  # Point scattered around the line 5 * x + 105
-
-        # Normal symbolic fit
-        a = Parameter('a', value=0, min=0.0, max=1000)
-        b = Parameter('b', value=0, min=0.0, max=1000)
-        x = Variable('x')
-        y = Variable('y')
-        model = a * x + b
-
-        fit = Fit(model, xdata, ydata, minimizer=BFGS)
-        fit_result = fit.execute()
-
-        def f(x, a, b):
-            return a * x + b
-
-        def chi_squared(a, b):
-            return np.sum((ydata - f(xdata, a, b))**2)
-
-        with warnings.catch_warnings(record=True) as w:
-            # Should no longer raise warnings, because internally we practice
-            # what we preach.
-            warnings.simplefilter("always")
-            fit_custom = BFGS(chi_squared, [a, b])
-        self.assertTrue(len(w) == 0)
-
-        fit_custom_result = fit_custom.execute()
-
-        self.assertIsInstance(fit_custom_result, FitResults)
-        self.assertAlmostEqual(fit_custom_result.value(a) / fit_result.value(a), 1.0, 5)
-        self.assertAlmostEqual(fit_custom_result.value(b) / fit_result.value(b), 1.0, 4)
-
-        # New preferred usage, multi component friendly.
-        with self.assertRaises(TypeError):
-            callable_model = CallableNumericalModel(
-                chi_squared,
-                connectivity_mapping={y: {a, b}}
-            )
+def setup_function():
+    np.random.seed(0)
+
+
+def test_custom_objective(recwarn):
+    """
+    Compare the result of a custom objective with the symbolic result.
+    :return:
+    """
+    # Create test data
+    xdata = np.linspace(0, 100, 25)  # From 0 to 100 in 25 steps
+    a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape)
+    b_vec = np.random.normal(100, scale=2.0, size=xdata.shape)
+    ydata = a_vec * xdata + b_vec  # Points scattered around the line 15 * x + 100
+
+    # Normal symbolic fit
+    a = Parameter('a', value=0, min=0.0, max=1000)
+    b = Parameter('b', value=0, min=0.0, max=1000)
+    x = Variable('x')
+    y = Variable('y')
+    model = {y: a * x + b}
+
+    fit = Fit(model, xdata, ydata, minimizer=BFGS)
+    fit_result = fit.execute()
+
+    def f(x, a, b):
+        return a * x + b
+
+    def chi_squared(a, b):
+        return np.sum((ydata - f(xdata, a, b))**2)
+
+    # Should no longer raise warnings, because internally we practice
+    # what we preach.
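+    # pytest's built-in `recwarn` fixture, requested in the test signature
+    # above, records every warning raised during the test; asserting that it
+    # stayed empty replaces the old pattern visible in the removed lines:
+    #
+    #     with warnings.catch_warnings(record=True) as w:
+    #         warnings.simplefilter("always")
+    #         fit_custom = BFGS(chi_squared, [a, b])
+    #     assert len(w) == 0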
+    fit_custom = BFGS(chi_squared, [a, b])
+    assert len(recwarn) == 0
+
+    fit_custom_result = fit_custom.execute()
+
+    assert isinstance(fit_custom_result, FitResults)
+    assert fit_custom_result.value(a) == pytest.approx(fit_result.value(a), 1e-5)
+    assert fit_custom_result.value(b) == pytest.approx(fit_result.value(b), 1e-4)
+
+    # New preferred usage, multi component friendly.
+    with pytest.raises(TypeError):
+        callable_model = CallableNumericalModel(
+            chi_squared,
+            connectivity_mapping={y: {a, b}}
+        )
-        callable_model = CallableNumericalModel(
-            {y: chi_squared},
-            connectivity_mapping={y: {a, b}}
-        )
-        self.assertEqual(callable_model.params, [a, b])
-        self.assertEqual(callable_model.independent_vars, [])
-        self.assertEqual(callable_model.dependent_vars, [y])
-        self.assertEqual(callable_model.interdependent_vars, [])
-        self.assertEqual(callable_model.connectivity_mapping, {y: {a, b}})
-        fit_custom = BFGS(callable_model, [a, b])
-        fit_custom_result = fit_custom.execute()
-
-        self.assertIsInstance(fit_custom_result, FitResults)
-        self.assertAlmostEqual(fit_custom_result.value(a) / fit_result.value(a), 1.0, 5)
-        self.assertAlmostEqual(fit_custom_result.value(b) / fit_result.value(b), 1.0, 4)
-
-    def test_custom_parameter_names(self):
+    callable_model = CallableNumericalModel(
+        {y: chi_squared},
+        connectivity_mapping={y: {a, b}}
+    )
+    assert callable_model.params == [a, b]
+    assert callable_model.independent_vars == []
+    assert callable_model.dependent_vars == [y]
+    assert callable_model.interdependent_vars == []
+    assert callable_model.connectivity_mapping == {y: {a, b}}
+    fit_custom = BFGS(callable_model, [a, b])
+    fit_custom_result = fit_custom.execute()
+
+    assert isinstance(fit_custom_result, FitResults)
+    assert fit_custom_result.value(a) == pytest.approx(fit_result.value(a), 1e-5)
+    assert fit_custom_result.value(b) == pytest.approx(fit_result.value(b), 1e-4)
+
+
+def test_custom_parameter_names():
+    """
+    For custom objective functions you still have to provide a list of Parameter
+    objects to use, with the same names as the keyword arguments to your function.
+    """
+    a = Parameter()
+    c = Parameter()
+
+    def chi_squared(a, b):
         """
-        For cusom objective functions you still have to provide a list of Parameter
-        objects to use with the same name as the keyword arguments to your function.
+        Dummy function with different keyword argument names
         """
-        a = Parameter()
-        c = Parameter()
+        pass
 
-        def chi_squared(a, b):
-            """
-            Dummy function with different keyword argument names
-            """
-            pass
+    fit_custom = BFGS(chi_squared, [a, c])
+    with pytest.raises(TypeError):
+        fit_custom.execute()
 
-        fit_custom = BFGS(chi_squared, [a, c])
-        with self.assertRaises(TypeError):
-            fit_custom.execute()
 
-    def test_powell(self):
-        """
-        Powell with a single parameter gave an error because a 0-d array was
-        returned by scipy. So no error here is winning.
-        """
-        x, y = variables('x, y')
-        a, b = parameters('a, b')
-        b.fixed = True
-
-        model = Model({y: a * x + b})
-        xdata = np.linspace(0, 10)
-        ydata = model(x=xdata, a=5.5, b=15.0).y + np.random.normal(0, 1)
-        fit = Fit({y: a * x + b}, x=xdata, y=ydata, minimizer=Powell)
-        fit_result = fit.execute()
-        self.assertAlmostEqual(fit_result.value(b), 1.0)
-
-    def test_jac_hess(self):
-        """
-        Make sure both the Jacobian and Hessian are passed to the minimizer.
- """ - x, y = variables('x, y') - a, b = parameters('a, b') - b.fixed = True - - model = Model({y: a * x + b}) - xdata = np.linspace(0, 10) - ydata = model(x=xdata, a=5.5, b=15.0).y + np.random.normal(0, 1) - fit = Fit({y: a * x + b}, x=xdata, y=ydata, minimizer=TrustConstr) - self.assertIsInstance(fit.minimizer.objective, LeastSquares) - self.assertIsInstance(fit.minimizer.jacobian.__self__, LeastSquares) - self.assertIsInstance(fit.minimizer.hessian.__self__, LeastSquares) - - fit_result = fit.execute() - self.assertAlmostEqual(fit_result.value(b), 1.0) - - def test_pickle(self): - """ - Test the picklability of the different minimizers. - """ - # Create test data - xdata = np.linspace(0, 100, 100) # From 0 to 100 in 100 steps - a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape) - b_vec = np.random.normal(100, scale=2.0, size=xdata.shape) - ydata = a_vec * xdata + b_vec # Point scattered around the line 5 * x + 105 - - # Normal symbolic fit - a = Parameter('a', value=0, min=0.0, max=1000) - b = Parameter('b', value=0, min=0.0, max=1000) - x, y = variables('x, y') - - # Make a set of all ScipyMinimizers, and add a chained minimizer. - scipy_minimizers = list(subclasses(ScipyMinimize)) - chained_minimizer = (DifferentialEvolution, BFGS) - scipy_minimizers.append(chained_minimizer) - constrained_minimizers = subclasses(ScipyConstrainedMinimize) - # Test for all of them if they can be pickled. - for minimizer in scipy_minimizers: - if minimizer in constrained_minimizers: - constraints = [Ge(b, a)] - else: - constraints = [] - model = CallableNumericalModel( - {y: f}, - independent_vars=[x], params=[a, b] - ) - fit = Fit(model, x=xdata, y=ydata, minimizer=minimizer, - constraints=constraints) - if minimizer is not MINPACK: - self.assertIsInstance(fit.objective, LeastSquares) - self.assertIsInstance(fit.minimizer.objective, LeastSquares) - else: - self.assertIsInstance(fit.objective, VectorLeastSquares) - self.assertIsInstance(fit.minimizer.objective, VectorLeastSquares) - - fit = fit.minimizer # Just check if the minimizer pickles - dump = pickle.dumps(fit) - pickled_fit = pickle.loads(dump) - problematic_attr = [ - 'objective', '_pickle_kwargs', 'wrapped_objective', - 'constraints', 'wrapped_constraints', - 'local_minimizer', 'minimizers' - ] - - for key, value in fit.__dict__.items(): - new_value = pickled_fit.__dict__[key] - try: - self.assertEqual(value, new_value) - except AssertionError as err: - if key in problematic_attr: - # These attr are new instances, and therefore do not - # pass an equality test. All we can do is see if they - # are at least the same type. 
- if isinstance(value, (list, tuple)): - for val1, val2 in zip(value, new_value): - self.assertTrue(isinstance(val1, val2.__class__)) - if key == 'constraints': - self.assertEqual(val1.model.constraint_type, - val2.model.constraint_type) - self.assertEqual( - list(val1.model.model_dict.values())[0], - list(val2.model.model_dict.values())[0] - ) - self.assertEqual(val1.model.independent_vars, - val2.model.independent_vars) - self.assertEqual(val1.model.params, - val2.model.params) - self.assertEqual(val1.model.__signature__, - val2.model.__signature__) - elif key == 'wrapped_constraints': - if isinstance(val1, dict): - self.assertEqual(val1['type'], - val2['type']) - self.assertEqual(set(val1.keys()), - set(val2.keys())) - elif isinstance(val1, NonlinearConstraint): - # For trust-ncg we manually check if - # their dicts are equal, because no - # __eq__ is implemented on - # NonLinearConstraint - self.assertEqual(len(val1.__dict__), - len(val2.__dict__)) - for key in val1.__dict__: - try: - self.assertEqual( - val1.__dict__[key], - val2.__dict__[key] - ) - except AssertionError: - self.assertIsInstance( - val1.__dict__[key], - val2.__dict__[key].__class__ - ) - else: - raise NotImplementedError( - 'No such constraint type is known.' - ) - elif key == '_pickle_kwargs': - FitResults._array_safe_dict_eq(value, new_value) - else: - self.assertTrue(isinstance(new_value, value.__class__)) - else: - raise err - self.assertEqual(set(fit.__dict__.keys()), - set(pickled_fit.__dict__.keys())) - - # Test if we converge to the same result. - np.random.seed(2) - res_before = fit.execute() - np.random.seed(2) - res_after = pickled_fit.execute() - self.assertTrue(FitResults._array_safe_dict_eq(res_before.__dict__, - res_after.__dict__)) - - def test_multiprocessing(self): - """ - To make sure pickling truly works, try multiprocessing. No news is good - news. - """ - np.random.seed(2) - x = np.arange(100, dtype=float) - a_values = np.array([1, 2, 3]) - np.random.shuffle(a_values) - - def gen_fit_objs(x, a, minimizer): - """Generates linear fits with different a parameter values.""" - for a_i in a: - a_par = Parameter('a', 4.0, min=0.0, max=20) - b_par = Parameter('b', 1.2, min=0.0, max=2) - x_var = Variable('x') - y_var = Variable('y') - - model = CallableNumericalModel({y_var: f}, [x_var], [a_par, b_par]) - - fit = Fit( - model, x, a_i * x + 1, minimizer=minimizer, - objective=SqrtLeastSquares if minimizer is not MINPACK else VectorLeastSquares - ) - yield fit - - minimizers = subclasses(ScipyMinimize) - chained_minimizer = (DifferentialEvolution, BFGS) - minimizers.add(chained_minimizer) - - pool = mp.Pool() - for minimizer in minimizers: - results = pool.map(worker, gen_fit_objs(x, a_values, minimizer)) - a_results = [res.params['a'] for res in results] - minimizer_results = [res.minimizer for res in results] - # Check the results - np.testing.assert_almost_equal(a_values, a_results, decimal=2) - for result in results: - # Check that we are actually using the right minimizer - if isinstance(result.minimizer, ChainedMinimizer): - for used, target in zip(result.minimizer.minimizers, minimizer): - self.assertIsInstance(used, target) - else: - self.assertIsInstance(result.minimizer, minimizer) - self.assertIsInstance(result.iterations, int) +def test_powell(): + """ + Powell with a single parameter gave an error because a 0-d array was + returned by scipy. So no error here is winning. 
+ """ + x, y = variables('x, y') + a, b = parameters('a, b') + b.fixed = True - def test_minimizer_constraint_compatibility(self): - """ - Test if #156 has been solved, and test all the other constraint styles. - """ - x, y, z = variables('x, y, z') - a, b, c = parameters('a, b, c') - b.fixed = True - - model = Model({z: a * x**2 - b * y**2 + c}) - # Generate data, z has to be scalar for MinimizeModel to be happy - xdata = 3 #np.linspace(0, 10) - ydata = 5 # np.linspace(0, 10) - zdata = model(a=2, b=3, c=5, x=xdata, y=ydata).z - data_dict = {x: xdata, y: ydata, z: zdata} - - # Equivalent ways of defining the same constraint - constraint_model = Model.as_constraint(a - c, model, constraint_type=Eq) - constraint_model.params = model.params - constraints = [ - Eq(a, c), - MinimizeModel(constraint_model, data=data_dict), - constraint_model + model = Model({y: a * x + b}) + xdata = np.linspace(0, 10) + ydata = model(x=xdata, a=5.5, b=15.0).y + np.random.normal(0, 1) + fit = Fit({y: a * x + b}, x=xdata, y=ydata, minimizer=Powell) + fit_result = fit.execute() + assert fit_result.value(b) == pytest.approx(1.0) + + +def test_jac_hess(): + """ + Make sure both the Jacobian and Hessian are passed to the minimizer. + """ + x, y = variables('x, y') + a, b = parameters('a, b') + b.fixed = True + + model = Model({y: a * x + b}) + xdata = np.linspace(0, 10) + ydata = model(x=xdata, a=5.5, b=15.0).y + np.random.normal(0, 1) + fit = Fit({y: a * x + b}, x=xdata, y=ydata, minimizer=TrustConstr) + assert isinstance(fit.minimizer.objective, LeastSquares) + assert isinstance(fit.minimizer.jacobian.__self__, LeastSquares) + assert isinstance(fit.minimizer.hessian.__self__, LeastSquares) + + fit_result = fit.execute() + assert fit_result.value(b) == pytest.approx(1.0) + + +def test_pickle(): + """ + Test the picklability of the different minimizers. + """ + # Create test data + xdata = np.linspace(0, 100, 100) # From 0 to 100 in 100 steps + a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape) + b_vec = np.random.normal(100, scale=2.0, size=xdata.shape) + ydata = a_vec * xdata + b_vec # Point scattered around the line 5 * x + 105 + + # Normal symbolic fit + a = Parameter('a', value=0, min=0.0, max=1000) + b = Parameter('b', value=0, min=0.0, max=1000) + x, y = variables('x, y') + + # Make a set of all ScipyMinimizers, and add a chained minimizer. + scipy_minimizers = list(subclasses(ScipyMinimize)) + chained_minimizer = (DifferentialEvolution, BFGS) + scipy_minimizers.append(chained_minimizer) + constrained_minimizers = subclasses(ScipyConstrainedMinimize) + # Test for all of them if they can be pickled. 
+ for minimizer in scipy_minimizers: + if minimizer in constrained_minimizers: + constraints = [Ge(b, a)] + else: + constraints = [] + model = CallableNumericalModel( + {y: f}, + independent_vars=[x], params=[a, b] + ) + fit = Fit(model, x=xdata, y=ydata, minimizer=minimizer, + constraints=constraints) + if minimizer is not MINPACK: + assert isinstance(fit.objective, LeastSquares) + assert isinstance(fit.minimizer.objective, LeastSquares) + else: + assert isinstance(fit.objective, VectorLeastSquares) + assert isinstance(fit.minimizer.objective, VectorLeastSquares) + + fit = fit.minimizer # Just check if the minimizer pickles + dump = pickle.dumps(fit) + pickled_fit = pickle.loads(dump) + problematic_attr = [ + 'objective', '_pickle_kwargs', 'wrapped_objective', + 'constraints', 'wrapped_constraints', + 'local_minimizer', 'minimizers' ] - objective = MinimizeModel(model, data=data_dict) - for constraint in constraints: - fit = SLSQP(objective, parameters=[a, b, c], - constraints=[constraint]) - wrapped_constr = fit.wrapped_constraints[0]['fun'].model - self.assertIsInstance(wrapped_constr, Model) - self.assertEqual(wrapped_constr.params, model.params) - self.assertEqual(wrapped_constr.jacobian_model.params, model.params) - self.assertEqual(wrapped_constr.hessian_model.params, model.params) - # Set the data for the dependent var of the constraint to None - # Normally this is handled by Fit because here we interact with the - # Minimizer directly, it is up to us. - constraint_var = fit.wrapped_constraints[0]['fun'].model.dependent_vars[0] - objective.data[constraint_var] = None - fit.execute() - - # No scipy style dicts allowed. - with self.assertRaises(TypeError): - fit = SLSQP(MinimizeModel(model, data=data_dict), - parameters=[a, b, c], - constraints=[ - {'type': 'eq', 'fun': lambda a, b, c: a - c} - ] + for key, value in fit.__dict__.items(): + new_value = pickled_fit.__dict__[key] + try: + assert value == new_value + except AssertionError as err: + if key not in problematic_attr: + raise err + # These attr are new instances, and therefore do not + # pass an equality test. All we can do is see if they + # are at least the same type. + if isinstance(value, (list, tuple)): + for val1, val2 in zip(value, new_value): + assert isinstance(val1, val2.__class__) + if key == 'constraints': + assert val1.model.constraint_type == val2.model.constraint_type + assert list(val1.model.model_dict.values())[0] == list(val2.model.model_dict.values())[0] + assert val1.model.independent_vars == val2.model.independent_vars + assert val1.model.params == val2.model.params + assert val1.model.__signature__ == val2.model.__signature__ + elif key == 'wrapped_constraints': + if isinstance(val1, dict): + assert val1['type'] == val2['type'] + assert set(val1.keys()) == set(val2.keys()) + elif isinstance(val1, NonlinearConstraint): + # For trust-ncg we manually check if + # their dicts are equal, because no + # __eq__ is implemented on + # NonLinearConstraint + assert len(val1.__dict__) == len(val2.__dict__) + for key in val1.__dict__: + try: + assert val1.__dict__[key] == val2.__dict__[key] + except AssertionError: + assert isinstance(val1.__dict__[key], val2.__dict__[key].__class__) + else: + raise NotImplementedError('No such constraint type is known.') + elif key == '_pickle_kwargs': + FitResults._array_safe_dict_eq(value, new_value) + else: + assert isinstance(new_value, value.__class__) + assert set(fit.__dict__.keys()) == set(pickled_fit.__dict__.keys()) + + # Test if we converge to the same result. 
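+        # Both executions re-seed NumPy's global RNG first, so stochastic
+        # minimizers such as DifferentialEvolution follow identical paths
+        # and the two result dicts can be compared entry by entry.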
+ np.random.seed(2) + res_before = fit.execute() + np.random.seed(2) + res_after = pickled_fit.execute() + assert FitResults._array_safe_dict_eq(res_before.__dict__, res_after.__dict__) + + +def test_multiprocessing(): + """ + To make sure pickling truly works, try multiprocessing. No news is good + news. + """ + np.random.seed(2) + x = np.arange(100, dtype=float) + a_values = np.array([1, 2, 3]) + np.random.shuffle(a_values) + + def gen_fit_objs(x, a, minimizer): + """Generates linear fits with different a parameter values.""" + for a_i in a: + a_par = Parameter('a', 4.0, min=0.0, max=20) + b_par = Parameter('b', 1.2, min=0.0, max=2) + x_var = Variable('x') + y_var = Variable('y') + + model = CallableNumericalModel({y_var: f}, [x_var], [a_par, b_par]) + + fit = Fit( + model, x, a_i * x + 1, minimizer=minimizer, + objective=SqrtLeastSquares if minimizer is not MINPACK else VectorLeastSquares ) + yield fit + + minimizers = subclasses(ScipyMinimize) + chained_minimizer = (DifferentialEvolution, BFGS) + minimizers.add(chained_minimizer) + + pool = mp.Pool() + for minimizer in minimizers: + results = pool.map(worker, gen_fit_objs(x, a_values, minimizer)) + a_results = [res.params['a'] for res in results] + # Check the results + assert a_values == pytest.approx(a_results, 1e-2) + for result in results: + # Check that we are actually using the right minimizer + if isinstance(result.minimizer, ChainedMinimizer): + for used, target in zip(result.minimizer.minimizers, minimizer): + assert isinstance(used, target) + else: + assert isinstance(result.minimizer, minimizer) + assert isinstance(result.iterations, int) + -if __name__ == '__main__': - try: - unittest.main(warnings='ignore') - # Note that unittest will catch and handle exceptions raised by tests. - # So this line will *only* deal with exceptions raised by the line - # above. - except TypeError: - # In Py2, unittest.main doesn't take a warnings argument - warnings.simplefilter('ignore') - unittest.main() +def test_minimizer_constraint_compatibility(): + """ + Test if #156 has been solved, and test all the other constraint styles. + """ + x, y, z = variables('x, y, z') + a, b, c = parameters('a, b, c') + b.fixed = True + + model = Model({z: a * x**2 - b * y**2 + c}) + # Generate data, z has to be scalar for MinimizeModel to be happy + xdata = 3 # np.linspace(0, 10) + ydata = 5 # np.linspace(0, 10) + zdata = model(a=2, b=3, c=5, x=xdata, y=ydata).z + data_dict = {x: xdata, y: ydata, z: zdata} + + # Equivalent ways of defining the same constraint + constraint_model = Model.as_constraint(a - c, model, constraint_type=Eq) + constraint_model.params = model.params + constraints = [ + Eq(a, c), + MinimizeModel(constraint_model, data=data_dict), + constraint_model + ] + + objective = MinimizeModel(model, data=data_dict) + for constraint in constraints: + fit = SLSQP(objective, parameters=[a, b, c], + constraints=[constraint]) + wrapped_constr = fit.wrapped_constraints[0]['fun'].model + assert isinstance(wrapped_constr, Model) + assert wrapped_constr.params == model.params + assert wrapped_constr.jacobian_model.params == model.params + assert wrapped_constr.hessian_model.params == model.params + # Set the data for the dependent var of the constraint to None + # Normally this is handled by Fit because here we interact with the + # Minimizer directly, it is up to us. + constraint_var = fit.wrapped_constraints[0]['fun'].model.dependent_vars[0] + objective.data[constraint_var] = None + fit.execute() + + # No scipy style dicts allowed. 
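+    # Only symfit-style constraints (a sympy relation, a constraint Model,
+    # or a MinimizeModel, as exercised above) are accepted, so a raw
+    # scipy-style dict should be rejected with a TypeError.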
+ with pytest.raises(TypeError): + fit = SLSQP(MinimizeModel(model, data=data_dict), + parameters=[a, b, c], + constraints=[{'type': 'eq', 'fun': lambda a, b, c: a - c}]) diff --git a/tests/test_model.py b/tests/test_model.py index e8a1b8d1..2f60bf9d 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,5 +1,5 @@ from __future__ import division, print_function -import unittest +import pytest from collections import OrderedDict import pickle try: @@ -18,448 +18,431 @@ jacobian_from_model, hessian_from_model, ModelError, ModelOutput ) -class TestModel(unittest.TestCase): + +""" +Tests for Model objects. +""" + + +def test_model_as_dict(): + x, y_1, y_2 = variables('x, y_1, y_2') + a, b = parameters('a, b') + + model_dict = OrderedDict([(y_1, a * x**2), (y_2, 2 * x * b)]) + model = Model(model_dict) + + assert model[y_1] is model_dict[y_1] + assert model[y_2] is model_dict[y_2] + assert len(model) == len(model_dict) + assert model.items() == model_dict.items() + assert model.keys() == model_dict.keys() + assert list(model.values()) == list(model_dict.values()) + assert y_1 in model + assert not model[y_1] in model + + +def test_order(): """ - Tests for Model objects. + The model has to behave like an OrderedDict. This is of the utmost importance! """ - def test_model_as_dict(self): - x, y_1, y_2 = variables('x, y_1, y_2') - a, b = parameters('a, b') + x, y_1, y_2 = variables('x, y_1, y_2') + a, b = parameters('a, b') - model_dict = OrderedDict([(y_1, a * x**2), (y_2, 2 * x * b)]) - model = Model(model_dict) + model_dict = {y_2: a * x**2, y_1: 2 * x * b} + model = Model(model_dict) - self.assertEqual(id(model[y_1]), id(model_dict[y_1])) - self.assertEqual(id(model[y_2]), id(model_dict[y_2])) - self.assertEqual(len(model), len(model_dict)) - self.assertEqual(model.items(), model_dict.items()) - self.assertEqual(model.keys(), model_dict.keys()) - self.assertEqual(list(model.values()), list(model_dict.values())) - self.assertTrue(y_1 in model) - self.assertFalse(model[y_1] in model) - - def test_order(self): - """ - The model has to behave like an OrderedDict. This is of the utmost importance! - """ - x, y_1, y_2 = variables('x, y_1, y_2') - a, b = parameters('a, b') - - model_dict = {y_2: a * x**2, y_1: 2 * x * b} - model = Model(model_dict) + assert model.dependent_vars == list(model.keys()) - self.assertEqual(model.dependent_vars, list(model.keys())) +def test_neg(): + """ + Test negation of all model types + """ + x, y_1, y_2 = variables('x, y_1, y_2') + a, b = parameters('a, b') + + model_dict = {y_2: a * x ** 2, y_1: 2 * x * b} + model = Model(model_dict) + + model_neg = - model + for key in model: + assert model[key] == - model_neg[key] + + # Constraints + constraint = Model.as_constraint(Eq(a * x, 2), model) + + constraint_neg = - constraint + # for key in constraint: + assert constraint[constraint.dependent_vars[0]] == - constraint_neg[constraint_neg.dependent_vars[0]] + + # ODEModel + odemodel = ODEModel({D(y_1, x): a * x}, initial={a: 1.0}) + + odemodel_neg = - odemodel + for key in odemodel: + assert odemodel[key] == - odemodel_neg[key] + + # For models with interdependency, negation should only change the + # dependent components. 
+    model_dict = {x: y_1**2, y_1: a * y_2 + b}
+    model = Model(model_dict)
+
+    model_neg = - model
+    for key in model:
+        if key in model.dependent_vars:
+            assert model[key] == - model_neg[key]
+        elif key in model.interdependent_vars:
+            assert model[key] == model_neg[key]
+        else:
+            pytest.fail('There should be no such variable')
+
+
+def test_CallableNumericalModel():
+    x, y, z = variables('x, y, z')
+    a, b = parameters('a, b')
+
+    model = CallableModel({y: a * x + b})
+    numerical_model = CallableNumericalModel(
+        {y: lambda x, a, b: a * x + b}, [x], [a, b]
+    )
+    assert model.__signature__ == numerical_model.__signature__
+
+    xdata = np.linspace(0, 10)
+    ydata = model(x=xdata, a=5.5, b=15.0).y + np.random.normal(0, 1)
+
+    symbolic_answer = np.array(model(x=xdata, a=5.5, b=15.0))
+    numerical_answer = np.array(numerical_model(x=xdata, a=5.5, b=15.0))
+
+    assert numerical_answer == pytest.approx(symbolic_answer)
+
+    faulty_model = CallableNumericalModel({y: lambda x, a, b: a * x + b},
+                                          [], [a, b])
+    assert model.__signature__ != faulty_model.__signature__
+    with pytest.raises(TypeError):
+        # This is an incorrect signature, even though the lambda function is
+        # correct. Should fail.
+        faulty_model(xdata, 5.5, 15.0)
+
+    # Faulty model whose components do not all accept all of the args
+    faulty_model = CallableNumericalModel(
+        {y: lambda x, a, b: a * x + b, z: lambda x, a: x**a}, [x], [a, b]
+    )
+    assert model.__signature__ == faulty_model.__signature__
+
+    with pytest.raises(TypeError):
+        # Lambda got an unexpected keyword 'b'
+        faulty_model(xdata, 5.5, 15.0)
+
+    # Faulty model with a wrongly named argument
+    faulty_model = CallableNumericalModel(
+        {y: lambda x, a, c=5: a * x + c}, [x], [a, b]
+    )
+    assert model.__signature__ == faulty_model.__signature__
+
+    with pytest.raises(TypeError):
+        # Lambda got an unexpected keyword 'b'
+        faulty_model(xdata, 5.5, 15.0)
+
+    # Correct version of the previous model
+    numerical_model = CallableNumericalModel(
+        {y: lambda x, a, b: a * x + b, z: lambda x, a: x ** a},
+        connectivity_mapping={y: {a, b, x}, z: {x, a}}
+    )
+    # Mixed symbolic/numerical version of the previous model
+    mixed_model = CallableNumericalModel(
+        {y: lambda x, a, b: a * x + b, z: x ** a}, [x],
+        [a, b]
+    )
+
+    numerical_answer = np.array(numerical_model(x=xdata, a=5.5, b=15.0))
+    mixed_answer = np.array(mixed_model(x=xdata, a=5.5, b=15.0))
+    assert numerical_answer == pytest.approx(mixed_answer)
+
+    zdata = mixed_model(x=xdata, a=5.5, b=15.0).z + np.random.normal(0, 1)
+
+    # Check if the fits are the same
+    fit = Fit(mixed_model, x=xdata, y=ydata, z=zdata)
+    mixed_result = fit.execute()
+    fit = Fit(numerical_model, x=xdata, y=ydata, z=zdata)
+    numerical_result = fit.execute()
+    for param in [a, b]:
+        assert mixed_result.value(param) == pytest.approx(numerical_result.value(param))
+        if mixed_result.stdev(param) is not None and numerical_result.stdev(param) is not None:
+            assert mixed_result.stdev(param) == pytest.approx(numerical_result.stdev(param))
+        else:
+            assert mixed_result.stdev(param) is None and numerical_result.stdev(param) is None
+    assert mixed_result.r_squared == pytest.approx(numerical_result.r_squared)
+
+    # Test if the constrained syntax is supported
+    fit = Fit(numerical_model, x=xdata, y=ydata,
+              z=zdata, constraints=[Eq(a, b)])
+    constrained_result = fit.execute()
+    assert constrained_result.value(a) == pytest.approx(constrained_result.value(b))
+
+
+def test_CallableNumericalModel_infer_connectivity():
+    """
+    When a CallableNumericalModel is instantiated with symbolic and
+    non-symbolic components, only the connectivity mapping for the
+    non-symbolic part needs to be provided.
+    """
+    x, y, z = variables('x, y, z')
+    a, b = parameters('a, b')
+    model_dict = {z: lambda y, a, b: a * y + b,
+                  y: x ** a}
+    mixed_model = CallableNumericalModel(
+        model_dict, connectivity_mapping={z: {y, a, b}}
+    )
+    assert mixed_model.connectivity_mapping == {z: {y, a, b}, y: {x, a}}
 
-    def test_neg(self):
-        """
-        Test negation of all model types
-        """
-        x, y_1, y_2 = variables('x, y_1, y_2')
-        a, b = parameters('a, b')
 
-        model_dict = {y_2: a * x ** 2, y_1: 2 * x * b}
-        model = Model(model_dict)
+def test_CallableNumericalModel2D():
+    """
+    Apply a CallableNumericalModel to 2D data, to see if it is
+    agnostic to data shape.
+    """
+    shape = (30, 40)
 
-        model_neg = - model
-        for key in model:
-            self.assertEqual(model[key], - model_neg[key])
+    def function(a, b):
+        out = np.ones(shape) * a
+        out[15:, :] += b
+        return out
 
-        # Constraints
-        constraint = Model.as_constraint(Eq(a * x, 2), model)
+    a, b = parameters('a, b')
+    y, = variables('y')
 
-        constraint_neg = - constraint
-        # for key in constraint:
-        self.assertEqual(constraint[constraint.dependent_vars[0]],
-                         constraint_neg[constraint_neg.dependent_vars[0]])
+    model = CallableNumericalModel({y: function}, [], [a, b])
+    data = 15 * np.ones(shape)
+    data[15:, :] += 20
 
-        # ODEModel
-        odemodel = ODEModel({D(y_1, x): a * x}, initial={a: 1.0})
+    fit = Fit(model, y=data)
+    fit_result = fit.execute()
+    assert fit_result.value(a) == pytest.approx(15)
+    assert fit_result.value(b) == pytest.approx(20)
 
-        odemodel_neg = - odemodel
-        for key in odemodel:
-            self.assertEqual(odemodel[key], - odemodel_neg[key])
+    def flattened_function(a, b):
+        out = np.ones(shape) * a
+        out[15:, :] += b
+        return out.flatten()
 
-        # For models with interdependency, negation should only change the
-        # dependent components.
-        model_dict = {x: y_1**2, y_1: a * y_2 + b}
-        model = Model(model_dict)
+    model = CallableNumericalModel({y: flattened_function}, [], [a, b])
+    data = 15 * np.ones(shape)
+    data[15:, :] += 20
+    data = data.flatten()
 
-        model_neg = - model
-        for key in model:
-            if key in model.dependent_vars:
-                self.assertEqual(model[key], - model_neg[key])
-            elif key in model.interdependent_vars:
-                self.assertEqual(model[key], model_neg[key])
-            else:
-                raise Exception('There should be no such variable')
-
-
-    def test_CallableNumericalModel(self):
-        x, y, z = variables('x, y, z')
-        a, b = parameters('a, b')
-
-        model = CallableModel({y: a * x + b})
-        numerical_model = CallableNumericalModel(
-            {y: lambda x, a, b: a * x + b}, [x], [a, b]
-        )
-        self.assertEqual(model.__signature__, numerical_model.__signature__)
-
-        xdata = np.linspace(0, 10)
-        ydata = model(x=xdata, a=5.5, b=15.0).y + np.random.normal(0, 1)
-        np.testing.assert_almost_equal(
-            model(x=xdata, a=5.5, b=15.0),
-            numerical_model(x=xdata, a=5.5, b=15.0),
-        )
-
-        faulty_model = CallableNumericalModel({y: lambda x, a, b: a * x + b},
-                                              [], [a, b])
-        self.assertNotEqual(model.__signature__, faulty_model.__signature__)
-        with self.assertRaises(TypeError):
-            # This is an incorrect signature, even though the lambda function is
-            # correct. Should fail.
- faulty_model(xdata, 5.5, 15.0) - - # Faulty model whose components do not all accept all of the args - faulty_model = CallableNumericalModel( - {y: lambda x, a, b: a * x + b, z: lambda x, a: x**a}, [x], [a, b] - ) - self.assertEqual(model.__signature__, faulty_model.__signature__) - with self.assertRaises(TypeError): - # Lambda got an unexpected keyword 'b' - faulty_model(xdata, 5.5, 15.0) - - # Faulty model with a wrongly named argument - faulty_model = CallableNumericalModel( - {y: lambda x, a, c=5: a * x + c}, [x], [a, b] - ) - self.assertEqual(model.__signature__, faulty_model.__signature__) - with self.assertRaises(TypeError): - # Lambda got an unexpected keyword 'b' - faulty_model(xdata, 5.5, 15.0) - - - # Correct version of the previous model - numerical_model = CallableNumericalModel( - {y: lambda x, a, b: a * x + b, z: lambda x, a: x ** a}, - connectivity_mapping={y: {a, b, x}, z: {x, a}} - ) - # Correct version of the previous model - mixed_model = CallableNumericalModel( - {y: lambda x, a, b: a * x + b, z: x ** a}, [x], - [a, b] - ) - np.testing.assert_almost_equal( - numerical_model(x=xdata, a=5.5, b=15.0), - mixed_model(x=xdata, a=5.5, b=15.0) - ) - zdata = mixed_model(x=xdata, a=5.5, b=15.0).z + np.random.normal(0, 1) - - # Check if the fits are the same - fit = Fit(mixed_model, x=xdata, y=ydata, z=zdata) - mixed_result = fit.execute() - fit = Fit(numerical_model, x=xdata, y=ydata, z=zdata) - numerical_result = fit.execute() - for param in [a, b]: - self.assertAlmostEqual( - mixed_result.value(param), - numerical_result.value(param) - ) - self.assertAlmostEqual( - mixed_result.stdev(param), - numerical_result.stdev(param) - ) - self.assertAlmostEqual(mixed_result.r_squared, numerical_result.r_squared) - - # Test if the constrained syntax is supported - fit = Fit(numerical_model, x=xdata, y=ydata, z=zdata, constraints=[Eq(a, b)]) - constrained_result = fit.execute() - self.assertAlmostEqual(constrained_result.value(a), constrained_result.value(b)) - - def test_CallableNumericalModel_infer_connectivity(self): - """ - When a CallableNumericalModel is initiated with symbolical and - non-symbolical components, only the connectivity mapping for - non-symbolical part has to be provided. - """ - x, y, z = variables('x, y, z') - a, b = parameters('a, b') - model_dict = {z: lambda y, a, b: a * y + b, - y: x ** a} - mixed_model = CallableNumericalModel( - model_dict, connectivity_mapping={z: {y, a, b}} - ) - self.assertEqual(mixed_model.connectivity_mapping, - {z: {y, a, b}, y: {x, a}}) - - - def test_CallableNumericalModel2D(self): - """ - Apply a CallableNumericalModel to 2D data, to see if it is - agnostic to data shape. 
- """ - shape = (30, 40) - - def function(a, b): - out = np.ones(shape) * a - out[15:, :] += b - return out - - a, b = parameters('a, b') - y, = variables('y') - - model = CallableNumericalModel({y: function}, [], [a, b]) - data = 15 * np.ones(shape) - data[15:, :] += 20 - - fit = Fit(model, y=data) - fit_result = fit.execute() - self.assertAlmostEqual(fit_result.value(a), 15) - self.assertAlmostEqual(fit_result.value(b), 20) - - def flattened_function(a, b): - out = np.ones(shape) * a - out[15:, :] += b - return out.flatten() - - model = CallableNumericalModel({y: flattened_function}, [], [a, b]) - data = 15 * np.ones(shape) - data[15:, :] += 20 - data = data.flatten() - - fit = Fit(model, y=data) - flat_result = fit.execute() - - self.assertAlmostEqual(fit_result.value(a), flat_result.value(a)) - self.assertAlmostEqual(fit_result.value(b), flat_result.value(b)) - self.assertAlmostEqual(fit_result.stdev(a), flat_result.stdev(a)) - self.assertAlmostEqual(fit_result.stdev(b), flat_result.stdev(b)) - self.assertAlmostEqual(fit_result.r_squared, flat_result.r_squared) - - def test_pickle(self): - """ - Make sure models can be pickled are preserved when pickling - """ - a, b = parameters('a, b') - x, y = variables('x, y') - exact_model = Model({y: a * x ** b}) - constraint = Model.as_constraint(Eq(a, b), exact_model) - num_model = CallableNumericalModel( - {y: a * x ** b}, independent_vars=[x], params=[a, b] - ) - connected_num_model = CallableNumericalModel( - {y: a * x ** b}, connectivity_mapping={y: {x, a, b}} - ) - # Test if lsoda args and kwargs are pickled too - ode_model = ODEModel({D(y, x): a * x + b}, {x: 0.0}, 3, 4, some_kwarg=True) - - models = [exact_model, constraint, num_model, ode_model, - connected_num_model] - for model in models: - new_model = pickle.loads(pickle.dumps(model)) - # Compare signatures - self.assertEqual(model.__signature__, new_model.__signature__) - # Trigger the cached vars because we compare `__dict__` s - model.vars - new_model.vars - # Explicitly make sure the connectivity mapping is identical. - self.assertEqual(model.connectivity_mapping, - new_model.connectivity_mapping) - if not isinstance(model, ODEModel): - model.function_dict - model.vars_as_functions - new_model.function_dict - new_model.vars_as_functions - self.assertEqual(model.__dict__, new_model.__dict__) - - def test_MatrixSymbolModel(self): - """ - Test a model which is defined by ModelSymbols, see #194 - """ - N = Symbol('N', integer=True) - M = MatrixSymbol('M', N, N) - W = MatrixSymbol('W', N, N) - I = MatrixSymbol('I', N, N) - y = MatrixSymbol('y', N, 1) - c = MatrixSymbol('c', N, 1) - a, b = parameters('a, b') - z, x = variables('z, x') + fit = Fit(model, y=data) + flat_result = fit.execute() - model_dict = { - W: Inverse(I + M / a ** 2), - c: - W * y, - z: sqrt(c.T * c) - } - # TODO: This should be a Model in the future, but sympy is not yet - # capable of computing Matrix derivatives at the time of writing. 
- model = CallableModel(model_dict) - - self.assertEqual(model.params, [a]) - self.assertEqual(model.independent_vars, [I, M, y]) - self.assertEqual(model.dependent_vars, [z]) - self.assertEqual(model.interdependent_vars, [W, c]) - self.assertEqual(model.connectivity_mapping, - {W: {I, M, a}, c: {W, y}, z: {c}}) - # Generate data - iden = np.eye(2) - M_mat = np.array([[2, 1], [3, 4]]) - y_vec = np.array([3, 5]) - - eval_model = model(I=iden, M=M_mat, y=y_vec, a=0.1) - W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2) - c_manual = - W_manual.dot(y_vec) - z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual))) - np.testing.assert_allclose(eval_model.W, W_manual) - np.testing.assert_allclose(eval_model.c, c_manual) - np.testing.assert_allclose(eval_model.z, z_manual) - - # Now try to retrieve the value of `a` from a fit - a.value = 0.2 - fit = Fit(model, z=z_manual, I=iden, M=M_mat, y=y_vec) - fit_result = fit.execute() - eval_model = model(I=iden, M=M_mat, y=y_vec, **fit_result.params) - self.assertAlmostEqual(0.1, np.abs(fit_result.value(a))) - np.testing.assert_allclose(eval_model.W, W_manual, rtol=1e-5) - np.testing.assert_allclose(eval_model.c, c_manual, rtol=1e-5) - np.testing.assert_allclose(eval_model.z, z_manual, rtol=1e-5) - - # TODO: add constraints to Matrix model. But since Matrix expressions - # can not yet be derived, this needs #154 to be solved first. - - def test_interdependency_invalid(self): - """ - Create an invalid model with interdependency. - """ - a, b, c = parameters('a, b, c') - x, y, z = variables('x, y, z') - - with self.assertRaises(ModelError): - # Invalid, parameters can not be keys - model_dict = { - c: a ** 3 * x + b ** 2, - z: c ** 2 + a * b - } - model = Model(model_dict) - with self.assertRaises(ModelError): - # Invalid, parameters can not be keys - model_dict = {c: a ** 3 * x + b ** 2} - model = Model(model_dict) - - - def test_interdependency(self): - a, b = parameters('a, b') - x, y, z = variables('x, y, z') - model_dict = { - y: a**3 * x + b**2, - z: y**2 + a * b - } - callable_model = CallableModel(model_dict) - self.assertEqual(callable_model.independent_vars, [x]) - self.assertEqual(callable_model.interdependent_vars, [y]) - self.assertEqual(callable_model.dependent_vars, [z]) - self.assertEqual(callable_model.params, [a, b]) - self.assertEqual(callable_model.connectivity_mapping, - {y: {a, b, x}, z: {a, b, y}}) - np.testing.assert_almost_equal(callable_model(x=3, a=1, b=2), - np.atleast_2d([7, 51]).T) - for var, func in callable_model.vars_as_functions.items(): - self.assertEqual( - set(str(x) for x in callable_model.connectivity_mapping[var]), - set(str(x.__class__) if isinstance(x, Function) else str(x) - for x in func.args) - ) - - jac_model = jacobian_from_model(callable_model) - self.assertEqual(jac_model.params, [a, b]) - self.assertEqual(jac_model.dependent_vars, [D(z, a), D(z, b), z]) - self.assertEqual(jac_model.interdependent_vars, [D(y, a), D(y, b), y]) - self.assertEqual(jac_model.independent_vars, [x]) - for p1, p2 in zip_longest(jac_model.__signature__.parameters, [x, a, b]): - self.assertEqual(str(p1), str(p2)) - # The connectivity of jac_model should be that from it's own components - # plus that of the model. The latter is needed to properly compute the - # Hessian. 
-        self.assertEqual(
-            jac_model.connectivity_mapping,
-            {D(y, a): {a, x},
-             D(y, b): {b},
-             D(z, a): {b, y, D(y, a)},
-             D(z, b): {a, y, D(y, b)},
-             y: {a, b, x}, z: {a, b, y}
-             }
-        )
-        self.assertEqual(
-            jac_model.model_dict,
-            {D(y, a): 3 * a**2 * x,
-             D(y, b): 2 * b,
-             D(z, a): b + 2 * y * D(y, a),
-             D(z, b): a + 2 * y * D(y, b),
-             y: callable_model[y], z: callable_model[z]
-             }
-        )
-        for var, func in jac_model.vars_as_functions.items():
-            self.assertEqual(
-                set(x.name for x in jac_model.connectivity_mapping[var]),
-                set(str(x.__class__) if isinstance(x, Function) else str(x)
-                    for x in func.args)
-            )
-        hess_model = hessian_from_model(callable_model)
-        # Result according to Mathematica
-        hess_as_dict = {
-            D(y, (a, 2)): 6 * a * x,
-            D(y, a, b): 0,
-            D(y, b, a): 0,
-            D(y, (b, 2)): 2,
-            D(z, (a, 2)): 2 * D(y, a)**2 + 2 * y * D(y, (a, 2)),
-            D(z, a, b): 1 + 2 * D(y, b) * D(y, a) + 2 * y * D(y, a, b),
-            D(z, b, a): 1 + 2 * D(y, b) * D(y, a) + 2 * y * D(y, a, b),
-            D(z, (b, 2)): 2 * D(y, b)**2 + 2 * y * D(y, (b, 2)),
-            D(y, a): 3 * a ** 2 * x,
-            D(y, b): 2 * b,
-            D(z, a): b + 2 * y * D(y, a),
-            D(z, b): a + 2 * y * D(y, b),
-            y: callable_model[y], z: callable_model[z]
-        }
-        self.assertEqual(len(hess_model), len(hess_as_dict))
-        for key, expr in hess_model.items():
-            self.assertEqual(expr, hess_as_dict[key])
+    assert fit_result.value(a) == pytest.approx(flat_result.value(a))
+    assert fit_result.value(b) == pytest.approx(flat_result.value(b))
+
+    assert fit_result.stdev(a) is None and flat_result.stdev(a) is None
+    assert fit_result.stdev(b) is None and flat_result.stdev(b) is None
+
+    assert fit_result.r_squared == pytest.approx(flat_result.r_squared)
 
-        self.assertEqual(hess_model.params, [a, b])
-        self.assertEqual(
-            hess_model.dependent_vars,
-            [D(z, (a, 2)), D(z, a, b), D(z, (b, 2)), D(z, b, a),
-             D(z, a), D(z, b), z]
-        )
-        self.assertEqual(hess_model.interdependent_vars,
-                         [D(y, (a, 2)), D(y, a), D(y, b), y])
-        self.assertEqual(hess_model.independent_vars, [x])
 
+def test_pickle():
+    """
+    Make sure models can be pickled and that all their attributes are
+    preserved when doing so.
+    """
+    a, b = parameters('a, b')
+    x, y = variables('x, y')
+    exact_model = Model({y: a * x ** b})
+    constraint = Model.as_constraint(Eq(a, b), exact_model)
+    num_model = CallableNumericalModel(
+        {y: a * x ** b}, independent_vars=[x], params=[a, b]
+    )
+    connected_num_model = CallableNumericalModel(
+        {y: a * x ** b}, connectivity_mapping={y: {x, a, b}}
+    )
+    # Test if lsoda args and kwargs are pickled too
+    ode_model = ODEModel({D(y, x): a * x + b}, {x: 0.0}, 3, 4, some_kwarg=True)
+
+    models = [exact_model, constraint, num_model, ode_model, connected_num_model]
+    for model in models:
+        new_model = pickle.loads(pickle.dumps(model))
+        # Compare signatures
+        assert model.__signature__ == new_model.__signature__
+        # Trigger the cached vars because we compare `__dict__`s
+        model.vars
+        new_model.vars
+        # Explicitly make sure the connectivity mapping is identical.
+ assert model.connectivity_mapping == new_model.connectivity_mapping + if not isinstance(model, ODEModel): + model.function_dict + model.vars_as_functions + new_model.function_dict + new_model.vars_as_functions + assert model.__dict__ == new_model.__dict__ + + +def test_MatrixSymbolModel(): + """ + Test a model which is defined by ModelSymbols, see #194 + """ + N = Symbol('N', integer=True) + M = MatrixSymbol('M', N, N) + W = MatrixSymbol('W', N, N) + I = MatrixSymbol('I', N, N) + y = MatrixSymbol('y', N, 1) + c = MatrixSymbol('c', N, 1) + a, b = parameters('a, b') + z, x = variables('z, x') + + model_dict = { + W: Inverse(I + M / a ** 2), + c: - W * y, + z: sqrt(c.T * c) + } + # TODO: This should be a Model in the future, but sympy is not yet + # capable of computing Matrix derivatives at the time of writing. + model = CallableModel(model_dict) + + assert model.params == [a] + assert model.independent_vars == [I, M, y] + assert model.dependent_vars == [z] + assert model.interdependent_vars == [W, c] + assert model.connectivity_mapping == {W: {I, M, a}, c: {W, y}, z: {c}} + # Generate data + iden = np.eye(2) + M_mat = np.array([[2, 1], [3, 4]]) + y_vec = np.array([3, 5]) + + eval_model = model(I=iden, M=M_mat, y=y_vec, a=0.1) + W_manual = np.linalg.inv(iden + M_mat / 0.1 ** 2) + c_manual = - W_manual.dot(y_vec) + z_manual = np.atleast_1d(np.sqrt(c_manual.T.dot(c_manual))) + assert eval_model.W == pytest.approx(W_manual) + assert eval_model.c == pytest.approx(c_manual) + assert eval_model.z == pytest.approx(z_manual) + + # Now try to retrieve the value of `a` from a fit + a.value = 0.2 + fit = Fit(model, z=z_manual, I=iden, M=M_mat, y=y_vec) + fit_result = fit.execute() + eval_model = model(I=iden, M=M_mat, y=y_vec, **fit_result.params) + assert 0.1 == pytest.approx(np.abs(fit_result.value(a))) + assert eval_model.W == pytest.approx(W_manual) + assert eval_model.c == pytest.approx(c_manual) + assert eval_model.z == pytest.approx(z_manual) + + # TODO: add constraints to Matrix model. But since Matrix expressions + # can not yet be derived, this needs #154 to be solved first. + + +def test_interdependency_invalid(): + """ + Create an invalid model with interdependency. + """ + a, b, c = parameters('a, b, c') + x, y, z = variables('x, y, z') + + with pytest.raises(ModelError): + # Invalid, parameters can not be keys + model_dict = { + c: a ** 3 * x + b ** 2, + z: c ** 2 + a * b + } + model = Model(model_dict) + with pytest.raises(ModelError): + # Invalid, parameters can not be keys + model_dict = {c: a ** 3 * x + b ** 2} model = Model(model_dict) - np.testing.assert_almost_equal(model(x=3, a=1, b=2), - np.atleast_2d([7, 51]).T) - np.testing.assert_almost_equal(model.eval_jacobian(x=3, a=1, b=2), - np.array([[[9], [4]], [[128], [57]]])) - np.testing.assert_almost_equal( - model.eval_hessian(x=3, a=1, b=2), - np.array([[[[18], [0]], [[0], [2]]], - [[[414], [73]], [[73], [60]]]])) - - self.assertEqual(model.__signature__, model.jacobian_model.__signature__) - self.assertEqual(model.__signature__, model.hessian_model.__signature__) - - def test_ModelOutput(self): - """ - Test the ModelOutput object. To prevent #267 from recurring, - we attempt to make a model with more than 255 variables. 
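+
+# Interdependencies are instead expressed by promoting the bound symbol to a
+# Variable rather than a Parameter; test_interdependency below exercises
+# exactly that pattern:
+#
+#     y, z = variables('y, z')
+#     Model({y: a ** 3 * x + b ** 2, z: y ** 2 + a * b})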
- """ - params = parameters(','.join('a{}'.format(i) for i in range(300))) - data = np.ones(300) - output = ModelOutput(params, data) - self.assertEqual(len(output), 300) - self.assertIsInstance(output._asdict(), OrderedDict) - self.assertIsNot(output._asdict(), output.output_dict) - - -if __name__ == '__main__': - unittest.main() + +def test_interdependency(): + a, b = parameters('a, b') + x, y, z = variables('x, y, z') + model_dict = { + y: a**3 * x + b**2, + z: y**2 + a * b + } + callable_model = CallableModel(model_dict) + assert callable_model.independent_vars == [x] + assert callable_model.interdependent_vars == [y] + assert callable_model.dependent_vars == [z] + assert callable_model.params == [a, b] + assert callable_model.connectivity_mapping == {y: {a, b, x}, z: {a, b, y}} + assert callable_model(x=3, a=1, b=2) == pytest.approx(np.atleast_2d([7, 51]).T) + for var, func in callable_model.vars_as_functions.items(): + # TODO comment on what this does + str_con_map = set(x.name for x in callable_model.connectivity_mapping[var]) + str_args = set(str(x.__class__) if isinstance(x, Function) else x.name + for x in func.args) + assert str_con_map == str_args + + jac_model = jacobian_from_model(callable_model) + assert jac_model.params == [a, b] + assert jac_model.dependent_vars == [D(z, a), D(z, b), z] + assert jac_model.interdependent_vars == [D(y, a), D(y, b), y] + assert jac_model.independent_vars == [x] + for p1, p2 in zip_longest(jac_model.__signature__.parameters, [x, a, b]): + assert str(p1) == str(p2) + # The connectivity of jac_model should be that from it's own components + # plus that of the model. The latter is needed to properly compute the + # Hessian. + jac_con_map = {D(y, a): {a, x}, + D(y, b): {b}, + D(z, a): {b, y, D(y, a)}, + D(z, b): {a, y, D(y, b)}, + y: {a, b, x}, z: {a, b, y}} + assert jac_model.connectivity_mapping == jac_con_map + jac_model_dict = {D(y, a): 3 * a**2 * x, + D(y, b): 2 * b, + D(z, a): b + 2 * y * D(y, a), + D(z, b): a + 2 * y * D(y, b), + y: callable_model[y], z: callable_model[z]} + assert jac_model.model_dict == jac_model_dict + for var, func in jac_model.vars_as_functions.items(): + str_con_map = set(x.name for x in jac_model.connectivity_mapping[var]) + str_args = set(str(x.__class__) if isinstance(x, Function) else x.name + for x in func.args) + assert str_con_map == str_args + + hess_model = hessian_from_model(callable_model) + # Result according to Mathematica + hess_as_dict = { + D(y, (a, 2)): 6 * a * x, + D(y, a, b): 0, + D(y, b, a): 0, + D(y, (b, 2)): 2, + D(z, (a, 2)): 2 * D(y, a)**2 + 2 * y * D(y, (a, 2)), + D(z, a, b): 1 + 2 * D(y, b) * D(y, a) + 2 * y * D(y, a, b), + D(z, b, a): 1 + 2 * D(y, b) * D(y, a) + 2 * y * D(y, a, b), + D(z, (b, 2)): 2 * D(y, b)**2 + 2 * y * D(y, (b, 2)), + D(y, a): 3 * a ** 2 * x, + D(y, b): 2 * b, + D(z, a): b + 2 * y * D(y, a), + D(z, b): a + 2 * y * D(y, b), + y: callable_model[y], z: callable_model[z] + } + assert dict(hess_model) == hess_as_dict + + assert hess_model.params == [a, b] + assert hess_model.dependent_vars == [D(z, (a, 2)), D(z, a, b), D(z, (b, 2)), D(z, b, a), D(z, a), D(z, b), z] + assert hess_model.interdependent_vars == [D(y, (a, 2)), D(y, a), D(y, b), y] + assert hess_model.independent_vars == [x] + + model = Model(model_dict) + assert model(x=3, a=1, b=2) == pytest.approx(np.atleast_2d([7, 51]).T) + assert model.eval_jacobian(x=3, a=1, b=2) == pytest.approx(np.array([[[9], [4]], [[128], [57]]])) + assert model.eval_hessian(x=3, a=1, b=2) == pytest.approx(np.array([[[[18], [0]], [[0], 
[2]]],[[[414], [73]], [[73], [60]]]])) + + assert model.__signature__ == model.jacobian_model.__signature__ + assert model.__signature__ == model.hessian_model.__signature__ + + +def test_ModelOutput(): + """ + Test the ModelOutput object. To prevent #267 from recurring, + we attempt to make a model with more than 255 variables. + """ + params = parameters(','.join('a{}'.format(i) for i in range(300))) + data = np.ones(300) + output = ModelOutput(params, data) + assert len(output) == 300 + assert isinstance(output._asdict(), OrderedDict) + assert output._asdict() is not output.output_dict + assert output._asdict() == output.output_dict diff --git a/tests/test_objectives.py b/tests/test_objectives.py index d56a4430..1beba913 100644 --- a/tests/test_objectives.py +++ b/tests/test_objectives.py @@ -1,20 +1,18 @@ from __future__ import division, print_function -import unittest -import warnings +import pytest import pickle import numpy as np from symfit import ( - Variable, Parameter, Eq, Ge, Le, Lt, Gt, Ne, parameters, ModelError, Fit, - Model, FitResults, variables, CallableNumericalModel, Idx, - IndexedBase, symbols, Sum, log, exp, cos, pi, besseli + Variable, Parameter, parameters, Fit, + Model, FitResults, variables, Idx, + symbols, Sum, log, exp, cos, pi, besseli ) from symfit.core.objectives import ( VectorLeastSquares, LeastSquares, LogLikelihood, MinimizeModel, BaseIndependentObjective ) -from symfit.core.fit_results import FitResults from symfit.distributions import Exp # Overwrite the way Sum is printed by numpy just while testing. Is not @@ -23,268 +21,244 @@ # the dimensions, and instead just flattens everything to a scalar. Only used # in this test to build the analytical equivalents of our LeastSquares # and LogLikelihood + + class FlattenSum(Sum): """ Just a sum which is printed differently: by flattening the whole array and summing it. Used in tests only. """ + def _numpycode(self, printer): - return "%s(%s)" % (printer._module_format('numpy.sum'), - printer.doprint(self.function)) - - -class TestObjectives(unittest.TestCase): - @classmethod - def setUpClass(cls): - np.random.seed(0) - - def test_pickle(self): - """ - Test the picklability of the built-in objectives. - """ - # Create test data - xdata = np.linspace(0, 100, 100) # From 0 to 100 in 100 steps - a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape) - b_vec = np.random.normal(100, scale=2.0, size=xdata.shape) - ydata = a_vec * xdata + b_vec # Point scattered around the line 15 * x + 100 - - # Normal symbolic fit - a = Parameter('a', value=0, min=0.0, max=1000) - b = Parameter('b', value=0, min=0.0, max=1000) - x, y = variables('x, y') - model = Model({y: a * x + b}) - - for objective in [VectorLeastSquares, LeastSquares, LogLikelihood, MinimizeModel]: - if issubclass(objective, BaseIndependentObjective): - data = {x: xdata} - else: - data = {x: xdata, y: ydata, - model.sigmas[y]: np.ones_like(ydata)} - obj = objective(model, data=data) - new_obj = pickle.loads(pickle.dumps(obj)) - self.assertTrue(FitResults._array_safe_dict_eq(obj.__dict__, - new_obj.__dict__)) - - def test_LeastSquares(self): - """ - Tests if the LeastSquares objective gives the right shapes of output by - comparing with its analytical equivalent. 
- """ - i = Idx('i', 100) - x, y = symbols('x, y', cls=Variable) - X2 = symbols('X2', cls=Variable) - a, b = parameters('a, b') - - model = Model({y: a * x**2 + b * x}) - xdata = np.linspace(0, 10, 100) - ydata = model(x=xdata, a=5, b=2).y + np.random.normal(0, 5, xdata.shape) - - # Construct a LeastSquares objective and its analytical equivalent - chi2_numerical = LeastSquares(model, data={ - x: xdata, y: ydata, model.sigmas[y]: np.ones_like(xdata) - }) - chi2_exact = Model( - {X2: FlattenSum(0.5 * ((a * x ** 2 + b * x) - y) ** 2, i)}) - - eval_exact = chi2_exact(x=xdata, y=ydata, a=2, b=3) - jac_exact = chi2_exact.eval_jacobian(x=xdata, y=ydata, a=2, b=3) - hess_exact = chi2_exact.eval_hessian(x=xdata, y=ydata, a=2, b=3) - eval_numerical = chi2_numerical(x=xdata, a=2, b=3) - jac_numerical = chi2_numerical.eval_jacobian(x=xdata, a=2, b=3) - hess_numerical = chi2_numerical.eval_hessian(x=xdata, a=2, b=3) - - # Test model jacobian and hessian shape - self.assertEqual(model(x=xdata, a=2, b=3)[0].shape, ydata.shape) - self.assertEqual(model.eval_jacobian(x=xdata, a=2, b=3)[0].shape, - (2, 100)) - self.assertEqual(model.eval_hessian(x=xdata, a=2, b=3)[0].shape, - (2, 2, 100)) - # Test exact chi2 shape - self.assertEqual(eval_exact[0].shape, (1,)) - self.assertEqual(jac_exact[0].shape, (2, 1)) - self.assertEqual(hess_exact[0].shape, (2, 2, 1)) - - # Test if these two models have the same call, jacobian, and hessian - self.assertAlmostEqual(eval_exact[0], eval_numerical) - self.assertIsInstance(eval_numerical, float) - self.assertIsInstance(eval_exact[0][0], float) - np.testing.assert_almost_equal(np.squeeze(jac_exact[0], axis=-1), - jac_numerical) - self.assertIsInstance(jac_numerical, np.ndarray) - np.testing.assert_almost_equal(np.squeeze(hess_exact[0], axis=-1), - hess_numerical) - self.assertIsInstance(hess_numerical, np.ndarray) - - fit = Fit(chi2_exact, x=xdata, y=ydata, objective=MinimizeModel) - fit_exact_result = fit.execute() - fit = Fit(model, x=xdata, y=ydata, absolute_sigma=True) - fit_num_result = fit.execute() - self.assertEqual(fit_exact_result.value(a), fit_num_result.value(a)) - self.assertEqual(fit_exact_result.value(b), fit_num_result.value(b)) - self.assertAlmostEqual(fit_exact_result.stdev(a), - fit_num_result.stdev(a)) - self.assertAlmostEqual(fit_exact_result.stdev(b), - fit_num_result.stdev(b)) - - - def test_LogLikelihood(self): - """ - Tests if the LeastSquares objective gives the right shapes of output by - comparing with its analytical equivalent. - """ - # TODO: update these tests to use indexed variables in the future - a, b = parameters('a, b') - i = Idx('i', 100) - x, y = variables('x, y') - pdf = Exp(x, 1 / a) * Exp(x, b) - - np.random.seed(10) - xdata = np.random.exponential(3.5, 100) - - # We use minus loglikelihood for the model, because the objective was - # designed to find the maximum when used with a *minimizer*, so it has - # opposite sign. Also test MinimizeModel at the same time. 
- logL_model = Model({y: pdf}) - logL_exact = Model({y: - FlattenSum(log(pdf), i)}) - logL_numerical = LogLikelihood(logL_model, {x: xdata, y: None}) - logL_minmodel = MinimizeModel(logL_exact, data={x: xdata, y: None}) - - # Test model jacobian and hessian shape - eval_exact = logL_exact(x=xdata, a=2, b=3) - jac_exact = logL_exact.eval_jacobian(x=xdata, a=2, b=3) - hess_exact = logL_exact.eval_hessian(x=xdata, a=2, b=3) - eval_minimizemodel = logL_minmodel(a=2, b=3) - jac_minimizemodel = logL_minmodel.eval_jacobian(a=2, b=3) - hess_minimizemodel = logL_minmodel.eval_hessian(a=2, b=3) - eval_numerical = logL_numerical(a=2, b=3) - jac_numerical = logL_numerical.eval_jacobian(a=2, b=3) - hess_numerical = logL_numerical.eval_hessian(a=2, b=3) - - # TODO: These shapes should not have the ones! This is due to the current - # convention that scalars should be returned as a 1d array by Model's. - self.assertEqual(eval_exact[0].shape, (1,)) - self.assertEqual(jac_exact[0].shape, (2, 1)) - self.assertEqual(hess_exact[0].shape, (2, 2, 1)) - # Test if identical to MinimizeModel - np.testing.assert_almost_equal(eval_exact[0], eval_minimizemodel) - np.testing.assert_almost_equal(jac_exact[0], jac_minimizemodel) - np.testing.assert_almost_equal(hess_exact[0], hess_minimizemodel) - - # Test if these two models have the same call, jacobian, and hessian. - # Since models always have components as their first dimension, we have - # to slice that away. - self.assertAlmostEqual(eval_exact.y, eval_numerical) - self.assertIsInstance(eval_numerical, float) - self.assertIsInstance(eval_exact.y[0], float) - np.testing.assert_almost_equal(np.squeeze(jac_exact[0], axis=-1), - jac_numerical) - self.assertIsInstance(jac_numerical, np.ndarray) - np.testing.assert_almost_equal(np.squeeze(hess_exact[0], axis=-1), - hess_numerical) - self.assertIsInstance(hess_numerical, np.ndarray) - - fit = Fit(logL_exact, x=xdata, objective=MinimizeModel) - fit_exact_result = fit.execute() - fit = Fit(logL_model, x=xdata, objective=LogLikelihood) - fit_num_result = fit.execute() - self.assertAlmostEqual(fit_exact_result.value(a), fit_num_result.value(a)) - self.assertAlmostEqual(fit_exact_result.value(b), fit_num_result.value(b)) - self.assertAlmostEqual(fit_exact_result.stdev(a), fit_num_result.stdev(a)) - self.assertAlmostEqual(fit_exact_result.stdev(b), fit_num_result.stdev(b)) - - def test_data_sanity(self): - """ - Tests very basicly the data sanity for different objective types. 
-        :return:
-        """
-        # Create test data
-        xdata = np.linspace(0, 100, 25)  # From 0 to 100 in 100 steps
-        a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape)
-        b_vec = np.random.normal(100, scale=2.0, size=xdata.shape)
-        ydata = a_vec * xdata + b_vec  # Point scattered around the line 5 * x + 105
-
-        # Normal symbolic fit
-        a = Parameter('a', value=0, min=0.0, max=1000)
-        b = Parameter('b', value=0, min=0.0, max=1000)
-        x, y, z = variables('x, y, z')
-        model = Model({y: a * x + b})
-
-        for objective in [VectorLeastSquares, LeastSquares, LogLikelihood,
-                          MinimizeModel]:
-            if issubclass(objective, BaseIndependentObjective):
-                incomplete_data = {}
-                data = {x: xdata}
-                overcomplete_data = {x: xdata, z: ydata}
-            else:
-                incomplete_data = {x: xdata, y: ydata}
-                data = {x: xdata, y: ydata,
-                        model.sigmas[y]: np.ones_like(ydata)}
-                overcomplete_data = {x: xdata, y: ydata, z: ydata,
-                                     model.sigmas[y]: np.ones_like(ydata)}
-            with self.assertRaises(KeyError):
-                obj = objective(model, data=incomplete_data)
-            obj = objective(model, data=data)
-            # Overcomplete data has to be allowed, since constraints share their
-            # data with models.
-            obj = objective(model, data=overcomplete_data)
-
-    def test_LogLikelihood_global(self):
-        """
-        This is a test for global likelihood fitting to multiple data sets.
-        Based on SO question 56006357.
-        """
-        # creating the data
-        mu1, mu2 = .05, -.05
-        sigma1, sigma2 = 3.5, 2.5
-        n1, n2 = 80, 90
-        np.random.seed(42)
-        x1 = np.random.vonmises(mu1, sigma1, n1)
-        x2 = np.random.vonmises(mu2, sigma2, n2)
-
-        n = 2  # number of components
-        xs = variables(
-            'x,' + ','.join('x_{}'.format(i) for i in range(1, n + 1)))
-        x, xs = xs[0], xs[1:]
-        ys = variables(','.join('y_{}'.format(i) for i in range(1, n + 1)))
-        mu, kappa = parameters('mu, kappa')
-        kappas = parameters(','.join('k_{}'.format(i) for i in range(1, n + 1)),
-                            min=0, max=10)
-        mu.min, mu.max = - np.pi, np.pi
-
-        template = exp(kappa * cos(x - mu)) / (2 * pi * besseli(0, kappa))
-
-        model = Model(
-            {y_i: template.subs({kappa: k_i, x: x_i}) for y_i, x_i, k_i in
-             zip(ys, xs, kappas)}
-        )
-
-        all_data = {xs[0]: x1, xs[1]: x2, ys[0]: None, ys[1]: None}
-        all_params = {'mu': 1}
-        all_params.update({k_i.name: 1 for k_i in kappas})
-
-        # Evaluate the loglikelihood and its jacobian and hessian
-        logL = LogLikelihood(model, data=all_data)
-        eval_numerical = logL(**all_params)
-        jac_numerical = logL.eval_jacobian(**all_params)
-        hess_numerical = logL.eval_hessian(**all_params)
-
-        # Test the types and shapes of the components.
-        self.assertIsInstance(eval_numerical, float)
-        self.assertIsInstance(jac_numerical, np.ndarray)
-        self.assertIsInstance(hess_numerical, np.ndarray)
-
-        self.assertEqual(eval_numerical.shape, tuple())  # Empty tuple -> scalar
-        self.assertEqual(jac_numerical.shape, (3,))
-        self.assertEqual(hess_numerical.shape, (3, 3,))
-
-
-if __name__ == '__main__':
-    try:
-        unittest.main(warnings='ignore')
-        # Note that unittest will catch and handle exceptions raised by tests.
-        # So this line will *only* deal with exceptions raised by the line
-        # above.
-    except TypeError:
-        # In Py2, unittest.main doesn't take a warnings argument
-        warnings.simplefilter('ignore')
-        unittest.main()
+        return "%s(%s)" % (printer._module_format('numpy.sum'), printer.doprint(self.function))
+
+
+def setup_function():
+    # pytest runs this module-level hook before every test function, so
+    # every test starts from the same random state.
+    np.random.seed(0)
+
+
+def test_pickle():
+    """
+    Test the picklability of the built-in objectives.
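+
+    Objectives have to survive a pickle round trip, since pickling is what
+    multiprocessing relies on to ship fits to worker processes (see
+    test_multiprocessing).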
+ """ + # Create test data + xdata = np.linspace(0, 100, 100) # From 0 to 100 in 100 steps + a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape) + b_vec = np.random.normal(100, scale=2.0, size=xdata.shape) + ydata = a_vec * xdata + b_vec # Point scattered around the line 15 * x + 100 + + # Normal symbolic fit + a = Parameter('a', value=0, min=0.0, max=1000) + b = Parameter('b', value=0, min=0.0, max=1000) + x, y = variables('x, y') + model = Model({y: a * x + b}) + + for objective in [VectorLeastSquares, LeastSquares, LogLikelihood, MinimizeModel]: + if issubclass(objective, BaseIndependentObjective): + data = {x: xdata} + else: + data = {x: xdata, y: ydata, model.sigmas[y]: np.ones_like(ydata)} + obj = objective(model, data=data) + new_obj = pickle.loads(pickle.dumps(obj)) + assert FitResults._array_safe_dict_eq(obj.__dict__, new_obj.__dict__) + + +def test_LeastSquares(): + """ + Tests if the LeastSquares objective gives the right shapes of output by + comparing with its analytical equivalent. + """ + i = Idx('i', 100) + x, y = symbols('x, y', cls=Variable) + X2 = symbols('X2', cls=Variable) + a, b = parameters('a, b') + + model = Model({y: a * x**2 + b * x}) + xdata = np.linspace(0, 10, 100) + ydata = model(x=xdata, a=5, b=2).y + np.random.normal(0, 5, xdata.shape) + + # Construct a LeastSquares objective and its analytical equivalent + chi2_numerical = LeastSquares(model, data={ + x: xdata, y: ydata, model.sigmas[y]: np.ones_like(xdata) + }) + chi2_exact = Model({X2: FlattenSum(0.5 * ((a * x ** 2 + b * x) - y) ** 2, i)}) + + eval_exact = chi2_exact(x=xdata, y=ydata, a=2, b=3) + jac_exact = chi2_exact.eval_jacobian(x=xdata, y=ydata, a=2, b=3) + hess_exact = chi2_exact.eval_hessian(x=xdata, y=ydata, a=2, b=3) + eval_numerical = chi2_numerical(x=xdata, a=2, b=3) + jac_numerical = chi2_numerical.eval_jacobian(x=xdata, a=2, b=3) + hess_numerical = chi2_numerical.eval_hessian(x=xdata, a=2, b=3) + + # Test model jacobian and hessian shape + assert model(x=xdata, a=2, b=3)[0].shape == ydata.shape + assert model.eval_jacobian(x=xdata, a=2, b=3)[0].shape == (2, 100) + assert model.eval_hessian(x=xdata, a=2, b=3)[0].shape == (2, 2, 100) + # Test exact chi2 shape + assert eval_exact[0].shape, (1,) + assert jac_exact[0].shape, (2, 1) + assert hess_exact[0].shape, (2, 2, 1) + + # Test if these two models have the same call, jacobian, and hessian + assert eval_exact[0] == pytest.approx(eval_numerical) + assert isinstance(eval_numerical, float) + assert isinstance(eval_exact[0][0], float) + assert np.squeeze(jac_exact[0], axis=-1) == pytest.approx(jac_numerical) + assert isinstance(jac_numerical, np.ndarray) + assert np.squeeze(hess_exact[0], axis=-1) == pytest.approx(hess_numerical) + assert isinstance(hess_numerical, np.ndarray) + + fit = Fit(chi2_exact, x=xdata, y=ydata, objective=MinimizeModel) + fit_exact_result = fit.execute() + fit = Fit(model, x=xdata, y=ydata, absolute_sigma=True) + fit_num_result = fit.execute() + assert fit_exact_result.value(a) == fit_num_result.value(a) + assert fit_exact_result.value(b) == fit_num_result.value(b) + assert fit_exact_result.stdev(a) == pytest.approx(fit_num_result.stdev(a)) + assert fit_exact_result.stdev(b) == pytest.approx(fit_num_result.stdev(b)) + + +def test_LogLikelihood(): + """ + Tests if the LeastSquares objective gives the right shapes of output by + comparing with its analytical equivalent. 
+ """ + # TODO: update these tests to use indexed variables in the future + a, b = parameters('a, b') + i = Idx('i', 100) + x, y = variables('x, y') + pdf = Exp(x, 1 / a) * Exp(x, b) + + np.random.seed(10) + xdata = np.random.exponential(3.5, 100) + + # We use minus loglikelihood for the model, because the objective was + # designed to find the maximum when used with a *minimizer*, so it has + # opposite sign. Also test MinimizeModel at the same time. + logL_model = Model({y: pdf}) + logL_exact = Model({y: - FlattenSum(log(pdf), i)}) + logL_numerical = LogLikelihood(logL_model, {x: xdata, y: None}) + logL_minmodel = MinimizeModel(logL_exact, data={x: xdata, y: None}) + + # Test model jacobian and hessian shape + eval_exact = logL_exact(x=xdata, a=2, b=3) + jac_exact = logL_exact.eval_jacobian(x=xdata, a=2, b=3) + hess_exact = logL_exact.eval_hessian(x=xdata, a=2, b=3) + eval_minimizemodel = logL_minmodel(a=2, b=3) + jac_minimizemodel = logL_minmodel.eval_jacobian(a=2, b=3) + hess_minimizemodel = logL_minmodel.eval_hessian(a=2, b=3) + eval_numerical = logL_numerical(a=2, b=3) + jac_numerical = logL_numerical.eval_jacobian(a=2, b=3) + hess_numerical = logL_numerical.eval_hessian(a=2, b=3) + + # TODO: These shapes should not have the ones! This is due to the current + # convention that scalars should be returned as a 1d array by Model's. + assert eval_exact[0].shape == (1,) + assert jac_exact[0].shape == (2, 1) + assert hess_exact[0].shape == (2, 2, 1) + # Test if identical to MinimizeModel + assert eval_exact[0] == pytest.approx(eval_minimizemodel) + assert jac_exact[0] == pytest.approx(jac_minimizemodel) + assert hess_exact[0] == pytest.approx(hess_minimizemodel) + + # Test if these two models have the same call, jacobian, and hessian. + # Since models always have components as their first dimension, we have + # to slice that away. + assert eval_exact.y == pytest.approx(eval_numerical) + assert isinstance(eval_numerical, float) + assert isinstance(eval_exact.y[0], float) + assert np.squeeze(jac_exact[0], axis=-1) == pytest.approx(jac_numerical) + assert isinstance(jac_numerical, np.ndarray) + assert np.squeeze(hess_exact[0], axis=-1) == pytest.approx(hess_numerical) + assert isinstance(hess_numerical, np.ndarray) + + fit = Fit(logL_exact, x=xdata, objective=MinimizeModel) + fit_exact_result = fit.execute() + fit = Fit(logL_model, x=xdata, objective=LogLikelihood) + fit_num_result = fit.execute() + assert fit_exact_result.value(a) == pytest.approx(fit_num_result.value(a)) + assert fit_exact_result.value(b) == pytest.approx(fit_num_result.value(b)) + assert fit_exact_result.stdev(a) == pytest.approx(fit_num_result.stdev(a)) + assert fit_exact_result.stdev(b) == pytest.approx(fit_num_result.stdev(b)) + + +def test_data_sanity(): + """ + Tests very basicly the data sanity for different objective types. 
+    """
+    # Create test data
+    xdata = np.linspace(0, 100, 25)  # From 0 to 100 in 25 steps
+    a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape)
+    b_vec = np.random.normal(100, scale=2.0, size=xdata.shape)
+    ydata = a_vec * xdata + b_vec  # Points scattered around the line 15 * x + 100
+
+    # Normal symbolic fit
+    a = Parameter('a', value=0, min=0.0, max=1000)
+    b = Parameter('b', value=0, min=0.0, max=1000)
+    x, y, z = variables('x, y, z')
+    model = Model({y: a * x + b})
+
+    for objective in [VectorLeastSquares, LeastSquares, LogLikelihood, MinimizeModel]:
+        if issubclass(objective, BaseIndependentObjective):
+            incomplete_data = {}
+            data = {x: xdata}
+            overcomplete_data = {x: xdata, z: ydata}
+        else:
+            incomplete_data = {x: xdata, y: ydata}
+            data = {x: xdata, y: ydata, model.sigmas[y]: np.ones_like(ydata)}
+            overcomplete_data = {x: xdata, y: ydata, z: ydata, model.sigmas[y]: np.ones_like(ydata)}
+        with pytest.raises(KeyError):
+            obj = objective(model, data=incomplete_data)
+
+        obj = objective(model, data=data)
+        # Overcomplete data has to be allowed, since constraints share their
+        # data with models.
+        obj = objective(model, data=overcomplete_data)
+
+
+def test_LogLikelihood_global():
+    """
+    This is a test for global likelihood fitting to multiple data sets.
+    Based on SO question 56006357.
+    """
+    # creating the data
+    mu1, mu2 = .05, -.05
+    sigma1, sigma2 = 3.5, 2.5
+    n1, n2 = 80, 90
+    np.random.seed(42)
+    x1 = np.random.vonmises(mu1, sigma1, n1)
+    x2 = np.random.vonmises(mu2, sigma2, n2)
+
+    n = 2  # number of components
+    xs = variables('x,' + ','.join('x_{}'.format(i) for i in range(1, n + 1)))
+    x, xs = xs[0], xs[1:]
+    ys = variables(','.join('y_{}'.format(i) for i in range(1, n + 1)))
+    mu, kappa = parameters('mu, kappa')
+    kappas = parameters(','.join('k_{}'.format(i) for i in range(1, n + 1)), min=0, max=10)
+    mu.min, mu.max = - np.pi, np.pi
+
+    template = exp(kappa * cos(x - mu)) / (2 * pi * besseli(0, kappa))
+
+    model = Model(
+        {y_i: template.subs({kappa: k_i, x: x_i}) for y_i, x_i, k_i in zip(ys, xs, kappas)}
+    )
+
+    all_data = {xs[0]: x1, xs[1]: x2, ys[0]: None, ys[1]: None}
+    all_params = {'mu': 1}
+    all_params.update({k_i.name: 1 for k_i in kappas})
+
+    # Evaluate the loglikelihood and its jacobian and hessian
+    logL = LogLikelihood(model, data=all_data)
+    eval_numerical = logL(**all_params)
+    jac_numerical = logL.eval_jacobian(**all_params)
+    hess_numerical = logL.eval_hessian(**all_params)
+
+    # Test the types and shapes of the components.
+    assert isinstance(eval_numerical, float)
+    assert isinstance(jac_numerical, np.ndarray)
+    assert isinstance(hess_numerical, np.ndarray)
+
+    assert eval_numerical.shape == tuple()  # Empty tuple -> scalar
+    assert jac_numerical.shape == (3,)
+    assert hess_numerical.shape == (3, 3,)
diff --git a/tests/test_ode.py b/tests/test_ode.py
index 5b93d9ef..a50afdbf 100644
--- a/tests/test_ode.py
+++ b/tests/test_ode.py
@@ -1,251 +1,254 @@
 from __future__ import division, print_function
-import sys
-import unittest
-
 import pytest
+
 import numpy as np
 
 from symfit import parameters, variables, ODEModel, exp, Fit, D, Model, GradientModel, Parameter
 from symfit.core.minimizers import MINPACK
-from symfit.distributions import Gaussian
 
-class TestODE(unittest.TestCase):
+"""
+Tests for ODE models.
+""" + + +def setup_method(): + np.random.seed(6) + + +def test_known_solution(): + p, c1 = parameters('p, c1') + y, t = variables('y, t') + p.value = 3.0 + + model_dict = { + D(y, t): - p * y, + } + + # Lets say we know the exact solution to this problem + sol = Model({y: exp(- p * t)}) + + # Generate some data + tdata = np.linspace(0, 3, 10001) + ydata = sol(t=tdata, p=3.22)[0] + ydata += np.random.normal(0, 0.005, ydata.shape) + + ode_model = ODEModel(model_dict, initial={t: 0.0, y: ydata[0]}) + fit = Fit(ode_model, t=tdata, y=ydata) + ode_result = fit.execute() + + c1.value = ydata[0] + fit = Fit(sol, t=tdata, y=ydata) + fit_result = fit.execute() + + assert ode_result.value(p) == pytest.approx(fit_result.value(p), 1e-2) + assert ode_result.r_squared == pytest.approx(fit_result.r_squared, 1e-4) + assert ode_result.stdev(p) == pytest.approx(fit_result.stdev(p), 1e-2) + + +def test_van_der_pol(): + """ + http://hplgit.github.io/odespy/doc/pub/tutorial/html/main_odespy.html + """ + u_0, u_1, t = variables('u_0, u_1, t') + + model_dict = { + D(u_0, t): u_1, + D(u_1, t): 3 * (1 - u_0**2) * u_1 - u_1 + } + + ode_model = ODEModel(model_dict, initial={t: 0.0, u_0: 2.0, u_1: 1.0}) + + # # Generate some data + # tdata = np.linspace(0, 1, 101) + # plt.plot(tdata, ode_model(tdata)[0], color='red') + # plt.plot(tdata, ode_model(tdata)[1], color='blue') + # plt.show() + + +def test_polgar(): + """ + Analysis of data published here: + This whole ODE support was build to do this analysis in the first place + """ + a, b, c, d, t = variables('a, b, c, d, t') + k, p, l, m = parameters('k, p, l, m') + + a0 = 10 + b = a0 - d + a + model_dict = { + D(d, t): l * c * b - m * d, + D(c, t): k * a * b - p * c - l * c * b + m * d, + D(a, t): - k * a * b + p * c, + } + + ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, c: 0.0, d: 0.0}) + + # Generate some data + tdata = np.linspace(0, 3, 1000) + # Eval + AA, AAB, BAAB = ode_model(t=tdata, k=0.1, l=0.2, m=.3, p=0.3) + + # plt.plot(tdata, AA, color='red', label='[AA]') + # plt.plot(tdata, AAB, color='blue', label='[AAB]') + # plt.plot(tdata, BAAB, color='green', label='[BAAB]') + # plt.plot(tdata, b(d=BAAB, a=AA), color='pink', label='[B]') + # plt.plot(tdata, AA + AAB + BAAB, color='black', label='total') + # plt.legend() + # plt.show() + + +def test_simple_kinetics(): + """ + Simple kinetics data to test fitting + """ + tdata = np.array([10, 26, 44, 70, 120]) + adata = 10e-4 * np.array([44, 34, 27, 20, 14]) + a, b, t = variables('a, b, t') + k, a0 = parameters('k, a0') + k.value = 0.01 + # a0.value, a0.min, a0.max = 54 * 10e-4, 40e-4, 60e-4 + a0 = 54 * 10e-4 + + model_dict = { + D(a, t): - k * a**2, + D(b, t): k * a**2, + } + + ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, b: 0.0}) + + # Analytical solution + model = GradientModel({a: 1 / (k * t + 1 / a0)}) + fit = Fit(model, t=tdata, a=adata) + fit_result = fit.execute() + + fit = Fit(ode_model, t=tdata, a=adata, b=None, minimizer=MINPACK) + ode_result = fit.execute() + assert ode_result.value(k) == pytest.approx(fit_result.value(k), 1e-4) + assert ode_result.stdev(k) == pytest.approx(fit_result.stdev(k), 1e-4) + assert ode_result.r_squared == pytest.approx(fit_result.r_squared, 1e-4) + + fit = Fit(ode_model, t=tdata, a=adata, b=None) + ode_result = fit.execute() + assert ode_result.value(k) == pytest.approx(fit_result.value(k), 1e-4) + assert ode_result.stdev(k) == pytest.approx(fit_result.stdev(k), 1e-4) + assert ode_result.r_squared == pytest.approx(fit_result.r_squared, 1e-4) + + +def 
test_single_eval(): + """ + Eval an ODEModel at a single value rather than a vector. + """ + x, y, t = variables('x, y, t') + k, = parameters('k') # C is the integration constant. + + # The harmonic oscillator as a system, >1st order is not supported yet. + harmonic_dict = { + D(x, t): - k * y, + D(y, t): k * x, + } + + # Make a second model to prevent caching of integration results. + # This also means harmonic_dict should NOT be a Model object. + harmonic_model_array = ODEModel( + harmonic_dict, initial={t: 0.0, x: 1.0, y: 0.0}) + harmonic_model_points = ODEModel( + harmonic_dict, initial={t: 0.0, x: 1.0, y: 0.0}) + tdata = np.linspace(-100, 100, 101) + X, Y = harmonic_model_array(t=tdata, k=0.1) + # Shuffle the data to prevent using the result at time t to calculate + # t+dt + random_order = np.random.permutation(len(tdata)) + for idx in random_order: + t = tdata[idx] + X_val = X[idx] + Y_val = Y[idx] + X_point, Y_point = harmonic_model_points(t=t, k=0.1) + assert X_point[0] == pytest.approx(X_val) + assert Y_point[0] == pytest.approx(Y_val) + + +def test_full_eval_range(): + """ + Test if ODEModels can be evaluated at t < t_initial. + + A bit of a no news is good news test. + """ + tdata = np.array([0, 10, 26, 44, 70, 120]) + adata = 10e-4 * np.array([54, 44, 34, 27, 20, 14]) + a, b, t = variables('a, b, t') + k, a0 = parameters('k, a0') + k.value = 0.01 + t0 = tdata[2] + a0 = adata[2] + b0 = 0.02729855 # Obtained from evaluating from t=0. + + model_dict = { + D(a, t): - k * a**2, + D(b, t): k * a**2, + } + + ode_model = ODEModel(model_dict, initial={t: t0, a: a0, b: b0}) + + fit = Fit(ode_model, t=tdata, a=adata, b=None) + ode_result = fit.execute() + assert ode_result.r_squared > 0.95 + + # Now start from a timepoint that is not in the t-array such that it + # triggers another pathway to be taken in integrating it. + # Again, no news is good news. + ode_model = ODEModel(model_dict, initial={t: t0 + 1e-5, a: a0, b: b0}) + + fit = Fit(ode_model, t=tdata, a=adata, b=None) + ode_result = fit.execute() + assert ode_result.r_squared > 0.95 + + +def test_odemodel_sanity(): + """ + If a user provides an ODE like model directly to fit without + explicitly turning it into one, give a warning. + """ + tdata = np.array([0, 10, 26, 44, 70, 120]) + adata = 10e-4 * np.array([54, 44, 34, 27, 20, 14]) + a, t = variables('a, t') + k, a0 = parameters('k, a0') + + model_dict = { + D(a, t): - k * a * t, + } + with pytest.raises(RuntimeWarning): + fit = Fit(model_dict, t=tdata, a=adata) + + +def test_initial_parameters(): """ - Tests for the FitResults object. + Identical to test_polgar, but with a0 as free Parameter. 
""" - @classmethod - def setUpClass(cls): - np.random.seed(6) - - def test_known_solution(self): - p, c1 = parameters('p, c1') - y, t = variables('y, t') - p.value = 3.0 - - model_dict = { - D(y, t): - p * y, - } - - # Lets say we know the exact solution to this problem - sol = Model({y: exp(- p * t)}) - - # Generate some data - tdata = np.linspace(0, 3, 10001) - ydata = sol(t=tdata, p=3.22)[0] - ydata += np.random.normal(0, 0.005, ydata.shape) - - ode_model = ODEModel(model_dict, initial={t: 0.0, y: ydata[0]}) - fit = Fit(ode_model, t=tdata, y=ydata) - ode_result = fit.execute() - - c1.value = ydata[0] - fit = Fit(sol, t=tdata, y=ydata) - fit_result = fit.execute() - - self.assertAlmostEqual(ode_result.value(p) / fit_result.value(p), 1, 2) - self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4) - self.assertAlmostEqual(ode_result.stdev(p) / fit_result.stdev(p), 1, 3) - - def test_van_der_pol(self): - """ - http://hplgit.github.io/odespy/doc/pub/tutorial/html/main_odespy.html - """ - u_0, u_1, t = variables('u_0, u_1, t') - - model_dict = { - D(u_0, t): u_1, - D(u_1, t): 3 * (1 - u_0**2) * u_1 - u_1 - } - - ode_model = ODEModel(model_dict, initial={t: 0.0, u_0: 2.0, u_1: 1.0}) - - # # Generate some data - # tdata = np.linspace(0, 1, 101) - # plt.plot(tdata, ode_model(tdata)[0], color='red') - # plt.plot(tdata, ode_model(tdata)[1], color='blue') - # plt.show() - - def test_polgar(self): - """ - Analysis of data published here: - This whole ODE support was build to do this analysis in the first place - """ - a, b, c, d, t = variables('a, b, c, d, t') - k, p, l, m = parameters('k, p, l, m') - - a0 = 10 - b = a0 - d + a - model_dict = { - D(d, t): l * c * b - m * d, - D(c, t): k * a * b - p * c - l * c * b + m * d, - D(a, t): - k * a * b + p * c, - } - - ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, c: 0.0, d: 0.0}) - - # Generate some data - tdata = np.linspace(0, 3, 1000) - # Eval - AA, AAB, BAAB = ode_model(t=tdata, k=0.1, l=0.2, m=.3, p=0.3) - - # plt.plot(tdata, AA, color='red', label='[AA]') - # plt.plot(tdata, AAB, color='blue', label='[AAB]') - # plt.plot(tdata, BAAB, color='green', label='[BAAB]') - # plt.plot(tdata, b(d=BAAB, a=AA), color='pink', label='[B]') - # plt.plot(tdata, AA + AAB + BAAB, color='black', label='total') - # plt.legend() - # plt.show() - - def test_initial_parameters(self): - """ - Identical to test_polgar, but with a0 as free Parameter. 
- """ - a, b, c, d, t = variables('a, b, c, d, t') - k, p, l, m = parameters('k, p, l, m') - - a0 = Parameter('a0', min=0, value=10, fixed=True) - c0 = Parameter('c0', min=0, value=0.1) - b = a0 - d + a - model_dict = { - D(d, t): l * c * b - m * d, - D(c, t): k * a * b - p * c - l * c * b + m * d, - D(a, t): - k * a * b + p * c, - } - - ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, c: c0, d: 0.0}) - - # Generate some data - tdata = np.linspace(0, 3, 1000) - # Eval - AA, AAB, BAAB = ode_model(t=tdata, k=0.1, l=0.2, m=.3, p=0.3, a0=10, c0=0) - fit = Fit(ode_model, t=tdata, a=AA, c=AAB, d=BAAB) - results = fit.execute() - print(results) - self.assertEqual(results.value(a0), 10) - self.assertAlmostEqual(results.value(c0), 0) - - self.assertEqual([a0, c0, k, l, m, p], ode_model.params) - self.assertEqual([a0, c0], ode_model.initial_params) - self.assertEqual([a0, k, l, m, p], ode_model.model_params) - - def test_simple_kinetics(self): - """ - Simple kinetics data to test fitting - """ - tdata = np.array([10, 26, 44, 70, 120]) - adata = 10e-4 * np.array([44, 34, 27, 20, 14]) - a, b, t = variables('a, b, t') - k, a0 = parameters('k, a0') - k.value = 0.01 - # a0.value, a0.min, a0.max = 54 * 10e-4, 40e-4, 60e-4 - a0 = 54 * 10e-4 - - model_dict = { - D(a, t): - k * a**2, - D(b, t): k * a**2, - } - - ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, b: 0.0}) - - # Analytical solution - model = GradientModel({a: 1 / (k * t + 1 / a0)}) - fit = Fit(model, t=tdata, a=adata) - fit_result = fit.execute() - - fit = Fit(ode_model, t=tdata, a=adata, b=None, minimizer=MINPACK) - ode_result = fit.execute() - self.assertAlmostEqual(ode_result.value(k) / fit_result.value(k), 1.0, 4) - self.assertAlmostEqual(ode_result.stdev(k) / fit_result.stdev(k), 1.0, 4) - self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4) - - fit = Fit(ode_model, t=tdata, a=adata, b=None) - ode_result = fit.execute() - self.assertAlmostEqual(ode_result.value(k) / fit_result.value(k), 1.0, 4) - self.assertAlmostEqual(ode_result.stdev(k) / fit_result.stdev(k), 1.0, 4) - self.assertAlmostEqual(ode_result.r_squared / fit_result.r_squared, 1, 4) - - def test_single_eval(self): - """ - Eval an ODEModel at a single value rather than a vector. - """ - x, y, t = variables('x, y, t') - k, = parameters('k') # C is the integration constant. - - # The harmonic oscillator as a system, >1st order is not supported yet. - harmonic_dict = { - D(x, t): - k * y, - D(y, t): k * x, - } - - # Make a second model to prevent caching of integration results. - # This also means harmonic_dict should NOT be a Model object. - harmonic_model_array = ODEModel(harmonic_dict, initial={t: 0.0, x: 1.0, y: 0.0}) - harmonic_model_points = ODEModel(harmonic_dict, initial={t: 0.0, x: 1.0, y: 0.0}) - tdata = np.linspace(-100, 100, 101) - X, Y = harmonic_model_array(t=tdata, k=0.1) - # Shuffle the data to prevent using the result at time t to calculate - # t+dt - random_order = np.random.permutation(len(tdata)) - for idx in random_order: - t = tdata[idx] - X_val = X[idx] - Y_val = Y[idx] - X_point, Y_point = harmonic_model_points(t=t, k=0.1) - self.assertAlmostEqual(X_point[0], X_val) - self.assertAlmostEqual(Y_point[0], Y_val) - - - def test_full_eval_range(self): - """ - Test if ODEModels can be evaluated at t < t_initial. - - A bit of a no news is good news test. 
-        """
-        tdata = np.array([0, 10, 26, 44, 70, 120])
-        adata = 10e-4 * np.array([54, 44, 34, 27, 20, 14])
-        a, b, t = variables('a, b, t')
-        k, a0 = parameters('k, a0')
-        k.value = 0.01
-        t0 = tdata[2]
-        a0 = adata[2]
-        b0 = 0.02729855 # Obtained from evaluating from t=0.
-
-        model_dict = {
-            D(a, t): - k * a**2,
-            D(b, t): k * a**2,
-        }
-
-        ode_model = ODEModel(model_dict, initial={t: t0, a: a0, b: b0})
-
-        fit = Fit(ode_model, t=tdata, a=adata, b=None)
-        ode_result = fit.execute()
-        self.assertGreater(ode_result.r_squared, 0.95, 4)
-
-        # Now start from a timepoint that is not in the t-array such that it
-        # triggers another pathway to be taken in integrating it.
-        # Again, no news is good news.
-        ode_model = ODEModel(model_dict, initial={t: t0 + 1e-5, a: a0, b: b0})
-
-        fit = Fit(ode_model, t=tdata, a=adata, b=None)
-        ode_result = fit.execute()
-        self.assertGreater(ode_result.r_squared, 0.95, 4)
-
-    def test_odemodel_sanity(self):
-        """
-        If a user provides an ODE like model directly to fit without
-        explicitly turning it into one, give a warning.
-        """
-        tdata = np.array([0, 10, 26, 44, 70, 120])
-        adata = 10e-4 * np.array([54, 44, 34, 27, 20, 14])
-        a, t = variables('a, t')
-        k, a0 = parameters('k, a0')
-
-        model_dict = {
-            D(a, t): - k * a * t,
-        }
-        with self.assertRaises(RuntimeWarning):
-            fit = Fit(model_dict, t=tdata, a=adata)
-
-if __name__ == '__main__':
-    unittest.main()
+    a, b, c, d, t = variables('a, b, c, d, t')
+    k, p, l, m = parameters('k, p, l, m')
+
+    a0 = Parameter('a0', min=0, value=10, fixed=True)
+    c0 = Parameter('c0', min=0, value=0.1)
+    b = a0 - d + a
+    model_dict = {
+        D(d, t): l * c * b - m * d,
+        D(c, t): k * a * b - p * c - l * c * b + m * d,
+        D(a, t): - k * a * b + p * c,
+    }
+
+    ode_model = ODEModel(model_dict, initial={t: 0.0, a: a0, c: c0, d: 0.0})
+
+    # Generate some data
+    tdata = np.linspace(0, 3, 1000)
+    # Evaluate the model on the generated timepoints
+    AA, AAB, BAAB = ode_model(t=tdata, k=0.1, l=0.2, m=.3, p=0.3, a0=10, c0=0)
+    fit = Fit(ode_model, t=tdata, a=AA, c=AAB, d=BAAB)
+    results = fit.execute()
+    assert results.value(a0) == 10
+    assert results.value(c0) == pytest.approx(0)
+
+    assert ode_model.params == [a0, c0, k, l, m, p]
+    assert ode_model.initial_params == [a0, c0]
+    assert ode_model.model_params == [a0, k, l, m, p]
diff --git a/tests/test_support.py b/tests/test_support.py
index ea679758..14b39074 100644
--- a/tests/test_support.py
+++ b/tests/test_support.py
@@ -3,9 +3,8 @@
 """
 from __future__ import division, print_function
-import unittest
+import pytest
 import sys
-import warnings
 from itertools import repeat
 from symfit.core.support import (
@@ -19,219 +18,214 @@ import funcsigs as inspect_sig


-class TestSupport(unittest.TestCase):
-    def setUp(self):
+@keywordonly(c=2, d=RequiredKeyword)
+def f(a, b, *args, **kwargs):
+    c = kwargs.pop('c')
+    d = kwargs.pop('d')
+    return a + b + c + d
+
+
+class A(object):
+    @keywordonly(c=2, d=RequiredKeyword)
+    def __init__(self, a, b, **kwargs):
+        pass
+
+
+class B(A):
+    @keywordonly(e=5)
+    def __init__(self, *args, **kwargs):
+        e = kwargs.pop('e')
+        super(B, self).__init__(*args, **kwargs)
+
+
+def test_keywordonly_signature():
+    """
+    Test the keywordonly decorator's ability to update the signature of the
+    function it wraps.
+    """
+    kinds = {
+        'a': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
+        'b': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
+        'args': inspect_sig.Parameter.VAR_POSITIONAL,
+        'kwargs': inspect_sig.Parameter.VAR_KEYWORD,
+        'c': inspect_sig.Parameter.KEYWORD_ONLY,
+        'd': inspect_sig.Parameter.KEYWORD_ONLY,
+    }
+    sig_f = inspect_sig.signature(f)
+    for param in sig_f.parameters.values():
+        assert param.kind == kinds[param.name]
+
+
+def test_keywordonly_call():
+    """
+    Call our test function with some values to see if it behaves as
+    expected.
+    """
+    assert f(4, 3, c=5, d=6) == 4 + 3 + 5 + 6
+    # In the next case the 5 ends up in *args, so the default c=2 is used.
+    assert f(4, 3, 5, d=6) == 4 + 3 + 2 + 6
+
+
+def test_keywordonly_norequiredkeyword():
+    """
+    Try to not provide a value for the RequiredKeyword d. We shouldn't get
+    away with it if all is well: a RequiredKeywordError should be raised.
+    """
+    with pytest.raises(RequiredKeywordError):
+        f(4, 3, 5, 6)
+
+
+def test_keywordonly_nokwargs():
+    """
+    Decorating a function with no **kwargs-like argument should not be
+    allowed.
+    """
+    with pytest.raises(RequiredKeywordError):
        @keywordonly(c=2, d=RequiredKeyword)
-        def f(a, b, *args, **kwargs):
-            c = kwargs.pop('c')
-            d = kwargs.pop('d')
-            return a + b + c + d
-
-        class A(object):
-            @keywordonly(c=2, d=RequiredKeyword)
-            def __init__(self, a, b, **kwargs):
-                pass
-
-        class B(A):
-            @keywordonly(e=5)
-            def __init__(self, *args, **kwargs):
-                e = kwargs.pop('e')
-                super(B, self).__init__(*args, **kwargs)
-
-        self._f = f
-        self._A = A
-        self._B = B
-
-    def test_keywordonly_signature(self):
-        """
-        Test the keywordonly decorators ability to update the signature of the
-        function it wraps.
-        """
-        kinds = {
-            'a': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
-            'b': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
-            'args': inspect_sig.Parameter.VAR_POSITIONAL,
-            'kwargs': inspect_sig.Parameter.VAR_KEYWORD,
-            'c': inspect_sig.Parameter.KEYWORD_ONLY,
-            'd': inspect_sig.Parameter.KEYWORD_ONLY,
-        }
-        sig_f = inspect_sig.signature(self._f)
-        for param in sig_f.parameters.values():
-            self.assertTrue(param.kind == kinds[param.name])
-
-    def test_keywordonly_call(self):
-        """
-        Call our test function with some values to see if it behaves as
-        expected.
-        """
-        self.assertEqual(self._f(4, 3, c=5, d=6), 4 + 3 + 5 + 6)
-        # In the next case the 5 is left behind since it ends up in *args.
-        self.assertEqual(self._f(4, 3, 5, d=6), 4 + 3 + 2 + 6)
-
-    def test_keywordonly_norequiredkeyword(self):
-        """
-        Try to not provide a RequiredKeyword with a value and get away with it.
-        (we shouldn't get away with it if all is well.)
-        """
-        with self.assertRaises(RequiredKeywordError):
-            self._f(4, 3, 5, 6)
-
-    def test_keywordonly_nokwagrs(self):
-        """
-        Decorating a function with no **kwargs-like argument should not be
-        allowed.
-        """
-        with self.assertRaises(RequiredKeywordError):
-            @keywordonly(c=2, d=RequiredKeyword)
-            def g(a, b, *args):
-                pass
-
-    def test_keywordonly_class(self):
-        """
-        Decorating a function with no **kwargs-like argument should not be
-        allowed.
- """ - kinds = { - 'self': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD, - 'a': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD, - 'b': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD, - 'args': inspect_sig.Parameter.VAR_POSITIONAL, - 'kwargs': inspect_sig.Parameter.VAR_KEYWORD, - 'c': inspect_sig.Parameter.KEYWORD_ONLY, - 'd': inspect_sig.Parameter.KEYWORD_ONLY, - } - sig = inspect_sig.signature(self._A.__init__) - for param in sig.parameters.values(): - self.assertTrue(param.kind == kinds[param.name]) - - def test_keywordonly_inheritance(self): - """ - Tests if the decorator deals with inheritance properly. - """ - kinds_B = { - 'self': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD, - 'args': inspect_sig.Parameter.VAR_POSITIONAL, - 'kwargs': inspect_sig.Parameter.VAR_KEYWORD, - 'e': inspect_sig.Parameter.KEYWORD_ONLY, - } - kinds_A = { - 'self': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD, - 'a': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD, - 'b': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD, - 'kwargs': inspect_sig.Parameter.VAR_KEYWORD, - 'c': inspect_sig.Parameter.KEYWORD_ONLY, - 'd': inspect_sig.Parameter.KEYWORD_ONLY, - } - sig_B = inspect_sig.signature(self._B.__init__) - for param in sig_B.parameters.values(): - self.assertTrue(param.kind == kinds_B[param.name]) - self.assertEqual(len(sig_B.parameters), len(kinds_B)) - - sig_A = inspect_sig.signature(self._A.__init__) - for param in sig_A.parameters.values(): - self.assertTrue(param.kind == kinds_A[param.name]) - self.assertEqual(len(sig_A.parameters), len(kinds_A)) - - with self.assertRaises(TypeError): - b = self._B(3, 5, 7, d=2, e=6) - - def test_repeatable_partial(self): - """ - Test the custom repeatable partial, which makes partial behave the same - in older python versions as in the most recent. - """ - def partial_me(a, b, c=None): - return a, b, c - - partialed_one = partial(partial_me, a=2) - partialed_two = partial(partialed_one, b='string') - - self.assertIsInstance(partialed_one, partial) - self.assertEqual(partialed_one.func, partial_me) - self.assertFalse(partialed_one.args) - self.assertEqual(partialed_one.keywords, {'a': 2}) - - # For the second partial, all should remain the same except the keywords - # are extended by one item. - self.assertIsInstance(partialed_two, partial) - self.assertEqual(partialed_two.func, partial_me) - self.assertFalse(partialed_two.args) - self.assertEqual(partialed_two.keywords, {'a': 2, 'b': 'string'}) - - def test_parameters(self): - """ - Test the `parameter` convenience function. 
-        """
-        x1, x2 = parameters('x1, x2', value=[2.0, 1.3], min=0.0)
-        self.assertEqual(x1.value, 2.0)
-        self.assertEqual(x2.value, 1.3)
-        self.assertEqual(x1.min, 0.0)
-        self.assertEqual(x2.min, 0.0)
-        self.assertEqual(x1.fixed, False)
-        self.assertEqual(x2.fixed, False)
-        with self.assertRaises(ValueError):
-            x1, x2 = parameters('x1, x2', value=[2.0, 1.3, 3.0], min=0.0)
-
-        x1, x2 = parameters('x1, x2', value=[2.0, 1.3], min=[-30, -10], max=[300, 100], fixed=[True, False])
-        self.assertEqual(x1.min, -30)
-        self.assertEqual(x2.min, -10)
-        self.assertEqual(x1.max, 300)
-        self.assertEqual(x2.max, 100)
-        self.assertEqual(x1.value, 2.0)
-        self.assertEqual(x2.value, 1.3)
-        self.assertEqual(x1.fixed, True)
-        self.assertEqual(x2.fixed, False)
-
-        # Illegal bounds
-        with self.assertRaises(ValueError):
-            x1, x2 = parameters('x1, x2', value=[2.0, 1.3], min=[400, -10], max=[300, 100])
-        # Should not raise any error, as repeat is an endless source of values
-        x1, x2 = parameters('x1, x2', value=[2.0, 1.3], min=repeat(0.0))
-
-    def test_cached_property(self):
-        class A(object):
-            def __init__(self):
-                self.counter = 0
-
-            @cached_property
-            def f(self):
-                self.counter += 1
-                return 2
-
-        a = A()
-        # Deleta before a cache was set will fail silently.
-        del a.f
-        with self.assertRaises(AttributeError):
-            # Cache does not exist before f is called
-            a._f
-        self.assertEqual(a.f, 2)
-        self.assertTrue(hasattr(a, '{}_f'.format(cached_property.base_str)))
-        del a.f
-        # check that deletion was successful
-        with self.assertRaises(AttributeError):
-            # Does not exist before f is called
-            a._f
-        # However, the function should still be there
-        self.assertEqual(a.f, 2)
-        with self.assertRaises(AttributeError):
-            # Setting is not allowed.
-            a.f = 3
-
-        # Counter should read 2 at this point, the number of calls since
-        # object creation.
-        self.assertEqual(a.counter, 2)
-        for _ in range(10):
-            a.f
-        # Should be returning from cache, so a.f is not actually called
-        self.assertEqual(a.counter, 2)
-
-if __name__ == '__main__':
-    try:
-        unittest.main(warnings='ignore')
-        # Note that unittest will catch and handle exceptions raised by tests.
-        # So this line will *only* deal with exceptions raised by the line
-        # above.
-    except TypeError:
-        # In Py2, unittest.main doesn't take a warnings argument
-        warnings.simplefilter('ignore')
-        unittest.main()
+        def g(a, b, *args):
+            pass
+
+
+def test_keywordonly_class():
+    """
+    The keywordonly decorator should also update the signature of a
+    decorated method, here A.__init__, as expected.
+    """
+    kinds = {
+        'self': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
+        'a': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
+        'b': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
+        'args': inspect_sig.Parameter.VAR_POSITIONAL,
+        'kwargs': inspect_sig.Parameter.VAR_KEYWORD,
+        'c': inspect_sig.Parameter.KEYWORD_ONLY,
+        'd': inspect_sig.Parameter.KEYWORD_ONLY,
+    }
+    sig = inspect_sig.signature(A.__init__)
+    for param in sig.parameters.values():
+        assert param.kind == kinds[param.name]
+
+
+def test_keywordonly_inheritance():
+    """
+    Tests if the decorator deals with inheritance properly.
+    """
+    kinds_B = {
+        'self': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
+        'args': inspect_sig.Parameter.VAR_POSITIONAL,
+        'kwargs': inspect_sig.Parameter.VAR_KEYWORD,
+        'e': inspect_sig.Parameter.KEYWORD_ONLY,
+    }
+    kinds_A = {
+        'self': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
+        'a': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
+        'b': inspect_sig.Parameter.POSITIONAL_OR_KEYWORD,
+        'kwargs': inspect_sig.Parameter.VAR_KEYWORD,
+        'c': inspect_sig.Parameter.KEYWORD_ONLY,
+        'd': inspect_sig.Parameter.KEYWORD_ONLY,
+    }
+    sig_B = inspect_sig.signature(B.__init__)
+    for param in sig_B.parameters.values():
+        assert param.kind == kinds_B[param.name]
+    assert len(sig_B.parameters) == len(kinds_B)
+
+    sig_A = inspect_sig.signature(A.__init__)
+    for param in sig_A.parameters.values():
+        assert param.kind == kinds_A[param.name]
+    assert len(sig_A.parameters) == len(kinds_A)
+
+    with pytest.raises(TypeError):
+        b = B(3, 5, 7, d=2, e=6)
+
+
+def test_repeatable_partial():
+    """
+    Test the custom repeatable partial, which makes partial behave the same
+    in older Python versions as in the most recent.
+    """
+    def partial_me(a, b, c=None):
+        return a, b, c
+
+    partialed_one = partial(partial_me, a=2)
+    partialed_two = partial(partialed_one, b='string')
+
+    assert isinstance(partialed_one, partial)
+    assert partialed_one.func == partial_me
+    assert not partialed_one.args
+    assert partialed_one.keywords == {'a': 2}
+
+    # For the second partial, all should remain the same except the keywords
+    # are extended by one item.
+    assert isinstance(partialed_two, partial)
+    assert partialed_two.func == partial_me
+    assert not partialed_two.args
+    assert partialed_two.keywords == {'a': 2, 'b': 'string'}
+
+
+def test_parameters():
+    """
+    Test the `parameters` convenience function.
+    """
+    x1, x2 = parameters('x1, x2', value=[2.0, 1.3], min=0.0)
+    assert x1.value == 2.0
+    assert x2.value == 1.3
+    assert x1.min == 0.0
+    assert x2.min == 0.0
+    assert not x1.fixed
+    assert not x2.fixed
+    with pytest.raises(ValueError):
+        x1, x2 = parameters('x1, x2', value=[2.0, 1.3, 3.0], min=0.0)
+
+    x1, x2 = parameters('x1, x2', value=[2.0, 1.3], min=[-30, -10], max=[300, 100], fixed=[True, False])
+
+    assert x1.min == -30
+    assert x2.min == -10
+    assert x1.max == 300
+    assert x2.max == 100
+    assert x1.value == 2.0
+    assert x2.value == 1.3
+    assert x1.fixed
+    assert not x2.fixed
+
+    # Illegal bounds
+    with pytest.raises(ValueError):
+        x1, x2 = parameters('x1, x2', value=[2.0, 1.3], min=[400, -10], max=[300, 100])
+    # Should not raise any error, as repeat is an endless source of values
+    x1, x2 = parameters('x1, x2', value=[2.0, 1.3], min=repeat(0.0))
+
+
+def test_cached_property():
+    class A(object):
+        def __init__(self):
+            self.counter = 0
+
+        @cached_property
+        def f(self):
+            self.counter += 1
+            return 2
+
+    a = A()
+    # Deleting a.f before the cache has been set fails silently.
+    del a.f
+    with pytest.raises(AttributeError):
+        # Cache does not exist before f is called
+        a._f
+    assert a.f == 2
+    assert hasattr(a, '{}_f'.format(cached_property.base_str))
+    del a.f
+    # Check that deletion was successful
+    with pytest.raises(AttributeError):
+        # Does not exist before f is called
+        a._f
+    # However, the function should still be there
+    assert a.f == 2
+    with pytest.raises(AttributeError):
+        # Setting is not allowed.
+        a.f = 3
+
+    # Counter should read 2 at this point, the number of calls since
+    # object creation.
+    assert a.counter == 2
+    for _ in range(10):
+        a.f
+    # Should be returning from the cache, so the underlying f is not
+    # actually called again.
+    assert a.counter == 2
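
A note on the conversion pattern used throughout this patch: unittest's assertAlmostEqual compares to 7 decimal places by default (an absolute check), while pytest.approx defaults to a relative tolerance of 1e-6 with an absolute floor of 1e-12, so the two are close but not identical in strictness. A minimal, self-contained sketch of the correspondence:

import pytest

# self.assertAlmostEqual(a, b) passes when round(a - b, 7) == 0,
# i.e. roughly |a - b| <= 5e-8 (an absolute tolerance).
# pytest.approx(b) instead uses rel=1e-6 and abs=1e-12 by default.
assert 1.0 + 1e-7 == pytest.approx(1.0)   # within the relative tolerance
assert 0.1 + 0.2 == pytest.approx(0.3)    # the classic floating-point example
# Where the old absolute behaviour matters, make the tolerance explicit:
assert 1.0 + 4e-8 == pytest.approx(1.0, abs=5e-8)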
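The keywordonly tests above hinge on one behaviour: the decorator rewrites the wrapped function's signature to make c and d keyword-only, but the body still has to pop them from **kwargs itself. A standalone sketch of that behaviour, assuming keywordonly and RequiredKeyword are importable from symfit.core.support as in the test module:

from symfit.core.support import keywordonly, RequiredKeyword

@keywordonly(c=2, d=RequiredKeyword)
def f(a, b, *args, **kwargs):
    # The decorator only changes the signature; the defaults are
    # injected into **kwargs, from which the body retrieves them.
    c = kwargs.pop('c')
    d = kwargs.pop('d')
    return a + b + c + d

print(f(4, 3, c=5, d=6))  # 18: every value passed explicitly
print(f(4, 3, 5, d=6))    # 14: the 5 lands in *args, so the default c=2 applies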
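Likewise, test_cached_property pins down the contract of symfit's cached_property: the decorated method runs once on first access, the result is stored on the instance under a '{base_str}_f'-style attribute, subsequent reads come from that cache, deletion invalidates it, and assignment is forbidden. A small sketch of that contract, assuming the same import path the tests use (Demo and answer are illustrative names, not part of the library):

from symfit.core.support import cached_property

class Demo(object):
    def __init__(self):
        self.counter = 0

    @cached_property
    def answer(self):
        self.counter += 1  # counts how often the method body actually runs
        return 2

demo = Demo()
assert demo.answer == 2   # first access computes and caches the value
assert demo.answer == 2   # second access is served from the cache
assert demo.counter == 1  # the method body ran only once
del demo.answer           # invalidate the cache; the next access recomputes
assert demo.answer == 2
assert demo.counter == 2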