diff --git a/qiskit_experiments/curve_analysis/curve_analysis.py b/qiskit_experiments/curve_analysis/curve_analysis.py
index 383931f1ab..fd1a5ff413 100644
--- a/qiskit_experiments/curve_analysis/curve_analysis.py
+++ b/qiskit_experiments/curve_analysis/curve_analysis.py
@@ -316,20 +316,30 @@ def _run_curve_fit(
 
         valid_uncertainty = np.all(np.isfinite(curve_data.y_err))
 
+        model_weights = {}
+        if valid_uncertainty:
+            for model in models:
+                sub_yerr = curve_data.get_subset_of(model._name).y_err
+                if len(sub_yerr) == 0:
+                    continue
+                nonzero_yerr = np.where(np.isclose(sub_yerr, 0.0), np.finfo(float).eps, sub_yerr)
+                raw_weights = 1 / nonzero_yerr
+                # Remove outliers. With sample averaging all samples may coincide, or with
+                # shot-weighted averaging the sampling error may be zero, so some yerr values
+                # can be very close to zero and yield disproportionately large weights.
+                # Such outliers dominate the fit and mask the residuals of other data points.
+                maximum_weight = np.percentile(raw_weights, 90)
+                model_weights[model._name] = np.clip(raw_weights, 0.0, maximum_weight)
+
         # Objective function for minimize. This computes composite residuals of sub models.
         def _objective(_params):
             ys = []
             for model in models:
                 sub_data = curve_data.get_subset_of(model._name)
-                with np.errstate(divide="ignore"):
-                    # Ignore numpy runtime warning.
-                    # Zero y_err point introduces infinite weight,
-                    # but this should be managed by LMFIT.
-                    weights = 1.0 / sub_data.y_err if valid_uncertainty else None
                 yi = model._residual(
                     params=_params,
                     data=sub_data.y,
-                    weights=weights,
+                    weights=model_weights.get(model._name, None),
                     x=sub_data.x,
                 )
                 ys.append(yi)
@@ -351,14 +361,15 @@ def _objective(_params):
             )
 
             try:
-                new = lmfit.minimize(
-                    fcn=_objective,
-                    params=guess_params,
-                    method=self.options.fit_method,
-                    scale_covar=not valid_uncertainty,
-                    nan_policy="omit",
-                    **fit_option.fitter_opts,
-                )
+                with np.errstate(all="ignore"):
+                    new = lmfit.minimize(
+                        fcn=_objective,
+                        params=guess_params,
+                        method=self.options.fit_method,
+                        scale_covar=not valid_uncertainty,
+                        nan_policy="omit",
+                        **fit_option.fitter_opts,
+                    )
             except Exception:  # pylint: disable=broad-except
                 continue
 
diff --git a/releasenotes/notes/fix-curve-fit-weights-fb43d3aa5ed1c91c.yaml b/releasenotes/notes/fix-curve-fit-weights-fb43d3aa5ed1c91c.yaml
new file mode 100644
index 0000000000..98b1172c52
--- /dev/null
+++ b/releasenotes/notes/fix-curve-fit-weights-fb43d3aa5ed1c91c.yaml
@@ -0,0 +1,9 @@
+---
+fixes:
+  - |
+    Fixed the calculation of weights for curve fitting. Previously, the weight of each data
+    point in the fit residual was computed as the inverse of its y error bar.
+    This could assign excessive weight to data points whose error bar is small or zero,
+    causing the fit to locally overfit to these data points.
+    To avoid this edge case, the computed weights are now clipped at their 90th percentile.
+    This update may slightly change the outcome of a fit.
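Editor's note: a minimal standalone sketch (not part of the patch) of the clipping scheme described in the release note above. Only numpy is assumed; the y_err values here are synthetic stand-ins for the error bars carried by curve_data.

import numpy as np

# Synthetic error bars: most points have ordinary uncertainty, three report zero.
rng = np.random.default_rng(seed=42)
y_err = rng.uniform(0.04, 0.08, size=100)
y_err[:3] = 0.0

# Map exact zeros to machine epsilon so the inverse stays finite.
nonzero_yerr = np.where(np.isclose(y_err, 0.0), np.finfo(float).eps, y_err)
raw_weights = 1 / nonzero_yerr  # ~4.5e15 for the zero-error points

# Clip at the 90th percentile so no single point dominates the residual.
maximum_weight = np.percentile(raw_weights, 90)
weights = np.clip(raw_weights, 0.0, maximum_weight)

print(raw_weights.max())  # ~4.5e15: would swamp every other residual term
print(weights.max())      # ~25: now comparable to the typical weights (12.5-25)

Because the cap is a percentile of the weights themselves, the scheme is scale-free: it adapts to whatever magnitude the typical error bars have, rather than relying on a fixed threshold.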
diff --git a/test/curve_analysis/test_baseclass.py b/test/curve_analysis/test_baseclass.py
index 930c608f2c..f25a571b8c 100644
--- a/test/curve_analysis/test_baseclass.py
+++ b/test/curve_analysis/test_baseclass.py
@@ -434,6 +434,49 @@ def test_end_to_end_parallel_analysis(self):
         self.assertAlmostEqual(taus[0].value.nominal_value, tau1, delta=0.1)
         self.assertAlmostEqual(taus[1].value.nominal_value, tau2, delta=0.1)
 
+    def test_end_to_end_zero_yerr(self):
+        """Integration test for an edge case of having zero y error.
+
+        When the error bar is zero, the fit weights used to compute the residuals tend to
+        become very large. When a weight is disproportionately large, the result locally
+        overfits to the data points with small or zero y error.
+        """
+        analysis = CurveAnalysis(models=[ExpressionModel(expr="amp * x**2", name="test")])
+        analysis.set_options(
+            data_processor=DataProcessor(input_key="counts", data_actions=[Probability("1")]),
+            result_parameters=["amp"],
+            average_method="sample",  # Use sample averaging to make some yerr = 0
+            plot=False,
+            p0={"amp": 0.2},
+        )
+
+        amp = 0.3
+        x = np.linspace(0, 1, 100)
+        y = amp * x**2
+
+        # Replace small y values with zero.
+        # Since the mock function samples the count dictionary from a binomial distribution,
+        # y = 0 (or 1) always yields the same count dictionary,
+        # and hence the y error becomes zero with sample averaging.
+        # In this case, a fit with amp = 0 may look optimal.
+        y[0] = 0
+        y[1] = 0
+        y[2] = 0
+
+        test_data1 = self.single_sampler(x, y, seed=123)
+        test_data2 = self.single_sampler(x, y, seed=124)
+        test_data3 = self.single_sampler(x, y, seed=125)
+
+        expdata = ExperimentData(experiment=FakeExperiment())
+        expdata.add_data(test_data1.data())
+        expdata.add_data(test_data2.data())
+        expdata.add_data(test_data3.data())
+
+        result = analysis.run(expdata)
+        self.assertExperimentDone(result)
+
+        self.assertAlmostEqual(result.analysis_results("amp").value.nominal_value, amp, delta=0.1)
+
     def test_get_init_params(self):
         """Integration test for getting initial parameter from overview entry."""
diff --git a/test/library/calibration/test_ramsey_xy.py b/test/library/calibration/test_ramsey_xy.py
index 68936b3913..ccd9a7db0b 100644
--- a/test/library/calibration/test_ramsey_xy.py
+++ b/test/library/calibration/test_ramsey_xy.py
@@ -56,7 +56,7 @@ def test_end_to_end(self, freq_shift: float):
 
         This test also checks that we can pickup frequency shifts with different signs.
         """
-        test_tol = 0.01
+        test_tol = 0.03
         abs_tol = max(1e3, abs(freq_shift) * test_tol)
 
         exp_helper = RamseyXYHelper()
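Editor's note: a hedged, self-contained sketch (not part of the patch) of the fit pattern used in _run_curve_fit above: percentile-clipped weights applied to a residual vector and handed to lmfit.minimize. The data is synthetic, mimicking the regression test; only numpy and lmfit public APIs are assumed.

import lmfit
import numpy as np

# Synthetic data as in the new test: y = amp * x**2 with noise,
# where the first three points claim zero uncertainty.
rng = np.random.default_rng(seed=0)
x = np.linspace(0, 1, 100)
y = 0.3 * x**2 + rng.normal(0.0, 0.02, size=x.size)
y_err = np.full_like(x, 0.02)
y_err[:3] = 0.0

# Percentile-clipped weights, following the patch.
nonzero_yerr = np.where(np.isclose(y_err, 0.0), np.finfo(float).eps, y_err)
raw_weights = 1 / nonzero_yerr
weights = np.clip(raw_weights, 0.0, np.percentile(raw_weights, 90))

def _objective(params):
    # Weighted residual vector; lmfit minimizes its sum of squares.
    return weights * (params["amp"].value * x**2 - y)

guess = lmfit.Parameters()
guess.add("amp", value=0.2)
result = lmfit.minimize(fcn=_objective, params=guess, method="least_squares", nan_policy="omit")
print(result.params["amp"].value)  # close to 0.3

Without the clip, the three zero-error points would carry weights near 1/eps and the optimizer would fit their noise while ignoring the remaining 97 residuals; after the clip every weight is bounded by the 90th percentile and the fit recovers amp. Note also that the patch passes scale_covar=not valid_uncertainty, so lmfit rescales the covariance by the reduced chi-squared only when real error bars are unavailable.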