From 5db1a425cd186c2127182b1c38c88e1d1a40061a Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Wed, 17 Mar 2021 15:28:18 -0400 Subject: [PATCH 01/25] Add CurveFitAnalysis class --- qiskit_experiments/__init__.py | 1 + qiskit_experiments/analysis/__init__.py | 19 ++ .../analysis/curve_fit_analysis.py | 170 ++++++++++++++++++ 3 files changed, 190 insertions(+) create mode 100644 qiskit_experiments/analysis/__init__.py create mode 100644 qiskit_experiments/analysis/curve_fit_analysis.py diff --git a/qiskit_experiments/__init__.py b/qiskit_experiments/__init__.py index eb4560cf67..c34cc5d81b 100644 --- a/qiskit_experiments/__init__.py +++ b/qiskit_experiments/__init__.py @@ -18,4 +18,5 @@ from .experiment_data import ExperimentData, AnalysisResult # Experiment modules +from . import analysis from . import composite diff --git a/qiskit_experiments/analysis/__init__.py b/qiskit_experiments/analysis/__init__.py new file mode 100644 index 0000000000..b7ea07e437 --- /dev/null +++ b/qiskit_experiments/analysis/__init__.py @@ -0,0 +1,19 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +""" +Standalone analysis classes + +These can be used when creating experiments +""" + +from .curve_fit_analysis import CurveFitAnalysis diff --git a/qiskit_experiments/analysis/curve_fit_analysis.py b/qiskit_experiments/analysis/curve_fit_analysis.py new file mode 100644 index 0000000000..4c1df34830 --- /dev/null +++ b/qiskit_experiments/analysis/curve_fit_analysis.py @@ -0,0 +1,170 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. +""" +Curve fitter analysis class +""" + +from typing import Union, Optional, Callable, List, Tuple, Dict + +import numpy as np +from scipy.optimize import curve_fit +from qiskit_experiments.base_analysis import BaseAnalysis, AnalysisResult + +try: + from matplotlib import pyplot as plt + + HAS_MATPLOTLIB = True +except ImportError: + HAS_MATPLOTLIB = False + + +class CurveFitAnalysis(BaseAnalysis): + """Analysis class based on scipy.optimize.curve_fit""" + + # pylint: disable = arguments-differ + def _run_analysis( + self, + experiment_data: "ExperimentData", + func: Callable, + outcome: Union[str, int] = 0, + plabels: Optional[List[str]] = None, + p0: Optional[List[float]] = None, + p0_func: Optional[Callable] = None, + bounds: Optional[Tuple] = None, + plot: bool = True, + ax: Optional["AxesSubplot"] = None, + ): + """Run curve fit analysis on circuit data. + + Args: + experiment_data (ExperimentData): the experiment data to analyze. + func: fit function `f(x, *params)`. + outcome: measurement outcome probability to extract from counts. + plabels: Optional, parameter names for fit function. + p0: Optional, initial parameter values for curve_fit. + p0_func: Optional, function for calculating initial p0. + bounds: Optional, parameter bounds for curve_fit. + plot: If True generate a plot of fitted data. + ax: Optional, matplotlib axis to add plot to. + + Returns: + tuple: A pair ``(analysis_results, figures)`` where + ``analysis_results`` may be a single or list of + AnalysisResult objects, and ``figures`` may be + None, a single figure, or a list of figures. + """ + # Initial guess + xdata, ydata, ystderr = self._curve_fit_data(experiment_data.data, outcome=outcome) + + if p0_func is not None and p0 is None: + p0 = p0_func(xdata, ydata, sigma=ystderr) + + # Fit ydata + popt, pcov = curve_fit(func, xdata, ydata, sigma=ystderr, p0=p0, bounds=bounds) + popt_err = np.sqrt(np.diag(pcov)) + chisq = self._chisq(xdata, ydata, ystderr, func, popt) + + result = AnalysisResult( + { + "popt": popt, + "popt_err": popt_err, + "pcov": pcov, + "plabels": plabels, + "chisq": chisq, + } + ) + + if plot: + ax = self._curve_fit_plot(xdata, ydata, func, popt, popt_err, ax=ax) + # TODO: figure out what to do with plots + return result, [ax] + + return result, None + + @staticmethod + def _counts_probability(counts: "Counts", key: Union[str, int]) -> Tuple[float]: + """Return the specified outcome probability mean and variance""" + shots = sum(counts.values()) + p_mean = counts.get(key, 0.0) / shots + p_var = shots * p_mean * (1 - p_mean) + return p_mean, p_var + + @staticmethod + def _chisq( + xdata: np.ndarray, ydata: np.ndarray, ystderr: np.ndarray, func: Callable, popt: np.ndarray + ) -> float: + """Return the chi-squared of fit""" + yfits = func(xdata, *popt) + residuals = ((yfits - ydata) / ystderr) ** 2 + return np.sum(residuals) + + @classmethod + def _curve_fit_data( + cls, data: List[Dict[str, any]], outcome: Union[str, int] + ) -> Tuple[np.ndarray]: + """Return input data for curve_fit function""" + size = len(data) + xdata = np.zeros(size, dtype=int) + ydata = np.zeros(size, dtype=float) + ydata_var = np.zeros(size, dtype=float) + if isinstance(outcome, int): + num_clbits = data[0]["metadata"]["num_qubits"] + outcome = bin(outcome)[2:] + outcome = (num_clbits - len(outcome)) * "0" + outcome + for i, datum in enumerate(data): + metadata = datum["metadata"] + xdata[i] = metadata["xdata"] + y_mean, y_var = cls._counts_probability(datum["counts"], outcome) + xdata[i] = metadata["xdata"] + ydata[i] = y_mean + ydata_var[i] = y_var + ystderr = np.sqrt(ydata_var) + return xdata, ydata, ystderr + + @classmethod + def _curve_fit_plot( + cls, + xdata: np.ndarray, + ydata: np.ndarray, + func: Callable, + popt: np.ndarray, + popt_err: np.ndarray, + num_fit_points: int = 100, + ax: Optional["AxesSubplot"] = None, + ): + + if not HAS_MATPLOTLIB: + raise ImportError( + " requires matplotlib to generate curve fit " 'Run "pip install matplotlib" before.' + ) + + if ax is None: + plt.figure() + ax = plt.gca() + + # Plot raw data + ax.scatter(xdata, ydata, c="red", marker="x") + + # Plot fit data + xs = np.linspace(np.min(xdata), np.max(xdata), num_fit_points) + ys_fit = func(xs, *popt) + ax.plot(xs, ys_fit, color="blue", linestyle="-", linewidth=2) + + # Plot standard error interval + ys_upper = func(xs, *(popt + popt_err)) + ys_lower = func(xs, *(popt - popt_err)) + ax.fill_between(xs, ys_lower, ys_upper, color="blue", alpha=0.1) + + # Formatting + ax.tick_params(labelsize=14) + ax.grid(True) + return ax From a6d9c93410cfbeaea864300b8a6d88a0bd3e6f02 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Wed, 17 Mar 2021 16:03:02 -0400 Subject: [PATCH 02/25] Get outcome label from metadata --- .../analysis/curve_fit_analysis.py | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fit_analysis.py b/qiskit_experiments/analysis/curve_fit_analysis.py index 4c1df34830..f67d312af6 100644 --- a/qiskit_experiments/analysis/curve_fit_analysis.py +++ b/qiskit_experiments/analysis/curve_fit_analysis.py @@ -35,8 +35,6 @@ def _run_analysis( self, experiment_data: "ExperimentData", func: Callable, - outcome: Union[str, int] = 0, - plabels: Optional[List[str]] = None, p0: Optional[List[float]] = None, p0_func: Optional[Callable] = None, bounds: Optional[Tuple] = None, @@ -48,8 +46,6 @@ def _run_analysis( Args: experiment_data (ExperimentData): the experiment data to analyze. func: fit function `f(x, *params)`. - outcome: measurement outcome probability to extract from counts. - plabels: Optional, parameter names for fit function. p0: Optional, initial parameter values for curve_fit. p0_func: Optional, function for calculating initial p0. bounds: Optional, parameter bounds for curve_fit. @@ -63,7 +59,7 @@ def _run_analysis( None, a single figure, or a list of figures. """ # Initial guess - xdata, ydata, ystderr = self._curve_fit_data(experiment_data.data, outcome=outcome) + xdata, ydata, ystderr = self._curve_fit_data(experiment_data.data) if p0_func is not None and p0 is None: p0 = p0_func(xdata, ydata, sigma=ystderr) @@ -78,12 +74,11 @@ def _run_analysis( "popt": popt, "popt_err": popt_err, "pcov": pcov, - "plabels": plabels, "chisq": chisq, } ) - if plot: + if plot and HAS_MATPLOTLIB: ax = self._curve_fit_plot(xdata, ydata, func, popt, popt_err, ax=ax) # TODO: figure out what to do with plots return result, [ax] @@ -108,27 +103,27 @@ def _chisq( return np.sum(residuals) @classmethod - def _curve_fit_data( - cls, data: List[Dict[str, any]], outcome: Union[str, int] - ) -> Tuple[np.ndarray]: - """Return input data for curve_fit function""" + def _curve_fit_data(cls, data: List[Dict[str, any]]) -> Tuple[np.ndarray]: + """Return input data for curve_fit function. + + This requires count data and metadata with `"xdata"`, `"ylabel"` + keys containing the numeric x-value for fitting, and the outcome + bitstring for computing y-value probability from counts. + """ size = len(data) xdata = np.zeros(size, dtype=int) ydata = np.zeros(size, dtype=float) ydata_var = np.zeros(size, dtype=float) - if isinstance(outcome, int): - num_clbits = data[0]["metadata"]["num_qubits"] - outcome = bin(outcome)[2:] - outcome = (num_clbits - len(outcome)) * "0" + outcome + for i, datum in enumerate(data): metadata = datum["metadata"] xdata[i] = metadata["xdata"] - y_mean, y_var = cls._counts_probability(datum["counts"], outcome) - xdata[i] = metadata["xdata"] + y_mean, y_var = cls._counts_probability( + datum["counts"], metadata["ylabel"]) ydata[i] = y_mean ydata_var[i] = y_var - ystderr = np.sqrt(ydata_var) - return xdata, ydata, ystderr + + return xdata, ydata, np.sqrt(ydata_var) @classmethod def _curve_fit_plot( From b81631202e15c3b25b69924fb76788f9b1325cbe Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Wed, 17 Mar 2021 16:21:26 -0400 Subject: [PATCH 03/25] Fix import error message if no matplotlib --- qiskit_experiments/analysis/curve_fit_analysis.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fit_analysis.py b/qiskit_experiments/analysis/curve_fit_analysis.py index f67d312af6..92f4223a26 100644 --- a/qiskit_experiments/analysis/curve_fit_analysis.py +++ b/qiskit_experiments/analysis/curve_fit_analysis.py @@ -118,8 +118,7 @@ def _curve_fit_data(cls, data: List[Dict[str, any]]) -> Tuple[np.ndarray]: for i, datum in enumerate(data): metadata = datum["metadata"] xdata[i] = metadata["xdata"] - y_mean, y_var = cls._counts_probability( - datum["counts"], metadata["ylabel"]) + y_mean, y_var = cls._counts_probability(datum["counts"], metadata["ylabel"]) ydata[i] = y_mean ydata_var[i] = y_var @@ -139,7 +138,8 @@ def _curve_fit_plot( if not HAS_MATPLOTLIB: raise ImportError( - " requires matplotlib to generate curve fit " 'Run "pip install matplotlib" before.' + "{} requires matplotlib to generate curve fit plot." + ' Run "pip install matplotlib" before.'.format(cls.__name__) ) if ax is None: From 1e0e75505c5c490a5613d10f877ba89b42942f38 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Wed, 17 Mar 2021 17:50:12 -0400 Subject: [PATCH 04/25] Add option for fitting means of batched data sets --- .../analysis/curve_fit_analysis.py | 61 ++++++++++++++----- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fit_analysis.py b/qiskit_experiments/analysis/curve_fit_analysis.py index 92f4223a26..59471925a5 100644 --- a/qiskit_experiments/analysis/curve_fit_analysis.py +++ b/qiskit_experiments/analysis/curve_fit_analysis.py @@ -38,6 +38,7 @@ def _run_analysis( p0: Optional[List[float]] = None, p0_func: Optional[Callable] = None, bounds: Optional[Tuple] = None, + fit_mean_data: bool = False, plot: bool = True, ax: Optional["AxesSubplot"] = None, ): @@ -49,6 +50,7 @@ def _run_analysis( p0: Optional, initial parameter values for curve_fit. p0_func: Optional, function for calculating initial p0. bounds: Optional, parameter bounds for curve_fit. + fit_mean_data: Optional, if True pass means of data points to curve_fit. plot: If True generate a plot of fitted data. ax: Optional, matplotlib axis to add plot to. @@ -58,16 +60,20 @@ def _run_analysis( AnalysisResult objects, and ``figures`` may be None, a single figure, or a list of figures. """ - # Initial guess - xdata, ydata, ystderr = self._curve_fit_data(experiment_data.data) + # Compute curve fit data + xdata, ydata, ydata_sigma = self._curve_fit_data(experiment_data.data) + if fit_mean_data: + xvals, yvals, sigma = self._mean_data(xdata, ydata) + else: + xvals, yvals, sigma = xdata, ydata, ydata_sigma if p0_func is not None and p0 is None: - p0 = p0_func(xdata, ydata, sigma=ystderr) + p0 = p0_func(xdata, ydata, sigma=sigma) - # Fit ydata - popt, pcov = curve_fit(func, xdata, ydata, sigma=ystderr, p0=p0, bounds=bounds) + # Run curve fit + popt, pcov = curve_fit(func, xvals, yvals, sigma=sigma, p0=p0, bounds=bounds) popt_err = np.sqrt(np.diag(pcov)) - chisq = self._chisq(xdata, ydata, ystderr, func, popt) + chisq = self._chisq(xvals, yvals, sigma, func, popt) result = AnalysisResult( { @@ -79,7 +85,9 @@ def _run_analysis( ) if plot and HAS_MATPLOTLIB: - ax = self._curve_fit_plot(xdata, ydata, func, popt, popt_err, ax=ax) + mean_data = (xvals, yvals, sigma) if fit_mean_data else None + ax = self._curve_fit_plot(func, popt, popt_err, xdata, + ydata=ydata, mean_data=mean_data, ax=ax) # TODO: figure out what to do with plots return result, [ax] @@ -95,11 +103,11 @@ def _counts_probability(counts: "Counts", key: Union[str, int]) -> Tuple[float]: @staticmethod def _chisq( - xdata: np.ndarray, ydata: np.ndarray, ystderr: np.ndarray, func: Callable, popt: np.ndarray + xdata: np.ndarray, ydata: np.ndarray, sigma: np.ndarray, func: Callable, popt: np.ndarray ) -> float: """Return the chi-squared of fit""" yfits = func(xdata, *popt) - residuals = ((yfits - ydata) / ystderr) ** 2 + residuals = ((yfits - ydata) / sigma) ** 2 return np.sum(residuals) @classmethod @@ -124,18 +132,35 @@ def _curve_fit_data(cls, data: List[Dict[str, any]]) -> Tuple[np.ndarray]: return xdata, ydata, np.sqrt(ydata_var) + @classmethod + def _mean_data(cls, xdata, ydata): + """Return mean data for data containing duplicate xdata values""" + # Note this assumes discrete X-data + x_means = np.unique(xdata) + y_means = np.zeros(x_means.size) + y_sigmas = np.zeros(x_means.size) + for i in range(x_means.size): + ys = ydata[xdata == x_means[i]] + num_samples = len(ys) + sample_mean = np.mean(ys) + sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) + y_means[i] = sample_mean + y_sigmas[i] = np.sqrt(sample_var) + return x_means, y_means, y_sigmas + @classmethod def _curve_fit_plot( cls, - xdata: np.ndarray, - ydata: np.ndarray, func: Callable, popt: np.ndarray, popt_err: np.ndarray, + xdata: np.ndarray, + ydata: Optional[np.ndarray] = None, + mean_data: Optional[Tuple[np.ndarray]] = None, num_fit_points: int = 100, ax: Optional["AxesSubplot"] = None, ): - + """Generate plot of raw and fitted data""" if not HAS_MATPLOTLIB: raise ImportError( "{} requires matplotlib to generate curve fit plot." @@ -146,9 +171,6 @@ def _curve_fit_plot( plt.figure() ax = plt.gca() - # Plot raw data - ax.scatter(xdata, ydata, c="red", marker="x") - # Plot fit data xs = np.linspace(np.min(xdata), np.max(xdata), num_fit_points) ys_fit = func(xs, *popt) @@ -159,6 +181,15 @@ def _curve_fit_plot( ys_lower = func(xs, *(popt - popt_err)) ax.fill_between(xs, ys_lower, ys_upper, color="blue", alpha=0.1) + # Plot raw data + if ydata is not None: + ax.scatter(xdata, ydata, c="grey", marker="x") + + # Error bar plot of mean data + if mean_data is not None: + ax.errorbar(mean_data[0], mean_data[1], mean_data[2], + marker='.', markersize=9, linestyle='--', color='red') + # Formatting ax.tick_params(labelsize=14) ax.grid(True) From 8d1c9711132778c1eb7d289379b3abf69b87cbbd Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Fri, 19 Mar 2021 10:49:14 -0400 Subject: [PATCH 05/25] Reorganize helper functions into separate file --- .../analysis/curve_fit_analysis.py | 147 +--------- qiskit_experiments/analysis/curve_fit_data.py | 255 ++++++++++++++++++ 2 files changed, 261 insertions(+), 141 deletions(-) create mode 100644 qiskit_experiments/analysis/curve_fit_data.py diff --git a/qiskit_experiments/analysis/curve_fit_analysis.py b/qiskit_experiments/analysis/curve_fit_analysis.py index 59471925a5..293553ae9a 100644 --- a/qiskit_experiments/analysis/curve_fit_analysis.py +++ b/qiskit_experiments/analysis/curve_fit_analysis.py @@ -16,15 +16,8 @@ from typing import Union, Optional, Callable, List, Tuple, Dict import numpy as np -from scipy.optimize import curve_fit from qiskit_experiments.base_analysis import BaseAnalysis, AnalysisResult - -try: - from matplotlib import pyplot as plt - - HAS_MATPLOTLIB = True -except ImportError: - HAS_MATPLOTLIB = False +from .curve_fit_data import curve_fit class CurveFitAnalysis(BaseAnalysis): @@ -41,6 +34,7 @@ def _run_analysis( fit_mean_data: bool = False, plot: bool = True, ax: Optional["AxesSubplot"] = None, + **kwargs ): """Run curve fit analysis on circuit data. @@ -53,6 +47,7 @@ def _run_analysis( fit_mean_data: Optional, if True pass means of data points to curve_fit. plot: If True generate a plot of fitted data. ax: Optional, matplotlib axis to add plot to. + kwargs: additional kwargs to pass to curve_fit. Returns: tuple: A pair ``(analysis_results, figures)`` where @@ -60,137 +55,7 @@ def _run_analysis( AnalysisResult objects, and ``figures`` may be None, a single figure, or a list of figures. """ - # Compute curve fit data - xdata, ydata, ydata_sigma = self._curve_fit_data(experiment_data.data) - if fit_mean_data: - xvals, yvals, sigma = self._mean_data(xdata, ydata) - else: - xvals, yvals, sigma = xdata, ydata, ydata_sigma - - if p0_func is not None and p0 is None: - p0 = p0_func(xdata, ydata, sigma=sigma) - - # Run curve fit - popt, pcov = curve_fit(func, xvals, yvals, sigma=sigma, p0=p0, bounds=bounds) - popt_err = np.sqrt(np.diag(pcov)) - chisq = self._chisq(xvals, yvals, sigma, func, popt) - - result = AnalysisResult( - { - "popt": popt, - "popt_err": popt_err, - "pcov": pcov, - "chisq": chisq, - } - ) - - if plot and HAS_MATPLOTLIB: - mean_data = (xvals, yvals, sigma) if fit_mean_data else None - ax = self._curve_fit_plot(func, popt, popt_err, xdata, - ydata=ydata, mean_data=mean_data, ax=ax) - # TODO: figure out what to do with plots - return result, [ax] - + analysis_result, ax = curve_fit( + experiment_data.data, func=func, p0=p0, p0_func=p0_func, + bound=bounds, fit_mean_data=fit_mean_data, plot=plotm ax=ax, **kwargs) return result, None - - @staticmethod - def _counts_probability(counts: "Counts", key: Union[str, int]) -> Tuple[float]: - """Return the specified outcome probability mean and variance""" - shots = sum(counts.values()) - p_mean = counts.get(key, 0.0) / shots - p_var = shots * p_mean * (1 - p_mean) - return p_mean, p_var - - @staticmethod - def _chisq( - xdata: np.ndarray, ydata: np.ndarray, sigma: np.ndarray, func: Callable, popt: np.ndarray - ) -> float: - """Return the chi-squared of fit""" - yfits = func(xdata, *popt) - residuals = ((yfits - ydata) / sigma) ** 2 - return np.sum(residuals) - - @classmethod - def _curve_fit_data(cls, data: List[Dict[str, any]]) -> Tuple[np.ndarray]: - """Return input data for curve_fit function. - - This requires count data and metadata with `"xdata"`, `"ylabel"` - keys containing the numeric x-value for fitting, and the outcome - bitstring for computing y-value probability from counts. - """ - size = len(data) - xdata = np.zeros(size, dtype=int) - ydata = np.zeros(size, dtype=float) - ydata_var = np.zeros(size, dtype=float) - - for i, datum in enumerate(data): - metadata = datum["metadata"] - xdata[i] = metadata["xdata"] - y_mean, y_var = cls._counts_probability(datum["counts"], metadata["ylabel"]) - ydata[i] = y_mean - ydata_var[i] = y_var - - return xdata, ydata, np.sqrt(ydata_var) - - @classmethod - def _mean_data(cls, xdata, ydata): - """Return mean data for data containing duplicate xdata values""" - # Note this assumes discrete X-data - x_means = np.unique(xdata) - y_means = np.zeros(x_means.size) - y_sigmas = np.zeros(x_means.size) - for i in range(x_means.size): - ys = ydata[xdata == x_means[i]] - num_samples = len(ys) - sample_mean = np.mean(ys) - sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) - y_means[i] = sample_mean - y_sigmas[i] = np.sqrt(sample_var) - return x_means, y_means, y_sigmas - - @classmethod - def _curve_fit_plot( - cls, - func: Callable, - popt: np.ndarray, - popt_err: np.ndarray, - xdata: np.ndarray, - ydata: Optional[np.ndarray] = None, - mean_data: Optional[Tuple[np.ndarray]] = None, - num_fit_points: int = 100, - ax: Optional["AxesSubplot"] = None, - ): - """Generate plot of raw and fitted data""" - if not HAS_MATPLOTLIB: - raise ImportError( - "{} requires matplotlib to generate curve fit plot." - ' Run "pip install matplotlib" before.'.format(cls.__name__) - ) - - if ax is None: - plt.figure() - ax = plt.gca() - - # Plot fit data - xs = np.linspace(np.min(xdata), np.max(xdata), num_fit_points) - ys_fit = func(xs, *popt) - ax.plot(xs, ys_fit, color="blue", linestyle="-", linewidth=2) - - # Plot standard error interval - ys_upper = func(xs, *(popt + popt_err)) - ys_lower = func(xs, *(popt - popt_err)) - ax.fill_between(xs, ys_lower, ys_upper, color="blue", alpha=0.1) - - # Plot raw data - if ydata is not None: - ax.scatter(xdata, ydata, c="grey", marker="x") - - # Error bar plot of mean data - if mean_data is not None: - ax.errorbar(mean_data[0], mean_data[1], mean_data[2], - marker='.', markersize=9, linestyle='--', color='red') - - # Formatting - ax.tick_params(labelsize=14) - ax.grid(True) - return ax diff --git a/qiskit_experiments/analysis/curve_fit_data.py b/qiskit_experiments/analysis/curve_fit_data.py new file mode 100644 index 0000000000..fa8b6b620a --- /dev/null +++ b/qiskit_experiments/analysis/curve_fit_data.py @@ -0,0 +1,255 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. +""" +Curve fitter analysis class +""" + +from typing import List, Tuple, Dict, Callable, Optional + +import numpy as np +from scipy.optimize import curve_fit as scipy_curve_fit + +from qiskit.exceptions import QiskitError +from qiskit_experiments.base_analysis import AnalysisResult + +try: + from matplotlib import pyplot as plt + + HAS_MATPLOTLIB = True +except ImportError: + HAS_MATPLOTLIB = False + + +# pylint: disable = invalid-name +def curve_fit( + data: List[Dict[str, any]], + func: Callable, + p0: Optional[List[float]] = None, + p0_func: Optional[Callable] = None, + bounds: Optional[Tuple] = None, + fit_mean_data: bool = False, + plot: bool = True, + ax: Optional["AxesSubplot"] = None, + **kwargs +): + """Run curve fit analysis on circuit data. + + Args: + data (ExperimentData): the experiment data to analyze. + func: fit function `f(x, *params)`. + p0: Optional, initial parameter values for curve_fit. + p0_func: Optional, function for calculating initial p0. + bounds: Optional, parameter bounds for curve_fit. + fit_mean_data: Optional, if True pass means of data points to curve_fit. + plot: If True generate a plot of fitted data. + ax: Optional, matplotlib axis to add plot to. + kwargs: additional kwargs to pass to curve_fit. + + Returns: + tuple: A pair ``(analysis_results, figures)`` where + ``analysis_results`` may be a single or list of + AnalysisResult objects, and ``figures`` may be + None, a single figure, or a list of figures. + """ + # Compute curve fit data + if fit_mean_data: + xdata, ydata, sigma, xraw, yraw = average_curve_fit_data( + data, return_raw=True) + else: + xdata, ydata, sigma = curve_fit_data(data) + + if p0_func is not None and p0 is None: + p0 = p0_func(xdata, ydata, sigma=sigma) + + # Run curve fit + popt, pcov = scipy_curve_fit( + func, xdata, ydata, sigma=sigma, p0=p0, bounds=bounds, **kwargs) + popt_err = np.sqrt(np.diag(pcov)) + + # Compute chi-squared for fit + yfits = func(xdata, *popt) + chisq = np.mean(((yfits - ydata) / sigma) ** 2) + + result = AnalysisResult({ + "popt": popt, + "popt_err": popt_err, + "pcov": pcov, + "chisq": chisq, + }) + + if plot and HAS_MATPLOTLIB: + mean_data = (xdata, ydata, sigma) if fit_mean_data else None + ax = curve_fit_plot(func, popt, popt_err, xraw, + ydata=yraw, mean_data=mean_data, ax=ax) + # TODO: figure out what to do with plots + return result, ax + + return result, None + + +def curve_fit_plot( + func: Callable, + popt: np.ndarray, + popt_err: np.ndarray, + xdata: np.ndarray, + ydata: Optional[np.ndarray] = None, + mean_data: Optional[Tuple[np.ndarray]] = None, + num_fit_points: int = 100, + ax: Optional["AxesSubplot"] = None, +): + """Generate plot of raw and fitted data""" + if not HAS_MATPLOTLIB: + raise ImportError( + 'curve_fit_plot requires matplotlib to generate curve fit plot.' + ' Run "pip install matplotlib" before.') + + if ax is None: + plt.figure() + ax = plt.gca() + + # Plot fit data + xs = np.linspace(np.min(xdata), np.max(xdata), num_fit_points) + ys_fit = func(xs, *popt) + ax.plot(xs, ys_fit, color="blue", linestyle="-", linewidth=2) + + # Plot standard error interval + ys_upper = func(xs, *(popt + popt_err)) + ys_lower = func(xs, *(popt - popt_err)) + ax.fill_between(xs, ys_lower, ys_upper, color="blue", alpha=0.1) + + # Plot raw data + if ydata is not None: + ax.scatter(xdata, ydata, c="grey", marker="x") + + # Error bar plot of mean data + if mean_data is not None: + ax.errorbar(mean_data[0], mean_data[1], mean_data[2], + marker='.', markersize=9, linestyle='--', color='red') + + # Formatting + ax.tick_params(labelsize=14) + ax.grid(True) + return ax + + +def curve_fit_data(data: List[Dict[str, any]]) -> Tuple[np.ndarray]: + """Return array of (x, y, sigma) data for curve fitting. + + Args + data: list of circuit data dictionaries. + + Returns: + tuple: ``(x, y, sigma)`` tuple of arrays of x-values, + y-values, and standard deviations of y-values. + + Raises: + QiskitError: if input data is not level-2 measurement. + + .. note:: + + This requires metadata to contain `"xdata"`, `"ylabel"` + keys containing the numeric x-value for fitting, and the outcome + bitstring for computing y-value probability from counts. + + .. note:: + + Currently only level-2 (count) measurement data is supported. + """ + size = len(data) + xdata = np.zeros(size, dtype=int) + ydata = np.zeros(size, dtype=float) + ydata_var = np.zeros(size, dtype=float) + + for i, datum in enumerate(data): + metadata = datum["metadata"] + meas_level = metadata.get("meas_level", 2) + xdata[i] = metadata["xdata"] + if meas_level == 2: + y_mean, y_var = level2_probability( + datum["counts"], metadata["ylabel"]) + ydata[i] = y_mean + ydata_var[i] = y_var + else: + # Adding support for level-1 measurment data is still todo. + raise QiskitError("Measurement level 1 is not supported.") + + return xdata, ydata, np.sqrt(ydata_var) + + +def average_curve_fit_data(data: List[Dict[str, any]], + return_raw: bool = False) -> Tuple[np.ndarray]: + """Return array of (x, y_mean, sigma) data for curve fitting. + + Compared to :func:`curve_fit_data` this computes the sample mean and + standard deviation of each set of y-data with the same x value. + + Args + data: list of circuit data dictionaries. + + Returns: + tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where + ``x`` is an arrays of unique x-values, ``y`` is an array of + sample mean y-values, and ``sigma`` is an array of sample standard + deviation of y values. + tuple: ``(x, y_mean, sigma, x_raw, y_raw) where ``x_raw, y_raw`` are the + full set of x and y data arrays before averaging. + + .. note:: + + This requires metadata to contain `"xdata"`, `"ylabel"` + keys containing the numeric x-value for fitting, and the outcome + bitstring for computing y-value probability from counts. + + .. note:: + + Currently only level-2 (count) measurement data is supported. + """ + x_raw, y_raw, _ = curve_fit_data(data) + + # Note this assumes discrete X-data + x_means = np.unique(x_raw) + y_means = np.zeros(x_means.size) + y_sigmas = np.zeros(x_means.size) + for i in range(x_means.size): + ys = y_raw[x_raw == x_means[i]] + num_samples = len(ys) + sample_mean = np.mean(ys) + sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) + y_means[i] = sample_mean + y_sigmas[i] = np.sqrt(sample_var) + if return_raw: + return x_means, y_means, y_sigmas, x_raw, y_raw + return x_means, y_means, y_sigmas + + +def level2_probability(counts: "Counts", outcome: str) -> Tuple[float]: + """Return the outcome probability mean and variance. + + Args: + counts: A counts object. + outcome: bitstring for desired outcome probability. + + Returns: + tuple: (p_mean, p_var) of the probability mean and variance + estimated from the counts. + + .. note:: + + This assumes a binomial distribution where :math:`K` counts + of the desired outcome from :math:`N` shots the + mean probability is :math:`p = K / N` and the variance is + :math:`\\sigma^2 = N p (1-p)`. + """ + shots = sum(counts.values()) + p_mean = counts.get(outcome, 0.0) / shots + p_var = shots * p_mean * (1 - p_mean) + return p_mean, p_var From 4c8057c21a789cb0f49665053d67ab91ff7509a7 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Fri, 26 Mar 2021 11:56:00 -0400 Subject: [PATCH 06/25] Add curve_fit, multi_curve_fit, and plotting functions for xy data --- qiskit_experiments/analysis/curve_fitting.py | 145 +++++++++++++ qiskit_experiments/analysis/plotting.py | 201 +++++++++++++++++++ requirements.txt | 1 + 3 files changed, 347 insertions(+) create mode 100644 qiskit_experiments/analysis/curve_fitting.py create mode 100644 qiskit_experiments/analysis/plotting.py diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py new file mode 100644 index 0000000000..4230df3595 --- /dev/null +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -0,0 +1,145 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. +""" +Curve fitting functions for experiment analysis +""" +# pylint: disable = invalid-name + +from typing import List, Callable, Optional + +import numpy as np +import scipy.optimize as opt + +from qiskit.exceptions import QiskitError +from qiskit_experiments.base_analysis import AnalysisResult + + +def curve_fit( + func: Callable, + xdata: np.ndarray, + ydata: np.ndarray, + p0: np.ndarray, + sigma: Optional[np.ndarray] = None, + **kwargs, +) -> AnalysisResult: + """Use non-linear least squares to fit a function, f, to data + + Wraps scipy.optimize.curve_fit + + Args: + func: a fit function `f(x *params)`. + xdata: a 1D array of x-data + ydata: a 1D array of y-data + p0: initial guess for optimization parameters. + sigma: Optional, a 1D array of standard deviations in ydata. + kwargs: additional kwargs for scipy.optimize.curve_fit. + + Returns: + AnalysisResult: result containing `popt` the optimal fit parameters, + `popt_err` the standard error estimates popt, + `pcov` the covariance matrix for the fit, + `chisq` the chi-squared parameter of fit, + `xrange` the range of xdata values used for fit. + """ + + # Run curve fit + # pylint: disable unbackend-tuple-unpacking + popt, pcov = opt.curve_fit(func, xdata, ydata, sigma=sigma, p0=p0, **kwargs) + popt_err = np.sqrt(np.diag(pcov)) + + # Compute chi-squared for fit + yfits = func(xdata, *popt) + chisq = np.mean(((yfits - ydata) / sigma) ** 2) + + # Compute xdata range for fit + xdata_range = [min(xdata), max(xdata)] + + result = { + "popt": popt, + "popt_err": popt_err, + "pcov": pcov, + "chisq": chisq, + "xrange": xdata_range, + } + # TODO: + # 1. Add some basic validation of computer good / bad based on fit result. + # 2. Add error handling so if fitting fails we can return an analysis + # result containing this information + return AnalysisResult(result) + + +def multi_curve_fit( + funcs: List[Callable], + xdata: np.ndarray, + ydata: np.ndarray, + p0: np.ndarray, + sigma: Optional[np.ndarray] = None, + weights: Optional[np.ndarray] = None, + **kwargs, +): + """Use non-linear least squares to fit a list of functions, f_i, to data + + Args: + funcs: a list of objective functions with signatures `f_i(x, *params)`. + xdata: a 2D array of xdata and function function indexes. + ydata: a 1D array of ydata + p0: initial guess for optimization parameters. + sigma: Optional, a 1D array of standard deviations in ydata. + weights: Optional, a 1D list of numeric weights for each function. + kwargs: additional kwargs for scipy.optimize.curve_fit. + + Returns: + AnalysisResult: result containing `popt` the optimal fit parameters, + `popt_err` the standard error estimates popt, + `pcov` the covariance matrix for the fit, + `chisq` the chi-squared parameter of fit. + `xrange` the range of xdata values used for fit. + + Raises: + QiskitError: if input xdata is not 2D. + """ + num_funcs = len(funcs) + + # Get 1D xdata and indices from 2D input xdata + xdata = np.asarray(xdata, dtype=float) + if xdata.ndim != 2: + raise QiskitError("multi_curve_fit requires 2D xdata.") + xdata1d, xindex = xdata.T + + # Get positions for indexes data sets + idxs = [xindex == i for i in range(num_funcs)] + + # Combine weights and sigma for transformation + if weights is None: + wsigma = sigma + else: + wsigma = np.zeros(ydata.size) + if sigma is None: + for i in range(num_funcs): + wsigma[idxs[i]] = 1 / np.sqrt(weights[i]) + else: + for i in range(num_funcs): + wsigma[idxs[i]] = sigma[idxs[i]] / np.sqrt(weights[i]) + + # Define multi-objective function + def f(x, *params): + y = np.zeros(x.size) + for i in range(num_funcs): + xi = x[idxs[i]] + yi = funcs[i](xi, *params) + y[idxs[i]] = yi + return y + + # Run linearized curve_fit + analysis_result = curve_fit(f, xdata1d, ydata, p0, sigma=wsigma, **kwargs) + + return analysis_result diff --git a/qiskit_experiments/analysis/plotting.py b/qiskit_experiments/analysis/plotting.py new file mode 100644 index 0000000000..bd9312e17a --- /dev/null +++ b/qiskit_experiments/analysis/plotting.py @@ -0,0 +1,201 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. +""" +Plotting functions for experiment analysis +""" +import functools +from typing import Callable, Optional +import numpy as np + +from qiskit_experiments.base_analysis import AnalysisResult + +try: + from matplotlib import pyplot as plt + + HAS_MATPLOTLIB = True +except ImportError: + HAS_MATPLOTLIB = False + + +def requires_matplotlib(func): + """Decorator for functions requiring matplotlib""" + + @functools.wraps(func) + def wrapped(*args, **kwargs): + if not HAS_MATPLOTLIB: + raise ImportError( + f"{func} requires matplotlib to generate curve fit plot." + ' Run "pip install matplotlib" before.' + ) + return func(*args, **kwargs) + + return wrapped + + +@requires_matplotlib +def plot_curve_fit( + func: Callable, + result: AnalysisResult, + confidence_interval: bool = True, + ax: Optional["AxesSubplot"] = None, + num_fit_points: int = 100, + labelsize: int = 14, + grid: bool = True, + **kwargs, +) -> "AxesSubplot": + """Generate plot of a curve fit analysis result. + + Wraps ``matplotlib.pyplot.plot``. + + Args: + func: the fit funcion for curve_fit. + result: an AnalysisResult from curve_fit. + confidence_interval: if True plot the confidence interval from popt_err. + ax: Optional, a matplotlib axes to add the plot to. + num_fit_points: the number of points to plot for xrange. + labelsize: label size for plot + grid: Show grid on plot. + **kwargs: Additional options for matplotlib.pyplot.plot + + Returns: + AxesSubPlot: the matplotlib axes containing the plot. + + Raises: + ImportError: if matplotlib is not installed. + """ + if ax is None: + plt.figure() + ax = plt.gca() + + # Result data + popt = result["popt"] + popt_err = result["popt_err"] + xmin, xmax = result["xrange"] + + # Default plot options + plot_opts = kwargs.copy() + if "color" not in plot_opts: + plot_opts["color"] = "blue" + if "linestyle" not in plot_opts: + plot_opts["linestyle"] = "-" + if "linewidth" not in plot_opts: + plot_opts["linewidth"] = 2 + + # Plot fit data + xs = np.linspace(xmin, xmax, num_fit_points) + ys_fit = func(xs, *popt) + ax.plot(xs, ys_fit, **plot_opts) + + # Plot standard error interval + if confidence_interval: + ys_upper = func(xs, *(popt + popt_err)) + ys_lower = func(xs, *(popt - popt_err)) + ax.fill_between(xs, ys_lower, ys_upper, alpha=0.1, color=plot_opts["color"]) + + # Formatting + ax.tick_params(labelsize=labelsize) + ax.grid(grid) + return ax + + +@requires_matplotlib +def plot_scatter( + xdata: np.ndarray, + ydata: np.ndarray, + ax: Optional["AxesSubplot"] = None, + labelsize: int = 14, + grid: bool = True, + **kwargs, +) -> "AxesSubplot": + """Generate a scatter plot of xy data. + + Wraps ``matplotlib.pyplot.scatter``. + + Args: + xdata: xdata used for fitting + ydata: ydata used for fitting + ax: Optional, a matplotlib axes to add the plot to. + labelsize: label size for plot + grid: Show grid on plot. + **kwargs: Additional options for matplotlib.pyplot.scatter + + Returns: + AxesSubPlot: the matplotlib axes containing the plot. + """ + if ax is None: + plt.figure() + ax = plt.gca() + + # Default plot options + plot_opts = kwargs.copy() + if "c" not in plot_opts: + plot_opts["c"] = "grey" + if "marker" not in plot_opts: + plot_opts["marker"] = "x" + + # Plot data + ax.scatter(xdata, ydata, **plot_opts) + + # Formatting + ax.tick_params(labelsize=labelsize) + ax.grid(grid) + return ax + + +@requires_matplotlib +def plot_errorbar( + xdata: np.ndarray, + ydata: np.ndarray, + sigma: Optional[np.ndarray] = None, + ax: Optional["AxesSubplot"] = None, + labelsize: int = 14, + grid: bool = True, + **kwargs, +) -> "AxesSubplot": + """Generate an errorbar plot of xy data. + + Wraps ``matplotlib.pyplot.errorbar`` + + Args: + xdata: xdata used for fitting + ydata: ydata used for fitting + sigma: Optional, standard deviation of ydata + ax: Optional, a matplotlib axes to add the plot to. + labelsize: label size for plot + grid: Show grid on plot. + **kwargs: Additional options for matplotlib.pyplot.scatter + + Returns: + AxesSubPlot: the matplotlib axes containing the plot. + """ + if ax is None: + plt.figure() + ax = plt.gca() + + # Default plot options + plot_opts = kwargs.copy() + if "color" not in plot_opts: + plot_opts["color"] = "red" + if "marker" not in plot_opts: + plot_opts["marker"] = "." + if "markersize" not in plot_opts: + plot_opts["markersize"] = 9 + if "linestyle" not in plot_opts: + plot_opts["linestyle"] = "--" + + # Plot data + ax.errorbar(xdata, ydata, yerr=sigma, **plot_opts) + + # Formatting + ax.tick_params(labelsize=labelsize) + ax.grid(grid) + return ax diff --git a/requirements.txt b/requirements.txt index fb0a5a9e9a..9fa126e95f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ numpy>=1.17 +scipy>=1.4 qiskit-terra>=0.16.0 From 271995ae646243455a4e7dbb1327e0c0e5d0c553 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Fri, 19 Mar 2021 10:55:20 -0400 Subject: [PATCH 07/25] Refactor example CurveFitAnalysis class to use library functions --- .../analysis/curve_fit_analysis.py | 199 +++++++++++++- qiskit_experiments/analysis/curve_fit_data.py | 255 ------------------ 2 files changed, 191 insertions(+), 263 deletions(-) delete mode 100644 qiskit_experiments/analysis/curve_fit_data.py diff --git a/qiskit_experiments/analysis/curve_fit_analysis.py b/qiskit_experiments/analysis/curve_fit_analysis.py index 293553ae9a..23542f6173 100644 --- a/qiskit_experiments/analysis/curve_fit_analysis.py +++ b/qiskit_experiments/analysis/curve_fit_analysis.py @@ -12,12 +12,15 @@ """ Curve fitter analysis class """ +# pylint: disable = invalid-name -from typing import Union, Optional, Callable, List, Tuple, Dict +from typing import Optional, Callable, List, Tuple, Dict import numpy as np -from qiskit_experiments.base_analysis import BaseAnalysis, AnalysisResult -from .curve_fit_data import curve_fit +from qiskit.exceptions import QiskitError +from qiskit_experiments.base_analysis import BaseAnalysis +from .curve_fitting import curve_fit +from .plotting import HAS_MATPLOTLIB, plot_curve_fit, plot_errorbar, plot_scatter class CurveFitAnalysis(BaseAnalysis): @@ -34,7 +37,7 @@ def _run_analysis( fit_mean_data: bool = False, plot: bool = True, ax: Optional["AxesSubplot"] = None, - **kwargs + **kwargs, ): """Run curve fit analysis on circuit data. @@ -55,7 +58,187 @@ def _run_analysis( AnalysisResult objects, and ``figures`` may be None, a single figure, or a list of figures. """ - analysis_result, ax = curve_fit( - experiment_data.data, func=func, p0=p0, p0_func=p0_func, - bound=bounds, fit_mean_data=fit_mean_data, plot=plotm ax=ax, **kwargs) - return result, None + return self._run_curve_fit( + experiment_data.data, + func, + p0=p0, + p0_func=p0_func, + bounds=bounds, + fit_mean_data=fit_mean_data, + plot=plot, + ax=ax, + **kwargs, + ) + + def _run_curve_fit( + self, + data: List[Dict[str, any]], + func: Callable, + p0: Optional[List[float]] = None, + p0_func: Optional[Callable] = None, + bounds: Optional[Tuple] = None, + fit_mean_data: bool = False, + plot: bool = True, + ax: Optional["AxesSubplot"] = None, + **kwargs, + ): + """Run curve fit analysis on circuit data. + + Args: + data (ExperimentData): the experiment data to analyze. + func: fit function `f(x, *params)`. + p0: Optional, initial parameter values for curve_fit. + p0_func: Optional, function for calculating initial p0. + bounds: Optional, parameter bounds for curve_fit. + fit_mean_data: Optional, if True pass means of data points to curve_fit. + plot: If True generate a plot of fitted data. + ax: Optional, matplotlib axis to add plot to. + kwargs: additional kwargs to pass to curve_fit. + + Returns: + tuple: A pair ``(analysis_results, figures)`` where + ``analysis_results`` may be a single or list of + AnalysisResult objects, and ``figures`` may be + None, a single figure, or a list of figures. + """ + # Compute curve fit data + if fit_mean_data: + xdata, ydata, sigma, xraw, yraw = average_curve_fit_data(data, return_raw=True) + else: + xdata, ydata, sigma = curve_fit_data(data) + + if p0_func is not None and p0 is None: + p0 = p0_func(xdata, ydata, sigma=sigma) + + # Run curve fit + result = curve_fit(func, xdata, ydata, p0, sigma=sigma, bounds=bounds, **kwargs) + + if plot and HAS_MATPLOTLIB: + ax = plot_curve_fit(func, result) + if fit_mean_data: + ax = plot_scatter(xraw, yraw, ax=ax) + ax = plot_errorbar(xdata, ydata, sigma, ax=ax) + else: + ax = plot_scatter(xdata, ydata) + figs = [ax] + else: + figs = None + + return result, figs + + +# NOTE: the following should be handled by DataProcessing module + + +def curve_fit_data(data: List[Dict[str, any]]) -> Tuple[np.ndarray]: + """Return array of (x, y, sigma) data for curve fitting. + + Args + data: list of circuit data dictionaries. + + Returns: + tuple: ``(x, y, sigma)`` tuple of arrays of x-values, + y-values, and standard deviations of y-values. + + Raises: + QiskitError: if input data is not level-2 measurement. + + .. note:: + + This requires metadata to contain `"xdata"`, `"ylabel"` + keys containing the numeric x-value for fitting, and the outcome + bitstring for computing y-value probability from counts. + + .. note:: + + Currently only level-2 (count) measurement data is supported. + """ + size = len(data) + xdata = np.zeros(size, dtype=int) + ydata = np.zeros(size, dtype=float) + ydata_var = np.zeros(size, dtype=float) + + for i, datum in enumerate(data): + metadata = datum["metadata"] + meas_level = metadata.get("meas_level", 2) + xdata[i] = metadata["xdata"] + if meas_level == 2: + y_mean, y_var = level2_probability(datum["counts"], metadata["ylabel"]) + ydata[i] = y_mean + ydata_var[i] = y_var + else: + # Adding support for level-1 measurment data is still todo. + raise QiskitError("Measurement level 1 is not supported.") + + return xdata, ydata, np.sqrt(ydata_var) + + +def average_curve_fit_data( + data: List[Dict[str, any]], return_raw: bool = False +) -> Tuple[np.ndarray]: + """Return array of (x, y_mean, sigma) data for curve fitting. + + Compared to :func:`curve_fit_data` this computes the sample mean and + standard deviation of each set of y-data with the same x value. + + Args + data: list of circuit data dictionaries. + + Returns: + tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where + ``x`` is an arrays of unique x-values, ``y`` is an array of + sample mean y-values, and ``sigma`` is an array of sample standard + deviation of y values. + tuple: ``(x, y_mean, sigma, x_raw, y_raw) where ``x_raw, y_raw`` are the + full set of x and y data arrays before averaging. + + .. note:: + + This requires metadata to contain `"xdata"`, `"ylabel"` + keys containing the numeric x-value for fitting, and the outcome + bitstring for computing y-value probability from counts. + + .. note:: + + Currently only level-2 (count) measurement data is supported. + """ + x_raw, y_raw, _ = curve_fit_data(data) + + # Note this assumes discrete X-data + x_means = np.unique(x_raw) + y_means = np.zeros(x_means.size) + y_sigmas = np.zeros(x_means.size) + for i in range(x_means.size): + ys = y_raw[x_raw == x_means[i]] + num_samples = len(ys) + sample_mean = np.mean(ys) + sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) + y_means[i] = sample_mean + y_sigmas[i] = np.sqrt(sample_var) + if return_raw: + return x_means, y_means, y_sigmas, x_raw, y_raw + return x_means, y_means, y_sigmas + + +def level2_probability(counts: "Counts", outcome: str) -> Tuple[float]: + """Return the outcome probability mean and variance. + + Args: + counts: A counts object. + outcome: bitstring for desired outcome probability. + + Returns: + tuple: (p_mean, p_var) of the probability mean and variance + estimated from the counts. + + .. note:: + + This assumes a binomial distribution where :math:`K` counts + of the desired outcome from :math:`N` shots the + mean probability is :math:`p = K / N` and the variance is + :math:`\\sigma^2 = N p (1-p)`. + """ + shots = sum(counts.values()) + p_mean = counts.get(outcome, 0.0) / shots + p_var = shots * p_mean * (1 - p_mean) + return p_mean, p_var diff --git a/qiskit_experiments/analysis/curve_fit_data.py b/qiskit_experiments/analysis/curve_fit_data.py deleted file mode 100644 index fa8b6b620a..0000000000 --- a/qiskit_experiments/analysis/curve_fit_data.py +++ /dev/null @@ -1,255 +0,0 @@ -# This code is part of Qiskit. -# -# (C) Copyright IBM 2021. -# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. -# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. -""" -Curve fitter analysis class -""" - -from typing import List, Tuple, Dict, Callable, Optional - -import numpy as np -from scipy.optimize import curve_fit as scipy_curve_fit - -from qiskit.exceptions import QiskitError -from qiskit_experiments.base_analysis import AnalysisResult - -try: - from matplotlib import pyplot as plt - - HAS_MATPLOTLIB = True -except ImportError: - HAS_MATPLOTLIB = False - - -# pylint: disable = invalid-name -def curve_fit( - data: List[Dict[str, any]], - func: Callable, - p0: Optional[List[float]] = None, - p0_func: Optional[Callable] = None, - bounds: Optional[Tuple] = None, - fit_mean_data: bool = False, - plot: bool = True, - ax: Optional["AxesSubplot"] = None, - **kwargs -): - """Run curve fit analysis on circuit data. - - Args: - data (ExperimentData): the experiment data to analyze. - func: fit function `f(x, *params)`. - p0: Optional, initial parameter values for curve_fit. - p0_func: Optional, function for calculating initial p0. - bounds: Optional, parameter bounds for curve_fit. - fit_mean_data: Optional, if True pass means of data points to curve_fit. - plot: If True generate a plot of fitted data. - ax: Optional, matplotlib axis to add plot to. - kwargs: additional kwargs to pass to curve_fit. - - Returns: - tuple: A pair ``(analysis_results, figures)`` where - ``analysis_results`` may be a single or list of - AnalysisResult objects, and ``figures`` may be - None, a single figure, or a list of figures. - """ - # Compute curve fit data - if fit_mean_data: - xdata, ydata, sigma, xraw, yraw = average_curve_fit_data( - data, return_raw=True) - else: - xdata, ydata, sigma = curve_fit_data(data) - - if p0_func is not None and p0 is None: - p0 = p0_func(xdata, ydata, sigma=sigma) - - # Run curve fit - popt, pcov = scipy_curve_fit( - func, xdata, ydata, sigma=sigma, p0=p0, bounds=bounds, **kwargs) - popt_err = np.sqrt(np.diag(pcov)) - - # Compute chi-squared for fit - yfits = func(xdata, *popt) - chisq = np.mean(((yfits - ydata) / sigma) ** 2) - - result = AnalysisResult({ - "popt": popt, - "popt_err": popt_err, - "pcov": pcov, - "chisq": chisq, - }) - - if plot and HAS_MATPLOTLIB: - mean_data = (xdata, ydata, sigma) if fit_mean_data else None - ax = curve_fit_plot(func, popt, popt_err, xraw, - ydata=yraw, mean_data=mean_data, ax=ax) - # TODO: figure out what to do with plots - return result, ax - - return result, None - - -def curve_fit_plot( - func: Callable, - popt: np.ndarray, - popt_err: np.ndarray, - xdata: np.ndarray, - ydata: Optional[np.ndarray] = None, - mean_data: Optional[Tuple[np.ndarray]] = None, - num_fit_points: int = 100, - ax: Optional["AxesSubplot"] = None, -): - """Generate plot of raw and fitted data""" - if not HAS_MATPLOTLIB: - raise ImportError( - 'curve_fit_plot requires matplotlib to generate curve fit plot.' - ' Run "pip install matplotlib" before.') - - if ax is None: - plt.figure() - ax = plt.gca() - - # Plot fit data - xs = np.linspace(np.min(xdata), np.max(xdata), num_fit_points) - ys_fit = func(xs, *popt) - ax.plot(xs, ys_fit, color="blue", linestyle="-", linewidth=2) - - # Plot standard error interval - ys_upper = func(xs, *(popt + popt_err)) - ys_lower = func(xs, *(popt - popt_err)) - ax.fill_between(xs, ys_lower, ys_upper, color="blue", alpha=0.1) - - # Plot raw data - if ydata is not None: - ax.scatter(xdata, ydata, c="grey", marker="x") - - # Error bar plot of mean data - if mean_data is not None: - ax.errorbar(mean_data[0], mean_data[1], mean_data[2], - marker='.', markersize=9, linestyle='--', color='red') - - # Formatting - ax.tick_params(labelsize=14) - ax.grid(True) - return ax - - -def curve_fit_data(data: List[Dict[str, any]]) -> Tuple[np.ndarray]: - """Return array of (x, y, sigma) data for curve fitting. - - Args - data: list of circuit data dictionaries. - - Returns: - tuple: ``(x, y, sigma)`` tuple of arrays of x-values, - y-values, and standard deviations of y-values. - - Raises: - QiskitError: if input data is not level-2 measurement. - - .. note:: - - This requires metadata to contain `"xdata"`, `"ylabel"` - keys containing the numeric x-value for fitting, and the outcome - bitstring for computing y-value probability from counts. - - .. note:: - - Currently only level-2 (count) measurement data is supported. - """ - size = len(data) - xdata = np.zeros(size, dtype=int) - ydata = np.zeros(size, dtype=float) - ydata_var = np.zeros(size, dtype=float) - - for i, datum in enumerate(data): - metadata = datum["metadata"] - meas_level = metadata.get("meas_level", 2) - xdata[i] = metadata["xdata"] - if meas_level == 2: - y_mean, y_var = level2_probability( - datum["counts"], metadata["ylabel"]) - ydata[i] = y_mean - ydata_var[i] = y_var - else: - # Adding support for level-1 measurment data is still todo. - raise QiskitError("Measurement level 1 is not supported.") - - return xdata, ydata, np.sqrt(ydata_var) - - -def average_curve_fit_data(data: List[Dict[str, any]], - return_raw: bool = False) -> Tuple[np.ndarray]: - """Return array of (x, y_mean, sigma) data for curve fitting. - - Compared to :func:`curve_fit_data` this computes the sample mean and - standard deviation of each set of y-data with the same x value. - - Args - data: list of circuit data dictionaries. - - Returns: - tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where - ``x`` is an arrays of unique x-values, ``y`` is an array of - sample mean y-values, and ``sigma`` is an array of sample standard - deviation of y values. - tuple: ``(x, y_mean, sigma, x_raw, y_raw) where ``x_raw, y_raw`` are the - full set of x and y data arrays before averaging. - - .. note:: - - This requires metadata to contain `"xdata"`, `"ylabel"` - keys containing the numeric x-value for fitting, and the outcome - bitstring for computing y-value probability from counts. - - .. note:: - - Currently only level-2 (count) measurement data is supported. - """ - x_raw, y_raw, _ = curve_fit_data(data) - - # Note this assumes discrete X-data - x_means = np.unique(x_raw) - y_means = np.zeros(x_means.size) - y_sigmas = np.zeros(x_means.size) - for i in range(x_means.size): - ys = y_raw[x_raw == x_means[i]] - num_samples = len(ys) - sample_mean = np.mean(ys) - sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) - y_means[i] = sample_mean - y_sigmas[i] = np.sqrt(sample_var) - if return_raw: - return x_means, y_means, y_sigmas, x_raw, y_raw - return x_means, y_means, y_sigmas - - -def level2_probability(counts: "Counts", outcome: str) -> Tuple[float]: - """Return the outcome probability mean and variance. - - Args: - counts: A counts object. - outcome: bitstring for desired outcome probability. - - Returns: - tuple: (p_mean, p_var) of the probability mean and variance - estimated from the counts. - - .. note:: - - This assumes a binomial distribution where :math:`K` counts - of the desired outcome from :math:`N` shots the - mean probability is :math:`p = K / N` and the variance is - :math:`\\sigma^2 = N p (1-p)`. - """ - shots = sum(counts.values()) - p_mean = counts.get(outcome, 0.0) / shots - p_var = shots * p_mean * (1 - p_mean) - return p_mean, p_var From 0a7fb8b89df009ef8d004ac209088736399ba06c Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Thu, 8 Apr 2021 11:49:24 -0400 Subject: [PATCH 08/25] Add curve fitting data --- .../analysis/curve_fit_analysis.py | 121 +------------ .../analysis/curve_fitting_data.py | 167 ++++++++++++++++++ 2 files changed, 170 insertions(+), 118 deletions(-) create mode 100644 qiskit_experiments/analysis/curve_fitting_data.py diff --git a/qiskit_experiments/analysis/curve_fit_analysis.py b/qiskit_experiments/analysis/curve_fit_analysis.py index 23542f6173..e3c95c41d6 100644 --- a/qiskit_experiments/analysis/curve_fit_analysis.py +++ b/qiskit_experiments/analysis/curve_fit_analysis.py @@ -20,6 +20,7 @@ from qiskit.exceptions import QiskitError from qiskit_experiments.base_analysis import BaseAnalysis from .curve_fitting import curve_fit +from .curve_fitting_data import curve_fit_data, mean_xy_data from .plotting import HAS_MATPLOTLIB, plot_curve_fit, plot_errorbar, plot_scatter @@ -103,7 +104,8 @@ def _run_curve_fit( """ # Compute curve fit data if fit_mean_data: - xdata, ydata, sigma, xraw, yraw = average_curve_fit_data(data, return_raw=True) + xraw, yraw, _ = curve_fit_data(data) + xdata, ydata, sigma = average_curve_fit_data(xraw, yraw) else: xdata, ydata, sigma = curve_fit_data(data) @@ -125,120 +127,3 @@ def _run_curve_fit( figs = None return result, figs - - -# NOTE: the following should be handled by DataProcessing module - - -def curve_fit_data(data: List[Dict[str, any]]) -> Tuple[np.ndarray]: - """Return array of (x, y, sigma) data for curve fitting. - - Args - data: list of circuit data dictionaries. - - Returns: - tuple: ``(x, y, sigma)`` tuple of arrays of x-values, - y-values, and standard deviations of y-values. - - Raises: - QiskitError: if input data is not level-2 measurement. - - .. note:: - - This requires metadata to contain `"xdata"`, `"ylabel"` - keys containing the numeric x-value for fitting, and the outcome - bitstring for computing y-value probability from counts. - - .. note:: - - Currently only level-2 (count) measurement data is supported. - """ - size = len(data) - xdata = np.zeros(size, dtype=int) - ydata = np.zeros(size, dtype=float) - ydata_var = np.zeros(size, dtype=float) - - for i, datum in enumerate(data): - metadata = datum["metadata"] - meas_level = metadata.get("meas_level", 2) - xdata[i] = metadata["xdata"] - if meas_level == 2: - y_mean, y_var = level2_probability(datum["counts"], metadata["ylabel"]) - ydata[i] = y_mean - ydata_var[i] = y_var - else: - # Adding support for level-1 measurment data is still todo. - raise QiskitError("Measurement level 1 is not supported.") - - return xdata, ydata, np.sqrt(ydata_var) - - -def average_curve_fit_data( - data: List[Dict[str, any]], return_raw: bool = False -) -> Tuple[np.ndarray]: - """Return array of (x, y_mean, sigma) data for curve fitting. - - Compared to :func:`curve_fit_data` this computes the sample mean and - standard deviation of each set of y-data with the same x value. - - Args - data: list of circuit data dictionaries. - - Returns: - tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where - ``x`` is an arrays of unique x-values, ``y`` is an array of - sample mean y-values, and ``sigma`` is an array of sample standard - deviation of y values. - tuple: ``(x, y_mean, sigma, x_raw, y_raw) where ``x_raw, y_raw`` are the - full set of x and y data arrays before averaging. - - .. note:: - - This requires metadata to contain `"xdata"`, `"ylabel"` - keys containing the numeric x-value for fitting, and the outcome - bitstring for computing y-value probability from counts. - - .. note:: - - Currently only level-2 (count) measurement data is supported. - """ - x_raw, y_raw, _ = curve_fit_data(data) - - # Note this assumes discrete X-data - x_means = np.unique(x_raw) - y_means = np.zeros(x_means.size) - y_sigmas = np.zeros(x_means.size) - for i in range(x_means.size): - ys = y_raw[x_raw == x_means[i]] - num_samples = len(ys) - sample_mean = np.mean(ys) - sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) - y_means[i] = sample_mean - y_sigmas[i] = np.sqrt(sample_var) - if return_raw: - return x_means, y_means, y_sigmas, x_raw, y_raw - return x_means, y_means, y_sigmas - - -def level2_probability(counts: "Counts", outcome: str) -> Tuple[float]: - """Return the outcome probability mean and variance. - - Args: - counts: A counts object. - outcome: bitstring for desired outcome probability. - - Returns: - tuple: (p_mean, p_var) of the probability mean and variance - estimated from the counts. - - .. note:: - - This assumes a binomial distribution where :math:`K` counts - of the desired outcome from :math:`N` shots the - mean probability is :math:`p = K / N` and the variance is - :math:`\\sigma^2 = N p (1-p)`. - """ - shots = sum(counts.values()) - p_mean = counts.get(outcome, 0.0) / shots - p_var = shots * p_mean * (1 - p_mean) - return p_mean, p_var diff --git a/qiskit_experiments/analysis/curve_fitting_data.py b/qiskit_experiments/analysis/curve_fitting_data.py new file mode 100644 index 0000000000..7f6d1f72c7 --- /dev/null +++ b/qiskit_experiments/analysis/curve_fitting_data.py @@ -0,0 +1,167 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. +""" +Curve fitting xydata for experiment analysis +""" +# pylint: disable = invalid-name + +from typing import List, Dict, Tuple, Callable + +import numpy as np + +from qiskit.exceptions import QiskitError + + +def curve_fit_data(data: List[Dict[str, any]], + data_processor: Callable) -> Tuple[np.ndarray]: + """Return array of (x, y, sigma) data for curve fitting. + + Args + data: list of circuit data dictionaries containing counts. + data_processor: callable for processing data to y, sigma + + Returns: + tuple: ``(x, y, sigma)`` tuple of arrays of x-values, + y-values, and standard deviations of y-values. + + Raises: + QiskitError: if input data is not level-2 measurement. + + .. note:: + + This requires metadata to contain `"xdata"`, `"ylabel"` + keys containing the numeric x-value for fitting, and the outcome + bitstring for computing y-value probability from counts. + + .. note:: + + Currently only level-2 (count) measurement data is supported. + """ + size = len(data) + xdata = np.zeros(size, dtype=int) + ydata = np.zeros(size, dtype=float) + ydata_var = np.zeros(size, dtype=float) + + for i, datum in enumerate(data): + metadata = datum["metadata"] + meas_level = metadata.get("meas_level", 2) + xdata[i] = metadata["xval"] + if meas_level == 2: + y_mean, y_var = data_processor(datum) + ydata[i] = y_mean + ydata_var[i] = y_var + else: + # Adding support for level-1 measurment data is still todo. + raise QiskitError("Measurement level 1 is not supported.") + + return xdata, ydata, np.sqrt(ydata_var) + + +def multi_curve_fit_data(data: List[Dict[str, any]], + data_processor: Callable) -> Tuple[np.ndarray]: + """Return array of (x, y, sigma) data for curve fitting. + + Args + data: list of circuit data dictionaries. + data_processor: callable for processing data to y, sigma + + Returns: + tuple: ``(x, y, sigma)`` tuple of arrays of x-values, + y-values, and standard deviations of y-values. + + Raises: + QiskitError: if input data is not level-2 measurement. + + .. note:: + + This requires metadata to contain `"xdata"`, `"ylabel"` + keys containing the numeric x-value for fitting, and the outcome + bitstring for computing y-value probability from counts. + + .. note:: + + Currently only level-2 (count) measurement data is supported. + """ + size = len(data) + xdata = np.zeros((size, 2), dtype=float) + ydata = np.zeros(size, dtype=float) + ydata_var = np.zeros(size, dtype=float) + + for i, datum in enumerate(data): + metadata = datum["metadata"] + meas_level = metadata.get("meas_level", 2) + xdata[i, 0] = metadata["xval"] + xdata[i, 1] = metadata["series"] + if meas_level == 2: + y_mean, y_var = data_processor(datum) + ydata[i] = y_mean + ydata_var[i] = y_var + else: + # Adding support for level-1 measurment data is still todo. + raise QiskitError("Measurement level 1 is not supported.") + + return xdata, ydata, np.sqrt(ydata_var) + + +def mean_xy_data(xdata: np.ndarray, ydata: np.ndarray) -> Tuple[np.ndarray]: + """Return (x, y_mean, sigma) data. + + The mean is taken over all ydata values with the same xdata value. + + Args + xdata: 1D or 2D array of xdata from curve_fit_data or + multi_curve_fit_data + ydata: array of ydata returned from curve_fit_data or + multi_curve_fit_data + + Returns: + tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where + ``x`` is an arrays of unique x-values, ``y`` is an array of + sample mean y-values, and ``sigma`` is an array of sample standard + deviation of y values. + """ + x_means = np.unique(xdata, axis=0) + y_means = np.zeros(x_means.size) + y_sigmas = np.zeros(x_means.size) + for i in range(x_means.size): + ys = ydata[xdata == x_means[i]] + num_samples = len(ys) + sample_mean = np.mean(ys) + sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) + y_means[i] = sample_mean + y_sigmas[i] = np.sqrt(sample_var) + return x_means, y_means, y_sigmas + + +def level2_probability(data: Dict[str, any], outcome: str) -> Tuple[float]: + """Return the outcome probability mean and variance. + + Args: + data: A data dict containing count data. + outcome: bitstring for desired outcome probability. + + Returns: + tuple: (p_mean, p_var) of the probability mean and variance + estimated from the counts. + + .. note:: + + This assumes a binomial distribution where :math:`K` counts + of the desired outcome from :math:`N` shots the + mean probability is :math:`p = K / N` and the variance is + :math:`\\sigma^2 = N p (1-p)`. + """ + counts = data["counts"] + shots = sum(counts.values()) + p_mean = counts.get(outcome, 0.0) / shots + p_var = shots * p_mean * (1 - p_mean) + return p_mean, p_var From 0ba6a74fa2639e2ebfd2c9efc108d0295da30fc2 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Thu, 8 Apr 2021 12:23:16 -0400 Subject: [PATCH 09/25] Remove CurveFitAnalysis class and leave as library functions --- qiskit_experiments/analysis/__init__.py | 4 +- .../analysis/curve_fit_analysis.py | 129 ------------------ 2 files changed, 1 insertion(+), 132 deletions(-) delete mode 100644 qiskit_experiments/analysis/curve_fit_analysis.py diff --git a/qiskit_experiments/analysis/__init__.py b/qiskit_experiments/analysis/__init__.py index b7ea07e437..69ac7c8c17 100644 --- a/qiskit_experiments/analysis/__init__.py +++ b/qiskit_experiments/analysis/__init__.py @@ -11,9 +11,7 @@ # that they have been altered from the originals. """ -Standalone analysis classes - -These can be used when creating experiments +Analysis helper functions """ from .curve_fit_analysis import CurveFitAnalysis diff --git a/qiskit_experiments/analysis/curve_fit_analysis.py b/qiskit_experiments/analysis/curve_fit_analysis.py deleted file mode 100644 index e3c95c41d6..0000000000 --- a/qiskit_experiments/analysis/curve_fit_analysis.py +++ /dev/null @@ -1,129 +0,0 @@ -# This code is part of Qiskit. -# -# (C) Copyright IBM 2021. -# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. -# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. -""" -Curve fitter analysis class -""" -# pylint: disable = invalid-name - -from typing import Optional, Callable, List, Tuple, Dict - -import numpy as np -from qiskit.exceptions import QiskitError -from qiskit_experiments.base_analysis import BaseAnalysis -from .curve_fitting import curve_fit -from .curve_fitting_data import curve_fit_data, mean_xy_data -from .plotting import HAS_MATPLOTLIB, plot_curve_fit, plot_errorbar, plot_scatter - - -class CurveFitAnalysis(BaseAnalysis): - """Analysis class based on scipy.optimize.curve_fit""" - - # pylint: disable = arguments-differ - def _run_analysis( - self, - experiment_data: "ExperimentData", - func: Callable, - p0: Optional[List[float]] = None, - p0_func: Optional[Callable] = None, - bounds: Optional[Tuple] = None, - fit_mean_data: bool = False, - plot: bool = True, - ax: Optional["AxesSubplot"] = None, - **kwargs, - ): - """Run curve fit analysis on circuit data. - - Args: - experiment_data (ExperimentData): the experiment data to analyze. - func: fit function `f(x, *params)`. - p0: Optional, initial parameter values for curve_fit. - p0_func: Optional, function for calculating initial p0. - bounds: Optional, parameter bounds for curve_fit. - fit_mean_data: Optional, if True pass means of data points to curve_fit. - plot: If True generate a plot of fitted data. - ax: Optional, matplotlib axis to add plot to. - kwargs: additional kwargs to pass to curve_fit. - - Returns: - tuple: A pair ``(analysis_results, figures)`` where - ``analysis_results`` may be a single or list of - AnalysisResult objects, and ``figures`` may be - None, a single figure, or a list of figures. - """ - return self._run_curve_fit( - experiment_data.data, - func, - p0=p0, - p0_func=p0_func, - bounds=bounds, - fit_mean_data=fit_mean_data, - plot=plot, - ax=ax, - **kwargs, - ) - - def _run_curve_fit( - self, - data: List[Dict[str, any]], - func: Callable, - p0: Optional[List[float]] = None, - p0_func: Optional[Callable] = None, - bounds: Optional[Tuple] = None, - fit_mean_data: bool = False, - plot: bool = True, - ax: Optional["AxesSubplot"] = None, - **kwargs, - ): - """Run curve fit analysis on circuit data. - - Args: - data (ExperimentData): the experiment data to analyze. - func: fit function `f(x, *params)`. - p0: Optional, initial parameter values for curve_fit. - p0_func: Optional, function for calculating initial p0. - bounds: Optional, parameter bounds for curve_fit. - fit_mean_data: Optional, if True pass means of data points to curve_fit. - plot: If True generate a plot of fitted data. - ax: Optional, matplotlib axis to add plot to. - kwargs: additional kwargs to pass to curve_fit. - - Returns: - tuple: A pair ``(analysis_results, figures)`` where - ``analysis_results`` may be a single or list of - AnalysisResult objects, and ``figures`` may be - None, a single figure, or a list of figures. - """ - # Compute curve fit data - if fit_mean_data: - xraw, yraw, _ = curve_fit_data(data) - xdata, ydata, sigma = average_curve_fit_data(xraw, yraw) - else: - xdata, ydata, sigma = curve_fit_data(data) - - if p0_func is not None and p0 is None: - p0 = p0_func(xdata, ydata, sigma=sigma) - - # Run curve fit - result = curve_fit(func, xdata, ydata, p0, sigma=sigma, bounds=bounds, **kwargs) - - if plot and HAS_MATPLOTLIB: - ax = plot_curve_fit(func, result) - if fit_mean_data: - ax = plot_scatter(xraw, yraw, ax=ax) - ax = plot_errorbar(xdata, ydata, sigma, ax=ax) - else: - ax = plot_scatter(xdata, ydata) - figs = [ax] - else: - figs = None - - return result, figs From ee870b9311b93d31039826a854ef135b8862aadc Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Thu, 8 Apr 2021 12:29:08 -0400 Subject: [PATCH 10/25] Linting --- qiskit_experiments/analysis/__init__.py | 2 -- qiskit_experiments/analysis/curve_fitting_data.py | 6 ++---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/qiskit_experiments/analysis/__init__.py b/qiskit_experiments/analysis/__init__.py index 69ac7c8c17..b26812a6d9 100644 --- a/qiskit_experiments/analysis/__init__.py +++ b/qiskit_experiments/analysis/__init__.py @@ -13,5 +13,3 @@ """ Analysis helper functions """ - -from .curve_fit_analysis import CurveFitAnalysis diff --git a/qiskit_experiments/analysis/curve_fitting_data.py b/qiskit_experiments/analysis/curve_fitting_data.py index 7f6d1f72c7..5b0520f744 100644 --- a/qiskit_experiments/analysis/curve_fitting_data.py +++ b/qiskit_experiments/analysis/curve_fitting_data.py @@ -21,8 +21,7 @@ from qiskit.exceptions import QiskitError -def curve_fit_data(data: List[Dict[str, any]], - data_processor: Callable) -> Tuple[np.ndarray]: +def curve_fit_data(data: List[Dict[str, any]], data_processor: Callable) -> Tuple[np.ndarray]: """Return array of (x, y, sigma) data for curve fitting. Args @@ -66,8 +65,7 @@ def curve_fit_data(data: List[Dict[str, any]], return xdata, ydata, np.sqrt(ydata_var) -def multi_curve_fit_data(data: List[Dict[str, any]], - data_processor: Callable) -> Tuple[np.ndarray]: +def multi_curve_fit_data(data: List[Dict[str, any]], data_processor: Callable) -> Tuple[np.ndarray]: """Return array of (x, y, sigma) data for curve fitting. Args From f54cfeffae832f83ec8e02f128e9e6e0233501ff Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Thu, 8 Apr 2021 12:45:09 -0400 Subject: [PATCH 11/25] Clean up curve_fit_data * Remove unneeded check for meas level now that data processor function is an arg * Add optional kwargs for x and series metadata keys for extracting data --- qiskit_experiments/analysis/curve_fitting.py | 2 +- .../analysis/curve_fitting_data.py | 59 ++++++------------- 2 files changed, 19 insertions(+), 42 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py index 4230df3595..10ba9519a0 100644 --- a/qiskit_experiments/analysis/curve_fitting.py +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -52,7 +52,7 @@ def curve_fit( """ # Run curve fit - # pylint: disable unbackend-tuple-unpacking + # pylint: disable = unbalanced-tuple-unpacking popt, pcov = opt.curve_fit(func, xdata, ydata, sigma=sigma, p0=p0, **kwargs) popt_err = np.sqrt(np.diag(pcov)) diff --git a/qiskit_experiments/analysis/curve_fitting_data.py b/qiskit_experiments/analysis/curve_fitting_data.py index 5b0520f744..db17491a68 100644 --- a/qiskit_experiments/analysis/curve_fitting_data.py +++ b/qiskit_experiments/analysis/curve_fitting_data.py @@ -21,12 +21,14 @@ from qiskit.exceptions import QiskitError -def curve_fit_data(data: List[Dict[str, any]], data_processor: Callable) -> Tuple[np.ndarray]: +def curve_fit_data(data: List[Dict[str, any]], data_processor: Callable, + x_key: str = "xval") -> Tuple[np.ndarray]: """Return array of (x, y, sigma) data for curve fitting. Args data: list of circuit data dictionaries containing counts. data_processor: callable for processing data to y, sigma + x_key: key for extracting xdata value from metadata (Default: "xval"). Returns: tuple: ``(x, y, sigma)`` tuple of arrays of x-values, @@ -34,16 +36,6 @@ def curve_fit_data(data: List[Dict[str, any]], data_processor: Callable) -> Tupl Raises: QiskitError: if input data is not level-2 measurement. - - .. note:: - - This requires metadata to contain `"xdata"`, `"ylabel"` - keys containing the numeric x-value for fitting, and the outcome - bitstring for computing y-value probability from counts. - - .. note:: - - Currently only level-2 (count) measurement data is supported. """ size = len(data) xdata = np.zeros(size, dtype=int) @@ -52,25 +44,25 @@ def curve_fit_data(data: List[Dict[str, any]], data_processor: Callable) -> Tupl for i, datum in enumerate(data): metadata = datum["metadata"] - meas_level = metadata.get("meas_level", 2) - xdata[i] = metadata["xval"] - if meas_level == 2: - y_mean, y_var = data_processor(datum) - ydata[i] = y_mean - ydata_var[i] = y_var - else: - # Adding support for level-1 measurment data is still todo. - raise QiskitError("Measurement level 1 is not supported.") + xdata[i] = metadata[x_key] + y_mean, y_var = data_processor(datum) + ydata[i] = y_mean + ydata_var[i] = y_var return xdata, ydata, np.sqrt(ydata_var) -def multi_curve_fit_data(data: List[Dict[str, any]], data_processor: Callable) -> Tuple[np.ndarray]: +def multi_curve_fit_data(data: List[Dict[str, any]], + data_processor: Callable, + x_key: str = "xval", + series_key: str = "series") -> Tuple[np.ndarray]: """Return array of (x, y, sigma) data for curve fitting. Args data: list of circuit data dictionaries. data_processor: callable for processing data to y, sigma + x_key: key for extracting xdata value from metadata (Default: "xval"). + series_key: key for extracting series value from metadata (Default: "series"). Returns: tuple: ``(x, y, sigma)`` tuple of arrays of x-values, @@ -78,16 +70,6 @@ def multi_curve_fit_data(data: List[Dict[str, any]], data_processor: Callable) - Raises: QiskitError: if input data is not level-2 measurement. - - .. note:: - - This requires metadata to contain `"xdata"`, `"ylabel"` - keys containing the numeric x-value for fitting, and the outcome - bitstring for computing y-value probability from counts. - - .. note:: - - Currently only level-2 (count) measurement data is supported. """ size = len(data) xdata = np.zeros((size, 2), dtype=float) @@ -96,16 +78,11 @@ def multi_curve_fit_data(data: List[Dict[str, any]], data_processor: Callable) - for i, datum in enumerate(data): metadata = datum["metadata"] - meas_level = metadata.get("meas_level", 2) - xdata[i, 0] = metadata["xval"] - xdata[i, 1] = metadata["series"] - if meas_level == 2: - y_mean, y_var = data_processor(datum) - ydata[i] = y_mean - ydata_var[i] = y_var - else: - # Adding support for level-1 measurment data is still todo. - raise QiskitError("Measurement level 1 is not supported.") + xdata[i, 0] = metadata[x_key] + xdata[i, 1] = metadata[series_key] + y_mean, y_var = data_processor(datum) + ydata[i] = y_mean + ydata_var[i] = y_var return xdata, ydata, np.sqrt(ydata_var) From 5457af8075fa413e41673e48725dfff0e1ff93ff Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Thu, 8 Apr 2021 12:45:57 -0400 Subject: [PATCH 12/25] Run black --- .../analysis/curve_fitting_data.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fitting_data.py b/qiskit_experiments/analysis/curve_fitting_data.py index db17491a68..d0b3ddf4e9 100644 --- a/qiskit_experiments/analysis/curve_fitting_data.py +++ b/qiskit_experiments/analysis/curve_fitting_data.py @@ -15,14 +15,12 @@ # pylint: disable = invalid-name from typing import List, Dict, Tuple, Callable - import numpy as np -from qiskit.exceptions import QiskitError - -def curve_fit_data(data: List[Dict[str, any]], data_processor: Callable, - x_key: str = "xval") -> Tuple[np.ndarray]: +def curve_fit_data( + data: List[Dict[str, any]], data_processor: Callable, x_key: str = "xval" +) -> Tuple[np.ndarray]: """Return array of (x, y, sigma) data for curve fitting. Args @@ -52,10 +50,12 @@ def curve_fit_data(data: List[Dict[str, any]], data_processor: Callable, return xdata, ydata, np.sqrt(ydata_var) -def multi_curve_fit_data(data: List[Dict[str, any]], - data_processor: Callable, - x_key: str = "xval", - series_key: str = "series") -> Tuple[np.ndarray]: +def multi_curve_fit_data( + data: List[Dict[str, any]], + data_processor: Callable, + x_key: str = "xval", + series_key: str = "series", +) -> Tuple[np.ndarray]: """Return array of (x, y, sigma) data for curve fitting. Args From c084e67ecbad3f490a8b45dc1700c522b96d6c84 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Thu, 8 Apr 2021 13:18:42 -0400 Subject: [PATCH 13/25] Add filters to curve fit data --- .../analysis/curve_fitting_data.py | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fitting_data.py b/qiskit_experiments/analysis/curve_fitting_data.py index d0b3ddf4e9..179a5b87ba 100644 --- a/qiskit_experiments/analysis/curve_fitting_data.py +++ b/qiskit_experiments/analysis/curve_fitting_data.py @@ -14,12 +14,12 @@ """ # pylint: disable = invalid-name -from typing import List, Dict, Tuple, Callable +from typing import List, Dict, Tuple, Callable, Iterable import numpy as np def curve_fit_data( - data: List[Dict[str, any]], data_processor: Callable, x_key: str = "xval" + data: List[Dict[str, any]], data_processor: Callable, x_key: str = "xval", **filters ) -> Tuple[np.ndarray]: """Return array of (x, y, sigma) data for curve fitting. @@ -27,6 +27,7 @@ def curve_fit_data( data: list of circuit data dictionaries containing counts. data_processor: callable for processing data to y, sigma x_key: key for extracting xdata value from metadata (Default: "xval"). + filters: additional kwargs to filter metadata on. Returns: tuple: ``(x, y, sigma)`` tuple of arrays of x-values, @@ -35,12 +36,14 @@ def curve_fit_data( Raises: QiskitError: if input data is not level-2 measurement. """ - size = len(data) + indices = _filter_indices(data, **filters) + size = len(indices) xdata = np.zeros(size, dtype=int) ydata = np.zeros(size, dtype=float) ydata_var = np.zeros(size, dtype=float) - for i, datum in enumerate(data): + for i, idx in enumerate(indices): + datum = data[idx] metadata = datum["metadata"] xdata[i] = metadata[x_key] y_mean, y_var = data_processor(datum) @@ -55,6 +58,7 @@ def multi_curve_fit_data( data_processor: Callable, x_key: str = "xval", series_key: str = "series", + **filters, ) -> Tuple[np.ndarray]: """Return array of (x, y, sigma) data for curve fitting. @@ -63,6 +67,7 @@ def multi_curve_fit_data( data_processor: callable for processing data to y, sigma x_key: key for extracting xdata value from metadata (Default: "xval"). series_key: key for extracting series value from metadata (Default: "series"). + filters: additional kwargs to filter metadata on. Returns: tuple: ``(x, y, sigma)`` tuple of arrays of x-values, @@ -71,12 +76,14 @@ def multi_curve_fit_data( Raises: QiskitError: if input data is not level-2 measurement. """ - size = len(data) + indices = _filter_indices(data, **filters) + size = len(indices) xdata = np.zeros((size, 2), dtype=float) ydata = np.zeros(size, dtype=float) ydata_var = np.zeros(size, dtype=float) - for i, datum in enumerate(data): + for i, idx in enumerate(indices): + datum = data[idx] metadata = datum["metadata"] xdata[i, 0] = metadata[x_key] xdata[i, 1] = metadata[series_key] @@ -117,6 +124,23 @@ def mean_xy_data(xdata: np.ndarray, ydata: np.ndarray) -> Tuple[np.ndarray]: return x_means, y_means, y_sigmas +def _filter_indices(data: List[Dict[str, any]], **filters) -> Iterable[int]: + """Return the indices of filtered data""" + if not filters: + return range(len(data)) + indices = [] + for i, datum in enumerate(data): + include = True + metadata = datum["metadata"] + for key, val in filters.items(): + if key not in metadata or metadata[key] != val: + include = False + break + if include: + indices.append(i) + return indices + + def level2_probability(data: Dict[str, any], outcome: str) -> Tuple[float]: """Return the outcome probability mean and variance. From e2998f910ff1babd4b2b791356dd98e0790b46f6 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Thu, 8 Apr 2021 13:38:29 -0400 Subject: [PATCH 14/25] Reorganize files * Move curve_fit_data and mutli_curve_fit_data to curve_fitting.py * Move other processing functions to `data_processing.py` --- qiskit_experiments/analysis/curve_fitting.py | 77 +++++++- .../analysis/curve_fitting_data.py | 166 ------------------ .../analysis/data_processing.py | 104 +++++++++++ 3 files changed, 180 insertions(+), 167 deletions(-) delete mode 100644 qiskit_experiments/analysis/curve_fitting_data.py create mode 100644 qiskit_experiments/analysis/data_processing.py diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py index 10ba9519a0..9bacc690b1 100644 --- a/qiskit_experiments/analysis/curve_fitting.py +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -14,13 +14,14 @@ """ # pylint: disable = invalid-name -from typing import List, Callable, Optional +from typing import List, Dict, Tuple, Callable, Optional import numpy as np import scipy.optimize as opt from qiskit.exceptions import QiskitError from qiskit_experiments.base_analysis import AnalysisResult +from qiskit_experiments.analysis.data_processing import filter_data def curve_fit( @@ -143,3 +144,77 @@ def f(x, *params): analysis_result = curve_fit(f, xdata1d, ydata, p0, sigma=wsigma, **kwargs) return analysis_result + + +def curve_fit_data( + data: List[Dict[str, any]], data_processor: Callable, x_key: str = "xval", **filters +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Return array of (x, y, sigma) data for curve fitting. + + Args + data: list of circuit data dictionaries containing counts. + data_processor: callable for processing data to y, sigma + x_key: key for extracting xdata value from metadata (Default: "xval"). + filters: additional kwargs to filter metadata on. + + Returns: + tuple: ``(x, y, sigma)`` tuple of arrays of x-values, + y-values, and standard deviations of y-values. + + Raises: + QiskitError: if input data is not level-2 measurement. + """ + filtered_data = filter_data(data, **filters) + size = len(filtered_data) + xdata = np.zeros(size, dtype=int) + ydata = np.zeros(size, dtype=float) + ydata_var = np.zeros(size, dtype=float) + + for i, datum in enumerate(filter_data): + metadata = datum["metadata"] + xdata[i] = metadata[x_key] + y_mean, y_var = data_processor(datum) + ydata[i] = y_mean + ydata_var[i] = y_var + + return xdata, ydata, np.sqrt(ydata_var) + + +def multi_curve_fit_data( + data: List[Dict[str, any]], + data_processor: Callable, + x_key: str = "xval", + series_key: str = "series", + **filters, +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Return array of (x, y, sigma) data for curve fitting. + + Args + data: list of circuit data dictionaries. + data_processor: callable for processing data to y, sigma + x_key: key for extracting xdata value from metadata (Default: "xval"). + series_key: key for extracting series value from metadata (Default: "series"). + filters: additional kwargs to filter metadata on. + + Returns: + tuple: ``(x, y, sigma)`` tuple of arrays of x-values, + y-values, and standard deviations of y-values. + + Raises: + QiskitError: if input data is not level-2 measurement. + """ + filtered_data = filter_data(data, **filters) + size = len(filtered_data) + xdata = np.zeros((size, 2), dtype=float) + ydata = np.zeros(size, dtype=float) + ydata_var = np.zeros(size, dtype=float) + + for i, datum in enumerate(filter_data): + metadata = datum["metadata"] + xdata[i, 0] = metadata[x_key] + xdata[i, 1] = metadata[series_key] + y_mean, y_var = data_processor(datum) + ydata[i] = y_mean + ydata_var[i] = y_var + + return xdata, ydata, np.sqrt(ydata_var) diff --git a/qiskit_experiments/analysis/curve_fitting_data.py b/qiskit_experiments/analysis/curve_fitting_data.py deleted file mode 100644 index 179a5b87ba..0000000000 --- a/qiskit_experiments/analysis/curve_fitting_data.py +++ /dev/null @@ -1,166 +0,0 @@ -# This code is part of Qiskit. -# -# (C) Copyright IBM 2021. -# -# This code is licensed under the Apache License, Version 2.0. You may -# obtain a copy of this license in the LICENSE.txt file in the root directory -# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. -# -# Any modifications or derivative works of this code must retain this -# copyright notice, and modified files need to carry a notice indicating -# that they have been altered from the originals. -""" -Curve fitting xydata for experiment analysis -""" -# pylint: disable = invalid-name - -from typing import List, Dict, Tuple, Callable, Iterable -import numpy as np - - -def curve_fit_data( - data: List[Dict[str, any]], data_processor: Callable, x_key: str = "xval", **filters -) -> Tuple[np.ndarray]: - """Return array of (x, y, sigma) data for curve fitting. - - Args - data: list of circuit data dictionaries containing counts. - data_processor: callable for processing data to y, sigma - x_key: key for extracting xdata value from metadata (Default: "xval"). - filters: additional kwargs to filter metadata on. - - Returns: - tuple: ``(x, y, sigma)`` tuple of arrays of x-values, - y-values, and standard deviations of y-values. - - Raises: - QiskitError: if input data is not level-2 measurement. - """ - indices = _filter_indices(data, **filters) - size = len(indices) - xdata = np.zeros(size, dtype=int) - ydata = np.zeros(size, dtype=float) - ydata_var = np.zeros(size, dtype=float) - - for i, idx in enumerate(indices): - datum = data[idx] - metadata = datum["metadata"] - xdata[i] = metadata[x_key] - y_mean, y_var = data_processor(datum) - ydata[i] = y_mean - ydata_var[i] = y_var - - return xdata, ydata, np.sqrt(ydata_var) - - -def multi_curve_fit_data( - data: List[Dict[str, any]], - data_processor: Callable, - x_key: str = "xval", - series_key: str = "series", - **filters, -) -> Tuple[np.ndarray]: - """Return array of (x, y, sigma) data for curve fitting. - - Args - data: list of circuit data dictionaries. - data_processor: callable for processing data to y, sigma - x_key: key for extracting xdata value from metadata (Default: "xval"). - series_key: key for extracting series value from metadata (Default: "series"). - filters: additional kwargs to filter metadata on. - - Returns: - tuple: ``(x, y, sigma)`` tuple of arrays of x-values, - y-values, and standard deviations of y-values. - - Raises: - QiskitError: if input data is not level-2 measurement. - """ - indices = _filter_indices(data, **filters) - size = len(indices) - xdata = np.zeros((size, 2), dtype=float) - ydata = np.zeros(size, dtype=float) - ydata_var = np.zeros(size, dtype=float) - - for i, idx in enumerate(indices): - datum = data[idx] - metadata = datum["metadata"] - xdata[i, 0] = metadata[x_key] - xdata[i, 1] = metadata[series_key] - y_mean, y_var = data_processor(datum) - ydata[i] = y_mean - ydata_var[i] = y_var - - return xdata, ydata, np.sqrt(ydata_var) - - -def mean_xy_data(xdata: np.ndarray, ydata: np.ndarray) -> Tuple[np.ndarray]: - """Return (x, y_mean, sigma) data. - - The mean is taken over all ydata values with the same xdata value. - - Args - xdata: 1D or 2D array of xdata from curve_fit_data or - multi_curve_fit_data - ydata: array of ydata returned from curve_fit_data or - multi_curve_fit_data - - Returns: - tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where - ``x`` is an arrays of unique x-values, ``y`` is an array of - sample mean y-values, and ``sigma`` is an array of sample standard - deviation of y values. - """ - x_means = np.unique(xdata, axis=0) - y_means = np.zeros(x_means.size) - y_sigmas = np.zeros(x_means.size) - for i in range(x_means.size): - ys = ydata[xdata == x_means[i]] - num_samples = len(ys) - sample_mean = np.mean(ys) - sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) - y_means[i] = sample_mean - y_sigmas[i] = np.sqrt(sample_var) - return x_means, y_means, y_sigmas - - -def _filter_indices(data: List[Dict[str, any]], **filters) -> Iterable[int]: - """Return the indices of filtered data""" - if not filters: - return range(len(data)) - indices = [] - for i, datum in enumerate(data): - include = True - metadata = datum["metadata"] - for key, val in filters.items(): - if key not in metadata or metadata[key] != val: - include = False - break - if include: - indices.append(i) - return indices - - -def level2_probability(data: Dict[str, any], outcome: str) -> Tuple[float]: - """Return the outcome probability mean and variance. - - Args: - data: A data dict containing count data. - outcome: bitstring for desired outcome probability. - - Returns: - tuple: (p_mean, p_var) of the probability mean and variance - estimated from the counts. - - .. note:: - - This assumes a binomial distribution where :math:`K` counts - of the desired outcome from :math:`N` shots the - mean probability is :math:`p = K / N` and the variance is - :math:`\\sigma^2 = N p (1-p)`. - """ - counts = data["counts"] - shots = sum(counts.values()) - p_mean = counts.get(outcome, 0.0) / shots - p_var = shots * p_mean * (1 - p_mean) - return p_mean, p_var diff --git a/qiskit_experiments/analysis/data_processing.py b/qiskit_experiments/analysis/data_processing.py new file mode 100644 index 0000000000..e5574aac9c --- /dev/null +++ b/qiskit_experiments/analysis/data_processing.py @@ -0,0 +1,104 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. +""" +Curve fitting xydata for experiment analysis +""" +# pylint: disable = invalid-name + +from typing import List, Dict, Tuple +import numpy as np + + +def filter_data(data: List[Dict[str, any]], **filters) -> List[Dict[str, any]]: + """Return the list of filtered data + + Args: + data: list of data dicts. + filters: kwargs for filtering based on metadata + values. + + Returns: + The list of filtered data. If no filters are provided this will be the + input list. + + .. note:: + The filtered data list does not make a copy of the items in the input + data last. + """ + if not filters: + return data + filtered_data = [] + for datum in data: + include = True + metadata = datum["metadata"] + for key, val in filters.items(): + if key not in metadata or metadata[key] != val: + include = False + break + if include: + filtered_data.append(datum) + return filtered_data + + +def mean_xy_data(xdata: np.ndarray, ydata: np.ndarray) -> Tuple[np.ndarray]: + """Return (x, y_mean, sigma) data. + + The mean is taken over all ydata values with the same xdata value. + + Args + xdata: 1D or 2D array of xdata from curve_fit_data or + multi_curve_fit_data + ydata: array of ydata returned from curve_fit_data or + multi_curve_fit_data + + Returns: + tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where + ``x`` is an arrays of unique x-values, ``y`` is an array of + sample mean y-values, and ``sigma`` is an array of sample standard + deviation of y values. + """ + x_means = np.unique(xdata, axis=0) + y_means = np.zeros(x_means.size) + y_sigmas = np.zeros(x_means.size) + for i in range(x_means.size): + ys = ydata[xdata == x_means[i]] + num_samples = len(ys) + sample_mean = np.mean(ys) + sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) + y_means[i] = sample_mean + y_sigmas[i] = np.sqrt(sample_var) + return x_means, y_means, y_sigmas + + +def level2_probability(data: Dict[str, any], outcome: str) -> Tuple[float]: + """Return the outcome probability mean and variance. + + Args: + data: A data dict containing count data. + outcome: bitstring for desired outcome probability. + + Returns: + tuple: (p_mean, p_var) of the probability mean and variance + estimated from the counts. + + .. note:: + + This assumes a binomial distribution where :math:`K` counts + of the desired outcome from :math:`N` shots the + mean probability is :math:`p = K / N` and the variance is + :math:`\\sigma^2 = N p (1-p)`. + """ + counts = data["counts"] + shots = sum(counts.values()) + p_mean = counts.get(outcome, 0.0) / shots + p_var = shots * p_mean * (1 - p_mean) + return p_mean, p_var From c19e55c7805b52d86e2195861597acf6c9d754ef Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Thu, 8 Apr 2021 13:40:33 -0400 Subject: [PATCH 15/25] Remove unneeded init import --- qiskit_experiments/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qiskit_experiments/__init__.py b/qiskit_experiments/__init__.py index c34cc5d81b..eb4560cf67 100644 --- a/qiskit_experiments/__init__.py +++ b/qiskit_experiments/__init__.py @@ -18,5 +18,4 @@ from .experiment_data import ExperimentData, AnalysisResult # Experiment modules -from . import analysis from . import composite From 1992900084b64d84b9f2ca39c37ab0f99640e370 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Fri, 9 Apr 2021 14:41:37 -0400 Subject: [PATCH 16/25] Initial review edits --- qiskit_experiments/analysis/curve_fitting.py | 49 +++++++++++++------ .../analysis/data_processing.py | 6 +-- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py index 9bacc690b1..971b46064b 100644 --- a/qiskit_experiments/analysis/curve_fitting.py +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -32,24 +32,30 @@ def curve_fit( sigma: Optional[np.ndarray] = None, **kwargs, ) -> AnalysisResult: - """Use non-linear least squares to fit a function, f, to data + r"""Perform a non-linear least squares to fit - Wraps scipy.optimize.curve_fit + This solves the optimization problem + + .. math:: + \Theta_{\mbox{opt}} = \arg\min_\Theta \sum_i + \sigma_i^{-2} (f(x_i, \Theta) - y_i)^2 + + using ``scipy.optimize.curve_fit``. Args: func: a fit function `f(x *params)`. - xdata: a 1D array of x-data - ydata: a 1D array of y-data + xdata: a 1D float array of x-data + ydata: a 1D float array of y-data p0: initial guess for optimization parameters. sigma: Optional, a 1D array of standard deviations in ydata. kwargs: additional kwargs for scipy.optimize.curve_fit. Returns: - AnalysisResult: result containing `popt` the optimal fit parameters, - `popt_err` the standard error estimates popt, - `pcov` the covariance matrix for the fit, - `chisq` the chi-squared parameter of fit, - `xrange` the range of xdata values used for fit. + result containing `popt` the optimal fit parameters, + `popt_err` the standard error estimates popt, + `pcov` the covariance matrix for the fit, + `chisq` the chi-squared parameter of fit, + `xrange` the range of xdata values used for fit. """ # Run curve fit @@ -87,15 +93,28 @@ def multi_curve_fit( weights: Optional[np.ndarray] = None, **kwargs, ): - """Use non-linear least squares to fit a list of functions, f_i, to data + r"""Perform a linearized multi-objective non-linear least squares fit. + + This solves the optimization problem + + .. math:: + \Theta_{\mbox{opt}} = \arg\min_\Theta \sum_{k} w_k + \sum_{i} \sigma_{k, i}^{-2} + (f_k(x_{k, i}, \Theta) - y_{k, i})^2 + + for multiple series of :math:`x_k, y_k, \sigma_k` data evaluated using + a list of objective functions :math:`[f_k]` + using ``scipy.optimize.curve_fit``. Args: - funcs: a list of objective functions with signatures `f_i(x, *params)`. - xdata: a 2D array of xdata and function function indexes. - ydata: a 1D array of ydata + funcs: a list of objective functions with each with signature + :math`f_k`(x, *params)`. + xdata: a 2D float array of xdata and function indexes. + ydata: a 1D float array of ydata p0: initial guess for optimization parameters. sigma: Optional, a 1D array of standard deviations in ydata. - weights: Optional, a 1D list of numeric weights for each function. + weights: Optional, a 1D float list of weights :math:`w_k` for each + component function :math:`f_k`. kwargs: additional kwargs for scipy.optimize.curve_fit. Returns: @@ -166,7 +185,7 @@ def curve_fit_data( """ filtered_data = filter_data(data, **filters) size = len(filtered_data) - xdata = np.zeros(size, dtype=int) + xdata = np.zeros(size, dtype=float) ydata = np.zeros(size, dtype=float) ydata_var = np.zeros(size, dtype=float) diff --git a/qiskit_experiments/analysis/data_processing.py b/qiskit_experiments/analysis/data_processing.py index e5574aac9c..bb8f43287c 100644 --- a/qiskit_experiments/analysis/data_processing.py +++ b/qiskit_experiments/analysis/data_processing.py @@ -10,7 +10,7 @@ # copyright notice, and modified files need to carry a notice indicating # that they have been altered from the originals. """ -Curve fitting xydata for experiment analysis +Data processing utility functions for curve fitting experiments """ # pylint: disable = invalid-name @@ -29,10 +29,6 @@ def filter_data(data: List[Dict[str, any]], **filters) -> List[Dict[str, any]]: Returns: The list of filtered data. If no filters are provided this will be the input list. - - .. note:: - The filtered data list does not make a copy of the items in the input - data last. """ if not filters: return data From a1acfac6228f864bf08ce942484c662f54f25dc6 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Fri, 9 Apr 2021 14:53:17 -0400 Subject: [PATCH 17/25] Separate x and series data for multi curve fit Rename curve_fit_data to process_curve_data Rename multi_curve_fit_data to process_multi_curve_data --- qiskit_experiments/analysis/curve_fitting.py | 51 ++++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py index 971b46064b..93cb00f425 100644 --- a/qiskit_experiments/analysis/curve_fitting.py +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -19,7 +19,6 @@ import numpy as np import scipy.optimize as opt -from qiskit.exceptions import QiskitError from qiskit_experiments.base_analysis import AnalysisResult from qiskit_experiments.analysis.data_processing import filter_data @@ -44,8 +43,8 @@ def curve_fit( Args: func: a fit function `f(x *params)`. - xdata: a 1D float array of x-data - ydata: a 1D float array of y-data + xdata: a 1D float array of x-data. + ydata: a 1D float array of y-data. p0: initial guess for optimization parameters. sigma: Optional, a 1D array of standard deviations in ydata. kwargs: additional kwargs for scipy.optimize.curve_fit. @@ -86,6 +85,7 @@ def curve_fit( def multi_curve_fit( funcs: List[Callable], + series: np.ndarray, xdata: np.ndarray, ydata: np.ndarray, p0: np.ndarray, @@ -107,10 +107,13 @@ def multi_curve_fit( using ``scipy.optimize.curve_fit``. Args: - funcs: a list of objective functions with each with signature - :math`f_k`(x, *params)`. - xdata: a 2D float array of xdata and function indexes. - ydata: a 1D float array of ydata + funcs: a list of objective functions :math:`[f_0, f_1, ...]` where + each function has signature :math`f_k`(x, *params)`. + series: a 1D int array that specifies the component objective + function :math:`f_k` to evaluate corresponding x and y + data with. + xdata: a 1D float array of xdata. + ydata: a 1D float array of ydata. p0: initial guess for optimization parameters. sigma: Optional, a 1D array of standard deviations in ydata. weights: Optional, a 1D float list of weights :math:`w_k` for each @@ -129,14 +132,9 @@ def multi_curve_fit( """ num_funcs = len(funcs) - # Get 1D xdata and indices from 2D input xdata - xdata = np.asarray(xdata, dtype=float) - if xdata.ndim != 2: - raise QiskitError("multi_curve_fit requires 2D xdata.") - xdata1d, xindex = xdata.T - # Get positions for indexes data sets - idxs = [xindex == i for i in range(num_funcs)] + series = np.asarray(series, dtype=int) + idxs = [series == i for i in range(num_funcs)] # Combine weights and sigma for transformation if weights is None: @@ -160,15 +158,15 @@ def f(x, *params): return y # Run linearized curve_fit - analysis_result = curve_fit(f, xdata1d, ydata, p0, sigma=wsigma, **kwargs) + analysis_result = curve_fit(f, xdata, ydata, p0, sigma=wsigma, **kwargs) return analysis_result -def curve_fit_data( +def process_curve_data( data: List[Dict[str, any]], data_processor: Callable, x_key: str = "xval", **filters ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - """Return array of (x, y, sigma) data for curve fitting. + """Return tuple of arrays (x, y, sigma) data for curve fitting. Args data: list of circuit data dictionaries containing counts. @@ -199,14 +197,14 @@ def curve_fit_data( return xdata, ydata, np.sqrt(ydata_var) -def multi_curve_fit_data( +def process_multi_curve_data( data: List[Dict[str, any]], data_processor: Callable, x_key: str = "xval", series_key: str = "series", **filters, -) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - """Return array of (x, y, sigma) data for curve fitting. +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Return tuple of arrays (series, x, y, sigma) data for multi curve fitting. Args data: list of circuit data dictionaries. @@ -216,24 +214,25 @@ def multi_curve_fit_data( filters: additional kwargs to filter metadata on. Returns: - tuple: ``(x, y, sigma)`` tuple of arrays of x-values, - y-values, and standard deviations of y-values. + tuple: ``(series, x, y, sigma)`` tuple of arrays of series values, + x-values, y-values, and standard deviations of y-values. Raises: QiskitError: if input data is not level-2 measurement. """ filtered_data = filter_data(data, **filters) size = len(filtered_data) - xdata = np.zeros((size, 2), dtype=float) + series = np.zeros(size, dtype=int) + xdata = np.zeros(size, dtype=float) ydata = np.zeros(size, dtype=float) ydata_var = np.zeros(size, dtype=float) for i, datum in enumerate(filter_data): metadata = datum["metadata"] - xdata[i, 0] = metadata[x_key] - xdata[i, 1] = metadata[series_key] + series[i] = metadata[series_key] + xdata[i] = metadata[x_key] y_mean, y_var = data_processor(datum) ydata[i] = y_mean ydata_var[i] = y_var - return xdata, ydata, np.sqrt(ydata_var) + return series, xdata, ydata, np.sqrt(ydata_var) From e0919640abc4c128c6cc47c3f66f9b03e8d24ecd Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Fri, 9 Apr 2021 15:33:40 -0400 Subject: [PATCH 18/25] Allow passing p0, and bounds as dict to curve fit --- qiskit_experiments/analysis/curve_fitting.py | 49 +++++++++++++++++--- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py index 93cb00f425..55238fbbaa 100644 --- a/qiskit_experiments/analysis/curve_fitting.py +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -14,7 +14,7 @@ """ # pylint: disable = invalid-name -from typing import List, Dict, Tuple, Callable, Optional +from typing import List, Dict, Tuple, Callable, Optional, Union import numpy as np import scipy.optimize as opt @@ -27,8 +27,9 @@ def curve_fit( func: Callable, xdata: np.ndarray, ydata: np.ndarray, - p0: np.ndarray, + p0: Union[Dict[str, float], np.ndarray], sigma: Optional[np.ndarray] = None, + bounds: Optional[Union[Dict[str, Tuple[float, float]], Tuple[np.ndarray, np.ndarray]]] = None, **kwargs, ) -> AnalysisResult: r"""Perform a non-linear least squares to fit @@ -42,11 +43,13 @@ def curve_fit( using ``scipy.optimize.curve_fit``. Args: - func: a fit function `f(x *params)`. + func: a fit function `f(x, *params)`. xdata: a 1D float array of x-data. ydata: a 1D float array of y-data. p0: initial guess for optimization parameters. sigma: Optional, a 1D array of standard deviations in ydata. + bounds: Optional, lower and upper bounds for optimization + parameters. kwargs: additional kwargs for scipy.optimize.curve_fit. Returns: @@ -56,26 +59,55 @@ def curve_fit( `chisq` the chi-squared parameter of fit, `xrange` the range of xdata values used for fit. """ + # Format p0 parameters if specified as dictionary + if isinstance(p0, dict): + param_keys = list(p0.keys()) + param_p0 = list(p0.values()) + + # Convert bounds + if bounds: + lower = [bounds[key][0] for key in param_keys] + upper = [bounds[key][1] for key in param_keys] + param_bounds = (lower, upper) + else: + param_bounds = None + + # Convert fit function + def fit_func(x, *params): + return func(x, **dict(zip(param_keys, params))) + + else: + param_keys = None + param_p0 = p0 + param_bounds = bounds + fit_func = func # Run curve fit # pylint: disable = unbalanced-tuple-unpacking - popt, pcov = opt.curve_fit(func, xdata, ydata, sigma=sigma, p0=p0, **kwargs) + popt, pcov = opt.curve_fit( + fit_func, xdata, ydata, sigma=sigma, p0=param_p0, bounds=param_bounds, **kwargs + ) popt_err = np.sqrt(np.diag(pcov)) # Compute chi-squared for fit - yfits = func(xdata, *popt) - chisq = np.mean(((yfits - ydata) / sigma) ** 2) + yfits = fit_func(xdata, *popt) + residues = (yfits - ydata) ** 2 + if sigma is not None: + residues = residues / (sigma ** 2) + chisq = np.mean(residues) # Compute xdata range for fit xdata_range = [min(xdata), max(xdata)] result = { "popt": popt, + "popt_keys": param_keys, "popt_err": popt_err, "pcov": pcov, "chisq": chisq, "xrange": xdata_range, } + # TODO: # 1. Add some basic validation of computer good / bad based on fit result. # 2. Add error handling so if fitting fails we can return an analysis @@ -91,6 +123,7 @@ def multi_curve_fit( p0: np.ndarray, sigma: Optional[np.ndarray] = None, weights: Optional[np.ndarray] = None, + bounds: Optional[Union[Dict[str, Tuple[float, float]], Tuple[np.ndarray, np.ndarray]]] = None, **kwargs, ): r"""Perform a linearized multi-objective non-linear least squares fit. @@ -118,6 +151,8 @@ def multi_curve_fit( sigma: Optional, a 1D array of standard deviations in ydata. weights: Optional, a 1D float list of weights :math:`w_k` for each component function :math:`f_k`. + bounds: Optional, lower and upper bounds for optimization + parameters. kwargs: additional kwargs for scipy.optimize.curve_fit. Returns: @@ -158,7 +193,7 @@ def f(x, *params): return y # Run linearized curve_fit - analysis_result = curve_fit(f, xdata, ydata, p0, sigma=wsigma, **kwargs) + analysis_result = curve_fit(f, xdata, ydata, p0, sigma=wsigma, bounds=bounds, **kwargs) return analysis_result From 6bb4dc796cfa6407792f08101884d7165256b7d8 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Fri, 9 Apr 2021 16:12:33 -0400 Subject: [PATCH 19/25] Add sigma to mean_xy_data If sigma is provided the mean and variance is computed using inverse-variance weighting, otherwise it is computed as the sample mean and biased sample variance. --- .../analysis/data_processing.py | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/qiskit_experiments/analysis/data_processing.py b/qiskit_experiments/analysis/data_processing.py index bb8f43287c..98471013d0 100644 --- a/qiskit_experiments/analysis/data_processing.py +++ b/qiskit_experiments/analysis/data_processing.py @@ -14,7 +14,7 @@ """ # pylint: disable = invalid-name -from typing import List, Dict, Tuple +from typing import List, Dict, Tuple, Optional import numpy as np @@ -45,16 +45,21 @@ def filter_data(data: List[Dict[str, any]], **filters) -> List[Dict[str, any]]: return filtered_data -def mean_xy_data(xdata: np.ndarray, ydata: np.ndarray) -> Tuple[np.ndarray]: +def mean_xy_data( + xdata: np.ndarray, ydata: np.ndarray, sigma: Optional[np.ndarray] = None +) -> Tuple[np.ndarray]: """Return (x, y_mean, sigma) data. The mean is taken over all ydata values with the same xdata value. + If `sigma=None` the sample mean and biased sample variance is used, + otherwise the inverse-variance weighted mean and variance is used. Args xdata: 1D or 2D array of xdata from curve_fit_data or multi_curve_fit_data ydata: array of ydata returned from curve_fit_data or multi_curve_fit_data + sigma: Optional, array of standard deviations in ydata. Returns: tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where @@ -65,13 +70,25 @@ def mean_xy_data(xdata: np.ndarray, ydata: np.ndarray) -> Tuple[np.ndarray]: x_means = np.unique(xdata, axis=0) y_means = np.zeros(x_means.size) y_sigmas = np.zeros(x_means.size) + if sigma is None: + sigma = np.ones(xdata.size) for i in range(x_means.size): - ys = ydata[xdata == x_means[i]] - num_samples = len(ys) - sample_mean = np.mean(ys) - sample_var = np.sum((sample_mean - ys) ** 2) / (num_samples - 1) - y_means[i] = sample_mean - y_sigmas[i] = np.sqrt(sample_var) + # Get positions of y to average + idxs = xdata == x_means[i] + ys = ydata[idxs] + + if sigma: + # Compute the inverse-variance weighted y mean and variance + weights = 1 / sigma[idxs] ** 2 + y_var = 1 / np.sum(weights) + y_mean = y_var * np.sum(weights * ys) + else: + # Compute biased sample mean and variance + y_mean = np.mean(ys) + y_var = np.sum((y_mean - ys) ** 2) / len(ys) + + y_means[i] = y_mean + y_sigmas[i] = np.sqrt(y_var) return x_means, y_means, y_sigmas From 361ae6c36476a513b89ae05b867938b05e0e84a3 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Mon, 12 Apr 2021 12:02:17 -0400 Subject: [PATCH 20/25] Set default absolute_sigma = True --- qiskit_experiments/analysis/curve_fitting.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py index 55238fbbaa..6cf4387f43 100644 --- a/qiskit_experiments/analysis/curve_fitting.py +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -82,6 +82,11 @@ def fit_func(x, *params): param_bounds = bounds fit_func = func + # Override scipy.curve_fit default for absolute_sigma to True + # if not specified in kwargs and sigma is not None + if sigma is not None and 'absolute_sigma' not in kwargs: + kwargs['absolute_sigma'] = True + # Run curve fit # pylint: disable = unbalanced-tuple-unpacking popt, pcov = opt.curve_fit( From ab805e97faef903eb718b679e34438e15c4f01c5 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Mon, 12 Apr 2021 12:03:14 -0400 Subject: [PATCH 21/25] Fix logical check on np array --- qiskit_experiments/analysis/data_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qiskit_experiments/analysis/data_processing.py b/qiskit_experiments/analysis/data_processing.py index 98471013d0..6dec35f230 100644 --- a/qiskit_experiments/analysis/data_processing.py +++ b/qiskit_experiments/analysis/data_processing.py @@ -77,7 +77,7 @@ def mean_xy_data( idxs = xdata == x_means[i] ys = ydata[idxs] - if sigma: + if sigma is not None: # Compute the inverse-variance weighted y mean and variance weights = 1 / sigma[idxs] ** 2 y_var = 1 / np.sum(weights) From 17876dfc0709be1233764e79b07d60b0ba4b4ece Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Mon, 12 Apr 2021 13:57:47 -0400 Subject: [PATCH 22/25] Use reduced chi-squared calculation --- qiskit_experiments/analysis/curve_fitting.py | 35 ++++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py index 6cf4387f43..29a93f82c2 100644 --- a/qiskit_experiments/analysis/curve_fitting.py +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -18,7 +18,7 @@ import numpy as np import scipy.optimize as opt - +from qiskit.exceptions import QiskitError from qiskit_experiments.base_analysis import AnalysisResult from qiskit_experiments.analysis.data_processing import filter_data @@ -58,6 +58,11 @@ def curve_fit( `pcov` the covariance matrix for the fit, `chisq` the chi-squared parameter of fit, `xrange` the range of xdata values used for fit. + + Raises: + QiskitError: if the number of y-values in the dataset is not + greater than the number of parameters in the + fit function. """ # Format p0 parameters if specified as dictionary if isinstance(p0, dict): @@ -82,24 +87,34 @@ def fit_func(x, *params): param_bounds = bounds fit_func = func + # Check data set has at least 1 more values than model parameters + num_params = len(param_p0) + if len(ydata) <= num_params: + raise QiskitError( + "The number of y-values in the dataset is not greater than" + " the number of parameters in the fit function." + ) + # Override scipy.curve_fit default for absolute_sigma to True - # if not specified in kwargs and sigma is not None - if sigma is not None and 'absolute_sigma' not in kwargs: - kwargs['absolute_sigma'] = True + # if not specified in kwargs + if sigma is not None and "absolute_sigma" not in kwargs: + kwargs["absolute_sigma"] = True # Run curve fit + # TODO: Add error handling so if fitting fails we can return an analysis + # result containing this information # pylint: disable = unbalanced-tuple-unpacking popt, pcov = opt.curve_fit( fit_func, xdata, ydata, sigma=sigma, p0=param_p0, bounds=param_bounds, **kwargs ) popt_err = np.sqrt(np.diag(pcov)) - # Compute chi-squared for fit + # Calculate the reduced chi-squared for fit yfits = fit_func(xdata, *popt) residues = (yfits - ydata) ** 2 if sigma is not None: residues = residues / (sigma ** 2) - chisq = np.mean(residues) + chisq = np.sum(residues) / (len(yfits) - num_params) # Compute xdata range for fit xdata_range = [min(xdata), max(xdata)] @@ -113,10 +128,6 @@ def fit_func(x, *params): "xrange": xdata_range, } - # TODO: - # 1. Add some basic validation of computer good / bad based on fit result. - # 2. Add error handling so if fitting fails we can return an analysis - # result containing this information return AnalysisResult(result) @@ -168,7 +179,9 @@ def multi_curve_fit( `xrange` the range of xdata values used for fit. Raises: - QiskitError: if input xdata is not 2D. + QiskitError: if the number of y-values in the dataset is not + greater than the number of parameters in the + fit function. """ num_funcs = len(funcs) From 90f19268ac1a301fadcb91e1d6f21b11423c413c Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Tue, 13 Apr 2021 16:12:17 -0400 Subject: [PATCH 23/25] fixup data processing * Fix error in level2_probabilities multiplying by shots instead of dividing * Add explicit method kwarg for mean_xy_data --- qiskit_experiments/analysis/curve_fitting.py | 8 +- .../analysis/data_processing.py | 74 +++++++++++++------ 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py index 29a93f82c2..45c6e257f3 100644 --- a/qiskit_experiments/analysis/curve_fitting.py +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -230,9 +230,6 @@ def process_curve_data( Returns: tuple: ``(x, y, sigma)`` tuple of arrays of x-values, y-values, and standard deviations of y-values. - - Raises: - QiskitError: if input data is not level-2 measurement. """ filtered_data = filter_data(data, **filters) size = len(filtered_data) @@ -240,7 +237,7 @@ def process_curve_data( ydata = np.zeros(size, dtype=float) ydata_var = np.zeros(size, dtype=float) - for i, datum in enumerate(filter_data): + for i, datum in enumerate(filtered_data): metadata = datum["metadata"] xdata[i] = metadata[x_key] y_mean, y_var = data_processor(datum) @@ -269,9 +266,6 @@ def process_multi_curve_data( Returns: tuple: ``(series, x, y, sigma)`` tuple of arrays of series values, x-values, y-values, and standard deviations of y-values. - - Raises: - QiskitError: if input data is not level-2 measurement. """ filtered_data = filter_data(data, **filters) size = len(filtered_data) diff --git a/qiskit_experiments/analysis/data_processing.py b/qiskit_experiments/analysis/data_processing.py index 6dec35f230..89cb8e299e 100644 --- a/qiskit_experiments/analysis/data_processing.py +++ b/qiskit_experiments/analysis/data_processing.py @@ -16,6 +16,7 @@ from typing import List, Dict, Tuple, Optional import numpy as np +from qiskit.exceptions import QiskitError def filter_data(data: List[Dict[str, any]], **filters) -> List[Dict[str, any]]: @@ -46,13 +47,20 @@ def filter_data(data: List[Dict[str, any]], **filters) -> List[Dict[str, any]]: def mean_xy_data( - xdata: np.ndarray, ydata: np.ndarray, sigma: Optional[np.ndarray] = None + xdata: np.ndarray, ydata: np.ndarray, sigma: Optional[np.ndarray] = None, method: str = "sample" ) -> Tuple[np.ndarray]: - """Return (x, y_mean, sigma) data. + r"""Return (x, y_mean, sigma) data. - The mean is taken over all ydata values with the same xdata value. - If `sigma=None` the sample mean and biased sample variance is used, - otherwise the inverse-variance weighted mean and variance is used. + The mean is taken over all ydata values with the same xdata value using + the specified method. For each x the mean :math:`\overline{y}` and variance + :math:`\sigma^2` are computed as + + * ``"sample"`` (default) *Sample mean and variance* + :math:`\overline{y} = \sum_{i=1}^N y_i / N`, + :math:`\sigma^2 = \sum_{i=1}^N ((\overline{y} - y_i)^2) / N` + * ``"iwv"`` *Inverse-weighted variance* + :math:`\overline{y} = (\sum_{i=1}^N y_i / \sigma_i^2 ) \sigma^2` + :math:`\sigma^2 = 1 / (\sum_{i=1}^N 1 / \sigma_i^2)` Args xdata: 1D or 2D array of xdata from curve_fit_data or @@ -60,36 +68,56 @@ def mean_xy_data( ydata: array of ydata returned from curve_fit_data or multi_curve_fit_data sigma: Optional, array of standard deviations in ydata. + method: The method to use for computing y means and + standard deviations sigma (default: "sample"). Returns: tuple: ``(x, y_mean, sigma)`` if ``return_raw==False``, where ``x`` is an arrays of unique x-values, ``y`` is an array of sample mean y-values, and ``sigma`` is an array of sample standard deviation of y values. + + Raises: + QiskitError: if "ivw" method is used without providing a sigma. """ x_means = np.unique(xdata, axis=0) y_means = np.zeros(x_means.size) y_sigmas = np.zeros(x_means.size) - if sigma is None: - sigma = np.ones(xdata.size) - for i in range(x_means.size): - # Get positions of y to average - idxs = xdata == x_means[i] - ys = ydata[idxs] - - if sigma is not None: + + # Sample mean and variance method + if method == "sample": + for i in range(x_means.size): + # Get positions of y to average + idxs = xdata == x_means[i] + ys = ydata[idxs] + + # Compute sample mean and biased sample variance + y_means[i] = np.mean(ys) + y_sigmas[i] = np.mean((y_means[i] - ys) ** 2) + + return x_means, y_means, y_sigmas + + # Inverse-weighted variance method + if method == "iwv": + if sigma is None: + raise QiskitError( + "The inverse-weighted variance method cannot be used with" " `sigma=None`" + ) + for i in range(x_means.size): + # Get positions of y to average + idxs = xdata == x_means[i] + ys = ydata[idxs] + # Compute the inverse-variance weighted y mean and variance weights = 1 / sigma[idxs] ** 2 y_var = 1 / np.sum(weights) - y_mean = y_var * np.sum(weights * ys) - else: - # Compute biased sample mean and variance - y_mean = np.mean(ys) - y_var = np.sum((y_mean - ys) ** 2) / len(ys) + y_means[i] = y_var * np.sum(weights * ys) + y_sigmas[i] = np.sqrt(y_var) + + return x_means, y_means, y_sigmas - y_means[i] = y_mean - y_sigmas[i] = np.sqrt(y_var) - return x_means, y_means, y_sigmas + # Invalid method + raise QiskitError(f"Unsupported method {method}") def level2_probability(data: Dict[str, any], outcome: str) -> Tuple[float]: @@ -108,10 +136,10 @@ def level2_probability(data: Dict[str, any], outcome: str) -> Tuple[float]: This assumes a binomial distribution where :math:`K` counts of the desired outcome from :math:`N` shots the mean probability is :math:`p = K / N` and the variance is - :math:`\\sigma^2 = N p (1-p)`. + :math:`\\sigma^2 = p (1-p) / N`. """ counts = data["counts"] shots = sum(counts.values()) p_mean = counts.get(outcome, 0.0) / shots - p_var = shots * p_mean * (1 - p_mean) + p_var = p_mean * (1 - p_mean) / shots return p_mean, p_var From 363d09e710ae570e80c2dec5ec83bfb704d61fe3 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Tue, 13 Apr 2021 16:12:46 -0400 Subject: [PATCH 24/25] Add basic tests for curve fitting --- test/test_curve_fitting.py | 110 +++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 test/test_curve_fitting.py diff --git a/test/test_curve_fitting.py b/test/test_curve_fitting.py new file mode 100644 index 0000000000..de41ff649c --- /dev/null +++ b/test/test_curve_fitting.py @@ -0,0 +1,110 @@ +# This code is part of Qiskit. +# +# (C) Copyright IBM 2021. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +"""Test version string generation.""" + +import numpy as np +from qiskit.test import QiskitTestCase +from qiskit import QuantumCircuit +from qiskit.providers.aer import AerSimulator +from qiskit_experiments.analysis.curve_fitting import curve_fit, multi_curve_fit, process_curve_data +from qiskit_experiments.analysis.data_processing import level2_probability + + +class TestCurveFitting(QiskitTestCase): + """Test curve fitting functions.""" + + def simulate_experiment_data(self, thetas, shots=1024): + """Generate experiment data for Ry rotations""" + circuits = [] + for theta in thetas: + qc = QuantumCircuit(1) + qc.ry(theta, 0) + qc.measure_all() + circuits.append(qc) + + sim = AerSimulator(method="statevector") + result = sim.run(circuits, shots=shots, seed_simulator=10).result() + data = [ + {"counts": result.get_counts(i), "metadata": {"xval": theta}} + for i, theta in enumerate(thetas) + ] + return data + + @property + def objective0(self): + """Objective function for P0""" + + def func0(x, omega): + return np.cos(omega * x) ** 2 + + return np.vectorize(func0) + + @property + def objective1(self): + """Objective function for P0""" + + def func1(x, omega): + return np.sin(omega * x) ** 2 + + return np.vectorize(func1) + + @staticmethod + def data_processor_p0(data): + """Return P(0) probabilities""" + return level2_probability(data, "0") + + @staticmethod + def data_processor_p1(data): + """Return P(1) probabilities""" + return level2_probability(data, "1") + + def test_process_curve_data(self): + """Test version string generation.""" + thetas = thetas = np.linspace(0.5, 4 * np.pi - 0.5, 20) + data = self.simulate_experiment_data(thetas) + xdata, ydata, _ = process_curve_data(data, data_processor=self.data_processor_p0) + + xdiff = thetas - xdata + ydiff = self.objective0(xdata, 0.5) - ydata + self.assertTrue(np.allclose(xdiff, 0)) + self.assertTrue(np.allclose(ydiff, 0, atol=0.05)) + + def test_curve_fit(self): + """Test curve_fit function""" + thetas = thetas = np.linspace(0.5, 4 * np.pi - 0.5, 20) + data = self.simulate_experiment_data(thetas) + xdata, ydata, sigma = process_curve_data(data, data_processor=self.data_processor_p0) + p0 = [0.6] + bounds = ([0], [2]) + sol = curve_fit(self.objective0, xdata, ydata, p0, sigma=sigma, bounds=bounds) + self.assertTrue(abs(sol["popt"][0] - 0.5) < 0.05) + + def test_multi_curve_fit(self): + """Test multi_curve_fit function""" + thetas = thetas = np.linspace(0.5, 4 * np.pi - 0.5, 20) + data = self.simulate_experiment_data(thetas) + xdata0, ydata0, sigma0 = process_curve_data(data, data_processor=self.data_processor_p0) + xdata1, ydata1, sigma1 = process_curve_data(data, data_processor=self.data_processor_p1) + + # Combine curve data + xdata = np.concatenate([xdata0, xdata1]) + series = np.concatenate([np.zeros(len(xdata0)), np.ones(len(xdata1))]) + ydata = np.concatenate([ydata0, ydata1]) + sigma = np.concatenate([sigma0, sigma1]) + + p0 = [0.6] + bounds = ([0], [2]) + sol = multi_curve_fit( + [self.objective0, self.objective1], series, xdata, ydata, p0, sigma=sigma, bounds=bounds + ) + self.assertTrue(abs(sol["popt"][0] - 0.5) < 0.05) From 9024ded6b8c2f4d91356dc059d377988a4cd2641 Mon Sep 17 00:00:00 2001 From: Christopher Wood Date: Wed, 14 Apr 2021 12:50:36 -0400 Subject: [PATCH 25/25] More review edits * Change chisq -> reduced_chisq in result and docs * add dof (degrees of freedom) to returned results * add documentation about sigma by default being absolute sigma. --- qiskit_experiments/analysis/curve_fitting.py | 75 ++++++++++++-------- test/test_curve_fitting.py | 8 +-- 2 files changed, 51 insertions(+), 32 deletions(-) diff --git a/qiskit_experiments/analysis/curve_fitting.py b/qiskit_experiments/analysis/curve_fitting.py index 45c6e257f3..b2d2e83801 100644 --- a/qiskit_experiments/analysis/curve_fitting.py +++ b/qiskit_experiments/analysis/curve_fitting.py @@ -47,22 +47,31 @@ def curve_fit( xdata: a 1D float array of x-data. ydata: a 1D float array of y-data. p0: initial guess for optimization parameters. - sigma: Optional, a 1D array of standard deviations in ydata. + sigma: Optional, a 1D array of standard deviations in ydata + in absolute units. bounds: Optional, lower and upper bounds for optimization parameters. kwargs: additional kwargs for scipy.optimize.curve_fit. Returns: - result containing `popt` the optimal fit parameters, - `popt_err` the standard error estimates popt, - `pcov` the covariance matrix for the fit, - `chisq` the chi-squared parameter of fit, - `xrange` the range of xdata values used for fit. + result containing ``popt`` the optimal fit parameters, + ``popt_err`` the standard error estimates popt, + ``pcov`` the covariance matrix for the fit, + ``reduced_chisq`` the reduced chi-squared parameter of fit, + ``dof`` the degrees of freedom of the fit, + ``xrange`` the range of xdata values used for fit. Raises: - QiskitError: if the number of y-values in the dataset is not - greater than the number of parameters in the - fit function. + QiskitError: if the number of degrees of freedom of the fit is + less than 1. + + .. note:: + ``sigma`` is assumed to be specified in the same units as ``ydata`` + (absolute units). If sigma is instead specified in relative units + the `absolute_sigma=False` kwarg of scipy curve_fit must be used. + This affects the returned covariance ``pcov`` and error ``popt_err`` + parameters via ``pcov(absolute_sigma=False) = pcov * reduced_chisq`` + ``popt_err(absolute_sigma=False) = popt_err * sqrt(reduced_chisq)``. """ # Format p0 parameters if specified as dictionary if isinstance(p0, dict): @@ -87,16 +96,16 @@ def fit_func(x, *params): param_bounds = bounds fit_func = func - # Check data set has at least 1 more values than model parameters - num_params = len(param_p0) - if len(ydata) <= num_params: + # Check the degrees of freedom is greater than 0 + dof = len(ydata) - len(param_p0) + if dof < 1: raise QiskitError( - "The number of y-values in the dataset is not greater than" - " the number of parameters in the fit function." + "The number of degrees of freedom of the fit data and model " + " (len(ydata) - len(p0)) is less than 1" ) - # Override scipy.curve_fit default for absolute_sigma to True - # if not specified in kwargs + # Override scipy.curve_fit default for absolute_sigma=True + # if sigma is specified. if sigma is not None and "absolute_sigma" not in kwargs: kwargs["absolute_sigma"] = True @@ -114,7 +123,7 @@ def fit_func(x, *params): residues = (yfits - ydata) ** 2 if sigma is not None: residues = residues / (sigma ** 2) - chisq = np.sum(residues) / (len(yfits) - num_params) + reduced_chisq = np.sum(residues) / dof # Compute xdata range for fit xdata_range = [min(xdata), max(xdata)] @@ -124,7 +133,8 @@ def fit_func(x, *params): "popt_keys": param_keys, "popt_err": popt_err, "pcov": pcov, - "chisq": chisq, + "reduced_chisq": reduced_chisq, + "dof": dof, "xrange": xdata_range, } @@ -141,7 +151,7 @@ def multi_curve_fit( weights: Optional[np.ndarray] = None, bounds: Optional[Union[Dict[str, Tuple[float, float]], Tuple[np.ndarray, np.ndarray]]] = None, **kwargs, -): +) -> AnalysisResult: r"""Perform a linearized multi-objective non-linear least squares fit. This solves the optimization problem @@ -164,7 +174,8 @@ def multi_curve_fit( xdata: a 1D float array of xdata. ydata: a 1D float array of ydata. p0: initial guess for optimization parameters. - sigma: Optional, a 1D array of standard deviations in ydata. + sigma: Optional, a 1D array of standard deviations in ydata + in absolute units. weights: Optional, a 1D float list of weights :math:`w_k` for each component function :math:`f_k`. bounds: Optional, lower and upper bounds for optimization @@ -172,16 +183,24 @@ def multi_curve_fit( kwargs: additional kwargs for scipy.optimize.curve_fit. Returns: - AnalysisResult: result containing `popt` the optimal fit parameters, - `popt_err` the standard error estimates popt, - `pcov` the covariance matrix for the fit, - `chisq` the chi-squared parameter of fit. - `xrange` the range of xdata values used for fit. + result containing ``popt`` the optimal fit parameters, + ``popt_err`` the standard error estimates popt, + ``pcov`` the covariance matrix for the fit, + ``reduced_chisq`` the reduced chi-squared parameter of fit, + ``dof`` the degrees of freedom of the fit, + ``xrange`` the range of xdata values used for fit. Raises: - QiskitError: if the number of y-values in the dataset is not - greater than the number of parameters in the - fit function. + QiskitError: if the number of degrees of freedom of the fit is + less than 1. + + .. note:: + ``sigma`` is assumed to be specified in the same units as ``ydata`` + (absolute units). If sigma is instead specified in relative units + the `absolute_sigma=False` kwarg of scipy curve_fit must be used. + This affects the returned covariance ``pcov`` and error ``popt_err`` + parameters via ``pcov(absolute_sigma=False) = pcov * reduced_chisq`` + ``popt_err(absolute_sigma=False) = popt_err * sqrt(reduced_chisq)``. """ num_funcs = len(funcs) diff --git a/test/test_curve_fitting.py b/test/test_curve_fitting.py index de41ff649c..be180d09e9 100644 --- a/test/test_curve_fitting.py +++ b/test/test_curve_fitting.py @@ -14,8 +14,8 @@ import numpy as np from qiskit.test import QiskitTestCase -from qiskit import QuantumCircuit -from qiskit.providers.aer import AerSimulator +from qiskit import QuantumCircuit, execute +from qiskit.providers.basicaer import QasmSimulatorPy from qiskit_experiments.analysis.curve_fitting import curve_fit, multi_curve_fit, process_curve_data from qiskit_experiments.analysis.data_processing import level2_probability @@ -32,8 +32,8 @@ def simulate_experiment_data(self, thetas, shots=1024): qc.measure_all() circuits.append(qc) - sim = AerSimulator(method="statevector") - result = sim.run(circuits, shots=shots, seed_simulator=10).result() + sim = QasmSimulatorPy() + result = execute(circuits, sim, shots=shots, seed_simulator=10).result() data = [ {"counts": result.get_counts(i), "metadata": {"xval": theta}} for i, theta in enumerate(thetas)