diff --git a/qiskit_experiments/curve_analysis/curve_analysis.py b/qiskit_experiments/curve_analysis/curve_analysis.py index 28468eac0e..815e7ac9c3 100644 --- a/qiskit_experiments/curve_analysis/curve_analysis.py +++ b/qiskit_experiments/curve_analysis/curve_analysis.py @@ -21,6 +21,7 @@ import warnings from abc import ABC from typing import Any, Dict, List, Tuple, Callable, Union, Optional +from uncertainties import unumpy as unp import numpy as np from qiskit.providers import Backend @@ -577,18 +578,22 @@ def _is_target_series(datum, **filters): x_key = self._get_option("x_key") try: - x_values = [datum["metadata"][x_key] for datum in data] + x_values = np.asarray([datum["metadata"][x_key] for datum in data], dtype=float) except KeyError as ex: raise DataProcessorError( f"X value key {x_key} is not defined in circuit metadata." ) from ex if isinstance(data_processor, DataProcessor): - y_values, y_sigmas = data_processor(data) - if y_sigmas is None: - y_sigmas = np.full(y_values.shape, np.nan) + y_data = data_processor(data) + + y_nominals = unp.nominal_values(y_data) + y_stderrs = unp.std_devs(y_data) else: - y_values, y_sigmas = zip(*map(data_processor, data)) + y_nominals, y_stderrs = zip(*map(data_processor, data)) + + y_nominals = np.asarray(y_nominals, dtype=float) + y_stderrs = np.asarray(y_stderrs, dtype=float) # Store metadata metadata = np.asarray([datum["metadata"] for datum in data], dtype=object) @@ -596,11 +601,6 @@ def _is_target_series(datum, **filters): # Store shots shots = np.asarray([datum.get("shots", np.nan) for datum in data]) - # Format data - x_values = np.asarray(x_values, dtype=float) - y_values = np.asarray(y_values, dtype=float) - y_sigmas = np.asarray(y_sigmas, dtype=float) - # Find series (invalid data is labeled as -1) data_index = np.full(x_values.size, -1, dtype=int) for idx, series_def in enumerate(self.__series__): @@ -613,8 +613,8 @@ def _is_target_series(datum, **filters): raw_data = CurveData( label="raw_data", x=x_values, - 
y=y_values, - y_err=y_sigmas, + y=y_nominals, + y_err=y_stderrs, shots=shots, data_index=data_index, metadata=metadata, diff --git a/qiskit_experiments/data_processing/__init__.py b/qiskit_experiments/data_processing/__init__.py index adbbbd716d..2042ca1ce6 100644 --- a/qiskit_experiments/data_processing/__init__.py +++ b/qiskit_experiments/data_processing/__init__.py @@ -16,10 +16,32 @@ .. currentmodule:: qiskit_experiments.data_processing -Data processing is the act of taking taking the data returned by the backend and -converting it into a format that can be analyzed. For instance, counts can be -converted to a probability while two-dimensional IQ data may be converted to a -one-dimensional signal. +Data processing is the act of taking the data returned by the backend and +converting it into a format that can be analyzed. +It is implemented as a chain of data processing steps that transform various input data, +e.g. IQ data, into a desired format, e.g. population, which can be analyzed. + +These data transformations may consist of multiple steps, such as kerneling and discrimination. +Each step is implemented by a :class:`~qiskit_experiments.data_processing.data_action.DataAction` +also called a `node`. + +The data processor implements the :meth:`__call__` method. Once initialized, it +can thus be used as a standard python function: + +.. code-block:: python + + processor = DataProcessor(input_key="memory", data_actions=[Node1(), Node2(), ...]) + out_data = processor(in_data) + +The data input to the processor is a sequence of dictionaries each representing the result +of a single circuit. The output of the processor is a numpy array whose shape and data type +depend on the combination of the nodes in the data processor. + +Uncertainties that arise from quantum measurements or finite sampling can be taken into account +in the nodes: a standard error can be generated in a node and can be propagated +through the subsequent nodes in the data processor. 
+Correlation between computed values is also considered. + Classes ======= diff --git a/qiskit_experiments/data_processing/data_action.py b/qiskit_experiments/data_processing/data_action.py index 9427bd508a..8a075ef58f 100644 --- a/qiskit_experiments/data_processing/data_action.py +++ b/qiskit_experiments/data_processing/data_action.py @@ -13,66 +13,65 @@ """Defines the steps that can be used to analyse data.""" from abc import ABCMeta, abstractmethod -from typing import Any, List, Optional, Tuple + +import numpy as np class DataAction(metaclass=ABCMeta): - """ - Abstract action done on measured data to process it. Each subclass of DataAction must - define the way it formats, validates and processes data. + """Abstract action done on measured data to process it. + + Each subclass of DataAction must define the way it formats, validates and processes data. """ def __init__(self, validate: bool = True): - """ + """Create new node. + Args: validate: If set to False the DataAction will not validate its input. """ self._validate = validate @abstractmethod - def _process(self, datum: Any, error: Optional[Any] = None) -> Tuple[Any, Any]: - """ - Applies the data processing step to the datum. + def _process(self, data: np.ndarray) -> np.ndarray: + """Applies the data processing step to the data. Args: - datum: A single item of data which will be processed. - error: An optional error estimation on the datum that can be further propagated. + data: A data array to process. This is a single numpy array containing + all circuit results input to the data processor. + If the elements are ufloat objects consisting of a nominal value and + a standard error, then the error propagation is automatically computed. Returns: - processed data: The data that has been processed along with the propagated error. + The processed data. 
""" - @abstractmethod - def _format_data(self, datum: Any, error: Optional[Any] = None) -> Tuple[Any, Any]: + def _format_data(self, data: np.ndarray) -> np.ndarray: """Format and validate the input. - Check that the given data and error has the correct structure. This method may + Check that the given data has the correct structure. This method may additionally change the data type, e.g. converting a list to a numpy array. Args: - datum: The data instance to check and format. - error: An optional error estimation on the datum to check and format. + data: A data array to format. This is a single numpy array containing + all circuit results input to the data processor. Returns: - datum, error: The formatted datum and its optional error. - - Raises: - DataProcessorError: If either the data or the error do not have the proper format. + The data that has been validated and formatted. """ + return data - def __call__(self, data: Any, error: Optional[Any] = None) -> Tuple[Any, Any]: - """Call the data action of this node on the data and propagate the error. + def __call__(self, data: np.ndarray) -> np.ndarray: + """Call the data action of this node on the data. Args: - data: The data to process. The action nodes in the data processor will - raise errors if the data does not have the appropriate format. - error: An optional error estimation on the datum that can be further processed. + data: A numpy array with arbitrary dtype. If the elements are ufloat objects + consisting of a nominal value and a standard error, then the error propagation + is done automatically. Returns: - processed data: The data processed by self as a tuple of processed datum and - optionally the propagated error estimate. + The processed data. 
""" - return self._process(*self._format_data(data, error)) + return self._process(self._format_data(data)) def __repr__(self): """String representation of the node.""" @@ -94,11 +93,13 @@ def is_trained(self) -> bool: """ @abstractmethod - def train(self, data: List[Any]): + def train(self, data: np.ndarray): """Train a DataAction. Certain data processing nodes, such as a SVD, require data to first train. Args: - data: A list of datum. Each datum is a point used to train the node. + data: A data array for training. This is a single numpy array containing + all circuit results input to the data processor :meth:`~qiskit_experiments.\ + data_processing.data_processor.DataProcessor#train` method. """ diff --git a/qiskit_experiments/data_processing/data_processor.py b/qiskit_experiments/data_processing/data_processor.py index 4cb45d71ad..7781c2176d 100644 --- a/qiskit_experiments/data_processing/data_processor.py +++ b/qiskit_experiments/data_processing/data_processor.py @@ -10,9 +10,34 @@ # copyright notice, and modified files need to carry a notice indicating # that they have been altered from the originals. -"""Actions done on the data to bring it in a usable form.""" +r"""Actions done on the data to bring it in a usable form. -from typing import Any, Dict, List, Set, Tuple, Union +In Qiskit Experiments, uncertainty propagation is offloaded to the ``uncertainties`` +package, that offers a python float and numpy-array compatible number +representation that natively supports standard errors and their propagation. + +Given values :math:`a` and :math:`b` with a finite uncertainty, the error propagation +in the function :math:`f` is computed with derivatives + +.. 
math:: + + \sigma_f^2 \sim \left| \frac{\partial f}{\partial a} \right|^2 \sigma_a^2 + + \left| \frac{\partial f}{\partial b} \right|^2 \sigma_b^2 + + 2 \frac{\partial f}{\partial a} \frac{\partial f}{\partial b} \sigma_{ab} + +where :math:`\sigma_a` and :math:`\sigma_b` are the uncertainties of :math:`a` and :math:`b` while +:math:`\sigma_{ab}` is the covariance between :math:`a` and :math:`b`. +Please refer to the ``uncertainties`` package documentation for additional details. + +.. _uncertainties: +https://pypi.org/project/uncertainties/ + +""" + +from typing import Dict, List, Set, Tuple, Union + +import numpy as np +from uncertainties import unumpy as unp from qiskit_experiments.data_processing.data_action import DataAction, TrainableDataAction from qiskit_experiments.data_processing.exceptions import DataProcessorError @@ -44,8 +69,7 @@ def __init__( input_key: The initial key in the datum Dict[str, Any] under which the data processor will find the data to process. data_actions: A list of data processing actions to construct this data processor with. - If None is given an empty DataProcessor will be created. - to_array: Boolean indicating if the input data will be converted to a numpy array. + If nothing is given the processor returns unprocessed data. """ self._input_key = input_key self._nodes = data_actions if data_actions else [] @@ -69,38 +93,41 @@ def is_trained(self) -> bool: return True - def __call__(self, data: Union[Dict, List[Dict]], **options) -> Tuple[Any, Any]: + def __call__(self, data: Union[Dict, List[Dict]], **options) -> np.ndarray: """ Call self on the given datum. This method sequentially calls the stored data actions on the datum. Args: - data: The data, typically from ExperimentData.data(...), that needs to be processed. - This dict or list of dicts also contains the metadata of each experiment. + data: The data, typically from ``ExperimentData.data(...)``, + that needs to be processed. 
This dict or list of dicts also contains + the metadata of each experiment. options: Run-time options given as keyword arguments that will be passed to the nodes. Returns: - processed data: The data processed by the data processor. + The data processed by the data processor. This is an arbitrary numpy array that + may contain standard errors as a ufloat object. """ return self._call_internal(data, **options) def call_with_history( self, data: Union[Dict, List[Dict]], history_nodes: Set = None - ) -> Tuple[Any, Any, List]: + ) -> Tuple[np.ndarray, List]: """ Call self on the given datum. This method sequentially calls the stored data actions on the datum and also returns the history of the processed data. Args: - data: The data, typically from ExperimentData.data(...), that needs to be processed. - This dict or list of dicts also contains the metadata of each experiment. + data: The data, typically from ``ExperimentData.data(...)``, + that needs to be processed. This dict or list of dicts also contains + the metadata of each experiment. history_nodes: The nodes, specified by index in the data processing chain, to include in the history. If None is given then all nodes will be included in the history. Returns: - processed data: The datum processed by the data processor. - history: The datum processed at each node of the data processor. + A tuple of (processed data, history), that are the data processed by the processor + and its intermediate state in each specified node, respectively. """ return self._call_internal(data, True, history_nodes) @@ -110,7 +137,7 @@ def _call_internal( with_history: bool = False, history_nodes: Set = None, call_up_to_node: int = None, - ) -> Union[Tuple[Any, Any], Tuple[Any, Any, List]]: + ) -> Union[np.ndarray, Tuple[np.ndarray, List]]: """Process the data with or without storing the history of the computation. Args: @@ -125,52 +152,64 @@ def _call_internal( then all nodes in the data processing chain will be called. 
Returns: - datum_ and history if with_history is True or datum_ if with_history is False. + When ``with_history`` is ``False`` it returns a numpy array of processed data. + Otherwise it returns a tuple of (processed data, history) in which the `history` + is a list of intermediate data at each step. """ if call_up_to_node is None: call_up_to_node = len(self._nodes) - datum_, error_ = self._data_extraction(data), None + data = self._data_extraction(data) history = [] - for index, node in enumerate(self._nodes): - - if index < call_up_to_node: - datum_, error_ = node(datum_, error_) - - if with_history and ( - history_nodes is None or (history_nodes and index in history_nodes) - ): - history.append((node.__class__.__name__, datum_, error_, index)) + for index, node in enumerate(self._nodes[:call_up_to_node]): + data = node(data) + + if with_history and (history_nodes is None or index in history_nodes): + if data.shape[0] == 1: + cache_data = data[0] + else: + cache_data = data + history.append( + ( + node.__class__.__name__, + cache_data, + index, + ) + ) + + # Return only first entry if len(data) == 1, e.g. [[0, 1]] -> [0, 1] + if data.shape[0] == 1: + out_data = data[0] + else: + out_data = data if with_history: - return datum_, error_, history + return out_data, history else: - return datum_, error_ + return out_data - def train(self, data: List[Dict[str, Any]]): + def train(self, data: Union[Dict, List[Dict]]): """Train the nodes of the data processor. Args: data: The data to use to train the data processor. """ - for index, node in enumerate(self._nodes): if isinstance(node, TrainableDataAction): if not node.is_trained: # Process the data up to the untrained node. 
- node.train(self._call_internal(data, call_up_to_node=index)[0]) + node.train(self._call_internal(data, call_up_to_node=index)) - def _data_extraction(self, data: Union[Dict, List[Dict]]) -> List: + def _data_extraction(self, data: Union[Dict, List[Dict]]) -> np.ndarray: """Extracts the data on which to run the nodes. If the datum is a list of dicts then the data under self._input_key is extracted - from each dict and appended to a list which therefore contains all the data. If the - data processor has to_array set to True then the list will be converted to a numpy - array. + from each dict and appended to a list which therefore contains all the data. Args: - data: A list of such dicts where the data is contained under the key self._input_key. + data: A list of such dicts where the data is contained under the key + ``self._input_key``. Returns: The data formatted in such a way that it is ready to be processed by the nodes. @@ -178,26 +217,60 @@ def _data_extraction(self, data: Union[Dict, List[Dict]]) -> List: Raises: DataProcessorError: - If the input datum is not a list or a dict. - - If the data processor received a single datum but requires all the data to - process it properly. - If the input key of the data processor is not contained in the data. + - If the data processor receives multiple data with different measurement + configuration, i.e. Jagged array. """ if isinstance(data, dict): data = [data] - try: - data_ = [_datum[self._input_key] for _datum in iter(data)] - except KeyError as error: - raise DataProcessorError( - f"The input key {self._input_key} was not found in the input datum." - ) from error - except TypeError as error: - raise DataProcessorError( - f"{self.__class__.__name__} only extracts data from " - f"lists or dicts, received {type(data)}." 
- ) from error - - return data_ + data_to_process = [] + dims = None + for datum in data: + try: + outcome = datum[self._input_key] + except TypeError as error: + raise DataProcessorError( + f"{self.__class__.__name__} only extracts data from " + f"lists or dicts, received {type(data)}." + ) from error + except KeyError as error: + raise DataProcessorError( + f"The input key {self._input_key} was not found in the input datum." + ) from error + + if self._input_key != "counts": + outcome = np.asarray(outcome) + # Validate data shape + if dims is None: + dims = outcome.shape + else: + # This is because each data node creates full array of all result data. + # Jagged array cannot be numerically operated with numpy array. + if outcome.shape != dims: + raise DataProcessorError( + "Input data is likely a mixture of job results with different " + "measurement setup. Data processor doesn't support jagged array." + ) + data_to_process.append(outcome) + + data_to_process = np.asarray(data_to_process) + + if data_to_process.dtype in (float, int): + # Likely level1 or below. Return ufloat array with un-computed std_dev. + # The output data format is a standard ndarray with dtype=object with + # arbitrary shape [n_circuits, ...] depending on the measurement setup. + nominal_values = np.asarray(data_to_process, float) + return unp.uarray( + nominal_values=nominal_values, + std_devs=np.full_like(nominal_values, np.nan, dtype=float), + ) + else: + # Likely level2 counts or level2 memory data. Cannot be typecasted to ufloat. + # The output data format is a standard ndarray with dtype=object with + # shape [n_circuits] or [n_circuits, n_shots]. + # No error value is bound. 
+ return np.asarray(data_to_process, dtype=object) def __repr__(self): """String representation of data processors.""" diff --git a/qiskit_experiments/data_processing/nodes.py b/qiskit_experiments/data_processing/nodes.py index e4d78b7d1d..7fb6f7e991 100644 --- a/qiskit_experiments/data_processing/nodes.py +++ b/qiskit_experiments/data_processing/nodes.py @@ -14,8 +14,10 @@ from abc import abstractmethod from numbers import Number -from typing import Any, Dict, List, Optional, Tuple, Union, Sequence +from typing import List, Union, Sequence + import numpy as np +from uncertainties import unumpy as unp, ufloat from qiskit_experiments.data_processing.data_action import DataAction, TrainableDataAction from qiskit_experiments.data_processing.exceptions import DataProcessorError @@ -25,74 +27,89 @@ class AverageData(DataAction): """A node to average data representable as numpy arrays.""" def __init__(self, axis: int, validate: bool = True): - """Initialize a data averaging node. + r"""Initialize a data averaging node. Args: axis: The axis along which to average. validate: If set to False the DataAction will not validate its input. + + Notes: + If the input array has no standard error, then this node will compute the + standard error of the mean, i.e. the standard deviation of the datum divided by + :math:`\sqrt{N}` where :math:`N` is the number of data points. + Otherwise the standard error is propagated from the input errors, i.e. :math:`N^{-1}` times + the square root of the sum of the squared errors for uncorrelated inputs. """ super().__init__(validate) self._axis = axis - def _format_data(self, datum: Any, error: Optional[Any] = None): - """Format the data into numpy arrays.""" - datum = np.asarray(datum, dtype=float) + def _format_data(self, data: np.ndarray) -> np.ndarray: + """Format the data into numpy arrays. + + Args: + data: A data array to format. This is a single numpy array containing + all circuit results input to the data processor. 
+ + Returns: + The data that has been validated and formatted. 
+ Raises: + DataProcessorError: When the specified axis does not exist in given array. + """ if self._validate: - if len(datum.shape) <= self._axis: + if len(data.shape) <= self._axis: raise DataProcessorError( - f"Cannot average the {len(datum.shape)} dimensional " + f"Cannot average the {len(data.shape)} dimensional " f"array along axis {self._axis}." ) - if error is not None: - error = np.asarray(error, dtype=float) + return data - return datum, error - - def _process( - self, datum: np.array, error: Optional[np.array] = None - ) -> Tuple[np.array, np.array]: + def _process(self, data: np.ndarray) -> np.ndarray: """Average the data. - Args: - datum: an array of data. - - Returns: - Two arrays with one less dimension than the given datum and error. The error - is the standard error of the mean, i.e. the standard deviation of the datum - divided by :math:`sqrt{N}` where :math:`N` is the number of data points. + Args: + data: A data array to process. This is a single numpy array containing + all circuit results input to the data processor. - Raises: - DataProcessorError: If the axis is not an int. + Returns: + Arrays with one less dimension than the given data. 
""" - standard_error = np.std(datum, axis=self._axis) / np.sqrt(datum.shape[self._axis]) + ax = self._axis + + reduced_array = np.mean(data, axis=ax) + nominals = unp.nominal_values(reduced_array) + errors = unp.std_devs(reduced_array) + + if np.any(np.isnan(errors)): + # replace empty elements with SEM + sem = np.std(unp.nominal_values(data), axis=ax) / np.sqrt(data.shape[ax]) + errors = np.where(np.isnan(errors), sem, errors) - return np.average(datum, axis=self._axis), standard_error + return unp.uarray(nominals, errors) class MinMaxNormalize(DataAction): """Normalizes the data.""" - def _format_data(self, datum: Any, error: Optional[Any] = None): - """Format the data into numpy arrays.""" - datum = np.asarray(datum, dtype=float) + def _process(self, data: np.ndarray) -> np.ndarray: + """Normalize the data to the interval [0, 1]. - if error is not None: - error = np.asarray(error, dtype=float) + Args: + data: A data array to process. This is a single numpy array containing + all circuit results input to the data processor. - return datum, error + Returns: + The normalized data. - def _process( - self, datum: np.array, error: Optional[np.array] = None - ) -> Tuple[np.array, np.array]: - """Normalize the data to the interval [0, 1].""" - min_y, max_y = np.min(datum), np.max(datum) + Notes: + This doesn't consider the uncertainties of the minimum or maximum values. + Input data array is just scaled by the data range. 
+ """ + nominals = unp.nominal_values(data) + min_y, max_y = np.min(nominals), np.max(nominals) - if error is not None: - return (datum - min_y) / (max_y - min_y), error / (max_y - min_y) - else: - return (datum - min_y) / (max_y - min_y), None + return (data - min_y) / (max_y - min_y) class SVD(TrainableDataAction): @@ -113,28 +130,24 @@ def __init__(self, validate: bool = True): self._n_slots = 0 self._n_iq = 0 - def _format_data(self, datum: Any, error: Optional[Any] = None) -> Tuple[Any, Any]: + def _format_data(self, data: np.ndarray) -> np.ndarray: """Check that the IQ data is 2D and convert it to a numpy array. Args: - datum: All IQ data. This data has different dimensions depending on whether + data: A data array to format. This is a single numpy array containing + all circuit results input to the data processor. + This data has different dimensions depending on whether single-shot or averaged data is being processed. Single-shot data is four dimensional, i.e., ``[#circuits, #shots, #slots, 2]``, while averaged IQ data is three dimensional, i.e., ``[#circuits, #slots, 2]``. Here, ``#slots`` is the number of classical registers used in the circuit. - error: Optional, accompanied error. Returns: - datum and any error estimate as a numpy array. + data and any error estimate as a numpy array. Raises: DataProcessorError: If the datum does not have the correct format. 
""" - datum = np.asarray(datum, dtype=float) - - if error is not None: - error = np.asarray(error, dtype=float) - self._n_circs = 0 self._n_shots = 0 self._n_slots = 0 @@ -143,11 +156,11 @@ def _format_data(self, datum: Any, error: Optional[Any] = None) -> Tuple[Any, An # identify shape try: # level1 single-shot data - self._n_circs, self._n_shots, self._n_slots, self._n_iq = datum.shape + self._n_circs, self._n_shots, self._n_slots, self._n_iq = data.shape except ValueError: try: # level1 data averaged over shots - self._n_circs, self._n_slots, self._n_iq = datum.shape + self._n_circs, self._n_slots, self._n_iq = data.shape except ValueError as ex: raise DataProcessorError( f"Data given to {self.__class__.__name__} is not likely level1 data." @@ -160,13 +173,7 @@ def _format_data(self, datum: Any, error: Optional[Any] = None) -> Tuple[Any, An f"(I and Q). Instead, {self._n_iq} dimensions were found." ) - if error is not None and error.shape != datum.shape: - raise DataProcessorError( - f"IQ data error given to {self.__class__.__name__} must be a 2D array." - f"Instead, a {len(error.shape)}D array was given." - ) - - return datum, error + return data @property def axis(self) -> List[np.array]: @@ -203,15 +210,12 @@ def is_trained(self) -> bool: """ return self._main_axes is not None - def _process( - self, datum: np.array, error: Optional[np.array] = None - ) -> Tuple[np.array, np.array]: + def _process(self, data: np.ndarray) -> np.ndarray: """Project the IQ data onto the axis defined by an SVD and scale it. Args: - datum: A 2D array of qubits, and an average complex IQ point as [real, imaginary]. - error: An optional 2D array of qubits, and an error on an average complex IQ - point as [real, imaginary]. + data: A data array to process. This is a single numpy array containing + all circuit results input to the data processor. Returns: A Tuple of 1D arrays of the result of the SVD and the associated error. 
Each entry @@ -231,37 +235,19 @@ def _process( # level1 single mode dims = self._n_circs, self._n_shots, self._n_slots - processed_data = np.zeros(dims, dtype=float) - error_vals = np.zeros(dims, dtype=float) + projected_data = np.zeros(dims, dtype=object) for idx in range(self._n_slots): scale = self.scales[idx] + # error propagation is computed from data if any std error exists centered = np.array( - [datum[..., idx, iq] - self.means(qubit=idx, iq_index=iq) for iq in [0, 1]] + [data[..., idx, iq] - self.means(qubit=idx, iq_index=iq) for iq in [0, 1]] ) - processed_data[..., idx] = (self._main_axes[idx] @ centered) / scale - - if error is not None: - angle = np.arctan(self._main_axes[idx][1] / self._main_axes[idx][0]) - error_vals[..., idx] = ( - np.sqrt( - (error[..., idx, 0] * np.cos(angle)) ** 2 - + (error[..., idx, 1] * np.sin(angle)) ** 2 - ) - / scale - ) + projected_data[..., idx] = (self._main_axes[idx] @ centered) / scale - if self._n_circs == 1: - if error is None: - return processed_data[0], None - else: - return processed_data[0], error_vals[0] + return projected_data - if error is None: - return processed_data, None - return processed_data, error_vals - - def train(self, data: List[Any]): + def train(self, data: np.ndarray): """Train the SVD on the given data. Each element of the given data will be converted to a 2D array of dimension @@ -272,19 +258,21 @@ def train(self, data: List[Any]): qubit so that future data points can be projected onto the axis. Args: - data: A list of datums. Each datum will be converted to a 2D array. + data: A data array to be trained. This is a single numpy array containing + all circuit results input to the data processor. """ if data is None: return - data, _ = self._format_data(data) + # TODO do not remove standard error. Currently svd is not supported. 
+ data = unp.nominal_values(self._format_data(data)) self._main_axes = [] self._scales = [] self._means = [] - for qubit_idx in range(self._n_slots): - datums = np.vstack([datum[qubit_idx] for datum in data]).T + for idx in range(self._n_slots): + datums = np.vstack([datum[idx] for datum in data]).T # Calculate the mean of the data to recenter it in the IQ plane. mean_i = np.average(datums[0, :]) @@ -314,61 +302,42 @@ def __init__(self, scale: float = 1.0, validate: bool = True): super().__init__(validate) @abstractmethod - def _process(self, datum: np.array, error: Optional[np.array] = None) -> np.array: + def _process(self, data: np.ndarray) -> np.ndarray: """Defines how the IQ point is processed. - The dimension of the input datum corresponds to different types of data: - - 2D represents average IQ Data. - - 3D represents either a single-shot datum or all data of averaged data. - - 4D represents all data of single-shot data. + The last dimension of the array should correspond to [real, imaginary] part of data. Args: - datum: A N dimensional array of complex IQ points as [real, imaginary]. - error: A N dimensional array of errors on complex IQ points as [real, imaginary]. + data: A data array to process. This is a single numpy array containing + all circuit results input to the data processor. Returns: - Processed IQ point and its associated error estimate. + The data that has been processed. """ - def _format_data(self, datum: Any, error: Optional[Any] = None) -> Tuple[Any, Any]: - """Check that the IQ data has the correct format and convert to numpy array. + def _format_data(self, data: np.ndarray) -> np.ndarray: + """Format and validate the input. Args: - datum: A single item of data which corresponds to single-shot IQ data. It's - dimension will depend on whether it is single-shot IQ data (three-dimensional) - or averaged IQ date (two-dimensional). + data: A data array to format. 
This is a single numpy array containing + all circuit results input to the data processor. Returns: - datum and any error estimate as a numpy array. + The data that has been validated and formatted. Raises: - DataProcessorError: If the datum does not have the correct format. + DataProcessorError: When input data is not likely IQ data. """ - datum = np.asarray(datum, dtype=float) - - if error is not None: - error = np.asarray(error, dtype=float) - if self._validate: - if len(datum.shape) not in {2, 3, 4}: - raise DataProcessorError( - f"IQ data given to {self.__class__.__name__} must be an N dimensional" - f"array with N in (2, 3, 4). Instead, a {len(datum.shape)}D array was given." - ) - - if error is not None and len(error.shape) not in {2, 3, 4}: - raise DataProcessorError( - f"IQ data error given to {self.__class__.__name__} must be an N dimensional" - f"array with N in (2, 3, 4). Instead, a {len(error.shape)}D array was given." - ) - - if error is not None and len(error.shape) != len(datum.shape): + if data.shape[-1] != 2: raise DataProcessorError( - "Datum and error do not have the same shape: " - f"{len(datum.shape)} != {len(error.shape)}." + f"IQ data given to {self.__class__.__name__} must be a multi-dimensional array" + "of dimension [d0, d1, ..., 2] in which the last dimension " + "corresponds to IQ elements." + f"Input data contains element with length {data.shape[-1]} != 2." ) - return datum, error + return data def __repr__(self): """String representation of the node.""" @@ -378,46 +347,31 @@ def __repr__(self): class ToReal(IQPart): """IQ data post-processing. Isolate the real part of single-shot IQ data.""" - def _process( - self, datum: np.array, error: Optional[np.array] = None - ) -> Tuple[np.array, np.array]: + def _process(self, data: np.ndarray) -> np.ndarray: """Take the real part of the IQ data. Args: - datum: An N dimensional array of shots, qubits, and a complex IQ point as - [real, imaginary]. 
- error: An N dimensional optional array of shots, qubits, and an error on a - complex IQ point as [real, imaginary]. + data: An N-dimensional array of complex IQ point as [real, imaginary]. Returns: - A N-1 dimensional array, each entry is the real part of the given IQ data and error. + A N-1 dimensional array, each entry is the real part of the given IQ data. """ - if error is not None: - return datum[..., 0] * self.scale, error[..., 0] * self.scale - else: - return datum[..., 0] * self.scale, None + return data[..., 0] * self.scale class ToImag(IQPart): """IQ data post-processing. Isolate the imaginary part of single-shot IQ data.""" - def _process(self, datum: np.array, error: Optional[np.array] = None) -> np.array: + def _process(self, data: np.ndarray) -> np.ndarray: """Take the imaginary part of the IQ data. Args: - datum: An N dimensional array of shots, qubits, and a complex IQ point as - [real, imaginary]. - error: An N dimensional optional array of shots, qubits, and an error on a - complex IQ point as [real, imaginary]. + data: An N-dimensional array of complex IQ point as [real, imaginary]. Returns: - A N-1 dimensional array, each entry is the imaginary part of the given IQ data - and error. + A N-1 dimensional array, each entry is the imaginary part of the given IQ data. """ - if error is not None: - return datum[..., 1] * self.scale, error[..., 1] * self.scale - else: - return datum[..., 1] * self.scale, None + return data[..., 1] * self.scale class Probability(DataAction): @@ -458,6 +412,8 @@ class Probability(DataAction): \text{E}[p] = \frac{F + 0.5}{N + 1}, \quad \text{Var}[p] = \frac{\text{E}[p] (1 - \text{E}[p])}{N + 2} + + This node will deprecate standard error provided by the previous node. """ def __init__( @@ -488,85 +444,76 @@ def __init__( "Prior for probability node must be a float or pair of floats." 
) self._alpha_prior = list(alpha_prior) + super().__init__(validate) - def _format_data(self, datum: dict, error: Optional[Any] = None) -> Tuple[dict, Any]: + def _format_data(self, data: np.ndarray) -> np.ndarray: """ Checks that the given data has a counts format. Args: - datum: An instance of data the should be a dict with bit strings as keys - and counts as values. + data: A data array to format. This is a single numpy array containing + all circuit results input to the data processor. + This is usually an object data type containing Python dictionaries of + count data keyed on the measured bitstring. + A count value is a discrete quantity representing the frequency of an event. + Therefore, count values do not have an uncertainty. Returns: - The datum as given. + The ``data`` as given. Raises: - DataProcessorError: if the data is not a counts dict or a list of counts dicts. + DataProcessorError: If the data is not a counts dict or a list of counts dicts. """ - if self._validate: - - if isinstance(datum, dict): - data = [datum] - elif isinstance(datum, list): - data = datum - else: - raise DataProcessorError(f"Datum must be dict or list, received {type(datum)}.") + valid_count_type = int, np.integer - for datum_ in data: - if not isinstance(datum_, dict): + if self._validate: + for datum in data: + if not isinstance(datum, dict): raise DataProcessorError( - f"Given counts datum {datum_} to " - f"{self.__class__.__name__} is not a valid count format." + f"Data entry must be dictionary of counts, received {type(datum)}." ) - - for bit_str, count in datum_.items(): + for bit_str, count in datum.items(): if not isinstance(bit_str, str): raise DataProcessorError( - f"Key {bit_str} is not a valid count key in{self.__class__.__name__}." + f"Key {bit_str} is not a valid count key in {self.__class__.__name__}." 
) - - if not isinstance(count, (int, float, np.integer)): + if not isinstance(count, valid_count_type): raise DataProcessorError( - f"Count {bit_str} is not a valid count value in {self.__class__.__name__}." + f"Count {bit_str} is not a valid count for {self.__class__.__name__}. " + "The uncertainty of probability is computed based on sampling error, " + "thus the count should be an error-free discrete quantity " + "representing the frequency of event." ) - return datum, None + return data + + def _process(self, data: np.ndarray) -> np.ndarray: + """Compute mean and standard error from the beta distribution. - def _process( - self, - datum: Union[Dict[str, Any], List[Dict[str, Any]]], - error: Optional[Union[Dict, List]] = None, - ) -> Union[Tuple[float, float], Tuple[np.array, np.array]]: - """ Args: - datum: The data dictionary,taking the data under counts and - adding the corresponding probabilities. + data: A data array to process. This is a single numpy array containing + all circuit results input to the data processor. + This is usually an object data type containing Python dictionaries of + count data keyed on the measured bitstring. Returns: - processed data: A dict with the populations and standard deviation. + The data that has been processed. 
""" - if isinstance(datum, dict): - return self._population_error(datum) - else: - populations, errors = [], [] + probabilities = np.empty(data.size, dtype=object) - for datum_ in datum: - pop, error = self._population_error(datum_) - populations.append(pop) - errors.append(error) + for idx, counts_dict in enumerate(data): + shots = sum(counts_dict.values()) + freq = counts_dict.get(self._outcome, 0) + alpha_posterior = [freq + self._alpha_prior[0], shots - freq + self._alpha_prior[1]] + alpha_sum = sum(alpha_posterior) - return np.array(populations), np.array(errors) + p_mean = alpha_posterior[0] / alpha_sum + p_var = p_mean * (1 - p_mean) / (alpha_sum + 1) - def _population_error(self, counts_dict: Dict[str, int]) -> Tuple[float, float]: - """Helper method""" - shots = sum(counts_dict.values()) - freq = counts_dict.get(self._outcome, 0) - alpha_posterior = [freq + self._alpha_prior[0], shots - freq + self._alpha_prior[1]] - alpha_sum = sum(alpha_posterior) - p_mean = alpha_posterior[0] / alpha_sum - p_var = p_mean * (1 - p_mean) / (alpha_sum + 1) - return p_mean, np.sqrt(p_var) + probabilities[idx] = ufloat(nominal_value=p_mean, std_dev=np.sqrt(p_var)) + + return probabilities class BasisExpectationValue(DataAction): @@ -576,40 +523,35 @@ class BasisExpectationValue(DataAction): The sign becomes P(0) -> 1, P(1) -> -1. """ - def _format_data( - self, datum: np.ndarray, error: Optional[np.ndarray] = None - ) -> Tuple[Any, Any]: - """Check that the input data are probabilities. + def _format_data(self, data: np.ndarray) -> np.ndarray: + """Format and validate the input. Args: - datum: An array representing probabilities. - error: An array representing error. + data: A data array to format. This is a single numpy array containing + all circuit results input to the data processor. Returns: - Arrays of probability and its error + The data that has been validated and formatted. 
Raises: DataProcessorError: When input value is not in [0, 1] """ - if not all(0.0 <= p <= 1.0 for p in datum): - raise DataProcessorError( - f"Input data for node {self.__class__.__name__} is not likely probability." - ) - return datum, error + if self._validate: + if not all(0.0 <= p <= 1.0 for p in data): + raise DataProcessorError( + f"Input data for node {self.__class__.__name__} is not likely probability." + ) + + return data - def _process( - self, datum: np.array, error: Optional[np.array] = None - ) -> Tuple[np.array, np.array]: - """Compute eigenvalue. + def _process(self, data: np.ndarray) -> np.ndarray: + """Compute basis eigenvalue. Args: - datum: An array representing probabilities. - error: An array representing error. + data: A data array to process. This is a single numpy array containing + all circuit results input to the data processor. Returns: - Arrays of eigenvalues and its error + The data that has been processed. """ - if error is not None: - return 2 * (0.5 - datum), 2 * error - else: - return 2 * (0.5 - datum), None + return 2 * (0.5 - data) diff --git a/releasenotes/notes/upgrade-data-processor-30204e10e1958c30.yaml b/releasenotes/notes/upgrade-data-processor-30204e10e1958c30.yaml new file mode 100644 index 0000000000..94de41cc90 --- /dev/null +++ b/releasenotes/notes/upgrade-data-processor-30204e10e1958c30.yaml @@ -0,0 +1,6 @@ +--- +developer: + - | + Data format used in the :py:class:`~qiskit_experiments.data_processing.data_processor.\ + DataProcessor` is upgraded from `Tuple[Any, Any]` to `np.ndarray`. Uncertainty propagation + computation is offloaded to uncertainties package. See module documentation for details. 
diff --git a/requirements.txt b/requirements.txt index 6380bdb153..39dd8dfc05 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ scipy>=1.4 qiskit-terra>=0.18.0 qiskit-ibmq-provider>=0.16.0 matplotlib>=3.4 +uncertainties diff --git a/test/data_processing/test_data_processing.py b/test/data_processing/test_data_processing.py index a4771a159a..98aad43e54 100644 --- a/test/data_processing/test_data_processing.py +++ b/test/data_processing/test_data_processing.py @@ -17,6 +17,7 @@ from test.fake_experiment import FakeExperiment import numpy as np +from uncertainties import unumpy as unp, ufloat from qiskit.result.models import ExperimentResultData, ExperimentResult from qiskit.result import Result @@ -76,16 +77,153 @@ def setUp(self): self.exp_data_lvl2 = ExperimentData(FakeExperiment()) self.exp_data_lvl2.add_data(Result(results=[res1, res2], **self.base_result_args)) + def test_data_prep_level1_memory_single(self): + """Format meas_level=1 meas_return=single.""" + # slots = 3, shots = 2, circuits = 2 + data_raw = [ + { + "memory": [ + [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]], + [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]], + ], + }, + { + "memory": [ + [[0.7, 0.8], [0.9, 1.0], [1.1, 1.2]], + [[0.7, 0.8], [0.9, 1.0], [1.1, 1.2]], + ], + }, + ] + formatted_data = DataProcessor("memory", [])._data_extraction(data_raw) + + ref_data = np.array( + [ + [ + [ + [ufloat(0.1, np.nan), ufloat(0.2, np.nan)], + [ufloat(0.3, np.nan), ufloat(0.4, np.nan)], + [ufloat(0.5, np.nan), ufloat(0.6, np.nan)], + ], + [ + [ufloat(0.1, np.nan), ufloat(0.2, np.nan)], + [ufloat(0.3, np.nan), ufloat(0.4, np.nan)], + [ufloat(0.5, np.nan), ufloat(0.6, np.nan)], + ], + ], + [ + [ + [ufloat(0.7, np.nan), ufloat(0.8, np.nan)], + [ufloat(0.9, np.nan), ufloat(1.0, np.nan)], + [ufloat(1.1, np.nan), ufloat(1.2, np.nan)], + ], + [ + [ufloat(0.7, np.nan), ufloat(0.8, np.nan)], + [ufloat(0.9, np.nan), ufloat(1.0, np.nan)], + [ufloat(1.1, np.nan), ufloat(1.2, np.nan)], + ], + ], + ] + ) + + 
self.assertTupleEqual(formatted_data.shape, ref_data.shape) + np.testing.assert_array_equal( + unp.nominal_values(formatted_data), unp.nominal_values(ref_data) + ) + # note that np.nan cannot be evaluated by "==" + self.assertTrue(np.isnan(unp.std_devs(formatted_data)).all()) + + def test_data_prep_level1_memory_average(self): + """Format meas_level=1 meas_return=avg.""" + # slots = 3, circuits = 2 + data_raw = [ + { + "memory": [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]], + }, + { + "memory": [[0.7, 0.8], [0.9, 1.0], [1.1, 1.2]], + }, + ] + formatted_data = DataProcessor("memory", [])._data_extraction(data_raw) + + ref_data = np.array( + [ + [ + [ufloat(0.1, np.nan), ufloat(0.2, np.nan)], + [ufloat(0.3, np.nan), ufloat(0.4, np.nan)], + [ufloat(0.5, np.nan), ufloat(0.6, np.nan)], + ], + [ + [ufloat(0.7, np.nan), ufloat(0.8, np.nan)], + [ufloat(0.9, np.nan), ufloat(1.0, np.nan)], + [ufloat(1.1, np.nan), ufloat(1.2, np.nan)], + ], + ] + ) + + self.assertTupleEqual(formatted_data.shape, ref_data.shape) + np.testing.assert_array_equal( + unp.nominal_values(formatted_data), unp.nominal_values(ref_data) + ) + # note that np.nan cannot be evaluated by "==" + self.assertTrue(np.isnan(unp.std_devs(formatted_data)).all()) + + def test_data_prep_level2_counts(self): + """Format meas_level=2.""" + # slots = 2, shots=10, circuits = 2 + data_raw = [ + { + "counts": {"00": 2, "01": 3, "10": 1, "11": 4}, + }, + { + "counts": {"00": 3, "01": 3, "10": 2, "11": 2}, + }, + ] + formatted_data = DataProcessor("counts", [])._data_extraction(data_raw) + + ref_data = np.array( + [ + {"00": 2, "01": 3, "10": 1, "11": 4}, + {"00": 3, "01": 3, "10": 2, "11": 2}, + ], + dtype=object, + ) + + np.testing.assert_array_equal(formatted_data, ref_data) + + def test_data_prep_level2_counts_memory(self): + """Format meas_level=2 with having memory set.""" + # slots = 2, shots=10, circuits = 2 + data_raw = [ + { + "counts": {"00": 2, "01": 3, "10": 1, "11": 4}, + "memory": ["00", "01", "01", "10", "11", "11", 
"00", "01", "11", "11"], + }, + { + "counts": {"00": 3, "01": 3, "10": 2, "11": 2}, + "memory": ["00", "00", "01", "00", "10", "01", "01", "11", "10", "11"], + }, + ] + formatted_data = DataProcessor("memory", [])._data_extraction(data_raw) + + ref_data = np.array( + [ + ["00", "01", "01", "10", "11", "11", "00", "01", "11", "11"], + ["00", "00", "01", "00", "10", "01", "01", "11", "10", "11"], + ], + dtype=object, + ) + + np.testing.assert_array_equal(formatted_data, ref_data) + def test_empty_processor(self): """Check that a DataProcessor without steps does nothing.""" data_processor = DataProcessor("counts") - datum, error = data_processor(self.exp_data_lvl2.data(0)) - self.assertEqual(datum, [{"00": 4, "10": 6}]) - self.assertIsNone(error) + datum = data_processor(self.exp_data_lvl2.data(0)) + self.assertEqual(datum, {"00": 4, "10": 6}) - datum, error, history = data_processor.call_with_history(self.exp_data_lvl2.data(0)) - self.assertEqual(datum, [{"00": 4, "10": 6}]) + datum, history = data_processor.call_with_history(self.exp_data_lvl2.data(0)) + self.assertEqual(datum, {"00": 4, "10": 6}) self.assertEqual(history, []) def test_to_real(self): @@ -96,7 +234,7 @@ def test_to_real(self): exp_data.add_data(self.result_lvl1) # Test to real on a single datum - new_data, error = processor(exp_data.data(0)) + new_data = processor(exp_data.data(0)) expected_old = { "memory": [ @@ -113,20 +251,29 @@ def test_to_real(self): expected_new = np.array([[1103.26, 2959.012], [442.17, -5279.41], [3016.514, -3404.7560]]) self.assertEqual(exp_data.data(0), expected_old) - self.assertTrue(np.allclose(new_data, expected_new)) - self.assertIsNone(error) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + expected_new, + ) + self.assertTrue(np.isnan(unp.std_devs(new_data)).all()) # Test that we can call with history. 
- new_data, error, history = processor.call_with_history(exp_data.data(0)) + new_data, history = processor.call_with_history(exp_data.data(0)) self.assertEqual(exp_data.data(0), expected_old) - self.assertTrue(np.allclose(new_data, expected_new)) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + expected_new, + ) self.assertEqual(history[0][0], "ToReal") - self.assertTrue(np.allclose(history[0][1], expected_new)) + np.testing.assert_array_almost_equal( + unp.nominal_values(history[0][1]), + expected_new, + ) # Test to real on more than one datum - new_data, error = processor(exp_data.data()) + new_data = processor(exp_data.data()) expected_new = np.array( [ @@ -134,8 +281,10 @@ def test_to_real(self): [[5131.962, 4438.87], [3415.985, 2942.458], [5199.964, 4030.843]], ] ) - - self.assertTrue(np.allclose(new_data, expected_new)) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + expected_new, + ) def test_to_imag(self): """Test that we can average the data.""" @@ -145,7 +294,7 @@ def test_to_imag(self): exp_data = ExperimentData(FakeExperiment()) exp_data.add_data(self.result_lvl1) - new_data, error = processor(exp_data.data(0)) + new_data = processor(exp_data.data(0)) expected_old = { "memory": [ @@ -168,19 +317,28 @@ def test_to_imag(self): ) self.assertEqual(exp_data.data(0), expected_old) - self.assertTrue(np.allclose(new_data, expected_new)) - self.assertIsNone(error) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + expected_new, + ) + self.assertTrue(np.isnan(unp.std_devs(new_data)).all()) # Test that we can call with history. 
- new_data, error, history = processor.call_with_history(exp_data.data(0)) + new_data, history = processor.call_with_history(exp_data.data(0)) self.assertEqual(exp_data.data(0), expected_old) - self.assertTrue(np.allclose(new_data, expected_new)) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + expected_new, + ) self.assertEqual(history[0][0], "ToImag") - self.assertTrue(np.allclose(history[0][1], expected_new)) + np.testing.assert_array_almost_equal( + unp.nominal_values(history[0][1]), + expected_new, + ) # Test to imaginary on more than one datum - new_data, error = processor(exp_data.data()) + new_data = processor(exp_data.data()) expected_new = np.array( [ @@ -189,7 +347,10 @@ def test_to_imag(self): ] ) - self.assertTrue(np.allclose(new_data, expected_new)) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + expected_new, + ) def test_populations(self): """Test that counts are properly converted to a population.""" @@ -198,14 +359,17 @@ def test_populations(self): processor.append(Probability("00", alpha_prior=1.0)) # Test on a single datum. 
- new_data, error = processor(self.exp_data_lvl2.data(0)) + new_data = processor(self.exp_data_lvl2.data(0)) - self.assertAlmostEqual(float(new_data), 0.41666667) - self.assertAlmostEqual(float(error), 0.13673544235706114) + self.assertAlmostEqual(float(unp.nominal_values(new_data)), 0.41666667) + self.assertAlmostEqual(float(unp.std_devs(new_data)), 0.13673544235706114) # Test on all the data - new_data, error = processor(self.exp_data_lvl2.data()) - np.testing.assert_array_almost_equal(new_data, np.array([0.41666667, 0.25])) + new_data = processor(self.exp_data_lvl2.data()) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + np.array([0.41666667, 0.25]), + ) def test_validation(self): """Test the validation mechanism.""" @@ -267,7 +431,7 @@ def test_avg_and_single(self): to_imag = DataProcessor("memory", [ToImag(scale=1)]) # Test the real single shot node - new_data, error = to_real(self.exp_data_single.data(0)) + new_data = to_real(self.exp_data_single.data(0)) expected = np.array( [ [-56470872.0, -53407256.0], @@ -278,11 +442,14 @@ def test_avg_and_single(self): [51426688.0, 34330920.0], ] ) - self.assertTrue(np.allclose(new_data, expected)) - self.assertIsNone(error) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + expected, + ) + self.assertTrue(np.isnan(unp.std_devs(new_data)).all()) # Test the imaginary single shot node - new_data, error = to_imag(self.exp_data_single.data(0)) + new_data = to_imag(self.exp_data_single.data(0)) expected = np.array( [ [-136691568.0, -176278624.0], @@ -293,15 +460,24 @@ def test_avg_and_single(self): [-142703104.0, -185572592.0], ] ) - self.assertTrue(np.allclose(new_data, expected)) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + expected, + ) # Test the real average node - new_data, error = to_real(self.exp_data_avg.data(0)) - self.assertTrue(np.allclose(new_data, np.array([-539698.0, 5541283.0]))) + new_data = to_real(self.exp_data_avg.data(0)) + 
np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + np.array([-539698.0, 5541283.0]), + ) # Test the imaginary average node - new_data, error = to_imag(self.exp_data_avg.data(0)) - self.assertTrue(np.allclose(new_data, np.array([-153030784.0, -160369600.0]))) + new_data = to_imag(self.exp_data_avg.data(0)) + np.testing.assert_array_almost_equal( + unp.nominal_values(new_data), + np.array([-153030784.0, -160369600.0]), + ) class TestAveragingAndSVD(BaseDataProcessorTest): @@ -401,18 +577,28 @@ def test_averaging(self): processor = DataProcessor("memory", [AverageData(axis=1)]) # Test that we get the expected outcome for the excited state - processed, error = processor(self.data.data(0)) - expected_avg = np.array([[1.0, 1.0], [-1.0, 1.0]]) - expected_std = np.array([[0.15811388300841894, 0.1], [0.15811388300841894, 0.0]]) / 2.0 - self.assertTrue(np.allclose(processed, expected_avg)) - self.assertTrue(np.allclose(error, expected_std)) + processed = processor(self.data.data(0)) + + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + np.array([[1.0, 1.0], [-1.0, 1.0]]), + ) + np.testing.assert_array_almost_equal( + unp.std_devs(processed), + np.array([[0.15811388300841894, 0.1], [0.15811388300841894, 0.0]]) / 2.0, + ) # Test that we get the expected outcome for the ground state - processed, error = processor(self.data.data(1)) - expected_avg = np.array([[-1.0, -1.0], [1.0, -1.0]]) - expected_std = np.array([[0.15811388300841894, 0.1], [0.15811388300841894, 0.0]]) / 2.0 - self.assertTrue(np.allclose(processed, expected_avg)) - self.assertTrue(np.allclose(error, expected_std)) + processed = processor(self.data.data(1)) + + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + np.array([[-1.0, -1.0], [1.0, -1.0]]), + ) + np.testing.assert_array_almost_equal( + unp.std_devs(processed), + np.array([[0.15811388300841894, 0.1], [0.15811388300841894, 0.0]]) / 2.0, + ) def test_averaging_and_svd(self): """Test averaging 
followed by a SVD.""" @@ -425,23 +611,40 @@ def test_averaging_and_svd(self): self.assertTrue(processor.is_trained) # Test the excited state - processed, error = processor(self.data.data(0)) - self.assertTrue(np.allclose(processed, self._sig_es)) + processed = processor(self.data.data(0)) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + self._sig_es, + ) # Test the ground state - processed, error = processor(self.data.data(1)) - self.assertTrue(np.allclose(processed, self._sig_gs)) + processed = processor(self.data.data(1)) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + self._sig_gs, + ) # Test the x90p rotation - processed, error = processor(self.data.data(2)) - self.assertTrue(np.allclose(processed, self._sig_x90)) - self.assertTrue(np.allclose(error, np.array([0.25, 0.25]))) + processed = processor(self.data.data(2)) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + self._sig_x90, + ) + np.testing.assert_array_almost_equal( + unp.std_devs(processed), + np.array([0.25, 0.25]), + ) # Test the x45p rotation - processed, error = processor(self.data.data(3)) - expected_std = np.array([np.std([1, 1, 1, -1]) / np.sqrt(4.0) / 2] * 2) - self.assertTrue(np.allclose(processed, self._sig_x45)) - self.assertTrue(np.allclose(error, expected_std)) + processed = processor(self.data.data(3)) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + self._sig_x45, + ) + np.testing.assert_array_almost_equal( + unp.std_devs(processed), + np.array([np.std([1, 1, 1, -1]) / np.sqrt(4.0) / 2] * 2), + ) def test_process_all_data(self): """Test that we can process all data at once.""" @@ -463,13 +666,19 @@ def test_process_all_data(self): ) # Test processing of all data - processed = processor(self.data.data())[0] - self.assertTrue(np.allclose(processed, all_expected)) + processed = processor(self.data.data()) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + 
all_expected, + ) # Test processing of each datum individually for idx, expected in enumerate([self._sig_es, self._sig_gs, self._sig_x90, self._sig_x45]): - processed = processor(self.data.data(idx))[0] - self.assertTrue(np.allclose(processed, expected)) + processed = processor(self.data.data(idx)) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + expected, + ) def test_normalize(self): """Test that by adding a normalization node we get a signal between 1 and 1.""" @@ -480,11 +689,36 @@ def test_normalize(self): processor.train([self.data.data(idx) for idx in [0, 1]]) self.assertTrue(processor.is_trained) - all_expected = np.array([[0.0, 1.0], [1.0, 0.0], [0.5, 0.5], [0.75, 0.25]]) - # Test processing of all data - processed = processor(self.data.data())[0] - self.assertTrue(np.allclose(processed, all_expected)) + processed = processor(self.data.data()) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + np.array([[0.0, 1.0], [1.0, 0.0], [0.5, 0.5], [0.75, 0.25]]), + ) + + def test_distorted_iq_data(self): + """Test if uncertainty can consider correlation. + + SVD projects IQ data onto I-axis, and input different data sets that + have the same mean and same variance but squeezed along different axis. 
+ """ + svd_node = SVD() + svd_node._scales = [1.0] + svd_node._main_axes = [np.array([1, 0])] + svd_node._means = [(0.0, 0.0)] + + processor = DataProcessor("memory", [AverageData(axis=1), svd_node]) + + dist_i_axis = {"memory": [[[-1, 0]], [[-0.5, 0]], [[0.0, 0]], [[0.5, 0]], [[1, 0]]]} + dist_q_axis = {"memory": [[[0, -1]], [[0, -0.5]], [[0, 0.0]], [[0, 0.5]], [[0, 1]]]} + + out_i = processor(dist_i_axis) + self.assertAlmostEqual(out_i[0].nominal_value, 0.0) + self.assertAlmostEqual(out_i[0].std_dev, 0.31622776601683794) + + out_q = processor(dist_q_axis) + self.assertAlmostEqual(out_q[0].nominal_value, 0.0) + self.assertAlmostEqual(out_q[0].std_dev, 0.0) class TestAvgDataAndSVD(BaseDataProcessorTest): @@ -559,8 +793,9 @@ def test_normalize(self): processor.train([self.data.data(idx) for idx in [0, 1]]) self.assertTrue(processor.is_trained) - all_expected = np.array([[0.0, 1.0], [1.0, 0.0], [0.5, 0.5], [0.75, 0.25]]) - # Test processing of all data - processed = processor(self.data.data())[0] - self.assertTrue(np.allclose(processed, all_expected)) + processed = processor(self.data.data()) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed), + np.array([[0.0, 1.0], [1.0, 0.0], [0.5, 0.5], [0.75, 0.25]]), + ) diff --git a/test/data_processing/test_nodes.py b/test/data_processing/test_nodes.py index 11cdcbbed1..2fb520c188 100644 --- a/test/data_processing/test_nodes.py +++ b/test/data_processing/test_nodes.py @@ -12,10 +12,10 @@ """Data processor tests.""" -# pylint: disable=unbalanced-tuple-unpacking from test.base import QiskitExperimentsTestCase -import numpy as np +import numpy as np +from uncertainties import unumpy as unp from qiskit_experiments.data_processing.nodes import ( SVD, @@ -23,8 +23,6 @@ MinMaxNormalize, Probability, ) -from qiskit_experiments.data_processing.data_processor import DataProcessor - from . 
import BaseDataProcessorTest @@ -32,49 +30,111 @@ class TestAveraging(BaseDataProcessorTest): """Test the averaging nodes.""" def test_simple(self): - """Simple test of averaging.""" - - datum = np.array([[1, 2], [3, 4], [5, 6]]) + """Simple test of averaging. Standard error of mean is generated.""" + datum = unp.uarray([[1, 2], [3, 4], [5, 6]], np.full((3, 2), np.nan)) node = AverageData(axis=1) - self.assertTrue(np.allclose(node(datum)[0], np.array([1.5, 3.5, 5.5]))) - self.assertTrue(np.allclose(node(datum)[1], np.array([0.5, 0.5, 0.5]) / np.sqrt(2))) + processed_data = node(data=datum) + + np.testing.assert_array_almost_equal( + unp.nominal_values(processed_data), + np.array([1.5, 3.5, 5.5]), + ) + np.testing.assert_array_almost_equal( + unp.std_devs(processed_data), + np.array([0.5, 0.5, 0.5]) / np.sqrt(2), + ) node = AverageData(axis=0) - self.assertTrue(np.allclose(node(datum)[0], np.array([3.0, 4.0]))) - std = np.std([1, 3, 5]) - self.assertTrue(np.allclose(node(datum)[1], np.array([std, std]) / np.sqrt(3))) + processed_data = node(data=datum) + + np.testing.assert_array_almost_equal( + unp.nominal_values(processed_data), + np.array([3.0, 4.0]), + ) + np.testing.assert_array_almost_equal( + unp.std_devs(processed_data), + np.array([1.632993161855452, 1.632993161855452]) / np.sqrt(3), + ) + + def test_with_error(self): + """Compute error propagation. This is quadratic sum divided by samples.""" + datum = unp.uarray( + [[1, 2, 3, 4, 5, 6]], + [[0.1, 0.2, 0.3, 0.4, 0.5, 0.6]], + ) + + node = AverageData(axis=1) + processed_data = node(data=datum) + + self.assertAlmostEqual(processed_data[0].nominal_value, 3.5) + # sqrt(0.1**2 + 0.2**2 + ... + 0.6**2) / 6 + self.assertAlmostEqual(processed_data[0].std_dev, 0.15898986690282427) + + def test_with_error_partly_non_error(self): + """Compute error propagation. 
Some elements have no error.""" + datum = unp.uarray( + [ + [1, 2, 3, 4, 5, 6], + [1, 2, 3, 4, 5, 6], + ], + [ + [0.1, 0.2, 0.3, 0.4, 0.5, 0.6], + [np.nan, 0.2, 0.3, 0.4, 0.5, 0.6], + ], + ) + + node = AverageData(axis=1) + processed_data = node(data=datum) + + self.assertAlmostEqual(processed_data[0].nominal_value, 3.5) + # sqrt(0.1**2 + 0.2**2 + ... + 0.6**2) / 6 + self.assertAlmostEqual(processed_data[0].std_dev, 0.15898986690282427) + + self.assertAlmostEqual(processed_data[1].nominal_value, 3.5) + # sqrt((1 - 3.5)**2 + (2 - 3.5)**2 + ... + (6 - 3.5)**2) / 6 + self.assertAlmostEqual(processed_data[1].std_dev, 0.6972166887783964) def test_iq_averaging(self): """Test averaging of IQ-data.""" - iq_data = [ - [[-6.20601501e14, -1.33257051e15], [-1.70921324e15, -4.05881657e15]], - [[-5.80546502e14, -1.33492509e15], [-1.65094637e15, -4.05926942e15]], - [[-4.04649069e14, -1.33191056e15], [-1.29680377e15, -4.03604815e15]], - [[-2.22203874e14, -1.30291309e15], [-8.57663429e14, -3.97784973e15]], - [[-2.92074029e13, -1.28578530e15], [-9.78824053e13, -3.92071056e15]], - [[1.98056981e14, -1.26883024e15], [3.77157017e14, -3.87460328e15]], - [[4.29955888e14, -1.25022995e15], [1.02340118e15, -3.79508679e15]], - [[6.38981344e14, -1.25084614e15], [1.68918514e15, -3.78961044e15]], - [[7.09988897e14, -1.21906634e15], [1.91914171e15, -3.73670664e15]], - [[7.63169115e14, -1.20797552e15], [2.03772603e15, -3.74653863e15]], - ] - - self.create_experiment(iq_data, single_shot=True) + iq_data = np.array( + [ + [[-6.20601501e14, -1.33257051e15], [-1.70921324e15, -4.05881657e15]], + [[-5.80546502e14, -1.33492509e15], [-1.65094637e15, -4.05926942e15]], + [[-4.04649069e14, -1.33191056e15], [-1.29680377e15, -4.03604815e15]], + [[-2.22203874e14, -1.30291309e15], [-8.57663429e14, -3.97784973e15]], + [[-2.92074029e13, -1.28578530e15], [-9.78824053e13, -3.92071056e15]], + [[1.98056981e14, -1.26883024e15], [3.77157017e14, -3.87460328e15]], + [[4.29955888e14, -1.25022995e15], 
[1.02340118e15, -3.79508679e15]], + [[6.38981344e14, -1.25084614e15], [1.68918514e15, -3.78961044e15]], + [[7.09988897e14, -1.21906634e15], [1.91914171e15, -3.73670664e15]], + [[7.63169115e14, -1.20797552e15], [2.03772603e15, -3.74653863e15]], + ], + dtype=float, + ) + iq_std = np.full_like(iq_data, np.nan) + + self.create_experiment(unp.uarray(iq_data, iq_std), single_shot=True) avg_iq = AverageData(axis=0) - - avg_datum, error = avg_iq(self.iq_experiment.data(0)["memory"]) + processed_data = avg_iq(data=np.asarray(self.iq_experiment.data(0)["memory"])) expected_avg = np.array([[8.82943876e13, -1.27850527e15], [1.43410186e14, -3.89952402e15]]) - expected_std = np.array( [[5.07650185e14, 4.44664719e13], [1.40522641e15, 1.22326831e14]] ) / np.sqrt(10) - self.assertTrue(np.allclose(avg_datum, expected_avg)) - self.assertTrue(np.allclose(error, expected_std)) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed_data), + expected_avg, + decimal=-8, + ) + np.testing.assert_array_almost_equal( + unp.std_devs(processed_data), + expected_std, + decimal=-8, + ) class TestNormalize(QiskitExperimentsTestCase): @@ -91,9 +151,21 @@ def test_simple(self): node = MinMaxNormalize() - self.assertTrue(np.allclose(node(data)[0], expected_data)) - self.assertTrue(np.allclose(node(data, error)[0], expected_data)) - self.assertTrue(np.allclose(node(data, error)[1], expected_error)) + processed_data = node(data=data) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed_data), + expected_data, + ) + + processed_data = node(data=unp.uarray(nominal_values=data, std_devs=error)) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed_data), + expected_data, + ) + np.testing.assert_array_almost_equal( + unp.std_devs(processed_data), + expected_error, + ) class TestSVD(BaseDataProcessorTest): @@ -104,37 +176,42 @@ def test_simple_data(self): A simple setting where the IQ data of qubit 0 is oriented along (1,1) and the IQ data of qubit 1 is 
oriented along (1,-1). """ - iq_data = [[[0.0, 0.0], [0.0, 0.0]], [[1.0, 1.0], [-1.0, 1.0]], [[-1.0, -1.0], [1.0, -1.0]]] self.create_experiment(iq_data) iq_svd = SVD() - iq_svd.train([datum["memory"] for datum in self.iq_experiment.data()]) + iq_svd.train(np.asarray([datum["memory"] for datum in self.iq_experiment.data()])) # qubit 0 IQ data is oriented along (1,1) - self.assertTrue(np.allclose(iq_svd._main_axes[0], np.array([-1, -1]) / np.sqrt(2))) + np.testing.assert_array_almost_equal(iq_svd._main_axes[0], np.array([-1, -1]) / np.sqrt(2)) # qubit 1 IQ data is oriented along (1, -1) - self.assertTrue(np.allclose(iq_svd._main_axes[1], np.array([-1, 1]) / np.sqrt(2))) - - # Note: input data shape [n_circs, n_slots, n_iq] for avg mode simulation - - processed, _ = iq_svd(np.array([[[1, 1], [1, -1]]])) - expected = np.array([-1, -1]) / np.sqrt(2) - self.assertTrue(np.allclose(processed, expected)) - - processed, _ = iq_svd(np.array([[[2, 2], [2, -2]]])) - self.assertTrue(np.allclose(processed, expected * 2)) + np.testing.assert_array_almost_equal(iq_svd._main_axes[1], np.array([-1, 1]) / np.sqrt(2)) + + # This is n_circuit = 1, n_slot = 2, the input shape should be [1, 2, 2] + # Then the output shape will be [1, 2] by reducing the last dimension + processed_data = iq_svd(np.array([[[1, 1], [1, -1]]])) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed_data), + np.array([[-1, -1]]) / np.sqrt(2), + ) + + processed_data = iq_svd(np.array([[[2, 2], [2, -2]]])) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed_data), + 2 * np.array([[-1, -1]]) / np.sqrt(2), + ) # Check that orthogonal data gives 0. 
- processed, _ = iq_svd(np.array([[[1, -1], [1, 1]]])) - expected = np.array([0, 0]) - self.assertTrue(np.allclose(processed, expected)) + processed_data = iq_svd(np.array([[[1, -1], [1, 1]]])) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed_data), + np.array([[0, 0]]), + ) def test_svd(self): """Use IQ data gathered from the hardware.""" - # This data is primarily oriented along the real axis with a slight tilt. # There is a large offset in the imaginary dimension when comparing qubits # 0 and 1. @@ -154,13 +231,19 @@ def test_svd(self): self.create_experiment(iq_data) iq_svd = SVD() - iq_svd.train([datum["memory"] for datum in self.iq_experiment.data()]) + iq_svd.train(np.asarray([datum["memory"] for datum in self.iq_experiment.data()])) - self.assertTrue(np.allclose(iq_svd._main_axes[0], np.array([-0.99633018, -0.08559302]))) - self.assertTrue(np.allclose(iq_svd._main_axes[1], np.array([-0.99627747, -0.0862044]))) + np.testing.assert_array_almost_equal( + iq_svd._main_axes[0], np.array([-0.99633018, -0.08559302]) + ) + np.testing.assert_array_almost_equal( + iq_svd._main_axes[1], np.array([-0.99627747, -0.0862044]) + ) def test_svd_error(self): """Test the error formula of the SVD.""" + # This is n_circuit = 1, n_slot = 1, the input shape should be [1, 1, 2] + # Then the output shape will be [1, 1] by reducing the last dimension iq_svd = SVD() iq_svd._main_axes = np.array([[1.0, 0.0]]) @@ -168,45 +251,29 @@ def test_svd_error(self): iq_svd._means = [[0.0, 0.0]] # Since the axis is along the real part the imaginary error is irrelevant. 
- processed, error = iq_svd([[[1.0, 0.2]]], [[[0.2, 0.1]]]) - self.assertEqual(processed, np.array([1.0])) - self.assertEqual(error, np.array([0.2])) + processed_data = iq_svd(unp.uarray(nominal_values=[[[1.0, 0.2]]], std_devs=[[[0.2, 0.1]]])) + np.testing.assert_array_almost_equal(unp.nominal_values(processed_data), np.array([[1.0]])) + np.testing.assert_array_almost_equal(unp.std_devs(processed_data), np.array([[0.2]])) # Since the axis is along the real part the imaginary error is irrelevant. - processed, error = iq_svd([[[1.0, 0.2]]], [[[0.2, 0.3]]]) - self.assertEqual(processed, np.array([1.0])) - self.assertEqual(error, np.array([0.2])) + processed_data = iq_svd(unp.uarray(nominal_values=[[[1.0, 0.2]]], std_devs=[[[0.2, 0.3]]])) + np.testing.assert_array_almost_equal(unp.nominal_values(processed_data), np.array([[1.0]])) + np.testing.assert_array_almost_equal(unp.std_devs(processed_data), np.array([[0.2]])) # Tilt the axis to an angle of 36.9... degrees iq_svd._main_axes = np.array([[0.8, 0.6]]) - processed, error = iq_svd([[[1.0, 0.0]]], [[[0.2, 0.3]]]) + + processed_data = iq_svd(unp.uarray(nominal_values=[[[1.0, 0.0]]], std_devs=[[[0.2, 0.3]]])) cos_ = np.cos(np.arctan(0.6 / 0.8)) sin_ = np.sin(np.arctan(0.6 / 0.8)) - self.assertEqual(processed, np.array([cos_])) - expected_error = np.sqrt((0.2 * cos_) ** 2 + (0.3 * sin_) ** 2) - self.assertEqual(error, np.array([expected_error])) - - def test_train_svd_processor(self): - """Test that we can train a DataProcessor with an SVD.""" - - processor = DataProcessor("memory", [SVD()]) - - self.assertFalse(processor.is_trained) - - iq_data = [[[0.0, 0.0], [0.0, 0.0]], [[1.0, 1.0], [-1.0, 1.0]], [[-1.0, -1.0], [1.0, -1.0]]] - self.create_experiment(iq_data) - - processor.train(self.iq_experiment.data()) - - self.assertTrue(processor.is_trained) - - # Check that we can use the SVD - iq_data = [[[2, 2], [2, -2]]] - self.create_experiment(iq_data) - - processed, _ = processor(self.iq_experiment.data(0)) - expected = 
np.array([-2, -2]) / np.sqrt(2) - self.assertTrue(np.allclose(processed, expected)) + np.testing.assert_array_almost_equal( + unp.nominal_values(processed_data), + np.array([[cos_]]), + ) + np.testing.assert_array_almost_equal( + unp.std_devs(processed_data), + np.array([[np.sqrt((0.2 * cos_) ** 2 + (0.3 * sin_) ** 2)]]), + ) class TestProbability(QiskitExperimentsTestCase): @@ -217,14 +284,14 @@ def test_variance_not_zero(self): node = Probability(outcome="1") data = {"1": 1024, "0": 0} - mode, stderr = node(data) - self.assertGreater(stderr, 0.0) - self.assertLessEqual(mode, 1.0) + processed_data = node(data=np.asarray([data])) + self.assertGreater(unp.std_devs(processed_data), 0.0) + self.assertLessEqual(unp.nominal_values(processed_data), 1.0) data = {"1": 0, "0": 1024} - mode, stderr = node(data) - self.assertGreater(stderr, 0.0) - self.assertGreaterEqual(mode, 0.0) + processed_data = node(data=np.asarray([data])) + self.assertGreater(unp.std_devs(processed_data), 0.0) + self.assertGreater(unp.nominal_values(processed_data), 0.0) def test_probability_balanced(self): """Test if p=0.5 is returned when counts are balanced and prior is flat.""" @@ -232,5 +299,5 @@ def test_probability_balanced(self): # balanced counts with a flat prior will yield p = 0.5 data = {"1": 512, "0": 512} - mode, _ = node(data) - self.assertAlmostEqual(mode, 0.5) + processed_data = node(data=np.asarray([data])) + self.assertAlmostEqual(unp.nominal_values(processed_data), 0.5)