diff --git a/qiskit_experiments/data_processing/__init__.py b/qiskit_experiments/data_processing/__init__.py
new file mode 100644
index 0000000000..d6866829d4
--- /dev/null
+++ b/qiskit_experiments/data_processing/__init__.py
@@ -0,0 +1,22 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2021.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Data processing for Qiskit experiments: data actions, nodes and the data processor."""
+
+from .data_action import DataAction
+from .nodes import (
+    Probability,
+    ToImag,
+    ToReal,
+)
+
+from .data_processor import DataProcessor
diff --git a/qiskit_experiments/data_processing/data_action.py b/qiskit_experiments/data_processing/data_action.py
new file mode 100644
index 0000000000..ff08ff9f2c
--- /dev/null
+++ b/qiskit_experiments/data_processing/data_action.py
@@ -0,0 +1,75 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2021.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Defines the steps that can be used to analyse data."""
+
+from abc import ABCMeta, abstractmethod
+from typing import Any
+
+
+class DataAction(metaclass=ABCMeta):
+    """
+    Abstract action done on measured data to process it. Each subclass of DataAction must
+    implement _format_data, which checks and formats the input, and _process, which processes it.
+    """
+
+    def __init__(self, validate: bool = True):
+        """
+        Args:
+            validate: If set to False the DataAction will not validate its input.
+        """
+        self._validate = validate
+
+    @abstractmethod
+    def _process(self, datum: Any) -> Any:
+        """
+        Applies the data processing step to the datum.
+
+        Args:
+            datum: A single item of data which will be processed.
+
+        Returns:
+            processed data: The data that has been processed.
+        """
+
+    @abstractmethod
+    def _format_data(self, datum: Any) -> Any:
+        """
+        Check that the given data has the correct structure. This method may
+        additionally change the data type, e.g. converting a list to a numpy array.
+
+        Args:
+            datum: The data instance to check and format.
+
+        Returns:
+            datum: The data that was checked.
+
+        Raises:
+            DataProcessorError: If the data does not have the proper format.
+        """
+
+    def __call__(self, data: Any) -> Any:
+        """
+        Format the data with _format_data, then process the result with _process.
+
+        Args:
+            data: The data to process. The action nodes in the data processor will
+                raise errors if the data does not have the appropriate format.
+
+        Returns:
+            processed data: The data processed by self.
+        """
+        return self._process(self._format_data(data))
+
+    def __repr__(self):
+        """String representation of the node."""
+        return f"{self.__class__.__name__}(validate={self._validate})"
diff --git a/qiskit_experiments/data_processing/data_processor.py b/qiskit_experiments/data_processing/data_processor.py
new file mode 100644
index 0000000000..374751c36b
--- /dev/null
+++ b/qiskit_experiments/data_processing/data_processor.py
@@ -0,0 +1,131 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2021.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Actions done on the data to bring it in a usable form."""
+
+from typing import Any, Dict, List, Set, Tuple, Union
+
+from qiskit_experiments.data_processing.data_action import DataAction
+from qiskit_experiments.data_processing.exceptions import DataProcessorError
+
+
+class DataProcessor:
+    """
+    A DataProcessor defines a sequence of operations to perform on experimental data.
+    Calling an instance of DataProcessor applies this sequence on the input argument.
+    A DataProcessor is created with a list of DataAction instances. Each DataAction
+    applies its _process method on the data and returns the processed data. The nodes
+    in the DataProcessor may also perform data validation and some minor formatting.
+    The output of one data action serves as input for the next data action.
+    DataProcessor.__call__(datum) usually takes in an entry from the data property of
+    an ExperimentData object (i.e. a dict containing metadata and memory keys and
+    possibly counts, like the Result.data property) and produces the formatted data.
+    DataProcessor.__call__(datum) extracts the data from the given datum under
+    DataProcessor._input_key (which is specified at initialization) of the given datum.
+    """
+
+    def __init__(self, input_key: str, data_actions: List[DataAction] = None):
+        """Create a chain of data processing actions.
+
+        Args:
+            input_key: The initial key in the datum Dict[str, Any] under which the data processor
+                will find the data to process.
+            data_actions: The data processing actions to construct this data processor with. The
+                list is copied. If None is given an empty DataProcessor will be created.
+        """
+        self._input_key = input_key
+        self._nodes = list(data_actions) if data_actions else []
+
+    def append(self, node: DataAction):
+        """
+        Append new data action node to this data processor.
+
+        Args:
+            node: A DataAction that will process the data.
+        """
+        self._nodes.append(node)
+
+    def __call__(self, datum: Dict[str, Any]) -> Any:
+        """
+        Call self on the given datum. This method sequentially calls the stored data actions
+        on the datum.
+
+        Args:
+            datum: A single item of data, typically from an ExperimentData instance, that needs
+                to be processed. This dict also contains the metadata of each experiment.
+
+        Returns:
+            processed data: The data processed by the data processor.
+        """
+        return self._call_internal(datum, False)
+
+    def call_with_history(
+        self, datum: Dict[str, Any], history_nodes: Set = None
+    ) -> Tuple[Any, List]:
+        """
+        Call self on the given datum. This method sequentially calls the stored data actions
+        on the datum and also returns the history of the processed data.
+
+        Args:
+            datum: A single item of data, typically from an ExperimentData instance, that
+                needs to be processed.
+            history_nodes: The nodes, specified by index in the data processing chain, to
+                include in the history. If None is given then all nodes will be included
+                in the history.
+
+        Returns:
+            processed data: The datum processed by the data processor.
+            history: The datum processed at each node of the data processor.
+        """
+        return self._call_internal(datum, True, history_nodes)
+
+    def _call_internal(
+        self, datum: Dict[str, Any], with_history: bool, history_nodes: Set = None
+    ) -> Union[Any, Tuple[Any, List]]:
+        """
+        Internal function to process the data with or without storing the computation history.
+
+        Args:
+            datum: A single item of data, typically from an ExperimentData instance, that
+                needs to be processed.
+            with_history: if True the history is returned otherwise it is not.
+            history_nodes: The nodes, specified by index in the data processing chain, to
+                include in the history. If None is given then all nodes will be included
+                in the history.
+
+        Returns:
+            datum_ and history if with_history is True or datum_ if with_history is False.
+
+        Raises:
+            DataProcessorError: If the input key of the data processor is not contained in datum.
+        """
+
+        if self._input_key not in datum:
+            raise DataProcessorError(
+                f"The input key {self._input_key} was not found in the input datum."
+            )
+
+        datum_ = datum[self._input_key]
+
+        history = []
+        for index, node in enumerate(self._nodes):
+            datum_ = node(datum_)
+
+            if with_history and (
+                history_nodes is None or (history_nodes and index in history_nodes)
+            ):
+                history.append((node.__class__.__name__, datum_, index))
+
+        if with_history:
+            return datum_, history
+        else:
+            return datum_
diff --git a/qiskit_experiments/data_processing/exceptions.py b/qiskit_experiments/data_processing/exceptions.py
new file mode 100644
index 0000000000..412a3ace60
--- /dev/null
+++ b/qiskit_experiments/data_processing/exceptions.py
@@ -0,0 +1,19 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2021.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Exceptions for data processing."""
+
+from qiskit.exceptions import QiskitError
+
+
+class DataProcessorError(QiskitError):
+    """Errors raised by the data processing module."""
diff --git a/qiskit_experiments/data_processing/nodes.py b/qiskit_experiments/data_processing/nodes.py
new file mode 100644
index 0000000000..5b0cda418b
--- /dev/null
+++ b/qiskit_experiments/data_processing/nodes.py
@@ -0,0 +1,180 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2021.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Different data analysis steps."""
+
+from abc import abstractmethod
+from typing import Any, Dict, Optional, Tuple
+import numpy as np
+
+from qiskit_experiments.data_processing.data_action import DataAction
+from qiskit_experiments.data_processing.exceptions import DataProcessorError
+
+
+class IQPart(DataAction):
+    """Abstract class for IQ data post-processing."""
+
+    def __init__(self, scale: Optional[float] = None, validate: bool = True):
+        """
+        Args:
+            scale: Float with which to multiply the IQ data.
+            validate: If set to False the DataAction will not validate its input.
+        """
+        self.scale = scale
+        super().__init__(validate)
+
+    @abstractmethod
+    def _process(self, datum: np.array) -> np.array:
+        """Defines how the IQ point will be processed.
+
+        Args:
+            datum: A 3D array of shots, qubits, and a complex IQ point as [real, imaginary].
+
+        Returns:
+            Processed IQ point.
+        """
+
+    def _format_data(self, datum: Any) -> Any:
+        """Check that the IQ data has the correct format and convert to numpy array.
+
+        Args:
+            datum: A single item of data which corresponds to single-shot IQ data. It should
+                have dimension three: shots, qubits, iq-point as [real, imaginary].
+
+        Returns:
+            datum as a numpy array.
+
+        Raises:
+            DataProcessorError: If the datum does not have the correct format.
+        """
+        datum = np.asarray(datum, dtype=float)
+
+        if self._validate and len(datum.shape) != 3:
+            raise DataProcessorError(
+                f"Single-shot data given to {self.__class__.__name__} "
+                f"must be a 3D array. Instead, a {len(datum.shape)}D "
+                f"array was given."
+            )
+
+        return datum
+
+    def __repr__(self):
+        """String representation of the node."""
+        return f"{self.__class__.__name__}(validate: {self._validate}, scale: {self.scale})"
+
+
+class ToReal(IQPart):
+    """IQ data post-processing. Isolate the real part of the IQ data."""
+
+    def _process(self, datum: np.array) -> np.array:
+        """Take the real part of the IQ data.
+
+        Args:
+            datum: A 3D array of shots, qubits, and a complex IQ point as [real, imaginary].
+
+        Returns:
+            A 2D array of shots, qubits. Each entry is the real part of the given IQ data.
+        """
+        if self.scale is None:
+            return datum[:, :, 0]
+
+        return datum[:, :, 0] * self.scale
+
+
+class ToImag(IQPart):
+    """IQ data post-processing. Isolate the imaginary part of the IQ data."""
+
+    def _process(self, datum: np.array) -> np.array:
+        """Take the imaginary part of the IQ data.
+
+        Args:
+            datum: A 3D array of shots, qubits, and a complex IQ point as [real, imaginary].
+
+        Returns:
+            A 2D array of shots, qubits. Each entry is the imaginary part of the given IQ data.
+        """
+        if self.scale is None:
+            return datum[:, :, 1]
+
+        return datum[:, :, 1] * self.scale
+
+
+class Probability(DataAction):
+    """Count data post processing. This returns the probabilities of the outcome string
+    used to initialize an instance of Probability."""
+
+    def __init__(self, outcome: str, validate: bool = True):
+        """Initialize a counts to probability data conversion.
+
+        Args:
+            outcome: The bitstring for which to compute the probability.
+            validate: If set to False the DataAction will not validate its input.
+        """
+        self._outcome = outcome
+        super().__init__(validate)
+
+    def _format_data(self, datum: dict) -> dict:
+        """
+        Checks that the given data has a counts format.
+
+        Args:
+            datum: An instance of data that should be a dict with bit strings as keys
+                and counts as values. These checks are skipped if the node was created
+                with validate set to False.
+
+        Returns:
+            The datum as given.
+
+        Raises:
+            DataProcessorError: if the data is not a counts dict.
+        """
+        if self._validate:
+            if not isinstance(datum, dict):
+                raise DataProcessorError(
+                    f"Given counts datum {datum} to "
+                    f"{self.__class__.__name__} is not a valid count format."
+                )
+
+            for bit_str, count in datum.items():
+                if not isinstance(bit_str, str):
+                    raise DataProcessorError(
+                        f"Key {bit_str} is not a valid count key in {self.__class__.__name__}."
+                    )
+
+                if not isinstance(count, (int, float)):
+                    raise DataProcessorError(
+                        f"Count {count} is not a valid count value in {self.__class__.__name__}."
+                    )
+
+        return datum
+
+    def _process(self, datum: Dict[str, Any]) -> Tuple[float, float]:
+        """
+        Args:
+            datum: The counts dictionary with bit strings as keys and counts as values.
+
+        Returns:
+            processed data: A tuple with the probability of the outcome and its variance.
+
+        Raises:
+            DataProcessorError: If the counts sum to zero shots.
+        """
+        shots = sum(datum.values())
+        if shots == 0:
+            raise DataProcessorError(
+                f"The counts given to {self.__class__.__name__} sum to zero shots."
+            )
+
+        p_mean = datum.get(self._outcome, 0.0) / shots
+        p_var = p_mean * (1 - p_mean) / shots
+
+        return p_mean, p_var
diff --git a/test/data_processing/__init__.py b/test/data_processing/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/data_processing/test_data_processing.py b/test/data_processing/test_data_processing.py
new file mode 100644
index 0000000000..735a3a5298
--- /dev/null
+++ b/test/data_processing/test_data_processing.py
@@ -0,0 +1,210 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2021.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""Data processor tests."""
+
+import numpy as np
+
+from qiskit.result.models import ExperimentResultData, ExperimentResult
+from qiskit.result import Result
+from qiskit.test import QiskitTestCase
+from qiskit.qobj.common import QobjExperimentHeader
+from qiskit_experiments import ExperimentData
+from qiskit_experiments.base_experiment import BaseExperiment
+from qiskit_experiments.data_processing.data_processor import DataProcessor
+from qiskit_experiments.data_processing.exceptions import DataProcessorError
+from qiskit_experiments.data_processing.nodes import (
+    ToReal,
+    ToImag,
+    Probability,
+)
+
+
+class FakeExperiment(BaseExperiment):
+    """Fake experiment class for testing."""
+
+    def __init__(self):
+        """Initialise the fake experiment."""
+        self._type = None
+        super().__init__((0,), "fake_test_experiment")
+
+    def circuits(self, backend=None, **circuit_options):
+        """Fake circuits."""
+        return []
+
+
+class DataProcessorTest(QiskitTestCase):
+    """Class to test DataProcessor."""
+
+    def setUp(self):
+        """Set up the level 1 IQ results and the level 2 counts data used by the tests."""
+        self.base_result_args = dict(
+            backend_name="test_backend",
+            backend_version="1.0.0",
+            qobj_id="id-123",
+            job_id="job-123",
+            success=True,
+        )
+
+        mem1 = ExperimentResultData(
+            memory=[
+                [[1103260.0, -11378508.0], [2959012.0, -16488753.0]],
+                [[442170.0, -19283206.0], [-5279410.0, -15339630.0]],
+                [[3016514.0, -14548009.0], [-3404756.0, -16743348.0]],
+            ]
+        )
+
+        mem2 = ExperimentResultData(
+            memory=[
+                [[5131962.0, -16630257.0], [4438870.0, -13752518.0]],
+                [[3415985.0, -16031913.0], [2942458.0, -15840465.0]],
+                [[5199964.0, -14955998.0], [4030843.0, -14538923.0]],
+            ]
+        )
+
+        header1 = QobjExperimentHeader(
+            clbit_labels=[["meas", 0], ["meas", 1]],
+            creg_sizes=[["meas", 2]],
+            global_phase=0.0,
+            memory_slots=2,
+            metadata={"experiment_type": "fake_test_experiment", "x_values": 0.0},
+        )
+
+        header2 = QobjExperimentHeader(
+            clbit_labels=[["meas", 0], ["meas", 1]],
+            creg_sizes=[["meas", 2]],
+            global_phase=0.0,
+            memory_slots=2,
+            metadata={"experiment_type": "fake_test_experiment", "x_values": 1.0},
+        )
+
+        res1 = ExperimentResult(shots=3, success=True, meas_level=1, data=mem1, header=header1)
+        res2 = ExperimentResult(shots=3, success=True, meas_level=1, data=mem2, header=header2)
+
+        self.result_lvl1 = Result(results=[res1, res2], **self.base_result_args)
+
+        raw_counts = {"0x0": 4, "0x2": 6}
+        data = ExperimentResultData(counts=dict(**raw_counts))
+        header = QobjExperimentHeader(
+            metadata={"experiment_type": "fake_test_experiment"},
+            clbit_labels=[["c", 0], ["c", 1]],
+            creg_sizes=[["c", 2]],
+            n_qubits=2,
+            memory_slots=2,
+        )
+        res = ExperimentResult(shots=9, success=True, meas_level=2, data=data, header=header)
+        self.exp_data_lvl2 = ExperimentData(FakeExperiment())
+        self.exp_data_lvl2.add_data(Result(results=[res], **self.base_result_args))
+
+        super().setUp()
+
+    def test_empty_processor(self):
+        """Check that a DataProcessor without nodes returns the data under its input key."""
+        data_processor = DataProcessor("counts")
+
+        datum = data_processor(self.exp_data_lvl2.data[0])
+        self.assertEqual(datum, {"00": 4, "10": 6})
+
+        datum, history = data_processor.call_with_history(self.exp_data_lvl2.data[0])
+        self.assertEqual(datum, {"00": 4, "10": 6})
+        self.assertEqual(history, [])
+
+    def test_to_real(self):
+        """Test scaling and conversion to real part."""
+        processor = DataProcessor("memory", [ToReal(scale=1e-3)])
+
+        exp_data = ExperimentData(FakeExperiment())
+        exp_data.add_data(self.result_lvl1)
+
+        new_data = processor(exp_data.data[0])
+
+        expected_old = {
+            "memory": [
+                [[1103260.0, -11378508.0], [2959012.0, -16488753.0]],
+                [[442170.0, -19283206.0], [-5279410.0, -15339630.0]],
+                [[3016514.0, -14548009.0], [-3404756.0, -16743348.0]],
+            ],
+            "metadata": {"experiment_type": "fake_test_experiment", "x_values": 0.0},
+        }
+
+        expected_new = np.array([[1103.26, 2959.012], [442.17, -5279.41], [3016.514, -3404.7560]])
+
+        self.assertEqual(exp_data.data[0], expected_old)
+        self.assertTrue(np.allclose(new_data, expected_new))
+
+        # Test that we can call with history.
+        new_data, history = processor.call_with_history(exp_data.data[0])
+
+        self.assertEqual(exp_data.data[0], expected_old)
+        self.assertTrue(np.allclose(new_data, expected_new))
+
+        self.assertEqual(history[0][0], "ToReal")
+        self.assertTrue(np.allclose(history[0][1], expected_new))
+
+    def test_to_imag(self):
+        """Test scaling and conversion to imaginary part."""
+        processor = DataProcessor("memory")
+        processor.append(ToImag(scale=1e-3))
+
+        exp_data = ExperimentData(FakeExperiment())
+        exp_data.add_data(self.result_lvl1)
+
+        new_data = processor(exp_data.data[0])
+
+        expected_old = {
+            "memory": [
+                [[1103260.0, -11378508.0], [2959012.0, -16488753.0]],
+                [[442170.0, -19283206.0], [-5279410.0, -15339630.0]],
+                [[3016514.0, -14548009.0], [-3404756.0, -16743348.0]],
+            ],
+            "metadata": {"experiment_type": "fake_test_experiment", "x_values": 0.0},
+        }
+
+        expected_new = np.array(
+            [
+                [-11378.508, -16488.753],
+                [-19283.206000000002, -15339.630000000001],
+                [-14548.009, -16743.348],
+            ]
+        )
+
+        self.assertEqual(exp_data.data[0], expected_old)
+        self.assertTrue(np.allclose(new_data, expected_new))
+
+        # Test that we can call with history.
+        new_data, history = processor.call_with_history(exp_data.data[0])
+        self.assertEqual(exp_data.data[0], expected_old)
+        self.assertTrue(np.allclose(new_data, expected_new))
+
+        self.assertEqual(history[0][0], "ToImag")
+        self.assertTrue(np.allclose(history[0][1], expected_new))
+
+    def test_populations(self):
+        """Test that counts are converted to the outcome probability and its variance."""
+
+        processor = DataProcessor("counts")
+        processor.append(Probability("00"))
+
+        new_data = processor(self.exp_data_lvl2.data[0])
+
+        self.assertEqual(new_data[0], 0.4)
+        self.assertEqual(new_data[1], 0.4 * (1 - 0.4) / 10)
+
+    def test_validation(self):
+        """Test that invalid counts raise DataProcessorError only when validating."""
+
+        for validate, error in [(False, AttributeError), (True, DataProcessorError)]:
+            processor = DataProcessor("counts")
+            processor.append(Probability("00", validate=validate))
+
+            with self.assertRaises(error):
+                processor({"counts": [0, 1, 2]})