/
core.py
253 lines (210 loc) · 17 KB
/
core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
"""Canned routines for detecting and characterizing instability ("drift") using time-stamped data"""
#***************************************************************************************************
# Copyright 2015, 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
# Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights
# in this software.
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0 or in the LICENSE file in the root pyGSTi directory.
#***************************************************************************************************
# from . import stabilityanalyzer as _sa
# import numpy as _np
# import warnings as _warnings
# import itertools as _itertools
# import copy as _copy
# REPLACED BY THE PROTOCOL
# def do_stability_analysis(ds, significance=0.05, transform='auto', marginalize='auto', mergeoutcomes=None,
# constnumtimes='auto', ids=False, frequencies='auto', freqpointers={},
# freqstest=None, tests='auto', inclass_correction={}, betweenclass_weighting='auto',
# estimator='auto', modelselector=None, verbosity=1):
# """
# Implements instability ("drift") detection and characterization on timeseries data from *any* set of
# quantum circuits on *any* number of qubits. This uses the StabilityAnalyzer object, and directly
# accessing that object allows for some more complex analyzes to be performed. That object also offers
# a more step-by-step analysis procedure, which may be helpful for exploring the optional arguments of this
# analysis.
# Parameters
# ----------
# ds : DataSet or MultiDataSet
# A DataSet containing time-series data to be analyzed for signs of instability.
# significance : float, optional
# The global significance level. With defaults for all other inputs (a wide range of non-default options),
# the family-wise error rate of the set of all hypothesis tests performed is controlled to this value.
# transform : str, optional
# The type of transform to use in the spectral analysis. Options are:
# - 'auto': An attempt is made to choose the best transform given the "meta-data" of the data,
# e.g., the variability in the time-step between data points. For beginners,
# 'auto' is the best option. If you are familiar with the underlying methods, the
# meta-data of the input, and the relative merits of the different transform, then
# it is probably better to choose this yourself -- as the auto-selection is not hugely
# sophisticated.
# - 'dct' : The Type-II Discrete Cosine Transform (with an orthogonal normalization). This is
# the only tested option, and it is our recommended option when the data is
# approximately equally-spaced, i.e., the time-step between each "click" for each
# circuit is almost a constant. (the DCT transform implicitly assumes that this
# time-step is exactly constant)
# - 'dft' : The discrete Fourier transform (with an orthogonal normalization). *** This is an
# experimental feature, and the results are unreliable with this transform ***
# - 'lsp' : The Lomb-Scargle periodogram. *** This is an experimental feature, and the code is
# untested with this transform ***
# marginalize : str or bool, optional
# True, False or 'auto'. Whether or not to marginalize multi-qubit data, to look for instability
# in the marginalized probability distribution over the two outcomes for each qubit. Cannot be
# set to True if mergeoutcomes is not None.
# mergeoutcomes : None or Dict, optional
# If not None, a dictionary of outcome-merging dictionaries. Each dictionary contained as a
# value of `mergeoutcomes` is used to create a new DataSet, where the values have been merged
# according to that dictionary (see the aggregate_dataset_outcomes() function inside datasetconstructions.py).
# The corresponding key is used as the key for that DataSet, when it is stored in a MultiDataSet,
# and the instability analysis is implemented on each DataSet. This is a more general data
# coarse-grainin option than `marginalize`.
# constnumtimes : str or bool, optional
# True, False or 'auto'. If True then data is discarded from the end of the "clickstream" for
# each circuit until all circuits have the same length clickstream, i.e., the same number of
# data aquisition times. If 'auto' then it is set to True or False depending on the meta-data of
# the data and the type of transform being used.
# ids: True or False, optional
# Whether the multiple DataSets should be treat as generated from independent random variables.
# If the input is a DataSet and `marginalize` is False and `mergeoutcomes` is None then this
# input is irrelevant: there is only ever one DataSet being analyzed. But in general multiple
# DataSets are concurrently analyzed. This is irrelevant for independent analyses of the DataSets,
# but the analysis is capable of also implementing a joint analysis of the DataSets. This joint
# analysis is only valid on the assumption of independent DataSets, and so this analysis will not
# be permitted unless `ids` is set to True. Note that the set of N marginalized data from N-qubit
# circuits are generally not independent -- even if the circuits contain no 2-qubit gates then
# crosstalk can causes dependencies. However, as long as the dependencies are weak then settings
# this to True is likely ok.
# frequencies : 'auto' or list, optional
# The frequencies that the power spectra are calculated for. If 'auto' these are automatically
# determined from the meta-data of the time-series data (e.g., using the mean time between data
# points) and the transform being used. If not 'auto', then a list of lists, where each list is
# a set of frequencies that are the frequencies corresponding to one or more power spectra. The
# frequencies that should be paired to a given power spectrum are specified by `freqpointers`.
# These frequencies (whether automatically calculated or explicitly input) have a fundmentally
# different meaning depending on whether the transform is time-stamp aware (here, the LSP) or not
# (here, the DCT and DFT).
# Time-stamp aware transforms take the frequencies to calculate powers at *as an input*, so the
# specified frequencies are, explicitly, the frequencies associated with the powers. The task
# of choosing the frequencies amounts to picking the best set of frequencies at which to interogate
# the true probability trajectory for components. As there are complex factors involved in this
# choice that the code has no way of knowing, sometimes it is best to choose them yourself. E.g.,
# if different frequencies are used for different circuits it isn't possible to (meaningfully)
# averaging power spectra across circuits, but this might be preferable if the time-step is
# sufficiently different between different circuits -- it depends on your aims.
# For time-stamp unaware transforms, these frequencies should be the frequencies that, given
# that we're implementing the, e.g., DCT, the generated power spectrum is *implicitly* with respect
# to. In the case of data on a fixed time-grid, i.e., equally spaced data, then there is a
# precise set of frequencies implicit in the transform (which will be accurately extracted with
# frequencies set to `auto`). Otherwise, these frequencies are explicitly at least slightly
# ad hoc, and choosing these frequencies amounts to choosing those frequencies that "best"
# approximate the properties being interogatted with fitting each, e.g., DCT basis function
# to the (timestamp-free) data. The 'auto' option bases there frequencies solely on the
# mean time step and the number of times, and is a decent option when the time stamps are roughly
# equally spaced for each circuit.
# These frequencies should be in units of 1/t where 't' is the unit of the time stamps.
# freqpointers : dict, optional
# Specifies which frequencies correspond to which power spectra. The keys are power spectra labels,
# and the values are integers that point to the index of `frequencies` (a list of lists) that the
# relevant frquencies are found at. Whenever a power spectra is not included in `freqpointers` then
# this defaults to 0. So if `frequencies` is specified and is a list containing a single list (of
# frequencies) then `freqpointers` can be left as the empty dictionary.
# freqstest : None or list, optional
# If not not None, a list of the frequency indices at which to test the powers. Leave as None to perform
# comprehensive testing of the power spectra.
# tests : 'auto' or tuple, optional
# Specifies the set of hypothesis tests to perform. If 'auto' then an set of tests is automatically
# chosen. This set of tests will be suitable for most purposes, but sometimes it is useful to override
# this. If a tuple, the elements are "test classes", that specifies a set of hypothesis tests to run,
# and each test class is itself specified by a tuple. The tests specified by each test class in this
# tuple are all implemented. A test class is a tuple containing some subset of 'dataset', 'circuit'
# and 'outcome', which specifies a set of power spectra. Specifically, a power spectra has been calculated
# for the clickstream for every combination of eachinput DataSet (e.g., there are multiple DataSets if there
# has been marginalization of multi-qubit data), each Circuit in the DataSet, and each possible outcome in
# the DataSet. For each of "dataset", "circuit" and "outcome" *not* included in a tuple defining a test class,
# the coresponding "axis" of the 3-dimensional array of spectra is averaged over, and these spectra are then
# tested. So the tuple () specifies the "test class" whereby we test the power spectrum obtained by averaging
# all power spectra; the tuple ('dataset','circuit') specifies the "test class" whereby we average only over
# outcomes, obtaining a single power spectrum for each DataSet and Circuit combination, which we test.
# The default option for "tests" is appropriate for most circumstances, and it consists of (), ('dataset')
# and ('dataset', 'circuit') with duplicates removed (e.g., if there is a single DataSet then () is equivalent
# to ('dataset')).
# inclass_correction : dict, optional
# A dictionary with keys 'dataset', 'circuit', 'outcome' and 'spectrum', and values that specify the type of
# multi-test correction used to account for the multiple tests being implemented. This specifies how the
# statistically significance is maintained within the tests implemented in a single "test class".
# betweenclass_weighting : 'auto' or dict, optional
# The weighting to use to maintain statistical significance between the different classes of test being
# implemented. If 'auto' then a standard Bonferroni correction is used.
# estimator : str, optional
# The name of the estimator to use. This is the method used to estimate the parameters of a parameterized
# model for each probability trajectory, after that parameterized model has been selected with the model
# selection methods. Allowed values are:
# - 'auto'. The estimation method is chosen automatically, default to the fast method that is also
# reasonably reliable.
# - 'filter'. Performs a type of signal filtering: implements the transform used for generating power
# spectra (e.g., the DCT), sets the amplitudes to zero for all freuquencies that the model selection
# has not included in the model, inverts the transform, and then performs some minor post-processing
# to guarantee probabilities within [0, 1]. This method is less statically well-founded than 'mle',
# but it is faster and typically gives similar results. This method is not an option for
# non-invertable transforms, such as the Lomb-Scargle periodogram.
# - 'mle'. Implements maximum likelihood estimation, on the parameterized model chosen by the model
# selection. The most statistically well-founded option, but can be slower than 'filter' and relies
# on numerical optimization.
# modelselection : tuple, optional
# The model selection method. If not None, a "test class" tuple, specifying which test results to use to
# decide which frequencies are significant for each circuit, to then construct a parameterized model for
# each probability trajectory. This can be typically set to None, and it will be chosen automatically.
# But if you wish to use specific test results for the model selection then this should be set.
# verbosity : int, optional
# The amount of print-to-screen
# Returns
# -------
# StabilityAnalyzers
# An object containing the results of the instability detection and characterization. This can
# be used to, e.g., generate plots showing any detected drift, and it can also form the basis
# of further analysis.
# """
# if verbosity > 0: print(" - Formatting the data...", end='')
# results = _sa.StabilityAnalyzer(ds, transform=transform, marginalize=marginalize, mergeoutcomes=mergeoutcomes,
# constnumtimes=constnumtimes, ids=ids)
# if verbosity > 0: print("done!")
# # Calculate the power spectra.
# if verbosity > 0: print(" - Calculating power spectra...", end='')
# results.generate_spectra(frequencies=frequencies, freqpointers=freqpointers)
# if verbosity > 0: print("done!")
# # Implement the drift detection with statistical hypothesis testing on the power spectra.
# if verbosity > 0: print(" - Running instability detection...", end='')
# if verbosity > 1: print('')
# results.do_instability_detection(significance=significance, freqstest=freqstest, tests=tests,
# inclass_correction=inclass_correction,
# betweenclass_weighting=betweenclass_weighting,
# saveas='default', default=True, overwrite=False, verbosity=verbosity - 1)
# if verbosity == 1: print("done!")
# # Estimate the drifting probabilities.
# if verbosity > 0: print(" - Running instability characterization...", end='')
# if verbosity > 1: print('')
# # The model selector something slightly more complicated for this method: this function only allows us to
# # set the second part of the modelselector tuple.
# results.run_instability_characterization(estimator=estimator, modelselector=(None, modelselector), default=True,
# verbosity=verbosity - 1)
# if verbosity == 1: print("done!")
# return results
# future
# def do_time_resolved_rb(ds, timeslices='auto', significance=0.05, transform='auto', constnumtimes='auto',
# frequencies='auto', freqpointers={}, freqtest=None, estimator='auto', verbosity=1):
# """
# Implements a time-resolved randomized benchmarking (RB) analysis, on time-series RB data. This data can
# be from any form of RB in which the observed sucess/survial probability is fit to the standard
# exponential form Pm = A + Bp^m.
# """
# mergeoutcomes =
# trrb_tests = ((),)
# trrb_inclass_correction = {}
# trrb_modelselector = ('default', ((),))
# stabilityanalyzer = do_stability_analysis(ds, significance=significance, transform=transform,
# mergeoutcomes=rb_mergeoutcomes, constnumtimes=constnumtimes, ids=True,
# frequencies=frequencies, freqpointers=freqpointers, freqstest=freqtest,
# tests=trrb_tests, inclass_correction=trrb_inclass_correction,
# betweenclass_weighting='auto', estimator=estimator,
# modelselector=trrb_modelselector, verbosity=verbosity - 1)
# return None