/
datasetconstruction.py
581 lines (480 loc) · 25.3 KB
/
datasetconstruction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
"""
Functions for creating datasets
"""
#***************************************************************************************************
# Copyright 2015, 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
# Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights
# in this software.
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0 or in the LICENSE file in the root pyGSTi directory.
#***************************************************************************************************
import numpy as _np
import numpy.random as _rndm
import warnings as _warnings
import collections as _collections
import itertools as _itertools
from ..objects import dataset as _ds
from ..objects import labeldicts as _ld
from ..objects import label as _lbl
from ..protocols import ExperimentDesign as _ExperimentDesign
from . import circuitconstruction as _gstrc
from pprint import pprint
def simulate_data(model_or_dataset, circuit_list, num_samples,
                  sample_error="multinomial", seed=None, rand_state=None,
                  alias_dict=None, collision_action="aggregate",
                  record_zero_counts=True, comm=None, mem_limit=None, times=None):
    """
    Creates a DataSet using the probabilities obtained from a model.

    Parameters
    ----------
    model_or_dataset : Model or DataSet object
        The source of the underlying probabilities used to generate the data.
        If a Model, the model whose probabilities generate the data.
        If a DataSet, the data set whose frequencies generate the data.

    circuit_list : list of (tuples or Circuits) or ExperimentDesign
        Each tuple or Circuit contains operation labels and
        specifies a gate sequence whose counts are included
        in the returned DataSet. e.g. ``[ (), ('Gx',), ('Gx','Gy') ]``
        If an :class:`ExperimentDesign`, then the design's `.all_circuits_needing_data`
        list is used as the circuit list.

    num_samples : int or list of ints or None
        The simulated number of samples for each circuit.  If an integer,
        all circuits have this number of total samples.  If a list,
        integer elements specify the number of samples for the corresponding
        circuit.  If ``None``, then `model_or_dataset` must be a
        :class:`~pygsti.objects.DataSet`, and total counts are taken from it
        (on a per-circuit basis).

    sample_error : string, optional
        What type of sample error is included in the counts.  Can be:

        - "none"  - no sample error: counts are floating point numbers such
          that the exact probability can be found by the ratio of count / total.
        - "clip" - no sample error, but clip probabilities to [0,1] so, e.g.,
          counts are always positive.
        - "round" - same as "clip", except counts are rounded to the nearest
          integer.
        - "binomial" - the number of counts is taken from a binomial
          distribution.  Distribution has parameters p = (clipped) probability
          of the circuit and n = number of samples.  This can only be used
          when there are at most two outcome labels for each circuit.
        - "multinomial" - counts are taken from a multinomial distribution.
          Distribution has parameters p_k = (clipped) probability of the gate
          string using the k-th SPAM label and n = number of samples.

    seed : int, optional
        If not ``None``, a seed for numpy's random number generator, which
        is used to sample from the binomial or multinomial distribution.
        Ignored when `rand_state` is given.

    rand_state : numpy.random.RandomState
        A RandomState object to generate samples from. Can be useful to set
        instead of `seed` if you want reproducible distribution samples across
        multiple random function calls but you don't want to bother with
        manually incrementing seeds between those calls.

    alias_dict : dict, optional
        A dictionary mapping single operation labels into tuples of one or more
        other operation labels which translate the given circuits before values
        are computed using `model_or_dataset`.  The resulting Dataset, however,
        contains the *un-translated* circuits as keys.

    collision_action : {"aggregate", "keepseparate"}
        Determines how duplicate circuits are handled by the resulting
        `DataSet`.  Please see the constructor documentation for `DataSet`.

    record_zero_counts : bool, optional
        Whether zero-counts are actually recorded (stored) in the returned
        DataSet.  If False, then zero counts are ignored, except for
        potentially registering new outcome labels.

    comm : mpi4py.MPI.Comm, optional
        When not ``None``, an MPI communicator for distributing the computation
        across multiple processors and ensuring that the *same* dataset is
        generated on each processor (the root rank's dataset is broadcast).

    mem_limit : int, optional
        A rough memory limit in bytes which is used to determine job allocation
        when there are multiple processors.

    times : iterable, optional
        When not None, a list of time-stamps at which data should be sampled.
        `num_samples` samples will be simulated at each time value, meaning that
        each circuit in `circuit_list` will be evaluated with the given time
        value as its *start time*.

    Returns
    -------
    DataSet
        A data set filled with counts for the specified circuits, on which
        `done_adding_data()` has been called.

    Raises
    ------
    ValueError
        If `sample_error` is not one of the supported options (raised when the
        first circuit outcome is processed).
    """
    NTOL = 10
    TOL = 10**-NTOL
    OVERTOL = 1.0 + TOL  # acceptable window for sum(probs) before re-normalizing
    UNDERTOL = 1.0 - TOL
    sampled = sample_error in ("binomial", "multinomial")

    if isinstance(model_or_dataset, _ds.DataSet):
        dsGen = model_or_dataset
        gsGen = None
        dataset = _ds.DataSet(collision_action=collision_action,
                              outcome_label_indices=dsGen.olIndex)  # keep same outcome labels
    else:
        gsGen = model_or_dataset
        dsGen = None
        dataset = _ds.DataSet(collision_action=collision_action)

    if alias_dict:
        alias_dict = {_lbl.Label(ky): tuple((_lbl.Label(el) for el in val))
                      for ky, val in alias_dict.items()}  # convert to use Labels

    if isinstance(circuit_list, _ExperimentDesign):
        circuit_list = circuit_list.all_circuits_needing_data

    # Translate circuits whenever aliases are supplied -- for DataSet sources and
    # time-dependent simulation too, not just for the bulk (time-independent) model case.
    if alias_dict is not None:
        trans_circuit_list = [_gstrc.translate_circuit(s, alias_dict)
                              for s in circuit_list]
    else:
        trans_circuit_list = circuit_list

    if gsGen is not None and times is None:
        # Called on every rank: the (possibly distributed) bulk computation uses `comm`.
        all_probs = gsGen.bulk_probabilities(trans_circuit_list, comm=comm, mem_limit=mem_limit)

    if comm is None or comm.Get_rank() == 0:  # only root rank computes

        if sampled:
            if rand_state is None:
                rndm = _rndm.RandomState(seed)  # ok if seed is None
            else:
                rndm = rand_state

        # A single dummy "time" lets the time-independent case share the loop below.
        circuit_times = times if times is not None else ["N/A dummy"]
        count_lists = _collections.OrderedDict()  # circuit -> list of count dicts (one per time)

        for tm in circuit_times:
            # It would be nice to compute all the probabilities at time `tm` in bulk,
            # e.g. gsGen.bulk_probabilities(trans_circuit_list, ..., time=tm).

            for k, (s, trans_s) in enumerate(zip(circuit_list, trans_circuit_list)):

                if gsGen is not None:
                    if times is None:
                        ps = all_probs[trans_s]
                    else:
                        ps = gsGen.probabilities(trans_s, time=tm)

                    if sampled:
                        # Adjust to probabilities if needed (and warn if not close to in-bounds)
                        for ol in ps:
                            if ps[ol] < 0:
                                if ps[ol] < -TOL: _warnings.warn("Clipping probs < 0 to 0")
                                ps[ol] = 0.0
                            elif ps[ol] > 1:
                                if ps[ol] > (1 + TOL): _warnings.warn("Clipping probs > 1 to 1")
                                ps[ol] = 1.0
                else:
                    ps = _collections.OrderedDict([(ol, frac) for ol, frac
                                                   in dsGen[trans_s].fractions.items()])

                if gsGen is not None and sampled:
                    # Check that sum ~= 1 (and nudge if needed) since binomial and
                    #  multinomial random calls assume this.
                    psum = sum(ps.values())
                    adjusted = False
                    if psum > OVERTOL:
                        adjusted = True
                        _warnings.warn("Adjusting sum(probs) = %g > 1 to 1" % psum)
                    if psum < UNDERTOL:
                        adjusted = True
                        _warnings.warn("Adjusting sum(probs) = %g < 1 to 1" % psum)
                    if not UNDERTOL <= psum <= OVERTOL:
                        ps = {lbl: p / psum for lbl, p in ps.items()}
                    assert(UNDERTOL <= sum(ps.values()) <= OVERTOL), 'psum={}'.format(sum(ps.values()))
                    if adjusted:
                        _warnings.warn('Adjustment finished')

                if num_samples is None and dsGen is not None:
                    N = dsGen[trans_s].total  # use the number of samples from the generating dataset
                    # Note: total accounts for other intermediate-measurement branches automatically
                else:
                    try:
                        N = num_samples[k]  # try to treat num_samples as a list
                    except (TypeError, IndexError, KeyError):
                        N = num_samples  # if not indexable, num_samples should be a single number

                counts = {}  # don't use an ordered dict here - add_count_dict will sort keys
                # "outcome labels" - sort by prob for consistent generation
                labels = [ol for ol, _ in sorted(ps.items(), key=lambda item: item[1])]

                if sample_error == "binomial":
                    if len(labels) == 1:  # Special case when labels[0] == 1.0 (100%)
                        counts[labels[0]] = N
                    else:
                        assert(len(labels) == 2)
                        ol0, ol1 = labels[0], labels[1]
                        counts[ol0] = rndm.binomial(N, ps[ol0])
                        counts[ol1] = N - counts[ol0]
                elif sample_error == "multinomial":
                    countsArray = rndm.multinomial(N, [ps[ol] for ol in labels],
                                                   size=1)  # well-ordered list of probs
                    for i, ol in enumerate(labels):
                        counts[ol] = countsArray[0, i]
                else:
                    for outcomeLabel, p in ps.items():
                        pc = _np.clip(p, 0, 1)  # Note: *not* used in "none" case
                        if sample_error == "none":
                            counts[outcomeLabel] = float(N * p)
                        elif sample_error == "clip":
                            counts[outcomeLabel] = float(N * pc)
                        elif sample_error == "round":
                            counts[outcomeLabel] = int(round(N * pc))
                        else:
                            raise ValueError(
                                "Invalid sample error parameter: '%s'. Valid options are "
                                "'none', 'clip', 'round', 'binomial', or 'multinomial'" % sample_error
                            )

                if s not in count_lists: count_lists[s] = []
                count_lists[s].append(counts)

        if times is None:
            for s, counts_list in count_lists.items():
                for counts_dict in counts_list:
                    dataset.add_count_dict(s, counts_dict, record_zero_counts=record_zero_counts)
        else:
            for s, counts_list in count_lists.items():
                dataset.add_series_data(s, counts_list, times, record_zero_counts=record_zero_counts)

        dataset.done_adding_data()

    if comm is not None:  # broadcast to non-root procs
        dataset = comm.bcast(dataset if (comm.Get_rank() == 0) else None, root=0)

    return dataset
def aggregate_dataset_outcomes(dataset, label_merge_dict, record_zero_counts=True):
    """
    Builds a new DataSet whose outcomes are sums of groups of `dataset`'s outcomes.

    A typical use is collapsing a 2-qubit, 4-outcome DataSet into a
    1-qubit, 2-outcome DataSet.

    Parameters
    ----------
    dataset : DataSet object
        The DataSet whose outcomes are merged according to `label_merge_dict`.

    label_merge_dict : dictionary
        Maps each outcome of the *new* DataSet to the list of `dataset`
        outcomes whose counts are added together to produce it.  For example,
        with two-qubit outcome labels "00", "01", "10", and "11", the second
        qubit is "aggregated out" by
        ``{'0': ['00', '01'], '1': ['10', '11']}``.  For that particular task
        :function:`filter_dataset` may be preferable, since it also updates
        the circuits.

    record_zero_counts : bool, optional
        When exactly ``False``, merged outcomes whose count is zero are left
        out of the count dictionaries handed to the new DataSet.

    Returns
    -------
    merged_dataset : DataSet object
        The DataSet holding the merged outcomes.

    Raises
    ------
    ValueError
        If some outcome label of `dataset` is not listed in any value of
        `label_merge_dict`.
    """
    # Normalize both the keys and the listed outcomes to tuple-style outcome labels.
    as_outcome = _ld.OutcomeLabelDict.to_outcome
    label_merge_dict = {as_outcome(new): [as_outcome(old) for old in olds]
                        for new, olds in label_merge_dict.items()}
    new_outcomes = label_merge_dict.keys()
    merged_dataset = _ds.DataSet(outcome_labels=new_outcomes)

    covered_outcomes = [old for olds in label_merge_dict.values() for old in olds]
    if not set(dataset.outcome_labels).issubset(covered_outcomes):
        unaccounted = set(map(str, dataset.outcome_labels)) - set(map(str, covered_outcomes))
        raise ValueError(
            "`label_merge_dict` must account for all the outcomes in original dataset."
            " It's missing directives for:\n%s" % '\n'.join(unaccounted)
        )

    # Data is processed as a time series so that time-stamped datasets are handled too.
    for circuit in dataset.keys():
        times, linecounts = dataset[circuit].get_timeseries()
        merged_series = []
        for idx in range(len(times)):
            merged_counts = {
                new: sum(linecounts[idx].get(old, 0) for old in olds)
                for new, olds in label_merge_dict.items()
            }
            if record_zero_counts is False:
                merged_counts = {new: total for new, total in merged_counts.items()
                                 if not (total == 0)}
            merged_series.append(merged_counts)

        merged_dataset.add_series_data(circuit, merged_series, times, aux=dataset[circuit].aux)

    merged_dataset.done_adding_data()
    return merged_dataset
def _create_qubit_merge_dict(num_qubits, qubits_to_keep):
    """
    Builds a merge dictionary for :function:`aggregate_dataset_outcomes` over qubit outcomes.

    The outcome labels are taken to be all length-`num_qubits` strings of
    0's and 1's; the returned dictionary tells `aggregate_dataset_outcomes`
    to sum over every qubit *except* those in `qubits_to_keep`.

    Parameters
    ----------
    num_qubits : int
        The total number of qubits.

    qubits_to_keep : list
        Integer positions of the qubits that are kept (i.e. *not*
        aggregated) when the result is given to `aggregate_dataset_outcomes`.

    Returns
    -------
    dict
    """
    # Every bit-string of the requested length, in lexicographic order ('00', '01', ...).
    all_bitstrings = [''.join(bits) for bits in _itertools.product('01', repeat=num_qubits)]
    return _create_merge_dict(qubits_to_keep, all_bitstrings)
def _create_merge_dict(indices_to_keep, outcome_labels):
"""
Creates a dictionary appropriate for use with :function:`aggregate_dataset_outcomes`.
Each element of `outcome_labels` should be a n-character string (or a
1-tuple of such a string). The returned dictionary's keys will be all the
unique results of keeping only the characters indexed by `indices_to_keep`
from each outcome label. The dictionary's values will be a list of all the
original outcome labels which reduce to the key value when the
non-`indices_to_keep` characters are removed.
For example, if `outcome_labels == ['00','01','10','11']` and
`indices_to_keep == [1]` then this function returns the dict
`{'0': ['00','10'], '1': ['01','11'] }`.
Note: if the elements of `outcome_labels` are 1-tuples then so are the
elements of the returned dictionary's values.
Parameters
----------
indices_to_keep : list
A list of integer indices specifying which character positions should be
kept (i.e. *not* aggregated together by `aggregate_dataset_outcomes`).
outcome_labels : list
A list of the outcome labels to potentially merge. This can be a list
of strings or of 1-tuples containing strings.
Returns
-------
dict
"""
merge_dict = _collections.defaultdict(list)
for ol in outcome_labels:
if isinstance(ol, str):
reduced = ''.join([ol[i] for i in indices_to_keep])
else:
assert(len(ol) == 1) # should be a 1-tuple
reduced = (''.join([ol[0][i] for i in indices_to_keep]),) # a tuple
merge_dict[reduced].append(ol)
return dict(merge_dict) # return a *normal* dict
def filter_dataset(dataset, sectors_to_keep, sindices_to_keep=None,
                   new_sectors=None, idle=((),), record_zero_counts=True,
                   filtercircuits=True):
    """
    Restricts `dataset` to the sectors listed in `sectors_to_keep`.

    Outcomes of `dataset` that differ only in sectors (usually qubits - see
    below) *not* in `sectors_to_keep` are summed together, and operation
    labels acting specifically on sectors outside `sectors_to_keep` are
    removed from the circuits (an idle gate acting on *all* sectors, because
    its `.sslbls` is None, is *not* removed).

    "Sectors" are the state-space labels appearing in the circuits of
    `dataset`.  Each sector also corresponds to one character position within
    the outcome labels, so the outcome labels of `dataset` must all be
    1-tuples whose sole element is an n-character string in which each
    character is the outcome of a single sector.  Integer state-space labels
    can serve as both the label and the outcome-string position.  The kept
    state-space labels can be renamed in the returned `DataSet`'s circuits
    via `new_sectors`.

    The typical case is an *n*-qubit state space whose labels are the
    integers 0 to *n-1*; then `sindices_to_keep` need not be given.  To
    "rebase" the indices to 0 in the returned data set, use `new_sectors`
    (e.g. `sectors_to_keep == [4,5,6]` and `new_sectors == [0,1,2]`).

    Parameters
    ----------
    dataset : DataSet object
        The input DataSet whose data will be processed.

    sectors_to_keep : list or tuple
        The state-space labels (strings or integers) of the "sectors" to keep
        in the returned DataSet.

    sindices_to_keep : list or tuple, optional
        The 0-based positions, within each outcome string, of the characters
        belonging to the labels in `sectors_to_keep` (see above).  When the
        state space labels are integers (labeling *qubits*) that are also
        character positions, this may be left as `None`, in which case
        `sectors_to_keep` itself is used.  For example, if the outcome
        strings of `dataset` are '00','01','10', and '11', with the first
        position referring to qubit "Q1" and the second to qubit "Q2"
        (present in operation labels), then extracting just the "Q2" data
        requires `sectors_to_keep == ["Q2"]` and `sindices_to_keep == [1]`.

    new_sectors : list or tuple, optional
        New sector names onto which the elements of `sectors_to_keep` are
        mapped in the output DataSet's circuits.  None means the labels are
        not renamed.  This is useful, for instance, to run a 2-qubit protocol
        that expects qubits labeled "0" and "1" on qubits "4" and "5" of a
        larger set: set `sectors_to_keep == [4,5]` and `new_sectors == [0,1]`.

    idle : string or Label, optional
        The operation label used when no components of a circuit "layer"
        (element) are kept.

    record_zero_counts : bool, optional
        Whether zero-counts present in the original `dataset` are recorded
        (stored) in the returned (filtered) DataSet.  If False, such zero
        counts are ignored, except for potentially registering new outcome
        labels.

    filtercircuits : bool, optional
        Whether to also "filter" the circuits, removing gates that act
        outside of `sectors_to_keep`.  When False only the outcomes are
        aggregated.

    Returns
    -------
    filtered_dataset : DataSet object
        The DataSet with outcomes and circuits filtered as described above.
    """
    # Outcome-string positions default to the sector labels themselves.
    outcome_positions = sectors_to_keep if sindices_to_keep is None else sindices_to_keep
    filtered = dataset.aggregate_std_nqubit_outcomes(outcome_positions, record_zero_counts)

    if not filtercircuits:
        return filtered

    def _restrict_circuit(circuit):
        return _gstrc.filter_circuit(circuit, sectors_to_keep, new_sectors, idle)

    return filtered.process_circuits(_restrict_circuit, aggregate=True)
def trim_to_constant_numtimesteps(ds):
    """
    Trims a :class:`DataSet` so that each circuit's data comprises the same number of timesteps.

    Returns a new dataset that has data for the same number of time steps for
    every circuit. This is achieved by discarding all time-series data for every
    circuit with a time step index beyond 'min-time-step-index', where
    'min-time-step-index' is the minimum number of time steps over circuits.

    Parameters
    ----------
    ds : DataSet
        The dataset to trim.  It is not modified; a non-static copy is
        trimmed and returned.

    Returns
    -------
    DataSet
        The trimmed dataset, obtained by potentially discarding some of the data.
        For a dataset with no circuits this is simply the (empty) copy.
    """
    trimmedds = ds.copy_nonstatic()

    # `default=0` keeps min() from raising ValueError when `ds` holds no circuits;
    # the loop below is then a no-op.
    minnumtimes = min((ds[circuit].number_of_times for circuit in ds.keys()), default=0)

    for circuit in ds.keys():
        times, series = ds[circuit].get_timeseries()
        # Keep only the first `minnumtimes` time steps of this circuit's data.
        trimmedds.add_series_data(circuit, series[0:minnumtimes], times[0:minnumtimes],
                                  aux=ds.auxInfo[circuit])

    trimmedds.done_adding_data()
    return trimmedds
def _subsample_timeseries_data(ds, step):
    """
    Creates a :class:`DataSet` holding a sub-sampling of each circuit's time series.

    For every circuit, only the data at every `step`-th timestep is retained:
    the outcomes at the i-th time are kept exactly when i modulo `step` is zero.

    Parameters
    ----------
    ds : DataSet
        The dataset to subsample.

    step : int
        The sub-sampling time step.  Only data at every `step` increment
        in time is kept.

    Returns
    -------
    DataSet
        The subsampled dataset.
    """
    subsampled_ds = ds.copy_nonstatic()

    for circ in ds.keys():
        times, odicts = ds[circ].timeseries_for_outcomes
        kept = [idx for idx in range(len(times)) if (idx % step) == 0]
        kept_times = [times[idx] for idx in kept]
        # One {outcome: count} dictionary per retained time index.
        kept_series = [{outcome: counts[idx] for outcome, counts in odicts.items()}
                       for idx in kept]
        subsampled_ds.add_series_data(circ, kept_series, kept_times)

    subsampled_ds.done_adding_data()
    return subsampled_ds