from __future__ import division, print_function, absolute_import, unicode_literals
#*****************************************************************
# pyGSTi 0.9: Copyright 2015 Sandia Corporation
# This Software is released under the GPL license detailed
# in the file "license.txt" in the top-level pyGSTi directory
#*****************************************************************
"""Core integrated routines for detecting and characterizing crosstalk"""
import numpy as _np
from . import objects as _obj
from ... import objects as _pygobjs
from ... import io as _pygio
import pcalg
from gsq.ci_tests import ci_test_dis

try:
    # collections.Iterable was removed in Python 3.10; prefer collections.abc when available
    from collections.abc import Iterable as _Iterable
except ImportError:  # Python 2 fallback
    from collections import Iterable as _Iterable

def tuple_replace_at_index(tup, ix, val):
    """Return a copy of tuple `tup` with the element at index `ix` replaced by `val`."""
    return tup[:ix] + (val,) + tup[ix + 1:]
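
# Example (illustrative only): tuple_replace_at_index((0, 1, 2), 1, 9) returns (0, 9, 2).
# Tuples are immutable, so a new tuple is built rather than modifying `tup` in place.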


def load_pygsti_dataset(filename):
    """
    Loads a pygsti dataset from file.

    This is a wrapper around pygsti's dataset loader that first checks that the file can be
    opened. (The logic for rewriting the older outcome-specification header into the newer
    format is currently disabled; see the commented-out lines below.)
    """
    try:
        # Open and immediately close the file, just to confirm it is readable.
        open(filename, "r").close()
    except IOError:
        print("File not found, or other file IO error.")

    # lines = file.readlines()
    # file.close()
    # if lines[0] == "## Columns = 00 count, 01 count, 10 count, 11 count\n":
    #     lines[0] = "## Columns = 0:0 count, 0:1 count, 1:0 count, 1:1 count\n"
    #     file = open(filename, "w")
    #     file.writelines(lines)
    #     file.close()

    data = _pygio.load_dataset(filename)
    return data


def flatten(l):
    """
    Flattens an irregular list.

    From https://stackoverflow.com/questions/2158395/flatten-an-irregular-list-of-lists
    """
    for el in l:
        if isinstance(el, _Iterable) and not isinstance(el, (str, bytes)):
            yield from flatten(el)
        else:
            yield el
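
# Example (illustrative only): list(flatten([0, [1, [2, 3]], 'ab'])) yields
# [0, 1, 2, 3, 'ab'] -- strings and bytes are treated as atoms, not as iterables.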


def do_basic_crosstalk_detection(ds, number_of_regions, settings, confidence=0.95, verbosity=1, name=None,
                                 assume_independent_settings=True):
    """
    Implements crosstalk detection on multiqubit data (fine-grained data with entries for each experiment).

    Parameters
    ----------
    ds : pyGSTi DataSet or numpy array
        The multiqubit data to analyze. If this is a numpy array, it must contain time series data and it
        must be 2-dimensional, with each row being a sequence of measurement outcomes and settings for each
        qubit region. A region is a set of one or more qubits, and crosstalk is assessed between regions.
        The first `number_of_regions` entries of a row are the outcomes and the remaining entries are the
        settings.

    number_of_regions : int
        The number of regions in the experiment.

    settings : list
        A list of length `number_of_regions`, indicating the number of settings for each qubit region.

    confidence : float, optional

    verbosity : int, optional

    name : str, optional

    Returns
    -------
    results : CrosstalkResults object
        The results of the crosstalk detection analysis. This contains the output skeleton graph and DAG
        from the PC algorithm, indicating regions with detected crosstalk, along with all of the input
        information.
    """
    # -------------------------- #
    # Format and check the input #
    # -------------------------- #

    if type(ds) != _pygobjs.dataset.DataSet:

        data_shape = _np.shape(ds)
        settings_shape = _np.shape(settings)

        # Check that the input data is a 2D array
        assert len(data_shape) == 2, \
            "Input data format is incorrect! If the input is a numpy array it must be 2-dimensional."

        # Check that settings is a list of length number_of_regions
        assert (len(settings_shape) == 1) and (settings_shape[0] == number_of_regions), \
            "settings should be a list of the same length as number_of_regions."

        # The number of columns in the data must be consistent with the number of settings
        assert data_shape[1] == (sum(settings) + number_of_regions), \
            "Mismatch between the number of settings specified for each region and the number of columns in data"

        data = ds
        num_data = data_shape[0]
        num_columns = data_shape[1]

    # This converts a DataSet to an array, as the code below uses arrays
    if type(ds) == _pygobjs.dataset.DataSet:

        opstr = ds.keys()[0]
        temp = ds.auxInfo[opstr]['settings']
        num_settings = len(temp)

        settings_shape = _np.shape(settings)
        # Check that settings is a list of length number_of_regions
        assert (len(settings_shape) == 1) and (settings_shape[0] == number_of_regions), \
            "settings should be a list of the same length as number_of_regions."

        # num columns = number of settings + number of regions (b/c we assume one outcome per region)
        num_columns = num_settings + number_of_regions

        num_data = len(ds.keys())
        data = []
        collect_settings = {key: [] for key in range(num_settings)}
        for row in range(num_data):
            opstr = ds.keys()[row]

            templine_set = [0] * num_settings
            settings_row = ds.auxInfo[opstr]['settings']

            for key in settings_row:
                if len(key) == 1:  # single region/qubit gate
                    templine_set[key[0]] = settings_row[key]
                    collect_settings[key[0]].append(settings_row[key])
                else:  # two-region/two-qubit gate
                    print("Two qubit gate, not sure what to do!!")  # TODO
                    return

            outcomes_row = ds[opstr]
            for outcome in outcomes_row:
                templine_out = [0] * number_of_regions

                for r in range(number_of_regions):
                    templine_out[r] = int(outcome[0][r])
                num_rep = int(outcome[2])

                templine_out.append(templine_set)
                flattened_line = list(flatten(templine_out))

                # Each outcome was observed num_rep times, so append that many copies
                for r in range(num_rep):
                    data.append(flattened_line)

        num_seqs = [len(set(collect_settings[i])) for i in range(num_settings)]
        data = _np.asarray(data)
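
    # Illustration of the resulting layout (hypothetical values): with number_of_regions=2 and
    # one setting per region, a row of `data` such as [0, 1, 3, 2] means regions 0 and 1 produced
    # outcomes 0 and 1, under setting values 3 and 2 respectively.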
    # --------------------------------------------------------- #
    # Prepare a results object, and store the input information #
    # --------------------------------------------------------- #

    # Initialize an empty results object.
    results = _obj.CrosstalkResults()

    # Records input information into the results object.
    results.name = name
    results.data = data
    results.number_of_regions = number_of_regions
    results.settings = settings
    results.number_of_datapoints = num_data
    results.number_of_columns = num_columns
    results.confidence = confidence
    # ------------------------------------------------- #
    #       Calculate the causal graph skeleton          #
    # ------------------------------------------------- #

    if assume_independent_settings:
        # List edges between settings so that these can be ignored when constructing skeleton
        ignore_edges = []
        for set1 in range(number_of_regions, num_columns):
            for set2 in range(number_of_regions, num_columns):
                if set1 > set2:
                    ignore_edges.append((set1, set2))
    else:
        ignore_edges = []

    print("Calculating causal graph skeleton ...")
    (skel, sep_set) = pcalg.estimate_skeleton(ci_test_dis, data, 1 - confidence, ignore_edges)

    print("Calculating directed causal graph ...")
    g = pcalg.estimate_cpdag(skel_graph=skel, sep_set=sep_set)

    # Store skeleton and graph in results object
    results.skel = skel
    results.sep_set = sep_set
    results.graph = g

    # Calculate the column index for the first setting for each region
    setting_indices = {x: number_of_regions + sum(settings[:x]) for x in range(number_of_regions)}
    results.setting_indices = setting_indices
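
    # For example (hypothetical): with number_of_regions=2 and settings=[2, 1], the data columns are
    # [outcome_0, outcome_1, setting_0a, setting_0b, setting_1], so setting_indices == {0: 2, 1: 4}.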
results.setting_indices = setting_indices
node_labels = {}
cnt=0
for col in range(num_columns) :
if col < number_of_regions :
node_labels[cnt] = r'R$_{%d}$' % col
cnt += 1
# node_labels.append("$%d^O$" % col)
else :
for region in range(number_of_regions):
if col in range(setting_indices[region],
(setting_indices[(region + 1)] if region < (number_of_regions - 1) else num_columns)):
break
node_labels[cnt] = r'S$_{%d}^{(%d)}$' % (region, (col-setting_indices[region]))
cnt += 1
#node_labels.append("%d^S_{%d}$" % (region, (col-setting_indices[region]+1)))
results.node_labels = node_labels
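
    # For the hypothetical layout above (2 regions, settings=[2, 1]) the node labels would be
    # {0: 'R$_{0}$', 1: 'R$_{1}$', 2: 'S$_{0}^{(0)}$', 3: 'S$_{0}^{(1)}$', 4: 'S$_{1}^{(0)}$'}.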
    # Generate crosstalk detected matrix and assign weight to each edge according to TVD variation in
    # distribution of destination variable when source variable is varied.
    print("Examining edges for crosstalk ...")

    cmatrix = _np.zeros((number_of_regions, number_of_regions))
    edge_weights = _np.zeros(len(g.edges()))
    is_edge_ct = _np.zeros(len(g.edges()))
    edge_tvds = {}

    for idx, edge in enumerate(g.edges()):
        source = edge[0]
        dest = edge[1]

        if verbosity > 1:
            print("** Edge: ", edge, " **")

        # For each edge, decide if it represents crosstalk.
        # Crosstalk is:
        #   (1) an edge between outcomes on different regions
        #   (2) an edge between a region's outcome and a setting of another region
        if source < number_of_regions and dest < number_of_regions:
            cmatrix[source, dest] = 1
            is_edge_ct[idx] = 1
            print("Crosstalk detected. Regions " + str(source) + " and " + str(dest))

        if source < number_of_regions and dest >= number_of_regions:
            if dest not in range(setting_indices[source],
                                 (setting_indices[(source + 1)] if source < (number_of_regions - 1)
                                  else num_columns)):
                for region in range(number_of_regions):
                    if dest in range(setting_indices[region],
                                     (setting_indices[(region + 1)] if region < (number_of_regions - 1)
                                      else num_columns)):
                        break
                cmatrix[source, region] = 1
                is_edge_ct[idx] = 1
                print("Crosstalk detected. Regions " + str(source) + " and " + str(region))

        if source >= number_of_regions and dest < number_of_regions:
            if source not in range(setting_indices[dest],
                                   (setting_indices[(dest + 1)] if dest < (number_of_regions - 1)
                                    else num_columns)):
                for region in range(number_of_regions):
                    if source in range(setting_indices[region],
                                       (setting_indices[(region + 1)] if region < (number_of_regions - 1)
                                        else num_columns)):
                        break
                cmatrix[region, dest] = 1
                is_edge_ct[idx] = 1
                print("Crosstalk detected. Regions " + str(region) + " and " + str(dest))
        # For each edge in the causal graph that represents crosstalk, calculate the TVD between the
        # distributions of the dependent variable when the other variable is varied
        if is_edge_ct[idx] == 1:
            source_levels, level_cnts = _np.unique(data[:, source], return_counts=True)
            num_levels = len(source_levels)

            if any(level_cnts < 10):
                print(" *** Warning: n<10 data points for some levels. TVD calculations may have large error bars.")

            tvds = _np.zeros((num_levels, num_levels))
            for i in range(num_levels):
                for j in range(i):
                    # Marginal distributions of the destination variable for source levels i and j
                    marg1 = data[data[:, source] == source_levels[i], dest]
                    marg2 = data[data[:, source] == source_levels[j], dest]
                    n1, n2 = len(marg1), len(marg2)

                    marg1_levels, marg1_level_cnts = _np.unique(marg1, return_counts=True)
                    marg2_levels, marg2_level_cnts = _np.unique(marg2, return_counts=True)

                    # TVD = (1/2) * sum over levels of |p1(level) - p2(level)|. The sum must run over
                    # the union of levels seen in either marginal, so that a level observed only in
                    # marg2 still contributes its full probability mass.
                    tvd_sum = 0.0
                    for level in _np.union1d(marg1_levels, marg2_levels):
                        idx1 = _np.where(marg1_levels == level)[0]
                        idx2 = _np.where(marg2_levels == level)[0]
                        p1 = marg1_level_cnts[idx1[0]] / n1 if len(idx1) > 0 else 0.0
                        p2 = marg2_level_cnts[idx2[0]] / n2 if len(idx2) > 0 else 0.0
                        tvd_sum += _np.fabs(p1 - p2)

                    tvds[i, j] = tvds[j, i] = tvd_sum / 2.0

            edge_tvds[idx] = tvds
    results.cmatrix = cmatrix
    results.is_edge_ct = is_edge_ct
    results.crosstalk_detected = _np.sum(is_edge_ct) > 0
    results.edge_weights = edge_weights
    results.edge_tvds = edge_tvds

    return results
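
# A minimal usage sketch (hypothetical shapes and values, not part of the library's test suite):
# analyze a 2-region experiment in which each region has a single setting variable.
#
#     import numpy as np
#     # columns: [outcome_0, outcome_1, setting_0, setting_1]
#     fake_data = np.array([[0, 0, 1, 1],
#                           [0, 1, 1, 2],
#                           [1, 0, 2, 1],
#                           [1, 1, 2, 2]] * 50)
#     results = do_basic_crosstalk_detection(fake_data, 2, [1, 1], confidence=0.95)
#     print(results.crosstalk_detected)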