-
Notifications
You must be signed in to change notification settings - Fork 55
/
distforwardsim.py
485 lines (400 loc) · 28.7 KB
/
distforwardsim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
"""
Defines the DistributableForwardSimulator calculator class
"""
#***************************************************************************************************
# Copyright 2015, 2019 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
# Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights
# in this software.
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0 or in the LICENSE file in the root pyGSTi directory.
#***************************************************************************************************
import numpy as _np
from pygsti.forwardsims.forwardsim import ForwardSimulator as _ForwardSimulator
from pygsti.forwardsims.forwardsim import _array_type_parameter_dimension_letters
from pygsti.tools import mpitools as _mpit
from pygsti.tools import slicetools as _slct
from pygsti.tools import sharedmemtools as _smt
class DistributableForwardSimulator(_ForwardSimulator):
"""
A base class for forward simulators that use distributed COPA layouts.
This class contains implements the methods of :class:`ForwardSimulator` assuming that the
layout is a :class:`DistributableCOPALayout` object, and leaves a set of a simpler methods
for derived classes to implement.
In particular, because a distributed layout divides computations by assigning segments of
the full element- and parameter-dimensions to individual processors, derived classes just
implement the `_bulk_fill_*probs_atom` methods which compute a single section of the entire
output array, and don't need to worry about dealing with the distribution in element and
parameter directions.
Parameters
----------
model : Model, optional
The parent model of this simulator. It's fine if this is `None` at first,
but it will need to be set (by assigning `self.model` before using this simulator.
num_atoms : int, optional
The number of atoms to use when creating a layout (i.e. when calling :meth:`create_layout`).
This determines how many units the element (circuit outcome probability) dimension is divided
into, and doesn't have to correclate with the number of processors. When multiple processors
are used, if `num_atoms` is less than the number of processors it should divide the number of
processors evenly, so that `num_atoms // num_procs` groups of processors can be used to divide
the computation over parameter dimensions.
processor_grid : tuple optional
Specifies how the total number of processors should be divided into a number of
atom-processors, 1st-parameter-deriv-processors, and 2nd-parameter-deriv-processors.
Each level of specification is optional, so this can be a 1-, 2-, or 3- tuple of
integers (or None). Multiplying the elements of `processor_grid` together should give
at most the total number of processors.
param_blk_sizes : tuple, optional
The parameter block sizes along the first or first & second parameter dimensions - so
this can be a 0-, 1- or 2-tuple of integers or `None` values. A block size of `None`
means that there should be no division into blocks, and that each block processor
computes all of its parameter indices at once.
"""
@classmethod
def _array_types_for_method(cls, method_name):
# give array types for this method because it's currently used publically in objective function's hessian
if method_name == '_iter_atom_hprobs_by_rectangle':
return ('abb', 'abb') + cls._array_types_for_method('_bulk_fill_hprobs_dprobs_atom')
if method_name == '_bulk_fill_hprobs_dprobs_atom':
return cls._array_types_for_method('_bulk_fill_probs_block') \
+ cls._array_types_for_method('_bulk_fill_dprobs_block') \
+ cls._array_types_for_method('_bulk_fill_hprobs_block')
return super()._array_types_for_method(method_name)
def __init__(self, model=None, num_atoms=None, processor_grid=None, param_blk_sizes=None):
super().__init__(model)
self._num_atoms = num_atoms
self._processor_grid = processor_grid
self._pblk_sizes = param_blk_sizes
self._default_distribute_method = "circuits"
def _set_param_block_size(self, wrt_filter, wrt_block_size, comm):
if wrt_filter is None:
blkSize = wrt_block_size # could be None
if (comm is not None) and (comm.Get_size() > 1):
comm_blkSize = self.model.num_params / comm.Get_size()
blkSize = comm_blkSize if (blkSize is None) \
else min(comm_blkSize, blkSize) # override with smaller comm_blkSize
else:
blkSize = None # wrt_filter dictates block
return blkSize
def _bulk_fill_probs(self, array_to_fill, layout):
"""Note: we expect that array_to_fill points to the memory specifically for this processor
(a subset of the memory for the host when memory is shared) """
atom_resource_alloc = layout.resource_alloc('atom-processing')
atom_resource_alloc.host_comm_barrier() # ensure all procs have finished w/shared memory before we reinit
for atom in layout.atoms: # layout only holds local atoms
self._bulk_fill_probs_atom(array_to_fill[atom.element_slice], atom, atom_resource_alloc)
atom_resource_alloc.host_comm_barrier() # don't exit until all procs' array_to_fill is ready
# (may need to wait for the host leader to write to this proc's array_to_fill, as _block
# functions just ensure the lead proc eventually writes to the memory))
def _bulk_fill_probs_atom(self, array_to_fill, layout_atom, resource_alloc):
# if atom can be converted to a (sub)-layout, then we can just use machinery of base
# class (note: layouts hold their own resource-alloc, atom's don't)
self._bulk_fill_probs_block(array_to_fill, layout_atom.as_layout(resource_alloc))
def _bulk_fill_dprobs(self, array_to_fill, layout, pr_array_to_fill):
"""Note: we expect that array_to_fill points to the memory specifically for this processor
(a subset of the memory for the host when memory is shared) """
blkSize = layout.param_dimension_blk_sizes[0]
atom_resource_alloc = layout.resource_alloc('atom-processing')
param_resource_alloc = layout.resource_alloc('param-processing')
atom_resource_alloc.host_comm_barrier() # ensure all procs have finished w/shared memory before we reinit
# Note: use *largest* host comm that we fill - so 'atom' comm, not 'param' comm
host_param_slice = None # layout.host_param_slice # array_to_fill is already just this slice of the host mem
global_param_slice = layout.global_param_slice
for atom in layout.atoms:
#assert(_slct.length(atom.element_slice) == atom.num_elements) # for debugging
#print("DEBUG: Atom %d of %d slice=%s" % (iDB, len(layout.atoms), str(atom.element_slice)))
if pr_array_to_fill is not None:
self._bulk_fill_probs_atom(pr_array_to_fill[atom.element_slice], atom, atom_resource_alloc)
if blkSize is None: # avoid unnecessary slice_up_range and block loop logic in 'else' block
#Compute all of our derivative columns at once
self._bulk_fill_dprobs_atom(array_to_fill[atom.element_slice, :], host_param_slice, atom,
global_param_slice, param_resource_alloc)
else: # Divide columns into blocks of at most blkSize
Np = _slct.length(global_param_slice) # total number of parameters we're computing
nBlks = int(_np.ceil(Np / blkSize)) # num blocks required to achieve desired average size == blkSize
blocks = _mpit.slice_up_range(Np, nBlks) # blocks contain indices into final_array[host_param_slice]
for block in blocks:
host_param_slice_part = block # _slct.shift(block, host_param_slice.start) # into host's memory
global_param_slice_part = _slct.shift(block, global_param_slice.start) # actual parameter indices
self._bulk_fill_dprobs_atom(array_to_fill[atom.element_slice, :], host_param_slice_part, atom,
global_param_slice_part, param_resource_alloc)
atom_resource_alloc.host_comm_barrier() # don't exit until all procs' array_to_fill is ready
def _bulk_fill_dprobs_atom(self, array_to_fill, dest_param_slice, layout_atom, param_slice, resource_alloc):
# if atom can be converted to a (sub)-layout, then we can just use machinery of base
# class (note: layouts hold their own resource-alloc, atom's don't)
self._bulk_fill_dprobs_block(array_to_fill, dest_param_slice,
layout_atom.as_layout(resource_alloc), param_slice)
def _bulk_fill_hprobs(self, array_to_fill, layout,
pr_array_to_fill, deriv1_array_to_fill, deriv2_array_to_fill):
"""Note: we expect that array_to_fill points to the memory specifically for this processor
(a subset of the memory for the host when memory is shared) """
blkSize1 = layout.param_dimension_blk_sizes[0]
blkSize2 = layout.param_dimension_blk_sizes[1]
#Assume we're being called with a resource_alloc that's been setup by a distributed layout:
atom_resource_alloc = layout.resource_alloc('atom-processing')
param_resource_alloc = layout.resource_alloc('param-processing')
param2_resource_alloc = layout.resource_alloc('param2-processing')
atom_resource_alloc.host_comm_barrier() # ensure all procs have finished w/shared memory before we reinit
# Note: use *largest* host comm that we fill - so 'atom' comm, not 'param' comm
host_param_slice = None # layout.host_param_slice # array_to_fill is already just this slice of the host mem
host_param2_slice = None # layout.host_param2_slice # array_to_fill is already just this slice of the host mem
global_param_slice = layout.global_param_slice
global_param2_slice = layout.global_param2_slice
for atom in layout.atoms:
if pr_array_to_fill is not None:
self._bulk_fill_probs_atom(pr_array_to_fill[atom.element_slice], atom, atom_resource_alloc)
if blkSize1 is None and blkSize2 is None: # run 'else' block without unnecessary logic
#Compute all our derivative columns at once
if deriv1_array_to_fill is not None:
self._bulk_fill_dprobs_atom(deriv1_array_to_fill[atom.element_slice, :], host_param_slice,
atom, global_param_slice, param_resource_alloc)
if deriv2_array_to_fill is not None:
if deriv1_array_to_fill is not None and global_param_slice == global_param2_slice:
deriv2_array_to_fill[atom.element_slice, :] = deriv1_array_to_fill[atom.element_slice, :]
else:
self._bulk_fill_dprobs_atom(deriv2_array_to_fill[atom.element_slice, :], host_param2_slice,
atom, global_param2_slice, param2_resource_alloc)
self._bulk_fill_hprobs_atom(array_to_fill[atom.element_slice, :, :], host_param_slice,
host_param2_slice, atom, global_param_slice, global_param2_slice,
param2_resource_alloc)
else: # Divide columns into blocks of at most shape (blkSize1, blkSize2)
assert(blkSize1 is not None and blkSize2 is not None), \
"Both (or neither) of the Hessian block sizes must be specified!"
Np1 = _slct.length(global_param_slice)
Np2 = _slct.length(global_param2_slice)
nBlks1 = int(_np.ceil(Np1 / blkSize1))
nBlks2 = int(_np.ceil(Np2 / blkSize2))
# num blocks required to achieve desired average size == blkSize1 or blkSize2
blocks1 = _mpit.slice_up_range(Np1, nBlks1)
blocks2 = _mpit.slice_up_range(Np2, nBlks2)
for block1 in blocks1:
host_param_slice_part = block1 # _slct.shift(block1, host_param_slice.start) # into host's memory
global_param_slice_part = _slct.shift(block1, global_param_slice.start) # actual parameter indices
if deriv1_array_to_fill is not None:
self._bulk_fill_dprobs_atom(deriv1_array_to_fill[atom.element_slice, :], host_param_slice_part,
atom, global_param_slice_part, param_resource_alloc)
for block2 in blocks2:
host_param2_slice_part = block2 # into host's memory
global_param2_slice_part = _slct.shift(block2, global_param2_slice.start) # parameter indices
self._bulk_fill_hprobs_atom(array_to_fill[atom.element_slice, :],
host_param_slice_part, host_param2_slice_part, atom,
global_param_slice_part, global_param2_slice_part,
param2_resource_alloc)
#Fill deriv2_array_to_fill if we need to.
if deriv2_array_to_fill is not None:
if deriv1_array_to_fill is not None and global_param_slice == global_param2_slice:
deriv2_array_to_fill[atom.element_slice, :] = deriv1_array_to_fill[atom.element_slice, :]
else:
for block2 in blocks2:
host_param2_slice_part = block2 # into host's memory
global_param2_slice_part = _slct.shift(block2, global_param2_slice.start) # param indices
self._bulk_fill_dprobs_atom(deriv2_array_to_fill[atom.element_slice, :],
host_param2_slice_part, atom,
global_param2_slice_part, param_resource_alloc)
atom_resource_alloc.host_comm_barrier() # don't exit until all procs' array_to_fill is ready
def _bulk_fill_hprobs_atom(self, array_to_fill, dest_param_slice1, dest_param_slice2, layout_atom,
param_slice1, param_slice2, resource_alloc):
# if atom can be converted to a (sub)-layout, then we can just use machinery of base
# class (note: layouts hold their own resource-alloc, atom's don't)
self._bulk_fill_hprobs_block(array_to_fill, dest_param_slice1, dest_param_slice2,
layout_atom.as_layout(resource_alloc), param_slice1, param_slice2)
def _bulk_fill_hprobs_dprobs_atom(self, array_to_fill, deriv1_array_to_fill, deriv2_array_to_fill, atom,
param_slice1, param_slice2, resource_alloc):
#Note: this function can be called similarly to _bulk_fill_hprobs_atom or _bulk_fill_dprobs_atom
# in that array_to_fill is assumed to already be sized to the atom's elements, i.e.
# we provide array_to_fill and not array_to_fill[atom.alement_slice,...] when calling subroutines.
host_param_slice1 = host_param_slice2 = None # array_to_fill is already just this slice of the host mem
if deriv1_array_to_fill is not None:
self._bulk_fill_dprobs_atom(deriv1_array_to_fill, host_param_slice1, atom,
param_slice1, resource_alloc)
if deriv2_array_to_fill is not None:
if deriv1_array_to_fill is not None and param_slice1 == param_slice2:
deriv2_array_to_fill[:, :] = deriv1_array_to_fill[:, :]
else:
self._bulk_fill_dprobs_atom(deriv2_array_to_fill, host_param_slice2, atom,
param_slice2, resource_alloc)
self._bulk_fill_hprobs_atom(array_to_fill, host_param_slice1, host_param_slice2, atom,
param_slice1, param_slice2, resource_alloc)
def _iter_hprobs_by_rectangle(self, layout, wrt_slices_list, return_dprobs_12):
# Just needed for compatibility - so base `iter_hprobs_by_rectangle` knows to loop over atoms
# Similar to _iter_atom_hprobs_by_rectangle but runs over all atoms before yielding and
# yielded array has leading dim == # of local elements instead of just 1 atom's # elements.
nElements = layout.num_elements
resource_alloc = layout.resource_alloc()
for wrtSlice1, wrtSlice2 in wrt_slices_list:
if return_dprobs_12:
dprobs1, dprobs1_shm = _smt.create_shared_ndarray(resource_alloc, (nElements, _slct.length(wrtSlice1)),
'd', zero_out=True)
dprobs2, dprobs2_shm = _smt.create_shared_ndarray(resource_alloc, (nElements, _slct.length(wrtSlice2)),
'd', zero_out=True)
else:
dprobs1 = dprobs2 = dprobs1_shm = dprobs2_shm = None
hprobs, hprobs_shm = _smt.create_shared_ndarray(
resource_alloc, (nElements, _slct.length(wrtSlice1), _slct.length(wrtSlice2)),
'd', zero_out=True)
for atom in layout.atoms:
self._bulk_fill_hprobs_dprobs_atom(hprobs[atom.element_slice, :, :],
dprobs1[atom.element_slice, :] if (dprobs1 is not None) else None,
dprobs2[atom.element_slice, :] if (dprobs2 is not None) else None,
atom, wrtSlice1, wrtSlice2, resource_alloc)
#Note: we give resource_alloc as our local `resource_alloc` above because all the arrays
# have been allocated based on just this subset of processors, unlike a call to bulk_fill_hprobs(...)
# where the probs & dprobs are memory allocated and filled by a larger group of processors. (the main
# function of these args is to know which procs work together to fill the *same* values and which of
# these are on the *same* host so that only one per host actually writes to the assumed-shared memory.
if return_dprobs_12:
dprobs12 = dprobs1[:, :, None] * dprobs2[:, None, :] # (KM,N,1) * (KM,1,N') = (KM,N,N')
yield wrtSlice1, wrtSlice2, hprobs, dprobs12
else:
yield wrtSlice1, wrtSlice2, hprobs
_smt.cleanup_shared_ndarray(dprobs1_shm)
_smt.cleanup_shared_ndarray(dprobs2_shm)
_smt.cleanup_shared_ndarray(hprobs_shm)
def _iter_atom_hprobs_by_rectangle(self, atom, wrt_slices_list, return_dprobs_12, resource_alloc):
#FUTURE could make a resource_alloc.check_can_allocate_memory call here for ('epp', 'epp')?
nElements = atom.num_elements
for wrtSlice1, wrtSlice2 in wrt_slices_list:
if return_dprobs_12:
dprobs1, dprobs1_shm = _smt.create_shared_ndarray(resource_alloc, (nElements, _slct.length(wrtSlice1)),
'd', zero_out=True)
dprobs2, dprobs2_shm = _smt.create_shared_ndarray(resource_alloc, (nElements, _slct.length(wrtSlice2)),
'd', zero_out=True)
else:
dprobs1 = dprobs2 = dprobs1_shm = dprobs2_shm = None
hprobs, hprobs_shm = _smt.create_shared_ndarray(
resource_alloc, (nElements, _slct.length(wrtSlice1), _slct.length(wrtSlice2)),
'd', zero_out=True)
# Note: no need to index w/ [atom.element_slice,...] (compare with _iter_hprobs_by_rectangles)
# since these arrays are already sized to this particular atom (not to all the host's atoms)
self._bulk_fill_hprobs_dprobs_atom(hprobs, dprobs1, dprobs2, atom,
wrtSlice1, wrtSlice2, resource_alloc)
#Note: we give resource_alloc as our local `resource_alloc` above because all the arrays
# have been allocated based on just this subset of processors, unlike a call to bulk_fill_hprobs(...)
# where the probs & dprobs are memory allocated and filled by a larger group of processors. (the main
# function of these args is to know which procs work together to fill the *same* values and which of
# these are on the *same* host so that only one per host actually writes to the assumed-shared memory.
if return_dprobs_12:
dprobs12 = dprobs1[:, :, None] * dprobs2[:, None, :] # (KM,N,1) * (KM,1,N') = (KM,N,N')
yield wrtSlice1, wrtSlice2, hprobs, dprobs12
else:
yield wrtSlice1, wrtSlice2, hprobs
_smt.cleanup_shared_ndarray(dprobs1_shm)
_smt.cleanup_shared_ndarray(dprobs2_shm)
_smt.cleanup_shared_ndarray(hprobs_shm)
def _bulk_fill_timedep_deriv(self, layout, dataset, ds_circuits, num_total_outcomes,
deriv_array_to_fill, deriv_fill_fn, array_to_fill=None,
fill_fn=None):
"""
A helper method for computing (filling) the derivative of a time-dependent quantity.
A generic method providing the scaffolding used when computing (filling) the
derivative of a time-dependent quantity. In particular, it distributes the
computation among the subtrees of `eval_tree` and relies on the caller to supply
"compute_cache" and "compute_dcache" functions which just need to compute the
quantitiy being filled and its derivative given a sub-tree and a parameter-slice.
Parameters
----------
layout : TermCOPALayout
The layout specifiying the quantities (circuit outcome probabilities) to be
computed, and related information.
dataset : DataSet
the data set passed on to the computation functions.
ds_circuits : list of Circuits
the circuits to use as they should be queried from `dataset` (see
below). This is typically the same list of circuits used to
construct `layout` potentially with some aliases applied.
num_total_outcomes : list or array
a list of the total number of *possible* outcomes for each circuit
(so `len(num_total_outcomes) == len(ds_circuits)`). This is
needed for handling sparse data, where `dataset` may not contain
counts for all the possible outcomes of each circuit.
deriv_array_to_fill : numpy ndarray
an already-allocated ExM numpy array where E is the total number of
computed elements (i.e. layout.num_elements) and M is the
number of model parameters.
deriv_fill_fn : function
a function used to compute the objective funtion jacobian.
array_to_fill : numpy array, optional
when not None, an already-allocated length-E numpy array that is filled
with the per-circuit contributions computed using `fn` below.
fill_fn : function, optional
a function used to compute the objective function.
Returns
-------
None
"""
#Note: this function is similar to _bulk_fill_dprobs, and may be able to consolidated in the FUTURE.
blkSize = layout.param_dimension_blk_sizes[0]
atom_resource_alloc = layout.resource_alloc('atom-processing')
param_resource_alloc = layout.resource_alloc('param-processing')
assert(atom_resource_alloc.host_comm is None), \
"Shared memory is not supported in time-dependent calculations (yet)"
host_param_slice = layout.host_param_slice
global_param_slice = layout.global_param_slice
for atom in layout.atoms:
elInds = atom.element_slice
#NOTE: this block uses atom.orig_indices_by_expcircuit, which is specific to _MapCOPALayoutAtom - TODO
dataset_rows = {i_expanded: dataset[ds_circuits[i]]
for i_expanded, i in atom.orig_indices_by_expcircuit.items()}
num_outcomes = {i_expanded: num_total_outcomes[i]
for i_expanded, i in atom.orig_indices_by_expcircuit.items()}
if array_to_fill is not None:
fill_fn(array_to_fill, elInds, num_outcomes, atom, dataset_rows, atom_resource_alloc)
if blkSize is None: # wrt_filter gives entire computed parameter block
#Fill derivative cache info
deriv_fill_fn(deriv_array_to_fill, elInds, host_param_slice, num_outcomes, atom,
dataset_rows, global_param_slice, param_resource_alloc)
#profiler.mem_check("bulk_fill_dprobs: post fill")
else: # Divide columns into blocks of at most blkSize
Np = _slct.length(host_param_slice) # total number of parameters we're computing
nBlks = int(_np.ceil(Np / blkSize))
# num blocks required to achieve desired average size == blkSize
blocks = _mpit.slice_up_range(Np, nBlks)
for block in blocks:
host_param_slice_part = _slct.shift(block, host_param_slice.start) # into host's memory
global_param_slice_part = _slct.shift(block, global_param_slice.start) # actual parameter indices
deriv_fill_fn(deriv_array_to_fill, elInds, host_param_slice_part, num_outcomes, atom,
dataset_rows, global_param_slice_part, param_resource_alloc)
#profiler.mem_check("bulk_fill_dprobs: post fill blk")
def _run_on_atoms(self, layout, fn, resource_alloc):
"""Runs `fn` on all the atoms of `layout`, returning a list of the local (current processor) return values."""
local_results = [] # list of the return values just from the atoms run on *this* processor
for atom in layout.atoms:
local_results.append(fn(atom, resource_alloc))
return local_results
def _compute_processor_distribution(self, array_types, nprocs, num_params, num_circuits, default_natoms):
""" Computes commonly needed processor-grid info for distributed layout creation (a helper function)"""
parameter_dim_letters = _array_type_parameter_dimension_letters()
param_dim_cnts = [sum([array_type.count(l) for l in parameter_dim_letters]) for array_type in array_types]
max_param_dims = max(param_dim_cnts) if len(param_dim_cnts) > 0 else 0
bNp1Matters = bool(max_param_dims > 0)
bNp2Matters = bool(max_param_dims > 1)
param_dimensions = (num_params,) * (int(bNp1Matters) + int(bNp2Matters))
param_blk_sizes = (None,) * len(param_dimensions) if (self._pblk_sizes is None) \
else self._pblk_sizes[0:len(param_dimensions)] # automatically set these?
if self._processor_grid is not None:
assert(_np.prod(self._processor_grid) <= nprocs), "`processor_grid` must multiply to # of procs!"
na = self._processor_grid[0]
natoms = max(na, self._num_atoms) if (self._num_atoms is not None) else na
npp = ()
if bNp1Matters: npp += (self._processor_grid[1],)
if bNp2Matters: npp += (self._processor_grid[2],)
else:
if self._num_atoms is not None:
natoms = self._num_atoms
else:
natoms = default_natoms
natoms = min(natoms, num_circuits) # don't have more atoms than circuits
pblk = nprocs
if bNp2Matters:
na = _np.gcd(pblk, natoms); pblk //= na
np1 = _np.gcd(pblk, num_params); pblk //= np1
np2 = _mpit.closest_divisor(pblk, num_params); pblk //= np2 # last dim: don't demand we divide evenly
npp = (np1, np2)
elif bNp1Matters:
na = _np.gcd(pblk, natoms); pblk //= na
np1 = _mpit.closest_divisor(pblk, num_params); pblk //= np1 # last dim: don't demand we divide evenly
npp = (np1,)
else:
na = _mpit.closest_divisor(pblk, natoms); pblk //= na # last dim: don't demand we divide atoms evenly
npp = ()
return natoms, na, npp, param_dimensions, param_blk_sizes