/
python_executor_stacks.py
1044 lines (905 loc) · 44.2 KB
/
python_executor_stacks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2019, The TensorFlow Federated Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pytype: skip-file
# This module disables the Pytype analyzer, see
# https://github.com/tensorflow/federated/blob/main/docs/pytype.md for more
# information.
"""A collection of constructors for basic types of executor stacks."""
from concurrent import futures
import math
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
import warnings
from absl import logging
import attr
import cachetools
import grpc
import tensorflow as tf
from tensorflow_federated.python.common_libs import py_typecheck
from tensorflow_federated.python.core.impl.compiler import local_computation_factory_base
from tensorflow_federated.python.core.impl.compiler import tensorflow_computation_factory
from tensorflow_federated.python.core.impl.executors import eager_tf_executor
from tensorflow_federated.python.core.impl.executors import executor_base
from tensorflow_federated.python.core.impl.executors import executor_factory
from tensorflow_federated.python.core.impl.executors import executors_errors
from tensorflow_federated.python.core.impl.executors import federated_composing_strategy
from tensorflow_federated.python.core.impl.executors import federated_resolving_strategy
from tensorflow_federated.python.core.impl.executors import federating_executor
from tensorflow_federated.python.core.impl.executors import reference_resolving_executor
from tensorflow_federated.python.core.impl.executors import remote_executor
from tensorflow_federated.python.core.impl.executors import remote_executor_grpc_stub
from tensorflow_federated.python.core.impl.executors import remote_executor_stub
from tensorflow_federated.python.core.impl.executors import sequence_executor
from tensorflow_federated.python.core.impl.executors import sizing_executor
from tensorflow_federated.python.core.impl.executors import thread_delegating_executor
from tensorflow_federated.python.core.impl.types import placements
# Place a limit on the maximum size of the executor caches managed by the
# ExecutorFactories, to prevent unbounded thread and memory growth in the case
# of rapidly-changing cross-round cardinalities.
# This value is passed as the size of the `cachetools.LRUCache` that
# `ResourceManagingExecutorFactory` uses to hold its executors.
_EXECUTOR_CACHE_SIZE = 10
def _get_hashable_key(cardinalities: executor_factory.CardinalitiesType):
  """Turns a cardinalities mapping into a hashable, order-independent key.

  Every key of `cardinalities` is stringified and paired with its value; the
  resulting pairs are sorted so that mappings with equal contents produce the
  same tuple regardless of insertion order.

  Args:
    cardinalities: Mapping whose items are converted into the key.

  Returns:
    A tuple of `(str(key), value)` pairs in sorted order.
  """
  stringified_items = [
      (str(placement), count) for placement, count in cardinalities.items()
  ]
  stringified_items.sort()
  return tuple(stringified_items)
class ResourceManagingExecutorFactory(executor_factory.ExecutorFactory):
  """Executor factory that caches one executor per cardinalities mapping."""

  def __init__(self,
               executor_stack_fn: Callable[[executor_factory.CardinalitiesType],
                                           executor_base.Executor]):
    """Initializes `ResourceManagingExecutorFactory`.

    The factory keeps a bounded LRU cache (of size `_EXECUTOR_CACHE_SIZE`)
    mapping hashable cardinality keys to the executors built for them, and
    closes and forgets an executor when asked via `clean_up_executor`.

    Args:
      executor_stack_fn: Callable taking a mapping from
        `placements.PlacementLiteral` to integers, and returning an
        `executor_base.Executor`. The returned executor will be configured to
        handle these cardinalities.
    """
    py_typecheck.check_callable(executor_stack_fn)
    self._executor_stack_fn = executor_stack_fn
    self._executors = cachetools.LRUCache(_EXECUTOR_CACHE_SIZE)

  def create_executor(
      self, cardinalities: executor_factory.CardinalitiesType
  ) -> executor_base.Executor:
    """Returns the cached executor for `cardinalities`, building it if needed.

    On a cache miss, `self._executor_stack_fn` is invoked with `cardinalities`,
    its result is type-checked, stored in the cache and returned.

    Args:
      cardinalities: `dict` with `placements.PlacementLiteral` keys and integer
        values, specifying the population size at each placement. The executor
        stacks returned from this method are not themselves polymorphic; a
        concrete stack must have fixed sizes at each placement.

    Returns:
      Instance of `executor_base.Executor` as described above.
    """
    py_typecheck.check_type(cardinalities, dict)
    cache_key = _get_hashable_key(cardinalities)
    cached = self._executors.get(cache_key)
    if cached is None:
      # Cache miss: build, validate and remember a new stack for this key.
      cached = self._executor_stack_fn(cardinalities)
      py_typecheck.check_type(cached, executor_base.Executor)
      self._executors[cache_key] = cached
    return cached

  def clean_up_executor(self,
                        cardinalities: executor_factory.CardinalitiesType):
    """Closes the executor cached for `cardinalities` and drops it.

    Any reference a caller still holds to an executor previously returned by
    `create_executor` for these cardinalities should be treated as invalid
    after this call; callers should invoke `create_executor` again instead.
    Does nothing if no executor is cached for `cardinalities`.

    Args:
      cardinalities: The cardinalities of the executor to be cleaned up.
    """
    cache_key = _get_hashable_key(cardinalities)
    cached = self._executors.get(cache_key)
    if cached is not None:
      cached.close()
      del self._executors[cache_key]
@attr.s(auto_attribs=True, eq=False, order=False, frozen=True)
class SizeInfo:
  """Immutable record returned by `SizingExecutorFactory.get_size_info()`.

  Attributes:
    broadcast_history: Dict keyed by the hashable cardinalities key; each value
      is the concatenated broadcast history of the sizing executors recorded
      for that key.
    aggregate_history: Dict keyed by the hashable cardinalities key; each value
      is the concatenated aggregate history of the sizing executors recorded
      for that key.
    broadcast_bits: List with one entry per key of `broadcast_history`, giving
      the number of broadcasted bits computed from that history.
    aggregate_bits: List with one entry per key of `aggregate_history`, giving
      the number of aggregated bits computed from that history.
  """
  broadcast_history: Dict[Any, sizing_executor.SizeAndDTypes]
  aggregate_history: Dict[Any, sizing_executor.SizeAndDTypes]
  broadcast_bits: List[int]
  aggregate_bits: List[int]
class SizingExecutorFactory(ResourceManagingExecutorFactory):
  """An executor factory holding an executor per cardinality, with sizing.

  In addition to the executor cache of the base class, this factory remembers
  the `SizingExecutor`s returned alongside each constructed executor so that
  `get_size_info` can report on the data that passed through them.
  """

  def __init__(
      self,
      executor_stack_fn: Callable[[executor_factory.CardinalitiesType],
                                  Tuple[executor_base.Executor,
                                        List[sizing_executor.SizingExecutor]]]):
    """Initializes `SizingExecutorFactory`.

    Args:
      executor_stack_fn: Similar to base class but the second return value of
        the callable is used to expose the SizingExecutors.
    """
    super().__init__(executor_stack_fn)
    # Sizing executors are intended to record the entire history of execution,
    # and therefore we don't want to be silently clearing them. So we leave
    # sizing_executors as a proper dict.
    self._sizing_executors = {}

  def create_executor(
      self, cardinalities: executor_factory.CardinalitiesType
  ) -> executor_base.Executor:
    """Constructs or gets the existing executor for `cardinalities`.

    Args:
      cardinalities: `dict` mapping placements to integers.

    Returns:
      The `executor_base.Executor` cached or newly built for `cardinalities`.

    Raises:
      ValueError: If `executor_stack_fn` returned something other than
        `SizingExecutor`s as its second value.
    """
    py_typecheck.check_type(cardinalities, dict)
    key = _get_hashable_key(cardinalities)
    ex = self._executors.get(key)
    if ex is not None:
      return ex
    ex, sizing_executors = self._executor_stack_fn(cardinalities)
    # Validate everything before touching internal state, so that a failed
    # check cannot leave a sizing entry without a matching cached executor.
    # The snapshot is a copy, so later mutation of the caller's list does not
    # affect what was recorded for this key.
    recorded_sizing_executors = list(sizing_executors)
    for executor in recorded_sizing_executors:
      if not isinstance(executor, sizing_executor.SizingExecutor):
        raise ValueError('Expected all input executors to be sizing executors')
    py_typecheck.check_type(ex, executor_base.Executor)
    self._sizing_executors[key] = recorded_sizing_executors
    self._executors[key] = ex
    return ex

  def get_size_info(self) -> SizeInfo:
    """Returns information about the transferred data of each SizingExecutor.

    For every cardinalities key seen by `create_executor`, the broadcast and
    aggregate histories of the recorded sizing executors are concatenated, and
    the number of bits represented by each concatenated history is computed.

    Returns:
      An instance of `SizeInfo`.
    """

    def _extract_history(sizing_exs: List[sizing_executor.SizingExecutor]):
      # Concatenates the per-executor histories, preserving executor order.
      broadcast_history, aggregate_history = [], []
      for ex in sizing_exs:
        broadcast_history.extend(ex.broadcast_history)
        aggregate_history.extend(ex.aggregate_history)
      return broadcast_history, aggregate_history

    broadcast_history, aggregate_history = {}, {}
    for key, size_exs in self._sizing_executors.items():
      broadcast_history[key], aggregate_history[key] = _extract_history(
          size_exs)

    broadcast_bits = [
        self._calculate_bit_size(hist) for hist in broadcast_history.values()
    ]
    aggregate_bits = [
        self._calculate_bit_size(hist) for hist in aggregate_history.values()
    ]
    return SizeInfo(
        broadcast_history=broadcast_history,
        aggregate_history=aggregate_history,
        broadcast_bits=broadcast_bits,
        aggregate_bits=aggregate_bits)

  def _bits_per_element(self, dtype: tf.dtypes.DType) -> int:
    """Returns the number of bits that a tensorflow DType uses per element.

    `tf.string` is counted as 8 bits and `tf.bool` as 1 bit per element; every
    other dtype is counted as `dtype.size` bytes.
    """
    if dtype == tf.string:
      return 8
    elif dtype == tf.bool:
      return 1
    return dtype.size * 8

  def _calculate_bit_size(self, history: sizing_executor.SizeAndDTypes) -> int:
    """Calculates the number of bits represented by a history.

    The input should follow the format of `broadcast_history` or
    `aggregate_history`: an iterable of `[num_elements, dtype]` pairs.

    Args:
      history: The history of values passed through the executor.

    Returns:
      The number of bits represented in the history; 0 for an empty history.
    """
    return sum(
        num_elements * self._bits_per_element(dtype)
        for num_elements, dtype in history)
# pylint:disable=missing-function-docstring
def _wrap_executor_in_threading_stack(ex: executor_base.Executor,
                                      support_sequence_ops: bool = False,
                                      can_resolve_references=True):
  """Wraps `ex` in a thread-delegating stack, with optional extra layers.

  `ex` is always wrapped in a `ThreadDelegatingExecutor`. If
  `support_sequence_ops` is set, a `ReferenceResolvingExecutor` and a
  `SequenceExecutor` are layered on top of that. If `can_resolve_references`
  is set, a `ReferenceResolvingExecutor` becomes the outermost layer.

  Args:
    ex: The executor to wrap.
    support_sequence_ops: Whether to add a `SequenceExecutor` layer.
    can_resolve_references: Whether the outermost layer should be a
      `ReferenceResolvingExecutor`.

  Returns:
    The outermost executor of the constructed stack.

  Raises:
    ValueError: If `support_sequence_ops` is requested while
      `can_resolve_references` is false.
  """
  stack = thread_delegating_executor.ThreadDelegatingExecutor(ex)
  if support_sequence_ops and not can_resolve_references:
    raise ValueError(
        'Support for sequence ops requires ability to resolve references.')
  if support_sequence_ops:
    resolving_layer = reference_resolving_executor.ReferenceResolvingExecutor(
        stack)
    stack = sequence_executor.SequenceExecutor(resolving_layer)
  if not can_resolve_references:
    return stack
  return reference_resolving_executor.ReferenceResolvingExecutor(stack)
class UnplacedExecutorFactory(executor_factory.ExecutorFactory):
  """ExecutorFactory to construct executors which cannot understand placement.

  The executors built here represent "local execution": work that happens at
  the clients, at the server, or without placements. The factory decides which
  device each such local executor is given.
  """

  def __init__(self,
               *,
               support_sequence_ops: bool = False,
               can_resolve_references: bool = True,
               server_device: Optional[tf.config.LogicalDevice] = None,
               client_devices: Optional[Sequence[tf.config.LogicalDevice]] = (),
               leaf_executor_fn=eager_tf_executor.EagerTFExecutor):
    """Initializes `UnplacedExecutorFactory`.

    Args:
      support_sequence_ops: Forwarded to the threading-stack wrapper for every
        executor this factory creates.
      can_resolve_references: Forwarded to the threading-stack wrapper for
        every executor this factory creates.
      server_device: Device handed to leaf executors created for the server
        placement.
      client_devices: Devices handed out, in round-robin order, to leaf
        executors created for the clients placement.
      leaf_executor_fn: Callable accepting a `device` keyword argument and
        returning the leaf executor.
    """
    self._leaf_executor_fn = leaf_executor_fn
    self._support_sequence_ops = support_sequence_ops
    self._can_resolve_references = can_resolve_references
    self._server_device = server_device
    self._client_devices = client_devices
    # Position of the next client device to hand out.
    self._client_device_index = 0

  def _get_next_client_device(self) -> Optional[tf.config.LogicalDevice]:
    """Returns the next client device in round-robin order, or `None`."""
    devices = self._client_devices
    if not devices:
      return None
    current = self._client_device_index
    chosen = devices[current]
    # Advance, wrapping around at the end of the device sequence.
    self._client_device_index = (current + 1) % len(devices)
    return chosen

  def create_executor(
      self,
      *,
      cardinalities: Optional[executor_factory.CardinalitiesType] = None,
      placement: Optional[placements.PlacementLiteral] = None
  ) -> executor_base.Executor:
    """Creates a leaf executor wrapped in a threading stack.

    Args:
      cardinalities: Must be empty or `None`; unplaced executors do not accept
        cardinalities.
      placement: Optional placement used only to select the device on which
        the leaf executor is constructed.

    Returns:
      The wrapped leaf executor.

    Raises:
      ValueError: If `cardinalities` is nonempty.
    """
    if cardinalities:
      raise ValueError(
          'Unplaced executors cannot accept nonempty cardinalities as '
          'arguments. Received cardinalities: {}.'.format(cardinalities))
    device = None
    if placement == placements.SERVER:
      device = self._server_device
    elif placement == placements.CLIENTS:
      device = self._get_next_client_device()
    return _wrap_executor_in_threading_stack(
        self._leaf_executor_fn(device=device),
        support_sequence_ops=self._support_sequence_ops,
        can_resolve_references=self._can_resolve_references)

  def clean_up_executor(self,
                        cardinalities: executor_factory.CardinalitiesType):
    """No-op: this factory does not hold any executors internally."""
    pass
class FederatingExecutorFactory(executor_factory.ExecutorFactory):
  """Executor factory for stacks which delegate placed computations.

  `FederatingExecutorFactory` validates cardinality requests and manages
  the relationship between the clients and the client executors. Additionally,
  `FederatingExecutorFactory` allows for the measurement of tensors crossing
  the federated communication boundary via the
  `sizing_executor.SizingExecutor`.

  This factory is initialized with:

  * An integer number of clients per thread, indicating the number of client
    executors that should be run in a single thread. Setting this parameter to
    a high number can aid with OOMs on accelerators, or speed up the
    computation in the case of extremely lightweight client work.
  * An `UnplacedExecutorFactory` to use to construct the executors for
    computations after they have had their placement ingested and stripped by
    the `FederatingExecutor`. That is, this factory produces the executors
    used to run client, server and unplaced computations.
  * A number of default clients. If client cardinalities cannot be inferred
    from data, this value will be used as the default number of clients to
    be supported by the returned executors.
  * A boolean `use_sizing` to indicate whether to wire instances of
    `sizing_executors.SizingExecutor` on top of the client stacks.
  * An optional instance of `LocalComputationFactory` to use to construct
    local computations used as parameters in certain federated operators
    (such as `tff.federated_sum`, etc.). Defaults to a TensorFlow factory.
  """

  def __init__(self,
               *,
               clients_per_thread: int,
               unplaced_ex_factory: UnplacedExecutorFactory,
               default_num_clients: int = 0,
               use_sizing: bool = False,
               local_computation_factory: local_computation_factory_base
               .LocalComputationFactory = tensorflow_computation_factory
               .TensorFlowComputationFactory(),
               federated_strategy_factory=federated_resolving_strategy
               .FederatedResolvingStrategy.factory):
    """Initializes `FederatingExecutorFactory`.

    Args:
      clients_per_thread: Positive integer number of clients served by each
        client executor stack.
      unplaced_ex_factory: Factory used to build client, server and unplaced
        executors.
      default_num_clients: Nonnegative number of clients used when the
        requested cardinalities do not specify a client count.
      use_sizing: Whether to wrap each client stack in a `SizingExecutor`.
      local_computation_factory: `LocalComputationFactory` passed to the
        federated strategy factory.
      federated_strategy_factory: Callable producing the strategy factory
        handed to the `FederatingExecutor`.

    Raises:
      ValueError: If `clients_per_thread` is not positive or
        `default_num_clients` is negative.
    """
    py_typecheck.check_type(clients_per_thread, int)
    py_typecheck.check_type(unplaced_ex_factory, UnplacedExecutorFactory)
    py_typecheck.check_type(
        local_computation_factory,
        local_computation_factory_base.LocalComputationFactory)
    # `create_executor` divides by this value; zero or negative values would
    # otherwise surface later as a `ZeroDivisionError`.
    if clients_per_thread < 1:
      raise ValueError('Clients per thread must be a positive integer.')
    self._clients_per_thread = clients_per_thread
    self._unplaced_executor_factory = unplaced_ex_factory
    py_typecheck.check_type(default_num_clients, int)
    if default_num_clients < 0:
      raise ValueError('Number of clients must be nonnegative.')
    self._default_num_clients = default_num_clients
    self._use_sizing = use_sizing
    # Only track sizing executors when sizing was requested.
    self._sizing_executors = [] if self._use_sizing else None
    self._federated_strategy_factory = federated_strategy_factory
    self._local_computation_factory = local_computation_factory

  @property
  def sizing_executors(self) -> List[sizing_executor.SizingExecutor]:
    """All `SizingExecutor`s created so far by this factory.

    Raises:
      ValueError: If the factory was constructed with `use_sizing=False`.
    """
    if not self._use_sizing:
      raise ValueError('This federated factory is not configured to produce '
                       'size information. Construct a new federated factory, '
                       'passing argument `use_sizing=True`.')
    return self._sizing_executors

  def _validate_requested_clients(
      self, cardinalities: executor_factory.CardinalitiesType) -> int:
    """Returns the number of clients to build executors for.

    Args:
      cardinalities: Mapping from placements to integers.

    Returns:
      The value stored under `placements.CLIENTS`, or the configured default
      number of clients if that key is absent (or maps to `None`).

    Raises:
      ValueError: If the requested number of clients is negative.
    """
    num_requested_clients = cardinalities.get(placements.CLIENTS)
    if num_requested_clients is None:
      return self._default_num_clients
    if num_requested_clients < 0:
      raise ValueError('Number of clients must be nonnegative.')
    return num_requested_clients

  def create_executor(
      self, cardinalities: executor_factory.CardinalitiesType
  ) -> executor_base.Executor:
    """Constructs a federated executor with requested cardinalities.

    Args:
      cardinalities: Mapping from placements to integers.

    Returns:
      A `FederatingExecutor` wrapped in a threading stack.

    Raises:
      ValueError: If the requested number of clients is negative.
    """
    num_clients = self._validate_requested_clients(cardinalities)
    num_client_executors = math.ceil(num_clients / self._clients_per_thread)
    client_stacks = [
        self._unplaced_executor_factory.create_executor(
            cardinalities={}, placement=placements.CLIENTS)
        for _ in range(num_client_executors)
    ]
    if self._use_sizing:
      client_stacks = [
          sizing_executor.SizingExecutor(ex) for ex in client_stacks
      ]
      self._sizing_executors.extend(client_stacks)
    # Clients are assigned to the available stacks in round-robin order; when
    # there are no clients the list is empty and no stack is indexed.
    federating_strategy_factory = self._federated_strategy_factory(
        {
            placements.CLIENTS: [
                client_stacks[k % len(client_stacks)]
                for k in range(num_clients)
            ],
            placements.SERVER:
                self._unplaced_executor_factory.create_executor(
                    placement=placements.SERVER),
        },
        local_computation_factory=self._local_computation_factory)
    unplaced_executor = self._unplaced_executor_factory.create_executor()
    executor = federating_executor.FederatingExecutor(
        federating_strategy_factory, unplaced_executor)
    return _wrap_executor_in_threading_stack(executor)

  def clean_up_executor(self,
                        cardinalities: executor_factory.CardinalitiesType):
    """No-op: this factory does not cache the executors it creates."""
    pass
def create_minimal_length_flat_stack_fn(
    max_clients_per_stack: int,
    federated_stack_factory: executor_factory.ExecutorFactory
) -> Callable[[executor_factory.CardinalitiesType],
              List[executor_base.Executor]]:
  """Creates a function returning a list of executors to run `cardinalities`.

  This list is of minimal length among all lists subject to the constraint that
  each executor can be responsible for executing no more than
  `max_clients_per_stack`. For example, given a `cardinalities` argument of 10
  and `max_clients_per_stack` of 3, this function may return a list of
  executors 3 of which run 3 clients and 1 of which runs 1, or 2 of which run 3
  and 2 of which run 2.

  Args:
    max_clients_per_stack: Positive integer determining the maximum number of
      clients a single executor in the list returned by the function may
      execute.
    federated_stack_factory: The `executor_factory.ExecutorFactory` for use in
      actually constructing these executors.

  Returns:
    A callable taking a parameter of type `executor_factory.CardinalitiesType`,
    and returning a list of `executor_base.Executors` which in total can execute
    the cardinalities specified by the argument. This callable will raise if it
    is passed a cardinalities dict with a negative number of clients.

  Raises:
    ValueError: If `max_clients_per_stack` is less than 1.
  """
  # A non-positive limit would make the partitioning loop below never
  # terminate, since the remaining client count could not decrease.
  if max_clients_per_stack < 1:
    raise ValueError('Max clients per stack must be a positive integer.')

  def create_executor_list(
      cardinalities: executor_factory.CardinalitiesType
  ) -> List[executor_base.Executor]:
    num_clients = cardinalities.get(placements.CLIENTS, 0)
    if num_clients < 0:
      raise ValueError('Number of clients cannot be negative.')
    elif num_clients < 1:
      # No clients requested: a single stack handles the cardinalities as-is.
      return [
          federated_stack_factory.create_executor(cardinalities=cardinalities)
      ]
    executors = []
    while num_clients > 0:
      # Fill each stack up to the limit; the last one takes the remainder.
      n = min(num_clients, max_clients_per_stack)
      sub_executor_cardinalities = {**cardinalities}
      sub_executor_cardinalities[placements.CLIENTS] = n
      executors.append(
          federated_stack_factory.create_executor(sub_executor_cardinalities))
      num_clients -= n
    return executors

  return create_executor_list
class ComposingExecutorFactory(executor_factory.ExecutorFactory):
  """Factory class encapsulating executor compositional logic.

  Takes the flat list of executors produced by `flat_stack_fn` and arranges it
  into a compositional hierarchy whose width is bounded by `max_fanout`.
  """

  def __init__(self,
               *,
               max_fanout: int,
               unplaced_ex_factory: UnplacedExecutorFactory,
               flat_stack_fn: Callable[[executor_factory.CardinalitiesType],
                                       Sequence[executor_base.Executor]],
               local_computation_factory: local_computation_factory_base
               .LocalComputationFactory = tensorflow_computation_factory
               .TensorFlowComputationFactory()):
    """Initializes `ComposingExecutorFactory`.

    Args:
      max_fanout: Maximum number of child executors under any composing
        executor; must be at least 2.
      unplaced_ex_factory: Factory used to build the server and unplaced
        executors of every composing stack.
      flat_stack_fn: Callable mapping cardinalities to the flat sequence of
        executors to be aggregated.
      local_computation_factory: `LocalComputationFactory` passed to the
        composing strategy factory.

    Raises:
      ValueError: If `max_fanout` is less than 2.
    """
    if max_fanout < 2:
      raise ValueError('Max fanout must be greater than 1.')
    self._max_fanout = max_fanout
    self._flat_stack_fn = flat_stack_fn
    self._unplaced_ex_factory = unplaced_ex_factory
    self._local_computation_factory = local_computation_factory

  def create_executor(
      self, cardinalities: executor_factory.CardinalitiesType
  ) -> executor_base.Executor:
    """Creates an executor hierarchy of maximum width `self._max_fanout`.

    The flat list of executors is obtained from the `flat_stack_fn` supplied
    at initialization and is then composed into a hierarchy of width at most
    `self._max_fanout`.

    Args:
      cardinalities: A mapping from placements to integers specifying the
        cardinalities at each placement

    Returns:
      An `executor_base.Executor` satisfying the conditions above.
    """
    flat_executors = self._flat_stack_fn(cardinalities)
    return self._aggregate_stacks(flat_executors)

  def clean_up_executor(self,
                        cardinalities: executor_factory.CardinalitiesType):
    """Holds no executors internally, so passes on cleanup."""
    pass

  def _create_composing_stack(
      self, *, target_executors: Sequence[executor_base.Executor]
  ) -> executor_base.Executor:
    """Builds one composing executor on top of `target_executors`.

    The result is wrapped in a threading stack without a reference-resolving
    layer.
    """
    server_ex = self._unplaced_ex_factory.create_executor(
        placement=placements.SERVER)
    strategy_factory = (
        federated_composing_strategy.FederatedComposingStrategy.factory(
            server_ex,
            target_executors,
            local_computation_factory=self._local_computation_factory))
    unplaced_ex = self._unplaced_ex_factory.create_executor()
    composing_ex = federating_executor.FederatingExecutor(
        strategy_factory, unplaced_ex)
    return _wrap_executor_in_threading_stack(
        composing_ex, can_resolve_references=False)

  def _aggregate_stacks(
      self,
      executors: Sequence[executor_base.Executor],
  ) -> executor_base.Executor:
    """Hierarchically aggregates a sequence of executors via composing strategy.

    Builds as many levels as needed to reduce the sequence to a single
    executor, grouping at most `self._max_fanout` executors under each
    composing executor on every level.

    Args:
      executors: Sequence of `executor_base.Executors` to aggregate into a
        composing hierarchy.

    Returns:
      A single `executor_base.Executor` representing the aggregated hierarchy.
      The particular architecture of this hierarchy depends on the interplay
      between `self._max_fanout` and the length of the sequence of executors.

    Raises:
      RuntimeError: If hierarchy construction fails.
    """
    if len(executors) <= 1:
      # Zero or one executor still gets a single composing layer on top.
      return reference_resolving_executor.ReferenceResolvingExecutor(
          self._create_composing_stack(target_executors=executors))
    level = executors
    while len(level) > 1:
      # Collapse each run of up to `max_fanout` executors into one parent.
      level = [
          self._create_composing_stack(
              target_executors=level[start:start + self._max_fanout])
          for start in range(0, len(level), self._max_fanout)
      ]
    if len(level) != 1:
      raise RuntimeError('Expected 1 executor, got {}.'.format(len(level)))
    return reference_resolving_executor.ReferenceResolvingExecutor(level[0])
def normalize_num_clients_and_default_num_clients(
    num_clients: Optional[int], default_num_clients: int) -> int:
  """Resolves the deprecated `num_clients` against `default_num_clients`.

  Args:
    num_clients: Deprecated client count. When not `None`, a warning is
      emitted, the value is type-checked as an `int`, and it takes precedence.
    default_num_clients: Value returned when `num_clients` is `None`.

  Returns:
    `num_clients` if it was provided, otherwise `default_num_clients`.
  """
  if num_clients is None:
    return default_num_clients
  warnings.warn('num_clients is deprecated; please use default_num_clients '
                'instead.')
  py_typecheck.check_type(num_clients, int)
  return num_clients
def local_executor_factory(
    default_num_clients: int = 0,
    max_fanout=100,
    clients_per_thread=1,
    server_tf_device=None,
    client_tf_devices=tuple(),
    reference_resolving_clients=True,
    support_sequence_ops=False,
    leaf_executor_fn=eager_tf_executor.EagerTFExecutor,
    local_computation_factory=tensorflow_computation_factory
    .TensorFlowComputationFactory(),
) -> executor_factory.ExecutorFactory:
  """Constructs an executor factory to execute computations locally.

  Note: The `tff.federated_secure_sum_bitwidth()` intrinsic is not implemented
  by this executor.

  Args:
    default_num_clients: The number of clients to run by default if cardinality
      cannot be inferred from arguments.
    max_fanout: The maximum fanout at any point in the aggregation hierarchy. If
      `num_clients > max_fanout`, the constructed executor stack will consist of
      multiple levels of aggregators. The height of the stack will be on the
      order of `log(num_clients) / log(max_fanout)`.
    clients_per_thread: Integer number of clients for each of TFF's threads to
      run in sequence. Increasing `clients_per_thread` therefore reduces the
      concurrency of the TFF runtime, which can be useful if client work is very
      lightweight or models are very large and multiple copies cannot fit in
      memory.
    server_tf_device: A `tf.config.LogicalDevice` to place server and other
      computation without explicit TFF placement.
    client_tf_devices: List/tuple of `tf.config.LogicalDevice` to place clients
      for simulation. Possibly accelerators returned by
      `tf.config.list_logical_devices()`.
    reference_resolving_clients: Boolean indicating whether executors
      representing clients must be able to handle unplaced TFF lambdas.
    support_sequence_ops: Boolean indicating whether this executor supports
      sequence ops (currently False by default).
    leaf_executor_fn: A function that constructs leaf-level executors. Default
      is the eager TF executor (other possible options: XLA, IREE). Should
      accept the `device` keyword argument if the executor is to be configured
      with explicitly chosen devices.
    local_computation_factory: An instance of `LocalComputationFactory` to use
      to construct local computations used as parameters in certain federated
      operators (such as `tff.federated_sum`, etc.). Defaults to a TensorFlow
      computation factory that generates TensorFlow code.

  Returns:
    An instance of `executor_factory.ExecutorFactory` encapsulating the
    executor construction logic specified above.

  Raises:
    ValueError: If `max_fanout` is less than 2, or if `default_num_clients` is
      negative.
  """
  if server_tf_device is not None:
    py_typecheck.check_type(server_tf_device, tf.config.LogicalDevice)
  py_typecheck.check_type(client_tf_devices, (tuple, list))
  py_typecheck.check_type(max_fanout, int)
  py_typecheck.check_type(clients_per_thread, int)
  if max_fanout < 2:
    raise ValueError('Max fanout must be greater than 1.')

  unplaced_factory = UnplacedExecutorFactory(
      support_sequence_ops=support_sequence_ops,
      can_resolve_references=reference_resolving_clients,
      server_device=server_tf_device,
      client_devices=client_tf_devices,
      leaf_executor_fn=leaf_executor_fn)
  federating_factory = FederatingExecutorFactory(
      clients_per_thread=clients_per_thread,
      unplaced_ex_factory=unplaced_factory,
      default_num_clients=default_num_clients,
      use_sizing=False,
      local_computation_factory=local_computation_factory)
  composing_factory = ComposingExecutorFactory(
      max_fanout=max_fanout,
      unplaced_ex_factory=unplaced_factory,
      flat_stack_fn=create_minimal_length_flat_stack_fn(
          max_fanout, federating_factory),
      local_computation_factory=local_computation_factory)

  def _build_stack(cardinalities):
    # Populations smaller than the fanout fit into a single federating
    # executor; anything else goes through the composing hierarchy.
    fits_single_stack = cardinalities.get(placements.CLIENTS, 0) < max_fanout
    chosen_factory = (
        federating_factory if fits_single_stack else composing_factory)
    return chosen_factory.create_executor(cardinalities)

  return ResourceManagingExecutorFactory(_build_stack)
def thread_debugging_executor_factory(
    default_num_clients: int = 0,
    clients_per_thread=1,
    leaf_executor_fn=eager_tf_executor.EagerTFExecutor,
) -> executor_factory.ExecutorFactory:
  r"""Constructs a simplified execution stack to execute local computations.

  The constructed executors support a limited set of TFF's computations. In
  particular, the debug executor can only resolve references at the top level
  of the stack, and therefore assumes that all local computation is expressed
  in pure TensorFlow.

  The debug executor makes particular guarantees about the structure of the
  stacks it constructs which are intended to make them maximally easy to reason
  about. That is, the debug executor will essentially execute exactly the
  computation it is passed (in particular, this implies that there are no
  caching layers), and uses its declared inability to execute arbitrary TFF
  lambdas to reduce the complexity of the constructed stack. Every debugging
  executor will have a similar structure, the simplest structure that can
  execute the full expressivity of TFF while running each client in a dedicated
  thread:

                        ReferenceResolvingExecutor
                                    |
                            FederatingExecutor
                          /        ...        \
      ThreadDelegatingExecutor            ThreadDelegatingExecutor
                  |                                   |
          EagerTFExecutor                      EagerTFExecutor

  This structure can be useful in understanding the concurrency pattern of TFF
  execution, and where the TFF runtime infers data dependencies.

  Args:
    default_num_clients: The number of clients to run by default if cardinality
      cannot be inferred from arguments.
    clients_per_thread: Integer number of clients for each of TFF's threads to
      run in sequence. Increasing `clients_per_thread` therefore reduces the
      concurrency of the TFF runtime, which can be useful if client work is very
      lightweight or models are very large and multiple copies cannot fit in
      memory.
    leaf_executor_fn: A function that constructs leaf-level executors. Default
      is the eager TF executor (other possible options: XLA, IREE). Should
      accept the `device` keyword argument if the executor is to be configured
      with explicitly chosen devices.

  Returns:
    An instance of `executor_factory.ExecutorFactory` encapsulating the
    executor construction logic specified above.

  Raises:
    ValueError: If `default_num_clients` is negative.
  """
  py_typecheck.check_type(clients_per_thread, int)
  # Leaf stacks deliberately cannot resolve references, which keeps them to a
  # thread-delegating layer directly above the leaf executor.
  leaf_stack_factory = UnplacedExecutorFactory(
      can_resolve_references=False, leaf_executor_fn=leaf_executor_fn)
  debug_federating_factory = FederatingExecutorFactory(
      clients_per_thread=clients_per_thread,
      unplaced_ex_factory=leaf_stack_factory,
      default_num_clients=default_num_clients,
      use_sizing=False)
  stack_fn = debug_federating_factory.create_executor
  return ResourceManagingExecutorFactory(stack_fn)
def sizing_executor_factory(
    default_num_clients: int = 0,
    max_fanout: int = 100,
    clients_per_thread: int = 1,
    leaf_executor_fn=eager_tf_executor.EagerTFExecutor,
) -> executor_factory.ExecutorFactory:
  """Constructs an executor factory to execute computations locally with sizing.

  The returned factory builds the same kind of local stack as the non-sizing
  local factory, but with sizing enabled in the federating layer
  (`use_sizing=True`), and hands back the sizing executors alongside every
  executor it constructs.

  Args:
    default_num_clients: The number of clients to run by default if cardinality
      cannot be inferred from arguments.
    max_fanout: The maximum fanout at any point in the aggregation hierarchy. If
      `num_clients > max_fanout`, the constructed executor stack will consist of
      multiple levels of aggregators. The height of the stack will be on the
      order of `log(num_clients) / log(max_fanout)`.
    clients_per_thread: Integer number of clients for each of TFF's threads to
      run in sequence. Increasing `clients_per_thread` therefore reduces the
      concurrency of the TFF runtime, which can be useful if client work is very
      lightweight or models are very large and multiple copies cannot fit in
      memory.
    leaf_executor_fn: A function that constructs leaf-level executors. Default
      is the eager TF executor (other possible options: XLA, IREE). Should
      accept the `device` keyword argument if the executor is to be configured
      with explicitly chosen devices.

  Returns:
    An instance of `executor_factory.ExecutorFactory` (a
    `SizingExecutorFactory`) encapsulating the executor construction logic
    specified above.

  Raises:
    ValueError: If `max_fanout` is less than 2. `default_num_clients` is not
      validated here; it is forwarded to `FederatingExecutorFactory`.
  """
  py_typecheck.check_type(max_fanout, int)
  py_typecheck.check_type(clients_per_thread, int)
  if max_fanout < 2:
    raise ValueError('Max fanout must be greater than 1.')
  unplaced_ex_factory = UnplacedExecutorFactory(
      leaf_executor_fn=leaf_executor_fn)
  federating_executor_factory = FederatingExecutorFactory(
      clients_per_thread=clients_per_thread,
      unplaced_ex_factory=unplaced_ex_factory,
      default_num_clients=default_num_clients,
      use_sizing=True)
  flat_stack_fn = create_minimal_length_flat_stack_fn(
      max_fanout, federating_executor_factory)
  full_stack_factory = ComposingExecutorFactory(
      max_fanout=max_fanout,
      unplaced_ex_factory=unplaced_ex_factory,
      flat_stack_fn=flat_stack_fn)

  def _factory_fn(cardinalities: executor_factory.CardinalitiesType):
    """Builds the stack for `cardinalities`.

    Args:
      cardinalities: A mapping from placement literals to ints.

    Returns:
      A pair `(executor, sizing_executor_list)`: the constructed executor and
      the value of `federating_executor_factory.sizing_executors` read after
      construction. Note that this is a tuple, not a bare executor.
    """
    # A missing CLIENTS entry is treated as zero clients, which always selects
    # the flat (single-level) federating stack.
    if cardinalities.get(placements.CLIENTS, 0) < max_fanout:
      executor = federating_executor_factory.create_executor(cardinalities)
    else:
      executor = full_stack_factory.create_executor(cardinalities)
    # NOTE(review): presumably populated while the stack above is being built,
    # hence read only afterwards -- confirm against FederatingExecutorFactory.
    sizing_executor_list = federating_executor_factory.sizing_executors
    return executor, sizing_executor_list

  return SizingExecutorFactory(_factory_fn)
class ReconstructOnChangeExecutorFactory(executor_factory.ExecutorFactory):
  """ExecutorFactory exposing hook to construct executors on environment change.

  When the initialization parameter `change_query` returns `True`,
  `ReconstructOnChangeExecutorFactory` constructs a new executor, bypassing
  any previously constructed executors. Constructed executors are remembered
  in an LRU cache of size `_EXECUTOR_CACHE_SIZE`, keyed by cardinalities.
  """

  def __init__(self,
               underlying_stack: executor_factory.ExecutorFactory,
               change_query: Callable[[executor_factory.CardinalitiesType],
                                      bool] = lambda _: True):
    """Initializes the factory.

    Args:
      underlying_stack: The `executor_factory.ExecutorFactory` that actually
        constructs (and cleans up) executors.
      change_query: Callable taking the requested cardinalities and returning
        `True` when a fresh executor must be built. The default always
        returns `True`, i.e. every request reconstructs.
    """
    self._underlying_stack = underlying_stack
    self._change_query = change_query
    self._executors = cachetools.LRUCache(_EXECUTOR_CACHE_SIZE)

  def create_executor(
      self, cardinalities: executor_factory.CardinalitiesType
  ) -> executor_base.Executor:
    """Returns a new or existing executor, depending on `change_query`.

    A new executor is built from the underlying stack whenever `change_query`
    returns `True` for `cardinalities`, or when no usable executor is cached
    for them. Otherwise the cached executor is returned. Every newly built
    executor replaces the cache entry for these cardinalities.

    Args:
      cardinalities: A mapping from placement literals to ints.

    Returns:
      An `executor_base.Executor` obeying the semantics above.
    """
    py_typecheck.check_type(cardinalities, dict)
    cache_key = _get_hashable_key(cardinalities)
    if not self._change_query(cardinalities):
      cached = self._executors.get(cache_key)
      # Truthiness (not `is not None`) is deliberate: it mirrors the check
      # this factory has always applied to cached entries.
      if cached:
        return cached
    # Either a change was reported or nothing usable is cached: build anew.
    fresh = self._underlying_stack.create_executor(cardinalities)
    self._executors[cache_key] = fresh
    return fresh

  def clean_up_executor(self,
                        cardinalities: executor_factory.CardinalitiesType):
    """Closes and forgets the executor cached for `cardinalities`, if any.

    When an executor is cached it is closed, dropped from the cache, and the
    clean-up request is forwarded to the underlying stack. When nothing is
    cached this is a no-op and the underlying stack is not contacted.

    Args:
      cardinalities: A mapping from placement literals to ints.
    """
    cache_key = _get_hashable_key(cardinalities)
    cached = self._executors.get(cache_key)
    if cached is not None:
      cached.close()
      del self._executors[cache_key]
      self._underlying_stack.clean_up_executor(cardinalities)
class _CardinalitiesOrReadyListChanged:
  """Callable checking for changes to either the argument or a ready list.

  Each call compares the supplied cardinalities and the currently ready
  elements of the provided list against what was seen on the previous call,
  and reports whether either differs. The very first call always reports a
  change, since no cardinalities have been recorded yet.

  Note: the contents of the provided list are expected to change over time.
  Elements of the list must offer an `is_ready` property which will be checked
  each time.
  """

  def __init__(self, maybe_ready_list):
    """Initializes the checker.

    Args:
      maybe_ready_list: Iterable of objects exposing `is_ready`. It is
        re-iterated on every call, so it must support repeated iteration.
    """
    self._previous_cardinalities = None
    self._previous_ready_list = ()
    self._maybe_ready_list = maybe_ready_list

  def __call__(self, cardinalities: executor_factory.CardinalitiesType) -> bool:
    """Returns `True` if cardinalities or the set of ready elements changed.

    Args:
      cardinalities: A mapping from placement literals to ints.

    Returns:
      `True` if `cardinalities` differs from the previous call's value, or if
      the tuple of ready elements differs from the previous call's tuple;
      `False` otherwise.
    """
    cardinalities_changed = self._previous_cardinalities != cardinalities
    ready_list = tuple(x for x in self._maybe_ready_list if x.is_ready)
    ready_list_changed = ready_list != self._previous_ready_list
    # Store a snapshot rather than the caller's dict. Keeping a reference
    # would make an in-place mutation of that dict invisible, because the next
    # comparison would be of the object against itself. The ready list is
    # already snapshotted as a tuple for the same reason.
    self._previous_cardinalities = dict(cardinalities)
    self._previous_ready_list = ready_list
    return cardinalities_changed or ready_list_changed
def _configure_remote_workers(default_num_clients, stubs, thread_pool_executor,
                              dispose_batch_size):
  """Distributes `default_num_clients` across the ready `stubs`.

  Clients are split as evenly as integer division allows over the stubs whose
  `is_ready` is true; when the split is uneven, later stubs receive the extra
  clients. A stub that ends up with no clients gets no executor at all.

  Args:
    default_num_clients: Total number of clients to spread over the workers.
    stubs: Stubs to the remote workers; each must expose `is_ready`.
    thread_pool_executor: Passed through to each `RemoteExecutor`.
    dispose_batch_size: Passed through to each `RemoteExecutor`.

  Returns:
    A list with one threading-stack-wrapped `RemoteExecutor` per ready stub
    that was assigned at least one client.

  Raises:
    executors_errors.RetryableError: If none of the stubs is ready.
  """
  ready_stubs = [candidate for candidate in stubs if candidate.is_ready]
  logging.info('%s TFF workers available out of a total of %s.',
               len(ready_stubs), len(stubs))
  if not ready_stubs:
    raise executors_errors.RetryableError(
        'No workers are ready; try again to reconnect.')

  num_ready = len(ready_stubs)
  unassigned_clients = default_num_clients
  configured_workers = []
  for position, ready_stub in enumerate(ready_stubs):
    # Divide what is left by the number of stubs still to be served, so the
    # remainder is absorbed by the stubs at the end of the list.
    share = unassigned_clients // (num_ready - position)
    unassigned_clients -= share
    if share <= 0:
      continue
    worker = remote_executor.RemoteExecutor(ready_stub, thread_pool_executor,
                                            dispose_batch_size)
    worker.set_cardinalities({placements.CLIENTS: share})
    configured_workers.append(worker)

  # Wrapping happens only after every worker has been configured.
  return [
      _wrap_executor_in_threading_stack(worker, can_resolve_references=False)
      for worker in configured_workers
  ]
def remote_executor_factory(
    channels: List[grpc.Channel],
    thread_pool_executor: Optional[futures.Executor] = None,
    dispose_batch_size: int = 20,
    max_fanout: int = 100,
    default_num_clients: int = 0,
) -> executor_factory.ExecutorFactory:
  """Create an executor backed by remote workers.

  Validates the arguments, wraps every channel in a
  `RemoteExecutorGrpcStub`, and delegates to
  `remote_executor_factory_from_stubs`.

  Args:
    channels: A non-empty list of `grpc.Channels` hosting services which can
      execute TFF work.
    thread_pool_executor: Optional concurrent.futures.Executor used to wait for
      the reply to a streaming RPC message. Uses the default Executor if not
      specified.
    dispose_batch_size: The batch size for requests to dispose of remote worker
      values. Lower values will result in more requests to the remote worker,
      but will result in values being cleaned up sooner and therefore may result
      in lower memory usage on the remote worker.
    max_fanout: The maximum fanout at any point in the aggregation hierarchy. If
      `num_clients > max_fanout`, the constructed executor stack will consist of
      multiple levels of aggregators. The height of the stack will be on the
      order of `log(default_num_clients) / log(max_fanout)`.
    default_num_clients: The number of clients to use for simulations where the
      number of clients cannot be inferred. Usually the number of clients will
      be inferred from the number of values passed to computations which accept
      client-placed values. However, when this inference isn't possible (such as
      in the case of a no-argument or non-federated computation) this default
      will be used instead.

  Returns:
    An instance of `executor_factory.ExecutorFactory` encapsulating the
    executor construction logic specified above.

  Raises:
    ValueError: If `channels` is empty.
  """
  py_typecheck.check_type(channels, list)
  if not channels:
    raise ValueError('The list of channels cannot be empty.')
  if thread_pool_executor is not None:
    py_typecheck.check_type(thread_pool_executor, futures.Executor)
  # The three integer knobs are checked in signature order.
  for int_argument in (dispose_batch_size, max_fanout, default_num_clients):
    py_typecheck.check_type(int_argument, int)

  grpc_stubs = [
      remote_executor_grpc_stub.RemoteExecutorGrpcStub(grpc_channel)
      for grpc_channel in channels
  ]
  return remote_executor_factory_from_stubs(
      stubs=grpc_stubs,
      thread_pool_executor=thread_pool_executor,
      dispose_batch_size=dispose_batch_size,
      max_fanout=max_fanout,
      default_num_clients=default_num_clients)
def remote_executor_factory_from_stubs(
stubs: List[Union[remote_executor_grpc_stub.RemoteExecutorGrpcStub,
remote_executor_stub.RemoteExecutorStub]],
thread_pool_executor: Optional[futures.Executor] = None,
dispose_batch_size: int = 20,
max_fanout: int = 100,
default_num_clients: int = 0,
) -> executor_factory.ExecutorFactory:
"""Create an executor backed by remote workers.
Args:
stubs: A list stubs to the TFF executor service, running on remote machines.
thread_pool_executor: Optional concurrent.futures.Executor used to wait for
the reply to a streaming RPC message. Uses the default Executor if not