-
Notifications
You must be signed in to change notification settings - Fork 74k
/
hlo.proto
608 lines (486 loc) · 19.6 KB
/
hlo.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This proto file defines messages which represent the HLO module. This is a
// full-fidelity serialization of the C++ HLO constructs.
//
// Many of the protos below are simple 1-to-1 serializations of the
// corresponding C++ classes, e.g., HloModule, HloComputation, and
// HloInstruction.
//
// FIELD NAMES ARE IMPORTANT
//
// Unlike most protos, you can't safely change the names of fields, even if you
// keep the numeric ids the same. This is because we sometimes serialize these
// protos as JSON, which includes the field names in the serialization.
syntax = "proto3";
package xla;
import "tensorflow/compiler/xla/xla_data.proto";
option cc_enable_arenas = true;
// Scheduling preference that can be attached to a custom-call instruction via
// HloInstructionProto.custom_call_schedule.
enum CustomCallSchedule {
  // Zero value, i.e. what an unset field reads as in proto3.
  SCHEDULE_NONE = 0;

  // NOTE(review): presumably asks for the custom-call to be scheduled as late
  // as possible; confirm with the scheduler.
  SCHEDULE_LATEST = 1;

  // NOTE(review): presumably asks for the custom-call to be scheduled as early
  // as possible; confirm with the scheduler.
  SCHEDULE_EARLIEST = 2;
}
// Serialized form of HloInstruction.
//
// Field names double as JSON keys and field numbers are the wire identity, so
// retired names and numbers stay reserved below instead of being recycled.
// Next ID: 77
message HloInstructionProto {
  // Retired field numbers.
  // NOTE(review): 39 is neither assigned nor reserved in this message; confirm
  // it was never used before handing it out.
  reserved 4, 5, 6, 10, 12, 44, 46, 53;

  // Retired field names. For custom_call_opaque and all_reduce_barrier, use
  // backend_config instead.
  reserved "parameter_name", "fused_instructions_computation",
      "operand_names", "control_predecessor_names",
      "called_computation_names", "replica_group_ids", "custom_call_opaque",
      "all_reduce_barrier";

  // Name, opcode, shape and metadata of the instruction.
  string name = 1;
  string opcode = 2;
  xla.ShapeProto shape = 3;
  xla.OpMetadata metadata = 7;

  // The literal value; set only for kConstant.
  xla.LiteralProto literal = 8;

  // The parameter number; set only for kParameter.
  int64 parameter_number = 9;

  // The fusion state; set only for kFusion.
  string fusion_kind = 11;

  // The element index for kGetTupleElement.
  int64 tuple_index = 13;

  // Dimensions used by operations that reshape or broadcast, including
  // Reshape, Reduce, ReduceWindow, and Reverse.
  repeated int64 dimensions = 14;

  // The window of a windowed operation such as convolution.
  xla.Window window = 15;

  // The dimension numbers of a convolution.
  xla.ConvolutionDimensionNumbers convolution_dimension_numbers = 16;

  // Number of feature groups of a convolution. It must divide both the input
  // feature dimension and the output feature dimension. When not specified, a
  // default value of 1 is used.
  int64 feature_group_count = 50;

  // NOTE(review): presumably the batch-group analogue of feature_group_count;
  // confirm.
  int64 batch_group_count = 58;

  // The [begin, end) index range and the stride of a slice.
  message SliceDimensions {
    int64 start = 1;
    int64 limit = 2;
    int64 stride = 3;
  }
  repeated SliceDimensions slice_dimensions = 17;

  // Bit sizes for a reduce-precision operation.
  int32 exponent_bits = 18;
  int32 mantissa_bits = 19;

  // Sizes of the [start, start + size) range of a dynamic slice. 'start' is
  // supplied dynamically through the second operand of the operation.
  repeated int64 dynamic_slice_sizes = 20;

  // Edge and interior padding of a pad instruction. Only set for pad
  // instructions.
  xla.PaddingConfig padding_config = 21;

  // Outfeed configuration; set only for kOutfeed.
  bytes outfeed_config = 22;

  // Requested distribution for random number generation; set only for kRng.
  xla.RandomDistribution distribution = 23;

  // Small float added to the variance so that division by zero is avoided.
  // Set only for kBatchNormTraining.
  float epsilon = 24;

  // Index of the feature dimension. Set only for kBatchNormTraining.
  int64 feature_index = 25;

  // Unique identifier of a Send/Recv instruction pair, or optionally of a
  // collective instruction (AllReduce, CollectivePermute, AllToAll). A
  // non-positive channel_id means there is no channel id.
  int64 channel_id = 26;

  // String representation of the infeed configuration.
  bytes infeed_config = 27;

  // Name of an external target (e.g. a global symbol) to call; set only for
  // kCustomCall.
  string custom_call_target = 28;

  // Shape of the outfeed request.
  xla.ShapeProto outfeed_shape = 29;

  // The dimension numbers of a dot operation.
  xla.DotDimensionNumbers dot_dimension_numbers = 30;

  // The FFT type (FFT, IFFT, etc).
  xla.FftType fft_type = 31;

  // The FFT length.
  repeated int64 fft_length = 32;

  // Comparison direction; used only for kCompare.
  string comparison_direction = 63;

  // Gather dimension numbers, followed by the gather slice sizes.
  xla.GatherDimensionNumbers gather_dimension_numbers = 33;
  repeated int64 gather_slice_sizes = 34;

  // Compute Host.
  string channel_name = 41;
  int64 cost_estimate_ns = 42;

  // Id of this instruction.
  int64 id = 35;

  // Ids that link this instruction to its operands, its control predecessors
  // and the computations it calls.
  repeated int64 operand_ids = 36;
  repeated int64 control_predecessor_ids = 37;
  repeated int64 called_computation_ids = 38;

  xla.OpSharding sharding = 40;

  // Backend configuration of the instruction; its meaning is
  // backend-specific.
  bytes backend_config = 43;

  // Cross-replica op fields.
  repeated ReplicaGroup replica_groups = 49;

  // Deprecated but kept for backward compatibility; use channel_id instead.
  // A non-positive all_reduce_id means there is no all_reduce_id.
  int64 all_reduce_id = 45 [deprecated = true];

  // When true, the ids in ReplicaGroup are read as global device ids, i.e.
  // the linearized id `replica_id * partition_count + partition_id`.
  bool use_global_device_ids = 71;

  // Whether this Send/Recv instruction moves data to/from the host. Set only
  // for Send and Recv instructions and for their SendDone and RecvDone
  // partners.
  bool is_host_transfer = 47;

  // Whether this Sort instruction should be stable.
  bool is_stable = 60;

  xla.ScatterDimensionNumbers scatter_dimension_numbers = 48;

  // Precision configuration of the instruction; its meaning is
  // backend-specific.
  xla.PrecisionConfig precision_config = 51;

  // Collective permute field.
  repeated SourceTarget source_target_pairs = 52;

  // Shardings for kDomain instructions.
  xla.OpSharding domain_entry_sharding = 54;
  xla.OpSharding domain_exit_sharding = 55;

  // For a custom call, states that the layouts are constrained. When
  // constrain_layout is true, the 'shape' field must carry a layout and
  // 'operand_shapes_with_layout' must hold one shape with layout per operand.
  bool constrain_layout = 56;
  repeated xla.ShapeProto operand_shapes_with_layout = 57;

  // Options for TriangularSolve.
  xla.TriangularSolveOptions triangular_solve_options = 59;

  // Options for Cholesky.
  xla.CholeskyOptions cholesky_options = 62;

  // How parameters behave with regards to replicas.
  xla.ParameterReplication parameter_replication = 61;

  // When set, the instruction is run in parallel, e.g. on multiple CPU cores.
  // The outermost dimension is split into outer_dimension_partitions[0]
  // pieces, the next-outermost dimension into outer_dimension_partitions[1]
  // pieces, and so on.
  //
  // Partitioning a dimension into more shards than it has elements is
  // illegal.
  repeated int64 outer_dimension_partitions = 64;

  // Whether the kCustomCall instruction has side effects; set only for
  // kCustomCall.
  bool custom_call_has_side_effect = 65;

  // CustomCallOutputOperandAliasing pairs that describe which output and
  // operand buffers alias for kCustomCall.
  repeated xla.CustomCallOutputOperandAliasing
      custom_call_output_operand_aliasing = 74;

  // Desired schedule of the custom-call; set only for custom-call.
  CustomCallSchedule custom_call_schedule = 76;

  // The delta value for kRngGetAndUpdateState.
  int64 delta = 66;

  // Whether the caller guarantees that the gather/scatter indices are sorted.
  bool indices_are_sorted = 67;

  // Frontend attributes that are passed to the XLA backend.
  xla.FrontendAttributes frontend_attributes = 68;

  // Whether the caller guarantees that all updated elements are unique.
  bool unique_indices = 69;

  // RNG algorithm used by kRngBitGenerator.
  xla.RandomAlgorithm rng_algorithm = 70;

  // Comparison type used for kCompare.
  string comparison_type = 72;

  // Whether this is a cross-program-prefetch; used by kCopyStart.
  bool is_cross_program_prefetch = 73;

  // Dynamic padding type, specified when a convolution is dynamic.
  xla.PaddingType padding_type = 75;
}
// Serialized form of HloComputation.
message HloComputationProto {
  // Retired field number and name; do not reuse either.
  reserved 3;
  reserved "root_name";

  string name = 1;

  // Instructions, always listed in a valid dependency order: every operand
  // appears before the instructions that use it.
  repeated HloInstructionProto instructions = 2;

  // Program shape (with layout) of this computation.
  xla.ProgramShapeProto program_shape = 4;

  // Id of this computation.
  int64 id = 5;

  // Id of the root of the computation.
  int64 root_id = 6;
}
// Serialized form of an HLO schedule: a total order of instructions for every
// non-fusion computation in the module.
message HloScheduleProto {
  // Ordered instruction ids of one computation.
  message InstructionSequence {
    repeated int64 instruction_ids = 1;
  }

  // Sequence of each computation, keyed by computation id.
  map<int64, InstructionSequence> sequences = 1;
}
// Kind of alias recorded by HloInputOutputAliasProto.AliasEntryProto.
enum Kind {
  // An UNDEFINED_ALIAS equal to zero works around the default-0 behavior of
  // proto3 and its missing has_*() APIs.
  UNDEFINED_ALIAS = 0;

  // The buffers may or may not alias at runtime.
  MAY_ALIAS = 1;

  // The buffers must alias at runtime.
  MUST_ALIAS = 2;
}
// Alias information between the inputs and the outputs of a module.
message HloInputOutputAliasProto {
  // Describes one aliased pair: an input (given by a parameter number and a
  // ShapeIndex into that parameter) and an output (given by a ShapeIndex into
  // the root instruction). For example:
  //
  //   entry = {
  //     output_shape_index={1},
  //     parameter_number=0,
  //     parameter_shape_index={1, 2},
  //   }
  //
  // This entry says that the {1, 2} element of the first parameter is aliased
  // with the {1} element of the root instruction.
  message AliasEntryProto {
    // ShapeIndex of the root hlo.
    repeated int64 output_shape_index = 1;

    // Number of the parameter in the entry computation.
    int64 parameter_number = 2;

    // ShapeIndex of the parameter instruction.
    repeated int64 parameter_shape_index = 3;

    // The kind of alias to set up.
    Kind kind = 4;
  }

  repeated AliasEntryProto entries = 1;
}
// Records which parameter dimensions are dynamic and which parameters hold
// their real runtime sizes.
message DynamicParameterBindingProto {
  // One binding states that dimension `target_param_dim_num` of the subshape
  // at `target_param_index` in parameter `target_param_num` is dynamic, and
  // that its real dynamic size is held at `dynamic_param_index` in parameter
  // `dynamic_param_num`.
  //
  // As an example, imagine we have a program:
  //
  //   ENTRY main {
  //     a = f32[] parameter(0)
  //     b = f32[10] parameter(1)
  //     ROOT root = (f32[], f32[10]) tuple(%a, %b)
  //   }
  //
  // Say 'b' (parameter 1) has a dynamic shape whose input has an upper bound
  // of 10 and whose real size is determined at runtime, and 'a' (parameter 0)
  // carries the real size of b's first dimension. Following the definition
  // above, the fields are set in the following way:
  //
  //   dynamic_param_num = 0
  //   dynamic_param_index = {}
  //   target_param_num = 1
  //   target_param_index = {}
  //   target_param_dim_num = 0
  //
  // NOTE(review): an earlier wording of this example had the two parameter
  // numbers swapped; confirm against the code that populates this message.
  message Binding {
    int64 dynamic_param_num = 1;
    repeated int64 dynamic_param_index = 2;
    int64 target_param_num = 3;
    repeated int64 target_param_index = 4;
    int64 target_param_dim_num = 5;
  }

  repeated Binding entries = 1;
}
// One entry of HloModuleProto.cross_program_prefetches.
// NOTE(review): presumably `parameter` is an entry-computation parameter
// number and `index` a ShapeIndex into that parameter; confirm.
message CrossProgramPrefetch {
  int64 parameter = 1;
  repeated int64 index = 2;
}
// Serialized form of HloModule.
message HloModuleProto {
  string name = 1;

  // Name and id of the entry computation.
  string entry_computation_name = 2;
  int64 entry_computation_id = 6;

  // Computations, always listed in a valid dependency order: every callee
  // appears before its callers.
  repeated HloComputationProto computations = 3;

  // Host program shape (with layout) of the entry computation.
  xla.ProgramShapeProto host_program_shape = 4;

  // Id of this module.
  int64 id = 5;

  // Schedule for this module.
  HloScheduleProto schedule = 7;

  // Alias information between inputs and outputs.
  HloInputOutputAliasProto input_output_alias = 8;

  DynamicParameterBindingProto dynamic_parameter_binding = 9;

  repeated CrossProgramPrefetch cross_program_prefetches = 10;

  // True if the module contains dynamic computation.
  bool is_dynamic = 11;
}
// Serialized form of LogicalBuffer.
message LogicalBufferProto {
  // An instruction together with a shape index; the pair uniquely identifies
  // a point where a buffer is needed.
  message Location {
    // NOTE: no module_name is needed, because all LogicalBuffers are
    // associated with a single HloModule.
    string computation_name = 1;
    string instruction_name = 2;
    repeated int64 shape_index = 3;
  }

  int64 id = 1;
  int64 size = 2;

  // Location at which the buffer is defined.
  Location defined_at = 3;

  int64 color = 4;
}
// Serialized form of BufferAllocation.
//
// NOTE(review): field number 4 is neither assigned nor reserved in this
// message; confirm it was never used before handing it out.
message BufferAllocationProto {
  // A single LogicalBuffer that is assigned to this BufferAllocation.
  message Assigned {
    int64 logical_buffer_id = 1;
    int64 offset = 2;
    int64 size = 3;
  }

  int64 index = 1;
  int64 size = 2;

  // Boolean properties of the allocation, interleaved with the parameter
  // number and shape index in their original declaration order.
  bool is_thread_local = 3;
  bool is_tuple = 11;
  bool is_entry_computation_parameter = 5;
  bool is_constant = 12;
  int64 parameter_number = 6;
  repeated int64 parameter_shape_index = 10;
  bool maybe_live_out = 7;

  int64 color = 8;

  // The LogicalBuffers assigned to this allocation.
  repeated Assigned assigned = 9;
}
// Trace of one HeapSimulator run.
message HeapSimulatorTrace {
  // One action performed by the heap simulator. The trace is a list of such
  // events.
  message Event {
    enum Kind {
      // A memory region was allocated for the buffer.
      ALLOC = 0;

      // A memory region was freed for the buffer.
      FREE = 1;

      // A buffer was shared with another (canonical) buffer. This resembles
      // ALLOC, but rather than allocating a new region of memory, the memory
      // region of the canonical buffer is directly re-used. Several buffers
      // may share with the same canonical buffer. The lifetime of the
      // canonical buffer is extended to the union of all lifetimes.
      SHARE_WITH = 2;
    }
    Kind kind = 1;

    // Id of the LogicalBuffer that the event applies to.
    int64 buffer_id = 2;

    // The HloInstruction that the simulation was processing when this event
    // occurred, identified by its computation and instruction name. E.g.
    // buffers defined by instruction A are allocated when processing A.
    string computation_name = 3;
    string instruction_name = 4;

    // Id of the canonical LogicalBuffer that the buffer shares with. Set
    // only for SHARE_WITH events.
    int64 share_with_canonical_id = 5;
  }

  repeated Event events = 1;
  bool whole_module_simulation = 2;
  int64 buffer_allocation_index = 3;
}
// An abstraction for a set of HLO modules built to run concurrently across
// different devices.
message HloModuleGroupProto {
  string name = 1;
  repeated HloModuleProto hlo_modules = 2;
}
// Serialized form of BufferAssignment.
message BufferAssignmentProto {
  // A source LogicalBuffer together with the buffer location that aliases it.
  message BufferAlias {
    int64 source_buffer_id = 1;
    LogicalBufferProto.Location location = 2;
  }

  repeated LogicalBufferProto logical_buffers = 1;
  repeated BufferAlias buffer_aliases = 2;
  repeated BufferAllocationProto buffer_allocations = 3;
  repeated HeapSimulatorTrace heap_simulator_traces = 4;
}
// Grouping message that bundles the HLO module with its buffer assignment.
message HloProto {
  // Retired field number and name; do not reuse either.
  reserved 2;
  reserved "hlo_ordering";

  HloModuleProto hlo_module = 1;
  BufferAssignmentProto buffer_assignment = 3;
}
// Bundles an HloProto with the arguments, the result and the
// execution_platform. Used for purposes such as analysis, replay and file
// storage.
message HloSnapshot {
  // The hlo graph.
  HloProto hlo = 1;

  // Arguments that were passed to the graph.
  repeated LiteralProto arguments = 2;

  // Result of the graph.
  LiteralProto result = 3;

  // Name of the platform used to run the graph.
  string execution_platform = 4;
}
// Metadata of one HLO module. It is dumped after the HLO passes and before
// LLO lowering, under the filename module_####.metadata.textproto, where ####
// is the canonical_module_id.
message HloModuleMetadataProto {
  // Uniquely identifies an HloModuleMetadata. It equals the first unique_id
  // of the module (a module may go through multiple unique_ids). When a
  // module is partitioned into multiple modules, each of those gets a new
  // HloModuleMetadata with a different canonical_module_id.
  int64 canonical_module_id = 1;

  // Name of the module group that the module belongs to.
  string module_group_name = 2;

  // Canonical module id of the module that this one was partitioned from, if
  // applicable.
  int64 original_module_id = 3;

  // Canonical module ids of the modules that this one was partitioned into,
  // if applicable.
  repeated int64 partitioned_module_ids = 4;

  // Metadata of the HLO passes that were run on the module.
  repeated HloPassMetadata pass_metadata = 5;
}
// Metadata of a single run of an HLO pass on a module. When debug dumps of
// HloProtos are processed, it gives more information about the order of the
// HLO passes and about other stats such as duration. `pass_id` may also be
// used to identify a particular run of a pass in debug info that propagates
// through the stages of compilation.
message HloPassMetadata {
  // Within a given module, pass_id uniquely identifies one run of an HLO pass
  // on that module. A pass_id may not always refer to the same pass, because
  // the order of passes during compilation may change. To find the metadata
  // of a particular pass, pass_name and pipeline_name are more reliable,
  // although they may not be unique.
  int64 pass_id = 1;
  string pass_name = 2;
  string pipeline_name = 3;

  // Filenames of the dumps of the module taken after this pass ran. The
  // module may be dumped in multiple formats; the order of formats in this
  // field stays consistent across passes.
  repeated string dump_filenames = 4;

  // Return value of pass.Run(): true if this pass changed the module or, when
  // the module went through this pass as part of a module group, true if the
  // pass changed any module of that group.
  bool module_changed = 5;

  // The unique_id of the module that this pass is run on. It may differ from
  // the canonical_module_id of the HloModuleMetadata that contains this
  // HloPassMetadata.
  int64 module_id = 6;

  // When the module went through this pass as part of a module group, the ids
  // of all modules in that group. Empty otherwise.
  repeated int64 module_group_module_ids = 7;

  // Timestamps taken before and after the pass is run. They may be equal.
  int64 start_timestamp_usec = 8;
  int64 end_timestamp_usec = 9;
}