// gcs.fbs
// Programming language identifier, stored as an int.
// Referenced by the `language` fields of TaskInfo and FunctionTableData.
enum Language:int {
PYTHON = 0,
CPP = 1,
JAVA = 2
}
// Table prefix indexes. Each index is mapped to a string in
// ray_redis_module.cc. The values are written out explicitly so that
// inserting or reordering an entry cannot silently shift the indexes.
enum TablePrefix:int {
  UNUSED = 0,
  TASK = 1,
  RAYLET_TASK = 2,
  CLIENT = 3,
  OBJECT = 4,
  ACTOR = 5,
  FUNCTION = 6,
  TASK_RECONSTRUCTION = 7,
  HEARTBEAT = 8,
  HEARTBEAT_BATCH = 9,
  ERROR_INFO = 10,
  DRIVER = 11,
  PROFILE = 12,
  TASK_LEASE = 13,
  ACTOR_CHECKPOINT = 14,
  ACTOR_CHECKPOINT_ID = 15,
  NODE_RESOURCE = 16,
}
// Pubsub channel on which Add operations to a Table are published.
// NO_PUBLISH means the operation is not published on any channel.
// The values are written out explicitly so that inserting or reordering an
// entry cannot silently shift them.
enum TablePubsub:int {
  NO_PUBLISH = 0,
  TASK = 1,
  RAYLET_TASK = 2,
  CLIENT = 3,
  OBJECT = 4,
  ACTOR = 5,
  HEARTBEAT = 6,
  HEARTBEAT_BATCH = 7,
  ERROR_INFO = 8,
  TASK_LEASE = 9,
  DRIVER = 10,
  NODE_RESOURCE = 11,
}
// Type of an entry in the ClientTable (see ClientTableData.entry_type).
// NOTE(review): RES_CREATEUPDATE / RES_DELETE presumably describe creating,
// updating or deleting a resource -- confirm against the code that writes
// these entries.
enum EntryType:int {
  INSERTION = 0,
  DELETION = 1,
  RES_CREATEUPDATE = 2,
  RES_DELETE = 3,
}
// A single task argument, passed either by reference (object_ids) or by
// value (data). Used as the element type of TaskInfo.args.
table Arg {
// Object ID for pass-by-reference arguments. Normally there is only one
// object ID in this list, which represents the object that is being passed.
// However, to support reducers in a MapReduce workload, we also support
// passing multiple object IDs for each argument.
// Note that this is one long string that concatenates all of the object IDs.
object_ids: string;
// Data for pass-by-value arguments.
data: string;
}
// Describes a task: its identity and parentage, the actor it creates or runs
// on (if any), its arguments and return count, its resource requirements,
// and the function to execute.
table TaskInfo {
// ID of the driver that created this task.
driver_id: string;
// Task ID of the task.
task_id: string;
// Task ID of the parent task.
parent_task_id: string;
// A count of the number of tasks submitted by the parent task before this one.
parent_counter: int;
// The ID of the actor to create if this is an actor creation task.
actor_creation_id: string;
// The dummy object ID of the actor creation task if this is an actor method.
actor_creation_dummy_object_id: string;
// The max number of times this actor should be reconstructed.
// If this number is 0 or negative, the actor won't be reconstructed on failure.
max_actor_reconstructions: int;
// Actor ID of the task. This is the actor that this task is executed on
// or NIL_ACTOR_ID if the task is just a normal task.
actor_id: string;
// The ID of the handle that was used to submit the task. This should be
// unique across handles with the same actor_id.
actor_handle_id: string;
// Number of tasks that have been submitted to this actor so far.
actor_counter: int;
// If this is an actor task, then this will be populated with all of the new
// actor handles that were forked from this handle since the last task on
// this handle was submitted.
// Note that this is one long string that concatenates all of the
// new_actor_handle IDs.
new_actor_handles: string;
// Task arguments.
args: [Arg];
// Number of return objects.
num_returns: int;
// The required_resources vector indicates the quantities of the different
// resources required by this task.
required_resources: [ResourcePair];
// The resources required for placing this task on a node. If this is empty,
// then the placement resources are equal to the required_resources.
required_placement_resources: [ResourcePair];
// The language that this task belongs to.
language: Language;
// Function descriptor, which is a list of strings that can
// uniquely describe a function.
// For a Python function, it should be: [module_name, class_name, function_name]
// For a Java function, it should be: [class_name, method_name, type_descriptor]
function_descriptor: [string];
// The dynamic options used in the worker command when starting the worker process for
// an actor creation task. If the list isn't empty, the options will be used to replace
// the placeholder strings (`RAY_WORKER_OPTION_0`, `RAY_WORKER_OPTION_1`, etc) in the
// worker command.
dynamic_worker_options: [string];
}
// A (resource name, quantity) pair. Used as the element type of
// TaskInfo.required_resources and TaskInfo.required_placement_resources.
table ResourcePair {
// The name of the resource.
key: string;
// The quantity of the resource.
value: double;
}
// Kind of change carried by a GcsEntry (see GcsEntry.change_mode).
// The values are written out explicitly so that adding an entry cannot
// silently shift them.
enum GcsChangeMode:int {
  APPEND_OR_ADD = 0,
  REMOVE = 1,
}
// A change record: a change mode, an id, and a list of string entries.
// NOTE(review): presumably `id` is the key being modified and `entries` are
// the serialized values appended/added or removed -- confirm against callers.
table GcsEntry {
// Whether the entries are appended/added or removed.
change_mode: GcsChangeMode;
id: string;
entries: [string];
}
// A function table record: a language, a name, and a data string.
// NOTE(review): presumably `data` holds the serialized function -- confirm.
table FunctionTableData {
language: Language;
name: string;
data: string;
}
// An object table record: the object's size and the node manager involved.
table ObjectTableData {
// The size of the object.
// NOTE(review): unit is presumably bytes -- confirm.
object_size: long;
// The node manager ID that this object appeared on or was evicted by.
manager: string;
}
// Records a reconstruction attempt for a task.
table TaskReconstructionData {
// The number of times this task has been reconstructed so far.
num_reconstructions: int;
// The node manager that is trying to reconstruct the task.
node_manager_id: string;
}
// Scheduling state of a task. Every value other than NONE is a distinct
// power of two, so several states can be OR-ed together into one int; see
// TaskTableTestAndUpdate.test_state_bitmask, which uses this type as a mask.
enum SchedulingState:int {
NONE = 0,
WAITING = 1,
SCHEDULED = 2,
QUEUED = 4,
RUNNING = 8,
DONE = 16,
LOST = 32,
RECONSTRUCTING = 64
}
// A task table record: the task's scheduling state, the raylet associated
// with it, and its serialized dependencies and specification.
table TaskTableData {
// The state of the task.
scheduling_state: SchedulingState;
// A raylet ID.
raylet_id: string;
// A string of bytes representing the task's TaskExecutionDependencies.
execution_dependencies: string;
// The number of times the task was spilled back by raylets.
spillback_count: long;
// A string of bytes representing the task specification.
task_info: string;
// NOTE(review): the TODO below sits directly above `updated`, but its wording
// ("duplicated in task_info") reads as if it refers to a different field --
// confirm which field it is about. The meaning of `updated` is not documented.
// TODO(pcm): This is at the moment duplicated in task_info, remove that one
updated: bool;
}
// Parameters of a test-and-update operation on a task table entry.
// NOTE(review): presumably the entry is updated to `update_state` only if its
// raylet ID equals `test_raylet_id` and its state matches
// `test_state_bitmask` -- confirm against the code that consumes this table.
table TaskTableTestAndUpdate {
test_raylet_id: string;
// A mask of SchedulingState values (they are powers of two).
test_state_bitmask: SchedulingState;
update_state: SchedulingState;
}
// Empty table: declares no fields.
table ClassTableData {
}
// Lifecycle state of an actor (see ActorTableData.state).
enum ActorState:int {
// Actor is alive.
ALIVE = 0,
// Actor is dead, now being reconstructed.
// After reconstruction finishes, the state will become alive again.
RECONSTRUCTING = 1,
// Actor is already dead and won't be reconstructed.
DEAD = 2
}
// An actor table record: the actor's identity, where and by whom it was
// created, its current state, and its reconstruction budget.
table ActorTableData {
// The ID of the actor that was created.
actor_id: string;
// The dummy object ID returned by the actor creation task. If the actor
// dies, then this is the object that should be reconstructed for the actor
// to be recreated.
actor_creation_dummy_object_id: string;
// The ID of the driver that created the actor.
driver_id: string;
// The ID of the node manager that created the actor.
node_manager_id: string;
// Current state of this actor.
state: ActorState;
// Max number of times this actor should be reconstructed.
max_reconstructions: int;
// Remaining number of reconstructions.
remaining_reconstructions: int;
}
// An error record addressed to a driver.
table ErrorTableData {
// The ID of the driver that the error is for.
driver_id: string;
// The type of the error.
type: string;
// The error message.
error_message: string;
// The timestamp of the error message.
// NOTE(review): unit/epoch is not stated here -- confirm with the writer.
timestamp: double;
}
// Empty table: declares no fields.
table CustomSerializerData {
}
// Empty table: declares no fields.
table ConfigTableData {
}
// A single profiling event. Used as the element type of
// ProfileTableData.profile_events.
table ProfileEvent {
// The type of the event.
event_type: string;
// The start time of the event.
start_time: double;
// The end time of the event. If the event is a point event, then this should
// be the same as the start time.
end_time: double;
// Additional data associated with the event. This data must be serialized
// using JSON.
extra_data: string;
}
// A batch of profiling events produced by one component.
table ProfileTableData {
// The type of the component that generated the event, e.g., worker,
// object_manager, or node_manager.
component_type: string;
// An identifier for the component that generated the event.
component_id: string;
// An identifier for the node that generated the event.
node_ip_address: string;
// This is a batch of profiling events. We batch these together for
// performance reasons because a single task may generate many events, and
// we don't want each event to require a GCS command.
profile_events: [ProfileEvent];
}
// A resource type together with its total capacity.
table RayResource {
// The type of the resource.
resource_name: string;
// The total capacity of this resource type.
resource_capacity: double;
}
// A client table record: how to reach a client's node manager, raylet,
// plasma store and object manager, plus the type of this log entry.
table ClientTableData {
// The client ID of the client that the message is about.
client_id: string;
// The IP address of the client's node manager.
node_manager_address: string;
// The IPC socket name of the client's raylet.
raylet_socket_name: string;
// The IPC socket name of the client's plasma store.
object_store_socket_name: string;
// The port at which the client's node manager is listening for TCP
// connections from other node managers.
node_manager_port: int;
// The port at which the client's object manager is listening for TCP
// connections from other object managers.
object_manager_port: int;
// Enum to store the entry type in the log. Defaults to INSERTION.
entry_type: EntryType = INSERTION;
// NOTE(review): presumably parallel lists, where resources_total_label[i]
// names the resource whose total capacity is resources_total_capacity[i] --
// confirm against the code that fills them.
resources_total_label: [string];
resources_total_capacity: [double];
}
// A heartbeat from a node manager, reporting its resource state.
// NOTE(review): each *_label / *_capacity pair is presumably a pair of
// parallel lists (label[i] names the resource whose quantity is
// capacity[i]) -- confirm against the code that fills them.
table HeartbeatTableData {
// Node manager client id
client_id: string;
// Resource capacity currently available on this node manager.
resources_available_label: [string];
resources_available_capacity: [double];
// Total resource capacity configured for this node manager.
resources_total_label: [string];
resources_total_capacity: [double];
// Aggregate outstanding resource load on this node manager.
resource_load_label: [string];
resource_load_capacity: [double];
}
// A batch of heartbeats carried in a single message.
table HeartbeatBatchTableData {
batch: [HeartbeatTableData];
}
// Data for a lease on task execution.
table TaskLeaseData {
// Node manager client ID.
node_manager_id: string;
// The time that the lease was last acquired at. NOTE(swang): This is the
// system clock time according to the node that added the entry and is not
// synchronized with other nodes.
acquired_at: long;
// The period that the lease is active for.
// NOTE(review): the time unit is not stated here -- confirm that it matches
// the unit of acquired_at.
timeout: long;
}
// A driver table record: a driver ID and whether that driver is dead.
table DriverTableData {
// The driver ID.
driver_id: string;
// Whether it's dead.
is_dead: bool;
}
// This table stores the actor checkpoint data. An actor checkpoint
// is the snapshot of an actor's state in the actor registration.
// See `actor_registration.h` for a more detailed explanation of these fields.
table ActorCheckpointData {
// ID of this actor.
actor_id: string;
// The dummy object ID of actor's most recently executed task.
execution_dependency: string;
// A list of IDs of this actor's handles.
handle_ids: [string];
// The task counters of the above handles.
task_counters: [long];
// The frontier dependencies of the above handles.
frontier_dependencies: [string];
// A list of unreleased dummy objects from this actor.
unreleased_dummy_objects: [string];
// The numbers of dependencies for the above unreleased dummy objects.
num_dummy_object_dependencies: [int];
}
// This table stores the actor-to-available-checkpoint-ids mapping.
table ActorCheckpointIdData {
// ID of this actor.
actor_id: string;
// IDs of this actor's available checkpoints.
// Note, this is one long string that concatenates all the IDs.
checkpoint_ids: string;
// A list of the timestamps for each of the above `checkpoint_ids`.
timestamps: [long];
}
// This enum type is used as an object's metadata to indicate that the
// object's creating task has failed because of a certain error.
// TODO(hchen): We may want to make these errors more specific. E.g., we may want
// to distinguish between intentional and expected actor failures, and between
// worker process failure and node failure.
// NOTE(review): values start at 1; no entry with value 0 is declared. A table
// field of this type would default to 0, which is not a declared value --
// confirm before using this enum as a field type.
enum ErrorType:int {
// Indicates that a task failed because the worker died unexpectedly while executing it.
WORKER_DIED = 1,
// Indicates that a task failed because the actor died unexpectedly before finishing it.
ACTOR_DIED = 2,
// Indicates that an object is lost and cannot be reconstructed.
// Note, this currently only happens to actor objects. When the actor's state is already
// after the object's creating task, the actor cannot re-run the task.
// TODO(hchen): we may want to reuse this error type for more cases. E.g.,
// 1) An object that was put by the driver.
// 2) The object's creating task is already cleaned up from GCS (this currently
// crashes raylet).
OBJECT_UNRECONSTRUCTABLE = 3,
}