-
Notifications
You must be signed in to change notification settings - Fork 524
/
hummock.proto
601 lines (502 loc) · 15.7 KB
/
hummock.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
syntax = "proto3";
package hummock;
import "catalog.proto";
import "common.proto";
import "compactor.proto";
option java_package = "com.risingwave.proto";
option optimize_for = SPEED;
// Metadata record describing a single SST.
message SstableInfo {
  uint64 id = 1;
  // See `KeyRange` for the boundary semantics.
  KeyRange key_range = 2;
  uint64 file_size = 3;
  repeated uint32 table_ids = 4;
  uint64 meta_offset = 5;
  uint64 stale_key_count = 6;
  uint64 total_key_count = 7;
  // Incremented by one every time this SST is divided.
  uint64 divide_version = 8;
  uint64 min_epoch = 9;
  uint64 max_epoch = 10;
}
// Kind of a level; referenced by `Level.level_type` and
// `InputLevel.level_type`.
// NOTE(review): presumably tells whether the key ranges of the SSTs inside
// the level may overlap - confirm.
enum LevelType {
  UNSPECIFIED = 0;
  NONOVERLAPPING = 1;
  OVERLAPPING = 2;
}
// A list of sub-levels together with a total file size.
// Used as the type of `HummockVersion.Levels.l0`.
message OverlappingLevel {
  repeated Level sub_levels = 1;
  uint64 total_file_size = 2;
}
// One level: its index, its type and the SSTs it contains.
message Level {
  uint32 level_idx = 1;
  LevelType level_type = 2;
  repeated SstableInfo table_infos = 3;
  uint64 total_file_size = 4;
  // NOTE(review): presumably only meaningful for the entries of
  // `OverlappingLevel.sub_levels` - confirm.
  uint64 sub_level_id = 5;
}
// A level as seen by a compaction task: the same first three fields as
// `Level`, without the size and sub-level information.
// Used as the element type of `CompactTask.input_ssts`.
message InputLevel {
  uint32 level_idx = 1;
  LevelType level_type = 2;
  repeated SstableInfo table_infos = 3;
}
// `intra_level` variant of `GroupDelta.delta_type`: ids removed from and
// SSTs inserted into one level.
message IntraLevelDelta {
  uint32 level_idx = 1;
  uint64 l0_sub_level_id = 2;
  // NOTE(review): these are uint64 like `SstableInfo.id`, so they presumably
  // hold SST ids rather than the uint32 table ids used elsewhere - confirm.
  repeated uint64 removed_table_ids = 3;
  repeated SstableInfo inserted_table_infos = 4;
}
// `group_construct` variant of `GroupDelta.delta_type`.
message GroupConstruct {
  CompactionConfig group_config = 1;
  // A non-zero `parent_group_id` means that the parent group splits into
  // itself and this group, so this group is not empty initially.
  uint64 parent_group_id = 2;
  repeated uint32 table_ids = 3;
  uint64 group_id = 4;
}
// `group_meta_change` variant of `GroupDelta.delta_type`: table ids to add
// and table ids to remove.
message GroupMetaChange {
  repeated uint32 table_ids_add = 1;
  repeated uint32 table_ids_remove = 2;
}
// `group_destroy` variant of `GroupDelta.delta_type`; carries no fields.
message GroupDestroy {}
// A single change to a group. At most one of the variants below is set.
// Used as the element type of `HummockVersionDelta.GroupDeltas.group_deltas`.
message GroupDelta {
  oneof delta_type {
    IntraLevelDelta intra_level = 1;
    GroupConstruct group_construct = 2;
    GroupDestroy group_destroy = 3;
    GroupMetaChange group_meta_change = 4;
  }
}
// An epoch together with a list of SSTs.
// Not referenced by any other definition in this file.
message UncommittedEpoch {
  uint64 epoch = 1;
  repeated SstableInfo tables = 2;
}
// A Hummock version: the levels of every compaction group plus the
// committed and safe epochs.
message HummockVersion {
  // Level layout and membership of one compaction group.
  message Levels {
    repeated Level levels = 1;
    OverlappingLevel l0 = 2;
    uint64 group_id = 3;
    uint64 parent_group_id = 4;
    repeated uint32 member_table_ids = 5;
  }

  uint64 id = 1;
  // Levels of each compaction group.
  map<uint64, Levels> levels = 2;
  uint64 max_committed_epoch = 3;
  // Snapshots whose epoch is less than the safe epoch have been GCed.
  // Reads against such an epoch will fail.
  uint64 safe_epoch = 4;
}
// A set of per-compaction-group changes, plus the resulting epochs.
message HummockVersionDelta {
  // Wrapper needed because a map value cannot be a repeated field.
  message GroupDeltas {
    repeated GroupDelta group_deltas = 1;
  }

  uint64 id = 1;
  // NOTE(review): presumably the id of the version this delta is applied
  // on top of - confirm.
  uint64 prev_id = 2;
  // Deltas of each compaction group.
  map<uint64, GroupDeltas> group_deltas = 3;
  uint64 max_committed_epoch = 4;
  // Snapshots whose epoch is less than the safe epoch have been GCed.
  // Reads against such an epoch will fail.
  uint64 safe_epoch = 5;
  bool trivial_move = 6;
  repeated uint64 gc_sst_ids = 7;
}
// A list of version deltas. Used by `VersionUpdatePayload` and
// `ListVersionDeltasResponse`.
message HummockVersionDeltas {
  repeated HummockVersionDelta version_deltas = 1;
}
// After decoupling, a snapshot consists of two epochs.
message HummockSnapshot {
  // Epoch with checkpoint; durable data is read with it.
  uint64 committed_epoch = 1;
  // Epoch without checkpoint; real-time data is read with it, but it may be
  // rolled back.
  uint64 current_epoch = 2;
}
// Carries either a list of version deltas or a whole pinned version.
// Not referenced by any RPC of `HummockManagerService` in this file.
message VersionUpdatePayload {
  oneof payload {
    HummockVersionDeltas version_deltas = 1;
    HummockVersion pinned_version = 2;
  }
}
// Request message of the `UnpinVersionBefore` RPC.
message UnpinVersionBeforeRequest {
  uint32 context_id = 1;
  uint64 unpin_version_before = 2;
}
// Response message of the `UnpinVersionBefore` RPC.
message UnpinVersionBeforeResponse {
  common.Status status = 1;
}
// Request message of the `GetCurrentVersion` RPC; carries no fields.
message GetCurrentVersionRequest {}
// Response message of the `GetCurrentVersion` RPC.
message GetCurrentVersionResponse {
  common.Status status = 1;
  HummockVersion current_version = 2;
}
// Unpin-version request identified by `context_id` only.
// No RPC of `HummockManagerService` in this file uses this message.
message UnpinVersionRequest {
  uint32 context_id = 1;
}
// Counterpart of `UnpinVersionRequest`.
// No RPC of `HummockManagerService` in this file uses this message.
message UnpinVersionResponse {
  common.Status status = 1;
}
// Request message of the `PinSnapshot` RPC.
message PinSnapshotRequest {
  uint32 context_id = 1;
}
// Request message of the `PinSpecificSnapshot` RPC. The RPC answers with
// `PinSnapshotResponse`.
message PinSpecificSnapshotRequest {
  uint32 context_id = 1;
  uint64 epoch = 2;
}
// Request message of the `GetAssignedCompactTaskNum` RPC; carries no fields.
message GetAssignedCompactTaskNumRequest {}
// Response message of the `GetAssignedCompactTaskNum` RPC.
message GetAssignedCompactTaskNumResponse {
  uint32 num_tasks = 1;
}
// Response message shared by the `PinSnapshot` and `PinSpecificSnapshot`
// RPCs.
message PinSnapshotResponse {
  common.Status status = 1;
  HummockSnapshot snapshot = 2;
}
// Request message of the `GetEpoch` RPC; carries no fields.
message GetEpochRequest {}
// Response message of the `GetEpoch` RPC.
message GetEpochResponse {
  common.Status status = 1;
  HummockSnapshot snapshot = 2;
}
// Request message of the `UnpinSnapshot` RPC.
message UnpinSnapshotRequest {
  uint32 context_id = 1;
}
// Response message of the `UnpinSnapshot` RPC.
message UnpinSnapshotResponse {
  common.Status status = 1;
}
// Request message of the `UnpinSnapshotBefore` RPC.
message UnpinSnapshotBeforeRequest {
  // Field number 2 is not used by any current field; keep it reserved so it
  // cannot be reused with a different meaning.
  reserved 2;

  uint32 context_id = 1;
  HummockSnapshot min_snapshot = 3;
}
// Response message of the `UnpinSnapshotBefore` RPC.
message UnpinSnapshotBeforeResponse {
  common.Status status = 1;
}
// A range of keys.
// When `right_exclusive=false`, it represents [left, right], of which both
// boundaries are inclusive. When `right_exclusive=true`, it represents
// [left, right), of which the right boundary is exclusive.
message KeyRange {
  bytes left = 1;
  bytes right = 2;
  bool right_exclusive = 3;
}
// Per-table option; the value type of `CompactTask.table_options`.
message TableOption {
  uint32 retention_seconds = 1;
}
// A compaction task: its input SSTs, how the output is split and where it
// goes, and the task's status and type.
message CompactTask {
  // Field numbers 8, 10 and 11 are not used by any current field; keep them
  // reserved so they cannot be reused with a different meaning.
  reserved 8, 10, 11;

  enum TaskStatus {
    UNSPECIFIED = 0;
    PENDING = 1;
    SUCCESS = 2;
    HEARTBEAT_CANCELED = 3;
    NO_AVAIL_CANCELED = 4;
    ASSIGN_FAIL_CANCELED = 5;
    SEND_FAIL_CANCELED = 6;
    MANUAL_CANCELED = 7;
    INVALID_GROUP_CANCELED = 8;
    EXECUTE_FAILED = 9;
    JOIN_HANDLE_FAILED = 10;
    TRACK_SST_ID_FAILED = 11;
  }

  // SSTs to be compacted, which will be removed from LSM after compaction
  repeated InputLevel input_ssts = 1;
  // In ideal case, the compaction will generate `splits.len()` tables which
  // have key range corresponding to that in [`splits`], respectively
  repeated KeyRange splits = 2;
  // low watermark in 'ts-aware compaction'
  uint64 watermark = 3;
  // compaction output, which will be added to [`target_level`] of LSM after
  // compaction
  repeated SstableInfo sorted_output_ssts = 4;
  // task id assigned by hummock storage service
  uint64 task_id = 5;
  // compaction output will be added to [`target_level`] of LSM after
  // compaction
  uint32 target_level = 6;
  bool gc_delete_keys = 7;
  TaskStatus task_status = 9;
  // compaction group the task belongs to
  uint64 compaction_group_id = 12;
  // existing_table_ids for compaction drop key
  repeated uint32 existing_table_ids = 13;
  uint32 compression_algorithm = 14;
  uint64 target_file_size = 15;
  uint32 compaction_filter_mask = 16;
  map<uint32, TableOption> table_options = 17;
  uint64 current_epoch_time = 18;
  uint64 target_sub_level_id = 19;

  enum TaskType {
    TYPE_UNSPECIFIED = 0;
    DYNAMIC = 1;
    SPACE_RECLAIM = 2;
    MANUAL = 3;
    SHARED_BUFFER = 4;
    TTL = 5;
  }

  // Identifies the type of the task (e.g. space reclaim); see `TaskType`.
  TaskType task_type = 20;
}
// The compact tasks recorded against one level.
message LevelHandler {
  // Field number 2 is not used by any current field; keep it reserved so it
  // cannot be reused with a different meaning.
  reserved 2;

  // One recorded compact task: its id, the SST ids it covers, their total
  // file size and the target level.
  message RunningCompactTask {
    uint64 task_id = 1;
    repeated uint64 ssts = 2;
    uint64 total_file_size = 3;
    uint32 target_level = 4;
  }

  uint32 level = 1;
  repeated RunningCompactTask tasks = 3;
}
// The level handlers of one compaction group.
message CompactStatus {
  uint64 compaction_group_id = 1;
  repeated LevelHandler level_handlers = 2;
}
// Config info of compaction group.
message CompactionGroup {
  // Field numbers 2 and 3 are not used by any current field; keep them
  // reserved so they cannot be reused with a different meaning.
  reserved 2, 3;

  uint64 id = 1;
  CompactionConfig compaction_config = 4;
}
// Complete info of compaction group.
// The info is the aggregate of `HummockVersion` and `CompactionGroupConfig`.
// NOTE(review): this file defines no `CompactionGroupConfig`; the comment
// presumably means `CompactionGroup` / `CompactionConfig` - confirm.
message CompactionGroupInfo {
  uint64 id = 1;
  uint64 parent_id = 2;
  repeated uint32 member_table_ids = 3;
  CompactionConfig compaction_config = 4;
}
// Pairs a compact task with a `context_id`.
// Not referenced by any other definition in this file.
message CompactTaskAssignment {
  CompactTask compact_task = 1;
  uint32 context_id = 2;
}
// Empty request message.
// No RPC of `HummockManagerService` in this file uses this message.
message GetCompactionTasksRequest {}
// Counterpart of `GetCompactionTasksRequest`: a status and one compact task.
// No RPC of `HummockManagerService` in this file uses this message.
message GetCompactionTasksResponse {
  common.Status status = 1;
  CompactTask compact_task = 2;
}
// Request message of the `ReportCompactionTasks` RPC.
message ReportCompactionTasksRequest {
  uint32 context_id = 1;
  CompactTask compact_task = 2;
  // NOTE(review): the key is presumably a table id - confirm.
  map<uint32, TableStats> table_stats_change = 3;
}
// Response message of the `ReportCompactionTasks` RPC.
message ReportCompactionTasksResponse {
  common.Status status = 1;
}
// The minimum pinned version id recorded for one `context_id`.
// Listed in `PinnedVersionsSummary.pinned_versions`.
message HummockPinnedVersion {
  uint32 context_id = 1;
  uint64 min_pinned_id = 2;
}
// The minimal pinned snapshot recorded for one `context_id`.
// Listed in `PinnedSnapshotsSummary.pinned_snapshots`.
message HummockPinnedSnapshot {
  uint32 context_id = 1;
  uint64 minimal_pinned_snapshot = 2;
}
// Request message of the `GetNewSstIds` RPC.
message GetNewSstIdsRequest {
  // NOTE(review): presumably how many new SST ids are requested - confirm.
  uint32 number = 1;
}
// Response message of the `GetNewSstIds` RPC: the id range
// [start_id, end_id).
message GetNewSstIdsResponse {
  common.Status status = 1;
  // Inclusive lower bound.
  uint64 start_id = 2;
  // Exclusive upper bound.
  uint64 end_id = 3;
}
// Heartbeat message for a compact task. The task is considered dead when no
// `CompactTaskProgress` is received for a timeout, or when
// `num_ssts_sealed`/`num_ssts_uploaded` do not increase for a timeout.
message CompactTaskProgress {
  uint64 task_id = 1;
  uint32 num_ssts_sealed = 2;
  uint32 num_ssts_uploaded = 3;
}
// Request message of the `ReportCompactionTaskProgress` RPC.
message ReportCompactionTaskProgressRequest {
  uint32 context_id = 1;
  repeated CompactTaskProgress progress = 2;
}
// Response message of the `ReportCompactionTaskProgress` RPC.
message ReportCompactionTaskProgressResponse {
  common.Status status = 1;
}
// Request message of the `SubscribeCompactTasks` RPC.
message SubscribeCompactTasksRequest {
  uint32 context_id = 1;
  uint64 max_concurrent_task_number = 2;
}
// `validation_task` variant of `SubscribeCompactTasksResponse.task`.
message ValidationTask {
  repeated SstableInfo sst_infos = 1;
  map<uint64, uint32> sst_id_to_worker_id = 2;
  uint64 epoch = 3;
}
// Element of the response stream of the `SubscribeCompactTasks` RPC.
// At most one of the task variants below is set.
message SubscribeCompactTasksResponse {
  oneof task {
    CompactTask compact_task = 1;
    VacuumTask vacuum_task = 2;
    FullScanTask full_scan_task = 3;
    ValidationTask validation_task = 4;
    CancelCompactTask cancel_compact_task = 5;
  }
}
// Task to delete SSTs in the object store.
message VacuumTask {
  repeated uint64 sstable_ids = 1;
}
// Task to scan the object store for candidate orphan SSTs.
message FullScanTask {
  uint64 sst_retention_time_sec = 1;
}
// Task to cancel the compact task identified by `task_id`.
message CancelCompactTask {
  uint32 context_id = 1;
  uint64 task_id = 2;
}
// Request message of the `ReportVacuumTask` RPC.
message ReportVacuumTaskRequest {
  VacuumTask vacuum_task = 1;
}
// Response message of the `ReportVacuumTask` RPC.
message ReportVacuumTaskResponse {
  common.Status status = 1;
}
// Request message of the `TriggerManualCompaction` RPC.
message TriggerManualCompactionRequest {
  uint64 compaction_group_id = 1;
  KeyRange key_range = 2;
  uint32 table_id = 3;
  uint32 level = 4;
  repeated uint64 sst_ids = 5;
}
// Response message of the `TriggerManualCompaction` RPC.
message TriggerManualCompactionResponse {
  common.Status status = 1;
}
// Request message of the `ReportFullScanTask` RPC.
message ReportFullScanTaskRequest {
  repeated uint64 sst_ids = 1;
}
// Response message of the `ReportFullScanTask` RPC.
message ReportFullScanTaskResponse {
  common.Status status = 1;
}
// Request message of the `TriggerFullGC` RPC.
message TriggerFullGCRequest {
  uint64 sst_retention_time_sec = 1;
}
// Response message of the `TriggerFullGC` RPC.
message TriggerFullGCResponse {
  common.Status status = 1;
}
// Request message of the `ListVersionDeltas` RPC.
message ListVersionDeltasRequest {
  uint64 start_id = 1;
  uint32 num_limit = 2;
  uint64 committed_epoch_limit = 3;
}
// Response message of the `ListVersionDeltas` RPC.
message ListVersionDeltasResponse {
  HummockVersionDeltas version_deltas = 1;
}
// Pinned versions together with a map of worker nodes.
message PinnedVersionsSummary {
  repeated HummockPinnedVersion pinned_versions = 1;
  // NOTE(review): presumably keyed by the `context_id` that appears in
  // `pinned_versions` - confirm.
  map<uint32, common.WorkerNode> workers = 2;
}
// Pinned snapshots together with a map of worker nodes.
message PinnedSnapshotsSummary {
  repeated HummockPinnedSnapshot pinned_snapshots = 1;
  // NOTE(review): presumably keyed by the `context_id` that appears in
  // `pinned_snapshots` - confirm.
  map<uint32, common.WorkerNode> workers = 2;
}
// Request message of the `RiseCtlGetPinnedVersionsSummary` RPC; carries no
// fields.
message RiseCtlGetPinnedVersionsSummaryRequest {}
// Response message of the `RiseCtlGetPinnedVersionsSummary` RPC.
message RiseCtlGetPinnedVersionsSummaryResponse {
  PinnedVersionsSummary summary = 1;
}
// Request message of the `RiseCtlGetPinnedSnapshotsSummary` RPC; carries no
// fields.
message RiseCtlGetPinnedSnapshotsSummaryRequest {}
// Response message of the `RiseCtlGetPinnedSnapshotsSummary` RPC.
message RiseCtlGetPinnedSnapshotsSummaryResponse {
  PinnedSnapshotsSummary summary = 1;
}
// Request message of the `InitMetadataForReplay` RPC.
message InitMetadataForReplayRequest {
  repeated catalog.Table tables = 1;
  repeated CompactionGroupInfo compaction_groups = 2;
}
// Response message of the `InitMetadataForReplay` RPC; carries no fields.
message InitMetadataForReplayResponse {}
// Request message of the `ReplayVersionDelta` RPC.
message ReplayVersionDeltaRequest {
  HummockVersionDelta version_delta = 1;
}
// Response message of the `ReplayVersionDelta` RPC.
message ReplayVersionDeltaResponse {
  HummockVersion version = 1;
  repeated uint64 modified_compaction_groups = 2;
}
// Request message of the `TriggerCompactionDeterministic` RPC.
message TriggerCompactionDeterministicRequest {
  uint64 version_id = 1;
  repeated uint64 compaction_groups = 2;
}
// Response message of the `TriggerCompactionDeterministic` RPC; carries no
// fields.
message TriggerCompactionDeterministicResponse {}
// Request message of the `DisableCommitEpoch` RPC; carries no fields.
message DisableCommitEpochRequest {}
// Response message of the `DisableCommitEpoch` RPC.
message DisableCommitEpochResponse {
  HummockVersion current_version = 1;
}
// Request message of the `RiseCtlListCompactionGroup` RPC; carries no fields.
message RiseCtlListCompactionGroupRequest {}
// Response message of the `RiseCtlListCompactionGroup` RPC.
message RiseCtlListCompactionGroupResponse {
  common.Status status = 1;
  repeated CompactionGroupInfo compaction_groups = 2;
}
// Request message of the `RiseCtlUpdateCompactionConfig` RPC: a list of
// compaction group ids and a list of config entries.
message RiseCtlUpdateCompactionConfigRequest {
  // One config entry; at most one option is set per entry. The option names
  // match fields of `CompactionConfig`, but the field numbers differ.
  message MutableConfig {
    // Field number 5 is not used by any current field; keep it reserved so
    // it cannot be reused with a different meaning.
    reserved 5;

    oneof mutable_config {
      uint64 max_bytes_for_level_base = 1;
      uint64 max_bytes_for_level_multiplier = 2;
      uint64 max_compaction_bytes = 3;
      uint64 sub_level_max_compaction_bytes = 4;
      uint64 level0_tier_compact_file_number = 6;
      uint64 target_file_size_base = 7;
      uint32 compaction_filter_mask = 8;
      uint32 max_sub_compaction = 9;
    }
  }

  repeated uint64 compaction_group_ids = 1;
  repeated MutableConfig configs = 2;
}
// Response message of the `RiseCtlUpdateCompactionConfig` RPC.
message RiseCtlUpdateCompactionConfigResponse {
  common.Status status = 1;
}
// Request message of the `SetCompactorRuntimeConfig` RPC.
message SetCompactorRuntimeConfigRequest {
  uint32 context_id = 1;
  // Defined in `compactor.proto`.
  compactor.CompactorRuntimeConfig config = 2;
}
// Response message of the `SetCompactorRuntimeConfig` RPC; carries no fields.
message SetCompactorRuntimeConfigResponse {}
// Request message of the `PinVersion` RPC.
message PinVersionRequest {
  uint32 context_id = 1;
}
// Response message of the `PinVersion` RPC.
message PinVersionResponse {
  HummockVersion pinned_version = 1;
}
// RPC surface of the Hummock manager. Every RPC is unary except
// `SubscribeCompactTasks`, which returns a stream.
service HummockManagerService {
  rpc UnpinVersionBefore(UnpinVersionBeforeRequest)
      returns (UnpinVersionBeforeResponse);
  rpc GetCurrentVersion(GetCurrentVersionRequest)
      returns (GetCurrentVersionResponse);
  rpc ListVersionDeltas(ListVersionDeltasRequest)
      returns (ListVersionDeltasResponse);
  rpc ReplayVersionDelta(ReplayVersionDeltaRequest)
      returns (ReplayVersionDeltaResponse);
  rpc GetAssignedCompactTaskNum(GetAssignedCompactTaskNumRequest)
      returns (GetAssignedCompactTaskNumResponse);
  rpc TriggerCompactionDeterministic(TriggerCompactionDeterministicRequest)
      returns (TriggerCompactionDeterministicResponse);
  rpc DisableCommitEpoch(DisableCommitEpochRequest)
      returns (DisableCommitEpochResponse);
  rpc ReportCompactionTasks(ReportCompactionTasksRequest)
      returns (ReportCompactionTasksResponse);
  rpc ReportCompactionTaskProgress(ReportCompactionTaskProgressRequest)
      returns (ReportCompactionTaskProgressResponse);
  rpc PinSnapshot(PinSnapshotRequest)
      returns (PinSnapshotResponse);
  // Shares its response type with `PinSnapshot`.
  rpc PinSpecificSnapshot(PinSpecificSnapshotRequest)
      returns (PinSnapshotResponse);
  rpc GetEpoch(GetEpochRequest)
      returns (GetEpochResponse);
  rpc UnpinSnapshot(UnpinSnapshotRequest)
      returns (UnpinSnapshotResponse);
  rpc UnpinSnapshotBefore(UnpinSnapshotBeforeRequest)
      returns (UnpinSnapshotBeforeResponse);
  rpc GetNewSstIds(GetNewSstIdsRequest)
      returns (GetNewSstIdsResponse);
  // Server-streaming: each streamed element carries one task.
  rpc SubscribeCompactTasks(SubscribeCompactTasksRequest)
      returns (stream SubscribeCompactTasksResponse);
  rpc ReportVacuumTask(ReportVacuumTaskRequest)
      returns (ReportVacuumTaskResponse);
  rpc TriggerManualCompaction(TriggerManualCompactionRequest)
      returns (TriggerManualCompactionResponse);
  rpc ReportFullScanTask(ReportFullScanTaskRequest)
      returns (ReportFullScanTaskResponse);
  rpc TriggerFullGC(TriggerFullGCRequest)
      returns (TriggerFullGCResponse);
  rpc RiseCtlGetPinnedVersionsSummary(RiseCtlGetPinnedVersionsSummaryRequest)
      returns (RiseCtlGetPinnedVersionsSummaryResponse);
  rpc RiseCtlGetPinnedSnapshotsSummary(RiseCtlGetPinnedSnapshotsSummaryRequest)
      returns (RiseCtlGetPinnedSnapshotsSummaryResponse);
  rpc RiseCtlListCompactionGroup(RiseCtlListCompactionGroupRequest)
      returns (RiseCtlListCompactionGroupResponse);
  rpc RiseCtlUpdateCompactionConfig(RiseCtlUpdateCompactionConfigRequest)
      returns (RiseCtlUpdateCompactionConfigResponse);
  rpc InitMetadataForReplay(InitMetadataForReplayRequest)
      returns (InitMetadataForReplayResponse);
  rpc SetCompactorRuntimeConfig(SetCompactorRuntimeConfigRequest)
      returns (SetCompactorRuntimeConfigResponse);
  rpc PinVersion(PinVersionRequest)
      returns (PinVersionResponse);
}
// Compaction settings. Used by `GroupConstruct`, `CompactionGroup` and
// `CompactionGroupInfo`.
message CompactionConfig {
  // Field number 6 is not used by any current field; keep it reserved so it
  // cannot be reused with a different meaning.
  reserved 6;

  enum CompactionMode {
    UNSPECIFIED = 0;
    RANGE = 1;
  }

  uint64 max_bytes_for_level_base = 1;
  uint64 max_level = 2;
  uint64 max_bytes_for_level_multiplier = 3;
  uint64 max_compaction_bytes = 4;
  uint64 sub_level_max_compaction_bytes = 5;
  uint64 level0_tier_compact_file_number = 7;
  CompactionMode compaction_mode = 8;
  // NOTE(review): a list of strings here, whereas
  // `CompactTask.compression_algorithm` is a single uint32; presumably one
  // entry per level - confirm.
  repeated string compression_algorithm = 9;
  uint64 target_file_size_base = 10;
  uint32 compaction_filter_mask = 11;
  uint32 max_sub_compaction = 12;
  uint64 max_space_reclaim_bytes = 13;
}
// Key/value size and key count statistics. Used as the map value in
// `ReportCompactionTasksRequest.table_stats_change` and
// `HummockVersionStats.table_stats`.
// NOTE(review): the fields are signed, presumably so that a change can be
// negative - confirm.
message TableStats {
  int64 total_key_size = 1;
  int64 total_value_size = 2;
  int64 total_key_count = 3;
}
// Table statistics associated with one Hummock version id.
message HummockVersionStats {
  uint64 hummock_version_id = 1;
  // NOTE(review): the key is presumably a table id - confirm.
  map<uint32, TableStats> table_stats = 2;
}