// pfs.proto
syntax = "proto3";
package pfs;
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "gogoproto/gogo.proto";
import "client/auth/auth.proto";
//// PFS Data structures (stored in etcd)
// Repo identifies a repo by its name.
message Repo {
  // Name of the repo.
  string name = 1;
}
// Branch identifies a branch by a repo plus a branch name.
message Branch {
  // The repo part of the branch reference.
  Repo repo = 1;
  // The name of the branch.
  string name = 2;
}
// BranchInfo describes a branch: its head commit plus the branches it is
// related to through provenance and subvenance.
message BranchInfo {
  // The branch being described.
  Branch branch = 4;
  // The commit at the head of the branch.
  Commit head = 2;
  // Branches recorded as this branch's provenance.
  // NOTE(review): presumably the full (transitive) set, given the separate
  // direct_provenance field -- confirm.
  repeated Branch provenance = 3;
  // Branches recorded as this branch's subvenance.
  // NOTE(review): presumably the inverse of provenance -- confirm.
  repeated Branch subvenance = 5;
  // Branches recorded as this branch's direct provenance.
  repeated Branch direct_provenance = 6;

  // Deprecated: kept only for backward compatibility. The option below makes
  // the deprecation visible to code generators rather than to readers of
  // this file only; the wire format is unaffected.
  // NOTE(review): presumably superseded by 'branch' (field 4) -- confirm.
  string name = 1 [deprecated = true];
}
// BranchInfos wraps a list of BranchInfo messages; it is the response type
// of the ListBranch RPC.
message BranchInfos {
  // The wrapped BranchInfo entries.
  repeated BranchInfo branch_info = 1;
}
// File identifies a file by a commit plus a path.
message File {
  // The commit part of the file reference.
  Commit commit = 1;
  // The path part of the file reference.
  string path = 2;
}
// Block identifies a block by its hash.
message Block {
  // Hash identifying the block.
  string hash = 1;
}
// Object identifies an object by its hash.
message Object {
  // Hash identifying the object.
  string hash = 1;
}
// Tag identifies a tag by its name.
message Tag {
  // Name of the tag.
  string name = 1;
}
// RepoInfo is the primary record describing a Repo in etcd.
message RepoInfo {
  // Field number 4 is retired and must not be reused.
  reserved 4;

  // The repo this record describes.
  Repo repo = 1;
  // Timestamp recorded as the repo's creation.
  google.protobuf.Timestamp created = 2;
  // Size of the repo, in bytes.
  uint64 size_bytes = 3;
  // Description of the repo.
  string description = 5;
  // Branches associated with the repo.
  repeated Branch branches = 7;

  // Populated by ListRepo and InspectRepo when Pachyderm's auth system is
  // active; never stored in etcd. A user's auth scope for a repo is set
  // through the Pachyderm Auth API (src/client/auth/auth.proto), not here.
  RepoAuthInfo auth_info = 6;
}
// RepoAuthInfo carries the caller's access scope for a repo. ListRepo and
// InspectRepo return it, but it is not persisted in etcd. The Pachyderm
// dashboard uses it to render repo access appropriately. A user's auth scope
// for a repo is set through the Pachyderm Auth API
// (src/client/auth/auth.proto).
message RepoAuthInfo {
  // The caller's access level to the relevant repo. This may be OWNER even
  // when the user isn't an OWNER of the repo, e.g. if they're an admin for
  // the cluster.
  auth.Scope access_level = 1;
}
// Commit is a reference to a commit. For example, both the collection of
// branches and the collection of currently-open commits in etcd are
// collections of Commit protos.
message Commit {
  // The repo part of the commit reference.
  Repo repo = 1;
  // The commit's ID. The gogoproto customname option sets the name of the
  // generated Go field to "ID".
  string id = 2 [(gogoproto.customname) = "ID"];
}
// CommitRange represents a chain of commits in which 'lower' is an ancestor
// of 'upper' or, for a range of size 1, 'lower' and 'upper' are the same
// commit.
message CommitRange {
  // The ancestor end of the chain.
  Commit lower = 1;
  // The descendant end of the chain.
  Commit upper = 2;
}
// CommitInfo is the primary record describing a commit in etcd.
message CommitInfo {
  // The commit this record describes.
  Commit commit = 1;
  // User-provided string describing this commit.
  string description = 8;
  // Parent of this commit.
  Commit parent_commit = 2;
  // Children of this commit.
  repeated Commit child_commits = 11;
  // Timestamp recorded as the commit's start.
  google.protobuf.Timestamp started = 3;
  // Timestamp recorded as the commit's finish; nil while the commit is open
  // (see 'tree').
  google.protobuf.Timestamp finished = 4;
  // Size of the commit, in bytes.
  uint64 size_bytes = 5;

  // The commits on which this commit is provenant. provenance[i] is a commit
  // in branch_provenance[i], which is one of the branches on which this
  // commit's branch is provenant.
  repeated Commit provenance = 6;
  repeated Branch branch_provenance = 10;

  // Number of provenant commits that have been finished. Once
  // ready_provenance == len(provenance), the commit is ready to be processed
  // by pps.
  int64 ready_provenance = 12;

  // Commit ranges recorded as this commit's subvenance.
  // NOTE(review): presumably the inverse of provenance -- confirm.
  repeated CommitRange subvenance = 9;

  // Where the serialized form of a tree representing the entire file system
  // hierarchy of the repo at this commit is stored. When this is nil, the
  // commit is either open (and 'finished' is nil as well) or is the output
  // commit of a failed job (and 'finished' holds the end time of the job).
  Object tree = 7;
}
// FileType enumerates the kinds of entries a FileInfo can describe.
enum FileType {
  // Zero (default) value.
  // NOTE(review): the name suggests a placeholder that no real file uses --
  // confirm.
  RESERVED = 0;
  // A regular file.
  FILE = 1;
  // A directory.
  DIR = 2;
}
// FileInfo describes a single file or directory.
message FileInfo {
  // The file being described.
  File file = 1;
  // Kind of entry (see FileType).
  FileType file_type = 2;
  // Size of the file, in bytes.
  uint64 size_bytes = 3;
  // Base names of the children, i.e. only the filenames rather than the full
  // paths.
  repeated string children = 6;
  // Objects associated with the file.
  // NOTE(review): presumably the objects holding the file's content --
  // confirm.
  repeated Object objects = 8;
  // Hash of the file.
  // NOTE(review): what exactly is hashed is not visible here -- confirm.
  bytes hash = 7;
}
// ByteRange is a pair of byte offsets delimiting a range.
// NOTE(review): whether 'upper' is inclusive or exclusive is not visible
// here -- confirm.
message ByteRange {
  // Lower bound of the range.
  uint64 lower = 1;
  // Upper bound of the range.
  uint64 upper = 2;
}
// BlockRef pairs a block with a byte range.
// NOTE(review): presumably the range selects a slice within the block --
// confirm.
message BlockRef {
  // The referenced block.
  Block block = 1;
  // The byte range attached to the reference.
  ByteRange range = 2;
}
// ObjectInfo pairs an object with a block reference; it is the response type
// of the InspectObject and InspectTag RPCs.
message ObjectInfo {
  // The object being described.
  Object object = 1;
  // Block reference associated with the object.
  BlockRef block_ref = 2;
}
// PFS API
// CreateRepoRequest is the request message of the CreateRepo RPC.
message CreateRepoRequest {
  // Field number 2 is retired and must not be reused.
  reserved 2;

  // The repo to create.
  Repo repo = 1;
  // Description of the repo.
  string description = 3;
  // NOTE(review): presumably requests an update of an existing repo instead
  // of a plain create -- confirm against the server implementation.
  bool update = 4;
}
// InspectRepoRequest is the request message of the InspectRepo RPC.
message InspectRepoRequest {
  // The repo to inspect.
  Repo repo = 1;
}
// ListRepoRequest is the request message of the ListRepo RPC. It currently
// has no fields.
message ListRepoRequest {
  // Field number 1 is retired and must not be reused.
  reserved 1;
}
// ListRepoResponse is the response message of the ListRepo RPC.
message ListRepoResponse {
  // One RepoInfo per listed repo.
  repeated RepoInfo repo_info = 1;
}
// DeleteRepoRequest is the request message of the DeleteRepo RPC.
message DeleteRepoRequest {
  // The repo to delete.
  Repo repo = 1;
  // NOTE(review): presumably forces deletion where it would otherwise be
  // refused -- confirm.
  bool force = 2;
  // NOTE(review): presumably requests deletion of every repo rather than
  // just 'repo' -- confirm.
  bool all = 3;
}
// CommitState describes the states a commit can be in. The states are
// increasingly specific, i.e. a commit that is FINISHED also counts as
// STARTED.
enum CommitState {
  // The commit has been started; all commits satisfy this state.
  STARTED = 0;
  // The commit has been started, and all of its provenant commits have been
  // finished.
  READY = 1;
  // The commit has been finished.
  FINISHED = 2;
}
// StartCommitRequest is the request message of the StartCommit RPC.
message StartCommitRequest {
  // Parent of the new commit. parent.ID may be empty, in which case the
  // commit that 'branch' points to is used as the parent. If 'branch' is
  // empty, or does not exist, the commit will have no parent.
  Commit parent = 1;
  // User-provided string describing this commit.
  string description = 4;
  // Name of the branch (see 'parent' for how it is used).
  string branch = 3;
  // Provenance of the new commit.
  repeated Commit provenance = 2;
}
// BuildCommitRequest is the request message of the BuildCommit RPC, which
// builds a commit backed by the given tree.
message BuildCommitRequest {
  // Parent of the commit being built.
  Commit parent = 1;
  // Name of a branch.
  // NOTE(review): presumably the branch the built commit is placed on --
  // confirm.
  string branch = 4;
  // Provenance of the commit being built.
  repeated Commit provenance = 2;
  // The tree backing the commit.
  Object tree = 3;
  // Sets the ID of the created commit.
  // NOTE(review): the field name is not lower_snake_case; it is kept as-is
  // because renaming it would change generated code and JSON names.
  string ID = 5;
}
// FinishCommitRequest is the request message of the FinishCommit RPC.
message FinishCommitRequest {
  // The commit to finish.
  Commit commit = 1;
  // User-provided string describing this commit. When set, it overwrites the
  // description given in StartCommit.
  string description = 2;
  // Tree for the commit (left nil when 'empty' is set).
  Object tree = 3;
  // When set, 'commit' is closed (its 'finished' field is set to the current
  // time) but its 'tree' is left nil.
  bool empty = 4;
}
// InspectCommitRequest is the request message of the InspectCommit RPC.
message InspectCommitRequest {
  // The commit to inspect.
  Commit commit = 1;
  // Makes InspectCommit block until the commit is in the desired state.
  CommitState block_state = 2;
}
// ListCommitRequest is the request message shared by the ListCommit and
// ListCommitStream RPCs.
message ListCommitRequest {
  // The repo whose commits are listed.
  Repo repo = 1;
  // NOTE(review): 'from' and 'to' presumably bound the range of commits
  // returned; which end is inclusive is not visible here -- confirm.
  Commit from = 2;
  Commit to = 3;
  // NOTE(review): presumably caps how many commits are returned -- confirm,
  // including the meaning of 0.
  uint64 number = 4;
}
// CommitInfos wraps a list of CommitInfo messages; it is the response type
// of the ListCommit RPC.
message CommitInfos {
  // The wrapped CommitInfo entries.
  repeated CommitInfo commit_info = 1;
}
// CreateBranchRequest is the request message of the CreateBranch RPC.
message CreateBranchRequest {
  // Commit to use as the head of the branch.
  Commit head = 1;
  // s_branch has the same field number and type as SetBranchRequest.Branch
  // had in Pachyderm 1.6, so that operations generated by Pachyderm 1.6's
  // Admin.Export are deserialized correctly by Pachyderm 1.7.
  string s_branch = 2;
  // The branch to create.
  Branch branch = 3;
  // Provenance of the branch.
  repeated Branch provenance = 4;
}
// InspectBranchRequest is the request message of the InspectBranch RPC.
message InspectBranchRequest {
  // The branch to inspect.
  Branch branch = 1;
}
// ListBranchRequest is the request message of the ListBranch RPC.
message ListBranchRequest {
  // The repo whose branches are listed.
  Repo repo = 1;
}
// DeleteBranchRequest is the request message of the DeleteBranch RPC.
message DeleteBranchRequest {
  // The branch to delete.
  Branch branch = 1;
  // NOTE(review): presumably forces deletion where it would otherwise be
  // refused -- confirm.
  bool force = 2;
}
// DeleteCommitRequest is the request message of the DeleteCommit RPC.
message DeleteCommitRequest {
  // The commit to delete.
  Commit commit = 1;
}
// FlushCommitRequest is the request message of the FlushCommit RPC, which
// waits for downstream commits to finish.
message FlushCommitRequest {
  // The commits to flush.
  repeated Commit commits = 1;
  // NOTE(review): presumably restricts the flush to downstream commits in
  // these repos -- confirm.
  repeated Repo to_repos = 2;
}
// SubscribeCommitRequest is the request message of the SubscribeCommit RPC,
// which subscribes for new commits on a given branch.
message SubscribeCommitRequest {
  // The repo containing the branch.
  Repo repo = 1;
  // Name of the branch to subscribe to.
  string branch = 2;
  // Only commits created since this commit are returned.
  Commit from = 3;
  // Commits are not returned until they are in (at least) this state.
  CommitState state = 4;
}
// GetFileRequest is the request message of the GetFile RPC.
message GetFileRequest {
  // The file to read.
  File file = 1;
  // Offset, in bytes.
  // NOTE(review): presumably where in the file reading starts -- confirm.
  int64 offset_bytes = 2;
  // Size, in bytes.
  // NOTE(review): presumably how much to read, with 0 having a special
  // meaning -- confirm.
  int64 size_bytes = 3;
}
// Delimiter enumerates the delimiter choices available to
// PutFileRequest.delimiter.
enum Delimiter {
  // Zero (default) value.
  NONE = 0;
  // NOTE(review): presumably splits the data on JSON value boundaries --
  // confirm.
  JSON = 1;
  // NOTE(review): presumably splits the data on line boundaries -- confirm.
  LINE = 2;
}
// An OverwriteIndex specifies the index of objects from which new writes are
// applied. Existing objects starting from that index are deleted. The index
// is wrapped in its own message so that a zero index can be distinguished
// from a non-existent index.
message OverwriteIndex {
  // The object index at which the overwrite starts.
  int64 index = 1;
}
// PutFileRequest is the message streamed to the PutFile RPC.
message PutFileRequest {
  // Field number 2 is retired and must not be reused.
  reserved 2;

  // The file to write.
  File file = 1;
  // Bytes carried by this request.
  bytes value = 3;
  // A URL.
  // NOTE(review): presumably a source to fetch the data from instead of
  // 'value' -- confirm.
  string url = 5;
  // Applies only to URLs that can be recursively walked, for example s3://
  // URLs.
  bool recursive = 6;
  // Causes data to be broken up into separate files with file.path as a
  // prefix.
  Delimiter delimiter = 7;
  // Target number of datums in each written file. The actual number may be
  // lower if the data does not split evenly, but will never be higher,
  // unless the value is 0.
  int64 target_file_datums = 8;
  // Target number of bytes in each written file. Files may have more or
  // fewer bytes than the target.
  int64 target_file_bytes = 9;
  // The object index where the write starts from. All existing objects
  // starting from the index are deleted.
  OverwriteIndex overwrite_index = 10;
}
// PutFileRecord temporarily records a PutFile request in etcd.
message PutFileRecord {
  // Size, in bytes.
  int64 size_bytes = 1;
  // Hash of an object.
  // NOTE(review): presumably the object holding the written data -- confirm.
  string object_hash = 2;
  // Overwrite index attached to the record (see OverwriteIndex).
  OverwriteIndex overwrite_index = 3;
}
// PutFileRecords groups PutFileRecord entries together with two flags.
message PutFileRecords {
  // NOTE(review): presumably set when the write was split using a delimiter
  // -- confirm.
  bool split = 1;
  // The grouped records.
  repeated PutFileRecord records = 2;
  // NOTE(review): presumably marks the file as deleted -- confirm.
  bool tombstone = 3;
}
// CopyFileRequest is the request message of the CopyFile RPC, which copies
// the contents of one file to another.
message CopyFileRequest {
  // Source file.
  File src = 1;
  // Destination file.
  File dst = 2;
  // NOTE(review): presumably replaces existing content at 'dst' instead of
  // adding to it -- confirm.
  bool overwrite = 3;
}
// InspectFileRequest is the request message of the InspectFile RPC.
message InspectFileRequest {
  // The file to inspect.
  File file = 1;
}
// ListFileRequest is the request message shared by the ListFile and
// ListFileStream RPCs.
message ListFileRequest {
  // The parent directory of the files to list. This fixes the repo, the
  // commit/branch, and the path prefix of the files we're interested in.
  File file = 1;
  // Whether the result should include file contents, which may be large
  // (i.e. the list of children for directories, and the list of object
  // references for regular files).
  bool full = 2;
}
// GlobFileRequest is the request message shared by the GlobFile and
// GlobFileStream RPCs.
message GlobFileRequest {
  // The commit to search.
  Commit commit = 1;
  // The pattern to match.
  // NOTE(review): presumably a glob matched against file paths -- confirm
  // the supported syntax.
  string pattern = 2;
}
// FileInfos is the result type of both ListFile and GlobFile.
message FileInfos {
  // The wrapped FileInfo entries.
  repeated FileInfo file_info = 1;
}
// DiffFileRequest is the request message of the DiffFile RPC.
message DiffFileRequest {
  // The newer side of the diff.
  File new_file = 1;
  // The older side of the diff. May be left nil, in which case the same path
  // in the parent of new_file's commit is used.
  File old_file = 2;
  // NOTE(review): presumably limits the diff to the top level instead of
  // recursing -- confirm.
  bool shallow = 3;
}
// DiffFileResponse is the response message of the DiffFile RPC.
message DiffFileResponse {
  // File infos reported for the new side of the diff.
  repeated FileInfo new_files = 1;
  // File infos reported for the old side of the diff.
  repeated FileInfo old_files = 2;
}
// DeleteFileRequest is the request message of the DeleteFile RPC.
message DeleteFileRequest {
  // The file to delete.
  File file = 1;
}
// API is the PFS service. Its RPCs are grouped into repo, commit, branch and
// file operations, plus DeleteAll.
//
// RPCs whose comments call them deprecated also carry
// 'option deprecated = true;' so that code generators surface the
// deprecation; this changes neither the wire format nor the RPC signatures.
service API {
  // Repo rpcs

  // CreateRepo creates a new repo.
  // An error is returned if the repo already exists.
  rpc CreateRepo(CreateRepoRequest) returns (google.protobuf.Empty) {}
  // InspectRepo returns info about a repo.
  rpc InspectRepo(InspectRepoRequest) returns (RepoInfo) {}
  // ListRepo returns info about all repos.
  rpc ListRepo(ListRepoRequest) returns (ListRepoResponse) {}
  // DeleteRepo deletes a repo.
  rpc DeleteRepo(DeleteRepoRequest) returns (google.protobuf.Empty) {}

  // Commit rpcs

  // StartCommit creates a new write commit from a parent commit.
  rpc StartCommit(StartCommitRequest) returns (Commit) {}
  // FinishCommit turns a write commit into a read commit.
  rpc FinishCommit(FinishCommitRequest) returns (google.protobuf.Empty) {}
  // InspectCommit returns the info about a commit.
  rpc InspectCommit(InspectCommitRequest) returns (CommitInfo) {}
  // ListCommit returns info about all commits.
  // Deprecated: use ListCommitStream instead.
  rpc ListCommit(ListCommitRequest) returns (CommitInfos) {
    option deprecated = true;
  }
  // ListCommitStream is like ListCommit, but returns its results in a GRPC
  // stream.
  rpc ListCommitStream(ListCommitRequest) returns (stream CommitInfo) {}
  // DeleteCommit deletes a commit.
  rpc DeleteCommit(DeleteCommitRequest) returns (google.protobuf.Empty) {}
  // FlushCommit waits for downstream commits to finish.
  rpc FlushCommit(FlushCommitRequest) returns (stream CommitInfo) {}
  // SubscribeCommit subscribes for new commits on a given branch.
  rpc SubscribeCommit(SubscribeCommitRequest) returns (stream CommitInfo) {}
  // BuildCommit builds a commit that's backed by the given tree.
  rpc BuildCommit(BuildCommitRequest) returns (Commit) {}

  // Branch rpcs

  // CreateBranch creates a new branch.
  rpc CreateBranch(CreateBranchRequest) returns (google.protobuf.Empty) {}
  // InspectBranch returns info about a branch.
  rpc InspectBranch(InspectBranchRequest) returns (BranchInfo) {}
  // ListBranch returns info about the heads of branches.
  rpc ListBranch(ListBranchRequest) returns (BranchInfos) {}
  // DeleteBranch deletes a branch; note that the commits still exist.
  rpc DeleteBranch(DeleteBranchRequest) returns (google.protobuf.Empty) {}

  // File rpcs

  // PutFile writes the specified file to pfs.
  rpc PutFile(stream PutFileRequest) returns (google.protobuf.Empty) {}
  // CopyFile copies the contents of one file to another.
  rpc CopyFile(CopyFileRequest) returns (google.protobuf.Empty) {}
  // GetFile returns a byte stream of the contents of the file.
  rpc GetFile(GetFileRequest) returns (stream google.protobuf.BytesValue) {}
  // InspectFile returns info about a file.
  rpc InspectFile(InspectFileRequest) returns (FileInfo) {}
  // ListFile returns info about all files.
  // Deprecated: use ListFileStream instead.
  rpc ListFile(ListFileRequest) returns (FileInfos) {
    option deprecated = true;
  }
  // ListFileStream is a streaming version of ListFile.
  // TODO(msteffen): When the dash has been updated to use ListFileStream,
  // replace ListFile with this RPC
  // (https://github.com/pachyderm/dash/issues/201)
  rpc ListFileStream(ListFileRequest) returns (stream FileInfo) {}
  // GlobFile returns info about all files.
  // Deprecated: use GlobFileStream instead.
  rpc GlobFile(GlobFileRequest) returns (FileInfos) {
    option deprecated = true;
  }
  // GlobFileStream is a streaming version of GlobFile.
  // TODO(msteffen): When the dash has been updated to use GlobFileStream,
  // replace GlobFile with this RPC
  // (https://github.com/pachyderm/dash/issues/201)
  rpc GlobFileStream(GlobFileRequest) returns (stream FileInfo) {}
  // DiffFile returns the differences between 2 paths at 2 commits.
  rpc DiffFile(DiffFileRequest) returns (DiffFileResponse) {}
  // DeleteFile deletes a file.
  rpc DeleteFile(DeleteFileRequest) returns (google.protobuf.Empty) {}

  // DeleteAll deletes everything.
  rpc DeleteAll(google.protobuf.Empty) returns (google.protobuf.Empty) {}
}
// PutObjectRequest is the message streamed to the PutObject and
// PutObjectSplit RPCs.
message PutObjectRequest {
  // Bytes carried by this request.
  bytes value = 1;
  // Tags carried by this request.
  // NOTE(review): presumably attached to the resulting object -- confirm.
  repeated Tag tags = 2;
}
// GetObjectsRequest is the request message of the GetObjects RPC.
message GetObjectsRequest {
  // The objects to read.
  repeated Object objects = 1;
  // Offset, in bytes.
  // NOTE(review): presumably where reading starts within the requested
  // objects -- confirm.
  uint64 offset_bytes = 2;
  // The number of bytes we intend to read.
  uint64 size_bytes = 3;
  // The total number of bytes in these objects. It need not be entirely
  // accurate, and may be unknown (in which case it is set to 0); it is used
  // primarily as a hint for cache eviction.
  uint64 total_size = 4;
}
// TagObjectRequest is the request message of the TagObject RPC.
message TagObjectRequest {
  // The object to tag.
  Object object = 1;
  // The tags named in the request.
  repeated Tag tags = 2;
}
// ListObjectsRequest is the request message of the ListObjects RPC. It has
// no fields.
message ListObjectsRequest {
}
// ListTagsRequest is the request message of the ListTags RPC.
message ListTagsRequest {
  // NOTE(review): presumably restricts the listing to tags whose name starts
  // with this prefix -- confirm.
  string prefix = 1;
  // NOTE(review): presumably controls whether ListTagsResponse.object is
  // populated -- confirm.
  bool include_object = 2;
}
// ListTagsResponse is the message streamed back by the ListTags RPC. Each
// message carries one tag and one object.
message ListTagsResponse {
  // The listed tag.
  Tag tag = 1;
  // The object carried alongside the tag.
  Object object = 2;
}
// DeleteObjectsRequest is the request message of the DeleteObjects RPC.
message DeleteObjectsRequest {
  // The objects to delete.
  repeated Object objects = 1;
}
// DeleteObjectsResponse is the response message of the DeleteObjects RPC. It
// has no fields.
message DeleteObjectsResponse {
}
// DeleteTagsRequest is the request message of the DeleteTags RPC.
message DeleteTagsRequest {
  // The tags to delete.
  repeated Tag tags = 1;
}
// DeleteTagsResponse is the response message of the DeleteTags RPC. It has
// no fields.
message DeleteTagsResponse {
}
// CheckObjectRequest is the request message of the CheckObject RPC.
message CheckObjectRequest {
  // The object whose existence is checked.
  Object object = 1;
}
// CheckObjectResponse is the response message of the CheckObject RPC.
message CheckObjectResponse {
  // Result of the existence check.
  bool exists = 1;
}
// Objects wraps a list of Object messages; it is the response type of the
// PutObjectSplit RPC.
message Objects {
  // The wrapped objects.
  repeated Object objects = 1;
}
// ObjectAPI is the service for working with objects and tags. Where an RPC
// below is described only by its request and response types, that is all
// this file establishes about it.
service ObjectAPI {
  // PutObject takes a stream of PutObjectRequest and returns one Object.
  rpc PutObject(stream PutObjectRequest) returns (Object) {}
  // PutObjectSplit takes a stream of PutObjectRequest and returns Objects,
  // i.e. a list of Object.
  rpc PutObjectSplit(stream PutObjectRequest) returns (Objects) {}
  // GetObject takes an Object and returns a stream of bytes values.
  rpc GetObject(Object) returns (stream google.protobuf.BytesValue) {}
  // GetObjects takes a GetObjectsRequest and returns a stream of bytes
  // values.
  rpc GetObjects(GetObjectsRequest)
      returns (stream google.protobuf.BytesValue) {}
  // TagObject takes an object together with a list of tags.
  rpc TagObject(TagObjectRequest) returns (google.protobuf.Empty) {}
  // InspectObject takes an Object and returns its ObjectInfo.
  rpc InspectObject(Object) returns (ObjectInfo) {}
  // CheckObject checks if an object exists in the blob store without
  // actually reading the object.
  rpc CheckObject(CheckObjectRequest) returns (CheckObjectResponse) {}
  // ListObjects returns a stream of Object.
  rpc ListObjects(ListObjectsRequest) returns (stream Object) {}
  // DeleteObjects takes a list of objects to delete.
  rpc DeleteObjects(DeleteObjectsRequest) returns (DeleteObjectsResponse) {}
  // GetTag takes a Tag and returns a stream of bytes values.
  rpc GetTag(Tag) returns (stream google.protobuf.BytesValue) {}
  // InspectTag takes a Tag and returns an ObjectInfo.
  rpc InspectTag(Tag) returns (ObjectInfo) {}
  // ListTags returns a stream of ListTagsResponse.
  rpc ListTags(ListTagsRequest) returns (stream ListTagsResponse) {}
  // DeleteTags takes a list of tags to delete.
  rpc DeleteTags(DeleteTagsRequest) returns (DeleteTagsResponse) {}
  // Compact takes and returns an empty message.
  // NOTE(review): what is compacted is not visible in this file -- confirm.
  rpc Compact(google.protobuf.Empty) returns (google.protobuf.Empty) {}
}
// ObjectIndex holds two string-keyed maps: one to block references and one
// to objects.
message ObjectIndex {
  // Maps a string key to a BlockRef.
  // NOTE(review): presumably keyed by object hash -- confirm.
  map<string, BlockRef> objects = 1;
  // Maps a string key to an Object.
  // NOTE(review): presumably keyed by tag name -- confirm.
  map<string, Object> tags = 2;
}