-
Notifications
You must be signed in to change notification settings - Fork 568
/
common.go
100 lines (92 loc) · 2.84 KB
/
common.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package common
import (
"context"
"encoding/base64"
"encoding/binary"
"encoding/hex"
"path"
"time"
"github.com/pachyderm/pachyderm/v2/src/internal/client"
"github.com/pachyderm/pachyderm/v2/src/pfs"
"github.com/pachyderm/pachyderm/v2/src/pps"
)
const (
// MetaPrefix is the prefix for the meta path.
MetaPrefix = "meta"
// MetaFileName is the name of the meta file.
MetaFileName = "meta"
// PFSPrefix is the prefix for the pfs path.
PFSPrefix = "pfs"
// OutputPrefix is the prefix for the output path.
OutputPrefix = "out"
// TmpFileName is the name of the tmp file.
TmpFileName = "tmp"
// EnvFileName is the name of the env file.
EnvFileName = ".env"
// TTL is a fixed duration of 15 minutes.
// NOTE(review): presumably a time-to-live for temporary resources; this file
// does not show what it is applied to — confirm against callers.
TTL = 15 * time.Minute
)
// MetaFilePath returns the slash-separated path formed by joining MetaPrefix,
// the given id, and MetaFileName, in that order.
func MetaFilePath(id string) string {
	elems := []string{MetaPrefix, id, MetaFileName}
	return path.Join(elems...)
}
// DatumID derives the ID of a datum from its inputs. For every input, in
// order, four strings are fed into a pfs hash: the input name, the repo name
// of the input file's commit, the input branch, and the input file's path.
// Each string is immediately followed by its byte length encoded as a
// big-endian int64. The returned ID is the hex encoding of the final digest.
func DatumID(inputs []*Input) string {
	h := pfs.NewHash()
	// writeField feeds s into the hash and then the length of s. The error
	// from binary.Write is discarded.
	writeField := func(s string) {
		h.Write([]byte(s))
		_ = binary.Write(h, binary.BigEndian, int64(len(s)))
	}
	for _, in := range inputs {
		// The field order below determines the resulting ID; do not reorder.
		writeField(in.Name)
		writeField(in.FileInfo.File.Commit.Repo.Name)
		writeField(in.Branch)
		writeField(in.FileInfo.File.Path)
	}
	digest := h.Sum(nil)
	return hex.EncodeToString(digest)
}
// HashDatum computes the hash of a datum. The digest covers, in order: the
// datum's ID as produced by DatumID, the Hash of each input's FileInfo, and
// finally pipelineSalt. The result is returned hex-encoded.
func HashDatum(pipelineSalt string, inputs []*Input) string {
	h := pfs.NewHash()
	h.Write([]byte(DatumID(inputs)))
	for i := range inputs {
		h.Write([]byte(inputs[i].FileInfo.Hash))
	}
	h.Write([]byte(pipelineSalt))
	digest := h.Sum(nil)
	return hex.EncodeToString(digest)
}
// MatchDatum reports whether a datum's inputs satisfy a filter. Every string
// in filter must equal, for at least one input, either the input's Path or
// its Hash rendered as standard base64 or as hex. The order of filter and
// inputs is irrelevant, and an empty filter matches any inputs.
func MatchDatum(filter []string, inputs []*pps.InputFile) bool {
	// matchesAnyInput reports whether a single filter entry corresponds to
	// the Path or encoded Hash of at least one input.
	matchesAnyInput := func(want string) bool {
		for _, in := range inputs {
			if want == in.Path {
				return true
			}
			if want == base64.StdEncoding.EncodeToString(in.Hash) {
				return true
			}
			if want == hex.EncodeToString(in.Hash) {
				return true
			}
		}
		return false
	}
	for _, want := range filter {
		// A single unmatched filter entry rejects the datum.
		if !matchesAnyInput(want) {
			return false
		}
	}
	return true
}
// Shard calls client.ShardFileSet for each of the given file set IDs and
// returns the longest list of path ranges it obtained. When several file sets
// yield lists of equal length, the earliest one is kept. The result is nil if
// fileSetIDs is empty or no call produced any shards. The first error from
// client.ShardFileSet stops the iteration and is returned unchanged.
//
// TODO: Trim non-meta file shards?
func Shard(ctx context.Context, c pfs.APIClient, fileSetIDs []string) ([]*pfs.PathRange, error) {
	var longest []*pfs.PathRange
	for _, id := range fileSetIDs {
		candidate, err := client.ShardFileSet(ctx, c, id)
		if err != nil {
			return nil, err
		}
		// Only a strictly longer list replaces the current best.
		if len(candidate) <= len(longest) {
			continue
		}
		longest = candidate
	}
	return longest, nil
}