-
Notifications
You must be signed in to change notification settings - Fork 10
/
hash.go
451 lines (415 loc) · 16.9 KB
/
hash.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
package getproviders
import (
"crypto/sha256"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"golang.org/x/mod/sumdb/dirhash"
)
// Hash is a specially-formatted string representing a checksum of a package
// or the contents of the package.
//
// A Hash string always starts with a scheme, which is a short series of
// alphanumeric characters followed by a colon, and then the remainder of the
// string has a different meaning depending on the scheme prefix.
//
// The currently-valid schemes are defined as the constants of type HashScheme
// in this package.
//
// Callers outside of this package must not create Hash values via direct
// conversion. Instead, use either the HashScheme.New method on one of the
// HashScheme constants (for a hash of a particular scheme) or the ParseHash
// function (if hashes of any scheme are acceptable).
type Hash string
// NilHash is the zero value of Hash: the empty string. It isn't a valid hash
// because it has no scheme prefix, so the methods that need to split a hash
// into its scheme and value (Scheme, HasScheme and Value) will panic when
// called on it.
const NilHash Hash = ""
// ParseHash converts the string representation of a hash into a Hash value.
//
// A particular version of Terraform only supports a fixed set of hash schemes,
// but this function intentionally accepts unrecognized schemes so that
// schemes introduced in the future can be silently ignored. For that reason,
// the Scheme method of the returned Hash may return a value that isn't one of
// the HashScheme constants in this package.
//
// The only syntax requirement is a non-empty scheme followed by a colon. The
// value portion after the colon is not checked against the scheme; invalid
// values are just considered to not match any packages.
//
// If this function returns an error then the returned Hash is NilHash and
// must not be used.
func ParseHash(s string) (Hash, error) {
	// The colon must sit at index 1 or later: -1 means there is no colon at
	// all, and 0 would mean a zero-length scheme, which is not allowed.
	if sep := strings.IndexByte(s, ':'); sep <= 0 {
		return NilHash, fmt.Errorf("hash string must start with a scheme keyword followed by a colon")
	}
	return Hash(s), nil
}
// MustParseHash parses the given string with ParseHash and returns the
// result, panicking with the error's message if ParseHash reports an error.
func MustParseHash(s string) Hash {
	parsed, parseErr := ParseHash(s)
	if parseErr == nil {
		return parsed
	}
	// The panic value is the error message string, not the error itself.
	panic(parseErr.Error())
}
// Scheme returns the scheme of the receiving hash: everything up to and
// including the first colon.
//
// If the receiver contains no colon at all then it is not using valid syntax
// and this method will panic.
func (h Hash) Scheme() HashScheme {
	raw := string(h)
	sep := strings.IndexByte(raw, ':')
	// NOTE(review): ParseHash also rejects a zero-length scheme (colon at
	// index 0), whereas this check only rejects a missing colon, so a
	// directly-converted value such as Hash(":x") yields the scheme ":"
	// rather than panicking. Confirm whether that leniency is intended.
	if sep == -1 {
		panic(fmt.Sprintf("invalid hash string %q", h))
	}
	// The trailing colon is kept so the result is directly comparable with
	// the HashScheme constants, which include it.
	return HashScheme(raw[:sep+1])
}
// HasScheme returns true if the given scheme matches the receiver's scheme,
// or false otherwise.
//
// The comparison is an exact match against the result of the Scheme method,
// so "want" must include the trailing colon just as the HashScheme constants
// in this package do.
//
// If the receiver is not using valid syntax then this method will panic,
// because Scheme panics in that case.
func (h Hash) HasScheme(want HashScheme) bool {
return h.Scheme() == want
}
// Value returns the scheme-specific value from the receiving hash, which is
// everything after the first colon. The meaning of this value depends on the
// scheme.
//
// If the receiver contains no colon at all then it is not using valid syntax
// and this method will panic.
func (h Hash) Value() string {
	raw := string(h)
	sep := strings.IndexByte(raw, ':')
	// NOTE(review): as with Scheme, only a missing colon is rejected here; a
	// zero-length scheme (which ParseHash refuses) is tolerated. Confirm
	// whether that is intended.
	if sep == -1 {
		panic(fmt.Sprintf("invalid hash string %q", h))
	}
	return raw[sep+1:]
}
// String returns a string representation of the receiving hash, which is
// the full hash string including its scheme prefix.
//
// Unlike Scheme and Value, this method performs no syntax checks and so it
// does not panic for NilHash or other invalid values.
func (h Hash) String() string {
return string(h)
}
// GoString returns a Go syntax representation of the receiving hash.
//
// This is here primarily to help with producing descriptive test failure
// output; these results are not particularly useful at runtime.
//
// NilHash is rendered by name. Any other receiver must contain a colon,
// because this method relies on Scheme and Value, which panic otherwise.
func (h Hash) GoString() string {
	if h == NilHash {
		return "getproviders.NilHash"
	}

	scheme, value := h.Scheme(), h.Value()
	if scheme == HashScheme1 {
		return fmt.Sprintf("getproviders.HashScheme1.New(%q)", value)
	}
	if scheme == HashSchemeZip {
		return fmt.Sprintf("getproviders.HashSchemeZip.New(%q)", value)
	}

	// This fallback is for when we encounter lock files or API responses
	// with hash schemes that the current version of Terraform isn't
	// familiar with. They were presumably introduced in a later version.
	return fmt.Sprintf("getproviders.HashScheme(%q).New(%q)", scheme, value)
}
// HashScheme is an enumeration of schemes that are allowed for values of type
// Hash.
//
// A HashScheme value includes the trailing colon that separates the scheme
// from the value in a Hash string, so that a scheme and a value can be joined
// by plain concatenation (see HashScheme.New).
type HashScheme string
const (
	// HashScheme1 is the scheme identifier for the first hash scheme.
	//
	// Use PackageHashV1 (or one of its wrapper functions, such as the
	// HashV1 method of PackageMeta) to calculate hashes with this scheme.
	HashScheme1 HashScheme = "h1:"

	// HashSchemeZip is the scheme identifier for the legacy hash scheme that
	// applies to distribution archives (.zip files) rather than package
	// contents, and can therefore only be verified against the original
	// distribution .zip file, not an extracted directory.
	//
	// Use PackageHashLegacyZipSHA to calculate hashes with this scheme.
	HashSchemeZip HashScheme = "zh:"
)
// New creates a new Hash value with the receiver as its scheme and the given
// raw string as its value.
//
// The result is the plain concatenation of the receiver and the value, so
// the receiver is expected to already end with the colon separator, as the
// HashScheme constants in this package do.
//
// It's the caller's responsibility to make sure that the given value makes
// sense for the selected scheme; no validation is performed here.
func (hs HashScheme) New(value string) Hash {
return Hash(string(hs) + value)
}
// PackageHash computes a hash of the contents of the package at the given
// location, using whichever hash algorithm is the current default.
//
// Currently, this function returns version 1 hashes as produced by the
// function PackageHashV1, but this function may switch to other versions in
// later releases. Call PackageHashV1 directly if you specifically need a V1
// hash.
//
// PackageHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func PackageHash(loc PackageLocation) (Hash, error) {
return PackageHashV1(loc)
}
// PackageMatchesHash returns true if the package at the given location matches
// the given hash, or false otherwise.
//
// If it cannot read from the given location, or if the given hash is in an
// unsupported format, PackageMatchesHash returns an error.
//
// Two hash formats are currently supported: the "h1:" scheme computed by
// PackageHashV1, and the legacy "zh:" scheme computed by
// PackageHashLegacyZipSHA. A "zh:" hash can be checked only against a
// PackageLocalArchive location; any other location type is an error for that
// scheme. If other formats are introduced in future then PackageMatchesHash
// may accept them too, and may generate errors for any formats that become
// obsolete.
//
// PackageMatchesHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
//
// The given hash must use valid syntax, because its Scheme method is called
// and panics otherwise.
func PackageMatchesHash(loc PackageLocation, want Hash) (bool, error) {
	var (
		got Hash
		err error
	)

	// Each supported scheme only differs in how the package's actual hash
	// is computed; the error check and comparison below are shared.
	switch want.Scheme() {
	case HashScheme1:
		got, err = PackageHashV1(loc)
	case HashSchemeZip:
		archiveLoc, isArchive := loc.(PackageLocalArchive)
		if !isArchive {
			return false, fmt.Errorf(`ziphash scheme ("zh:" prefix) is not supported for unpacked provider packages`)
		}
		got, err = PackageHashLegacyZipSHA(archiveLoc)
	default:
		return false, fmt.Errorf("unsupported hash format (this may require a newer version of Terraform)")
	}

	if err != nil {
		return false, err
	}
	return got == want, nil
}
// PackageMatchesAnyHash returns true if the package at the given location
// matches at least one of the given hashes, or false otherwise.
//
// If it cannot read from the given location, PackageMatchesAnyHash returns an
// error. Unlike the singular PackageMatchesHash, PackageMatchesAnyHash
// considers unsupported hash formats as successfully non-matching, rather
// than returning an error. Likewise, a "zh:" hash is simply skipped when the
// location is not a PackageLocalArchive.
//
// PackageMatchesAnyHash can be used only with the two local package location
// types PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
//
// Every hash in "allowed" must use valid syntax, because the Scheme method
// is called on each and panics otherwise.
func PackageMatchesAnyHash(loc PackageLocation, allowed []Hash) (bool, error) {
	// It's likely that we'll have multiple hashes of the same scheme in
	// the "allowed" set, so the package's own hash is computed at most once
	// per supported scheme and then reused. Each cache slot stays NilHash
	// until the first hash of the corresponding scheme is encountered.
	var v1Hash, zipHash Hash

	// Whether the location is an archive doesn't change between iterations,
	// so we determine it once up front.
	archiveLoc, isArchive := loc.(PackageLocalArchive)

	for _, candidate := range allowed {
		scheme := candidate.Scheme()

		if scheme == HashScheme1 {
			if v1Hash == NilHash {
				computed, err := PackageHashV1(loc)
				if err != nil {
					return false, err
				}
				v1Hash = computed
			}
			if v1Hash == candidate {
				return true, nil
			}
			continue
		}

		// Anything that isn't a zip hash is an unsupported format and so
		// can't match, and a zip hash can never match an unpacked directory.
		if scheme != HashSchemeZip || !isArchive {
			continue
		}

		if zipHash == NilHash {
			computed, err := PackageHashLegacyZipSHA(archiveLoc)
			if err != nil {
				return false, err
			}
			zipHash = computed
		}
		if zipHash == candidate {
			return true, nil
		}
	}

	return false, nil
}
// PreferredHashes examines all of the given hashes and returns the subset
// that the current version of Terraform considers to provide the strongest
// verification, preserving their original order.
//
// Returns a nil slice if none of the given hashes are of a supported format.
// Every element of the result is one of the hashes in "given".
//
// Every hash in "given" must use valid syntax, because the Scheme method is
// called on each and panics otherwise.
func PreferredHashes(given []Hash) []Hash {
	// For now this is just filtering for the two hash formats we support,
	// both of which are considered equally "preferred". If we introduce
	// a new scheme like "h2:" in future then, depending on the characteristics
	// of that new version, it might make sense to rework this function so
	// that it only returns "h1:" hashes if the input has no "h2:" hashes,
	// so that h2: is preferred when possible and h1: is only a fallback for
	// interacting with older systems that haven't been updated with the new
	// scheme yet.
	var ret []Hash
	for _, hash := range given {
		if scheme := hash.Scheme(); scheme == HashScheme1 || scheme == HashSchemeZip {
			ret = append(ret, hash)
		}
	}
	return ret
}
// PackageHashLegacyZipSHA implements the old provider package hashing scheme
// of taking a SHA256 hash of the containing .zip archive itself, rather than
// of the contents of the archive.
//
// The result is a hash string with the "zh:" prefix, which is intended to
// represent "zip hash". After the prefix is a lowercase-hex encoded SHA256
// checksum, intended to exactly match the formatting used in the registry
// API (apart from the prefix) so that checksums can be more conveniently
// compared by humans.
//
// Because this hashing scheme uses the official provider .zip file as its
// input, it accepts only PackageLocalArchive locations.
//
// An error is returned if the location's symlinks cannot be resolved, or if
// the resulting file cannot be opened or read.
func PackageHashLegacyZipSHA(loc PackageLocalArchive) (Hash, error) {
	// Resolve any symlinks first so that we open the real archive file.
	resolved, err := filepath.EvalSymlinks(string(loc))
	if err != nil {
		return "", err
	}

	archive, err := os.Open(resolved)
	if err != nil {
		return "", err
	}
	defer archive.Close()

	// Stream the file through the hasher rather than reading it all into
	// memory at once.
	digest := sha256.New()
	if _, err := io.Copy(digest, archive); err != nil {
		return "", err
	}

	return HashSchemeZip.New(fmt.Sprintf("%x", digest.Sum(nil))), nil
}
// HashLegacyZipSHAFromSHA is a convenience function to produce the
// schemed-string hash format from an already-calculated SHA256 checksum of a
// provider .zip archive.
//
// This just encodes the checksum as lowercase hex and adds the "zh:" prefix,
// so that the result is in the same format as PackageHashLegacyZipSHA.
func HashLegacyZipSHAFromSHA(sum [sha256.Size]byte) Hash {
	hexDigest := fmt.Sprintf("%x", sum[:])
	return HashSchemeZip.New(hexDigest)
}
// PackageHashV1 computes a hash of the contents of the package at the given
// location using hash algorithm 1. The resulting Hash is guaranteed to have
// the scheme HashScheme1.
//
// The hash covers the paths to files in the directory and the contents of
// those files. It does not cover other metadata about the files, such as
// permissions.
//
// This function is named "PackageHashV1" in anticipation of other hashing
// algorithms being added in a backward-compatible way in future. The result
// from PackageHashV1 always begins with the prefix "h1:" so that callers can
// distinguish the results of potentially multiple different hash algorithms in
// future.
//
// PackageHashV1 can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func PackageHashV1(loc PackageLocation) (Hash, error) {
	// Our HashV1 is really just the Go Modules hash version 1, which is
	// sufficient for our needs and already well-used for identity of
	// Go Modules distribution packages. It is also blocked from incompatible
	// changes by being used in a wide array of go.sum files already.
	//
	// In particular, it also supports computing an equivalent hash from
	// an unpacked zip file, which is not important for Terraform workflow
	// today but is likely to become so in future if we adopt a top-level
	// lockfile mechanism that is intended to be checked in to version control,
	// rather than just a transient lock for a particular local cache directory.
	// (In that case we'd need to check hashes of _packed_ packages, too.)
	//
	// Internally, dirhash.Hash1 produces a string containing a sequence of
	// newline-separated path+filehash pairs for all of the files in the
	// directory, and then finally produces a hash of that string to return.
	// In both cases, the hash algorithm is SHA256.

	if dirLoc, isDir := loc.(PackageLocalDir); isDir {
		// We'll first dereference a possible symlink at the package
		// directory location, as would be created if this package were
		// linked in from another cache.
		resolvedDir, err := filepath.EvalSymlinks(string(dirLoc))
		if err != nil {
			return "", err
		}

		// The dirhash.HashDir result is already in our expected h1:...
		// format, so we can just convert directly to Hash.
		sum, err := dirhash.HashDir(resolvedDir, "", dirhash.Hash1)
		return Hash(sum), err
	}

	if archiveLoc, isArchive := loc.(PackageLocalArchive); isArchive {
		resolvedArchive, err := filepath.EvalSymlinks(string(archiveLoc))
		if err != nil {
			return "", err
		}

		// The dirhash.HashZip result is already in our expected h1:...
		// format, so we can just convert directly to Hash.
		sum, err := dirhash.HashZip(resolvedArchive, dirhash.Hash1)
		return Hash(sum), err
	}

	// Any other location type has no local content for us to read.
	return "", fmt.Errorf("cannot hash package at %s", loc.String())
}
// Hash computes a hash of the contents of the package at the location
// associated with the receiver, using whichever hash algorithm is the current
// default. It delegates to PackageHash with the receiver's Location.
//
// This method will change to use new hash versions as they are introduced
// in future. If you need a specific hash version, call the method for that
// version directly instead, such as HashV1.
//
// Hash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func (m PackageMeta) Hash() (Hash, error) {
return PackageHash(m.Location)
}
// MatchesHash returns true if the package at the location associated with
// the receiver matches the given hash, or false otherwise. It delegates to
// PackageMatchesHash with the receiver's Location.
//
// If it cannot read from the given location, or if the given hash is in an
// unsupported format, MatchesHash returns an error.
//
// MatchesHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func (m PackageMeta) MatchesHash(want Hash) (bool, error) {
return PackageMatchesHash(m.Location, want)
}
// MatchesAnyHash returns true if the package at the location associated with
// the receiver matches at least one of the given hashes, or false otherwise.
// It delegates to PackageMatchesAnyHash with the receiver's Location.
//
// If it cannot read from the given location, MatchesAnyHash returns an error.
// Unlike the singular MatchesHash, MatchesAnyHash considers an unsupported
// hash format to be a successful non-match.
func (m PackageMeta) MatchesAnyHash(acceptable []Hash) (bool, error) {
return PackageMatchesAnyHash(m.Location, acceptable)
}
// HashV1 computes a hash of the contents of the package at the location
// associated with the receiver using hash algorithm 1. It delegates to
// PackageHashV1 with the receiver's Location.
//
// The hash covers the paths to files in the directory and the contents of
// those files. It does not cover other metadata about the files, such as
// permissions.
//
// HashV1 can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func (m PackageMeta) HashV1() (Hash, error) {
return PackageHashV1(m.Location)
}