Skip to content

Commit

Permalink
Add digest set implementation
Browse files Browse the repository at this point in the history
Set represents a unique set of digests which allow for efficient lookup.
Dumping short codes is a function which takes in a digest set.
Any operation involving short codes may be considered secure if the list of digests added to the set is the complete list of referenceable digests.
Contains benchmarks for Add, Lookup, and Dump.

Signed-off-by: Derek McGowan <derek@mcgstyle.net>
  • Loading branch information
dmcgowan committed May 10, 2020
1 parent 4b56074 commit 77570c9
Show file tree
Hide file tree
Showing 2 changed files with 467 additions and 0 deletions.
195 changes: 195 additions & 0 deletions digestset/set.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
package digest

import (
"errors"
"sort"
"strings"
)

var (
// ErrDigestNotFound is used when a matching digest
// could not be found in a set.
ErrDigestNotFound = errors.New("digest not found")

// ErrDigestAmbiguous is used when multiple digests
// are found in a set. None of the matching digests
// should be considered valid matches.
ErrDigestAmbiguous = errors.New("ambiguous digest string")
)

// Set is used to hold a unique set of digests which
// may be easily referenced by easily referenced by a string
// representation of the digest as well as short representation.
// The uniqueness of the short representation is based on other
// digests in the set. If digests are ommited from this set,
// collisions in a larger set may not be detected, therefore it
// is important to always do short representation lookups on
// the complete set of digests. To mitigate collisions, an
// appropriately long short code should be used.
type Set struct {
entries digestEntries
}

// NewSet creates an empty set of digests
// which may have digests added.
func NewSet() *Set {
return &Set{
entries: digestEntries{},
}
}

// checkShortMatch checks whether two digests match as either whole
// values or short values. This function does not test equality,
// rather whether the second value could match against the first
// value.
func checkShortMatch(alg, hex, shortAlg, shortHex string) bool {
if len(hex) == len(shortHex) {
if hex != shortHex {
return false
}
if len(shortAlg) > 0 && alg != shortAlg {
return false
}
} else if !strings.HasPrefix(hex, shortHex) {
return false
} else if len(shortAlg) > 0 && alg != shortAlg {
return false
}
return true
}

// Lookup looks for a digest matching the given string representation.
// If no digests could be found ErrDigestNotFound will be returned
// with an empty digest value. If multiple matches are found
// ErrDigestAmbiguous will be returned with an empty digest value.
func (dst *Set) Lookup(d string) (Digest, error) {
if len(dst.entries) == 0 {
return "", ErrDigestNotFound
}
var (
searchFunc func(int) bool
alg string
hex string
)
dgst, err := ParseDigest(d)
if err == ErrDigestInvalidFormat {
hex = d
searchFunc = func(i int) bool {
return dst.entries[i].val >= d
}
} else {
hex = dgst.Hex()
alg = dgst.Algorithm()
searchFunc = func(i int) bool {
if dst.entries[i].val == hex {
return dst.entries[i].alg >= alg
}
return dst.entries[i].val >= hex
}
}
idx := sort.Search(len(dst.entries), searchFunc)
if idx == len(dst.entries) || !checkShortMatch(dst.entries[idx].alg, dst.entries[idx].val, alg, hex) {
return "", ErrDigestNotFound
}
if dst.entries[idx].alg == alg && dst.entries[idx].val == hex {
return dst.entries[idx].digest, nil
}
if idx+1 < len(dst.entries) && checkShortMatch(dst.entries[idx+1].alg, dst.entries[idx+1].val, alg, hex) {
return "", ErrDigestAmbiguous
}

return dst.entries[idx].digest, nil
}

// Add adds the given digests to the set. An error will be returned
// if the given digest is invalid. If the digest already exists in the
// table, this operation will be a no-op.
func (dst *Set) Add(d Digest) error {
if err := d.Validate(); err != nil {
return err
}
entry := &digestEntry{alg: d.Algorithm(), val: d.Hex(), digest: d}
searchFunc := func(i int) bool {
if dst.entries[i].val == entry.val {
return dst.entries[i].alg >= entry.alg
}
return dst.entries[i].val >= entry.val
}
idx := sort.Search(len(dst.entries), searchFunc)
if idx == len(dst.entries) {
dst.entries = append(dst.entries, entry)
return nil
} else if dst.entries[idx].digest == d {
return nil
}

entries := append(dst.entries, nil)
copy(entries[idx+1:], entries[idx:len(entries)-1])
entries[idx] = entry
dst.entries = entries
return nil
}

// ShortCodeTable returns a map of Digest to unique short codes. The
// length represents the minimum value, the maximum length may be the
// entire value of digest if uniqueness cannot be achieved without the
// full value. This function will attempt to make short codes as short
// as possible to be unique.
func ShortCodeTable(dst *Set, length int) map[Digest]string {
m := make(map[Digest]string, len(dst.entries))
l := length
resetIdx := 0
for i := 0; i < len(dst.entries); i++ {
var short string
extended := true
for extended {
extended = false
if len(dst.entries[i].val) <= l {
short = dst.entries[i].digest.String()
} else {
short = dst.entries[i].val[:l]
for j := i + 1; j < len(dst.entries); j++ {
if checkShortMatch(dst.entries[j].alg, dst.entries[j].val, "", short) {
if j > resetIdx {
resetIdx = j
}
extended = true
} else {
break
}
}
if extended {
l++
}
}
}
m[dst.entries[i].digest] = short
if i >= resetIdx {
l = length
}
}
return m
}

type digestEntry struct {
alg string
val string
digest Digest
}

type digestEntries []*digestEntry

func (d digestEntries) Len() int {
return len(d)
}

func (d digestEntries) Less(i, j int) bool {
if d[i].val != d[j].val {
return d[i].val < d[j].val
}
return d[i].alg < d[j].alg
}

func (d digestEntries) Swap(i, j int) {
d[i], d[j] = d[j], d[i]
}

0 comments on commit 77570c9

Please sign in to comment.