Skip to content

Commit

Permalink
keymigrate: fix conversion of transaction hash keys (#8352)
Browse files Browse the repository at this point in the history
* keymigrate: fix conversion of transaction hash keys

In the legacy database format, keys were generally stored with a string prefix
to partition the key space. Transaction hashes, however, were not prefixed: The
hash of a transaction was the entire key for its record.

When the key migration script scans its input, it checks the format of each
key to determine whether it has already been converted, so that it is safe to run
the script over an already-converted database.

After checking for known prefixes, the migration script used two heuristics to
distinguish ABCI events and transaction hashes: For ABCI events, whose keys
used the form "name/value/height/index", it checked for the right number of
separators. For hashes, it checked that the length is exactly 32 bytes (the
length of a SHA-256 digest) AND that the value does not contain a "/".

This last check is problematic: Any hash containing the byte 0x2f (the code
point for "/") would be incorrectly filtered out from conversion. This leads to
some transaction hashes not being converted.

To fix this problem, this changes how the script recognizes keys:

1. Use a more rigorous syntactic check to filter out ABCI metadata.
2. Use only the length to identify hashes among what remains.

This change is still not a complete fix: It is possible, though unlikely, that
a valid hash could happen to look exactly like an ABCI metadata key. However,
the chance of that happening is vastly smaller than the chance of generating a
hash that contains at least one "/" byte.

Similarly, it is possible that an already-converted key of some other type
could be mistaken for a hash (not a converted hash, ironically, but another
type of the right length). Again, we can't do anything about that.

(cherry picked from commit 34e7276)
  • Loading branch information
M. J. Fromberger authored and mergify-bot committed Apr 14, 2022
1 parent 8579cc3 commit 5ee7343
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 55 deletions.
164 changes: 116 additions & 48 deletions scripts/keymigrate/migrate.go
Expand Up @@ -39,7 +39,7 @@ func getAllLegacyKeys(db dbm.DB) ([]keyID, error) {

// make sure it's a key with a legacy format, and skip
// all other keys, to make it safe to resume the migration.
if !keyIsLegacy(k) {
if !checkKeyType(k).isLegacy() {
continue
}

Expand All @@ -58,55 +58,123 @@ func getAllLegacyKeys(db dbm.DB) ([]keyID, error) {
return out, nil
}

func keyIsLegacy(key keyID) bool {
for _, prefix := range []keyID{
// core "store"
keyID("consensusParamsKey:"),
keyID("abciResponsesKey:"),
keyID("validatorsKey:"),
keyID("stateKey"),
keyID("H:"),
keyID("P:"),
keyID("C:"),
keyID("SC:"),
keyID("BH:"),
// light
keyID("size"),
keyID("lb/"),
// evidence
keyID([]byte{0x00}),
keyID([]byte{0x01}),
// tx index
keyID("tx.height/"),
keyID("tx.hash/"),
} {
if bytes.HasPrefix(key, prefix) {
return true
// keyType is an enumeration for the structural type of a key.
type keyType int

func (t keyType) isLegacy() bool { return t != nonLegacyKey }

const (
nonLegacyKey keyType = iota // non-legacy key (presumed already converted)
consensusParamsKey
abciResponsesKey
validatorsKey
stateStoreKey // state storage record
blockMetaKey // H:
blockPartKey // P:
commitKey // C:
seenCommitKey // SC:
blockHashKey // BH:
lightSizeKey // size
lightBlockKey // lb/
evidenceCommittedKey // \x00
evidencePendingKey // \x01
txHeightKey // tx.height/... (special case)
abciEventKey // name/value/height/index
txHashKey // 32-byte transaction hash (unprefixed)
)

var prefixes = []struct {
prefix []byte
ktype keyType
}{
{[]byte("consensusParamsKey:"), consensusParamsKey},
{[]byte("abciResponsesKey:"), abciResponsesKey},
{[]byte("validatorsKey:"), validatorsKey},
{[]byte("stateKey"), stateStoreKey},
{[]byte("H:"), blockMetaKey},
{[]byte("P:"), blockPartKey},
{[]byte("C:"), commitKey},
{[]byte("SC:"), seenCommitKey},
{[]byte("BH:"), blockHashKey},
{[]byte("size"), lightSizeKey},
{[]byte("lb/"), lightBlockKey},
{[]byte("\x00"), evidenceCommittedKey},
{[]byte("\x01"), evidencePendingKey},
}

// checkKeyType classifies a candidate key based on its structure.
func checkKeyType(key keyID) keyType {
for _, p := range prefixes {
if bytes.HasPrefix(key, p.prefix) {
return p.ktype
}
}

// this means it's a tx index...
if bytes.Count(key, []byte("/")) >= 3 {
return true
// A legacy event key has the form:
//
// <name> / <value> / <height> / <index>
//
// Transaction hashes are stored as a raw binary hash with no prefix.
//
// Because a hash can contain any byte, it is possible (though unlikely)
// that a hash could have the correct form for an event key, in which case
// we would translate it incorrectly. To reduce the likelihood of an
// incorrect interpretation, we parse candidate event keys and check for
// some structural properties before making a decision.
//
// Note, though, that nothing prevents event names or values from containing
// additional "/" separators, so the parse has to be forgiving.
parts := bytes.Split(key, []byte("/"))
if len(parts) >= 4 {
// Special case for tx.height.
if len(parts) == 4 && bytes.Equal(parts[0], []byte("tx.height")) {
return txHeightKey
}

// The name cannot be empty, but we don't know where the name ends and
// the value begins, so insist that there be something.
var n int
for _, part := range parts[:len(parts)-2] {
n += len(part)
}
// Check whether the last two fields could be .../height/index.
if n > 0 && isDecimal(parts[len(parts)-1]) && isDecimal(parts[len(parts)-2]) {
return abciEventKey
}
}

return keyIsHash(key)
// If we get here, it's not an event key. Treat it as a hash if it is the
// right length. Note that it IS possible this could collide with the
// translation of some other key (though not a hash, since encoded hashes
// will be longer). The chance of that is small, but there is nothing we can
// do to detect it.
if len(key) == 32 {
return txHashKey
}
return nonLegacyKey
}

func keyIsHash(key keyID) bool {
return len(key) == 32 && !bytes.Contains(key, []byte("/"))
// isDecimal reports whether buf is a non-empty sequence of Unicode decimal
// digits.
func isDecimal(buf []byte) bool {
for _, c := range buf {
if c < '0' || c > '9' {
return false
}
}
return len(buf) != 0
}

func migrateKey(key keyID) (keyID, error) {
switch {
case bytes.HasPrefix(key, keyID("H:")):
switch checkKeyType(key) {
case blockMetaKey:
val, err := strconv.Atoi(string(key[2:]))
if err != nil {
return nil, err
}

return orderedcode.Append(nil, int64(0), int64(val))
case bytes.HasPrefix(key, keyID("P:")):
case blockPartKey:
parts := bytes.Split(key[2:], []byte(":"))
if len(parts) != 2 {
return nil, fmt.Errorf("block parts key has %d rather than 2 components",
Expand All @@ -123,21 +191,21 @@ func migrateKey(key keyID) (keyID, error) {
}

return orderedcode.Append(nil, int64(1), int64(valOne), int64(valTwo))
case bytes.HasPrefix(key, keyID("C:")):
case commitKey:
val, err := strconv.Atoi(string(key[2:]))
if err != nil {
return nil, err
}

return orderedcode.Append(nil, int64(2), int64(val))
case bytes.HasPrefix(key, keyID("SC:")):
case seenCommitKey:
val, err := strconv.Atoi(string(key[3:]))
if err != nil {
return nil, err
}

return orderedcode.Append(nil, int64(3), int64(val))
case bytes.HasPrefix(key, keyID("BH:")):
case blockHashKey:
hash := string(key[3:])
if len(hash)%2 == 1 {
hash = "0" + hash
Expand All @@ -148,34 +216,34 @@ func migrateKey(key keyID) (keyID, error) {
}

return orderedcode.Append(nil, int64(4), string(val))
case bytes.HasPrefix(key, keyID("validatorsKey:")):
case validatorsKey:
val, err := strconv.Atoi(string(key[14:]))
if err != nil {
return nil, err
}

return orderedcode.Append(nil, int64(5), int64(val))
case bytes.HasPrefix(key, keyID("consensusParamsKey:")):
case consensusParamsKey:
val, err := strconv.Atoi(string(key[19:]))
if err != nil {
return nil, err
}

return orderedcode.Append(nil, int64(6), int64(val))
case bytes.HasPrefix(key, keyID("abciResponsesKey:")):
case abciResponsesKey:
val, err := strconv.Atoi(string(key[17:]))
if err != nil {
return nil, err
}

return orderedcode.Append(nil, int64(7), int64(val))
case bytes.HasPrefix(key, keyID("stateKey")):
case stateStoreKey:
return orderedcode.Append(nil, int64(8))
case bytes.HasPrefix(key, []byte{0x00}): // committed evidence
case evidenceCommittedKey:
return convertEvidence(key, 9)
case bytes.HasPrefix(key, []byte{0x01}): // pending evidence
case evidencePendingKey:
return convertEvidence(key, 10)
case bytes.HasPrefix(key, keyID("lb/")):
case lightBlockKey:
if len(key) < 24 {
return nil, fmt.Errorf("light block evidence %q in invalid format", string(key))
}
Expand All @@ -186,9 +254,9 @@ func migrateKey(key keyID) (keyID, error) {
}

return orderedcode.Append(nil, int64(11), int64(val))
case bytes.HasPrefix(key, keyID("size")):
case lightSizeKey:
return orderedcode.Append(nil, int64(12))
case bytes.HasPrefix(key, keyID("tx.height")):
case txHeightKey:
parts := bytes.Split(key, []byte("/"))
if len(parts) != 4 {
return nil, fmt.Errorf("key has %d parts rather than 4", len(parts))
Expand All @@ -211,7 +279,7 @@ func migrateKey(key keyID) (keyID, error) {
}

return orderedcode.Append(nil, elems...)
case bytes.Count(key, []byte("/")) >= 3: // tx indexer
case abciEventKey:
parts := bytes.Split(key, []byte("/"))

elems := make([]interface{}, 0, 4)
Expand Down Expand Up @@ -247,7 +315,7 @@ func migrateKey(key keyID) (keyID, error) {
elems = append(elems, string(appKey), int64(val), int64(val2))
}
return orderedcode.Append(nil, elems...)
case keyIsHash(key):
case txHashKey:
return orderedcode.Append(nil, "tx.hash", string(key))
default:
return nil, fmt.Errorf("key %q is in the wrong format", string(key))
Expand Down
18 changes: 11 additions & 7 deletions scripts/keymigrate/migrate_test.go
Expand Up @@ -107,19 +107,19 @@ func TestMigration(t *testing.T) {

t.Run("Legacy", func(t *testing.T) {
for kind, le := range legacyPrefixes {
require.True(t, keyIsLegacy(le), kind)
require.True(t, checkKeyType(le).isLegacy(), kind)
}
})
t.Run("New", func(t *testing.T) {
for kind, ne := range newPrefixes {
require.False(t, keyIsLegacy(ne), kind)
require.False(t, checkKeyType(ne).isLegacy(), kind)
}
})
t.Run("Conversion", func(t *testing.T) {
for kind, le := range legacyPrefixes {
nk, err := migrateKey(le)
require.NoError(t, err, kind)
require.False(t, keyIsLegacy(nk), kind)
require.False(t, checkKeyType(nk).isLegacy(), kind)
}
})
t.Run("Hashes", func(t *testing.T) {
Expand All @@ -129,8 +129,12 @@ func TestMigration(t *testing.T) {
}
})
t.Run("ContrivedLegacyKeyDetection", func(t *testing.T) {
require.True(t, keyIsLegacy([]byte("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")))
require.False(t, keyIsLegacy([]byte("xxxxxxxxxxxxxxx/xxxxxxxxxxxxxxxx")))
// length 32: should appear to be a hash
require.Equal(t, txHashKey, checkKeyType([]byte("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")))

// length ≠ 32: should not appear to be a hash
require.Equal(t, nonLegacyKey, checkKeyType([]byte("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx--")))
require.Equal(t, nonLegacyKey, checkKeyType([]byte("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")))
})
})
})
Expand Down Expand Up @@ -204,15 +208,15 @@ func TestMigration(t *testing.T) {
require.Equal(t, size, len(keys))
legacyKeys := 0
for _, k := range keys {
if keyIsLegacy(k) {
if checkKeyType(k).isLegacy() {
legacyKeys++
}
}
require.Equal(t, size, legacyKeys)
})
t.Run("KeyIdempotency", func(t *testing.T) {
for _, key := range getNewPrefixKeys(t, 84) {
require.False(t, keyIsLegacy(key))
require.False(t, checkKeyType(key).isLegacy())
}
})
t.Run("Migrate", func(t *testing.T) {
Expand Down

0 comments on commit 5ee7343

Please sign in to comment.