Skip to content

Commit

Permalink
fix bad rune handling
Browse files Browse the repository at this point in the history
formatting
  • Loading branch information
BrannonKing committed Aug 19, 2021
1 parent d942089 commit 057d210
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 8 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/create-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ jobs:
go: [1.16]
os: [linux, darwin, windows]
ar: [amd64, arm64]
exclude:
- go: 1.16
os: windows
ar: arm64
runs-on: ubuntu-latest
steps:
- name: Set up Go
Expand Down
18 changes: 15 additions & 3 deletions claimtrie/node/noderepo/pebble.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,30 @@ func (repo *Pebble) IterateChildren(name []byte, f func(changes []change.Change)
start := make([]byte, len(name)+1) // make zero-fills, so the extra last byte stays 0; need a constant len for stack alloc?
copy(start, name)

end := make([]byte, len(name)) // max name length is 255
copy(end, name)
validEnd := false
for i := len(name) - 1; i >= 0; i-- {
end[i]++
if end[i] != 0 {
validEnd = true
break
}
}
if !validEnd {
end = nil // every byte wrapped to 0 (name was empty or all 0xFF), so no upper bound exists; we think nil means run to the end of the table
}

prefixIterOptions := &pebble.IterOptions{
LowerBound: start,
UpperBound: end,
}

iter := repo.db.NewIter(prefixIterOptions)
defer iter.Close()

for iter.First(); iter.Valid(); iter.Next() {
// NOTE! iter.Key() is ephemeral!
if len(iter.Key()) <= len(name) || !bytes.Equal(name, iter.Key()[:len(name)]) {
break
}
changes, err := unmarshalChanges(iter.Key(), iter.Value())
if err != nil {
return errors.Wrapf(err, "from unmarshaller at %s", iter.Key())
Expand Down
7 changes: 5 additions & 2 deletions claimtrie/normalization/case_folder.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ func init() {
func CaseFold(name []byte) []byte {
var b bytes.Buffer
b.Grow(len(name))
for _, r := range string(name) {
if r == utf8.RuneError {
for i := 0; i < len(name); {
r, w := utf8.DecodeRune(name[i:])
if r == utf8.RuneError && w < 2 {
// HACK: utf8.RuneError (U+FFFD) is itself a valid character when it was decoded from a width of 2 or more; only a width below 2 indicates a decoding failure
return name
}
replacements := foldMap[r]
Expand All @@ -47,6 +49,7 @@ func CaseFold(name []byte) []byte {
} else {
b.WriteRune(r)
}
i += w
}
return b.Bytes()
}
14 changes: 11 additions & 3 deletions claimtrie/normalization/normalizer_icu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
package normalization

import (
"encoding/hex"
"github.com/stretchr/testify/assert"
"testing"
"unicode/utf8"
)

func TestNormalizationICU(t *testing.T) {
Expand All @@ -16,8 +18,14 @@ func BenchmarkNormalizeICU(b *testing.B) {
}

func TestBlock760150(t *testing.T) {
test := "Ꮖ-Ꮩ-Ꭺ-N--------Ꭺ-N-Ꮹ-Ꭼ-Ꮮ-Ꭺ-on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”"
a := normalizeGo([]byte(test))
b := normalizeICU([]byte(test))
test, _ := hex.DecodeString("43efbfbd")
assert.True(t, utf8.Valid(test))
a := normalizeGo(test)
b := normalizeICU(test)
assert.Equal(t, a, b)

test2 := "Ꮖ-Ꮩ-Ꭺ-N--------Ꭺ-N-Ꮹ-Ꭼ-Ꮮ-Ꭺ-on-Instagram_-“Our-next-destination-is-East-and-Southeast-Asia--selfie--asia”"
a = normalizeGo([]byte(test2))
b = normalizeICU([]byte(test2))
assert.Equal(t, a, b)
}

0 comments on commit 057d210

Please sign in to comment.