Skip to content

Commit

Permalink
fix: string lengths
Browse files Browse the repository at this point in the history
Fix string lengths for strings containing code points that require a
surrogate pair when encoded as UTF-16.

Fixes #388
  • Loading branch information
stevenh committed Nov 25, 2022
1 parent e311ec4 commit 7807497
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 18 deletions.
9 changes: 7 additions & 2 deletions builtin_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"regexp"
"strconv"
"strings"
"unicode/utf16"
"unicode/utf8"
)

Expand Down Expand Up @@ -72,18 +73,22 @@ func builtinString_concat(call FunctionCall) Value {

func lastIndexRune(s, substr string) int {
if i := strings.LastIndex(s, substr); i >= 0 {
return utf8.RuneCountInString(s[:i])
return utf16Length(s[:i])
}
return -1
}

func indexRune(s, substr string) int {
if i := strings.Index(s, substr); i >= 0 {
return utf8.RuneCountInString(s[:i])
return utf16Length(s[:i])
}
return -1
}

// utf16Length returns the number of UTF-16 code units needed to encode s.
//
// The result equals len(utf16.Encode([]rune(s))) but is computed by walking
// the string once, without allocating the intermediate rune and uint16
// slices. Each decoded rune contributes one code unit, plus one more when it
// needs a surrogate pair. Invalid UTF-8 bytes decode to U+FFFD and therefore
// count as a single unit, as they do when encoded.
func utf16Length(s string) int {
	const replacement = '\uFFFD'

	n := 0
	for _, r := range s {
		n++
		// EncodeRune returns U+FFFD for any rune that does not need a
		// surrogate pair, so a different first half means two code units.
		if hi, _ := utf16.EncodeRune(r); hi != replacement {
			n++
		}
	}
	return n
}

func builtinString_indexOf(call FunctionCall) Value {
checkObjectCoercible(call.runtime, call.This)
value := call.This.string()
Expand Down
5 changes: 5 additions & 0 deletions regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,11 @@ func TestRegExp_exec(t *testing.T) {
[ abc.input.length, abc.length, abc.input, abc.index, abc ];
`, "11,1,CE\uFFFFL\uFFDDbox127,5,box1")

test(`
var abc = /\w{3}\d?/.exec("CE😋box127");
[ abc.input.length, abc.length, abc.input, abc.index, abc ];
`, "10,1,CE😋box127,4,box1")

test(`RegExp.prototype.exec.length`, 1)
test(`RegExp.prototype.exec.prototype`, "undefined")
})
Expand Down
1 change: 1 addition & 0 deletions string_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ func TestString_length(t *testing.T) {

test(`"abc".length`, 3)
test(`"uñiçode".length`, 7)
test(`"😋".length`, 2)
})
}

Expand Down
12 changes: 4 additions & 8 deletions type_regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package otto
import (
"fmt"
"regexp"
"unicode/utf8"
"unicode/utf16"

"github.com/robertkrimen/otto/parser"
)
Expand Down Expand Up @@ -131,13 +131,9 @@ func execResultToArray(runtime *_runtime, target string, result []int) *_object
}
matchIndex := result[0]
if matchIndex != 0 {
matchIndex = 0
// Find the rune index in the string, not the byte index
for index := 0; index < result[0]; {
_, size := utf8.DecodeRuneInString(target[index:])
matchIndex += 1
index += size
}
// Find the UTF-16 code unit index in the string, not the byte index.
matchPrefix := target[:matchIndex]
matchIndex = len(utf16.Encode([]rune(matchPrefix)))
}
match := runtime.newArrayOf(valueArray)
match.defineProperty("input", toValue_string(target), 0111, false)
Expand Down
18 changes: 10 additions & 8 deletions type_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package otto

import (
"strconv"
"unicode/utf16"
"unicode/utf8"
)

Expand All @@ -26,20 +27,22 @@ func (str _stringASCII) String() string {
}

// _stringWide is the string object returned by _newStringObject under its
// `wide:` label; the visible methods on it are Length, At and String.
//
// NOTE(review): `string` is declared twice below and the rune-based fields
// (`length`, `runes`) sit next to the UTF-16 field (`value16`). This looks
// like removed and added diff lines rendered together; confirm which fields
// belong to the current struct before relying on this listing.
type _stringWide struct {
// string holds the Go string this object was constructed from.
string string
// length is returned by Length and is set from utf8.RuneCountInString in
// _newStringObject, i.e. it counts code points.
length int
// runes is filled in At from []rune(string) when it is nil.
runes []rune
string string
// value16 holds utf16.Encode([]rune(string)); Length and At fill it in when
// it is nil.
value16 []uint16
}

// Length returns the length of the string in UTF-16 code units, so a code
// point that needs a surrogate pair counts as two.
//
// If value16 has already been populated its length is used directly.
// Otherwise the count is computed from the underlying Go string. The result
// is deliberately not stored in str.value16: Length has a value receiver, so
// such an assignment would only modify a copy and be lost on return.
func (str _stringWide) Length() int {
	if str.value16 != nil {
		return len(str.value16)
	}
	return utf16Length(str.string)
}

func (str _stringWide) At(at int) rune {
if str.runes == nil {
str.runes = []rune(str.string)
if str.value16 == nil {
str.value16 = utf16.Encode([]rune(str.string))
}
return str.runes[at]
return rune(str.value16[at])
}

func (str _stringWide) String() string {
Expand All @@ -58,7 +61,6 @@ func _newStringObject(str string) _stringObject {
wide:
return &_stringWide{
string: str,
length: utf8.RuneCountInString(str),
}
}

Expand Down

0 comments on commit 7807497

Please sign in to comment.