Skip to content

Commit

Permalink
Some performance improvements by fast-tracking property search on ASCII characters.
Browse files Browse the repository at this point in the history
  • Loading branch information
rivo committed Jan 27, 2024
1 parent 97691fc commit b74d4dc
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 9 deletions.
4 changes: 2 additions & 2 deletions grapheme.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, new
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
var prop int
if state < 0 {
prop = property(graphemeCodePoints, r)
prop = propertyGraphemes(r)
} else {
prop = state >> shiftGraphemePropState
}
Expand Down Expand Up @@ -284,7 +284,7 @@ func FirstGraphemeClusterInString(str string, state int) (cluster, rest string,
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
var prop int
if state < 0 {
prop = property(graphemeCodePoints, r)
prop = propertyGraphemes(r)
} else {
prop = state >> shiftGraphemePropState
}
Expand Down
2 changes: 1 addition & 1 deletion graphemerules.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ var grTransitions = map[[2]int][3]int{
// table) and whether a cluster boundary was detected.
func transitionGraphemeState(state int, r rune) (newState, prop int, boundary bool) {
// Determine the property of the next character.
prop = property(graphemeCodePoints, r)
prop = propertyGraphemes(r)

// Find the applicable transition.
transition, ok := grTransitions[[2]int{state, prop}]
Expand Down
6 changes: 3 additions & 3 deletions linerules.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
defer func() {
// Transition into LB30.
if newState == lbCP || newState == lbNUCP {
ea := property(eastAsianWidth, r)
ea := propertyEastAsianWidth(r)
if ea != prF && ea != prW && ea != prH {
newState |= lbCPeaFWHBit
}
Expand Down Expand Up @@ -424,7 +424,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
// LB30 (part one).
if rule > 300 {
if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP {
ea := property(eastAsianWidth, r)
ea := propertyEastAsianWidth(r)
if ea != prF && ea != prW && ea != prH {
return lbOP, LineDontBreak
}
Expand Down Expand Up @@ -460,7 +460,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
return prAny, LineDontBreak
}
}
graphemeProperty := property(graphemeCodePoints, r)
graphemeProperty := propertyGraphemes(r)
if graphemeProperty == prExtendedPictographic && generalCategory == gcCn {
return lbExtPicCn, LineCanBreak
}
Expand Down
30 changes: 30 additions & 0 deletions properties.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,33 @@ func propertyWithGenCat(dictionary [][4]int, r rune) (property, generalCategory
entry := propertySearch(dictionary, r)
return entry[2], entry[3]
}

// propertyGraphemes reports the grapheme cluster property of the code point r.
// Code points in the ASCII range (0x00 to 0x7f) are classified directly so that
// the common case avoids a lookup in graphemeCodePoints; all other values,
// including negative ones, are handed to the generic property lookup.
func propertyGraphemes(r rune) int {
	switch {
	case 0x20 <= r && r <= 0x7e:
		// Printable ASCII, checked first because it is the most common input.
		return prAny
	case r == '\n':
		return prLF
	case r == '\r':
		return prCR
	case r == 0x7f, 0 <= r && r < 0x20:
		// DEL and the remaining C0 range (CR and LF were handled above).
		return prControl
	default:
		return property(graphemeCodePoints, r)
	}
}

// propertyEastAsianWidth reports the East Asian Width property of the code
// point r. ASCII input is answered without a table lookup: 0x20 to 0x7e yields
// prNa, while the remaining ASCII code points (0x00 to 0x1f and 0x7f) yield
// prN. Anything outside the ASCII range, including negative values, is
// resolved through the eastAsianWidth table.
func propertyEastAsianWidth(r rune) int {
	switch {
	case r >= ' ' && r <= '~':
		// Printable ASCII, checked first because it is the most common input.
		return prNa
	case r < 0 || r > 0x7f:
		return property(eastAsianWidth, r)
	default:
		// Only 0x00-0x1f and 0x7f can reach this point.
		return prN
	}
}
4 changes: 2 additions & 2 deletions step.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState i
if len(b) <= length { // If we're already past the end, there is nothing else to parse.
var prop int
if state < 0 {
prop = property(graphemeCodePoints, r)
prop = propertyGraphemes(r)
} else {
prop = state >> shiftPropState
}
Expand Down Expand Up @@ -179,7 +179,7 @@ func StepString(str string, state int) (cluster, rest string, boundaries int, ne
// Extract the first rune.
r, length := utf8.DecodeRuneInString(str)
if len(str) <= length { // If we're already past the end, there is nothing else to parse.
prop := property(graphemeCodePoints, r)
prop := propertyGraphemes(r)
return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState)
}

Expand Down
2 changes: 1 addition & 1 deletion width.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func runeWidth(r rune, graphemeProperty int) int {
return 4
}

switch property(eastAsianWidth, r) {
switch propertyEastAsianWidth(r) {
case prW, prF:
return 2
case prA:
Expand Down

0 comments on commit b74d4dc

Please sign in to comment.