Skip to content

Commit

Permalink
Replace go-runewidth with uniseg
Browse files Browse the repository at this point in the history
Replace the use of `RuneWidth` and `StringWidth` from
`mattn/go-runewidth` with equivalent functions from `rivo/uniseg`.

It is important to be aware that using `RuneWidth` will not be accurate
as the width of a rune cannot be determined in isolation. This requires
a shift to thinking about grapheme clusters instead.

Unfortunately, due to the complexity of identifying grapheme clusters,
there have been some significant performance regressions in two
functions:

- PrintableRuneWidth: 10x slower
- TruncateString: 4x slower

Two other functions have had performance improvements:

- MarginString: 2x faster
- PaddingString: 2x faster

The documentation for `rivo/uniseg` states that `Step` and `StepString`
perform "orders of magnitude faster" than the `NewGraphemes` method.
However, switching to these functions only resulted in a 10% performance
increase.

Signed-off-by: Michael Lorant <michael.lorant@nine.com.au>
  • Loading branch information
mikelorant committed Jan 26, 2024
1 parent 83f6379 commit e59a5fa
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 46 deletions.
22 changes: 11 additions & 11 deletions ansi/buffer.go
Expand Up @@ -3,7 +3,7 @@ package ansi
import (
"bytes"

"github.com/mattn/go-runewidth"
"github.com/rivo/uniseg"
)

// Buffer is a buffer aware of ANSI escape sequences.
Expand All @@ -19,22 +19,22 @@ func (w Buffer) PrintableRuneWidth() int {

// PrintableRuneWidth returns the cell width of the given string.
func PrintableRuneWidth(s string) int {
var n int
var n []rune
var ansi bool

for _, c := range s {
if c == Marker {
switch {
case c == Marker:
// ANSI escape sequence
ansi = true
} else if ansi {
if IsTerminator(c) {
// ANSI sequence terminated
ansi = false
}
} else {
n += runewidth.RuneWidth(c)
case ansi && IsTerminator(c):
// ANSI sequence terminated
ansi = false
case ansi:
default:
n = append(n, c)
}
}

return n
return uniseg.StringWidth(string(n))
}
2 changes: 1 addition & 1 deletion go.mod
Expand Up @@ -2,4 +2,4 @@ module github.com/muesli/reflow

go 1.13

require github.com/mattn/go-runewidth v0.0.14
require github.com/rivo/uniseg v0.4.4
6 changes: 2 additions & 4 deletions go.sum
@@ -1,4 +1,2 @@
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
4 changes: 2 additions & 2 deletions padding/padding.go
Expand Up @@ -5,8 +5,8 @@ import (
"io"
"strings"

"github.com/mattn/go-runewidth"
"github.com/muesli/reflow/ansi"
"github.com/rivo/uniseg"
)

type PaddingFunc func(w io.Writer)
Expand Down Expand Up @@ -71,7 +71,7 @@ func (w *Writer) Write(b []byte) (int, error) {
w.ansi = false
}
} else {
w.lineLen += runewidth.StringWidth(string(c))
w.lineLen += uniseg.StringWidth(string(c))

if c == '\n' {
// end of current line
Expand Down
25 changes: 14 additions & 11 deletions truncate/truncate.go
Expand Up @@ -4,7 +4,7 @@ import (
"bytes"
"io"

"github.com/mattn/go-runewidth"
"github.com/rivo/uniseg"

"github.com/muesli/reflow/ansi"
)
Expand Down Expand Up @@ -79,17 +79,20 @@ func (w *Writer) Write(b []byte) (int, error) {
w.width -= uint(tw)
var curWidth uint

for _, c := range string(b) {
if c == ansi.Marker {
gr := uniseg.NewGraphemes(string(b))
for gr.Next() {
rs := gr.Runes()

switch {
case len(rs) == 1 && rs[0] == ansi.Marker:
// ANSI escape sequence
w.ansi = true
} else if w.ansi {
if ansi.IsTerminator(c) {
// ANSI sequence terminated
w.ansi = false
}
} else {
curWidth += uint(runewidth.RuneWidth(c))
case len(rs) == 1 && w.ansi && ansi.IsTerminator(rs[0]):
// ANSI sequence terminated
w.ansi = false
case w.ansi:
default:
curWidth += uint(gr.Width())
}

if curWidth > w.width {
Expand All @@ -100,7 +103,7 @@ func (w *Writer) Write(b []byte) (int, error) {
return n, err
}

_, err := w.ansiWriter.Write([]byte(string(c)))
_, err := w.ansiWriter.Write([]byte(gr.Str()))
if err != nil {
return 0, err
}
Expand Down
37 changes: 20 additions & 17 deletions wrap/wrap.go
Expand Up @@ -5,8 +5,8 @@ import (
"strings"
"unicode"

"github.com/mattn/go-runewidth"
"github.com/muesli/reflow/ansi"
"github.com/rivo/uniseg"
)

var (
Expand Down Expand Up @@ -67,6 +67,7 @@ func String(s string, limit int) string {

func (w *Wrap) Write(b []byte) (int, error) {
s := strings.Replace(string(b), "\t", strings.Repeat(" ", w.TabWidth), -1)

if !w.KeepNewlines {
s = strings.Replace(s, "\n", "", -1)
}
Expand All @@ -78,37 +79,39 @@ func (w *Wrap) Write(b []byte) (int, error) {
return w.buf.Write(b)
}

for _, c := range s {
if c == ansi.Marker {
gr := uniseg.NewGraphemes(s)
for gr.Next() {
rs := gr.Runes()

switch {
case len(rs) == 1 && rs[0] == ansi.Marker:
w.ansi = true
} else if w.ansi {
if ansi.IsTerminator(c) {
w.ansi = false
}
} else if inGroup(w.Newline, c) {
case len(rs) == 1 && w.ansi && ansi.IsTerminator(rs[0]):
w.ansi = false
case w.ansi:
case len(rs) == 1 && inGroup(w.Newline, rs[0]):
w.addNewLine()
w.forcefulNewline = false
continue
} else {
width := runewidth.RuneWidth(c)

if w.lineLen+width > w.Limit {
default:
if w.lineLen+gr.Width() > w.Limit {
w.addNewLine()
w.forcefulNewline = true
}

if w.lineLen == 0 {
if w.forcefulNewline && !w.PreserveSpace && unicode.IsSpace(c) {
switch {
case w.lineLen == 0:
if len(rs) == 1 && w.forcefulNewline && !w.PreserveSpace && unicode.IsSpace(rs[0]) {
continue
}
} else {
default:
w.forcefulNewline = false
}

w.lineLen += width
w.lineLen += gr.Width()
}

_, _ = w.buf.WriteRune(c)
_, _ = w.buf.WriteString(gr.Str())
}

return len(b), nil
Expand Down

0 comments on commit e59a5fa

Please sign in to comment.