Skip to content

Commit

Permalink
Replace go-runewidth with uniseg
Browse files Browse the repository at this point in the history
Replace the use of `RuneWidth` and `StringWidth` from
`mattn/go-runewidth` with equivalent functions from `rivo/uniseg`.

It is important to be aware that using `RuneWidth` will not be accurate
as the width of a rune cannot be determined in isolation. This requires
a shift to thinking about grapheme clusters instead.

Unfortunately, due to the complexity of identifying grapheme clusters,
there have been some significant performance regressions in two
functions:

- PrintableRuneWidth: 10x slower
- TruncateString: 4x slower

Two other functions have had performance improvements:

- MarginString: 2x faster
- PaddingString: 2x faster

The documentation for `rivo/uniseg` states that `Step` and
`StepString` perform "orders of magnitude faster" than the
`NewGraphemes` method. However, implementing these changes
resulted in only a 10% performance increase.

Signed-off-by: Michael Lorant <michael.lorant@nine.com.au>
  • Loading branch information
mikelorant committed Jan 29, 2024
1 parent 83f6379 commit 7edce3e
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 52 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Expand Up @@ -5,7 +5,7 @@ jobs:
build:
strategy:
matrix:
go-version: [~1.11, ^1]
go-version: [~1.18, ^1]
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
env:
Expand Down
22 changes: 11 additions & 11 deletions ansi/buffer.go
Expand Up @@ -3,7 +3,7 @@ package ansi
import (
"bytes"

"github.com/mattn/go-runewidth"
"github.com/rivo/uniseg"
)

// Buffer is a buffer aware of ANSI escape sequences.
Expand All @@ -19,22 +19,22 @@ func (w Buffer) PrintableRuneWidth() int {

// PrintableRuneWidth returns the cell width of the given string.
func PrintableRuneWidth(s string) int {
var n int
n := make([]rune, 0, len(s))
var ansi bool

for _, c := range s {
if c == Marker {
switch {
case c == Marker:
// ANSI escape sequence
ansi = true
} else if ansi {
if IsTerminator(c) {
// ANSI sequence terminated
ansi = false
}
} else {
n += runewidth.RuneWidth(c)
case ansi && IsTerminator(c):
// ANSI sequence terminated
ansi = false
case ansi:
default:
n = append(n, c)
}
}

return n
return uniseg.StringWidth(string(n))
}
4 changes: 2 additions & 2 deletions go.mod
@@ -1,5 +1,5 @@
module github.com/muesli/reflow

go 1.13
go 1.18

require github.com/mattn/go-runewidth v0.0.14
require github.com/rivo/uniseg v0.4.6
6 changes: 2 additions & 4 deletions go.sum
@@ -1,4 +1,2 @@
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.6 h1:Sovz9sDSwbOz9tgUy8JpT+KgCkPYJEN/oYzlJiYTNLg=
github.com/rivo/uniseg v0.4.6/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
4 changes: 2 additions & 2 deletions padding/padding.go
Expand Up @@ -5,8 +5,8 @@ import (
"io"
"strings"

"github.com/mattn/go-runewidth"
"github.com/muesli/reflow/ansi"
"github.com/rivo/uniseg"
)

type PaddingFunc func(w io.Writer)
Expand Down Expand Up @@ -71,7 +71,7 @@ func (w *Writer) Write(b []byte) (int, error) {
w.ansi = false
}
} else {
w.lineLen += runewidth.StringWidth(string(c))
w.lineLen += uniseg.StringWidth(string(c))

if c == '\n' {
// end of current line
Expand Down
39 changes: 23 additions & 16 deletions truncate/truncate.go
Expand Up @@ -4,7 +4,7 @@ import (
"bytes"
"io"

"github.com/mattn/go-runewidth"
"github.com/rivo/uniseg"

"github.com/muesli/reflow/ansi"
)
Expand All @@ -13,27 +13,27 @@ type Writer struct {
width uint
tail string

ansiWriter *ansi.Writer
ansiWriter ansi.Writer
buf bytes.Buffer
ansi bool
}

func NewWriter(width uint, tail string) *Writer {
w := &Writer{
w := Writer{
width: width,
tail: tail,
}
w.ansiWriter = &ansi.Writer{
w.ansiWriter = ansi.Writer{
Forward: &w.buf,
}
return w
return &w
}

func NewWriterPipe(forward io.Writer, width uint, tail string) *Writer {
return &Writer{
width: width,
tail: tail,
ansiWriter: &ansi.Writer{
ansiWriter: ansi.Writer{
Forward: forward,
},
}
Expand Down Expand Up @@ -79,17 +79,24 @@ func (w *Writer) Write(b []byte) (int, error) {
w.width -= uint(tw)
var curWidth uint

for _, c := range string(b) {
if c == ansi.Marker {
rest := b
state := -1
var cluster []byte

for len(rest) > 0 {
var width int
cluster, rest, width, state = uniseg.FirstGraphemeCluster(rest, state)

switch {
case len(cluster) == 1 && rune(cluster[0]) == ansi.Marker:
// ANSI escape sequence
w.ansi = true
} else if w.ansi {
if ansi.IsTerminator(c) {
// ANSI sequence terminated
w.ansi = false
}
} else {
curWidth += uint(runewidth.RuneWidth(c))
case len(cluster) == 1 && w.ansi && ansi.IsTerminator(rune(cluster[0])):
// ANSI sequence terminated
w.ansi = false
case w.ansi:
default:
curWidth += uint(width)
}

if curWidth > w.width {
Expand All @@ -100,7 +107,7 @@ func (w *Writer) Write(b []byte) (int, error) {
return n, err
}

_, err := w.ansiWriter.Write([]byte(string(c)))
_, err := w.ansiWriter.Write(cluster)
if err != nil {
return 0, err
}
Expand Down
2 changes: 1 addition & 1 deletion truncate/truncate_test.go
Expand Up @@ -170,7 +170,7 @@ func TestWriter_Error(t *testing.T) {

f := &Writer{
width: 2,
ansiWriter: &ansi.Writer{Forward: fakeWriter{}},
ansiWriter: ansi.Writer{Forward: fakeWriter{}},
}

if _, err := f.Write([]byte("foo")); err != fakeErr {
Expand Down
35 changes: 20 additions & 15 deletions wrap/wrap.go
Expand Up @@ -5,8 +5,8 @@ import (
"strings"
"unicode"

"github.com/mattn/go-runewidth"
"github.com/muesli/reflow/ansi"
"github.com/rivo/uniseg"
)

var (
Expand Down Expand Up @@ -78,37 +78,42 @@ func (w *Wrap) Write(b []byte) (int, error) {
return w.buf.Write(b)
}

for _, c := range s {
if c == ansi.Marker {
state := -1
var cluster string

for len(s) > 0 {
cluster, s, width, state = uniseg.FirstGraphemeClusterInString(s, state)
rs := []rune(cluster)

switch {
case len(rs) == 1 && rs[0] == ansi.Marker:
w.ansi = true
} else if w.ansi {
if ansi.IsTerminator(c) {
w.ansi = false
}
} else if inGroup(w.Newline, c) {
case len(rs) == 1 && w.ansi && ansi.IsTerminator(rs[0]):
w.ansi = false
case w.ansi:
case len(rs) == 1 && inGroup(w.Newline, rs[0]):
w.addNewLine()
w.forcefulNewline = false
continue
} else {
width := runewidth.RuneWidth(c)

default:
if w.lineLen+width > w.Limit {
w.addNewLine()
w.forcefulNewline = true
}

if w.lineLen == 0 {
if w.forcefulNewline && !w.PreserveSpace && unicode.IsSpace(c) {
switch {
case w.lineLen == 0:
if len(rs) == 1 && w.forcefulNewline && !w.PreserveSpace && unicode.IsSpace(rs[0]) {
continue
}
} else {
default:
w.forcefulNewline = false
}

w.lineLen += width
}

_, _ = w.buf.WriteRune(c)
_, _ = w.buf.WriteString(cluster)
}

return len(b), nil
Expand Down

0 comments on commit 7edce3e

Please sign in to comment.