From 7edce3ea0d5bd88f31e63ce46d18201a0ed0fd6a Mon Sep 17 00:00:00 2001 From: Michael Lorant Date: Fri, 26 Jan 2024 15:01:30 +1100 Subject: [PATCH] Replace go-runewidth with uniseg Replace the use of `RuneWidth` and `StringWidth` from `mattn/go-runewidth` with equivalent functions from `rivo/uniseg`. It is important to be aware that using `RuneWidth` will not be accurate as the width of a rune cannot be determined in isolation. This requires a shift to thinking about grapheme clusters instead. Unfortunately, due to the complexity of identifying grapheme clusters, there have been some significant performance regressions in two functions: - PrintableRuneWidth: 10x slower - TruncateString: 4x slower Two other functions have had performance improvements: - MarginString: 2x faster - PaddingString: 2x faster The documentation for `rivo/uniseg` mentions the use of `Step` and `StepString` performing "orders of magnitude faster" than using the `NewGraphemes` method. However, implementing these changes only resulted in a 10% performance increase. 
Signed-off-by: Michael Lorant --- .github/workflows/build.yml | 2 +- ansi/buffer.go | 22 ++++++++++----------- go.mod | 4 ++-- go.sum | 6 ++---- padding/padding.go | 4 ++-- truncate/truncate.go | 39 ++++++++++++++++++++++--------------- truncate/truncate_test.go | 2 +- wrap/wrap.go | 35 +++++++++++++++++++-------------- 8 files changed, 62 insertions(+), 52 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a3e0867..15af530 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,7 +5,7 @@ jobs: build: strategy: matrix: - go-version: [~1.11, ^1] + go-version: [~1.18, ^1] os: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.os }} env: diff --git a/ansi/buffer.go b/ansi/buffer.go index 471bcaf..96c6661 100644 --- a/ansi/buffer.go +++ b/ansi/buffer.go @@ -3,7 +3,7 @@ package ansi import ( "bytes" - "github.com/mattn/go-runewidth" + "github.com/rivo/uniseg" ) // Buffer is a buffer aware of ANSI escape sequences. @@ -19,22 +19,22 @@ func (w Buffer) PrintableRuneWidth() int { // PrintableRuneWidth returns the cell width of the given string. 
func PrintableRuneWidth(s string) int { - var n int + n := make([]rune, 0, len(s)) var ansi bool for _, c := range s { - if c == Marker { + switch { + case c == Marker: // ANSI escape sequence ansi = true - } else if ansi { - if IsTerminator(c) { - // ANSI sequence terminated - ansi = false - } - } else { - n += runewidth.RuneWidth(c) + case ansi && IsTerminator(c): + // ANSI sequence terminated + ansi = false + case ansi: + default: + n = append(n, c) } } - return n + return uniseg.StringWidth(string(n)) } diff --git a/go.mod b/go.mod index 8aa39aa..37467ba 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module github.com/muesli/reflow -go 1.13 +go 1.18 -require github.com/mattn/go-runewidth v0.0.14 +require github.com/rivo/uniseg v0.4.6 diff --git a/go.sum b/go.sum index 2579e19..e464460 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,2 @@ -github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= -github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= -github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.6 h1:Sovz9sDSwbOz9tgUy8JpT+KgCkPYJEN/oYzlJiYTNLg= +github.com/rivo/uniseg v0.4.6/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= diff --git a/padding/padding.go b/padding/padding.go index 707ec46..2ece223 100644 --- a/padding/padding.go +++ b/padding/padding.go @@ -5,8 +5,8 @@ import ( "io" "strings" - "github.com/mattn/go-runewidth" "github.com/muesli/reflow/ansi" + "github.com/rivo/uniseg" ) type PaddingFunc func(w io.Writer) @@ -71,7 +71,7 @@ func (w *Writer) Write(b []byte) (int, error) { w.ansi = false } } else { - w.lineLen += runewidth.StringWidth(string(c)) + w.lineLen += uniseg.StringWidth(string(c)) if c == '\n' { // end of current line diff --git a/truncate/truncate.go b/truncate/truncate.go index 5aab5f8..88ef432 100644 --- 
a/truncate/truncate.go +++ b/truncate/truncate.go @@ -4,7 +4,7 @@ import ( "bytes" "io" - "github.com/mattn/go-runewidth" + "github.com/rivo/uniseg" "github.com/muesli/reflow/ansi" ) @@ -13,27 +13,27 @@ type Writer struct { width uint tail string - ansiWriter *ansi.Writer + ansiWriter ansi.Writer buf bytes.Buffer ansi bool } func NewWriter(width uint, tail string) *Writer { - w := &Writer{ + w := Writer{ width: width, tail: tail, } - w.ansiWriter = &ansi.Writer{ + w.ansiWriter = ansi.Writer{ Forward: &w.buf, } - return w + return &w } func NewWriterPipe(forward io.Writer, width uint, tail string) *Writer { return &Writer{ width: width, tail: tail, - ansiWriter: &ansi.Writer{ + ansiWriter: ansi.Writer{ Forward: forward, }, } @@ -79,17 +79,24 @@ func (w *Writer) Write(b []byte) (int, error) { w.width -= uint(tw) var curWidth uint - for _, c := range string(b) { - if c == ansi.Marker { + rest := b + state := -1 + var cluster []byte + + for len(rest) > 0 { + var width int + cluster, rest, width, state = uniseg.FirstGraphemeCluster(rest, state) + + switch { + case len(cluster) == 1 && rune(cluster[0]) == ansi.Marker: // ANSI escape sequence w.ansi = true - } else if w.ansi { - if ansi.IsTerminator(c) { - // ANSI sequence terminated - w.ansi = false - } - } else { - curWidth += uint(runewidth.RuneWidth(c)) + case len(cluster) == 1 && w.ansi && ansi.IsTerminator(rune(cluster[0])): + // ANSI sequence terminated + w.ansi = false + case w.ansi: + default: + curWidth += uint(width) } if curWidth > w.width { @@ -100,7 +107,7 @@ func (w *Writer) Write(b []byte) (int, error) { return n, err } - _, err := w.ansiWriter.Write([]byte(string(c))) + _, err := w.ansiWriter.Write(cluster) if err != nil { return 0, err } diff --git a/truncate/truncate_test.go b/truncate/truncate_test.go index b9933e0..52d5a39 100644 --- a/truncate/truncate_test.go +++ b/truncate/truncate_test.go @@ -170,7 +170,7 @@ func TestWriter_Error(t *testing.T) { f := &Writer{ width: 2, - ansiWriter: 
&ansi.Writer{Forward: fakeWriter{}}, + ansiWriter: ansi.Writer{Forward: fakeWriter{}}, } if _, err := f.Write([]byte("foo")); err != fakeErr { diff --git a/wrap/wrap.go b/wrap/wrap.go index b6f2a80..dfc20f4 100644 --- a/wrap/wrap.go +++ b/wrap/wrap.go @@ -5,8 +5,8 @@ import ( "strings" "unicode" - "github.com/mattn/go-runewidth" "github.com/muesli/reflow/ansi" + "github.com/rivo/uniseg" ) var ( @@ -78,37 +78,42 @@ func (w *Wrap) Write(b []byte) (int, error) { return w.buf.Write(b) } - for _, c := range s { - if c == ansi.Marker { + state := -1 + var cluster string + + for len(s) > 0 { + cluster, s, width, state = uniseg.FirstGraphemeClusterInString(s, state) + rs := []rune(cluster) + + switch { + case len(rs) == 1 && rs[0] == ansi.Marker: w.ansi = true - } else if w.ansi { - if ansi.IsTerminator(c) { - w.ansi = false - } - } else if inGroup(w.Newline, c) { + case len(rs) == 1 && w.ansi && ansi.IsTerminator(rs[0]): + w.ansi = false + case w.ansi: + case len(rs) == 1 && inGroup(w.Newline, rs[0]): w.addNewLine() w.forcefulNewline = false continue - } else { - width := runewidth.RuneWidth(c) - + default: if w.lineLen+width > w.Limit { w.addNewLine() w.forcefulNewline = true } - if w.lineLen == 0 { - if w.forcefulNewline && !w.PreserveSpace && unicode.IsSpace(c) { + switch { + case w.lineLen == 0: + if len(rs) == 1 && w.forcefulNewline && !w.PreserveSpace && unicode.IsSpace(rs[0]) { continue } - } else { + default: w.forcefulNewline = false } w.lineLen += width } - _, _ = w.buf.WriteRune(c) + _, _ = w.buf.WriteString(cluster) } return len(b), nil