Skip to content

Commit

Permalink
[ruby/reline] Improve the performance of get_mbchar_width
Browse files Browse the repository at this point in the history
It is about three times faster to use one big regexp instead of
sequential matching.

ruby/reline@e36f6c0707
  • Loading branch information
mame authored and aycabta committed Dec 4, 2020
1 parent b3e0db8 commit 76cac4c
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 28 deletions.
40 changes: 25 additions & 15 deletions lib/reline/unicode.rb
Expand Up @@ -72,20 +72,32 @@ def self.escape_for_print(str)
}.join
end

require 'reline/unicode/east_asian_width'

# Single combined pattern that get_mbchar_width matches against a character
# to classify its display width. Each named capture group stands for one
# width class, and get_mbchar_width maps the group that matched to a number:
#   width_2_1 - a character listed in EscapedChars (displayed as ^ + char)
#   width_3   - U+2E3B THREE-EM DASH
#   width_0   - a \p{M} (mark) character
#   width_2_2 - EastAsianWidth::TYPE_F or EastAsianWidth::TYPE_W
#   width_1   - EastAsianWidth::TYPE_H, TYPE_NA or TYPE_N
# Alternatives are tried left to right, so an earlier group takes precedence
# when a character would satisfy more than one of them.
# The /x flag makes the literal whitespace and line breaks below insignificant;
# (?# ...) groups are inline regexp comments.
# NOTE(review): EastAsianWidth::TYPE_A (ambiguous width) is not interpolated
# here, so a character matched only by TYPE_A matches none of the groups --
# confirm whether it should be mapped to Reline.ambiguous_width.
MBCharWidthRE = /
(?<width_2_1>
[#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
)
| (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
| (?<width_0>^\p{M})
| (?<width_2_2>
#{ EastAsianWidth::TYPE_F }
| #{ EastAsianWidth::TYPE_W }
)
| (?<width_1>
#{ EastAsianWidth::TYPE_H }
| #{ EastAsianWidth::TYPE_NA }
| #{ EastAsianWidth::TYPE_N }
)
/x

def self.get_mbchar_width(mbchar)
case mbchar.encode(Encoding::UTF_8)
when *EscapedChars # ^ + char, such as ^M, ^H, ^[, ...
2
when /^\u{2E3B}/ # THREE-EM DASH
3
when /^\p{M}/
0
when EastAsianWidth::TYPE_A
Reline.ambiguous_width
when EastAsianWidth::TYPE_F, EastAsianWidth::TYPE_W
2
when EastAsianWidth::TYPE_H, EastAsianWidth::TYPE_NA, EastAsianWidth::TYPE_N
1
m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
case
when m[:width_2_1], m[:width_2_2] then 2
when m[:width_3] then 3
when m[:width_0] then 0
when m[:width_1] then 1
else
nil
end
Expand Down Expand Up @@ -591,5 +603,3 @@ def self.vi_first_print(line)
[byte_size, width]
end
end

require 'reline/unicode/east_asian_width'
26 changes: 13 additions & 13 deletions lib/reline/unicode/east_asian_width.rb
@@ -1,27 +1,27 @@
class Reline::Unicode::EastAsianWidth
# This is based on EastAsianWidth.txt
# http://www.unicode.org/Public/13.0.0/ucd/EastAsianWidth.txt
# EastAsianWidth.txt

# Fullwidth
TYPE_F = /^([#{ %W(
TYPE_F = /^[#{ %W(
\u{3000}
\u{FF01}-\u{FF60}
\u{FFE0}-\u{FFE6}
).join }])/
).join }]/

# Halfwidth
TYPE_H = /^([#{ %W(
TYPE_H = /^[#{ %W(
\u{20A9}
\u{FF61}-\u{FFBE}
\u{FFC2}-\u{FFC7}
\u{FFCA}-\u{FFCF}
\u{FFD2}-\u{FFD7}
\u{FFDA}-\u{FFDC}
\u{FFE8}-\u{FFEE}
).join }])/
).join }]/

# Wide
TYPE_W = /^([#{ %W(
TYPE_W = /^[#{ %W(
\u{1100}-\u{115F}
\u{231A}-\u{231B}
\u{2329}-\u{232A}
Expand Down Expand Up @@ -136,21 +136,21 @@ class Reline::Unicode::EastAsianWidth
\u{1FAD0}-\u{1FAD6}
\u{20000}-\u{2FFFD}
\u{30000}-\u{3FFFD}
).join }])/
).join }]/

# Narrow
TYPE_NA = /^([#{ %W(
TYPE_NA = /^[#{ %W(
\u{0020}-\u{007E}
\u{00A2}-\u{00A3}
\u{00A5}-\u{00A6}
\u{00AC}
\u{00AF}
\u{27E6}-\u{27ED}
\u{2985}-\u{2986}
).join }])/
).join }]/

# Ambiguous
TYPE_A = /^([#{ %W(
TYPE_A = /^[#{ %W(
\u{00A1}
\u{00A4}
\u{00A7}-\u{00A8}
Expand Down Expand Up @@ -330,10 +330,10 @@ class Reline::Unicode::EastAsianWidth
\u{E0100}-\u{E01EF}
\u{F0000}-\u{FFFFD}
\u{100000}-\u{10FFFD}
).join }])/
).join }]/

# Neutral
TYPE_N = /^([#{ %W(
TYPE_N = /^[#{ %W(
\u{0000}-\u{001F}
\u{007F}-\u{00A0}
\u{00A9}
Expand Down Expand Up @@ -1160,5 +1160,5 @@ class Reline::Unicode::EastAsianWidth
\u{1FBF0}-\u{1FBF9}
\u{E0001}
\u{E0020}-\u{E007F}
).join }])/
).join }]/
end

0 comments on commit 76cac4c

Please sign in to comment.