@@ -5,6 +5,18 @@ if ARGV.empty?
5
5
exit 1
6
6
end
7
7
8
# Translates one record's East Asian Width value and General Category
# abbreviation into a width number.
#
# type     - East Asian Width property value as a String
#            ('F', 'W', 'H', 'Na', 'N' or 'A').
# category - General Category abbreviation as a String (e.g. 'Mn').
#
# Returns 0 when category is 'Mn', whatever the type is; otherwise 2 for
# 'F'/'W', 1 for 'H'/'Na'/'N', and -1 for 'A'. Any other type matches no
# branch of the case expression, so the result is nil.
def unicode_width(type, category)
  # The category check runs first, so it overrides the width type.
  return 0 if category == 'Mn' # Nonspacing Mark

  case type
  when 'F', 'W' then 2       # Fullwidth, Wide
  when 'H', 'Na', 'N' then 1 # Halfwidth, Narrow, Neutral
  when 'A' then -1           # Ambiguous
  end
end
19
+
8
20
open ( ARGV . first , 'rt' ) do |f |
9
21
if m = f . gets . match ( /^# EastAsianWidth-(\d +\. \d +\. \d +)\. txt/ )
10
22
unicode_version = m [ 1 ]
@@ -13,66 +25,31 @@ open(ARGV.first, 'rt') do |f|
13
25
unicode_version = nil
14
26
end
15
27
16
- list = [ ]
28
+ widths = [ ]
17
29
f . each_line do |line |
18
- next unless m = line . match ( /^(\h +)(?:\. \. (\h +))?\s *;\s *(\w +)\s +#.+/ )
30
+ next unless /^(?<first> \h +)(?:\. \. (?<last> \h +))?\s *;\s *(?<type> \w +)\s +# +(?<category>[^ ]+)/ =~ line
19
31
20
- first = m [ 1 ] . to_i ( 16 )
21
- last = m [ 2 ] &.to_i ( 16 ) || first
22
- type = m [ 3 ] . to_sym
23
- if !list . empty? and ( list . last [ :range ] . last + 1 ) == first and list . last [ :type ] == type
24
- list . last [ :range ] = ( list . last [ :range ] . first ..last )
25
- else
26
- # [\u{D800}-\u{DFFF}] cause error.
27
- unless ( ( 0xD800 ..0xDFFF ) . to_a & ( first ..last ) . to_a ) . empty?
28
- unless ( first ..0xD7FF ) . to_a . empty?
29
- list << {
30
- range : ( first ..0xD7FF ) ,
31
- type : type . to_sym
32
- }
33
- end
34
- unless ( 0xE000 ..last ) . to_a . empty?
35
- list << {
36
- range : ( first ..0xD7FF ) ,
37
- type : type . to_sym
38
- }
39
- end
40
- else
41
- list << {
42
- range : ( first ..last ) ,
43
- type : type . to_sym
44
- }
45
- end
46
- end
32
+ range = first . to_i ( 16 ) ..( last || first ) . to_i ( 16 )
33
+ widths . fill ( unicode_width ( type , category ) , range )
47
34
end
48
- grouped = list . group_by { |item | item [ :type ] } . map { |item | [ item . first , item . last . map { |row | row [ :range ] } ] } . to_h
49
- grouped = %i{ F H W Na A N } . map { |type | [ type , grouped [ type ] ] }
50
- puts <<EOH
51
- class Reline::Unicode::EastAsianWidth
52
- # This is based on EastAsianWidth.txt
53
- # UNICODE_VERSION = #{ unicode_version ? "'#{ unicode_version } '" : 'nil' }
54
35
55
- EOH
56
- puts grouped . map { |item |
57
- type , ranges = item
58
- output = " # %s\n " %
59
- case type
60
- when :F then 'Fullwidth'
61
- when :H then 'Halfwidth'
62
- when :W then 'Wide'
63
- when :Na then 'Narrow'
64
- when :A then 'Ambiguous'
65
- when :N then 'Neutral'
66
- end
67
- output += " TYPE_%s = /^[\# { %%W(\n " % type . upcase
68
- output += ranges . map { |range |
69
- if range . first == range . last
70
- ' \u{%04X}' % range . first
71
- else
72
- ' \u{%04X}-\u{%04X}' % [ range . first , range . last ]
73
- end
74
- } . join ( "\n " )
75
- output += "\n ).join }]/\n "
76
- } . join ( "\n " )
77
- puts 'end'
36
+ # EscapedPairs
37
+ [ *0x00 ..0x1F , 0x7F ] . each { |ord | widths [ ord ] = 2 }
38
+ # printable ASCII chars
39
+ ( 0x20 ..0x7E ) . each { |ord | widths [ ord ] = 1 }
40
+
41
+ chunks = widths . each_with_index . chunk { |width , _idx | width || 1 }
42
+ chunk_last_ords = chunks . map { |width , chunk | [ chunk . last . last , width ] }
43
+ chunk_last_ords << [ 0x7fffffff , 1 ]
44
+
45
+ puts <<~EOH
46
+ class Reline::Unicode::EastAsianWidth
47
+ # This is based on EastAsianWidth.txt
48
+ # UNICODE_VERSION = #{ unicode_version ? "'#{ unicode_version } '" : 'nil' }
49
+
50
+ CHUNK_LAST, CHUNK_WIDTH = [
51
+ #{ chunk_last_ords . map { |ord , width | " [0x#{ ord . to_s ( 16 ) } , #{ width } ]" } . join ( ",\n " ) }
52
+ ].transpose.map(&:freeze)
53
+ end
54
+ EOH
78
55
end
0 commit comments