-
Notifications
You must be signed in to change notification settings - Fork 180
/
strscan.rb
373 lines (300 loc) · 8.5 KB
/
strscan.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
# truffleruby_primitives: true
# Copyright (c) 2013, Brian Shirai
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# 3. Neither the name of the library nor the names of its contributors may be
# used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Modifications made by the Truffle team are:
#
# Copyright (c) 2017, 2019 Oracle and/or its affiliates. All rights reserved. This
# code is released under a tri EPL/GPL/LGPL license. You can use it,
# redistribute it and/or modify it under the terms of the:
#
# Eclipse Public License version 2.0, or
# GNU General Public License version 2, or
# GNU Lesser General Public License version 2.1.
# Error raised by StringScanner#unscan when there is no previous match
# whose effect could be undone.
class ScanError < StandardError; end
# Pure-Ruby implementation of StringScanner.
#
# A scanner keeps a byte offset (@pos) into the scanned string (@string), the
# offset before the last successful scan (@prev_pos) and the MatchData of the
# last scanning operation (@match, nil when the last operation failed).
# Every position handled by this class is a byte offset, not a character
# index.
class StringScanner
  Id = 'None$Id'.freeze
  Version = '1.0.0'.freeze

  # Current scan position, as a byte offset into the scanned string.
  attr_reader :pos
  alias_method :pointer, :pos

  # Creates a scanner positioned at the start of +string+.
  #
  # string       - the text to scan. An exact String instance is used
  #                directly; anything else is converted with StringValue and
  #                copied into a plain String for scanning, while #string
  #                keeps returning the converted original.
  # dup          - accepted but not used by this implementation.
  # fixed_anchor - stored as a boolean; consulted by #scan_internal when
  #                choosing the start offset handed to the regexp matcher.
  def initialize(string, dup = false, fixed_anchor: false)
    if string.instance_of? String
      @original = string
      @string = string
    else
      @original = StringValue(string)
      @string = String.new @original
    end

    reset_state

    @fixed_anchor = Primitive.as_boolean(fixed_anchor)
  end

  # Moves the scan position to byte offset +n+. A negative +n+ counts back
  # from the end of the string.
  #
  # Raises RangeError when the resulting offset lies outside 0..bytesize.
  def pos=(n)
    n = Integer(n)
    n += @string.bytesize if n < 0

    if n < 0 || n > @string.bytesize
      raise RangeError, "index out of range (#{n})"
    end

    @pos = n
  end
  alias_method :pointer=, :pos=

  # Returns group +n+ of the last match, or nil when the last scan failed.
  #
  # Raises TypeError when +n+ is a Range (only checked while a match exists).
  def [](n)
    return unless @match

    raise TypeError, "no implicit conversion of #{n.class} into Integer" if Range === n
    @match[n]
  end

  # True at the very start of the string or directly after a newline
  # (byte value 10).
  def beginning_of_line?
    @pos == 0 || @string.getbyte(@pos - 1) == 10
  end
  alias_method :bol?, :beginning_of_line?

  # Captures of the last match, or nil when there is none.
  def captures
    @match&.captures
  end

  # Scan position expressed in characters: the number of characters in the
  # first @pos bytes of the string.
  def charpos
    @string.byteslice(0, @pos).length
  end

  # Like #scan, but the scan position is left unchanged.
  def check(pattern)
    scan_internal pattern, false, true, true
  end

  # Like #scan_until, but the scan position is left unchanged.
  def check_until(pattern)
    scan_internal pattern, false, true, false
  end

  # Obsolete alias of #terminate; warns when $VERBOSE is set.
  def clear
    warn 'StringScanner#clear is obsolete; use #terminate instead' if $VERBOSE
    terminate
  end

  # Appends +str+ (converted with StringValue) to the scanned string and
  # returns the scanner. The scan position is not changed.
  #
  # NOTE(review): when the scanner was built from something other than an
  # exact String instance, @string is a copy, so the appended data is not
  # visible through #string - confirm this is intended.
  def concat(str)
    @string << StringValue(str)
    self
  end
  alias_method :<<, :concat

  # Obsolete alias of #eos?; warns when $VERBOSE is set.
  def empty?
    warn 'StringScanner#empty? is obsolete; use #eos? instead' if $VERBOSE
    eos?
  end

  # True when the scan position is at (or past) the end of the string.
  #
  # Raises ArgumentError when the scanner has no string.
  def eos?
    raise ArgumentError, 'uninitialized StringScanner object' unless @string
    @pos >= @string.bytesize
  end

  # Searches ahead for +pattern+ without moving the scan position. Returns
  # the number of bytes from the current position to the end of the match,
  # or nil when nothing matches.
  def exist?(pattern)
    scan_internal pattern, false, false, false
  end

  # Whether the scanner was created with a truthy +fixed_anchor+ option.
  def fixed_anchor?
    @fixed_anchor
  end

  # Consumes exactly one byte and returns it as a one-byte string, or returns
  # nil (and clears the last match) at the end of the string.
  def get_byte
    if eos?
      @match = nil
      return nil
    end

    # We need to match one byte, regardless of the string encoding
    pos = @pos
    @match = Primitive.matchdata_create_single_group(/./mn, @string, pos, pos + 1)
    @prev_pos = pos
    @pos = pos + 1

    @string.byteslice(@prev_pos, 1)
  end

  # Obsolete alias of #get_byte; warns when $VERBOSE is set.
  def getbyte
    warn 'StringScanner#getbyte is obsolete; use #get_byte instead' if $VERBOSE
    get_byte
  end

  # Consumes one character (newlines included) and returns it.
  def getch
    scan(/./m)
  end

  # Short human-readable description: the position, the total size and up to
  # five bytes of context on each side of the scan position.
  #
  # The context is sliced by bytes because @pos is a byte offset; slicing by
  # character index would show the wrong text for multibyte strings.
  def inspect
    return "#<#{self.class} (uninitialized)>" unless defined? @string
    return "#<#{self.class} fin>" if eos?

    pos = @pos
    total = @string.bytesize

    rest = if total - pos > 5
             "#{@string.byteslice(pos, 5)}..."
           else
             @string.byteslice(pos, total - pos)
           end

    return "#<#{self.class} #{pos}/#{total} @ #{rest.inspect}>" unless pos > 0

    prev = if pos > 5
             "...#{@string.byteslice(pos - 5, 5)}"
           else
             @string.byteslice(0, pos)
           end

    "#<#{self.class} #{pos}/#{total} #{prev.inspect} @ #{rest.inspect}>"
  end

  # Tests +pattern+ at the current position without moving it. Returns the
  # match length in bytes, or nil.
  def match?(pattern)
    scan_internal pattern, false, false, true
  end

  # Text of the last match, or nil.
  def matched
    @match&.to_s
  end

  # Whether the last scanning operation produced a match.
  def matched?
    Primitive.as_boolean(@match)
  end

  # Size in bytes of the last match, or nil.
  def matched_size
    Primitive.match_data_byte_end(@match, 0) - Primitive.match_data_byte_begin(@match, 0) if @match
  end

  # Text following the last match, or nil.
  def post_match
    @match&.post_match
  end

  # Everything in the string before the start of the last match, or nil.
  def pre_match
    @string.byteslice(0, Primitive.match_data_byte_begin(@match, 0)) if @match
  end

  # Rewinds both positions to the start and forgets the last match.
  private def reset_state
    @prev_pos = @pos = 0
    @match = nil
  end

  # Rewinds the scanner to the start of the string and returns the scanner.
  def reset
    reset_state
    self
  end

  # The not-yet-scanned remainder of the string.
  def rest
    @string.byteslice(@pos, @string.bytesize - @pos)
  end

  # Opposite of #eos?.
  def rest?
    !eos?
  end

  # Number of bytes left after the scan position.
  def rest_size
    @string.bytesize - @pos
  end

  # Obsolete alias of #rest_size; warns when $VERBOSE is set.
  def restsize
    warn 'StringScanner#restsize is obsolete; use #rest_size instead' if $VERBOSE
    rest_size
  end

  # Matches +pattern+ at the current position; on success advances past the
  # match and returns the matched text, otherwise returns nil.
  def scan(pattern)
    scan_internal pattern, true, true, true
  end

  # Searches ahead for +pattern+; on success advances to the end of the match
  # and returns the text from the old position up to that point.
  def scan_until(pattern)
    scan_internal pattern, true, true, false
  end

  # Generic form of #scan: +advance_pos+ selects whether the position moves,
  # +getstr+ whether the text (true) or its byte length (false) is returned.
  def scan_full(pattern, advance_pos, getstr)
    scan_internal pattern, advance_pos, getstr, true
  end

  # Generic form of #scan_until; the flags mean the same as for #scan_full.
  def search_full(pattern, advance_pos, getstr)
    scan_internal pattern, advance_pos, getstr, false
  end

  # Returns the class itself.
  def self.must_C_version
    self
  end

  # Number of elements in the last MatchData, or nil.
  def size
    @match&.size
  end

  # Like #scan, but returns the match length in bytes instead of the text.
  def skip(pattern)
    scan_internal pattern, true, false, true
  end

  # Like #scan_until, but returns the number of bytes advanced instead of the
  # text.
  def skip_until(pattern)
    scan_internal pattern, true, false, false
  end

  # The string given to the scanner (after StringValue conversion when it was
  # not an exact String instance).
  def string
    @original
  end

  # Replaces the scanned string and resets the scanner state. The same
  # conversion rules as in #initialize apply.
  def string=(string)
    reset_state

    if string.instance_of? String
      @original = string
      @string = string
    else
      @original = StringValue(string)
      @string = String.new @original
    end
  end

  # Moves the scan position to the end of the string, forgets the last match
  # and returns the scanner.
  def terminate
    @match = nil
    # Measure the buffer that is actually scanned (the one #eos? and #rest
    # use) so that #eos? is guaranteed to be true afterwards.
    @pos = @string.bytesize
    self
  end

  # Undoes the position change of the last successful scan and forgets the
  # match. Returns the scanner.
  #
  # Raises ScanError when there is no last match.
  def unscan
    raise ScanError, 'unscan error: previous match record not exist' if @match.nil?

    @pos = @prev_pos
    @prev_pos = nil
    @match = nil
    self
  end

  # MatchData#values_at on the last match, or nil.
  def values_at(*args)
    @match&.values_at(*args)
  end

  # Returns up to +len+ bytes following the scan position without moving it.
  #
  # Raises ArgumentError when +len+ is negative.
  def peek(len)
    raise ArgumentError, 'negative string size (or size too big)' if len < 0
    return '' if len.zero?

    @string.byteslice(@pos, len)
  end

  # Obsolete alias of #peek; warns when $VERBOSE is set.
  def peep(len)
    warn 'StringScanner#peep is obsolete; use #peek instead' if $VERBOSE
    peek len
  end

  # Validates the arguments of a scanning operation: a String pattern is
  # accepted only when +headonly+ is true, a Regexp always, anything else
  # raises TypeError. Raises ArgumentError when the scanner has no string.
  private def scan_check_args(pattern, headonly)
    case pattern
    when String
      raise TypeError, 'wrong argument type String (expected Regexp)' unless headonly
    when Regexp
    else
      raise TypeError, "bad pattern argument: #{pattern.inspect}"
    end

    raise ArgumentError, 'uninitialized StringScanner object' unless @string
  end

  # Shared implementation of every scanning method.
  #
  # advance_pos - move @pos to the end of the match on success.
  # getstr      - return the scanned text (true) or its byte length (false).
  # headonly    - forwarded to the regexp matcher; the public methods pass
  #               true for "match here" operations (#scan, #check, #skip,
  #               #match?) and false for the *_until / #exist? searches.
  #
  # Returns nil and clears @match when nothing matches.
  #
  # This method is kept very small so that it should fit within 100
  # AST nodes and can be split. This is done to avoid indirect calls
  # to TRegex.
  private def scan_internal(pattern, advance_pos, getstr, headonly)
    scan_check_args(pattern, headonly)

    if Primitive.object_kind_of?(pattern, String)
      md = scan_internal_string_pattern(pattern)
    else
      start = @fixed_anchor ? 0 : @pos
      md = Truffle::RegexpOperations.match_in_region pattern, @string, @pos, @string.bytesize, headonly, start
      Primitive.matchdata_fixup_positions(md, start) if md
    end

    if md
      @match = md
      scan_internal_set_pos_and_str(advance_pos, getstr, md)
    else
      @match = nil
    end
  end

  # Matches a String pattern against the text at the current position.
  # Returns a MatchData-like object from the primitive on success, else nil.
  private def scan_internal_string_pattern(pattern)
    # always headonly=true, see #scan_check_args
    pos = @pos

    if @string.byteslice(pos..).start_with?(pattern)
      Primitive.matchdata_create_single_group(pattern, @string.dup, pos, pos + pattern.bytesize)
    else
      nil
    end
  end

  # Records the pre-scan position, optionally advances @pos to the end of the
  # match and returns either the scanned text or its byte length. Both are
  # measured from the pre-scan position, so for searching operations they
  # include whatever precedes the match.
  private def scan_internal_set_pos_and_str(advance_pos, getstr, md)
    fin = Primitive.match_data_byte_end(md, 0)

    @prev_pos = @pos
    @pos = fin if advance_pos

    width = fin - @prev_pos
    return width unless getstr

    @string.byteslice(@prev_pos, width)
  end
end