Skip to content

Commit 5c6523d

Browse files
committed
parser: fix a keep bug where some text may be dropped unexpectedly
Ruby: [Bug #18245] [ruby-core:105587] Reported by Hassan Abdul Rehman.
1 parent ba7b735 commit 5c6523d

File tree

2 files changed

+88
-34
lines changed

2 files changed

+88
-34
lines changed

lib/csv/parser.rb

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ def eos?
166166
end
167167

168168
def keep_start
169+
adjust_last_keep
169170
@keeps.push([@scanner.pos, nil])
170171
end
171172

@@ -196,32 +197,48 @@ def keep_back
196197
end
197198

198199
def keep_drop
199-
@keeps.pop
200+
_, buffer = @keeps.pop
201+
return unless buffer
202+
203+
last_keep = @keeps.last
204+
return unless last_keep
205+
206+
if last_keep[1]
207+
last_keep[1] << buffer
208+
else
209+
last_keep[1] = buffer
210+
end
200211
end
201212

202213
def rest
203214
@scanner.rest
204215
end
205216

206217
private
218+
def adjust_last_keep
219+
keep = @keeps.last
220+
return if keep.nil?
221+
222+
keep_start = keep[0]
223+
return if @scanner.pos == keep_start
224+
225+
string = @scanner.string
226+
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
227+
if keep_data
228+
keep_buffer = keep[1]
229+
if keep_buffer
230+
keep_buffer << keep_data
231+
else
232+
keep[1] = keep_data.dup
233+
end
234+
end
235+
keep[0] = 0
236+
end
237+
207238
def read_chunk
208239
return false if @last_scanner
209240

210-
unless @keeps.empty?
211-
keep = @keeps.last
212-
keep_start = keep[0]
213-
string = @scanner.string
214-
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
215-
if keep_data
216-
keep_buffer = keep[1]
217-
if keep_buffer
218-
keep_buffer << keep_data
219-
else
220-
keep[1] = keep_data.dup
221-
end
222-
end
223-
keep[0] = 0
224-
end
241+
adjust_last_keep
225242

226243
input = @inputs.first
227244
case input
@@ -728,28 +745,26 @@ def may_quoted?
728745
sample[0, 128].index(@quote_character)
729746
end
730747

731-
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
732-
if SCANNER_TEST
733-
class UnoptimizedStringIO
734-
def initialize(string)
735-
@io = StringIO.new(string, "rb:#{string.encoding}")
736-
end
748+
class UnoptimizedStringIO # :nodoc:
749+
def initialize(string)
750+
@io = StringIO.new(string, "rb:#{string.encoding}")
751+
end
737752

738-
def gets(*args)
739-
@io.gets(*args)
740-
end
753+
def gets(*args)
754+
@io.gets(*args)
755+
end
741756

742-
def each_line(*args, &block)
743-
@io.each_line(*args, &block)
744-
end
757+
def each_line(*args, &block)
758+
@io.each_line(*args, &block)
759+
end
745760

746-
def eof?
747-
@io.eof?
748-
end
761+
def eof?
762+
@io.eof?
749763
end
764+
end
750765

751-
SCANNER_TEST_CHUNK_SIZE =
752-
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
766+
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
767+
if SCANNER_TEST
753768
def build_scanner
754769
inputs = @samples.collect do |sample|
755770
UnoptimizedStringIO.new(sample)
@@ -759,9 +774,11 @@ def build_scanner
759774
else
760775
inputs << @input
761776
end
777+
chunk_size =
778+
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
762779
InputsScanner.new(inputs,
763780
@encoding,
764-
chunk_size: SCANNER_TEST_CHUNK_SIZE)
781+
chunk_size: chunk_size)
765782
end
766783
else
767784
def build_scanner

test/csv/parse/test_inputs_scanner.rb

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
require_relative "../helper"
2+
3+
class TestCSVParseInputsScanner < Test::Unit::TestCase
4+
include Helper
5+
6+
def test_keep_over_chunks_nested_back
7+
input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
8+
scanner = CSV::Parser::InputsScanner.new([input],
9+
Encoding::UTF_8,
10+
nil,
11+
chunk_size: 2)
12+
scanner.keep_start
13+
assert_equal("abc", scanner.scan_all(/[a-c]+/))
14+
scanner.keep_start
15+
assert_equal("def", scanner.scan_all(/[d-f]+/))
16+
scanner.keep_back
17+
scanner.keep_back
18+
assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
19+
end
20+
21+
22+
def test_keep_over_chunks_nested_drop_back
23+
input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl")
24+
scanner = CSV::Parser::InputsScanner.new([input],
25+
Encoding::UTF_8,
26+
nil,
27+
chunk_size: 3)
28+
scanner.keep_start
29+
assert_equal("ab", scanner.scan(/../))
30+
scanner.keep_start
31+
assert_equal("c", scanner.scan(/./))
32+
assert_equal("d", scanner.scan(/./))
33+
scanner.keep_drop
34+
scanner.keep_back
35+
assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
36+
end
37+
end

0 commit comments

Comments
 (0)