Skip to content

Commit

Permalink
parser: fix a keep bug where some text may be dropped unexpectedly
Browse files Browse the repository at this point in the history
Ruby: [Bug #18245] [ruby-core:105587]

Reported by Hassan Abdul Rehman.
  • Loading branch information
kou committed Dec 24, 2021
1 parent ba7b735 commit 5c6523d
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 34 deletions.
85 changes: 51 additions & 34 deletions lib/csv/parser.rb
Expand Up @@ -166,6 +166,7 @@ def eos?
end

# Opens a new keep entry at the scanner's current position.
#
# adjust_last_keep runs first so that the keep that is currently
# innermost (if any) is brought up to date before a new
# [start_position, buffer] pair (buffer initially nil) is pushed
# on top of it.
def keep_start
  adjust_last_keep
  @keeps << [@scanner.pos, nil]
end

Expand Down Expand Up @@ -196,32 +197,48 @@ def keep_back
end

def keep_drop
@keeps.pop
_, buffer = @keeps.pop
return unless buffer

last_keep = @keeps.last
return unless last_keep

if last_keep[1]
last_keep[1] << buffer
else
last_keep[1] = buffer
end
end

# Delegates to @scanner.rest and returns its result.
# NOTE(review): presumably the not-yet-scanned remainder of the current
# scanner's string — confirm that @scanner is a StringScanner.
def rest
@scanner.rest
end

private
# Brings the innermost keep entry up to date with the scanner.
#
# Each entry in @keeps is a [start_position, buffer] pair. The bytes
# of the scanner's string between the entry's start position and the
# scanner's current position are appended to the entry's buffer (a
# copy of them becomes the buffer when there is none yet), and the
# entry's start position is then reset to 0.
#
# Nothing happens when there is no keep entry, or when the scanner is
# still exactly at the entry's start position.
def adjust_last_keep
  last_keep = @keeps.last
  return unless last_keep

  start, buffer = last_keep
  return if @scanner.pos == start

  # byteslice returns nil when the requested range is not valid;
  # in that case only the start position is reset.
  kept = @scanner.string.byteslice(start, @scanner.pos - start)
  if kept
    if buffer
      buffer << kept
    else
      last_keep[1] = kept.dup
    end
  end
  last_keep[0] = 0
end

def read_chunk
return false if @last_scanner

unless @keeps.empty?
keep = @keeps.last
keep_start = keep[0]
string = @scanner.string
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
if keep_data
keep_buffer = keep[1]
if keep_buffer
keep_buffer << keep_data
else
keep[1] = keep_data.dup
end
end
keep[0] = 0
end
adjust_last_keep

input = @inputs.first
case input
Expand Down Expand Up @@ -728,28 +745,26 @@ def may_quoted?
sample[0, 128].index(@quote_character)
end

SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
if SCANNER_TEST
class UnoptimizedStringIO
def initialize(string)
@io = StringIO.new(string, "rb:#{string.encoding}")
end
class UnoptimizedStringIO # :nodoc:
def initialize(string)
@io = StringIO.new(string, "rb:#{string.encoding}")
end

def gets(*args)
@io.gets(*args)
end
def gets(*args)
@io.gets(*args)
end

def each_line(*args, &block)
@io.each_line(*args, &block)
end
def each_line(*args, &block)
@io.each_line(*args, &block)
end

def eof?
@io.eof?
end
def eof?
@io.eof?
end
end

SCANNER_TEST_CHUNK_SIZE =
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
if SCANNER_TEST
def build_scanner
inputs = @samples.collect do |sample|
UnoptimizedStringIO.new(sample)
Expand All @@ -759,9 +774,11 @@ def build_scanner
else
inputs << @input
end
chunk_size =
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
InputsScanner.new(inputs,
@encoding,
chunk_size: SCANNER_TEST_CHUNK_SIZE)
chunk_size: chunk_size)
end
else
def build_scanner
Expand Down
37 changes: 37 additions & 0 deletions test/csv/parse/test_inputs_scanner.rb
@@ -0,0 +1,37 @@
require_relative "../helper"

# Tests for nested keep_start / keep_back / keep_drop calls on
# CSV::Parser::InputsScanner when the input is read with a small
# chunk_size.
class TestCSVParseInputsScanner < Test::Unit::TestCase
  include Helper

  # Two nested keeps are opened and both are rolled back with
  # keep_back; scanning afterwards must start again from "a".
  def test_keep_over_chunks_nested_back
    scanner = new_chunked_scanner("abcdefghijklmnl", 2)
    scanner.keep_start
    assert_equal("abc", scanner.scan_all(/[a-c]+/))
    scanner.keep_start
    assert_equal("def", scanner.scan_all(/[d-f]+/))
    scanner.keep_back
    scanner.keep_back
    assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
  end

  # The inner keep is discarded with keep_drop and the outer one is
  # rolled back with keep_back; scanning afterwards must start again
  # from "a".
  def test_keep_over_chunks_nested_drop_back
    scanner = new_chunked_scanner("abcdefghijklmnl", 3)
    scanner.keep_start
    assert_equal("ab", scanner.scan(/../))
    scanner.keep_start
    assert_equal("c", scanner.scan(/./))
    assert_equal("d", scanner.scan(/./))
    scanner.keep_drop
    scanner.keep_back
    assert_equal("abcdefg", scanner.scan_all(/[a-g]+/))
  end

  private

  # Builds an InputsScanner over +text+, wrapped in
  # UnoptimizedStringIO, using the given +chunk_size+.
  def new_chunked_scanner(text, chunk_size)
    io = CSV::Parser::UnoptimizedStringIO.new(text)
    CSV::Parser::InputsScanner.new([io],
                                   Encoding::UTF_8,
                                   nil,
                                   chunk_size: chunk_size)
  end
end

0 comments on commit 5c6523d

Please sign in to comment.