diff --git a/lib/csv.rb b/lib/csv.rb index 72f339a..2c47ead 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -341,6 +341,7 @@ # liberal_parsing: false, # nil_value: nil, # empty_value: "", +# strip: false, # # For generating. # write_headers: nil, # quote_empty: true, @@ -348,7 +349,6 @@ # write_converters: nil, # write_nil_value: nil, # write_empty_value: "", -# strip: false, # } # # ==== Options for Parsing @@ -366,8 +366,9 @@ # - +header_converters+: Specifies the header converters to be used. # - +skip_blanks+: Specifies whether blanks lines are to be ignored. # - +skip_lines+: Specifies how comments lines are to be recognized. -# - +strip+: Specifies whether leading and trailing whitespace are -# to be stripped from fields.. +# - +strip+: Specifies whether leading and trailing whitespace are to be +# stripped from fields. This must be compatible with +col_sep+; if it is not, +# then an +ArgumentError+ exception will be raised. # - +liberal_parsing+: Specifies whether \CSV should attempt to parse # non-compliant data. # - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field. @@ -946,6 +947,7 @@ def initialize(message, line_number) liberal_parsing: false, nil_value: nil, empty_value: "", + strip: false, # For generating. write_headers: nil, quote_empty: true, @@ -953,7 +955,6 @@ def initialize(message, line_number) write_converters: nil, write_nil_value: nil, write_empty_value: "", - strip: false, }.freeze class << self @@ -1879,11 +1880,11 @@ def initialize(data, encoding: nil, nil_value: nil, empty_value: "", + strip: false, quote_empty: true, write_converters: nil, write_nil_value: nil, - write_empty_value: "", - strip: false) + write_empty_value: "") raise ArgumentError.new("Cannot parse nil as CSV") if data.nil? if data.is_a?(String) diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 3334acf..f87db3b 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -361,6 +361,7 @@ def prepare prepare_skip_lines prepare_strip prepare_separators + validate_strip_and_col_sep_options prepare_quoted prepare_unquoted prepare_line @@ -531,6 +532,28 @@ def prepare_separators @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding)) end + # This method verifies that there are no (obvious) ambiguities with the + # provided +col_sep+ and +strip+ parsing options. For example, if +col_sep+ + # and +strip+ were both equal to +\t+, then there would be no clear way to + # parse the input. + def validate_strip_and_col_sep_options + return unless @strip + + if @strip.is_a?(String) + if @column_separator.start_with?(@strip) || @column_separator.end_with?(@strip) + raise ArgumentError, + "The provided strip (#{@escaped_strip}) and " \ + "col_sep (#{@escaped_column_separator}) options are incompatible." + end + else + if Regexp.new("\\A[#{@escaped_strip}]|[#{@escaped_strip}]\\z").match?(@column_separator) + raise ArgumentError, + "The provided strip (true) and " \ + "col_sep (#{@escaped_column_separator}) options are incompatible." + end + end + end + def prepare_quoted if @quote_character @quotes = Regexp.new(@escaped_quote_character + diff --git a/test/csv/parse/test_strip.rb b/test/csv/parse/test_strip.rb index 3564fcb..c5e3520 100644 --- a/test/csv/parse/test_strip.rb +++ b/test/csv/parse/test_strip.rb @@ -80,4 +80,33 @@ def test_do_not_strip_crlf %Q{"a" ,"b " \r\n}, strip: true)) end + + def test_col_sep_incompatible_true + message = "The provided strip (true) and " \ + "col_sep (\\t) options are incompatible." + assert_raise_with_message(ArgumentError, message) do + CSV.parse_line(%Q{"a"\t"b"\n}, + col_sep: "\t", + strip: true) + end + end + + def test_col_sep_incompatible_string + message = "The provided strip (\\t) and " \ + "col_sep (\\t) options are incompatible." + assert_raise_with_message(ArgumentError, message) do + CSV.parse_line(%Q{"a"\t"b"\n}, + col_sep: "\t", + strip: "\t") + end + end + + def test_col_sep_compatible_string + assert_equal( + ["a", "b"], + CSV.parse_line(%Q{\va\tb\v\n}, + col_sep: "\t", + strip: "\v") + ) + end end