diff --git a/lib/csv.rb b/lib/csv.rb
index d4dc569b838f40..81fa99aa993970 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -854,6 +854,15 @@ def initialize(message, line_number)
     end
   end
 
+  # The error thrown when the parser encounters invalid encoding in CSV.
+  class InvalidEncodingError < MalformedCSVError
+    attr_reader :encoding
+    def initialize(encoding, line_number)
+      @encoding = encoding
+      super("Invalid byte sequence in #{encoding}", line_number)
+    end
+  end
+
   #
   # A FieldInfo Struct contains details about a field's position in the data
   # source it was read from. CSV will pass this Struct to some blocks that make
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index ed9297fe305ac6..4da87fbac8f049 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -414,8 +414,7 @@ def parse(&block)
           else
             lineno = @lineno + 1
           end
-          message = "Invalid byte sequence in #{@encoding}"
-          raise MalformedCSVError.new(message, lineno)
+          raise InvalidEncodingError.new(@encoding, lineno)
         rescue UnexpectedError => error
           if @scanner
             ignore_broken_line
@@ -876,8 +875,7 @@ def build_scanner
                 !line.valid_encoding?
               end
               if index
-                message = "Invalid byte sequence in #{@encoding}"
-                raise MalformedCSVError.new(message, @lineno + index + 1)
+                raise InvalidEncodingError.new(@encoding, @lineno + index + 1)
               end
             end
             Scanner.new(string)
diff --git a/test/csv/interface/test_read.rb b/test/csv/interface/test_read.rb
index 001177036ab3c6..9b35dc2e01c794 100644
--- a/test/csv/interface/test_read.rb
+++ b/test/csv/interface/test_read.rb
@@ -113,11 +113,11 @@ def test_open_encoding_invalid
       file << "\u{1F600},\u{1F601}"
     end
     CSV.open(@input.path, encoding: "EUC-JP") do |csv|
-      error = assert_raise(CSV::MalformedCSVError) do
+      error = assert_raise(CSV::InvalidEncodingError) do
         csv.shift
       end
-      assert_equal("Invalid byte sequence in EUC-JP in line 1.",
-                   error.message)
+      assert_equal([Encoding::EUC_JP, "Invalid byte sequence in EUC-JP in line 1."],
+                   [error.encoding, error.message])
     end
   end
 
diff --git a/test/csv/test_encodings.rb b/test/csv/test_encodings.rb
index 032569da7a354e..55a7a60f2e54f7 100644
--- a/test/csv/test_encodings.rb
+++ b/test/csv/test_encodings.rb
@@ -280,12 +280,12 @@ def test_row_separator_detection_with_invalid_encoding
   def test_invalid_encoding_row_error
     csv = CSV.new("valid,x\rinvalid,\xF8\r".force_encoding("UTF-8"),
                   encoding: "UTF-8", row_sep: "\r")
-    error = assert_raise(CSV::MalformedCSVError) do
+    error = assert_raise(CSV::InvalidEncodingError) do
       csv.shift
       csv.shift
     end
-    assert_equal("Invalid byte sequence in UTF-8 in line 2.",
-                 error.message)
+    assert_equal([Encoding::UTF_8, "Invalid byte sequence in UTF-8 in line 2."],
+                 [error.encoding, error.message])
   end
 
   def test_string_input_transcode