diff --git a/CHANGELOG.md b/CHANGELOG.md index e3d5e40e..1fabdf83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated naming (rows/lines -> records, columns -> fields, differentiate between lines and records) - Rename `errorOnDifferentFieldCount()` to `ignoreDifferentFieldCount()` - Raised the maximum field size to 16 MiB to match SUPER data type capabilities of Amazon Redshift +- Throw `CsvParseException` instead of `IOException` when maximum field size is exceeded +- Limit the maximum field count per record to 16,384 to avoid OutOfMemoryErrors ## [2.2.2] - 2023-05-13 ### Added diff --git a/lib/src/intTest/java/blackbox/reader/CsvReaderTest.java b/lib/src/intTest/java/blackbox/reader/CsvReaderTest.java index a0fea184..dbc22bc3 100644 --- a/lib/src/intTest/java/blackbox/reader/CsvReaderTest.java +++ b/lib/src/intTest/java/blackbox/reader/CsvReaderTest.java @@ -26,9 +26,9 @@ import de.siegmar.fastcsv.reader.CloseableIterator; import de.siegmar.fastcsv.reader.CommentStrategy; +import de.siegmar.fastcsv.reader.CsvParseException; import de.siegmar.fastcsv.reader.CsvReader; import de.siegmar.fastcsv.reader.CsvRecord; -import de.siegmar.fastcsv.reader.MalformedCsvException; import testutil.CsvRecordAssert; @SuppressWarnings({ @@ -149,7 +149,7 @@ void differentFieldCountFail() { crb.ignoreDifferentFieldCount(false); assertThatThrownBy(() -> readAll("foo\nbar,\"baz\nbax\"")) - .isInstanceOf(MalformedCsvException.class) + .isInstanceOf(CsvParseException.class) .hasMessage("Record 2 has 2 fields, but first record had 1 fields"); } @@ -242,6 +242,18 @@ void refillBufferInDataWithQuote() { .endsWith("a\"b\"c", "d", "XX"); } + // fields exceed + + @Test + void fieldsExceed() { + final char[] buf = new char[16 * 1024]; + Arrays.fill(buf, ','); + + assertThatThrownBy(() -> crb.build(new CharArrayReader(buf)).stream().count()) + .isInstanceOf(CsvParseException.class) + .hasMessage("Maximum number of fields exceeded: %s", buf.length); + } + // buffer exceed @Test @@ -255,9 +267,8 @@ void bufferExceed() { buf[buf.length - 1] = (byte) 'X'; assertThatThrownBy(() -> crb.build(new CharArrayReader(buf)).iterator().next()) - .isInstanceOf(UncheckedIOException.class) - .hasMessage("IOException when reading first record") - .rootCause().hasMessage("Maximum buffer size %s is not enough to read data of a single field. " + .isInstanceOf(CsvParseException.class) + .hasMessage("Maximum buffer size %s is not enough to read data of a single field. " + "Typically, this happens if quotation started but did not end within this buffer's " + "maximum boundary.", buf.length); } @@ -273,9 +284,8 @@ void bufferExceedSubsequentRecord() { iterator.next(); assertThatThrownBy(iterator::next) - .isInstanceOf(UncheckedIOException.class) - .hasMessage("IOException when reading record that started in line 2") - .rootCause().hasMessage("Maximum buffer size %s is not enough to read data of a single field. " + .isInstanceOf(CsvParseException.class) + .hasMessage("Maximum buffer size %s is not enough to read data of a single field. " + "Typically, this happens if quotation started but did not end within this buffer's " + "maximum boundary.", buf.length); } diff --git a/lib/src/main/java/de/siegmar/fastcsv/reader/MalformedCsvException.java b/lib/src/main/java/de/siegmar/fastcsv/reader/CsvParseException.java similarity index 72% rename from lib/src/main/java/de/siegmar/fastcsv/reader/MalformedCsvException.java rename to lib/src/main/java/de/siegmar/fastcsv/reader/CsvParseException.java index c5b30b1b..21cfe4ca 100644 --- a/lib/src/main/java/de/siegmar/fastcsv/reader/MalformedCsvException.java +++ b/lib/src/main/java/de/siegmar/fastcsv/reader/CsvParseException.java @@ -3,7 +3,7 @@ /** * Exception to be thrown when malformed csv data is read. */ -public class MalformedCsvException extends RuntimeException { +public class CsvParseException extends RuntimeException { private static final long serialVersionUID = 1L; @@ -12,7 +12,7 @@ public class MalformedCsvException extends RuntimeException { * * @param message the cause for this exception */ - public MalformedCsvException(final String message) { + public CsvParseException(final String message) { super(message); } diff --git a/lib/src/main/java/de/siegmar/fastcsv/reader/CsvReader.java b/lib/src/main/java/de/siegmar/fastcsv/reader/CsvReader.java index 92a84904..bd363bc8 100644 --- a/lib/src/main/java/de/siegmar/fastcsv/reader/CsvReader.java +++ b/lib/src/main/java/de/siegmar/fastcsv/reader/CsvReader.java @@ -155,7 +155,7 @@ private CsvRecord fetchRow() throws IOException { if (firstRecordFieldCount == -1) { firstRecordFieldCount = fieldCount; } else if (fieldCount != firstRecordFieldCount) { - throw new MalformedCsvException( + throw new CsvParseException( String.format("Record %d has %d fields, but first record had %d fields", csvRecord.getStartingLineNumber(), fieldCount, firstRecordFieldCount)); } @@ -310,7 +310,7 @@ public CsvReaderBuilder skipEmptyLines(final boolean skipEmptyLines) { } /** - * Defines if an {@link MalformedCsvException} should be thrown if records do contain a + * Defines if an {@link CsvParseException} should be thrown if records do contain a * different number of fields. * * @param ignoreDifferentFieldCount if exception should be suppressed, when CSV data contains diff --git a/lib/src/main/java/de/siegmar/fastcsv/reader/RecordHandler.java b/lib/src/main/java/de/siegmar/fastcsv/reader/RecordHandler.java index 5ddb6fab..b737afbe 100644 --- a/lib/src/main/java/de/siegmar/fastcsv/reader/RecordHandler.java +++ b/lib/src/main/java/de/siegmar/fastcsv/reader/RecordHandler.java @@ -3,6 +3,7 @@ final class RecordHandler { private static final int INITIAL_FIELDS_SIZE = 32; + private static final int MAX_FIELDS_SIZE = 16 * 1024; private final FieldModifier fieldModifier; private int len; @@ -44,6 +45,9 @@ void add(String value, final boolean quoted) { private void extendCapacity() { len *= 2; + if (len > MAX_FIELDS_SIZE) { + throw new CsvParseException("Maximum number of fields exceeded: " + MAX_FIELDS_SIZE); + } final String[] newFields = new String[len]; System.arraycopy(fields, 0, newFields, 0, idx); fields = newFields; diff --git a/lib/src/main/java/de/siegmar/fastcsv/reader/RecordReader.java b/lib/src/main/java/de/siegmar/fastcsv/reader/RecordReader.java index fa4a5d91..1257ba95 100644 --- a/lib/src/main/java/de/siegmar/fastcsv/reader/RecordReader.java +++ b/lib/src/main/java/de/siegmar/fastcsv/reader/RecordReader.java @@ -325,12 +325,10 @@ private boolean fetchData() throws IOException { return false; } - private static char[] extendAndRelocate(final char[] buf, final int begin) - throws IOException { - + private static char[] extendAndRelocate(final char[] buf, final int begin) { final int newBufferSize = buf.length * 2; if (newBufferSize > MAX_BUFFER_SIZE) { - throw new IOException("Maximum buffer size " + MAX_BUFFER_SIZE + " is not enough " + throw new CsvParseException("Maximum buffer size " + MAX_BUFFER_SIZE + " is not enough " + "to read data of a single field. Typically, this happens if quotation " + "started but did not end within this buffer's maximum boundary."); }