Skip to content

Commit

Permalink
Limit the maximum field count per record to 16,384 to avoid OutOfMemo…
Browse files Browse the repository at this point in the history
…ryErrors; also align exceptions
  • Loading branch information
osiegmar committed Dec 5, 2023
1 parent c9f5f6d commit b316263
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 16 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Updated naming (rows/lines -> records, columns -> fields, differentiate between lines and records)
- Rename `errorOnDifferentFieldCount()` to `ignoreDifferentFieldCount()`
- Raised the maximum field size to 16 MiB to match SUPER data type capabilities of Amazon Redshift
- Throw `CsvParseException` instead of `IOException` when maximum field size is exceeded
- Limit the maximum field count per record to 16,384 to avoid OutOfMemoryErrors

## [2.2.2] - 2023-05-13
### Added
Expand Down
26 changes: 18 additions & 8 deletions lib/src/intTest/java/blackbox/reader/CsvReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@

import de.siegmar.fastcsv.reader.CloseableIterator;
import de.siegmar.fastcsv.reader.CommentStrategy;
import de.siegmar.fastcsv.reader.CsvParseException;
import de.siegmar.fastcsv.reader.CsvReader;
import de.siegmar.fastcsv.reader.CsvRecord;
import de.siegmar.fastcsv.reader.MalformedCsvException;
import testutil.CsvRecordAssert;

@SuppressWarnings({
Expand Down Expand Up @@ -149,7 +149,7 @@ void differentFieldCountFail() {
crb.ignoreDifferentFieldCount(false);

assertThatThrownBy(() -> readAll("foo\nbar,\"baz\nbax\""))
.isInstanceOf(MalformedCsvException.class)
.isInstanceOf(CsvParseException.class)
.hasMessage("Record 2 has 2 fields, but first record had 1 fields");
}

Expand Down Expand Up @@ -242,6 +242,18 @@ void refillBufferInDataWithQuote() {
.endsWith("a\"b\"c", "d", "XX");
}

// fields exceed

@Test
void fieldsExceed() {
final char[] buf = new char[16 * 1024];
Arrays.fill(buf, ',');

assertThatThrownBy(() -> crb.build(new CharArrayReader(buf)).stream().count())
.isInstanceOf(CsvParseException.class)
.hasMessage("Maximum number of fields exceeded: %s", buf.length);
}

// buffer exceed

@Test
Expand All @@ -255,9 +267,8 @@ void bufferExceed() {
buf[buf.length - 1] = (byte) 'X';

assertThatThrownBy(() -> crb.build(new CharArrayReader(buf)).iterator().next())
.isInstanceOf(UncheckedIOException.class)
.hasMessage("IOException when reading first record")
.rootCause().hasMessage("Maximum buffer size %s is not enough to read data of a single field. "
.isInstanceOf(CsvParseException.class)
.hasMessage("Maximum buffer size %s is not enough to read data of a single field. "
+ "Typically, this happens if quotation started but did not end within this buffer's "
+ "maximum boundary.", buf.length);
}
Expand All @@ -273,9 +284,8 @@ void bufferExceedSubsequentRecord() {
iterator.next();

assertThatThrownBy(iterator::next)
.isInstanceOf(UncheckedIOException.class)
.hasMessage("IOException when reading record that started in line 2")
.rootCause().hasMessage("Maximum buffer size %s is not enough to read data of a single field. "
.isInstanceOf(CsvParseException.class)
.hasMessage("Maximum buffer size %s is not enough to read data of a single field. "
+ "Typically, this happens if quotation started but did not end within this buffer's "
+ "maximum boundary.", buf.length);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
/**
* Exception to be thrown when malformed csv data is read.
*/
public class MalformedCsvException extends RuntimeException {
public class CsvParseException extends RuntimeException {

private static final long serialVersionUID = 1L;

Expand All @@ -12,7 +12,7 @@ public class MalformedCsvException extends RuntimeException {
*
* @param message the cause for this exception
*/
public MalformedCsvException(final String message) {
public CsvParseException(final String message) {
super(message);
}

Expand Down
4 changes: 2 additions & 2 deletions lib/src/main/java/de/siegmar/fastcsv/reader/CsvReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ private CsvRecord fetchRow() throws IOException {
if (firstRecordFieldCount == -1) {
firstRecordFieldCount = fieldCount;
} else if (fieldCount != firstRecordFieldCount) {
throw new MalformedCsvException(
throw new CsvParseException(
String.format("Record %d has %d fields, but first record had %d fields",
csvRecord.getStartingLineNumber(), fieldCount, firstRecordFieldCount));
}
Expand Down Expand Up @@ -310,7 +310,7 @@ public CsvReaderBuilder skipEmptyLines(final boolean skipEmptyLines) {
}

/**
* Defines if an {@link MalformedCsvException} should be thrown if records do contain a
* Defines if an {@link CsvParseException} should be thrown if records do contain a
* different number of fields.
*
* @param ignoreDifferentFieldCount if exception should be suppressed, when CSV data contains
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
final class RecordHandler {

private static final int INITIAL_FIELDS_SIZE = 32;
private static final int MAX_FIELDS_SIZE = 16 * 1024;
private final FieldModifier fieldModifier;

private int len;
Expand Down Expand Up @@ -44,6 +45,9 @@ void add(String value, final boolean quoted) {

private void extendCapacity() {
len *= 2;
if (len > MAX_FIELDS_SIZE) {
throw new CsvParseException("Maximum number of fields exceeded: " + MAX_FIELDS_SIZE);
}
final String[] newFields = new String[len];
System.arraycopy(fields, 0, newFields, 0, idx);
fields = newFields;
Expand Down
6 changes: 2 additions & 4 deletions lib/src/main/java/de/siegmar/fastcsv/reader/RecordReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -325,12 +325,10 @@ private boolean fetchData() throws IOException {
return false;
}

private static char[] extendAndRelocate(final char[] buf, final int begin)
throws IOException {

private static char[] extendAndRelocate(final char[] buf, final int begin) {
final int newBufferSize = buf.length * 2;
if (newBufferSize > MAX_BUFFER_SIZE) {
throw new IOException("Maximum buffer size " + MAX_BUFFER_SIZE + " is not enough "
throw new CsvParseException("Maximum buffer size " + MAX_BUFFER_SIZE + " is not enough "
+ "to read data of a single field. Typically, this happens if quotation "
+ "started but did not end within this buffer's maximum boundary.");
}
Expand Down

0 comments on commit b316263

Please sign in to comment.