Skip to content

Commit

Permalink
Add support for multi-line fields supported by RFC4180 when escapeCol…
Browse files Browse the repository at this point in the history
…umnDelimitersCSV is set to true
  • Loading branch information
funkyjive committed Feb 22, 2024
1 parent ba88da8 commit 3a536a1
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,47 @@ private void initFileReader(InputStreamReader sr, String encoding, String demlim
}
}

/* RFC 4180 specifies the rules for quoted fields. It allows quoted string data to contain newlines,
provided the contents otherwise conform to the rules for escaping quotes. For example, the following is valid:
"a","b","c"
"aaa","b <-- newline is retained in data field
bb","c"
"aa","bb","cc"
We cannot simply use fileReader.readLine() to read these records but instead must continue reading until we reach
a newline that is not contained within quotes.
*/
/**
 * Reads the next record from {@code fileReader}, treating line breaks that occur inside a
 * double-quoted field as data rather than as the end of the record.
 *
 * A record ends at the first {@code \n}, {@code \r} or {@code \r\n} that is seen while an even
 * number of quote characters has been read, or at end of stream. The terminator itself is not
 * included in the returned text.
 *
 * @return the record text (possibly empty), or {@code null} when the stream is exhausted and no
 *         characters were read for this record
 * @throws SQLServerException
 *         if the stream ends while a quoted field is still open, or if the underlying read fails
 */
private String readLineEscapeDelimiters() throws SQLServerException {
    boolean insideQuotes = false;
    boolean terminatorSeen = false;
    StringBuilder sb = new StringBuilder();
    try {
        int c;
        while ((c = fileReader.read()) != -1) {
            if ((c == '\n' || c == '\r') && !insideQuotes) { // newlines only end the record outside quotes
                if (c == '\r') {
                    // Only a \r can be the first half of a two-character terminator. Peeking after a
                    // bare \n would swallow a following empty line.
                    fileReader.mark(1);
                    if (fileReader.read() != '\n') {
                        fileReader.reset(); // not \r\n: leave the character for the next record
                    }
                }
                terminatorSeen = true;
                break;
            }
            sb.append((char) c);
            if (c == '"') {
                // An escaped quote ("") toggles twice, so the state is unchanged afterwards.
                insideQuotes = !insideQuotes;
            }
        }
        if (!terminatorSeen) { // the loop ended because the stream is exhausted
            if (insideQuotes) { // stream ended, but we are within quotes -- data problem
                throw new SQLServerException(SQLServerException.getErrString("R_InvalidCSVQuotes"), null, 0, null);
            }
            if (sb.length() == 0) { // keep semantics of readLine(): null only when there is no more data
                return null;
            }
            // Otherwise fall through and return the final record that has no trailing line break.
        }
    } catch (IOException e) {
        throw new SQLServerException(e.getMessage(), null, 0, e);
    }
    return sb.toString();
}

/**
 * Sets the inherited {@code loggerPackageName} field to the string
 * "com.microsoft.sqlserver.jdbc.SQLServerBulkCSVFileRecord".
 * NOTE(review): presumably this is the fully-qualified name of the enclosing class -- confirm.
 */
private void initLoggerResources() {
super.loggerPackageName = "com.microsoft.sqlserver.jdbc.SQLServerBulkCSVFileRecord";
}
Expand Down Expand Up @@ -526,7 +567,7 @@ else if ((null != columnNames) && (columnNames.length >= positionInSource))
@Override
public boolean next() throws SQLServerException {
try {
currentLine = fileReader.readLine();
currentLine = escapeDelimiters ? readLineEscapeDelimiters() : fileReader.readLine();
} catch (IOException e) {
throw new SQLServerException(e.getMessage(), null, 0, e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ public void testEscapeColumnDelimitersCSV() throws Exception {
/*
* The list below is a copy of inputFileDelimiterEscape with quotes removed.
*/
String[][] expectedEscaped = new String[11][4];
String[][] expectedEscaped = new String[12][4];
expectedEscaped[0] = new String[] {"test", " test\"", "no@split", " testNoQuote", ""};
expectedEscaped[1] = new String[] {null, null, null, null, ""};
expectedEscaped[2] = new String[] {"\"", "test\"test", "test@\" test", null, ""};
Expand All @@ -166,6 +166,7 @@ public void testEscapeColumnDelimitersCSV() throws Exception {
expectedEscaped[8] = new String[] {"1997", "Ford", "E350", "Super@ \"luxurious\" truck", ""};
expectedEscaped[9] = new String[] {"1997", "Ford", "E350", "E63", ""};
expectedEscaped[10] = new String[] {"1997", "Ford", "E350", " Super luxurious truck ", ""};
expectedEscaped[11] = new String[] {"1997", "F\r\no\r\nr\r\nd", "E350", "\"Super\" \"luxurious\" \"truck\"", ""};

try (Connection con = getConnection(); Statement stmt = con.createStatement();
SQLServerBulkCopy bulkCopy = new SQLServerBulkCopy(con);
Expand Down
4 changes: 4 additions & 0 deletions src/test/resources/BulkCopyCSVTestInputDelimiterEscape.csv
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@
9@1997@Ford@E350@"Super@ ""luxurious"" truck"@
10@1997@ "Ford" @E350@ "E63"@
11@1997@Ford@E350@" Super luxurious truck "@
12@1997@"F
o
r
d"@"E350"@"""Super"" ""luxurious"" ""truck"""@

0 comments on commit 3a536a1

Please sign in to comment.