This repository has been archived by the owner on May 7, 2023. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
254 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/// Parses single lines of CSV text into lists of field values.
///
/// Fields are separated by commas. A field that begins with a double quote is
/// an escaped field: commas inside it are literal, and a literal double quote
/// is written as two consecutive double quotes.
///
/// See RFC 4180 for details on the CSV format.
class CsvParser {
  /// Parses [line] into the list of field values it contains.
  ///
  /// An empty [line] yields an empty list. A [line] that ends with a comma
  /// yields a final empty field.
  ///
  /// Throws a [FormatException] if an unescaped field contains a double
  /// quote, if an escaped field is never closed, or if the closing quote of an
  /// escaped field is followed by anything other than a comma or the end of
  /// the line.
  static List<String> parseLine(String line) {
    final records = <String>[];
    // Guard the empty line explicitly: there is nothing to index into.
    if (line.isEmpty) return records;

    var i = 0;
    while (true) {
      final record = StringBuffer();
      if (i < line.length && line[i] == "\"") {
        // The field begins with a double quote, so parse it with proper
        // character escaping - see RFC 4180 2.5-2.7.
        i = parseEscapedField(line, record, i);
        if (i < line.length && line[i] != ",") {
          // Without this check the stray characters would be silently
          // dropped or mis-assigned to the following field.
          throw FormatException(
              "An escaped field was followed by a character other than a "
              "comma. See section 2 of https://tools.ietf.org/html/rfc4180.\n"
              "character $i of line:\n$line\n");
        }
      } else {
        // When i == line.length (the line ended with a comma) this consumes
        // nothing and leaves record empty, producing the trailing blank field.
        i = parseField(line, record, i);
      }
      records.add(record.toString());

      // i now sits on a comma or at the end of the line.
      if (i >= line.length) return records;
      // Step past the comma to the start of the next field.
      i++;
    }
  }

  /// Writes the unescaped field of [line] that starts at [startIndex] to
  /// [record].
  ///
  /// Characters are copied until a comma or the end of [line] is reached.
  /// Returns the index of that comma, or `line.length` if the field runs to
  /// the end of the line.
  ///
  /// Throws a [FormatException] if the field contains a double quote.
  static int parseField(String line, StringBuffer record, int startIndex) {
    var i = startIndex;
    while (i < line.length && line[i] != ",") {
      if (line[i] == "\"") {
        throw FormatException("A field contained an unescaped double quote. "
            "See section 2.5 of https://tools.ietf.org/html/rfc4180.\n"
            "character $i of line:\n$line\n");
      }
      record.write(line[i]);
      i++;
    }
    return i;
  }

  /// Writes the escaped field of [line] that starts at [startIndex] to
  /// [record].
  ///
  /// Like [parseField], but with support for character escaping. The character
  /// at [startIndex] must be the opening double quote. The enclosing quotes
  /// are not written to [record]; each pair of consecutive double quotes
  /// inside the field is written as a single double quote.
  ///
  /// Returns the index immediately after the closing double quote.
  ///
  /// Throws a [FormatException] if the end of [line] is reached before a
  /// closing double quote is found.
  static int parseEscapedField(
      String line, StringBuffer record, int startIndex) {
    var i = startIndex;
    assert(
        line[i] == "\"",
        "parseEscapedField was called on an unescaped field at"
        " char $i of line $line");
    // Increment past the first char (the opening double quote).
    i++;
    while (i < line.length) {
      if (line[i] == "\"") {
        if (i + 1 < line.length && line[i + 1] == "\"") {
          // Two consecutive double quotes encode one literal double quote:
          // skip the first and let the write below emit the second.
          i++;
        } else {
          // A lone double quote ends the escaped field.
          return i + 1;
        }
      }
      record.write(line[i]);
      i++;
    }
    // Reached end of line without finding the closing quote.
    throw FormatException(
        "A field contained an escape quote without a closing escape quote. "
        "See section 2.5 of https://tools.ietf.org/html/rfc4180.\n"
        "character $i of line:\n$line\n");
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import 'package:df/src/util/csv_parser.dart'; | ||
import 'package:test/test.dart'; | ||
|
||
/// Unit tests for [CsvParser]: one test per static method.
void main() {
  test("test parseField", () {
    var line = "a,bc,def";

    // The first field stops at the comma at index 1.
    var record = StringBuffer();
    expect(CsvParser.parseField(line, record, 0), 1);
    expect(record.toString(), "a");

    // The second field has two chars and stops at the comma at index 4.
    record = StringBuffer();
    expect(CsvParser.parseField(line, record, 2), 4);
    expect(record.toString(), "bc");

    // The final field runs to the end of the line.
    record = StringBuffer();
    expect(CsvParser.parseField(line, record, 5), 8);
    expect(record.toString(), "def");

    // A double quote in an unescaped field throws an error.
    record = StringBuffer();
    line = "a,b\",c";
    expect(() => CsvParser.parseField(line, record, 2),
        throwsA(isA<FormatException>()));
  });

  test("test parseEscapedField", () {
    var line = "\"a\",\"b,c\"";

    // The enclosing escape quotes aren't added to record.
    var record = StringBuffer();
    expect(CsvParser.parseEscapedField(line, record, 0), 3);
    expect(record.toString(), "a");

    // A comma inside an escaped field is kept as part of the value.
    record = StringBuffer();
    expect(CsvParser.parseEscapedField(line, record, 4), 9);
    expect(record.toString(), "b,c");

    // A properly escaped double quote is added to record.
    line = "a,\"b\"\"\",c";
    record = StringBuffer();
    expect(CsvParser.parseEscapedField(line, record, 2), 7);
    expect(record.toString(), "b\"");

    // A FormatException is thrown if there's a hanging escape quote.
    line = "a,\"b,c";
    record = StringBuffer();
    expect(() => CsvParser.parseEscapedField(line, record, 2),
        throwsA(isA<FormatException>()));
  });

  test("test parseLine", () {
    // A generic line with no escaping.
    expect(CsvParser.parseLine("a,bc,def"), ["a", "bc", "def"]);

    // A generic line with a blank final field.
    expect(CsvParser.parseLine("a,b,"), ["a", "b", ""]);

    // A line with basic escaping.
    expect(CsvParser.parseLine("a,\"bc\",\"def\""), ["a", "bc", "def"]);

    // A line with escaped commas.
    expect(
        CsvParser.parseLine("a,\"b,c\",\"d,e,f\""), ["a", "b,c", "d,e,f"]);

    // A line with escaped double quotes and commas.
    expect(CsvParser.parseLine("a,\"b\"\"c\",\"d,e,f\"\"\""),
        ["a", "b\"c", "d,e,f\""]);

    // A line with an unclosed escape quote.
    expect(() => CsvParser.parseLine("a,\"b\"\",c"),
        throwsA(isA<FormatException>()));
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters