diff --git a/lib/scanner/csv-scanner/csv-scanner.c b/lib/scanner/csv-scanner/csv-scanner.c
index 3eb0fb6621..5916aeb725 100644
--- a/lib/scanner/csv-scanner/csv-scanner.c
+++ b/lib/scanner/csv-scanner/csv-scanner.c
@@ -176,6 +176,43 @@ _parse_left_whitespace(CSVScanner *self)
   _skip_whitespace(&self->src);
 }
 
+/*
+ * Convert a single hexadecimal digit (0-9, a-f, A-F) to its numeric value.
+ *
+ * Returns 0..15 on success, or -1 if xdigit is not a hexadecimal digit.
+ */
+static gint
+_decode_xdigit(gchar xdigit)
+{
+  if (xdigit >= '0' && xdigit <= '9')
+    return xdigit - '0';
+  if (xdigit >= 'a' && xdigit <= 'f')
+    return xdigit - 'a' + 10;
+  if (xdigit >= 'A' && xdigit <= 'F')
+    return xdigit - 'A' + 10;
+  return -1;
+}
+
+/*
+ * Combine two hexadecimal digits (high nibble first) into one byte value.
+ *
+ * Returns 0..255 on success, or -1 if either character is not a hex digit.
+ * The result is deliberately a gint: narrowing it to gchar before checking
+ * for -1 would make 0x80..0xFF indistinguishable from the error value on
+ * platforms where char is signed.
+ */
+static gint
+_decode_xbyte(gchar xdigit1, gchar xdigit2)
+{
+  gint nibble_hi, nibble_lo;
+
+  nibble_hi = _decode_xdigit(xdigit1);
+  nibble_lo = _decode_xdigit(xdigit2);
+  if (nibble_hi < 0 || nibble_lo < 0)
+    return -1;
+  return (nibble_hi << 4) + nibble_lo;
+}
+
 static void
 _parse_character_with_quotation(CSVScanner *self)
 {
@@ -213,6 +250,23 @@ _parse_character_with_quotation(CSVScanner *self)
             case 'v':
               ch = '\v';
               break;
+            case 'x':
+              /* \xHH needs two more characters; otherwise ch is left untouched */
+              if (*(self->src+1) && *(self->src+2))
+                {
+                  /* test the gint result, not the narrowed ch, so that
+                   * bytes >= 0x80 are not mistaken for the -1 error value */
+                  gint xbyte = _decode_xbyte(*(self->src+1), *(self->src+2));
+
+                  if (xbyte >= 0)
+                    {
+                      ch = (gchar) xbyte;
+                      self->src += 2;
+                    }
+                  else
+                    ch = 'x';
+                }
+              break;
             default:
               break;
             }
diff --git a/lib/scanner/csv-scanner/tests/test_csv_scanner.c b/lib/scanner/csv-scanner/tests/test_csv_scanner.c
index dd91b7ead5..270cad1e0f 100644
--- a/lib/scanner/csv-scanner/tests/test_csv_scanner.c
+++ b/lib/scanner/csv-scanner/tests/test_csv_scanner.c
@@ -359,6 +359,96 @@ Test(csv_scanner, escape_backslash_sequences)
   csv_scanner_deinit(&scanner);
 }
 
+/* \xHH escapes inside a quoted value are decoded to the byte they encode */
+Test(csv_scanner, escape_backslash_x_sequences)
+{
+  const gchar *columns[] = { "foo", "bar", NULL };
+
+  _default_options_with_flags(columns, CSV_SCANNER_STRIP_WHITESPACE);
+
+  csv_scanner_options_set_dialect(&options, CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES);
+  csv_scanner_init(&scanner, &options, "foo,\"\\x41\\x00\\x40\"");
+
+  cr_expect(_column_name_equals("foo"));
+  cr_expect(!_scan_complete());
+
+  cr_expect(_scan_next());
+  cr_expect(_column_name_equals("foo"));
+  cr_expect(!_scan_complete());
+
+  cr_expect(_scan_next());
+  cr_expect(_column_name_equals("bar"));
+  /* NOTE(review): the expected literal has an embedded NUL; if
+   * _column_nv_equals() compares NUL-terminated strings, only "A" is
+   * actually verified here -- confirm */
+  cr_expect(_column_nv_equals("bar", "A\x00@"));
+  cr_expect(!_scan_complete());
+
+  /* go past the last column */
+  cr_expect(!_scan_next());
+  cr_expect(_scan_complete());
+  cr_expect(_column_name_unset());
+  csv_scanner_deinit(&scanner);
+}
+
+/* a \x that is not followed by two hex digits is kept as a literal 'x' */
+Test(csv_scanner, escape_backslash_invalid_x_sequence)
+{
+  const gchar *columns[] = { "foo", "bar", NULL };
+
+  _default_options_with_flags(columns, CSV_SCANNER_STRIP_WHITESPACE);
+
+  csv_scanner_options_set_dialect(&options, CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES);
+  csv_scanner_init(&scanner, &options, "foo,\"\\x4Q\"");
+
+  cr_expect(_column_name_equals("foo"));
+  cr_expect(!_scan_complete());
+
+  cr_expect(_scan_next());
+  cr_expect(_column_name_equals("foo"));
+  cr_expect(!_scan_complete());
+
+  cr_expect(_scan_next());
+  cr_expect(_column_name_equals("bar"));
+  cr_expect(_column_nv_equals("bar", "x4Q"));
+  cr_expect(!_scan_complete());
+
+  /* go past the last column */
+  cr_expect(!_scan_next());
+  cr_expect(_scan_complete());
+  cr_expect(_column_name_unset());
+  csv_scanner_deinit(&scanner);
+}
+
+/* bytes >= 0x80 must be decoded too, even where gchar is a signed type */
+Test(csv_scanner, escape_backslash_x_high_byte_sequences)
+{
+  const gchar *columns[] = { "foo", "bar", NULL };
+
+  _default_options_with_flags(columns, CSV_SCANNER_STRIP_WHITESPACE);
+
+  csv_scanner_options_set_dialect(&options, CSV_SCANNER_ESCAPE_BACKSLASH_WITH_SEQUENCES);
+  csv_scanner_init(&scanner, &options, "foo,\"\\xc3\\xA9!\"");
+
+  cr_expect(_column_name_equals("foo"));
+  cr_expect(!_scan_complete());
+
+  cr_expect(_scan_next());
+  cr_expect(_column_name_equals("foo"));
+  cr_expect(!_scan_complete());
+
+  cr_expect(_scan_next());
+  cr_expect(_column_name_equals("bar"));
+  cr_expect(_column_nv_equals("bar", "\xc3\xa9!"));
+  cr_expect(!_scan_complete());
+
+  /* go past the last column */
+  cr_expect(!_scan_next());
+  cr_expect(_scan_complete());
+  cr_expect(_column_name_unset());
+  csv_scanner_deinit(&scanner);
+}
+
 static void
 setup(void)
 {